Commit eef99cd9 authored by Giuliano Belinassi, committed by Giuliano Belinassi

Refactor tree-loop-distribution.c for thread safety

This patch refactors tree-loop-distribution.c for thread safety without
resorting to thread-local storage (e.g. the GNU `__thread` extension). All
global variables were moved into `class loop_distribution`, which is
instantiated at ::execute time.

From-SVN: r278421
parent 8d890d37
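The heart of the change is a simple pattern: pass-private state moves from
file-scope globals into an object created per pass invocation. A condensed
sketch of the before/after shape (illustrative only, not the literal patch):

/* Before: file-scope state, shared by every invocation of the pass,
   hence racy if two threads ever run the pass concurrently.  */
static int *bb_top_order_index;

/* After: the same state as a member of a pass-local object.  */
class loop_distribution
{
  int *bb_top_order_index;   /* One private copy per instance.  */
public:
  unsigned int execute (function *fun);
};

unsigned int
pass_loop_distribution::execute (function *fun)
{
  /* Each invocation gets fresh state; no thread-local storage needed.  */
  return loop_distribution ().execute (fun);
}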
2019-11-18 Giuliano Belinassi <giuliano.belinassi@usp.br>
* cfgloop.c (get_loop_body_in_custom_order): New.
* cfgloop.h (get_loop_body_in_custom_order): New prototype.
* tree-loop-distribution.c (class loop_distribution): New.
(bb_top_order_cmp): Remove.
(bb_top_order_cmp_r): New.
(create_rdg_vertices): Move into class loop_distribution.
(stmts_from_loop): Same as above.
(update_for_merge): Same as above.
(partition_merge_into): Same as above.
(get_data_dependence): Same as above.
(data_dep_in_cycle_p): Same as above.
(update_type_for_merge): Same as above.
(build_rdg_partition_for_vertex): Same as above.
(classify_builtin_ldst): Same as above.
(classify_partition): Same as above.
(share_memory_accesses): Same as above.
(rdg_build_partitions): Same as above.
(pg_add_dependence_edges): Same as above.
(build_partition_graph): Same as above.
(merge_dep_scc_partitions): Same as above.
(break_alias_scc_partitions): Same as above.
(finalize_partitions): Same as above.
(distribute_loop): Same as above.
(bb_top_order_init): New method.
(bb_top_order_destroy): New method.
(get_bb_top_order_index_size): New method.
(get_bb_top_order_index): New method.
(loop_distribution::execute): New method.
(pass_loop_distribution::execute): Instantiate loop_distribution.
gcc/cfgloop.c:
@@ -980,6 +980,19 @@ get_loop_body_in_custom_order (const class loop *loop,
return bbs;
}
/* Same as above, but use gcc_sort_r instead of qsort. */
basic_block *
get_loop_body_in_custom_order (const class loop *loop, void *data,
int (*bb_comparator) (const void *, const void *, void *))
{
basic_block *bbs = get_loop_body (loop);
gcc_sort_r (bbs, loop->num_nodes, sizeof (basic_block), bb_comparator, data);
return bbs;
}
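For context, a hedged usage sketch of the new overload: the caller threads
arbitrary context through the void * parameter instead of relying on a global
visible to the comparator (cmp_by_order and order_map below are hypothetical
names, not part of the patch):

/* Hypothetical comparator in the qsort_r style the new overload expects:
   orders blocks by an index map received through DATA.  */
static int
cmp_by_order (const void *x, const void *y, void *data)
{
  const int *order_map = (const int *) data;
  basic_block bb1 = *(const basic_block *) x;
  basic_block bb2 = *(const basic_block *) y;
  return order_map[bb1->index] - order_map[bb2->index];
}

/* ORDER_MAP is an int array indexed by bb->index (hypothetical).
   Sort LOOP's body by it without touching any global state.  */
basic_block *bbs
  = get_loop_body_in_custom_order (loop, order_map, cmp_by_order);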
/* Get body of a LOOP in breadth first sort order. */
basic_block *
gcc/cfgloop.h:
@@ -376,6 +376,8 @@ extern basic_block *get_loop_body_in_dom_order (const class loop *);
extern basic_block *get_loop_body_in_bfs_order (const class loop *);
extern basic_block *get_loop_body_in_custom_order (const class loop *,
int (*) (const void *, const void *));
extern basic_block *get_loop_body_in_custom_order (const class loop *, void *,
int (*) (const void *, const void *, void *));
extern vec<edge> get_loop_exit_edges (const class loop *);
extern edge single_exit (const class loop *);
gcc/tree-loop-distribution.c:
@@ -154,21 +154,10 @@ ddr_hasher::equal (const data_dependence_relation *ddr1,
return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2));
}
/* The loop (nest) to be distributed. */
static vec<loop_p> loop_nest;
/* Vector of data references in the loop to be distributed. */
static vec<data_reference_p> datarefs_vec;
/* If there is nonaddressable data reference in above vector. */
static bool has_nonaddressable_dataref_p;
/* Store index of data reference in aux field. */
#define DR_INDEX(dr) ((uintptr_t) (dr)->aux)
/* Hash table for data dependence relation in the loop to be distributed. */
static hash_table<ddr_hasher> *ddrs_table;
/* A Reduced Dependence Graph (RDG) vertex representing a statement. */
struct rdg_vertex
{
@@ -215,6 +204,83 @@ struct rdg_edge
#define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
/* Kind of distributed loop. */
enum partition_kind {
PKIND_NORMAL,
/* Partial memset stands for a partition that can be distributed into a loop
of memset calls, rather than a single memset call. It's handled just
like a normal partition, i.e., distributed as a separate loop; no memset
call is generated.
Note: This is a hackish fix trying to distribute a ZERO-ing stmt in a
loop nest as deep as possible. As a result, parloop achieves better
parallelization by parallelizing a deeper loop nest. This hack should
be unnecessary and removed once distributed memset can be understood
and analyzed in data reference analysis. See PR82604 for more. */
PKIND_PARTIAL_MEMSET,
PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
};
/* Type of distributed loop. */
enum partition_type {
/* The distributed loop can be executed in parallel. */
PTYPE_PARALLEL = 0,
/* The distributed loop has to be executed sequentially. */
PTYPE_SEQUENTIAL
};
/* Builtin info for loop distribution. */
struct builtin_info
{
/* Data references that a kind != PKIND_NORMAL partition is about. */
data_reference_p dst_dr;
data_reference_p src_dr;
/* Base address and size of memory objects operated on by the builtin. Note
both dest and source memory objects must have the same size. */
tree dst_base;
tree src_base;
tree size;
/* Base and offset part of dst_base after stripping constant offset. This
is only used in memset builtin distribution for now. */
tree dst_base_base;
unsigned HOST_WIDE_INT dst_base_offset;
};
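For orientation, here is the kind of loop these structures describe: a
partition whose statements reduce to an element-wise copy can be classified
as a builtin memcpy/memmove partition, with dst_base/src_base/size filled in
from the data references. An illustrative example (not from the patch; the
memcpy vs. memmove choice depends on whether the accesses may overlap):

/* A source loop that loop distribution can classify as PKIND_MEMCPY.  */
void
copy (int *a, int *b, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = b[i];   /* becomes memcpy (a, b, n * sizeof (int))  */
}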
/* Partition for loop distribution. */
struct partition
{
/* Statements of the partition. */
bitmap stmts;
/* True if the partition defines a variable which is used outside of the loop. */
bool reduction_p;
location_t loc;
enum partition_kind kind;
enum partition_type type;
/* Data references in the partition. */
bitmap datarefs;
/* Information about the builtin partition. */
struct builtin_info *builtin;
};
/* Partitions are fused because of different reasons. */
enum fuse_type
{
FUSE_NON_BUILTIN = 0,
FUSE_REDUCTION = 1,
FUSE_SHARE_REF = 2,
FUSE_SAME_SCC = 3,
FUSE_FINALIZE = 4
};
/* Descriptions of the different fusing reasons. */
static const char *fuse_message[] = {
"they are non-builtins",
"they have reductions",
"they have shared memory refs",
"they are in the same dependence scc",
"there is no point to distribute loop"};
/* Dump vertex I in RDG to FILE. */
static void
@@ -435,11 +501,205 @@ create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
}
}
/* Build the vertices of the reduced dependence graph RDG. Return false
if that failed. */
static bool
create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop)
class loop_distribution
{
private:
/* The loop (nest) to be distributed. */
vec<loop_p> loop_nest;
/* Vector of data references in the loop to be distributed. */
vec<data_reference_p> datarefs_vec;
/* True if there is a nonaddressable data reference in the above vector. */
bool has_nonaddressable_dataref_p;
/* Hash table for data dependence relations in the loop to be distributed. */
hash_table<ddr_hasher> *ddrs_table;
/* Array mapping basic block's index to its topological order. */
int *bb_top_order_index;
/* And size of the array. */
int bb_top_order_index_size;
/* Build the vertices of the reduced dependence graph RDG. Return false
if that failed. */
bool create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop);
/* Initialize STMTS with all the statements of LOOP. We use topological
order to discover all statements. The order is important because
generate_loops_for_partition is using the same traversal for identifying
statements in loop copies. */
void stmts_from_loop (class loop *loop, vec<gimple *> *stmts);
/* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
LOOP, and one edge per flow dependence or control dependence from control
dependence CD. While visiting each statement, data references are also
collected and recorded in DATAREFS_VEC. */
struct graph * build_rdg (class loop *loop, control_dependences *cd);
/* Merge PARTITION into the partition DEST. RDG is the reduced dependence
graph and we update type for result partition if it is non-NULL. */
void partition_merge_into (struct graph *rdg,
partition *dest, partition *partition,
enum fuse_type ft);
/* Return data dependence relation for data references A and B. The two
data references must be in lexicographic order with respect to reduced
dependence graph RDG. We first try to find the ddr in the ddr hash table;
if it doesn't exist, compute the ddr and cache it. */
data_dependence_relation * get_data_dependence (struct graph *rdg,
data_reference_p a,
data_reference_p b);
/* In reduced dependence graph RDG for loop distribution, return true if
dependence between references DR1 and DR2 leads to a dependence cycle
and such dependence cycle can't be resolved by runtime alias check. */
bool data_dep_in_cycle_p (struct graph *rdg, data_reference_p dr1,
data_reference_p dr2);
/* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
PARTITION1's type after merging PARTITION2 into PARTITION1. */
void update_type_for_merge (struct graph *rdg,
partition *partition1, partition *partition2);
/* Returns a partition with all the statements needed for computing
the vertex V of the RDG, also including the loop exit conditions. */
partition *build_rdg_partition_for_vertex (struct graph *rdg, int v);
/* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify
whether they form a builtin memcpy or memmove call. */
void classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
data_reference_p dst_dr, data_reference_p src_dr);
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
For the moment we detect memset, memcpy and memmove patterns. Bitmap
STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions.
Returns true if there is a reduction in all partitions and we
possibly did not mark PARTITION as having one for this reason. */
bool
classify_partition (loop_p loop,
struct graph *rdg, partition *partition,
bitmap stmt_in_all_partitions);
/* Returns true when PARTITION1 and PARTITION2 access the same memory
object in RDG. */
bool share_memory_accesses (struct graph *rdg,
partition *partition1, partition *partition2);
/* For each seed statement in STARTING_STMTS, this function builds a
partition for it by adding dependent statements according to the RDG.
All partitions are recorded in PARTITIONS. */
void rdg_build_partitions (struct graph *rdg,
vec<gimple *> starting_stmts,
vec<partition *> *partitions);
/* Compute partition dependence created by the data references in DRS1
and DRS2, modify and return DIR according to that. If ALIAS_DDRS is
not NULL, we record dependence introduced by possible alias between
two data references in ALIAS_DDRS; otherwise, we simply ignore such
dependence as if it doesn't exist at all. */
int pg_add_dependence_edges (struct graph *rdg, int dir, bitmap drs1,
bitmap drs2, vec<ddr_p> *alias_ddrs);
/* Build and return partition dependence graph for PARTITIONS. RDG is
reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P
is true, data dependence caused by possible alias between references
is ignored, as if it doesn't exist at all; otherwise all dependences
are considered. */
struct graph *build_partition_graph (struct graph *rdg,
vec<struct partition *> *partitions,
bool ignore_alias_p);
/* Given reduced dependence graph RDG, merge strongly connected components
of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by
possible alias between references is ignored, as if it doesn't exist
at all; otherwise all dependences are considered. */
void merge_dep_scc_partitions (struct graph *rdg, vec<struct partition *>
*partitions, bool ignore_alias_p);
/* This is the main function breaking strongly connected components in
PARTITIONS, given reduced dependence graph RDG. Store data dependence
relations for runtime alias check in ALIAS_DDRS. */
void break_alias_scc_partitions (struct graph *rdg, vec<struct partition *>
*partitions, vec<ddr_p> *alias_ddrs);
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
ALIAS_DDRS contains ddrs which need runtime alias check. */
void finalize_partitions (class loop *loop, vec<struct partition *>
*partitions, vec<ddr_p> *alias_ddrs);
/* Distributes the code from LOOP in such a way that producer statements
are placed before consumer statements. Tries to separate only the
statements from STMTS into separate loops. Returns the number of
distributed loops. Set NB_CALLS to number of generated builtin calls.
Set *DESTROY_P to whether LOOP needs to be destroyed. */
int distribute_loop (class loop *loop, vec<gimple *> stmts,
control_dependences *cd, int *nb_calls, bool *destroy_p,
bool only_patterns_p);
/* Compute topological order for basic blocks. Topological order is
needed because data dependence is computed for data references in
lexicographical order. */
void bb_top_order_init (void);
void bb_top_order_destroy (void);
public:
/* Getter for bb_top_order. */
inline int get_bb_top_order_index_size (void)
{
return bb_top_order_index_size;
}
inline int get_bb_top_order_index (int i)
{
return bb_top_order_index[i];
}
unsigned int execute (function *fun);
};
/* If X has a smaller topological sort number than Y, returns -1;
if greater, returns 1. */
static int
bb_top_order_cmp_r (const void *x, const void *y, void *loop)
{
loop_distribution *_loop =
(loop_distribution *) loop;
basic_block bb1 = *(const basic_block *) x;
basic_block bb2 = *(const basic_block *) y;
int bb_top_order_index_size = _loop->get_bb_top_order_index_size ();
gcc_assert (bb1->index < bb_top_order_index_size
&& bb2->index < bb_top_order_index_size);
gcc_assert (bb1 == bb2
|| _loop->get_bb_top_order_index(bb1->index)
!= _loop->get_bb_top_order_index(bb2->index));
return (_loop->get_bb_top_order_index(bb1->index) -
_loop->get_bb_top_order_index(bb2->index));
}
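A side note on the comparator's final subtraction: it is safe here only
because topological indices are small non-negative ints, so the difference
cannot overflow, and the asserts guarantee a strict order. An overflow-proof
formulation of the same ordering, purely for illustration, would be:

/* Equivalent comparison without subtraction (illustrative only).  */
int i1 = _loop->get_bb_top_order_index (bb1->index);
int i2 = _loop->get_bb_top_order_index (bb2->index);
return (i1 > i2) - (i1 < i2);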
bool
loop_distribution::create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts,
loop_p loop)
{
int i;
gimple *stmt;
@@ -476,39 +736,11 @@ create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop)
return true;
}
/* Array mapping basic block's index to its topological order. */
static int *bb_top_order_index;
/* And size of the array. */
static int bb_top_order_index_size;
/* If X has a smaller topological sort number than Y, returns -1;
if greater, returns 1. */
static int
bb_top_order_cmp (const void *x, const void *y)
{
basic_block bb1 = *(const basic_block *) x;
basic_block bb2 = *(const basic_block *) y;
gcc_assert (bb1->index < bb_top_order_index_size
&& bb2->index < bb_top_order_index_size);
gcc_assert (bb1 == bb2
|| bb_top_order_index[bb1->index]
!= bb_top_order_index[bb2->index]);
return (bb_top_order_index[bb1->index] - bb_top_order_index[bb2->index]);
}
/* Initialize STMTS with all the statements of LOOP. We use topological
order to discover all statements. The order is important because
generate_loops_for_partition is using the same traversal for identifying
statements in loop copies. */
static void
stmts_from_loop (class loop *loop, vec<gimple *> *stmts)
void
loop_distribution::stmts_from_loop (class loop *loop, vec<gimple *> *stmts)
{
unsigned int i;
basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp);
basic_block *bbs = get_loop_body_in_custom_order (loop, this, bb_top_order_cmp_r);
for (i = 0; i < loop->num_nodes; i++)
{
@@ -557,13 +789,8 @@ free_rdg (struct graph *rdg)
free_graph (rdg);
}
/* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
LOOP, and one edge per flow dependence or control dependence from control
dependence CD. During visiting each statement, data references are also
collected and recorded in global data DATAREFS_VEC. */
static struct graph *
build_rdg (class loop *loop, control_dependences *cd)
struct graph *
loop_distribution::build_rdg (class loop *loop, control_dependences *cd)
{
struct graph *rdg;
@@ -586,65 +813,6 @@ build_rdg (class loop *loop, control_dependences *cd)
}
/* Kind of distributed loop. */
enum partition_kind {
PKIND_NORMAL,
/* Partial memset stands for a partition that can be distributed into a loop
of memset calls, rather than a single memset call. It's handled just
like a normal partition, i.e., distributed as a separate loop; no memset
call is generated.
Note: This is a hackish fix trying to distribute a ZERO-ing stmt in a
loop nest as deep as possible. As a result, parloop achieves better
parallelization by parallelizing a deeper loop nest. This hack should
be unnecessary and removed once distributed memset can be understood
and analyzed in data reference analysis. See PR82604 for more. */
PKIND_PARTIAL_MEMSET,
PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
};
/* Type of distributed loop. */
enum partition_type {
/* The distributed loop can be executed in parallel. */
PTYPE_PARALLEL = 0,
/* The distributed loop has to be executed sequentially. */
PTYPE_SEQUENTIAL
};
/* Builtin info for loop distribution. */
struct builtin_info
{
/* Data references that a kind != PKIND_NORMAL partition is about. */
data_reference_p dst_dr;
data_reference_p src_dr;
/* Base address and size of memory objects operated on by the builtin. Note
both dest and source memory objects must have the same size. */
tree dst_base;
tree src_base;
tree size;
/* Base and offset part of dst_base after stripping constant offset. This
is only used in memset builtin distribution for now. */
tree dst_base_base;
unsigned HOST_WIDE_INT dst_base_offset;
};
/* Partition for loop distribution. */
struct partition
{
/* Statements of the partition. */
bitmap stmts;
/* True if the partition defines a variable which is used outside of the loop. */
bool reduction_p;
location_t loc;
enum partition_kind kind;
enum partition_type type;
/* Data references in the partition. */
bitmap datarefs;
/* Information about the builtin partition. */
struct builtin_info *builtin;
};
/* Allocate and initialize a partition from BITMAP. */
static partition *
@@ -689,33 +857,9 @@ partition_reduction_p (partition *partition)
return partition->reduction_p;
}
/* Partitions are fused because of different reasons. */
enum fuse_type
{
FUSE_NON_BUILTIN = 0,
FUSE_REDUCTION = 1,
FUSE_SHARE_REF = 2,
FUSE_SAME_SCC = 3,
FUSE_FINALIZE = 4
};
/* Descriptions of the different fusing reasons. */
static const char *fuse_message[] = {
"they are non-builtins",
"they have reductions",
"they have shared memory refs",
"they are in the same dependence scc",
"there is no point to distribute loop"};
static void
update_type_for_merge (struct graph *, partition *, partition *);
/* Merge PARTITION into the partition DEST. RDG is the reduced dependence
graph and we update type for result partition if it is non-NULL. */
static void
partition_merge_into (struct graph *rdg, partition *dest,
partition *partition, enum fuse_type ft)
void
loop_distribution::partition_merge_into (struct graph *rdg,
partition *dest, partition *partition, enum fuse_type ft)
{
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -1201,13 +1345,9 @@ generate_code_for_partition (class loop *loop,
return false;
}
/* Return data dependence relation for data references A and B. The two
data references must be in lexicographic order with respect to reduced
dependence graph RDG. We first try to find the ddr in the ddr hash table;
if it doesn't exist, compute the ddr and cache it. */
static data_dependence_relation *
get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b)
data_dependence_relation *
loop_distribution::get_data_dependence (struct graph *rdg, data_reference_p a,
data_reference_p b)
{
struct data_dependence_relation ent, **slot;
struct data_dependence_relation *ddr;
@@ -1228,13 +1368,10 @@ get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b)
return *slot;
}
/* In reduced dependence graph RDG for loop distribution, return true if
dependence between references DR1 and DR2 leads to a dependence cycle
and such dependence cycle can't be resolved by runtime alias check. */
static bool
data_dep_in_cycle_p (struct graph *rdg,
data_reference_p dr1, data_reference_p dr2)
bool
loop_distribution::data_dep_in_cycle_p (struct graph *rdg,
data_reference_p dr1,
data_reference_p dr2)
{
struct data_dependence_relation *ddr;
@@ -1264,12 +1401,10 @@ data_dep_in_cycle_p (struct graph *rdg,
return true;
}
/* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
PARTITION1's type after merging PARTITION2 into PARTITION1. */
static void
update_type_for_merge (struct graph *rdg,
partition *partition1, partition *partition2)
void
loop_distribution::update_type_for_merge (struct graph *rdg,
partition *partition1,
partition *partition2)
{
unsigned i, j;
bitmap_iterator bi, bj;
@@ -1297,11 +1432,8 @@ update_type_for_merge (struct graph *rdg,
}
}
/* Returns a partition with all the statements needed for computing
the vertex V of the RDG, also including the loop exit conditions. */
static partition *
build_rdg_partition_for_vertex (struct graph *rdg, int v)
partition *
loop_distribution::build_rdg_partition_for_vertex (struct graph *rdg, int v)
{
partition *partition = partition_alloc ();
auto_vec<int, 3> nodes;
@@ -1595,9 +1727,11 @@ classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr)
/* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify
whether they form a builtin memcpy or memmove call. */
static void
classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
data_reference_p dst_dr, data_reference_p src_dr)
void
loop_distribution::classify_builtin_ldst (loop_p loop, struct graph *rdg,
partition *partition,
data_reference_p dst_dr,
data_reference_p src_dr)
{
tree base, size, src_base, src_size;
auto_vec<tree> dst_steps, src_steps;
@@ -1655,15 +1789,10 @@ classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
return;
}
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
For the moment we detect memset, memcpy and memmove patterns. Bitmap
STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions.
Returns true if there is a reduction in all partitions and we
possibly did not mark PARTITION as having one for this reason. */
static bool
classify_partition (loop_p loop, struct graph *rdg, partition *partition,
bitmap stmt_in_all_partitions)
bool
loop_distribution::classify_partition (loop_p loop,
struct graph *rdg, partition *partition,
bitmap stmt_in_all_partitions)
{
bitmap_iterator bi;
unsigned i;
@@ -1721,11 +1850,8 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition,
return has_reduction;
}
/* Returns true when PARTITION1 and PARTITION2 access the same memory
object in RDG. */
static bool
share_memory_accesses (struct graph *rdg,
bool
loop_distribution::share_memory_accesses (struct graph *rdg,
partition *partition1, partition *partition2)
{
unsigned i, j;
@@ -1772,10 +1898,10 @@ share_memory_accesses (struct graph *rdg,
a partition for it by adding dependent statements according to the RDG.
All partitions are recorded in PARTITIONS. */
static void
rdg_build_partitions (struct graph *rdg,
vec<gimple *> starting_stmts,
vec<partition *> *partitions)
void
loop_distribution::rdg_build_partitions (struct graph *rdg,
vec<gimple *> starting_stmts,
vec<partition *> *partitions)
{
auto_bitmap processed;
int i;
@@ -1891,14 +2017,8 @@ partition_contains_all_rw (struct graph *rdg,
return false;
}
/* Compute partition dependence created by the data references in DRS1
and DRS2, modify and return DIR according to that. If ALIAS_DDRS is
not NULL, we record dependence introduced by possible alias between
two data references in ALIAS_DDRS; otherwise, we simply ignore such
dependence as if it doesn't exist at all. */
static int
pg_add_dependence_edges (struct graph *rdg, int dir,
int
loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir,
bitmap drs1, bitmap drs2, vec<ddr_p> *alias_ddrs)
{
unsigned i, j;
@@ -2114,10 +2234,10 @@ free_partition_graph_vdata (struct graph *pg)
is ignored, as if it doesn't exist at all; otherwise all dependences
are considered. */
static struct graph *
build_partition_graph (struct graph *rdg,
vec<struct partition *> *partitions,
bool ignore_alias_p)
struct graph *
loop_distribution::build_partition_graph (struct graph *rdg,
vec<struct partition *> *partitions,
bool ignore_alias_p)
{
int i, j;
struct partition *partition1, *partition2;
@@ -2199,15 +2319,10 @@ sort_partitions_by_post_order (struct graph *pg,
}
}
/* Given reduced dependence graph RDG, merge strongly connected components
of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by
possible alias between references is ignored, as if it doesn't exist
at all; otherwise all dependences are considered. */
static void
merge_dep_scc_partitions (struct graph *rdg,
vec<struct partition *> *partitions,
bool ignore_alias_p)
void
loop_distribution::merge_dep_scc_partitions (struct graph *rdg,
vec<struct partition *> *partitions,
bool ignore_alias_p)
{
struct partition *partition1, *partition2;
struct pg_vdata *data;
@@ -2280,11 +2395,10 @@ pg_collect_alias_ddrs (struct graph *g, struct graph_edge *e, void *data)
/* This is the main function breaking strongly connected components in
PARTITIONS, given reduced dependence graph RDG. Store data dependence
relations for runtime alias check in ALIAS_DDRS. */
static void
break_alias_scc_partitions (struct graph *rdg,
vec<struct partition *> *partitions,
vec<ddr_p> *alias_ddrs)
void
loop_distribution::break_alias_scc_partitions (struct graph *rdg,
vec<struct partition *> *partitions,
vec<ddr_p> *alias_ddrs)
{
int i, j, k, num_sccs, num_sccs_no_alias;
/* Build partition dependence graph. */
@@ -2710,12 +2824,10 @@ fuse_memset_builtins (vec<struct partition *> *partitions)
}
}
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
ALIAS_DDRS contains ddrs which need runtime alias check. */
static void
finalize_partitions (class loop *loop, vec<struct partition *> *partitions,
vec<ddr_p> *alias_ddrs)
void
loop_distribution::finalize_partitions (class loop *loop,
vec<struct partition *> *partitions,
vec<ddr_p> *alias_ddrs)
{
unsigned i;
struct partition *partition, *a;
@@ -2770,8 +2882,8 @@ finalize_partitions (class loop *loop, vec<struct partition *> *partitions,
distributed loops. Set NB_CALLS to number of generated builtin calls.
Set *DESTROY_P to whether LOOP needs to be destroyed. */
static int
distribute_loop (class loop *loop, vec<gimple *> stmts,
int
loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
control_dependences *cd, int *nb_calls, bool *destroy_p,
bool only_patterns_p)
{
@@ -3011,40 +3123,27 @@ distribute_loop (class loop *loop, vec<gimple *> stmts,
return nbp - *nb_calls;
}
void loop_distribution::bb_top_order_init (void)
{
int rpo_num;
int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));

bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
bb_top_order_index_size = last_basic_block_for_fn (cfun);
rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true);
for (int i = 0; i < rpo_num; i++)
bb_top_order_index[rpo[i]] = i;

free (rpo);
}
void loop_distribution::bb_top_order_destroy ()
{
free (bb_top_order_index);
bb_top_order_index = NULL;
bb_top_order_index_size = 0;
}
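To make the inversion in bb_top_order_init concrete, a small worked example
(block indices invented for illustration): if
pre_and_rev_post_order_compute_fn yields rpo = {2, 0, 3, 1}, the block with
index 2 comes first in reverse post-order, block 0 second, and so on, so the
loop

/* Invert the RPO sequence into a block-index -> position map:
   bb_top_order_index[2] = 0, bb_top_order_index[0] = 1,
   bb_top_order_index[3] = 2, bb_top_order_index[1] = 3.  */
for (int i = 0; i < rpo_num; i++)
bb_top_order_index[rpo[i]] = i;

gives each basic block its position in the topological order that the
comparator consults.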
/* Given LOOP, this function records seed statements for distribution in
@@ -3131,8 +3230,9 @@ prepare_perfect_loop_nest (class loop *loop)
return loop;
}
unsigned int
pass_loop_distribution::execute (function *fun)
loop_distribution::execute (function *fun)
{
class loop *loop;
bool changed = false;
@@ -3143,22 +3243,7 @@ pass_loop_distribution::execute (function *fun)
if (number_of_loops (fun) <= 1)
return 0;
/* Compute topological order for basic blocks. Topological order is
needed because data dependence is computed for data references in
lexicographical order. */
if (bb_top_order_index == NULL)
{
int rpo_num;
int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
bb_top_order_index_size = last_basic_block_for_fn (cfun);
rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true);
for (int i = 0; i < rpo_num; i++)
bb_top_order_index[rpo[i]] = i;
free (rpo);
}
bb_top_order_init ();
FOR_ALL_BB_FN (bb, fun)
{
@@ -3233,11 +3318,7 @@ pass_loop_distribution::execute (function *fun)
delete cd;
if (bb_top_order_index != NULL)
{
free (bb_top_order_index);
bb_top_order_index = NULL;
bb_top_order_index_size = 0;
}
bb_top_order_destroy ();
if (changed)
{
@@ -3259,6 +3340,48 @@ pass_loop_distribution::execute (function *fun)
return changed ? TODO_cleanup_cfg : 0;
}
/* Distribute all loops in the current function. */
namespace {
const pass_data pass_data_loop_distribution =
{
GIMPLE_PASS, /* type */
"ldist", /* name */
OPTGROUP_LOOP, /* optinfo_flags */
TV_TREE_LOOP_DISTRIBUTION, /* tv_id */
( PROP_cfg | PROP_ssa ), /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
};
class pass_loop_distribution : public gimple_opt_pass
{
public:
pass_loop_distribution (gcc::context *ctxt)
: gimple_opt_pass (pass_data_loop_distribution, ctxt)
{}
/* opt_pass methods: */
virtual bool gate (function *)
{
return flag_tree_loop_distribution
|| flag_tree_loop_distribute_patterns;
}
virtual unsigned int execute (function *);
}; // class pass_loop_distribution
unsigned int
pass_loop_distribution::execute (function *fun)
{
return loop_distribution ().execute (fun);
}
} // anon namespace
gimple_opt_pass *
...