Commit 4a52eb19 by Bin Cheng Committed by Bin Cheng

tree-loop-distribution.c (classify_partition): New parameter and better handle reduction statement.

	* tree-loop-distribution.c (classify_partition): New parameter and
	better handle reduction statement.
	(rdg_build_partitions): Revise comment.
	(distribute_loop): Compute statements in all partitions and pass it
	to classify_partition.

	gcc/testsuite
	* gcc.dg/tree-ssa/ldist-26.c: New test.

From-SVN: r249993
parent f1eb4621
2017-07-05 Bin Cheng <bin.cheng@arm.com>
* tree-loop-distribution.c (classify_partition): New parameter and
better handle reduction statement.
(rdg_build_partitions): Revise comment.
(distribute_loop): Compute statements in all partitions and pass it
to classify_partition.
2017-07-05 Bin Cheng <bin.cheng@arm.com>
* tree-loop-distribution.c (enum partition_type): New.
(struct partition): New field type.
(partition_merge_into): Add parameter. Update partition type.
......
2017-07-05 Bin Cheng <bin.cheng@arm.com>
* gcc.dg/tree-ssa/ldist-26.c: New test.
2017-07-05 Bin Cheng <bin.cheng@arm.com>
* gcc.dg/tree-ssa/ldist-6.c: XFAIL.
2017-07-04 Uros Bizjak <ubizjak@gmail.com>
......
/* { dg-do run } */
/* { dg-options "-O3 -ftree-loop-distribution -fdump-tree-ldist-details" } */
extern void abort (void);
int a[130], b[128], c[128];
/* Test kernel for loop distribution.  For each i in [1, LEN] it rewrites
   a[i] from a[i + 2], zeroes b[i], rewrites a[i + 1] from the new a[i],
   and copies a[i] into c[i] while i < X.  Returns the value of the
   induction variable after the loop, so I is used outside of the loop.
   Marked noinline/noclone so it is compiled as a standalone function.  */
int __attribute__((noinline,noclone))
foo (int len, int x)
{
int i;
for (i = 1; i <= len; ++i)
{
/* Reads two elements ahead of the element being written.  */
a[i] = a[i + 2] + 1;
/* Unconditional zero store; presumably the statement the
   "generated memset zero" dump scan in this file refers to.  */
b[i] = 0;
/* Uses the a[i] value stored earlier in this same iteration.  */
a[i + 1] = a[i] - 3;
/* Conditional store: only iterations with i < X write c[].  */
if (i < x)
c[i] = a[i];
}
/* I is live after the loop (LEN + 1 when LEN >= 1).  */
return i;
}
/* Runtime check: initializes a[] to its own indices, runs foo (127, 67),
   and aborts if selected elements of a[] do not hold the values the
   original (undistributed) loop order produces.  Returns 0 on success.  */
int main()
{
int i;
/* a[i] == i for all 130 elements before the call.  */
for (i = 0; i < 130; ++i)
a[i] = i;
/* The return value of foo is intentionally ignored here.  */
foo (127, 67);
/* a[0] is never written by foo (its loop starts at 1).
   a[1] is written only in the first iteration: a[3] + 1 == 4.
   a[127] is last written in the final iteration: a[129] + 1 == 130,
   and a[129] is never modified by foo.  */
if (a[0] != 0 || a[1] != 4 || a[127] != 130)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump "distributed: split to 2 loops and 0 library calls" "ldist" } } */
/* { dg-final { scan-tree-dump "distributed: split to 1 loops and 1 library calls" "ldist" } } */
/* { dg-final { scan-tree-dump "generated memset zero" "ldist" } } */
......@@ -1254,17 +1254,18 @@ build_rdg_partition_for_vertex (struct graph *rdg, int v)
}
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
For the moment we detect only the memset zero pattern. */
For the moment we detect memset, memcpy and memmove patterns. Bitmap
STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */
static void
classify_partition (loop_p loop, struct graph *rdg, partition *partition)
classify_partition (loop_p loop, struct graph *rdg, partition *partition,
bitmap stmt_in_all_partitions)
{
bitmap_iterator bi;
unsigned i;
tree nb_iter;
data_reference_p single_load, single_store;
bool volatiles_p = false;
bool plus_one = false;
bool volatiles_p = false, plus_one = false, has_reduction = false;
partition->kind = PKIND_NORMAL;
partition->main_dr = NULL;
......@@ -1279,16 +1280,31 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition)
if (gimple_has_volatile_ops (stmt))
volatiles_p = true;
/* If the stmt has uses outside of the loop mark it as reduction. */
/* If the stmt is not included by all partitions and there are uses
outside of the loop, then mark the partition as reduction. */
if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
{
partition->reduction_p = true;
return;
/* Due to a limitation in the transform phase we have to fuse all
reduction partitions. As a result, this could cancel valid
loop distribution, especially for loops whose induction variable
is used outside of the loop. To work around this issue, we skip
marking the partition as a reduction if the reduction stmt belongs
to all partitions. In such a case, the reduction will be computed
correctly no matter how partitions are fused/distributed. */
if (!bitmap_bit_p (stmt_in_all_partitions, i))
{
partition->reduction_p = true;
return;
}
has_reduction = true;
}
}
/* Perform general partition disqualification for builtins. */
if (volatiles_p
/* Simple workaround to prevent classifying the partition as builtin
if it contains any use outside of loop. */
|| has_reduction
|| !flag_tree_loop_distribute_patterns)
return;
......@@ -1461,9 +1477,9 @@ share_memory_accesses (struct graph *rdg,
return false;
}
/* Aggregate several components into a useful partition that is
registered in the PARTITIONS vector. Partitions will be
distributed in different loops. */
/* For each seed statement in STARTING_STMTS, this function builds
partition for it by adding depended statements according to RDG.
All partitions are recorded in PARTITIONS. */
static void
rdg_build_partitions (struct graph *rdg,
......@@ -1731,10 +1747,15 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
auto_vec<struct partition *, 3> partitions;
rdg_build_partitions (rdg, stmts, &partitions);
auto_bitmap stmt_in_all_partitions;
bitmap_copy (stmt_in_all_partitions, partitions[0]->stmts);
for (i = 1; partitions.iterate (i, &partition); ++i)
bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts);
any_builtin = false;
FOR_EACH_VEC_ELT (partitions, i, partition)
{
classify_partition (loop, rdg, partition);
classify_partition (loop, rdg, partition, stmt_in_all_partitions);
any_builtin |= partition_builtin_p (partition);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment