Commit e7484357 by Richard Biener Committed by Richard Biener

tree-loop-distribution.c (classify_partition): Return whether a reduction appeared in all partitions and do not stop builtin detection because of this.

tree-loop-distribution.c (classify_partition): Return whether a reduction appeared in all partitions and do not stop builtin detection because of this.

2019-06-14  Richard Biener  <rguenther@suse.de>

	* tree-loop-distribution.c (classify_partition): Return
	whether a reduction appeared in all partitions and do not
	stop builtin detection because of this.
	(distribute_loop): Sort a non-builtin partition last if
	there's a reduction in all partitions and make sure the
	partition prevailing as last is not a builtin.

	* gcc.dg/tree-ssa/ldist-26.c: Adjust.

From-SVN: r272284
parent 46771da5
2019-06-14 Richard Biener <rguenther@suse.de>
* tree-loop-distribution.c (classify_partition): Return
whether a reduction appeared in all partitions and do not
stop builtin detection because of this.
(distribute_loop): Sort a non-builtin partition last if
there's a reduction in all partitions and make sure the
partition prevailing as last is not a builtin.
2019-06-14 Feng Xue <fxue@os.amperecomputing.com> 2019-06-14 Feng Xue <fxue@os.amperecomputing.com>
PR ipa/90401 PR ipa/90401
......
2019-06-14 Richard Biener <rguenther@suse.de>
* gcc.dg/tree-ssa/ldist-26.c: Adjust.
2019-06-14 Feng Xue <fxue@os.amperecomputing.com> 2019-06-14 Feng Xue <fxue@os.amperecomputing.com>
PR ipa/90401 PR ipa/90401
......
...@@ -31,6 +31,8 @@ int main() ...@@ -31,6 +31,8 @@ int main()
return 0; return 0;
} }
/* { dg-final { scan-tree-dump "distributed: split to 2 loops and 0 library calls" "ldist" } } */ /* Loop splitting splits the iteration space so we end up with two
/* { dg-final { scan-tree-dump "distributed: split to 1 loops and 1 library calls" "ldist" } } */ loops entering loop distribution. Both should have the b[i] = 0
/* { dg-final { scan-tree-dump "generated memset zero" "ldist" } } */ part split out as memset. */
/* { dg-final { scan-tree-dump-times "distributed: split to 1 loops and 1 library calls" 2 "ldist" } } */
/* { dg-final { scan-tree-dump-times "generated memset zero" 2 "ldist" } } */
...@@ -1658,9 +1658,11 @@ classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition, ...@@ -1658,9 +1658,11 @@ classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP. /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
For the moment we detect memset, memcpy and memmove patterns. Bitmap For the moment we detect memset, memcpy and memmove patterns. Bitmap
STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */ STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions.
Returns true if there is a reduction in all partitions and we
possibly did not mark PARTITION as having one for this reason. */
static void static bool
classify_partition (loop_p loop, struct graph *rdg, partition *partition, classify_partition (loop_p loop, struct graph *rdg, partition *partition,
bitmap stmt_in_all_partitions) bitmap stmt_in_all_partitions)
{ {
...@@ -1688,25 +1690,27 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition, ...@@ -1688,25 +1690,27 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition,
to all partitions. In such case, reduction will be computed to all partitions. In such case, reduction will be computed
correctly no matter how partitions are fused/distributed. */ correctly no matter how partitions are fused/distributed. */
if (!bitmap_bit_p (stmt_in_all_partitions, i)) if (!bitmap_bit_p (stmt_in_all_partitions, i))
{ partition->reduction_p = true;
partition->reduction_p = true; else
return; has_reduction = true;
}
has_reduction = true;
} }
} }
/* Simple workaround to prevent classifying the partition as builtin
if it contains any use outside of loop. For the case where all
partitions have the reduction this simple workaround is delayed
to only affect the last partition. */
if (partition->reduction_p)
return has_reduction;
/* Perform general partition disqualification for builtins. */ /* Perform general partition disqualification for builtins. */
if (volatiles_p if (volatiles_p
/* Simple workaround to prevent classifying the partition as builtin
if it contains any use outside of loop. */
|| has_reduction
|| !flag_tree_loop_distribute_patterns) || !flag_tree_loop_distribute_patterns)
return; return has_reduction;
/* Find single load/store data references for builtin partition. */ /* Find single load/store data references for builtin partition. */
if (!find_single_drs (loop, rdg, partition, &single_st, &single_ld)) if (!find_single_drs (loop, rdg, partition, &single_st, &single_ld))
return; return has_reduction;
partition->loc = gimple_location (DR_STMT (single_st)); partition->loc = gimple_location (DR_STMT (single_st));
...@@ -1715,6 +1719,7 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition, ...@@ -1715,6 +1719,7 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition,
classify_builtin_st (loop, partition, single_st); classify_builtin_st (loop, partition, single_st);
else else
classify_builtin_ldst (loop, rdg, partition, single_st, single_ld); classify_builtin_ldst (loop, rdg, partition, single_st, single_ld);
return has_reduction;
} }
/* Returns true when PARTITION1 and PARTITION2 access the same memory /* Returns true when PARTITION1 and PARTITION2 access the same memory
...@@ -2782,7 +2787,6 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts, ...@@ -2782,7 +2787,6 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
ddrs_table = new hash_table<ddr_hasher> (389); ddrs_table = new hash_table<ddr_hasher> (389);
struct graph *rdg; struct graph *rdg;
partition *partition; partition *partition;
bool any_builtin;
int i, nbp; int i, nbp;
*destroy_p = false; *destroy_p = false;
...@@ -2842,10 +2846,12 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts, ...@@ -2842,10 +2846,12 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
for (i = 1; partitions.iterate (i, &partition); ++i) for (i = 1; partitions.iterate (i, &partition); ++i)
bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts); bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts);
any_builtin = false; bool any_builtin = false;
bool reduction_in_all = false;
FOR_EACH_VEC_ELT (partitions, i, partition) FOR_EACH_VEC_ELT (partitions, i, partition)
{ {
classify_partition (loop, rdg, partition, stmt_in_all_partitions); reduction_in_all
|= classify_partition (loop, rdg, partition, stmt_in_all_partitions);
any_builtin |= partition_builtin_p (partition); any_builtin |= partition_builtin_p (partition);
} }
...@@ -2920,6 +2926,21 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts, ...@@ -2920,6 +2926,21 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
i--; i--;
} }
/* Put a non-builtin partition last if we need to preserve a reduction.
??? This is a workaround that makes sort_partitions_by_post_order do
the correct thing while in reality it should sort each component
separately and then put the component with a reduction or a non-builtin
last. */
if (reduction_in_all
&& partition_builtin_p (partitions.last()))
FOR_EACH_VEC_ELT (partitions, i, partition)
if (!partition_builtin_p (partition))
{
partitions.unordered_remove (i);
partitions.quick_push (partition);
break;
}
/* Build the partition dependency graph and fuse partitions in strong /* Build the partition dependency graph and fuse partitions in strong
connected component. */ connected component. */
if (partitions.length () > 1) if (partitions.length () > 1)
...@@ -2940,6 +2961,21 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts, ...@@ -2940,6 +2961,21 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
finalize_partitions (loop, &partitions, &alias_ddrs); finalize_partitions (loop, &partitions, &alias_ddrs);
/* If there is a reduction in all partitions make sure the last one
is not classified for builtin code generation. */
if (reduction_in_all)
{
partition = partitions.last ();
if (only_patterns_p
&& partition_builtin_p (partition)
&& !partition_builtin_p (partitions[0]))
{
nbp = 0;
goto ldist_done;
}
partition->kind = PKIND_NORMAL;
}
nbp = partitions.length (); nbp = partitions.length ();
if (nbp == 0 if (nbp == 0
|| (nbp == 1 && !partition_builtin_p (partitions[0])) || (nbp == 1 && !partition_builtin_p (partitions[0]))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment