Commit 957f0d8f by Bin Cheng Committed by Bin Cheng

tree-loop-distribution.c (INCLUDE_ALGORITHM): New header file.

	* tree-loop-distribution.c (INCLUDE_ALGORITHM): New header file.
	(tree-ssa-loop-ivopts.h): New header file.
	(struct builtin_info): New fields.
	(classify_builtin_1): Compute and record base and offset parts for
	memset builtin partition by calling strip_offset.
	(offset_cmp, fuse_memset_builtins): New functions.
	(finalize_partitions): Fuse adjacent memset partitions by calling
	above function.
	* tree-ssa-loop-ivopts.c (strip_offset): Delete static declaration.
	Expose the interface.
	* tree-ssa-loop-ivopts.h (strip_offset): New declaration.

	* gcc.dg/tree-ssa/ldist-17.c: Adjust test string.
	* gcc.dg/tree-ssa/ldist-32.c: New test.
	* gcc.dg/tree-ssa/ldist-35.c: New test.
	* gcc.dg/tree-ssa/ldist-36.c: New test.

From-SVN: r253859
parent 85aa9ed6
2017-10-18 Bin Cheng <bin.cheng@arm.com> 2017-10-18 Bin Cheng <bin.cheng@arm.com>
* tree-loop-distribution.c (INCLUDE_ALGORITHM): New header file.
(tree-ssa-loop-ivopts.h): New header file.
(struct builtin_info): New fields.
(classify_builtin_1): Compute and record base and offset parts for
memset builtin partition by calling strip_offset.
(offset_cmp, fuse_memset_builtins): New functions.
(finalize_partitions): Fuse adjacent memset partitions by calling
above function.
* tree-ssa-loop-ivopts.c (strip_offset): Delete static declaration.
Expose the interface.
* tree-ssa-loop-ivopts.h (strip_offset): New declaration.
2017-10-18 Bin Cheng <bin.cheng@arm.com>
PR tree-optimization/82574 PR tree-optimization/82574
* tree-loop-distribution.c (find_single_drs): New parameter. Check * tree-loop-distribution.c (find_single_drs): New parameter. Check
that data reference must be executed exactly once per iteration that data reference must be executed exactly once per iteration
2017-10-18 Bin Cheng <bin.cheng@arm.com> 2017-10-18 Bin Cheng <bin.cheng@arm.com>
* gcc.dg/tree-ssa/ldist-17.c: Adjust test string.
* gcc.dg/tree-ssa/ldist-32.c: New test.
* gcc.dg/tree-ssa/ldist-35.c: New test.
* gcc.dg/tree-ssa/ldist-36.c: New test.
2017-10-18 Bin Cheng <bin.cheng@arm.com>
PR tree-optimization/82574 PR tree-optimization/82574
* gcc.dg/tree-ssa/pr82574.c: New test. * gcc.dg/tree-ssa/pr82574.c: New test.
......
...@@ -45,5 +45,5 @@ mad_synth_mute (struct mad_synth *synth) ...@@ -45,5 +45,5 @@ mad_synth_mute (struct mad_synth *synth)
return; return;
} }
/* { dg-final { scan-tree-dump "distributed: split to 0 loops and 4 library calls" "ldist" } } */ /* { dg-final { scan-tree-dump "Loop nest . distributed: split to 0 loops and 1 library calls" "ldist" } } */
/* { dg-final { scan-tree-dump-times "generated memset zero" 4 "ldist" } } */ /* { dg-final { scan-tree-dump-times "generated memset zero" 1 "ldist" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
#define M (256)
#define N (512)
struct st
{
int a[M][N];
int c[M];
int b[M][N];
};
void
foo (struct st *p)
{
for (unsigned i = 0; i < M; ++i)
{
p->c[i] = 0;
for (unsigned j = N; j > 0; --j)
{
p->a[i][j - 1] = 0;
p->b[i][j - 1] = 0;
}
}
}
/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 loops and 1 library" 1 "ldist" } } */
/* { dg-final { scan-tree-dump-times "__builtin_memset \\(.*, 0, 1049600\\);" 1 "ldist" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
#define M (256)
#define N (512)
struct st
{
int a[M][N];
int c[M];
int b[M][N];
};
void
foo (struct st * restrict p, struct st * restrict q)
{
for (unsigned i = 0; i < M; ++i)
{
p->c[i] = 0;
for (unsigned j = N; j > 0; --j)
{
p->a[i][j - 1] = q->a[i][j - 1];
p->b[i][j - 1] = 0;
}
}
}
/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 loops and 1 library" 1 "ldist" { xfail *-*-* } } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
#define M (256)
#define N (512)
struct st
{
int a[M][N];
int c[M];
int b[M][N];
};
void
foo (struct st * restrict p)
{
for (unsigned i = 0; i < M; ++i)
{
p->c[i] = 0;
for (unsigned j = N; j > 0; --j)
{
p->b[i][j - 1] = p->a[i][j - 1];
p->a[i][j - 1] = 0;
}
}
}
/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 loops and 3 library" 1 "ldist" } } */
...@@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see
data reuse. */ data reuse. */
#include "config.h" #include "config.h"
#define INCLUDE_ALGORITHM /* stable_sort */
#include "system.h" #include "system.h"
#include "coretypes.h" #include "coretypes.h"
#include "backend.h" #include "backend.h"
...@@ -106,6 +107,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -106,6 +107,7 @@ along with GCC; see the file COPYING3. If not see
#include "stor-layout.h" #include "stor-layout.h"
#include "tree-cfg.h" #include "tree-cfg.h"
#include "tree-ssa-loop-manip.h" #include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop.h" #include "tree-ssa-loop.h"
#include "tree-into-ssa.h" #include "tree-into-ssa.h"
#include "tree-ssa.h" #include "tree-ssa.h"
...@@ -604,6 +606,10 @@ struct builtin_info ...@@ -604,6 +606,10 @@ struct builtin_info
tree dst_base; tree dst_base;
tree src_base; tree src_base;
tree size; tree size;
/* Base and offset part of dst_base after stripping constant offset. This
is only used in memset builtin distribution for now. */
tree dst_base_base;
unsigned HOST_WIDE_INT dst_base_offset;
}; };
/* Partition for loop distribution. */ /* Partition for loop distribution. */
...@@ -1504,7 +1510,11 @@ classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr) ...@@ -1504,7 +1510,11 @@ classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr)
if (!compute_access_range (loop, dr, &base, &size)) if (!compute_access_range (loop, dr, &base, &size))
return; return;
partition->builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size); struct builtin_info *builtin;
builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
builtin->dst_base_base = strip_offset (builtin->dst_base,
&builtin->dst_base_offset);
partition->builtin = builtin;
partition->kind = PKIND_MEMSET; partition->kind = PKIND_MEMSET;
} }
...@@ -2480,6 +2490,113 @@ version_for_distribution_p (vec<struct partition *> *partitions, ...@@ -2480,6 +2490,113 @@ version_for_distribution_p (vec<struct partition *> *partitions,
return (alias_ddrs->length () > 0); return (alias_ddrs->length () > 0);
} }
/* Compare base offset of builtin mem* partitions P1 and P2. */
static bool
offset_cmp (struct partition *p1, struct partition *p2)
{
gcc_assert (p1 != NULL && p1->builtin != NULL);
gcc_assert (p2 != NULL && p2->builtin != NULL);
return p1->builtin->dst_base_offset < p2->builtin->dst_base_offset;
}
/* Fuse adjacent memset builtin PARTITIONS if possible. This is a special
case optimization transforming below code:
__builtin_memset (&obj, 0, 100);
_1 = &obj + 100;
__builtin_memset (_1, 0, 200);
_2 = &obj + 300;
__builtin_memset (_2, 0, 100);
into:
__builtin_memset (&obj, 0, 400);
Note we don't have dependence information between different partitions
at this point, as a result, we can't handle nonadjacent memset builtin
partitions since dependence might be broken. */
static void
fuse_memset_builtins (vec<struct partition *> *partitions)
{
unsigned i, j;
struct partition *part1, *part2;
for (i = 0; partitions->iterate (i, &part1);)
{
if (part1->kind != PKIND_MEMSET)
{
i++;
continue;
}
/* Find sub-array of memset builtins of the same base. Index range
of the sub-array is [i, j) with "j > i". */
for (j = i + 1; partitions->iterate (j, &part2); ++j)
{
if (part2->kind != PKIND_MEMSET
|| !operand_equal_p (part1->builtin->dst_base_base,
part2->builtin->dst_base_base, 0))
break;
}
/* Stable sort is required in order to avoid breaking dependence. */
std::stable_sort (&(*partitions)[i],
&(*partitions)[i] + j - i, offset_cmp);
/* Continue with next partition. */
i = j;
}
/* Merge all consecutive memset builtin partitions. */
for (i = 0; i < partitions->length () - 1;)
{
part1 = (*partitions)[i];
if (part1->kind != PKIND_MEMSET)
{
i++;
continue;
}
part2 = (*partitions)[i + 1];
/* Only merge memset partitions of the same base and with constant
access sizes. */
if (part2->kind != PKIND_MEMSET
|| TREE_CODE (part1->builtin->size) != INTEGER_CST
|| TREE_CODE (part2->builtin->size) != INTEGER_CST
|| !operand_equal_p (part1->builtin->dst_base_base,
part2->builtin->dst_base_base, 0))
{
i++;
continue;
}
tree rhs1 = gimple_assign_rhs1 (DR_STMT (part1->builtin->dst_dr));
tree rhs2 = gimple_assign_rhs1 (DR_STMT (part2->builtin->dst_dr));
int bytev1 = const_with_all_bytes_same (rhs1);
int bytev2 = const_with_all_bytes_same (rhs2);
/* Only merge memset partitions of the same value. */
if (bytev1 != bytev2 || bytev1 == -1)
{
i++;
continue;
}
wide_int end1 = wi::add (part1->builtin->dst_base_offset,
wi::to_wide (part1->builtin->size));
/* Only merge adjacent memset partitions. */
if (wi::ne_p (end1, part2->builtin->dst_base_offset))
{
i++;
continue;
}
/* Merge partitions[i] and partitions[i+1]. */
part1->builtin->size = fold_build2 (PLUS_EXPR, sizetype,
part1->builtin->size,
part2->builtin->size);
partition_free (part2);
partitions->ordered_remove (i + 1);
}
}
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution. /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
ALIAS_DDRS contains ddrs which need runtime alias check. */ ALIAS_DDRS contains ddrs which need runtime alias check. */
...@@ -2523,6 +2640,10 @@ finalize_partitions (struct loop *loop, vec<struct partition *> *partitions, ...@@ -2523,6 +2640,10 @@ finalize_partitions (struct loop *loop, vec<struct partition *> *partitions,
} }
partitions->truncate (1); partitions->truncate (1);
} }
/* Fuse memset builtins if possible. */
if (partitions->length () > 1)
fuse_memset_builtins (partitions);
} }
/* Distributes the code from LOOP in such a way that producer statements /* Distributes the code from LOOP in such a way that producer statements
......
...@@ -1550,9 +1550,6 @@ record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use) ...@@ -1550,9 +1550,6 @@ record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op)); bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
} }
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
/* Record a group of TYPE. */ /* Record a group of TYPE. */
static struct iv_group * static struct iv_group *
...@@ -2863,7 +2860,7 @@ strip_offset_1 (tree expr, bool inside_addr, bool top_compref, ...@@ -2863,7 +2860,7 @@ strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
/* Strips constant offsets from EXPR and stores them to OFFSET. */ /* Strips constant offsets from EXPR and stores them to OFFSET. */
static tree tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset) strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{ {
HOST_WIDE_INT off; HOST_WIDE_INT off;
......
...@@ -28,6 +28,7 @@ extern void dump_cand (FILE *, struct iv_cand *); ...@@ -28,6 +28,7 @@ extern void dump_cand (FILE *, struct iv_cand *);
extern bool contains_abnormal_ssa_name_p (tree); extern bool contains_abnormal_ssa_name_p (tree);
extern struct loop *outermost_invariant_loop_for_expr (struct loop *, tree); extern struct loop *outermost_invariant_loop_for_expr (struct loop *, tree);
extern bool expr_invariant_in_loop_p (struct loop *, tree); extern bool expr_invariant_in_loop_p (struct loop *, tree);
extern tree strip_offset (tree, unsigned HOST_WIDE_INT *);
bool may_be_nonaddressable_p (tree expr); bool may_be_nonaddressable_p (tree expr);
void tree_ssa_iv_optimize (void); void tree_ssa_iv_optimize (void);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment