Commit d0582dc1 by Richard Guenther Committed by Richard Biener

re PR tree-optimization/53081 (memcpy/memset loop recognition)

2012-06-06  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/53081
	* tree-data-ref.h (adjacent_store_dr_p): Rename to ...
	(adjacent_dr_p): ... this and make it work for reads, too.
	* tree-loop-distribution.c (enum partition_kind): Add PKIND_MEMCPY.
	(struct partition_s): Change main_stmt to main_dr, add
	secondary_dr member.
	(build_size_arg_loc): Change to take a data-reference and not
	gimplify here.
	(build_addr_arg_loc): New function split out from ...
	(generate_memset_builtin): ... here.  Use it and simplify.
	(generate_memcpy_builtin): New function.
	(generate_code_for_partition): Adjust.
	(classify_partition): Streamline pattern detection.  Detect
	memcpy.
	(ldist_gen): Adjust.
	(tree_loop_distribution): Adjust seed statements for memcpy
	recognition.

	* gcc.dg/tree-ssa/ldist-20.c: New testcase.
	* gcc.dg/tree-ssa/loop-19.c: Add -fno-tree-loop-distribute-patterns.

From-SVN: r188261
parent 4ad4fa63
2012-06-06 Richard Guenther <rguenther@suse.de>
PR tree-optimization/53081
* tree-data-ref.h (adjacent_store_dr_p): Rename to ...
(adjacent_dr_p): ... this and make it work for reads, too.
* tree-loop-distribution.c (enum partition_kind): Add PKIND_MEMCPY.
(struct partition_s): Change main_stmt to main_dr, add
secondary_dr member.
(build_size_arg_loc): Change to take a data-reference and not
gimplify here.
(build_addr_arg_loc): New function split out from ...
(generate_memset_builtin): ... here. Use it and simplify.
(generate_memcpy_builtin): New function.
(generate_code_for_partition): Adjust.
(classify_partition): Streamline pattern detection. Detect
memcpy.
(ldist_gen): Adjust.
(tree_loop_distribution): Adjust seed statements for memcpy
recognition.
2012-06-06 Matt Turner <mattst88@gmail.com> 2012-06-06 Matt Turner <mattst88@gmail.com>
* config/arm/mmintrin.h (_mm_empty): New. * config/arm/mmintrin.h (_mm_empty): New.
......
2012-06-06 Richard Guenther <rguenther@suse.de>
PR tree-optimization/53081
* gcc.dg/tree-ssa/ldist-20.c: New testcase.
* gcc.dg/tree-ssa/loop-19.c: Add -fno-tree-loop-distribute-patterns.
2012-06-05 Michael Meissner <meissner@linux.vnet.ibm.com> 2012-06-05 Michael Meissner <meissner@linux.vnet.ibm.com>
* gcc.target/powerpc/pr53487.c: New test. * gcc.target/powerpc/pr53487.c: New test.
......
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
void foo(char *);
/* Byte-wise copy between two distinct objects.  The loop-distribution
   pass (-ftree-loop-distribute-patterns) should recognize this loop as
   a memcpy: one unit-stride load and one unit-stride store with equal
   steps.  Keep the loop body in exactly this shape -- the dg-final
   directive below counts "generated memcpy" in the ldist dump.  */
void my_memcpy (void *q, unsigned int n)
{
unsigned i;
char p[1024];
for (i = 0; i < n; ++i)
((char *)p)[i] = ((char *)q)[i];
foo(p);  /* Presumably keeps P used so the loop is not removed as dead.  */
}
struct S { int i; int j; };
/* Same as my_memcpy but copying whole structs, so the data references
   have a stride equal to sizeof (struct S) rather than 1.  This
   exercises the aggregate-copy path of the new memcpy recognition
   (adjacent_dr_p with matching DR_STEPs); it is the second of the two
   expected "generated memcpy" hits in the ldist dump.  */
void my_memcpy2 (void *q, unsigned int n)
{
unsigned i;
char p[1024];
for (i = 0; i < n; ++i)
((struct S *)p)[i] = ((struct S *)q)[i];
foo(p);  /* Presumably keeps P used so the loop is not removed as dead.  */
}
char p[1024];
/* Copy within the single global array P, so source and destination may
   overlap.  The pass detects this via ptr_derefs_may_alias_p on the
   two address arguments and must emit memmove instead of memcpy --
   matched by the "generated memmove" dg-final scan below.  */
void my_memmove (unsigned int n)
{
unsigned i;
for (i = 0; i < n; ++i)
p[i] = p[i+1];
foo(p);  /* Presumably keeps P used so the loop is not removed as dead.  */
}
/* { dg-final { scan-tree-dump-times "generated memcpy" 2 "ldist" } } */
/* { dg-final { scan-tree-dump-times "generated memmove" 1 "ldist" } } */
/* { dg-final { cleanup-tree-dump "ldist" } } */
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
/* { dg-do compile { target { i?86-*-* || { x86_64-*-* || powerpc_hard_double } } } } */ /* { dg-do compile { target { i?86-*-* || { x86_64-*-* || powerpc_hard_double } } } } */
/* { dg-require-effective-target nonpic } */ /* { dg-require-effective-target nonpic } */
/* { dg-options "-O3 -fno-prefetch-loop-arrays -fdump-tree-optimized" } */ /* { dg-options "-O3 -fno-tree-loop-distribute-patterns -fno-prefetch-loop-arrays -fdump-tree-optimized" } */
# define N 2000000 # define N 2000000
double a[N],c[N]; double a[N],c[N];
......
...@@ -615,11 +615,8 @@ bool rdg_defs_used_in_other_loops_p (struct graph *, int); ...@@ -615,11 +615,8 @@ bool rdg_defs_used_in_other_loops_p (struct graph *, int);
with a stride equal to its unit type size. */ with a stride equal to its unit type size. */
static inline bool static inline bool
adjacent_store_dr_p (struct data_reference *dr) adjacent_dr_p (struct data_reference *dr)
{ {
if (!DR_IS_WRITE (dr))
return false;
/* If this is a bitfield store bail out. */ /* If this is a bitfield store bail out. */
if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
&& DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))) && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
......
...@@ -52,15 +52,16 @@ along with GCC; see the file COPYING3. If not see ...@@ -52,15 +52,16 @@ along with GCC; see the file COPYING3. If not see
#include "tree-scalar-evolution.h" #include "tree-scalar-evolution.h"
#include "tree-pass.h" #include "tree-pass.h"
enum partition_kind { PKIND_NORMAL, PKIND_MEMSET }; enum partition_kind { PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY };
typedef struct partition_s typedef struct partition_s
{ {
bitmap stmts; bitmap stmts;
bool has_writes; bool has_writes;
enum partition_kind kind; enum partition_kind kind;
/* Main statement a kind != PKIND_NORMAL partition is about. */ /* data-references a kind != PKIND_NORMAL partition is about. */
gimple main_stmt; data_reference_p main_dr;
data_reference_p secondary_dr;
} *partition_t; } *partition_t;
DEF_VEC_P (partition_t); DEF_VEC_P (partition_t);
...@@ -313,40 +314,53 @@ generate_loops_for_partition (struct loop *loop, partition_t partition, ...@@ -313,40 +314,53 @@ generate_loops_for_partition (struct loop *loop, partition_t partition,
free (bbs); free (bbs);
} }
/* Build the size argument for a memset call. */ /* Build the size argument for a memory operation call. */
static inline tree static tree
build_size_arg_loc (location_t loc, tree nb_iter, tree op, build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter)
gimple_seq *stmt_list)
{ {
gimple_seq stmts; tree size;
tree x = fold_build2_loc (loc, MULT_EXPR, size_type_node, size = fold_build2_loc (loc, MULT_EXPR, sizetype,
fold_convert_loc (loc, size_type_node, nb_iter), fold_convert_loc (loc, sizetype, nb_iter),
fold_convert_loc (loc, size_type_node, TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
TYPE_SIZE_UNIT (TREE_TYPE (op)))); return fold_convert_loc (loc, size_type_node, size);
x = force_gimple_operand (x, &stmts, true, NULL); }
gimple_seq_add_seq (stmt_list, stmts);
/* Build an address argument for a memory operation call. */
return x;
static tree
build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
{
tree addr_base;
addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
addr_base = fold_convert_loc (loc, sizetype, addr_base);
/* Test for a negative stride, iterating over every element. */
if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
{
addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
fold_convert_loc (loc, sizetype, nb_bytes));
addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
}
return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
} }
/* Generate a call to memset for PARTITION in LOOP. */ /* Generate a call to memset for PARTITION in LOOP. */
static void static void
generate_memset_builtin (struct loop *loop, struct graph *rdg, generate_memset_builtin (struct loop *loop, partition_t partition)
partition_t partition)
{ {
gimple_stmt_iterator gsi; gimple_stmt_iterator gsi;
gimple stmt, fn_call; gimple stmt, fn_call;
tree op0, nb_iter, mem, fn, addr_base, nb_bytes; tree nb_iter, mem, fn, nb_bytes;
gimple_seq stmt_list = NULL, stmts;
struct data_reference *dr = XCNEW (struct data_reference);
location_t loc; location_t loc;
tree val; tree val;
stmt = partition->main_stmt; stmt = DR_STMT (partition->main_dr);
loc = gimple_location (stmt); loc = gimple_location (stmt);
op0 = gimple_assign_lhs (stmt);
if (gimple_bb (stmt) == loop->latch) if (gimple_bb (stmt) == loop->latch)
nb_iter = number_of_latch_executions (loop); nb_iter = number_of_latch_executions (loop);
else else
...@@ -355,25 +369,12 @@ generate_memset_builtin (struct loop *loop, struct graph *rdg, ...@@ -355,25 +369,12 @@ generate_memset_builtin (struct loop *loop, struct graph *rdg,
/* The new statements will be placed before LOOP. */ /* The new statements will be placed before LOOP. */
gsi = gsi_last_bb (loop_preheader_edge (loop)->src); gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
dr = VEC_index (data_reference_p, nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
RDG_DATAREFS (rdg, rdg_vertex_for_stmt (rdg, stmt)), 0); nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); false, GSI_CONTINUE_LINKING);
addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
addr_base = fold_convert_loc (loc, sizetype, addr_base); mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
/* Test for a negative stride, iterating over every element. */
if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
{
addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
fold_convert_loc (loc, sizetype, nb_bytes));
addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
TYPE_SIZE_UNIT (TREE_TYPE (op0)));
}
addr_base = fold_build_pointer_plus_loc (loc,
DR_BASE_ADDRESS (dr), addr_base);
mem = force_gimple_operand (addr_base, &stmts, true, NULL);
gimple_seq_add_seq (&stmt_list, stmts);
/* This exactly matches the pattern recognition in classify_partition. */ /* This exactly matches the pattern recognition in classify_partition. */
val = gimple_assign_rhs1 (stmt); val = gimple_assign_rhs1 (stmt);
...@@ -393,15 +394,14 @@ generate_memset_builtin (struct loop *loop, struct graph *rdg, ...@@ -393,15 +394,14 @@ generate_memset_builtin (struct loop *loop, struct graph *rdg,
tree tem = create_tmp_reg (integer_type_node, NULL); tree tem = create_tmp_reg (integer_type_node, NULL);
tem = make_ssa_name (tem, NULL); tem = make_ssa_name (tem, NULL);
cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE); cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE);
gimple_seq_add_stmt (&stmt_list, cstmt); gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING);
val = tem; val = tem;
} }
} }
fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
gimple_seq_add_stmt (&stmt_list, fn_call); gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
gsi_insert_seq_after (&gsi, stmt_list, GSI_CONTINUE_LINKING);
if (dump_file && (dump_flags & TDF_DETAILS)) if (dump_file && (dump_flags & TDF_DETAILS))
{ {
...@@ -415,6 +415,54 @@ generate_memset_builtin (struct loop *loop, struct graph *rdg, ...@@ -415,6 +415,54 @@ generate_memset_builtin (struct loop *loop, struct graph *rdg,
} }
} }
/* Generate a call to memcpy for PARTITION in LOOP. */
static void
generate_memcpy_builtin (struct loop *loop, partition_t partition)
{
gimple_stmt_iterator gsi;
gimple stmt, fn_call;
tree nb_iter, dest, src, fn, nb_bytes;
location_t loc;
enum built_in_function kind;
stmt = DR_STMT (partition->main_dr);
loc = gimple_location (stmt);
if (gimple_bb (stmt) == loop->latch)
nb_iter = number_of_latch_executions (loop);
else
nb_iter = number_of_exit_cond_executions (loop);
/* The new statements will be placed before LOOP. */
gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter);
nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
if (ptr_derefs_may_alias_p (dest, src))
kind = BUILT_IN_MEMMOVE;
else
kind = BUILT_IN_MEMCPY;
dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
fn = build_fold_addr_expr (builtin_decl_implicit (kind));
fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
if (dump_file && (dump_flags & TDF_DETAILS))
{
if (kind == BUILT_IN_MEMCPY)
fprintf (dump_file, "generated memcpy\n");
else
fprintf (dump_file, "generated memmove\n");
}
}
/* Remove and destroy the loop LOOP. */ /* Remove and destroy the loop LOOP. */
static void static void
...@@ -466,13 +514,21 @@ destroy_loop (struct loop *loop) ...@@ -466,13 +514,21 @@ destroy_loop (struct loop *loop)
/* Generates code for PARTITION. */ /* Generates code for PARTITION. */
static void static void
generate_code_for_partition (struct loop *loop, struct graph *rdg, generate_code_for_partition (struct loop *loop,
partition_t partition, bool copy_p) partition_t partition, bool copy_p)
{ {
switch (partition->kind) switch (partition->kind)
{ {
case PKIND_MEMSET: case PKIND_MEMSET:
generate_memset_builtin (loop, rdg, partition); generate_memset_builtin (loop, partition);
/* If this is the last partition for which we generate code, we have
to destroy the loop. */
if (!copy_p)
destroy_loop (loop);
break;
case PKIND_MEMCPY:
generate_memcpy_builtin (loop, partition);
/* If this is the last partition for which we generate code, we have /* If this is the last partition for which we generate code, we have
to destroy the loop. */ to destroy the loop. */
if (!copy_p) if (!copy_p)
...@@ -849,9 +905,11 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) ...@@ -849,9 +905,11 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
bitmap_iterator bi; bitmap_iterator bi;
unsigned i; unsigned i;
tree nb_iter; tree nb_iter;
data_reference_p single_load, single_store;
partition->kind = PKIND_NORMAL; partition->kind = PKIND_NORMAL;
partition->main_stmt = NULL; partition->main_dr = NULL;
partition->secondary_dr = NULL;
if (!flag_tree_loop_distribute_patterns) if (!flag_tree_loop_distribute_patterns)
return; return;
...@@ -880,10 +938,14 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) ...@@ -880,10 +938,14 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
} }
} }
/* Detect memset. */ /* Detect memset and memcpy. */
single_load = NULL;
single_store = NULL;
EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
{ {
gimple stmt = RDG_STMT (rdg, i); gimple stmt = RDG_STMT (rdg, i);
data_reference_p dr;
unsigned j;
if (gimple_code (stmt) == GIMPLE_PHI) if (gimple_code (stmt) == GIMPLE_PHI)
continue; continue;
...@@ -892,41 +954,68 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition) ...@@ -892,41 +954,68 @@ classify_partition (loop_p loop, struct graph *rdg, partition_t partition)
if (!gimple_vuse (stmt)) if (!gimple_vuse (stmt))
continue; continue;
/* Exactly one store. */ /* Otherwise just regular loads/stores. */
if (gimple_assign_single_p (stmt) if (!gimple_assign_single_p (stmt))
&& !is_gimple_reg (gimple_assign_lhs (stmt))) return;
/* But exactly one store and/or load. */
for (j = 0;
VEC_iterate (data_reference_p, RDG_DATAREFS (rdg, i), j, dr); ++j)
{ {
tree rhs; if (DR_IS_READ (dr))
if (partition->main_stmt != NULL) {
return; if (single_load != NULL)
partition->main_stmt = stmt; return;
rhs = gimple_assign_rhs1 (stmt); single_load = dr;
if (!(integer_zerop (rhs) }
|| integer_all_onesp (rhs) else
|| real_zerop (rhs) {
|| (TREE_CODE (rhs) == CONSTRUCTOR if (single_store != NULL)
&& !TREE_CLOBBER_P (rhs)) return;
|| (INTEGRAL_TYPE_P (TREE_TYPE (rhs)) single_store = dr;
&& (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) }
== TYPE_MODE (unsigned_char_type_node)))))
return;
if (TREE_CODE (rhs) == SSA_NAME
&& !SSA_NAME_IS_DEFAULT_DEF (rhs)
&& flow_bb_inside_loop_p
(loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
return;
if (VEC_length (data_reference_p, RDG_DATAREFS (rdg, i)) != 1)
return;
if (!adjacent_store_dr_p (VEC_index (data_reference_p,
RDG_DATAREFS (rdg, i), 0)))
return;
} }
else
return;
} }
if (partition->main_stmt != NULL) if (single_store && !single_load)
partition->kind = PKIND_MEMSET; {
gimple stmt = DR_STMT (single_store);
tree rhs = gimple_assign_rhs1 (stmt);
if (!(integer_zerop (rhs)
|| integer_all_onesp (rhs)
|| real_zerop (rhs)
|| (TREE_CODE (rhs) == CONSTRUCTOR
&& !TREE_CLOBBER_P (rhs))
|| (INTEGRAL_TYPE_P (TREE_TYPE (rhs))
&& (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt)))
== TYPE_MODE (unsigned_char_type_node)))))
return;
if (TREE_CODE (rhs) == SSA_NAME
&& !SSA_NAME_IS_DEFAULT_DEF (rhs)
&& flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
return;
if (!adjacent_dr_p (single_store))
return;
partition->kind = PKIND_MEMSET;
partition->main_dr = single_store;
}
else if (single_store && single_load)
{
gimple store = DR_STMT (single_store);
gimple load = DR_STMT (single_load);
/* Direct aggregate copy or via an SSA name temporary. */
if (load != store
&& gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
return;
if (!adjacent_dr_p (single_store)
|| !adjacent_dr_p (single_load)
|| !operand_equal_p (DR_STEP (single_store),
DR_STEP (single_load), 0))
return;
partition->kind = PKIND_MEMCPY;
partition->main_dr = single_store;
partition->secondary_dr = single_load;
}
} }
/* For a data reference REF, return the declaration of its base /* For a data reference REF, return the declaration of its base
...@@ -1259,7 +1348,7 @@ ldist_gen (struct loop *loop, struct graph *rdg, ...@@ -1259,7 +1348,7 @@ ldist_gen (struct loop *loop, struct graph *rdg,
dump_rdg_partitions (dump_file, partitions); dump_rdg_partitions (dump_file, partitions);
FOR_EACH_VEC_ELT (partition_t, partitions, i, partition) FOR_EACH_VEC_ELT (partition_t, partitions, i, partition)
generate_code_for_partition (loop, rdg, partition, i < nbp - 1); generate_code_for_partition (loop, partition, i < nbp - 1);
ldist_done: ldist_done:
...@@ -1392,22 +1481,6 @@ tree_loop_distribution (void) ...@@ -1392,22 +1481,6 @@ tree_loop_distribution (void)
|| is_gimple_reg (gimple_assign_lhs (stmt))) || is_gimple_reg (gimple_assign_lhs (stmt)))
continue; continue;
/* If we are only performing pattern detection restrict
what we try to distribute to stores from constants. */
if (!flag_tree_loop_distribution)
{
tree rhs = gimple_assign_rhs1 (stmt);
if (!is_gimple_min_invariant (rhs)
&& TREE_CODE (rhs) != CONSTRUCTOR
&& TREE_CODE (rhs) != SSA_NAME)
continue;
if (TREE_CODE (rhs) == SSA_NAME
&& !SSA_NAME_IS_DEFAULT_DEF (rhs)
&& flow_bb_inside_loop_p
(loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
continue;
}
VEC_safe_push (gimple, heap, work_list, stmt); VEC_safe_push (gimple, heap, work_list, stmt);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment