Commit 38988cbf, authored and committed by Richard Biener

re PR tree-optimization/83518 (Missing optimization: useless instructions should be dropped)

2019-07-08  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/83518
	* tree-ssa-sccvn.c: Include splay-tree.h.
	(struct pd_range, struct pd_data): New.
	(struct vn_walk_cb_data): Add data to track partial definitions.
	(vn_walk_cb_data::~vn_walk_cb_data): New.
	(vn_walk_cb_data::push_partial_def): New.
	(pd_tree_alloc, pd_tree_dealloc, pd_range_compare): New.
	(vn_reference_lookup_2): When partial defs are registered give up.
	(vn_reference_lookup_3): Track partial defs for memset and
	constructor zeroing and for defs from constants.

	* gcc.dg/tree-ssa/ssa-fre-73.c: New testcase.
	* gcc.dg/tree-ssa/ssa-fre-74.c: Likewise.
	* gcc.dg/tree-ssa/ssa-fre-75.c: Likewise.
	* gcc.dg/tree-ssa/ssa-fre-76.c: Likewise.
	* g++.dg/tree-ssa/pr83518.C: Likewise.

From-SVN: r273194
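
The net effect: value numbering (FRE) can now fold a load that no single store covers, by collecting the byte ranges written by multiple smaller constant stores, memset calls, and empty-CONSTRUCTOR zeroings, and, once those ranges cover the whole load, native-encoding the pieces into one buffer and reinterpreting the result as a single constant. A minimal sketch of the effect (not part of the commit; it follows the shape of the new ssa-fre-73.c testcase below, and the byte offsets in the comments assume a 4-byte int):

/* Neither store alone covers the two-int vector read, but together
   they do: with this patch fre1 folds the load to the constant
   vector { 1, 2 } and the function reduces to "return 2".  */
typedef int v2si __attribute__((vector_size(__SIZEOF_INT__ * 2)));

int combined (int *a)
{
  a[0] = 1;             /* partial def covering bytes [0, 4)  */
  a[1] = 2;             /* partial def covering bytes [4, 8)  */
  v2si x = *(v2si *)a;  /* read covered by the union of both defs */
  *(v2si *)&a[2] = x;
  return a[3];          /* now known to be 2 */
}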
g++.dg/tree-ssa/pr83518.C:

/* { dg-do compile } */
/* { dg-options "-O3 -fdump-tree-optimized" } */

unsigned test()
{
  int arr[] = {5, 4, 3, 2, 1};
  int sum = 0;
  /* The swaps only permute the array, so the sum is still
     1 + 2 + 3 + 4 + 5 = 15 and the whole function folds to
     a constant.  */
  for (int i = 0; i < 5; ++i)
    {
      for (int j = 0; j < 5; ++j)
	{
	  int t = arr[i];
	  arr[i] = arr[j];
	  arr[j] = t;
	}
    }
  for (int i = 0; i < 5; ++i)
    {
      sum += arr[i];
    }
  return sum;
}

/* { dg-final { scan-tree-dump "return 15;" "optimized" } } */
gcc.dg/tree-ssa/ssa-fre-73.c:

/* { dg-do compile } */
/* { dg-options "-O -fdump-tree-fre1" } */

typedef int v2si __attribute__((vector_size(__SIZEOF_INT__ * 2)));

int foo (int *a)
{
  /* The two partial defs a[0] and a[1] combine to the constant
     vector { 1, 2 }, so after the copy to a[2..3] the load of
     a[3] folds to 2.  */
  a[0] = 1;
  a[1] = 2;
  v2si x = *(v2si *)a;
  *(v2si *)&a[2] = x;
  return a[3];
}

/* { dg-final { scan-tree-dump "return 2;" "fre1" } } */
gcc.dg/tree-ssa/ssa-fre-74.c:

/* { dg-do compile } */
/* { dg-options "-O -fdump-tree-fre1" } */

typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4)));

int foo (int *a)
{
  /* Four out-of-order constant stores cover a[0..3] as { 0, 1, 2, 4 },
     so after the vector copy a[4] + a[7] == 0 + 4 == 4.  */
  a[2] = 2;
  a[0] = 0;
  a[1] = 1;
  a[3] = 4;
  v4si x = *(v4si *)a;
  *(v4si *)&a[4] = x;
  return a[4] + a[7];
}

/* { dg-final { scan-tree-dump "return 4;" "fre1" } } */
gcc.dg/tree-ssa/ssa-fre-75.c:

/* { dg-do compile } */
/* { dg-require-effective-target int32plus } */
/* { dg-options "-O -fgimple -fdump-tree-fre1" } */

typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4)));

#if __SIZEOF_INT__ == 4
/* Written with the GIMPLE frontend so the exact IL is fixed: a
   12-byte clear of a[1..3], then constant stores a[2] = 2 and
   a[0] = 1.  The v4si load therefore reads { 1, 0, 2, 0 }, each
   __BIT_FIELD_REF extracts 32 bits at the given bit position, and
   the sum folds to 3.  */
__GIMPLE (ssa) int foo (int *a)
{
  v4si _2;
  int _3;
  int _4;
  int _5;
  int _6;
  int _7;
  int _8;
  int _9;

  __BB(2):
  __MEM <unsigned char[3 * __SIZEOF_INT__]> ((char *)a_1(D) + 4) = _Literal (unsigned char[3 * __SIZEOF_INT__]) {};
  __MEM <int> (a_1(D) + 8) = 2;
  __MEM <int> (a_1(D)) = 1;
  _2 = __MEM <v4si> (a_1(D));
  _3 = __BIT_FIELD_REF <int> (_2, 32, 0);
  _4 = __BIT_FIELD_REF <int> (_2, 32, 32);
  _5 = __BIT_FIELD_REF <int> (_2, 32, 64);
  _6 = __BIT_FIELD_REF <int> (_2, 32, 96);
  _7 = _3 + _4;
  _8 = _7 + _5;
  _9 = _8 + _6;
  return _9;
}
#endif

/* { dg-final { scan-tree-dump "return 3;" "fre1" } } */
gcc.dg/tree-ssa/ssa-fre-76.c:

/* { dg-do compile } */
/* { dg-options "-O -fdump-tree-fre1" } */

typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4)));

int foo (int *a)
{
  /* The memset and the three constant stores together cover all of
     a[0..3] as { 1, 0, 2, 3 }, so after the vector copy
     a[4] + a[5] + a[7] == 1 + 0 + 3 == 4.  */
  __builtin_memset (a, 0, 2 * __SIZEOF_INT__);
  a[2] = 2;
  a[0] = 1;
  a[3] = 3;
  v4si x = *(v4si *)a;
  *(v4si *)&a[4] = x;
  return a[4] + a[5] + a[7];
}

/* { dg-final { scan-tree-dump "return 4;" "fre1" } } */
gcc/tree-ssa-sccvn.c:

@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "splay-tree.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
@@ -360,6 +361,8 @@ static void init_vn_nary_op_from_stmt (vn_nary_op_t, gimple *);
static void init_vn_nary_op_from_pieces (vn_nary_op_t, unsigned int,
					 enum tree_code, tree, tree *);
static tree vn_lookup_simplify_result (gimple_match_op *);
static vn_reference_t vn_reference_lookup_or_insert_for_pieces
  (tree, alias_set_type, tree, vec<vn_reference_op_s, va_heap>, tree);
/* Return whether there is value numbering information for a given SSA name. */
@@ -1646,20 +1649,245 @@ vn_reference_lookup_1 (vn_reference_t vr, vn_reference_t *vnresult)
  return NULL_TREE;
}
/* Partial definition tracking support.  */

struct pd_range
{
  HOST_WIDE_INT offset;
  HOST_WIDE_INT size;
};

struct pd_data
{
  tree rhs;
  HOST_WIDE_INT offset;
  HOST_WIDE_INT size;
};

/* Context for alias walking.  */

struct vn_walk_cb_data
{
  vn_walk_cb_data (vn_reference_t vr_, tree *last_vuse_ptr_,
		   vn_lookup_kind vn_walk_kind_, bool tbaa_p_)
    : vr (vr_), last_vuse_ptr (last_vuse_ptr_), vn_walk_kind (vn_walk_kind_),
      tbaa_p (tbaa_p_), known_ranges (NULL)
  {}
  ~vn_walk_cb_data ();
  void *push_partial_def (const pd_data& pd, tree, HOST_WIDE_INT);

  vn_reference_t vr;
  tree *last_vuse_ptr;
  vn_lookup_kind vn_walk_kind;
  bool tbaa_p;
  /* The VDEFs of partial defs we come along.  */
  auto_vec<pd_data, 2> partial_defs;
  /* The first def's range, to avoid splay tree setup in most cases.  */
  pd_range first_range;
  tree first_vuse;
  splay_tree known_ranges;
  obstack ranges_obstack;
};

vn_walk_cb_data::~vn_walk_cb_data ()
{
  if (known_ranges)
    {
      splay_tree_delete (known_ranges);
      obstack_free (&ranges_obstack, NULL);
    }
}

/* pd_range splay-tree helpers.  */

static int
pd_range_compare (splay_tree_key offset1p, splay_tree_key offset2p)
{
  HOST_WIDE_INT offset1 = *(HOST_WIDE_INT *) offset1p;
  HOST_WIDE_INT offset2 = *(HOST_WIDE_INT *) offset2p;
  if (offset1 < offset2)
    return -1;
  else if (offset1 > offset2)
    return 1;
  return 0;
}

static void *
pd_tree_alloc (int size, void *data_)
{
  vn_walk_cb_data *data = (vn_walk_cb_data *) data_;
  return obstack_alloc (&data->ranges_obstack, size);
}

static void
pd_tree_dealloc (void *, void *)
{
}
/* Push PD to the vector of partial definitions returning a
   value when we are ready to combine things with VUSE and MAXSIZEI,
   NULL when we want to continue looking for partial defs or -1
   on failure.  */

void *
vn_walk_cb_data::push_partial_def (const pd_data &pd, tree vuse,
				   HOST_WIDE_INT maxsizei)
{
  if (partial_defs.is_empty ())
    {
      /* The first partial def is recorded directly; the splay tree
	 is only set up once a second one arrives.  */
      partial_defs.safe_push (pd);
      first_range.offset = pd.offset;
      first_range.size = pd.size;
      first_vuse = vuse;
      last_vuse_ptr = NULL;
    }
  else
    {
      if (!known_ranges)
	{
	  /* ???  Optimize the case where the second partial def
	     completes things.  */
	  gcc_obstack_init (&ranges_obstack);
	  known_ranges
	    = splay_tree_new_with_allocator (pd_range_compare, 0, 0,
					     pd_tree_alloc,
					     pd_tree_dealloc, this);
	  splay_tree_insert (known_ranges,
			     (splay_tree_key) &first_range.offset,
			     (splay_tree_value) &first_range);
	}
      if (known_ranges)
	{
	  pd_range newr = { pd.offset, pd.size };
	  splay_tree_node n;
	  pd_range *r;
	  /* Lookup the predecessor of offset + 1 and see if
	     we need to merge with it.  */
	  HOST_WIDE_INT loffset = newr.offset + 1;
	  if ((n = splay_tree_predecessor (known_ranges,
					   (splay_tree_key) &loffset))
	      && ((r = (pd_range *) n->value), true)
	      && ranges_known_overlap_p (r->offset, r->size + 1,
					 newr.offset, newr.size))
	    {
	      /* Ignore partial defs already covered.  */
	      if (known_subrange_p (newr.offset, newr.size,
				    r->offset, r->size))
		return NULL;
	      r->size = MAX (r->offset + r->size,
			     newr.offset + newr.size) - r->offset;
	    }
	  else
	    {
	      /* newr.offset wasn't covered yet, insert the range.  */
	      r = XOBNEW (&ranges_obstack, pd_range);
	      *r = newr;
	      splay_tree_insert (known_ranges,
				 (splay_tree_key) &r->offset,
				 (splay_tree_value) r);
	    }
	  /* Merge r which now contains newr and is a member
	     of the splay tree with adjacent overlapping ranges.  */
	  pd_range *rafter;
	  while ((n = splay_tree_successor (known_ranges,
					    (splay_tree_key) &r->offset))
		 && ((rafter = (pd_range *) n->value), true)
		 && ranges_known_overlap_p (r->offset, r->size + 1,
					    rafter->offset, rafter->size))
	    {
	      r->size = MAX (r->offset + r->size,
			     rafter->offset + rafter->size) - r->offset;
	      splay_tree_remove (known_ranges,
				 (splay_tree_key) &rafter->offset);
	    }
	  partial_defs.safe_push (pd);
	  /* Now we have merged newr into the range tree.
	     When we have covered [offseti, sizei] then the
	     tree will contain exactly one node which has
	     the desired properties and it will be 'r'.  */
	  if (known_subrange_p (0, maxsizei / BITS_PER_UNIT,
				r->offset, r->size))
	    {
	      /* Now simply native encode all partial defs
		 in reverse order.  */
	      unsigned ndefs = partial_defs.length ();
	      /* We support up to 512-bit values (for V8DFmode).  */
	      unsigned char buffer[64];
	      int len;
	      while (!partial_defs.is_empty ())
		{
		  pd_data pd = partial_defs.pop ();
		  if (TREE_CODE (pd.rhs) == CONSTRUCTOR)
		    /* Empty CONSTRUCTOR.  */
		    memset (buffer + MAX (0, pd.offset), 0,
			    MIN ((HOST_WIDE_INT) sizeof (buffer), pd.size));
		  else
		    {
		      /* The remaining buffer space is sizeof (buffer)
			 minus the byte offset, not the sizeof of a
			 pointer expression.  */
		      len = native_encode_expr (pd.rhs,
						buffer + MAX (0, pd.offset),
						sizeof (buffer)
						  - MAX (0, pd.offset),
						MAX (0, -pd.offset));
		      if (len <= 0
			  || len < (pd.size - MAX (0, -pd.offset)))
			{
			  if (dump_file && (dump_flags & TDF_DETAILS))
			    fprintf (dump_file, "Failed to encode %u "
				     "partial definitions\n", ndefs);
			  return (void *) -1;
			}
		    }
		}
	      tree type = vr->type;
	      /* Make sure to interpret in a type that has a range
		 covering the whole access size.  */
	      if (INTEGRAL_TYPE_P (vr->type)
		  && maxsizei != TYPE_PRECISION (vr->type))
		type = build_nonstandard_integer_type (maxsizei,
						       TYPE_UNSIGNED (type));
	      tree val = native_interpret_expr (type, buffer,
						maxsizei / BITS_PER_UNIT);
	      /* If we chop off bits because the type's precision doesn't
		 match the memory access size, this is ok when optimizing
		 reads but not when called from the DSE code during
		 elimination.  */
	      if (val
		  && type != vr->type)
		{
		  if (! int_fits_type_p (val, vr->type))
		    val = NULL_TREE;
		  else
		    val = fold_convert (vr->type, val);
		}
	      if (val)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Successfully combined %u "
			     "partial definitions\n", ndefs);
		  return vn_reference_lookup_or_insert_for_pieces
			   (first_vuse, vr->set, vr->type, vr->operands, val);
		}
	      else
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Failed to interpret %u "
			     "encoded partial definitions\n", ndefs);
		  return (void *) -1;
		}
	    }
	}
    }
  /* Continue looking for partial defs.  */
  return NULL;
}
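
For readers who want to experiment with the covering logic in isolation, here is a self-contained C sketch of the same merge-until-covered idea (illustration only, not GCC code: it uses a small sorted array where the patch uses a splay tree, and plain longs where the patch uses HOST_WIDE_INT):

#include <stdio.h>
#include <string.h>

typedef struct { long offset; long size; } pd_range;

static pd_range ranges[16];
static int nranges;

/* Record [offset, offset+size) and merge neighbors, as
   push_partial_def does via splay_tree_predecessor/successor.
   Returns 1 once the union of the ranges covers [0, need).  */
static int
push_range (long offset, long size, long need)
{
  /* Insertion sort by offset.  */
  int i = nranges++;
  for (; i > 0 && ranges[i - 1].offset > offset; --i)
    ranges[i] = ranges[i - 1];
  ranges[i].offset = offset;
  ranges[i].size = size;

  /* Merge adjacent or overlapping neighbors.  */
  for (i = 0; i + 1 < nranges; )
    if (ranges[i].offset + ranges[i].size >= ranges[i + 1].offset)
      {
	long end = ranges[i + 1].offset + ranges[i + 1].size;
	if (end > ranges[i].offset + ranges[i].size)
	  ranges[i].size = end - ranges[i].offset;
	memmove (&ranges[i + 1], &ranges[i + 2],
		 (nranges - i - 2) * sizeof (pd_range));
	nranges--;
      }
    else
      i++;

  /* Covered once a single range spans [0, need).  */
  return (nranges == 1
	  && ranges[0].offset <= 0
	  && ranges[0].offset + ranges[0].size >= need);
}

int
main (void)
{
  /* Two 4-byte stores covering an 8-byte read, like a[0]=1; a[1]=2;  */
  printf ("%d\n", push_range (0, 4, 8));  /* 0: not covered yet */
  printf ("%d\n", push_range (4, 4, 8));  /* 1: covered, combine now */
  return 0;
}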
/* Callback for walk_non_aliased_vuses.  Adjusts the vn_reference_t VR_
   with the current VUSE and performs the expression lookup.  */

@@ -1671,6 +1899,11 @@ vn_reference_lookup_2 (ao_ref *op ATTRIBUTE_UNUSED, tree vuse, void *data_)
  vn_reference_s **slot;
  hashval_t hash;

  /* If we have partial definitions recorded we have to go through
     vn_reference_lookup_3.  */
  if (!data->partial_defs.is_empty ())
    return NULL;

  if (data->last_vuse_ptr)
    *data->last_vuse_ptr = vuse;
@@ -2179,7 +2412,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
      else
	return (void *)-1;
      tree len = gimple_call_arg (def_stmt, 2);
      HOST_WIDE_INT leni, offset2i, offseti;
      if (data->partial_defs.is_empty ()
	  && known_subrange_p (offset, maxsize, offset2,
			       wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT))
	{
	  tree val;
@@ -2209,6 +2444,19 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
	  return vn_reference_lookup_or_insert_for_pieces
		   (vuse, vr->set, vr->type, vr->operands, val);
	}
      /* For now handle clearing memory with partial defs.  */
      else if (integer_zerop (gimple_call_arg (def_stmt, 1))
	       && tree_to_poly_int64 (len).is_constant (&leni)
	       && offset.is_constant (&offseti)
	       && offset2.is_constant (&offset2i)
	       && maxsize.is_constant (&maxsizei))
	{
	  pd_data pd;
	  pd.rhs = build_constructor (NULL_TREE, NULL);
	  pd.offset = offset2i - offseti;
	  pd.size = leni;
	  return data->push_partial_def (pd, vuse, maxsizei);
	}
    }
  /* 2) Assignment from an empty CONSTRUCTOR.  */

@@ -2219,19 +2467,38 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
    {
      tree base2;
      poly_int64 offset2, size2, maxsize2;
      HOST_WIDE_INT offset2i, size2i;
      bool reverse;
      base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
				       &offset2, &size2, &maxsize2, &reverse);
      if (known_size_p (maxsize2)
	  && known_eq (maxsize2, size2)
	  && adjust_offsets_for_equal_base_address (base, &offset,
						    base2, &offset2))
	{
	  if (data->partial_defs.is_empty ()
	      && known_subrange_p (offset, maxsize, offset2, size2))
	    {
	      tree val = build_zero_cst (vr->type);
	      return vn_reference_lookup_or_insert_for_pieces
		       (vuse, vr->set, vr->type, vr->operands, val);
	    }
	  else if (maxsize.is_constant (&maxsizei)
		   && maxsizei % BITS_PER_UNIT == 0
		   && offset.is_constant (&offseti)
		   && offseti % BITS_PER_UNIT == 0
		   && offset2.is_constant (&offset2i)
		   && offset2i % BITS_PER_UNIT == 0
		   && size2.is_constant (&size2i)
		   && size2i % BITS_PER_UNIT == 0)
	    {
	      pd_data pd;
	      pd.rhs = gimple_assign_rhs1 (def_stmt);
	      pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
	      pd.size = size2i / BITS_PER_UNIT;
	      return data->push_partial_def (pd, vuse, maxsizei);
	    }
	}
    }
  /* 3) Assignment from a constant.  We can use fold's native encode/interpret

@@ -2253,7 +2520,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
    {
      tree base2;
      poly_int64 offset2, size2, maxsize2;
      HOST_WIDE_INT offset2i, size2i;
      bool reverse;
      base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt),
				       &offset2, &size2, &maxsize2, &reverse);
@@ -2266,6 +2533,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
						    base2, &offset2)
	  && offset.is_constant (&offseti)
	  && offset2.is_constant (&offset2i)
	  && size2.is_constant (&size2i))
	{
	  if (data->partial_defs.is_empty ()
	      && known_subrange_p (offseti, maxsizei, offset2, size2))
	    {
	      /* We support up to 512-bit values (for V8DFmode).  */
@@ -2307,6 +2577,18 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
		    (vuse, vr->set, vr->type, vr->operands, val);
		}
	    }
	  else if (ranges_known_overlap_p (offseti, maxsizei,
					   offset2i, size2i))
	    {
	      pd_data pd;
	      tree rhs = gimple_assign_rhs1 (def_stmt);
	      if (TREE_CODE (rhs) == SSA_NAME)
		rhs = SSA_VAL (rhs);
	      pd.rhs = rhs;
	      pd.offset = (offset2i - offseti) / BITS_PER_UNIT;
	      pd.size = size2i / BITS_PER_UNIT;
	      return data->push_partial_def (pd, vuse, maxsizei);
	    }
	}
    }
  /* 4) Assignment from an SSA name whose definition we may be able
@@ -2315,7 +2597,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
      && is_gimple_reg_type (vr->type)
      && !contains_storage_order_barrier_p (vr->operands)
      && gimple_assign_single_p (def_stmt)
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
      /* A subset of partial defs from non-constants can be handled
	 by for example inserting a CONSTRUCTOR, a COMPLEX_EXPR or
	 even a (series of) BIT_INSERT_EXPR hoping for simplifications
	 downstream, not so much for actually doing the insertion.  */
      && data->partial_defs.is_empty ())
    {
      tree base2;
      poly_int64 offset2, size2, maxsize2;
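
For contrast, here is a hedged sketch of a case this hunk deliberately rejects (assumed behavior inferred from the comment above, not a testcase from the commit):

/* The partial defs come from the non-constant SSA names x and y, so
   once a partial def has been recorded the lookup gives up rather
   than insert a CONSTRUCTOR or BIT_INSERT_EXPR chain; the combined
   load below stays a load.  Assumes sizeof (long long) equals
   2 * sizeof (int).  */
long long
not_combined (int *a, int x, int y)
{
  a[0] = x;
  a[1] = y;
  return *(long long *)a;
}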
@@ -2363,7 +2650,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
      && gimple_assign_single_p (def_stmt)
      && (DECL_P (gimple_assign_rhs1 (def_stmt))
	  || TREE_CODE (gimple_assign_rhs1 (def_stmt)) == MEM_REF
	  || handled_component_p (gimple_assign_rhs1 (def_stmt)))
      /* Handling this is more complicated, give up for now.  */
      && data->partial_defs.is_empty ())
    {
      tree base2;
      int i, j, k;
@@ -2497,7 +2786,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
	  || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME)
      && (TREE_CODE (gimple_call_arg (def_stmt, 1)) == ADDR_EXPR
	  || TREE_CODE (gimple_call_arg (def_stmt, 1)) == SSA_NAME)
      && poly_int_tree_p (gimple_call_arg (def_stmt, 2), &copy_size)
      /* Handling this is more complicated, give up for now.  */
      && data->partial_defs.is_empty ())
    {
      tree lhs, rhs;
      ao_ref r;