Commit 3d45dd59 by Richard Guenther Committed by Richard Biener

re PR tree-optimization/34043 (Missed optimization causing extra loads and…

re PR tree-optimization/34043 (Missed optimization causing extra loads and stores when using x86_64 builtin function together with aggregate types.)

2008-03-14  Richard Guenther  <rguenther@suse.de>

	PR tree-optimization/34043
	PR tree-optimization/33989
	* tree-ssa-pre.c (execute_pre): Allow SCCVN to do insertion
	when doing FRE.
	(bitmap_find_leader): Use extra argument to verify dominance
	relationship inside a basic-block.
	(can_PRE_operation): Add VIEW_CONVERT_EXPR.
	(find_leader_in_sets): Adjust.
	(create_component_ref_by_pieces): Take extra argument for
	dominance check, handle lookup failures.
	(find_or_generate_expression): Likewise.
	(create_expression_by_pieces): Likewise.
	(insert_into_preds_of_block): Adjust.
	(create_value_expr_from): If asked for, verify all operands
	are in the block's AVAIL_OUT set.
	(make_values_for_stmt): Check for SSA_NAMEs that are live
	over an abnormal edge.
	(compute_avail): Remove such check.
	(do_SCCVN_insertion): New function.
	(eliminate): If we do not find a leader suitable for replacement
	insert a replacement expression from SCCVN if available.
	* tree-ssa-sccvn.h (run_scc_vn): Update prototype.
	(struct vn_ssa_aux): Add needs_insertion flag.
	* tree-ssa-sccvn.c (may_insert): New global flag.
	(copy_reference_ops_from_ref): Value-number union member access
	based on its size, not type and member if insertion is allowed.
	(visit_reference_op_load): For a weak match from union type
	punning, look up a view-converted value and insert an SSA_NAME
	for that value if it is not found.
	(visit_use): Make dumps shorter.  Do not disallow value numbering
	SSA_NAMEs that are live over an abnormal edge to constants.
	(free_scc_vn): Release inserted SSA_NAMEs.
	(run_scc_vn): New flag to specify whether insertion is allowed.
	Process SSA_NAMEs in forward order.
	* tree-ssa-loop-im.c (for_each_index): Handle invariant
	ADDR_EXPRs inside VIEW_CONVERT_EXPR.
	* fold-const.c (fold_unary): Fold VIEW_CONVERT_EXPRs from/to
	pointer type to/from integral types that do not change the
	precision to regular conversions.

	* gcc.dg/tree-ssa/ssa-fre-7.c: New testcase.
	* gcc.dg/tree-ssa/ssa-fre-8.c: Likewise.
	* gcc.dg/tree-ssa/ssa-fre-9.c: Likewise.
	* gcc.dg/tree-ssa/ssa-fre-10.c: Likewise.
	* gcc.dg/tree-ssa/ssa-pre-17.c: Likewise.

From-SVN: r133218
parent c539732f
2008-03-14 Richard Guenther <rguenther@suse.de>
PR tree-optimization/34043
PR tree-optimization/33989
* tree-ssa-pre.c (execute_pre): Allow SCCVN to do insertion
when doing FRE.
(bitmap_find_leader): Use extra argument to verify dominance
relationship inside a basic-block.
(can_PRE_operation): Add VIEW_CONVERT_EXPR.
(find_leader_in_sets): Adjust.
(create_component_ref_by_pieces): Take extra argument for
dominance check, handle lookup failures.
(find_or_generate_expression): Likewise.
(create_expression_by_pieces): Likewise.
(insert_into_preds_of_block): Adjust.
(create_value_expr_from): If asked for, verify all operands
are in the block's AVAIL_OUT set.
(make_values_for_stmt): Check for SSA_NAMEs that are live
over an abnormal edge.
(compute_avail): Remove such check.
(do_SCCVN_insertion): New function.
(eliminate): If we do not find a leader suitable for replacement
insert a replacement expression from SCCVN if available.
* tree-ssa-sccvn.h (run_scc_vn): Update prototype.
(struct vn_ssa_aux): Add needs_insertion flag.
* tree-ssa-sccvn.c (may_insert): New global flag.
(copy_reference_ops_from_ref): Value-number union member access
based on its size, not type and member if insertion is allowed.
(visit_reference_op_load): For a weak match from union type
punning, look up a view-converted value and insert an SSA_NAME
for that value if it is not found.
(visit_use): Make dumps shorter. Do not disallow value numbering
SSA_NAMEs that are live over an abnormal edge to constants.
(free_scc_vn): Release inserted SSA_NAMEs.
(run_scc_vn): New flag to specify whether insertion is allowed.
Process SSA_NAMEs in forward order.
* tree-ssa-loop-im.c (for_each_index): Handle invariant
ADDR_EXPRs inside VIEW_CONVERT_EXPR.
* fold-const.c (fold_unary): Fold VIEW_CONVERT_EXPRs from/to
pointer type to/from integral types that do not change the
precision to regular conversions.
2008-03-13 Uros Bizjak <ubizjak@gmail.com>
* doc/extend.texi (X86 Built-in Functions) [__builtin_ia32_psll?,
......
......@@ -7961,19 +7961,24 @@ fold_unary (enum tree_code code, tree type, tree op0)
/* For integral conversions with the same precision or pointer
conversions use a NOP_EXPR instead. */
if ((INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (op0))
&& TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (op0))
/* Do not muck with VIEW_CONVERT_EXPRs that convert from
a sub-type to its base type as generated by the Ada FE. */
&& !TREE_TYPE (TREE_TYPE (op0)))
|| (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (op0))))
if ((INTEGRAL_TYPE_P (type)
|| POINTER_TYPE_P (type))
&& (INTEGRAL_TYPE_P (TREE_TYPE (op0))
|| POINTER_TYPE_P (TREE_TYPE (op0)))
&& TYPE_PRECISION (type) == TYPE_PRECISION (TREE_TYPE (op0))
/* Do not muck with VIEW_CONVERT_EXPRs that convert from
a sub-type to its base type as generated by the Ada FE. */
&& !(INTEGRAL_TYPE_P (TREE_TYPE (op0))
&& TREE_TYPE (TREE_TYPE (op0))))
return fold_convert (type, op0);
/* Strip inner integral conversions that do not change the precision. */
if ((TREE_CODE (op0) == NOP_EXPR
|| TREE_CODE (op0) == CONVERT_EXPR)
&& INTEGRAL_TYPE_P (TREE_TYPE (op0))
&& INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (op0, 0)))
&& (INTEGRAL_TYPE_P (TREE_TYPE (op0))
|| POINTER_TYPE_P (TREE_TYPE (op0)))
&& (INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (op0, 0)))
|| POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (op0, 0))))
&& (TYPE_PRECISION (TREE_TYPE (op0))
== TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (op0, 0)))))
return fold_build1 (VIEW_CONVERT_EXPR, type, TREE_OPERAND (op0, 0));
......
2008-03-14 Richard Guenther <rguenther@suse.de>
PR tree-optimization/34043
PR tree-optimization/33989
* gcc.dg/tree-ssa/ssa-fre-7.c: New testcase.
* gcc.dg/tree-ssa/ssa-fre-8.c: Likewise.
* gcc.dg/tree-ssa/ssa-fre-9.c: Likewise.
* gcc.dg/tree-ssa/ssa-fre-10.c: Likewise.
* gcc.dg/tree-ssa/ssa-pre-17.c: Likewise.
2008-03-14 H.J. Lu <hongjiu.lu@intel.com>
Uros Bizjak <ubizjak@gmail.com>
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-pre-stats" } */
/* Union overlaying an unsigned and a signed int member.  */
union loc { unsigned reg; signed offset; };
/* Store members of the never-initialized union FS through the volatile
   pointer STATE_IN in an endless loop.  For X == 0 the store of fs.reg
   falls through into the store of fs.offset; for X == 1 only fs.offset
   is stored; for any other X the loop body does nothing.  */
void __frame_state_for (volatile char *state_in, int x)
{
union loc fs;
/* REG is declared but never referenced in this function.  */
int reg;
for (;;) {
switch (x) {
case 0:
*state_in = fs.reg;
/* No break here: case 0 continues into case 1.  */
case 1:
*state_in = fs.offset;
}
}
}
/* { dg-final { scan-tree-dump "Insertions: 0" "pre" } } */
/* { dg-final { cleanup-tree-dump "pre" } } */
/* { dg-do compile } */
/* { dg-options "-O -fdump-tree-fre-details -fdump-tree-optimized" } */
/* Aggregate with a plain int followed by a union U whose members
   (two ints and a float) share the same storage.  */
struct X {
int i;
union {
int j;
int k;
float f;
} u;
};
/* Store J through the int member a.u.j, copy union members onto
   themselves through both the float view and the int view, and return
   the value read back through the other int member a.u.k.  */
int foo(int j)
{
struct X a;
a.u.j = j;
/* Self-assignments: each one reads and rewrites the same union member.  */
a.u.f = a.u.f;
a.u.f = a.u.f;
a.u.j = a.u.j;
a.u.f = a.u.f;
return a.u.k;
}
/* { dg-final { scan-tree-dump-times "Inserted pretmp" 1 "fre" } } */
/* { dg-final { scan-tree-dump-times "Replaced a.u.f with pretmp" 3 "fre" } } */
/* { dg-final { scan-tree-dump-times "Replaced a.u.k with j" 1 "fre" } } */
/* { dg-final { scan-tree-dump "= VIEW_CONVERT_EXPR<float>\\\(j_" "fre" } } */
/* { dg-final { scan-tree-dump "return j" "optimized" } } */
/* { dg-final { cleanup-tree-dump "fre" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
/* { dg-do compile } */
/* { dg-options "-O -fdump-tree-fre-details" } */
/* Union overlaying an int and a float.  */
union U {
int i;
float f;
};
/* Both arms of the branch on B perform the same sequence: shift I left
   by two, store it through the int member of U and return the value
   read through the float member (converted to int by the return).  */
int foo(int i, int b)
{
union U u;
if (b)
{
i = i << 2;
u.i = i;
return u.f;
}
else
{
/* Same statements as the 'then' arm above.  */
i = i << 2;
u.i = i;
return u.f;
}
}
/* { dg-final { scan-tree-dump-times "Replaced u.f with pretmp" 2 "fre" } } */
/* { dg-final { scan-tree-dump-times "Inserted pretmp" 2 "fre" } } */
/* { dg-final { cleanup-tree-dump "fre" } } */
/* { dg-do compile } */
/* { dg-options "-O -fdump-tree-fre-stats" } */
/* Union overlaying an unsigned and a signed int member.  */
union loc {
unsigned reg;
signed offset;
};
/* Straight-line variant: store fs.reg and then fs.offset of the
   never-initialized union FS through the volatile pointer STATE_IN,
   once each.  */
void __frame_state_for2 (volatile char *state_in)
{
union loc fs;
{
*state_in = fs.reg;
*state_in = fs.offset;
}
}
/* Looping variant: in an endless loop, store fs.offset and then fs.reg
   of the never-initialized union FS through the volatile pointer
   STATE_IN.  */
void __frame_state_for1 (volatile char *state_in)
{
union loc fs;
for (;;)
{
*state_in = fs.offset;
*state_in = fs.reg;
}
}
/* { dg-final { scan-tree-dump-times "Eliminated: 1" 2 "fre" } } */
/* { dg-final { scan-tree-dump-times "Insertions: 1" 2 "fre" } } */
/* { dg-final { cleanup-tree-dump "fre" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-pre-stats" } */
/* Union type U overlaying an int and a float.  */
typedef union {
int i;
float f;
} U;
/* Store 0 into u->i, overwrite it with I when B is nonzero, and return
   the value then read back from u->i.  */
int foo(U *u, int b, int i)
{
u->i = 0;
if (b)
u->i = i;
return u->i;
}
/* { dg-final { scan-tree-dump "Eliminated: 1" "pre" } } */
/* { dg-final { cleanup-tree-dump "pre" } } */
......@@ -208,6 +208,10 @@ for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
case CONSTRUCTOR:
return true;
case ADDR_EXPR:
gcc_assert (is_gimple_min_invariant (*addr_p));
return true;
case TARGET_MEM_REF:
idx = &TMR_BASE (*addr_p);
if (*idx
......
......@@ -221,6 +221,9 @@ tree VN_TOP;
static unsigned int next_dfs_num;
static VEC (tree, heap) *sccstack;
static bool may_insert;
DEF_VEC_P(vn_ssa_aux_t);
DEF_VEC_ALLOC_P(vn_ssa_aux_t, heap);
......@@ -525,8 +528,21 @@ copy_reference_ops_from_ref (tree ref, VEC(vn_reference_op_s, heap) **result)
temp.op1 = TREE_OPERAND (ref, 2);
break;
case COMPONENT_REF:
/* Record field as operand. */
temp.op0 = TREE_OPERAND (ref, 1);
/* If this is a reference to a union member, record the union
member size as operand. Do so only if we are doing
expression insertion (during FRE), as PRE currently gets
confused with this. */
if (may_insert
&& TREE_CODE (DECL_CONTEXT (TREE_OPERAND (ref, 1))) == UNION_TYPE
&& integer_zerop (DECL_FIELD_OFFSET (TREE_OPERAND (ref, 1)))
&& integer_zerop (DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1))))
{
temp.type = NULL_TREE;
temp.op0 = TYPE_SIZE (TREE_TYPE (TREE_OPERAND (ref, 1)));
}
else
/* Record field as operand. */
temp.op0 = TREE_OPERAND (ref, 1);
break;
case ARRAY_RANGE_REF:
case ARRAY_REF:
......@@ -1017,6 +1033,9 @@ defs_to_varying (tree stmt)
return changed;
}
static tree
try_to_simplify (tree stmt, tree rhs);
/* Visit a copy between LHS and RHS, return true if the value number
changed. */
......@@ -1089,6 +1108,64 @@ visit_reference_op_load (tree lhs, tree op, tree stmt)
bool changed = false;
tree result = vn_reference_lookup (op, shared_vuses_from_stmt (stmt));
/* We handle type-punning through unions by value-numbering based
on offset and size of the access. Be prepared to handle a
type-mismatch here via creating a VIEW_CONVERT_EXPR. */
if (result
&& !useless_type_conversion_p (TREE_TYPE (result), TREE_TYPE (op)))
{
/* We will be setting the value number of lhs to the value number
of VIEW_CONVERT_EXPR <TREE_TYPE (op)> (result).
So first simplify and look up this expression to see if it
is already available. */
tree val = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (op), result);
if (stmt
&& !is_gimple_min_invariant (val)
&& TREE_CODE (val) != SSA_NAME)
{
tree tem = try_to_simplify (stmt, val);
if (tem)
val = tem;
}
result = val;
if (!is_gimple_min_invariant (val)
&& TREE_CODE (val) != SSA_NAME)
result = vn_nary_op_lookup (val);
/* If the expression is not yet available, value-number lhs to
a new SSA_NAME we create. */
if (!result && may_insert)
{
result = make_ssa_name (SSA_NAME_VAR (lhs), NULL_TREE);
/* Initialize value-number information properly. */
VN_INFO_GET (result)->valnum = result;
VN_INFO (result)->expr = val;
VN_INFO (result)->needs_insertion = true;
/* As all "inserted" statements are singleton SCCs, insert
to the valid table. This is strictly needed to
avoid re-generating new value SSA_NAMEs for the same
expression during SCC iteration over and over (the
optimistic table gets cleared after each iteration).
We do not need to insert into the optimistic table, as
lookups there will fall back to the valid table. */
if (current_info == optimistic_info)
{
current_info = valid_info;
vn_nary_op_insert (val, result);
current_info = optimistic_info;
}
else
vn_nary_op_insert (val, result);
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Inserting name ");
print_generic_expr (dump_file, result, 0);
fprintf (dump_file, " for expression ");
print_generic_expr (dump_file, val, 0);
fprintf (dump_file, "\n");
}
}
}
if (result)
{
changed = set_ssa_val_to (lhs, result);
......@@ -1496,7 +1573,8 @@ visit_use (tree use)
VN_INFO (use)->use_processed = true;
gcc_assert (!SSA_NAME_IN_FREE_LIST (use));
if (dump_file && (dump_flags & TDF_DETAILS))
if (dump_file && (dump_flags & TDF_DETAILS)
&& !IS_EMPTY_STMT (stmt))
{
fprintf (dump_file, "Value numbering ");
print_generic_expr (dump_file, use, 0);
......@@ -1607,6 +1685,9 @@ visit_use (tree use)
}
if (TREE_CODE (lhs) == SSA_NAME
/* We can substitute SSA_NAMEs that are live over
abnormal edges with their constant value. */
&& !is_gimple_min_invariant (rhs)
&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
changed = defs_to_varying (stmt);
else if (REFERENCE_CLASS_P (lhs) || DECL_P (lhs))
......@@ -1985,6 +2066,9 @@ free_scc_vn (void)
&& SSA_NAME_VALUE (name)
&& TREE_CODE (SSA_NAME_VALUE (name)) == VALUE_HANDLE)
SSA_NAME_VALUE (name) = NULL;
if (name
&& VN_INFO (name)->needs_insertion)
release_ssa_name (name);
}
obstack_free (&vn_ssa_aux_obstack, NULL);
VEC_free (vn_ssa_aux_t, heap, vn_ssa_aux_table);
......@@ -2005,11 +2089,13 @@ free_scc_vn (void)
due to resource constraints. */
bool
run_scc_vn (void)
run_scc_vn (bool may_insert_arg)
{
size_t i;
tree param;
may_insert = may_insert_arg;
init_scc_vn ();
current_info = valid_info;
......@@ -2024,7 +2110,7 @@ run_scc_vn (void)
}
}
for (i = num_ssa_names - 1; i > 0; i--)
for (i = 1; i < num_ssa_names; ++i)
{
tree name = ssa_name (i);
if (name
......@@ -2033,6 +2119,7 @@ run_scc_vn (void)
if (!DFS (name))
{
free_scc_vn ();
may_insert = false;
return false;
}
}
......@@ -2058,5 +2145,6 @@ run_scc_vn (void)
}
}
may_insert = false;
return true;
}
......@@ -44,12 +44,17 @@ typedef struct vn_ssa_aux
once. It cannot be used to avoid visitation for SSA_NAME's
involved in non-singleton SCC's. */
unsigned use_processed : 1;
/* Whether the SSA_NAME has no defining statement and thus an
insertion of such with EXPR as definition is required before
a use can be created of it. */
unsigned needs_insertion : 1;
} *vn_ssa_aux_t;
/* Return the value numbering info for an SSA_NAME. */
extern vn_ssa_aux_t VN_INFO (tree);
extern vn_ssa_aux_t VN_INFO_GET (tree);
bool run_scc_vn (void);
bool run_scc_vn (bool);
void free_scc_vn (void);
void switch_to_PRE_table (void);
tree vn_nary_op_lookup (tree);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment