Commit 268cb4b3 by Wilco Dijkstra, committed by Jiong Wang

Remove inefficient branchless conditional negate optimization

2015-03-06  Wilco Dijkstra  <wdijkstr@arm.com>

  gcc/
    * tree-ssa-phiopt.c (neg_replacement): Remove.
    (tree_ssa_phiopt_worker): Remove negate optimization.

  gcc/testsuite/
    * gcc.dg/tree-ssa/pr45685.c: Move to gcc.target/i386.
    * gcc.target/aarch64/csneg-1.c (test_csneg_cmp): New test.
    * gcc.target/i386/pr45685.c: Moved test, check for conditional move on x64.

From-SVN: r221246
parent c271e224
2015-03-06 Wilco Dijkstra <wilco.dijkstra@arm.com>
* tree-ssa-phiopt.c (neg_replacement): Remove.
(tree_ssa_phiopt_worker): Remove negate optimization.
2015-03-05 Jan Hubicka <hubicka@ucw.cz> 2015-03-05 Jan Hubicka <hubicka@ucw.cz>
PR ipa/65302 PR ipa/65302
......
2015-03-06 Wilco Dijkstra <wilco.dijkstra@arm.com>
* gcc.dg/tree-ssa/pr45685.c: Move to gcc.target/i386.
* gcc.target/aarch64/csneg-1.c (test_csneg_cmp): New test.
* gcc.target/i386/pr45685.c: Moved test, check for conditional move on
x64.
2015-03-05 Jan Hubicka <hubicka@ucw.cz> 2015-03-05 Jan Hubicka <hubicka@ucw.cz>
* g++.dg/lto/pr65302_1.C: New testcase. * g++.dg/lto/pr65302_1.C: New testcase.
......
...@@ -48,3 +48,11 @@ test_csneg64_condasn2(long long x0, ...@@ -48,3 +48,11 @@ test_csneg64_condasn2(long long x0,
x4 = (x0 == x1) ? x3 : -x2; x4 = (x0 == x1) ? x3 : -x2;
return x4; return x4;
} }
/* Negate X only when it exceeds 3; written as a single conditional
   expression so the compiler sees one PHI and emits a CSNEG.  */
int test_csneg_cmp(int x)
{
/* { dg-final { scan-assembler "csneg\tw\[0-9\]" } } */
return (x > 3) ? -x : x;
}
/* { dg-do compile } */ /* { dg-do compile { target { ! { ia32 } } } } */
/* { dg-options "-O3 -fdump-tree-phiopt1-details" } */ /* { dg-options "-O3" } */
typedef unsigned long int uint64_t; typedef unsigned long int uint64_t;
typedef long int int64_t; typedef long int int64_t;
...@@ -36,6 +36,4 @@ int summation_helper_2(int64_t* products, uint64_t count) ...@@ -36,6 +36,4 @@ int summation_helper_2(int64_t* products, uint64_t count)
return s; return s;
} }
/* { dg-final { scan-tree-dump-times "converted to straightline code" 2 "phiopt1" } } */ /* { dg-final { scan-assembler-times "cmov" 4 } } */
/* { dg-final { cleanup-tree-dump "phiopt1" } } */
...@@ -96,8 +96,6 @@ static bool minmax_replacement (basic_block, basic_block, ...@@ -96,8 +96,6 @@ static bool minmax_replacement (basic_block, basic_block,
edge, edge, gimple, tree, tree); edge, edge, gimple, tree, tree);
static bool abs_replacement (basic_block, basic_block, static bool abs_replacement (basic_block, basic_block,
edge, edge, gimple, tree, tree); edge, edge, gimple, tree, tree);
static bool neg_replacement (basic_block, basic_block,
edge, edge, gimple, tree, tree);
static bool cond_store_replacement (basic_block, basic_block, edge, edge, static bool cond_store_replacement (basic_block, basic_block, edge, edge,
hash_set<tree> *); hash_set<tree> *);
static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block); static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block);
...@@ -209,23 +207,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads) ...@@ -209,23 +207,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
/* Calculate the set of non-trapping memory accesses. */ /* Calculate the set of non-trapping memory accesses. */
nontrap = get_non_trapping (); nontrap = get_non_trapping ();
/* The replacement of conditional negation with a non-branching
sequence is really only a win when optimizing for speed and we
can avoid transformations by gimple if-conversion that result
in poor RTL generation.
Ideally either gimple if-conversion or the RTL expanders will
be improved and the code to emit branchless conditional negation
can be removed. */
bool replace_conditional_negation = false;
if (!do_store_elim)
replace_conditional_negation
= ((!optimize_size && optimize >= 2)
|| (((flag_tree_loop_vectorize || cfun->has_force_vectorize_loops)
&& flag_tree_loop_if_convert != 0)
|| flag_tree_loop_if_convert == 1
|| flag_tree_loop_if_convert_stores == 1));
/* Search every basic block for COND_EXPR we may be able to optimize. /* Search every basic block for COND_EXPR we may be able to optimize.
We walk the blocks in order that guarantees that a block with We walk the blocks in order that guarantees that a block with
...@@ -380,9 +361,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads) ...@@ -380,9 +361,6 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads)
cfgchanged = true; cfgchanged = true;
else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) else if (abs_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true; cfgchanged = true;
else if (replace_conditional_negation
&& neg_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true;
else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1)) else if (minmax_replacement (bb, bb1, e1, e2, phi, arg0, arg1))
cfgchanged = true; cfgchanged = true;
} }
...@@ -1319,140 +1297,6 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, ...@@ -1319,140 +1297,6 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb,
return true; return true;
} }
/* The function neg_replacement replaces conditional negation with
equivalent straight line code. Returns TRUE if replacement is done,
otherwise returns FALSE.
COND_BB branches around negation occurring in MIDDLE_BB.
E0 and E1 are edges out of COND_BB. E0 reaches MIDDLE_BB and
E1 reaches the other successor which should contain PHI with
arguments ARG0 and ARG1.
Assuming negation is to occur when the condition is true,
then the non-branching sequence is:
result = (rhs ^ -cond) + cond
Inverting the condition or its result gives us negation
when the original condition is false. */
/* Replace the conditional negation diamond (COND_BB -> MIDDLE_BB -> PHI)
   with the branchless sequence result = (rhs ^ -cond) + cond, inserted at
   the end of COND_BB.  E0 is unused; E1 is the edge whose PHI argument is
   rewritten.  Returns true iff the replacement was performed.  */
static bool
neg_replacement (basic_block cond_bb, basic_block middle_bb,
edge e0 ATTRIBUTE_UNUSED, edge e1,
gimple phi, tree arg0, tree arg1)
{
gimple new_stmt, cond;
gimple_stmt_iterator gsi;
gimple assign;
edge true_edge, false_edge;
tree rhs, lhs;
enum tree_code cond_code;
bool invert = false;
/* This transformation performs logical operations on the
incoming arguments. So force them to be integral types. */
if (!INTEGRAL_TYPE_P (TREE_TYPE (arg0)))
return false;
/* OTHER_BLOCK must have only one executable statement which must have the
form arg0 = -arg1 or arg1 = -arg0. */
assign = last_and_only_stmt (middle_bb);
/* If we did not find the proper negation assignment, then we can not
optimize. */
if (assign == NULL)
return false;
/* If we got here, then we have found the only executable statement
in OTHER_BLOCK. If it is anything other than arg0 = -arg1 or
arg1 = -arg0, then we can not optimize. */
if (gimple_code (assign) != GIMPLE_ASSIGN)
return false;
lhs = gimple_assign_lhs (assign);
if (gimple_assign_rhs_code (assign) != NEGATE_EXPR)
return false;
rhs = gimple_assign_rhs1 (assign);
/* The assignment has to be arg0 = -arg1 or arg1 = -arg0. */
if (!(lhs == arg0 && rhs == arg1)
&& !(lhs == arg1 && rhs == arg0))
return false;
/* The basic sequence assumes we negate when the condition is true.
If we need the opposite, then we will either need to invert the
condition or its result. */
extract_true_false_edges_from_block (cond_bb, &true_edge, &false_edge);
invert = false_edge->dest == middle_bb;
/* Unlike abs_replacement, we can handle arbitrary conditionals here. */
cond = last_stmt (cond_bb);
cond_code = gimple_cond_code (cond);
/* If inversion is needed, first try to invert the test since
that's cheapest. */
if (invert)
{
bool honor_nans = HONOR_NANS (gimple_cond_lhs (cond));
enum tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
/* If invert_tree_comparison was successful, then use its return
value as the new code and note that inversion is no longer
needed. */
if (new_code != ERROR_MARK)
{
cond_code = new_code;
invert = false;
}
}
/* Materialize the (possibly inverted) comparison as an explicit
boolean SSA value just before the end of COND_BB:
cond_val = lhs <cond_code> rhs.  */
tree cond_val = make_ssa_name (boolean_type_node);
new_stmt = gimple_build_assign (cond_val, cond_code,
gimple_cond_lhs (cond),
gimple_cond_rhs (cond));
gsi = gsi_last_bb (cond_bb);
gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT);
/* If we still need inversion, then invert the result of the
condition. */
if (invert)
{
/* cond_val ^ true flips the boolean when the comparison itself
could not be inverted (e.g. some FP comparisons with NaNs).  */
tree tmp = make_ssa_name (boolean_type_node);
new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, cond_val,
boolean_true_node);
gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
cond_val = tmp;
}
/* Get the condition in the right type so that we can perform
logical and arithmetic operations on it. */
tree cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
new_stmt = gimple_build_assign (cond_val_converted, NOP_EXPR, cond_val);
gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
/* neg_cond_val_converted = -cond: 0 when the condition is false,
-1 (all ones in a two's-complement integral type) when true.  */
tree neg_cond_val_converted = make_ssa_name (TREE_TYPE (rhs));
new_stmt = gimple_build_assign (neg_cond_val_converted, NEGATE_EXPR,
cond_val_converted);
gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
/* tmp = rhs ^ -cond: rhs unchanged when cond is false, bitwise
complement of rhs when cond is true.  */
tree tmp = make_ssa_name (TREE_TYPE (rhs));
new_stmt = gimple_build_assign (tmp, BIT_XOR_EXPR, rhs,
neg_cond_val_converted);
gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
/* new_lhs = (rhs ^ -cond) + cond: adding cond completes the
two's-complement negation when cond is true.  */
tree new_lhs = make_ssa_name (TREE_TYPE (rhs));
new_stmt = gimple_build_assign (new_lhs, PLUS_EXPR, tmp, cond_val_converted);
gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT);
replace_phi_edge_with_variable (cond_bb, e1, phi, new_lhs);
/* Note that we optimized this PHI. */
return true;
}
/* Auxiliary functions to determine the set of memory accesses which /* Auxiliary functions to determine the set of memory accesses which
can't trap because they are preceded by accesses to the same memory can't trap because they are preceded by accesses to the same memory
portion. We do that for MEM_REFs, so we only need to track portion. We do that for MEM_REFs, so we only need to track
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.