Commit 6b5eea61 by Bill Schmidt Committed by William Schmidt

re PR tree-optimization/54245 (incorrect optimisation)

gcc:

2012-08-15  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR tree-optimization/54245
	* gimple-ssa-strength-reduction.c (legal_cast_p_1): New function.
	(legal_cast_p): Split out logic to legal_cast_p_1.
	(analyze_increments): Avoid introducing multiplies in smaller types.


gcc/testsuite:

2012-08-15  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR tree-optimization/54245
	* gcc.dg/tree-ssa/pr54245.c: New test.

From-SVN: r190412
parent 0a5f2683
2012-08-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
PR tree-optimization/54245
* gimple-ssa-strength-reduction.c (legal_cast_p_1): New function.
(legal_cast_p): Split out logic to legal_cast_p_1.
(analyze_increments): Avoid introducing multiplies in smaller types.
2012-08-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
PR tree-optimization/54240
* tree-ssa-phiopt.c (hoist_adjacent_loads): Correct test for
existence of conditional move with given mode.
......
......@@ -1089,6 +1089,32 @@ slsr_process_neg (gimple gs, tree rhs1, bool speed)
add_cand_for_stmt (gs, c);
}
/* Help function for legal_cast_p, operating on two trees. Checks
whether it's allowable to cast from RHS to LHS. See legal_cast_p
for more details. */
static bool
legal_cast_p_1 (tree lhs, tree rhs)
{
tree lhs_type, rhs_type;
unsigned lhs_size, rhs_size;
bool lhs_wraps, rhs_wraps;
lhs_type = TREE_TYPE (lhs);
rhs_type = TREE_TYPE (rhs);
lhs_size = TYPE_PRECISION (lhs_type);
rhs_size = TYPE_PRECISION (rhs_type);
lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
if (lhs_size < rhs_size
|| (rhs_wraps && !lhs_wraps)
|| (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
return false;
return true;
}
/* Return TRUE if GS is a statement that defines an SSA name from
a conversion and is legal for us to combine with an add and multiply
in the candidate table. For example, suppose we have:
......@@ -1129,28 +1155,11 @@ slsr_process_neg (gimple gs, tree rhs1, bool speed)
static bool
legal_cast_p (gimple gs, tree rhs)
{
tree lhs, lhs_type, rhs_type;
unsigned lhs_size, rhs_size;
bool lhs_wraps, rhs_wraps;
if (!is_gimple_assign (gs)
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
return false;
lhs = gimple_assign_lhs (gs);
lhs_type = TREE_TYPE (lhs);
rhs_type = TREE_TYPE (rhs);
lhs_size = TYPE_PRECISION (lhs_type);
rhs_size = TYPE_PRECISION (rhs_type);
lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type);
rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type);
if (lhs_size < rhs_size
|| (rhs_wraps && !lhs_wraps)
|| (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
return false;
return true;
return legal_cast_p_1 (gimple_assign_lhs (gs), rhs);
}
/* Given GS which is a cast to a scalar integer type, determine whether
......@@ -1996,6 +2005,31 @@ analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
!= POINTER_PLUS_EXPR)))
incr_vec[i].cost = COST_NEUTRAL;
/* FORNOW: If we need to add an initializer, give up if a cast from
the candidate's type to its stride's type can lose precision.
This could eventually be handled better by expressly retaining the
result of a cast to a wider type in the stride. Example:
short int _1;
_2 = (int) _1;
_3 = _2 * 10;
_4 = x + _3; ADD: x + (10 * _1) : int
_5 = _2 * 15;
_6 = x + _3; ADD: x + (15 * _1) : int
Right now replacing _6 would cause insertion of an initializer
of the form "short int T = _1 * 5;" followed by a cast to
int, which could overflow incorrectly. Had we recorded _2 or
(int)_1 as the stride, this wouldn't happen. However, doing
this breaks other opportunities, so this will require some
care. */
else if (!incr_vec[i].initializer
&& TREE_CODE (first_dep->stride) != INTEGER_CST
&& !legal_cast_p_1 (first_dep->stride,
gimple_assign_lhs (first_dep->cand_stmt)))
incr_vec[i].cost = COST_INFINITE;
/* For any other increment, if this is a multiply candidate, we
must introduce a temporary T and initialize it with
T_0 = stride * increment. When optimizing for speed, walk the
......
2012-08-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
PR tree-optimization/54245
* gcc.dg/tree-ssa/pr54245.c: New test.
2012-08-15 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
PR tree-optimization/54240
* gcc.target/powerpc/pr54240.c: New test.
* gcc.target/mips/pr54240.c: Likewise.
......
/* { dg-do compile } */
/* { dg-options "-O1 -fdump-tree-slsr-details" } */
#include <stdio.h>
#define W1 22725
#define W2 21407
#define W3 19266
#define W6 8867
void idct_row(short *row, int *dst)
{
int a0, a1, b0, b1;
a0 = W1 * row[0];
a1 = a0;
a0 += W2 * row[2];
a1 += W6 * row[2];
b0 = W1 * row[1];
b1 = W3 * row[1];
dst[0] = a0 + b0;
dst[1] = a0 - b0;
dst[2] = a1 + b1;
dst[3] = a1 - b1;
}
static short block[8] = { 1, 2, 3, 4 };
int main(void)
{
int out[4];
int i;
idct_row(block, out);
for (i = 0; i < 4; i++)
printf("%d\n", out[i]);
return !(out[2] == 87858 && out[3] == 10794);
}
/* For now, disable inserting an initializer when the multiplication will
take place in a smaller type than originally. This test may be deleted
in future when this case is handled more precisely. */
/* { dg-final { scan-tree-dump-times "Inserting initializer" 0 "slsr" } } */
/* { dg-final { cleanup-tree-dump "slsr" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment