Commit 5b55e6e3 by Richard Biener Committed by Richard Biener

re PR tree-optimization/81082 (Failure to vectorise after reassociating index computation)

2018-01-26  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/81082
	* fold-const.c (fold_plusminus_mult_expr): Do not perform the
	association if it requires casting to unsigned.
	* match.pd ((A * C) +- (B * C) -> (A+-B) * C): New patterns derived
	from fold_plusminus_mult_expr to catch important cases late when
	range info is available.

	* gcc.dg/vect/pr81082.c: New testcase.
	* gcc.dg/tree-ssa/loop-15.c: XFAIL the (int)((unsigned)n + -1U) * n + n
	simplification to n * n.

From-SVN: r257077
parent af2e3244
2018-01-26 Richard Biener <rguenther@suse.de>
PR tree-optimization/81082
* fold-const.c (fold_plusminus_mult_expr): Do not perform the
association if it requires casting to unsigned.
* match.pd ((A * C) +- (B * C) -> (A+-B) * C): New patterns derived
from fold_plusminus_mult_expr to catch important cases late when
range info is available.
2018-01-26 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> 2018-01-26 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* config/i386/sol2.h (USE_HIDDEN_LINKONCE): Remove. * config/i386/sol2.h (USE_HIDDEN_LINKONCE): Remove.
......
...@@ -7097,7 +7097,7 @@ fold_plusminus_mult_expr (location_t loc, enum tree_code code, tree type, ...@@ -7097,7 +7097,7 @@ fold_plusminus_mult_expr (location_t loc, enum tree_code code, tree type,
/* Same may be zero and thus the operation 'code' may overflow. Likewise /* Same may be zero and thus the operation 'code' may overflow. Likewise
same may be minus one and thus the multiplication may overflow. Perform same may be minus one and thus the multiplication may overflow. Perform
the operations in an unsigned type. */ the sum operation in an unsigned type. */
tree utype = unsigned_type_for (type); tree utype = unsigned_type_for (type);
tree tem = fold_build2_loc (loc, code, utype, tree tem = fold_build2_loc (loc, code, utype,
fold_convert_loc (loc, utype, alt0), fold_convert_loc (loc, utype, alt0),
...@@ -7110,9 +7110,9 @@ fold_plusminus_mult_expr (location_t loc, enum tree_code code, tree type, ...@@ -7110,9 +7110,9 @@ fold_plusminus_mult_expr (location_t loc, enum tree_code code, tree type,
return fold_build2_loc (loc, MULT_EXPR, type, return fold_build2_loc (loc, MULT_EXPR, type,
fold_convert (type, tem), same); fold_convert (type, tem), same);
return fold_convert_loc (loc, type, /* Do not resort to unsigned multiplication because
fold_build2_loc (loc, MULT_EXPR, utype, tem, we lose the no-overflow property of the expression. */
fold_convert_loc (loc, utype, same))); return NULL_TREE;
} }
/* Subroutine of native_encode_expr. Encode the INTEGER_CST /* Subroutine of native_encode_expr. Encode the INTEGER_CST
......
...@@ -1939,6 +1939,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) ...@@ -1939,6 +1939,37 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(minus (convert (view_convert:stype @1)) (minus (convert (view_convert:stype @1))
(convert (view_convert:stype @2))))))) (convert (view_convert:stype @2)))))))
/* (A * C) +- (B * C) -> (A+-B) * C and (A * C) +- A -> A * (C+-1).
Modeled after fold_plusminus_mult_expr.
None of the patterns below apply to saturating types, and they apply
to floating-point types only when flag_associative_math is set.
In every pattern @0 is the common factor that gets pulled out. */
(if (!TYPE_SATURATING (type)
&& (!FLOAT_TYPE_P (type) || flag_associative_math))
(for plusminus (plus minus)
/* (@0 * @1) +- (@0 * @2) -> (@1 +- @2) * @0. */
(simplify
(plusminus (mult:cs @0 @1) (mult:cs @0 @2))
/* Accept non-integral types and types whose overflow wraps
unconditionally.  Otherwise require an integral type whose common
factor is known to be neither zero nor minus one; these are the two
values fold_plusminus_mult_expr's comment names as able to introduce
an overflow (zero for the sum, minus one for the multiplication). */
(if (!ANY_INTEGRAL_TYPE_P (type)
|| TYPE_OVERFLOW_WRAPS (type)
|| (INTEGRAL_TYPE_P (type)
&& tree_expr_nonzero_p (@0)
&& expr_not_equal_to (@0, wi::minus_one (TYPE_PRECISION (type)))))
(mult (plusminus @1 @2) @0)))
/* We cannot generate constant 1 for fract. */
(if (!ALL_FRACT_MODE_P (TYPE_MODE (type)))
/* @0 +- (@0 * @2) -> (1 +- @2) * @0, treating the bare @0 as @0 * 1. */
(simplify
(plusminus @0 (mult:cs @0 @2))
/* Same type/overflow guard as the two-product pattern above. */
(if (!ANY_INTEGRAL_TYPE_P (type)
|| TYPE_OVERFLOW_WRAPS (type)
|| (INTEGRAL_TYPE_P (type)
&& tree_expr_nonzero_p (@0)
&& expr_not_equal_to (@0, wi::minus_one (TYPE_PRECISION (type)))))
(mult (plusminus { build_one_cst (type); } @2) @0)))
/* (@0 * @2) +- @0 -> (@2 +- 1) * @0; the mirrored operand order of the
previous pattern, kept separate so that the operand order of the
(possibly non-commutative) plusminus is preserved. */
(simplify
(plusminus (mult:cs @0 @2) @0)
/* Same type/overflow guard as the two-product pattern above. */
(if (!ANY_INTEGRAL_TYPE_P (type)
|| TYPE_OVERFLOW_WRAPS (type)
|| (INTEGRAL_TYPE_P (type)
&& tree_expr_nonzero_p (@0)
&& expr_not_equal_to (@0, wi::minus_one (TYPE_PRECISION (type)))))
(mult (plusminus @2 { build_one_cst (type); }) @0))))))
/* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax(). */ /* Simplifications of MIN_EXPR, MAX_EXPR, fmin() and fmax(). */
......
2018-01-26 Richard Biener <rguenther@suse.de>
PR tree-optimization/81082
* gcc.dg/vect/pr81082.c: New testcase.
* gcc.dg/tree-ssa/loop-15.c: XFAIL the (int)((unsigned)n + -1U) * n + n
simplification to n * n.
2018-01-26 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> 2018-01-26 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* gcc.target/i386/mcount_pic.c: Only xfail get_pc_thunk scan on * gcc.target/i386/mcount_pic.c: Only xfail get_pc_thunk scan on
......
...@@ -19,7 +19,7 @@ int bla(void) ...@@ -19,7 +19,7 @@ int bla(void)
} }
/* Since the loop is removed, there should be no addition. */ /* Since the loop is removed, there should be no addition. */
/* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" } } */ /* { dg-final { scan-tree-dump-times " \\+ " 0 "optimized" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */ /* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
/* The if from the loop header copying remains in the code. */ /* The if from the loop header copying remains in the code. */
......
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
/* Vectorizer testcase for PR tree-optimization/81082.

   Returns the sum of the elements of X visited by a three-level loop
   nest with bounds B1, B2 and B3.  The element index is the flattened
   position i1 * b2 * b3 + i2 * b3 + i3 shifted by a constant -1, so the
   very first access (all counters zero) reads x[-1]; a caller that
   actually ran this would have to pass a pointer at least one element
   into its buffer.

   The shape of the index expression is what the test exercises (the PR
   title is "Failure to vectorise after reassociating index
   computation"), so it must not be simplified or rearranged.  */
int
f (int *x, int b1, int b2, int b3)
{
int foo = 0;
for (int i1 = 0; i1 < b1; ++i1)
for (int i2 = 0; i2 < b2; ++i2)
for (int i3 = 0; i3 < b3; ++i3)
/* The parenthesised (i3 - 1) keeps the -1 offset attached to the
   innermost induction variable in the source expression.  */
foo += x[i1 * b2 * b3 + i2 * b3 + (i3 - 1)];
return foo;
}
/* { dg-final { scan-tree-dump "vectorized 1 loops in function" "vect" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment