Commit add6207a by Bill Schmidt Committed by William Schmidt

re PR rtl-optimization/44214 (Compiler does not optimize vector divide with…

re PR rtl-optimization/44214 (Compiler does not optimize vector divide with -freciprocal-math (or -ffast-math))

gcc:

2012-04-20  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR rtl-optimization/44214
	* fold-const.c (exact_inverse): New function.
	(fold_binary_loc): Fold vector and complex division by constant into
	multiply by reciprocal with flag_reciprocal_math; fold vector division
	by constant into multiply by reciprocal with exact inverse.

gcc/testsuite:

2012-04-20  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR rtl-optimization/44214
	* gcc.dg/pr44214-1.c: New test.
	* gcc.dg/pr44214-2.c: Likewise.
	* gcc.dg/pr44214-3.c: Likewise.

From-SVN: r186625
parent ead84f73
2012-04-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
PR rtl-optimization/44214
* fold-const.c (exact_inverse): New function.
(fold_binary_loc): Fold vector and complex division by constant into
multiply by reciprocal with flag_reciprocal_math; fold vector division
by constant into multiply by reciprocal with exact inverse.
2012-04-20 Jan Hubicka <jh@suse.cz>
* lto-symtab.c (lto_cgraph_replace_node): Merge needed instead of force flags.
......
......@@ -9693,6 +9693,48 @@ fold_addr_of_array_ref_difference (location_t loc, tree type,
return NULL_TREE;
}
/* Compute the reciprocal of CST, which is either a real constant or a
   vector of real constants of type TYPE.  The reciprocal is returned
   only when it is exact; for a vector, every element must have an
   exact inverse.  In all other cases, including constants of any
   other kind, NULL_TREE is returned.  */

static tree
exact_inverse (tree type, tree cst)
{
  if (TREE_CODE (cst) == REAL_CST)
    {
      REAL_VALUE_TYPE value = TREE_REAL_CST (cst);

      /* On success VALUE is used as the inverse, so exact_real_inverse
         is expected to have updated it in place.  */
      return (exact_real_inverse (TYPE_MODE (type), &value)
              ? build_real (type, value)
              : NULL_TREE);
    }

  if (TREE_CODE (cst) == VECTOR_CST)
    {
      unsigned count = VECTOR_CST_NELTS (cst);
      tree *inverses = XALLOCAVEC (tree, count);
      tree elt_type = TREE_TYPE (type);
      enum machine_mode elt_mode = TYPE_MODE (elt_type);
      unsigned idx;

      /* Invert element by element; a single inexact element makes the
         whole vector ineligible.  */
      for (idx = 0; idx < count; idx++)
        {
          REAL_VALUE_TYPE value = TREE_REAL_CST (VECTOR_CST_ELT (cst, idx));

          if (!exact_real_inverse (elt_mode, &value))
            return NULL_TREE;

          inverses[idx] = build_real (elt_type, value);
        }

      return build_vector (type, inverses);
    }

  /* Any other kind of constant is not handled.  */
  return NULL_TREE;
}
/* Fold a binary expression of code CODE and type TYPE with operands
OP0 and OP1. LOC is the location of the resulting expression.
Return the folded expression if folding is successful. Otherwise,
......@@ -11734,23 +11776,24 @@ fold_binary_loc (location_t loc,
so only do this if -freciprocal-math. We can actually
always safely do it if ARG1 is a power of two, but it's hard to
tell if it is or not in a portable manner. */
if (TREE_CODE (arg1) == REAL_CST)
if (optimize
&& (TREE_CODE (arg1) == REAL_CST
|| (TREE_CODE (arg1) == COMPLEX_CST
&& COMPLEX_FLOAT_TYPE_P (TREE_TYPE (arg1)))
|| (TREE_CODE (arg1) == VECTOR_CST
&& VECTOR_FLOAT_TYPE_P (TREE_TYPE (arg1)))))
{
if (flag_reciprocal_math
&& 0 != (tem = const_binop (code, build_real (type, dconst1),
arg1)))
&& 0 != (tem = const_binop (code, build_one_cst (type), arg1)))
return fold_build2_loc (loc, MULT_EXPR, type, arg0, tem);
/* Find the reciprocal if optimizing and the result is exact. */
if (optimize)
{
REAL_VALUE_TYPE r;
r = TREE_REAL_CST (arg1);
if (exact_real_inverse (TYPE_MODE(TREE_TYPE(arg0)), &r))
/* Find the reciprocal if optimizing and the result is exact.
TODO: Complex reciprocal not implemented. */
if (TREE_CODE (arg1) != COMPLEX_CST)
{
tem = build_real (type, r);
return fold_build2_loc (loc, MULT_EXPR, type,
fold_convert_loc (loc, type, arg0), tem);
}
tree inverse = exact_inverse (TREE_TYPE (arg0), arg1);
if (inverse)
return fold_build2_loc (loc, MULT_EXPR, type, arg0, inverse);
}
}
/* Convert A/B/C to A/(B*C). */
......
2012-04-20 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
PR rtl-optimization/44214
* gcc.dg/pr44214-1.c: New test.
* gcc.dg/pr44214-2.c: Likewise.
* gcc.dg/pr44214-3.c: Likewise.
2012-04-20 Richard Guenther <rguenther@suse.de>
* g++.dg/torture/20120420-1.C: New testcase.
......
/* { dg-do compile } */
/* { dg-options "-O2 -freciprocal-math -fdump-tree-ccp1" } */
typedef double v2df __attribute__ ((vector_size (16)));
/* Divide the vector at *B by the constant vector { 2.0, 3.0 } and store
   the quotient in *A.  The divisor is a vector constant, which is the
   form of division the dump scans in this test are checking for.  */
void do_div (v2df *a, v2df *b)
{
*a = *b / (v2df) { 2.0, 3.0 };
}
/* Constant folding should multiply *b by the reciprocals of the
vector elements. The fold does not take place for generic
vectors until the first CCP pass. The string " * " occurs 3
times: one multiply and two indirect parameters. */
/* { dg-final { scan-tree-dump-times " \\\* " 3 "ccp1" } } */
/* { dg-final { scan-tree-dump-times " / " 0 "ccp1" } } */
/* { dg-final { cleanup-tree-dump "ccp1" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -freciprocal-math -fdump-tree-original" } */
/* Divide the complex value at *B by the complex constant 4.0 - 5.0i and
   store the quotient in *A.  The divisor is a compile-time constant,
   which is the form of division the dump scans in this test are
   checking for.  */
void do_div (_Complex double *a, _Complex double *b)
{
*a = *b / (4.0 - 5.0fi);
}
/* Constant folding should multiply *b by the reciprocal of 4 - 5i
= 4/41 + (5/41)i. */
/* { dg-final { scan-tree-dump-times " \\\* " 1 "original" } } */
/* { dg-final { scan-tree-dump-times " / " 0 "original" } } */
/* { dg-final { cleanup-tree-dump "original" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-ccp1" } */
typedef double v2df __attribute__ ((vector_size (16)));
/* Divide the vector at *B by the constant vector { 2.0, 2.0 } and store
   the quotient in *A.  Note that this test is compiled without
   -freciprocal-math, so only the exact-inverse path can remove the
   divide.  */
void do_div (v2df *a, v2df *b)
{
*a = *b / (v2df) { 2.0, 2.0 };
}
/* Since 2.0 has an exact reciprocal, constant folding should multiply *b
by the reciprocals of the vector elements. As a result there should be
one vector multiply and zero divides in the optimized code. The fold
does not take place for generic vectors until the first CCP pass. The
string " * " occurs 3 times: one multiply and two indirect parameters. */
/* { dg-final { scan-tree-dump-times " \\\* " 3 "ccp1" } } */
/* { dg-final { scan-tree-dump-times " / " 0 "ccp1" } } */
/* { dg-final { cleanup-tree-dump "ccp1" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment