Commit f8912a55 by Paolo Bonzini

Makefile.in: Add tree-ssa-math-opts.c.

gcc:
2005-05-17  Paolo Bonzini  <bonzini@gnu.org>

	* Makefile.in: Add tree-ssa-math-opts.c.
	* expr.c (expand_expr_real_1) <case RDIV_EXPR>: Never emit as a*(1/b).
	* fold-const.c (distribute_real_division): New.
	(fold_binary) <case PLUS_EXPR, case MINUS_EXPR>: Use it.
	* tree-pass.h (pass_cse_reciprocals): New.
	* tree-optimize.c (init_tree_optimization_passes): Run it.
	* tree-ssa-math-opts.c: New file.
	* doc/passes.texi: Document the new pass.

gcc/testsuite:
2005-05-17  Paolo Bonzini  <bonzini@gnu.org>

	* gcc.dg/fold-div-1.c, gcc.dg/recip-1.c, gcc.dg/recip-2.c: New.

From-SVN: r99826
parent fb664a2c
2005-05-17 Paolo Bonzini <bonzini@gnu.org>
* Makefile.in: Add tree-ssa-math-opts.c.
* expr.c (expand_expr_real_1) <case RDIV_EXPR>: Never emit as a*(1/b).
* fold-const.c (distribute_real_division): New.
(fold_binary) <case PLUS_EXPR, case MINUS_EXPR>: Use it.
* tree-pass.h (pass_cse_reciprocals): New.
* tree-optimize.c (init_tree_optimization_passes): Run it.
* tree-ssa-math-opts.c: New file.
* doc/passes.texi: Document the new pass.
2005-05-17 Richard Guenther <rguenth@gcc.gnu.org> 2005-05-17 Richard Guenther <rguenth@gcc.gnu.org>
PR middle-end/21595 PR middle-end/21595
......
...@@ -934,7 +934,7 @@ OBJS-common = \ ...@@ -934,7 +934,7 @@ OBJS-common = \
tree-phinodes.o tree-ssanames.o tree-sra.o tree-complex.o tree-ssa-loop.o \ tree-phinodes.o tree-ssanames.o tree-sra.o tree-complex.o tree-ssa-loop.o \
tree-ssa-loop-niter.o tree-ssa-loop-manip.o tree-ssa-threadupdate.o \ tree-ssa-loop-niter.o tree-ssa-loop-manip.o tree-ssa-threadupdate.o \
tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \ tree-vectorizer.o tree-vect-analyze.o tree-vect-transform.o \
tree-ssa-loop-ivcanon.o tree-ssa-propagate.o \ tree-ssa-loop-ivcanon.o tree-ssa-propagate.o tree-ssa-math-opts.o \
tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \ tree-ssa-loop-ivopts.o tree-if-conv.o tree-ssa-loop-unswitch.o \
alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o \ alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o \
cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \ cfg.o cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o \
...@@ -1803,6 +1803,8 @@ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \ ...@@ -1803,6 +1803,8 @@ tree-ssa-loop-im.o : tree-ssa-loop-im.c $(TREE_FLOW_H) $(CONFIG_H) \
$(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \ $(PARAMS_H) output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h \
$(TREE_DUMP_H) tree-pass.h $(FLAGS_H) real.h $(BASIC_BLOCK_H) \ $(TREE_DUMP_H) tree-pass.h $(FLAGS_H) real.h $(BASIC_BLOCK_H) \
hard-reg-set.h hard-reg-set.h
tree-ssa-math-opts.o : tree-ssa-math-opts.c $(TREE_FLOW_H) $(CONFIG_H) \
$(SYSTEM_H) $(TREE_H) $(TIMEVAR_H) tree-pass.h $(TM_H) $(FLAGS_H)
tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \ tree-ssa-alias.o : tree-ssa-alias.c $(TREE_FLOW_H) $(CONFIG_H) $(SYSTEM_H) \
$(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) tree-inline.h $(FLAGS_H) \ $(RTL_H) $(TREE_H) $(TM_P_H) $(EXPR_H) $(GGC_H) tree-inline.h $(FLAGS_H) \
function.h $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \ function.h $(TIMEVAR_H) convert.h $(TM_H) coretypes.h langhooks.h \
......
...@@ -354,7 +354,7 @@ This pass transforms tail recursion into a loop. It is located in ...@@ -354,7 +354,7 @@ This pass transforms tail recursion into a loop. It is located in
This pass sinks stores and assignments down the flowgraph closer to its This pass sinks stores and assignments down the flowgraph closer to its
use point. The pass is located in @file{tree-ssa-sink.c} and is use point. The pass is located in @file{tree-ssa-sink.c} and is
described by @code{pass_sink_code} described by @code{pass_sink_code}.
@item Partial redundancy elimination @item Partial redundancy elimination
...@@ -362,6 +362,12 @@ This pass eliminates partially redundant computations, as well as ...@@ -362,6 +362,12 @@ This pass eliminates partially redundant computations, as well as
performing load motion. The pass is located in @file{tree-ssa-pre.c} performing load motion. The pass is located in @file{tree-ssa-pre.c}
and is described by @code{pass_pre}. and is described by @code{pass_pre}.
Just before partial redundancy elimination, if
@option{-funsafe-math-optimizations} is on, GCC tries to convert
divisions to multiplications by the reciprocal. The pass is located
in @file{tree-ssa-math-opts.c} and is described by
@code{pass_cse_reciprocals}.
@item Loop optimization @item Loop optimization
The main driver of the pass is placed in @file{tree-ssa-loop.c} The main driver of the pass is placed in @file{tree-ssa-loop.c}
......
...@@ -7806,18 +7806,6 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode, ...@@ -7806,18 +7806,6 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
return expand_divmod (0, code, mode, op0, op1, target, unsignedp); return expand_divmod (0, code, mode, op0, op1, target, unsignedp);
case RDIV_EXPR: case RDIV_EXPR:
/* Emit a/b as a*(1/b). Later we may manage CSE the reciprocal saving
expensive divide. If not, combine will rebuild the original
computation. */
if (flag_unsafe_math_optimizations && optimize && !optimize_size
&& TREE_CODE (type) == REAL_TYPE
&& !real_onep (TREE_OPERAND (exp, 0)))
return expand_expr (build2 (MULT_EXPR, type, TREE_OPERAND (exp, 0),
build2 (RDIV_EXPR, type,
build_real (type, dconst1),
TREE_OPERAND (exp, 1))),
target, tmode, modifier);
goto binop; goto binop;
case TRUNC_MOD_EXPR: case TRUNC_MOD_EXPR:
......
...@@ -3104,6 +3104,46 @@ distribute_bit_expr (enum tree_code code, tree type, tree arg0, tree arg1) ...@@ -3104,6 +3104,46 @@ distribute_bit_expr (enum tree_code code, tree type, tree arg0, tree arg1)
fold_build2 (code, type, left, right)); fold_build2 (code, type, left, right));
} }
/* Simplify ARG0 CODE ARG1, where the callers have checked that each of ARG0
   and ARG1 is either an RDIV_EXPR or a MULT_EXPR, by factoring out an
   operand the two expressions have in common.  The result is built with
   type TYPE.  Return the simplified tree, or NULL_TREE if neither of the
   patterns below matches.  This optimization is unsafe.  */
static tree
distribute_real_division (enum tree_code code, tree type, tree arg0, tree arg1)
{
  const bool arg0_is_mult = (TREE_CODE (arg0) == MULT_EXPR);
  const bool arg1_is_mult = (TREE_CODE (arg1) == MULT_EXPR);
  tree first0 = TREE_OPERAND (arg0, 0);
  tree second0 = TREE_OPERAND (arg0, 1);
  tree first1 = TREE_OPERAND (arg1, 0);
  tree second1 = TREE_OPERAND (arg1, 1);
  REAL_VALUE_TYPE factor0, factor1;

  /* Both arguments use the same operation and the same second operand:
       (A / C) +- (B / C) -> (A +- B) / C
       (A * C) +- (B * C) -> (A +- B) * C.  */
  if (arg0_is_mult == arg1_is_mult
      && operand_equal_p (second0, second1, 0))
    {
      tree combined = fold_build2 (code, type, first0, first1);

      return fold_build2 (arg0_is_mult ? MULT_EXPR : RDIV_EXPR, type,
			  combined, second0);
    }

  /* The remaining pattern needs a common first operand and constant
     second operands.  */
  if (!operand_equal_p (first0, first1, 0)
      || TREE_CODE (second0) != REAL_CST
      || TREE_CODE (second1) != REAL_CST)
    return NULL_TREE;

  /* (A / C1) +- (A / C2) -> A * (1 / C1 +- 1 / C2).  A constant that was a
     divisor is replaced by its reciprocal; one that was already a
     multiplier is used unchanged.  */
  factor0 = TREE_REAL_CST (second0);
  factor1 = TREE_REAL_CST (second1);
  if (!arg0_is_mult)
    real_arithmetic (&factor0, RDIV_EXPR, &dconst1, &factor0);
  if (!arg1_is_mult)
    real_arithmetic (&factor1, RDIV_EXPR, &dconst1, &factor1);
  real_arithmetic (&factor0, code, &factor0, &factor1);

  return fold_build2 (MULT_EXPR, type, first0, build_real (type, factor0));
}
/* Return a BIT_FIELD_REF of type TYPE to refer to BITSIZE bits of INNER /* Return a BIT_FIELD_REF of type TYPE to refer to BITSIZE bits of INNER
starting at BITPOS. The field is unsigned if UNSIGNEDP is nonzero. */ starting at BITPOS. The field is unsigned if UNSIGNEDP is nonzero. */
...@@ -7528,6 +7568,12 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1) ...@@ -7528,6 +7568,12 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1)
fold_convert (type, tem)); fold_convert (type, tem));
} }
if (flag_unsafe_math_optimizations
&& (TREE_CODE (arg0) == RDIV_EXPR || TREE_CODE (arg0) == MULT_EXPR)
&& (TREE_CODE (arg1) == RDIV_EXPR || TREE_CODE (arg1) == MULT_EXPR)
&& (tem = distribute_real_division (code, type, arg0, arg1)))
return tem;
/* Convert x+x into x*2.0. */ /* Convert x+x into x*2.0. */
if (operand_equal_p (arg0, arg1, 0) if (operand_equal_p (arg0, arg1, 0)
&& SCALAR_FLOAT_TYPE_P (type)) && SCALAR_FLOAT_TYPE_P (type))
...@@ -7925,6 +7971,12 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1) ...@@ -7925,6 +7971,12 @@ fold_binary (enum tree_code code, tree type, tree op0, tree op1)
return fold_convert (type, fold (tem)); return fold_convert (type, fold (tem));
} }
if (flag_unsafe_math_optimizations
&& (TREE_CODE (arg0) == RDIV_EXPR || TREE_CODE (arg0) == MULT_EXPR)
&& (TREE_CODE (arg1) == RDIV_EXPR || TREE_CODE (arg1) == MULT_EXPR)
&& (tem = distribute_real_division (code, type, arg0, arg1)))
return tem;
if (TREE_CODE (arg0) == MULT_EXPR if (TREE_CODE (arg0) == MULT_EXPR
&& TREE_CODE (arg1) == MULT_EXPR && TREE_CODE (arg1) == MULT_EXPR
&& (!FLOAT_TYPE_P (type) || flag_unsafe_math_optimizations)) && (!FLOAT_TYPE_P (type) || flag_unsafe_math_optimizations))
......
2005-05-17 Paolo Bonzini <bonzini@gnu.org>
* gcc.dg/fold-div-1.c, gcc.dg/recip-1.c, gcc.dg/recip-2.c: New.
2005-05-17 Richard Guenther <rguenth@gcc.gnu.org> 2005-05-17 Richard Guenther <rguenth@gcc.gnu.org>
PR middle-end/21595 PR middle-end/21595
......
/* { dg-do compile } */
/* { dg-options "-funsafe-math-optimizations -fdump-tree-gimple" } */
/* Sum of two quotients that share the dividend X and have constant
   divisors.  Per the expectation stated after these functions, this is
   meant to be turned into a multiplication.  */
float f(float x)
{
return x/2 + x/3;
}
/* Sum of two quotients with constant dividends and the common divisor X.
   Per the expectation stated after these functions, the two divisions are
   meant to be grouped together.  */
float g(float x)
{
return 2/x + 3/x;
}
/* Same shape as f, but with a subtraction: a difference of two quotients
   that share the dividend X and have constant divisors.  Expected to be
   turned into a multiplication.  */
float h(float x)
{
return x/2 - x/3;
}
/* Same shape as g, but with a subtraction: a difference of two quotients
   with constant dividends and the common divisor X.  The two divisions are
   expected to be grouped together.  */
float i(float x)
{
return 2/x - 3/x;
}
/* f and h should be turned into multiplications,
the divisions in g and i should be grouped together. */
/* { dg-final { scan-tree-dump-times " \\* " 2 "gimple" } } */
/* { dg-final { scan-tree-dump-times " / " 2 "gimple" } } */
/* { dg-final { cleanup-tree-dump "gimple" } } */
/* { dg-do compile } */
/* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */
/* Normalize the vector (*X, *Y, *Z) in place: compute its Euclidean norm M
   from the values on entry, divide each component by M, and return M.

   The three divisions all share the divisor M; the expectation stated
   after this function is that only one division remains.  The return
   statement adds no division; it gives the non-void function a defined
   result instead of letting control fall off the end.  */
float e(float *x, float *y, float *z)
{
  float m = __builtin_sqrt (*x * *x + *y * *y + *z * *z);
  *x /= m;
  *y /= m;
  *z /= m;
  return m;
}
/* Look for only one division. */
/* { dg-final { scan-tree-dump-times "= .* /" 1 "recip" } } */
/* { dg-final { cleanup-tree-dump "recip" } } */
/* { dg-do compile } */
/* { dg-options "-O1 -funsafe-math-optimizations -fdump-tree-recip" } */
/* A and C are each reassigned on one path only (when a < b), so the
   divisions after the conditional use values that come from either path.
   There are two divisions by A followed by two divisions by C.  Note that
   the parameter E hides the function's own name inside the body.  */
float e(float a, float b, float c, float d, float e, float f)
{
if (a < b)
{
a = a + b;
c = c + d;
}
/* The PHI nodes for these divisions should be combined. */
e = e / a;
f = f / a;
a = a / c;
b = b / c;
return a + b + e + f;
}
/* { dg-final { scan-tree-dump-times " / " 2 "recip" } } */
/* { dg-final { cleanup-tree-dump "recip" } } */
...@@ -383,6 +383,7 @@ init_tree_optimization_passes (void) ...@@ -383,6 +383,7 @@ init_tree_optimization_passes (void)
we add may_alias right after fold builtins we add may_alias right after fold builtins
which can create arbitrary GIMPLE. */ which can create arbitrary GIMPLE. */
NEXT_PASS (pass_may_alias); NEXT_PASS (pass_may_alias);
NEXT_PASS (pass_cse_reciprocals);
NEXT_PASS (pass_split_crit_edges); NEXT_PASS (pass_split_crit_edges);
NEXT_PASS (pass_pre); NEXT_PASS (pass_pre);
NEXT_PASS (pass_sink_code); NEXT_PASS (pass_sink_code);
......
...@@ -196,6 +196,7 @@ extern struct tree_opt_pass pass_fold_builtins; ...@@ -196,6 +196,7 @@ extern struct tree_opt_pass pass_fold_builtins;
extern struct tree_opt_pass pass_stdarg; extern struct tree_opt_pass pass_stdarg;
extern struct tree_opt_pass pass_early_warn_uninitialized; extern struct tree_opt_pass pass_early_warn_uninitialized;
extern struct tree_opt_pass pass_late_warn_uninitialized; extern struct tree_opt_pass pass_late_warn_uninitialized;
extern struct tree_opt_pass pass_cse_reciprocals;
extern struct tree_opt_pass pass_warn_function_return; extern struct tree_opt_pass pass_warn_function_return;
extern struct tree_opt_pass pass_warn_function_noreturn; extern struct tree_opt_pass pass_warn_function_noreturn;
extern struct tree_opt_pass pass_phiopt; extern struct tree_opt_pass pass_phiopt;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment