Commit 19cf3a36 by Jackson Woodruff Committed by Wilco Dijkstra

Factor out division by squares

This patch implements the some of the division optimizations discussed in
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71026.

The division reciprocal optimization now handles divisions by squares:

     x / (y * y) -> x  * (1 / y) * (1 / y)

This requires at least one more division by y before it triggers - the
3 divisions of (1/ y) are then CSEd into a single division.  Overall
this changes 1 division into 1 multiply, which is generally much faster.


2017-11-24  Jackson Woodruff  <jackson.woodruff@arm.com>

    gcc/
	PR tree-optimization/71026
	* tree-ssa-math-opts (is_division_by_square, is_square_of): New.
	(insert_reciprocals): Change to insert reciprocals before a division
	by a square and to insert the square of a reciprocal.
	(execute_cse_reciprocals_1): Change to consider division by a square.
	(register_division_in): Add importance parameter.

    testsuite/
	PR tree-optimization/71026
	* gfortran.dg/extract_recip_1.f: New test.
	* gcc.dg/extract_recip_3.c: New test.
	* gcc.dg/extract_recip_4.c: New test.

From-SVN: r255141
parent 15b6695a
2017-11-24 Jackson Woodruff <jackson.woodruff@arm.com>
PR tree-optimization/71026
* tree-ssa-math-opts (is_division_by_square, is_square_of): New.
(insert_reciprocals): Change to insert reciprocals before a division
by a square and to insert the square of a reciprocal.
(execute_cse_reciprocals_1): Change to consider division by a square.
(register_division_in): Add importance parameter.
2017-11-24 Richard Biener <rguenther@suse.de>
PR tree-optimization/82402
2017-11-24 Jackson Woodruff <jackson.woodruff@arm.com>
PR tree-optimization/71026
* gfortran.dg/extract_recip_1.f: New test.
* gcc.dg/extract_recip_3.c: New test.
* gcc.dg/extract_recip_4.c: New test.
2017-11-24 Richard Biener <rguenther@suse.de>
PR tree-optimization/82402
......
/* { dg-do compile } */
/* { dg-options "-Ofast -fdump-tree-optimized-raw" } */
float
extract_square (float *a, float *b, float x, float y)
{
*a = 3 / (y * y);
*b = 5 / (y * y);
return x / (y * y);
}
/* Don't expect the 'powmult' (calculation of y * y)
to be deleted until a later pass, so look for one
more multiplication than strictly necessary. */
float
extract_recip (float *a, float *b, float x, float y, float z)
{
*a = 7 / y;
*b = x / (y * y);
return z / y;
}
/* 4 multiplications in 'extract_square', and 4 in 'extract_recip'. */
/* { dg-final { scan-tree-dump-times "mult_expr" 8 "optimized" } } */
/* 1 division in 'extract_square', 1 division in 'extract_recip'. */
/* { dg-final { scan-tree-dump-times "rdiv_expr" 2 "optimized" } } */
/* { dg-do compile } */
/* { dg-options "-Ofast -fdump-tree-optimized-raw" } */
/* Don't expect any of these divisions to be extracted. */
double f (double x, int p)
{
if (p > 0)
{
return 1.0/(x * x);
}
if (p > -1)
{
return x * x * x;
}
return 1.0 /(x);
}
/* Expect a reciprocal to be extracted here. */
double g (double *a, double x, double y)
{
*a = 3 / y;
double k = x / (y * y);
if (y * y == 2.0)
return k + 1 / y;
else
return k - 1 / y;
}
/* Expect 2 divisions in 'f' and 1 in 'g'. */
/* { dg-final { scan-tree-dump-times "rdiv_expr" 3 "optimized" } } */
/* Expect 3 multiplications in 'f' and 4 in 'g'. */
/* { dg-final { scan-tree-dump-times "mult_expr" 7 "optimized" } } */
! { dg-do compile }
! { dg-options "-Ofast -fdump-tree-optimized-raw" }
SUBROUTINE F(N,X,Y,Z,A,B)
DIMENSION X(4,4), Y(4), Z(4)
REAL, INTENT(INOUT) :: A, B
A = 1 / (Y(N)*Y(N))
DO I = 1, NV
X(I, I) = 1 + X(I, I)
ENDDO
Z(1) = B / Y(N)
Z(2) = N / Y(N)
RETURN
END
! { dg-final { scan-tree-dump-times "rdiv_expr" 1 "optimized" } }
......@@ -127,6 +127,10 @@ struct occurrence {
inserted in BB. */
tree recip_def;
/* If non-NULL, the SSA_NAME holding the definition for a squared
reciprocal inserted in BB. */
tree square_recip_def;
/* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
was inserted in BB. */
gimple *recip_def_stmt;
......@@ -270,10 +274,14 @@ insert_bb (struct occurrence *new_occ, basic_block idom,
*p_head = new_occ;
}
/* Register that we found a division in BB. */
/* Register that we found a division in BB.
IMPORTANCE is a measure of how much weighting to give
that division. Use IMPORTANCE = 2 to register a single
division. If the division is going to be found multiple
times use 1 (as it is with squares). */
static inline void
register_division_in (basic_block bb)
register_division_in (basic_block bb, int importance)
{
struct occurrence *occ;
......@@ -285,7 +293,7 @@ register_division_in (basic_block bb)
}
occ->bb_has_division = true;
occ->num_divisions++;
occ->num_divisions += importance;
}
......@@ -328,6 +336,39 @@ is_division_by (gimple *use_stmt, tree def)
&& gimple_assign_rhs1 (use_stmt) != def;
}
/* Return whether USE_STMT is DEF * DEF. */
static inline bool
is_square_of (gimple *use_stmt, tree def)
{
if (gimple_code (use_stmt) == GIMPLE_ASSIGN
&& gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
{
tree op0 = gimple_assign_rhs1 (use_stmt);
tree op1 = gimple_assign_rhs2 (use_stmt);
return op0 == op1 && op0 == def;
}
return 0;
}
/* Return whether USE_STMT is a floating-point division by
DEF * DEF. */
static inline bool
is_division_by_square (gimple *use_stmt, tree def)
{
if (gimple_code (use_stmt) == GIMPLE_ASSIGN
&& gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
&& gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt))
{
tree denominator = gimple_assign_rhs2 (use_stmt);
if (TREE_CODE (denominator) == SSA_NAME)
{
return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
}
}
return 0;
}
/* Walk the subset of the dominator tree rooted at OCC, setting the
RECIP_DEF field to a definition of 1.0 / DEF that can be used in
the given basic block. The field may be left NULL, of course,
......@@ -335,20 +376,27 @@ is_division_by (gimple *use_stmt, tree def)
DEF_BSI is an iterator pointing at the statement defining DEF.
If RECIP_DEF is set, a dominator already has a computation that can
be used. */
be used.
If should_insert_square_recip is set, then this also inserts
the square of the reciprocal immediately after the definition
of the reciprocal. */
static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
tree def, tree recip_def, int threshold)
tree def, tree recip_def, tree square_recip_def,
int should_insert_square_recip, int threshold)
{
tree type;
gassign *new_stmt;
gassign *new_stmt, *new_square_stmt;
gimple_stmt_iterator gsi;
struct occurrence *occ_child;
if (!recip_def
&& (occ->bb_has_division || !flag_trapping_math)
&& occ->num_divisions >= threshold)
/* Divide by two as all divisions are counted twice in
the costing loop. */
&& occ->num_divisions / 2 >= threshold)
{
/* Make a variable with the replacement and substitute it. */
type = TREE_TYPE (def);
......@@ -356,11 +404,20 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
build_one_cst (type), def);
if (should_insert_square_recip)
{
square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
recip_def, recip_def);
}
if (occ->bb_has_division)
{
/* Case 1: insert before an existing division. */
gsi = gsi_after_labels (occ->bb);
while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def))
while (!gsi_end_p (gsi)
&& (!is_division_by (gsi_stmt (gsi), def))
&& (!is_division_by_square (gsi_stmt (gsi), def)))
gsi_next (&gsi);
gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
......@@ -371,6 +428,7 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
never happen if the definition statement can throw, because in
that case the sole successor of the statement's basic block will
dominate all the uses as well. */
gsi = *def_gsi;
gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
}
else
......@@ -380,14 +438,43 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
}
/* Regardless of which case the reciprocal as inserted in,
we insert the square immediately after the reciprocal. */
if (should_insert_square_recip)
gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
reciprocal_stats.rdivs_inserted++;
occ->recip_def_stmt = new_stmt;
}
occ->recip_def = recip_def;
occ->square_recip_def = square_recip_def;
for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold);
insert_reciprocals (def_gsi, occ_child, def, recip_def,
square_recip_def, should_insert_square_recip,
threshold);
}
/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
Take as argument the use for (x * x). */
static inline void
replace_reciprocal_squares (use_operand_p use_p)
{
gimple *use_stmt = USE_STMT (use_p);
basic_block bb = gimple_bb (use_stmt);
struct occurrence *occ = (struct occurrence *) bb->aux;
if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
&& occ->recip_def)
{
gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
SET_USE (use_p, occ->square_recip_def);
fold_stmt_inplace (&gsi);
update_stmt (use_stmt);
}
}
......@@ -448,32 +535,85 @@ free_bb (struct occurrence *occ)
static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
use_operand_p use_p;
imm_use_iterator use_iter;
use_operand_p use_p, square_use_p;
imm_use_iterator use_iter, square_use_iter;
tree square_def;
struct occurrence *occ;
int count = 0, threshold;
int count = 0;
int threshold;
int square_recip_count = 0;
int sqrt_recip_count = 0;
gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
/* If this is a square (x * x), we should check whether there are any
enough divisions by x on it's own to warrant waiting for that pass. */
if (TREE_CODE (def) == SSA_NAME)
{
gimple *def_stmt = SSA_NAME_DEF_STMT (def);
if (is_gimple_assign (def_stmt)
&& gimple_assign_rhs_code (def_stmt) == MULT_EXPR
&& gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
{
/* This statement is a square of something. We should take this
in to account, as it may be more profitable to not extract
the reciprocal here. */
tree op0 = gimple_assign_rhs1 (def_stmt);
FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
{
gimple *use_stmt = USE_STMT (use_p);
if (is_division_by (use_stmt, op0))
sqrt_recip_count ++;
}
}
}
FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
{
gimple *use_stmt = USE_STMT (use_p);
if (is_division_by (use_stmt, def))
{
register_division_in (gimple_bb (use_stmt));
register_division_in (gimple_bb (use_stmt), 2);
count++;
}
if (is_square_of (use_stmt, def))
{
square_def = gimple_assign_lhs (use_stmt);
FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
{
gimple *square_use_stmt = USE_STMT (square_use_p);
if (is_division_by (square_use_stmt, square_def))
{
/* Halve the relative importance as this is called twice
for each division by a square. */
register_division_in (gimple_bb (square_use_stmt), 1);
square_recip_count ++;
}
}
}
}
/* Square reciprocals will have been counted twice. */
square_recip_count /= 2;
if (sqrt_recip_count > square_recip_count)
/* It will be more profitable to extract a 1 / x expression later,
so it is not worth attempting to extract 1 / (x * x) now. */
return;
/* Do the expensive part only if we can hope to optimize something. */
threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
if (count >= threshold)
if (count + square_recip_count >= threshold
&& count >= 1)
{
gimple *use_stmt;
for (occ = occ_head; occ; occ = occ->next)
{
compute_merit (occ);
insert_reciprocals (def_gsi, occ, def, NULL, threshold);
insert_reciprocals (def_gsi, occ, def, NULL, NULL,
square_recip_count, threshold);
}
FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
......@@ -483,6 +623,27 @@ execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
replace_reciprocal (use_p);
}
else if (square_recip_count > 0
&& is_square_of (use_stmt, def))
{
FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
{
/* Find all uses of the square that are divisions and
* replace them by multiplications with the inverse. */
imm_use_iterator square_iterator;
gimple *powmult_use_stmt = USE_STMT (use_p);
tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
square_iterator, powmult_def_name)
FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
{
gimple *powmult_use_stmt = USE_STMT (square_use_p);
if (is_division_by (powmult_use_stmt, powmult_def_name))
replace_reciprocal_squares (square_use_p);
}
}
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment