Commit 19cf3a36 by Jackson Woodruff Committed by Wilco Dijkstra

Factor out division by squares

This patch implements the some of the division optimizations discussed in
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71026.

The division reciprocal optimization now handles divisions by squares:

     x / (y * y) -> x  * (1 / y) * (1 / y)

This requires at least one more division by y before it triggers - the
3 divisions of (1/ y) are then CSEd into a single division.  Overall
this changes 1 division into 1 multiply, which is generally much faster.


2017-11-24  Jackson Woodruff  <jackson.woodruff@arm.com>

    gcc/
	PR tree-optimization/71026
	* tree-ssa-math-opts (is_division_by_square, is_square_of): New.
	(insert_reciprocals): Change to insert reciprocals before a division
	by a square and to insert the square of a reciprocal.
	(execute_cse_reciprocals_1): Change to consider division by a square.
	(register_division_in): Add importance parameter.

    testsuite/
	PR tree-optimization/71026
	* gfortran.dg/extract_recip_1.f: New test.
	* gcc.dg/extract_recip_3.c: New test.
	* gcc.dg/extract_recip_4.c: New test.

From-SVN: r255141
parent 15b6695a
2017-11-24 Jackson Woodruff <jackson.woodruff@arm.com>
PR tree-optimization/71026
* tree-ssa-math-opts (is_division_by_square, is_square_of): New.
(insert_reciprocals): Change to insert reciprocals before a division
by a square and to insert the square of a reciprocal.
(execute_cse_reciprocals_1): Change to consider division by a square.
(register_division_in): Add importance parameter.
2017-11-24 Richard Biener <rguenther@suse.de> 2017-11-24 Richard Biener <rguenther@suse.de>
PR tree-optimization/82402 PR tree-optimization/82402
2017-11-24 Jackson Woodruff <jackson.woodruff@arm.com>
PR tree-optimization/71026
* gfortran.dg/extract_recip_1.f: New test.
* gcc.dg/extract_recip_3.c: New test.
* gcc.dg/extract_recip_4.c: New test.
2017-11-24 Richard Biener <rguenther@suse.de> 2017-11-24 Richard Biener <rguenther@suse.de>
PR tree-optimization/82402 PR tree-optimization/82402
......
/* { dg-do compile } */
/* { dg-options "-Ofast -fdump-tree-optimized-raw" } */
float
extract_square (float *a, float *b, float x, float y)
{
*a = 3 / (y * y);
*b = 5 / (y * y);
return x / (y * y);
}
/* Don't expect the 'powmult' (calculation of y * y)
to be deleted until a later pass, so look for one
more multiplication than strictly necessary. */
float
extract_recip (float *a, float *b, float x, float y, float z)
{
*a = 7 / y;
*b = x / (y * y);
return z / y;
}
/* 4 multiplications in 'extract_square', and 4 in 'extract_recip'. */
/* { dg-final { scan-tree-dump-times "mult_expr" 8 "optimized" } } */
/* 1 division in 'extract_square', 1 division in 'extract_recip'. */
/* { dg-final { scan-tree-dump-times "rdiv_expr" 2 "optimized" } } */
/* { dg-do compile } */
/* { dg-options "-Ofast -fdump-tree-optimized-raw" } */
/* Don't expect any of these divisions to be extracted. */
double f (double x, int p)
{
if (p > 0)
{
return 1.0/(x * x);
}
if (p > -1)
{
return x * x * x;
}
return 1.0 /(x);
}
/* Expect a reciprocal to be extracted here. */
double g (double *a, double x, double y)
{
*a = 3 / y;
double k = x / (y * y);
if (y * y == 2.0)
return k + 1 / y;
else
return k - 1 / y;
}
/* Expect 2 divisions in 'f' and 1 in 'g'. */
/* { dg-final { scan-tree-dump-times "rdiv_expr" 3 "optimized" } } */
/* Expect 3 multiplications in 'f' and 4 in 'g'. */
/* { dg-final { scan-tree-dump-times "mult_expr" 7 "optimized" } } */
! { dg-do compile }
! { dg-options "-Ofast -fdump-tree-optimized-raw" }
SUBROUTINE F(N,X,Y,Z,A,B)
DIMENSION X(4,4), Y(4), Z(4)
REAL, INTENT(INOUT) :: A, B
A = 1 / (Y(N)*Y(N))
DO I = 1, NV
X(I, I) = 1 + X(I, I)
ENDDO
Z(1) = B / Y(N)
Z(2) = N / Y(N)
RETURN
END
! { dg-final { scan-tree-dump-times "rdiv_expr" 1 "optimized" } }
...@@ -127,6 +127,10 @@ struct occurrence { ...@@ -127,6 +127,10 @@ struct occurrence {
inserted in BB. */ inserted in BB. */
tree recip_def; tree recip_def;
/* If non-NULL, the SSA_NAME holding the definition for a squared
reciprocal inserted in BB. */
tree square_recip_def;
/* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
was inserted in BB. */ was inserted in BB. */
gimple *recip_def_stmt; gimple *recip_def_stmt;
...@@ -270,10 +274,14 @@ insert_bb (struct occurrence *new_occ, basic_block idom, ...@@ -270,10 +274,14 @@ insert_bb (struct occurrence *new_occ, basic_block idom,
*p_head = new_occ; *p_head = new_occ;
} }
/* Register that we found a division in BB. */ /* Register that we found a division in BB.
IMPORTANCE is a measure of how much weighting to give
that division. Use IMPORTANCE = 2 to register a single
division. If the division is going to be found multiple
times use 1 (as it is with squares). */
static inline void static inline void
register_division_in (basic_block bb) register_division_in (basic_block bb, int importance)
{ {
struct occurrence *occ; struct occurrence *occ;
...@@ -285,7 +293,7 @@ register_division_in (basic_block bb) ...@@ -285,7 +293,7 @@ register_division_in (basic_block bb)
} }
occ->bb_has_division = true; occ->bb_has_division = true;
occ->num_divisions++; occ->num_divisions += importance;
} }
...@@ -328,6 +336,39 @@ is_division_by (gimple *use_stmt, tree def) ...@@ -328,6 +336,39 @@ is_division_by (gimple *use_stmt, tree def)
&& gimple_assign_rhs1 (use_stmt) != def; && gimple_assign_rhs1 (use_stmt) != def;
} }
/* Return whether USE_STMT is DEF * DEF. */
static inline bool
is_square_of (gimple *use_stmt, tree def)
{
if (gimple_code (use_stmt) == GIMPLE_ASSIGN
&& gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
{
tree op0 = gimple_assign_rhs1 (use_stmt);
tree op1 = gimple_assign_rhs2 (use_stmt);
return op0 == op1 && op0 == def;
}
return 0;
}
/* Return whether USE_STMT is a floating-point division by
DEF * DEF. */
static inline bool
is_division_by_square (gimple *use_stmt, tree def)
{
if (gimple_code (use_stmt) == GIMPLE_ASSIGN
&& gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
&& gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt))
{
tree denominator = gimple_assign_rhs2 (use_stmt);
if (TREE_CODE (denominator) == SSA_NAME)
{
return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
}
}
return 0;
}
/* Walk the subset of the dominator tree rooted at OCC, setting the /* Walk the subset of the dominator tree rooted at OCC, setting the
RECIP_DEF field to a definition of 1.0 / DEF that can be used in RECIP_DEF field to a definition of 1.0 / DEF that can be used in
the given basic block. The field may be left NULL, of course, the given basic block. The field may be left NULL, of course,
...@@ -335,20 +376,27 @@ is_division_by (gimple *use_stmt, tree def) ...@@ -335,20 +376,27 @@ is_division_by (gimple *use_stmt, tree def)
DEF_BSI is an iterator pointing at the statement defining DEF. DEF_BSI is an iterator pointing at the statement defining DEF.
If RECIP_DEF is set, a dominator already has a computation that can If RECIP_DEF is set, a dominator already has a computation that can
be used. */ be used.
If should_insert_square_recip is set, then this also inserts
the square of the reciprocal immediately after the definition
of the reciprocal. */
static void static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
tree def, tree recip_def, int threshold) tree def, tree recip_def, tree square_recip_def,
int should_insert_square_recip, int threshold)
{ {
tree type; tree type;
gassign *new_stmt; gassign *new_stmt, *new_square_stmt;
gimple_stmt_iterator gsi; gimple_stmt_iterator gsi;
struct occurrence *occ_child; struct occurrence *occ_child;
if (!recip_def if (!recip_def
&& (occ->bb_has_division || !flag_trapping_math) && (occ->bb_has_division || !flag_trapping_math)
&& occ->num_divisions >= threshold) /* Divide by two as all divisions are counted twice in
the costing loop. */
&& occ->num_divisions / 2 >= threshold)
{ {
/* Make a variable with the replacement and substitute it. */ /* Make a variable with the replacement and substitute it. */
type = TREE_TYPE (def); type = TREE_TYPE (def);
...@@ -356,11 +404,20 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, ...@@ -356,11 +404,20 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
new_stmt = gimple_build_assign (recip_def, RDIV_EXPR, new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
build_one_cst (type), def); build_one_cst (type), def);
if (should_insert_square_recip)
{
square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
recip_def, recip_def);
}
if (occ->bb_has_division) if (occ->bb_has_division)
{ {
/* Case 1: insert before an existing division. */ /* Case 1: insert before an existing division. */
gsi = gsi_after_labels (occ->bb); gsi = gsi_after_labels (occ->bb);
while (!gsi_end_p (gsi) && !is_division_by (gsi_stmt (gsi), def)) while (!gsi_end_p (gsi)
&& (!is_division_by (gsi_stmt (gsi), def))
&& (!is_division_by_square (gsi_stmt (gsi), def)))
gsi_next (&gsi); gsi_next (&gsi);
gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
...@@ -371,6 +428,7 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, ...@@ -371,6 +428,7 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
never happen if the definition statement can throw, because in never happen if the definition statement can throw, because in
that case the sole successor of the statement's basic block will that case the sole successor of the statement's basic block will
dominate all the uses as well. */ dominate all the uses as well. */
gsi = *def_gsi;
gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT); gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
} }
else else
...@@ -380,14 +438,43 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ, ...@@ -380,14 +438,43 @@ insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT); gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
} }
/* Regardless of which case the reciprocal as inserted in,
we insert the square immediately after the reciprocal. */
if (should_insert_square_recip)
gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
reciprocal_stats.rdivs_inserted++; reciprocal_stats.rdivs_inserted++;
occ->recip_def_stmt = new_stmt; occ->recip_def_stmt = new_stmt;
} }
occ->recip_def = recip_def; occ->recip_def = recip_def;
occ->square_recip_def = square_recip_def;
for (occ_child = occ->children; occ_child; occ_child = occ_child->next) for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
insert_reciprocals (def_gsi, occ_child, def, recip_def, threshold); insert_reciprocals (def_gsi, occ_child, def, recip_def,
square_recip_def, should_insert_square_recip,
threshold);
}
/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
Take as argument the use for (x * x). */
static inline void
replace_reciprocal_squares (use_operand_p use_p)
{
gimple *use_stmt = USE_STMT (use_p);
basic_block bb = gimple_bb (use_stmt);
struct occurrence *occ = (struct occurrence *) bb->aux;
if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
&& occ->recip_def)
{
gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
SET_USE (use_p, occ->square_recip_def);
fold_stmt_inplace (&gsi);
update_stmt (use_stmt);
}
} }
...@@ -448,32 +535,85 @@ free_bb (struct occurrence *occ) ...@@ -448,32 +535,85 @@ free_bb (struct occurrence *occ)
static void static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def) execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{ {
use_operand_p use_p; use_operand_p use_p, square_use_p;
imm_use_iterator use_iter; imm_use_iterator use_iter, square_use_iter;
tree square_def;
struct occurrence *occ; struct occurrence *occ;
int count = 0, threshold; int count = 0;
int threshold;
int square_recip_count = 0;
int sqrt_recip_count = 0;
gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def)); gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && is_gimple_reg (def));
threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
/* If this is a square (x * x), we should check whether there are any
enough divisions by x on it's own to warrant waiting for that pass. */
if (TREE_CODE (def) == SSA_NAME)
{
gimple *def_stmt = SSA_NAME_DEF_STMT (def);
if (is_gimple_assign (def_stmt)
&& gimple_assign_rhs_code (def_stmt) == MULT_EXPR
&& gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
{
/* This statement is a square of something. We should take this
in to account, as it may be more profitable to not extract
the reciprocal here. */
tree op0 = gimple_assign_rhs1 (def_stmt);
FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
{
gimple *use_stmt = USE_STMT (use_p);
if (is_division_by (use_stmt, op0))
sqrt_recip_count ++;
}
}
}
FOR_EACH_IMM_USE_FAST (use_p, use_iter, def) FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
{ {
gimple *use_stmt = USE_STMT (use_p); gimple *use_stmt = USE_STMT (use_p);
if (is_division_by (use_stmt, def)) if (is_division_by (use_stmt, def))
{ {
register_division_in (gimple_bb (use_stmt)); register_division_in (gimple_bb (use_stmt), 2);
count++; count++;
} }
if (is_square_of (use_stmt, def))
{
square_def = gimple_assign_lhs (use_stmt);
FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
{
gimple *square_use_stmt = USE_STMT (square_use_p);
if (is_division_by (square_use_stmt, square_def))
{
/* Halve the relative importance as this is called twice
for each division by a square. */
register_division_in (gimple_bb (square_use_stmt), 1);
square_recip_count ++;
}
}
}
} }
/* Square reciprocals will have been counted twice. */
square_recip_count /= 2;
if (sqrt_recip_count > square_recip_count)
/* It will be more profitable to extract a 1 / x expression later,
so it is not worth attempting to extract 1 / (x * x) now. */
return;
/* Do the expensive part only if we can hope to optimize something. */ /* Do the expensive part only if we can hope to optimize something. */
threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def))); if (count + square_recip_count >= threshold
if (count >= threshold) && count >= 1)
{ {
gimple *use_stmt; gimple *use_stmt;
for (occ = occ_head; occ; occ = occ->next) for (occ = occ_head; occ; occ = occ->next)
{ {
compute_merit (occ); compute_merit (occ);
insert_reciprocals (def_gsi, occ, def, NULL, threshold); insert_reciprocals (def_gsi, occ, def, NULL, NULL,
square_recip_count, threshold);
} }
FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def) FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
...@@ -483,6 +623,27 @@ execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def) ...@@ -483,6 +623,27 @@ execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
replace_reciprocal (use_p); replace_reciprocal (use_p);
} }
else if (square_recip_count > 0
&& is_square_of (use_stmt, def))
{
FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
{
/* Find all uses of the square that are divisions and
* replace them by multiplications with the inverse. */
imm_use_iterator square_iterator;
gimple *powmult_use_stmt = USE_STMT (use_p);
tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
square_iterator, powmult_def_name)
FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
{
gimple *powmult_use_stmt = USE_STMT (square_use_p);
if (is_division_by (powmult_use_stmt, powmult_def_name))
replace_reciprocal_squares (square_use_p);
}
}
}
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment