Commit c1b3d827 by Richard Sandiford Committed by Richard Sandiford

Handle IFN_COND_MUL in tree-ssa-math-opts.c

This patch extends the FMA handling in tree-ssa-math-opts.c so
that it can cope with conditional multiplications as well as
unconditional multiplications.  The addition or subtraction must then
have the same condition as the multiplication (at least for now).

E.g. we can currently fold:

  (IFN_COND_ADD cond (mul x y) z fallback)
    -> (IFN_COND_FMA cond x y z fallback)

This patch also allows:

  (IFN_COND_ADD cond (IFN_COND_MUL cond x y <whatever>) z fallback)
    -> (IFN_COND_FMA cond x y z fallback)

2019-07-30  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-ssa-math-opts.c (convert_mult_to_fma): Add a mul_cond
	parameter.  When nonnull, make sure that the addition or subtraction
	has the same condition.
	(math_opts_dom_walker::after_dom_children): Try convert_mult_to_fma
	for CFN_COND_MUL too.

gcc/testsuite/
	* gcc.dg/vect/vect-cond-arith-7.c: New test.

From-SVN: r273905
parent 8c955a4b
2019-07-30 Richard Sandiford <richard.sandiford@arm.com>
* tree-ssa-math-opts.c (convert_mult_to_fma): Add a mul_cond
parameter. When nonnull, make sure that the addition or subtraction
has the same condition.
(math_opts_dom_walker::after_dom_children): Try convert_mult_to_fma
for CFN_COND_MUL too.
2019-07-30 Richard Biener <rguenther@suse.de>
PR tree-optimization/91291
......
2019-07-30 Richard Sandiford <richard.sandiford@arm.com>
* gcc.dg/vect/vect-cond-arith-7.c: New test.
2019-07-30 Jakub Jelinek <jakub@redhat.com>
PR middle-end/91282
......
/* { dg-require-effective-target scalar_all_fma } */
/* { dg-additional-options "-fdump-tree-optimized -ffp-contract=fast" } */
#include "tree-vect.h"
#define N (VECTOR_BITS * 11 / 64 + 3)
/* Define f_<INV>, a noipa function that, for each i in [0, N), stores
   mb * mc + md into a[i] when b[i] < 10 and 10.0 otherwise.
   Bit 0 of INV negates the b[i] multiplicand (mb) and bit 1 negates the
   d[i] addend (md); mc is always c[i].  The selecting comparison always
   uses the un-negated b[i].  */
#define DEF(INV) \
void __attribute__ ((noipa)) \
f_##INV (double *restrict a, double *restrict b, \
double *restrict c, double *restrict d) \
{ \
for (int i = 0; i < N; ++i) \
{ \
double mb = (INV & 1 ? -b[i] : b[i]); \
double mc = c[i]; \
double md = (INV & 2 ? -d[i] : d[i]); \
a[i] = b[i] < 10 ? mb * mc + md : 10.0; \
} \
}
/* Call f_<INV> (a, b, c, d) and verify every a[i]: recompute the operands
   with the same INV negations, form the expected value with
   __builtin_fma, and abort unless a[i] equals it when i % 17 < 10 or
   10.0 otherwise.  Relies on arrays named a, b, c and d being in scope
   at the expansion site.  The i % 17 < 10 test matches f_<INV>'s
   b[i] < 10 only when b[i] holds i % 17, as main's initialization does.
   NOTE(review): the empty asm with a "memory" clobber presumably keeps
   the checking loop from being optimized together with the code under
   test -- confirm.  */
#define TEST(INV) \
{ \
f_##INV (a, b, c, d); \
for (int i = 0; i < N; ++i) \
{ \
double mb = (INV & 1 ? -b[i] : b[i]); \
double mc = c[i]; \
double md = (INV & 2 ? -d[i] : d[i]); \
double fma = __builtin_fma (mb, mc, md); \
if (a[i] != (i % 17 < 10 ? fma : 10.0)) \
__builtin_abort (); \
asm volatile ("" ::: "memory"); \
} \
}
/* Expand T once for each INV value 0, 1, 2 and 3, i.e. every combination
   of the two negation bits tested by DEF and TEST.  */
#define FOR_EACH_INV(T) \
T (0) T (1) T (2) T (3)
/* Emit the four functions f_0 ... f_3.  */
FOR_EACH_INV (DEF)
/* Test driver: fill the input arrays with small periodic integer values,
   then run and check each f_<INV> variant through TEST.  */
int
main (void)
{
  /* TEST refers to these arrays by name, so they must stay a, b, c, d.  */
  double a[N];
  double b[N];
  double c[N];
  double d[N];

  int idx = 0;
  while (idx < N)
    {
      b[idx] = idx % 17;
      c[idx] = idx % 9 + 11;
      d[idx] = idx % 13 + 14;
      /* Same per-iteration memory barrier as each TEST checking loop.  */
      asm volatile ("" ::: "memory");
      ++idx;
    }

  FOR_EACH_INV (TEST)
  return 0;
}
/* { dg-final { scan-tree-dump-times { = \.COND_FMA } 1 "optimized" { target vect_double_cond_arith } } } */
/* { dg-final { scan-tree-dump-times { = \.COND_FMS } 1 "optimized" { target vect_double_cond_arith } } } */
/* { dg-final { scan-tree-dump-times { = \.COND_FNMA } 1 "optimized" { target vect_double_cond_arith } } } */
/* { dg-final { scan-tree-dump-times { = \.COND_FNMS } 1 "optimized" { target vect_double_cond_arith } } } */
...@@ -3044,6 +3044,8 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state, ...@@ -3044,6 +3044,8 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
/* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
with uses in additions and subtractions to form fused multiply-add with uses in additions and subtractions to form fused multiply-add
operations. Returns true if successful and MUL_STMT should be removed. operations. Returns true if successful and MUL_STMT should be removed.
If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
on MUL_COND, otherwise it is unconditional.
If STATE indicates that we are deferring FMA transformation, that means If STATE indicates that we are deferring FMA transformation, that means
that we do not produce FMAs for basic blocks which look like: that we do not produce FMAs for basic blocks which look like:
...@@ -3060,7 +3062,7 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state, ...@@ -3060,7 +3062,7 @@ last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
static bool static bool
convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
fma_deferring_state *state) fma_deferring_state *state, tree mul_cond = NULL_TREE)
{ {
tree mul_result = gimple_get_lhs (mul_stmt); tree mul_result = gimple_get_lhs (mul_stmt);
tree type = TREE_TYPE (mul_result); tree type = TREE_TYPE (mul_result);
...@@ -3174,6 +3176,9 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, ...@@ -3174,6 +3176,9 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
return false; return false;
} }
if (mul_cond && cond != mul_cond)
return false;
if (cond) if (cond)
{ {
if (cond == result || else_value == result) if (cond == result || else_value == result)
...@@ -3785,38 +3790,48 @@ math_opts_dom_walker::after_dom_children (basic_block bb) ...@@ -3785,38 +3790,48 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
} }
else if (is_gimple_call (stmt)) else if (is_gimple_call (stmt))
{ {
tree fndecl = gimple_call_fndecl (stmt); switch (gimple_call_combined_fn (stmt))
if (fndecl && gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
{ {
switch (DECL_FUNCTION_CODE (fndecl)) CASE_CFN_POW:
if (gimple_call_lhs (stmt)
&& TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
&& real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
&dconst2)
&& convert_mult_to_fma (stmt,
gimple_call_arg (stmt, 0),
gimple_call_arg (stmt, 0),
&fma_state))
{ {
case BUILT_IN_POWF: unlink_stmt_vdef (stmt);
case BUILT_IN_POW: if (gsi_remove (&gsi, true)
case BUILT_IN_POWL: && gimple_purge_dead_eh_edges (bb))
if (gimple_call_lhs (stmt) *m_cfg_changed_p = true;
&& TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST release_defs (stmt);
&& real_equal continue;
(&TREE_REAL_CST (gimple_call_arg (stmt, 1)), }
&dconst2) break;
&& convert_mult_to_fma (stmt,
gimple_call_arg (stmt, 0),
gimple_call_arg (stmt, 0),
&fma_state))
{
unlink_stmt_vdef (stmt);
if (gsi_remove (&gsi, true)
&& gimple_purge_dead_eh_edges (bb))
*m_cfg_changed_p = true;
release_defs (stmt);
continue;
}
break;
default:; case CFN_COND_MUL:
if (convert_mult_to_fma (stmt,
gimple_call_arg (stmt, 1),
gimple_call_arg (stmt, 2),
&fma_state,
gimple_call_arg (stmt, 0)))
{
gsi_remove (&gsi, true);
release_defs (stmt);
continue;
} }
break;
case CFN_LAST:
cancel_fma_deferring (&fma_state);
break;
default:
break;
} }
else
cancel_fma_deferring (&fma_state);
} }
gsi_next (&gsi); gsi_next (&gsi);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment