Commit b7244ccb by Kyrylo Tkachov Committed by Kyrylo Tkachov

[tree-complex.c] PR tree-optimization/70291: Inline floating-point complex…

[tree-complex.c] PR tree-optimization/70291: Inline floating-point complex multiplication more aggressively

We can improve the performance of complex floating-point multiplications by inlining the expansion a bit more aggressively.
We can inline complex x = a * b as:
x = (ar*br - ai*bi) + i(ar*bi + br*ai);
if (isunordered (__real__ x, __imag__ x))
  x = __muldc3 (a, b); //Or __mulsc3 for single-precision

That way, in the common case where no NaNs are produced, we can avoid the libgcc call and fall back to the
NaN handling stuff in libgcc only if either component of the expansion is NaN.

The implementation is done in expand_complex_multiplication in tree-complex.c and the above expansion
will be done when optimising for -O1 and greater and when not optimising for size.
At -O0 and -Os the single call to libgcc will be emitted.

For the code:
__complex double
foo (__complex double a, __complex double b)
{
  return a * b;
}

We will now emit at -O2 for aarch64:
foo:
        fmul    d16, d1, d3
        fmul    d6, d1, d2
        fnmsub  d5, d0, d2, d16
        fmadd   d4, d0, d3, d6
        fcmp    d5, d4
        bvs     .L8
        fmov    d1, d4
        fmov    d0, d5
        ret
.L8:
        stp     x29, x30, [sp, -16]!
        mov     x29, sp
        bl      __muldc3
        ldp     x29, x30, [sp], 16
        ret

Instead of just a branch to __muldc3.

	PR tree-optimization/70291
	* tree-complex.c (expand_complex_libcall): Add type, inplace_p
	arguments.  Change return type to tree.  Emit libcall as a new
	statement rather than replacing existing one when inplace_p is true.
	(expand_complex_multiplication_components): New function.
	(expand_complex_multiplication): Expand floating-point complex
	multiplication using the above.
	(expand_complex_division): Rename inner_type parameter to type.
	Update expand_complex_libcall call-site.
	(expand_complex_operations_1): Update expand_complex_multiplication
	and expand_complex_division call-sites.

	* gcc.dg/complex-6.c: New test.
	* gcc.dg/complex-7.c: Likewise.

From-SVN: r259889
parent 9e3ef542
2018-05-03 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
PR tree-optimization/70291
* tree-complex.c (expand_complex_libcall): Add type, inplace_p
arguments. Change return type to tree. Emit libcall as a new
statement rather than replacing existing one when inplace_p is true.
(expand_complex_multiplication_components): New function.
(expand_complex_multiplication): Expand floating-point complex
multiplication using the above.
(expand_complex_division): Rename inner_type parameter to type.
Update expand_complex_libcall call-site.
(expand_complex_operations_1): Update expand_complex_multiplication
and expand_complex_division call-sites.
2018-05-02 Jakub Jelinek <jakub@redhat.com>
PR target/85582
......
2018-05-03 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
PR tree-optimization/70291
* gcc.dg/complex-6.c: New test.
* gcc.dg/complex-7.c: Likewise.
2018-05-03 Richard Biener <rguenther@suse.de>
PR testsuite/85579
......
/* PR tree-optimization/70291. */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-cplxlower" } */
__complex float
foo (__complex float a, __complex float b)
{
/* Single-precision complex multiply.  Per the dg-final scans below, at -O2
   the cplxlower1 pass is expected to inline the expansion and guard it with
   an unordered (NaN) comparison before falling back to __mulsc3.  */
return a * b;
}
/* { dg-final { scan-tree-dump-times "unord" 1 "cplxlower1" } } */
/* { dg-final { scan-tree-dump-times "__mulsc3" 1 "cplxlower1" } } */
/* PR tree-optimization/70291. */
/* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-cplxlower" } */
__complex double
foo (__complex double a, __complex double b)
{
/* Double-precision complex multiply.  Per the dg-final scans below, at -O2
   the cplxlower1 pass is expected to inline the expansion and guard it with
   an unordered (NaN) comparison before falling back to __muldc3.  */
return a * b;
}
/* { dg-final { scan-tree-dump-times "unord" 1 "cplxlower1" } } */
/* { dg-final { scan-tree-dump-times "__muldc3" 1 "cplxlower1" } } */
...@@ -978,22 +978,22 @@ expand_complex_addition (gimple_stmt_iterator *gsi, tree inner_type, ...@@ -978,22 +978,22 @@ expand_complex_addition (gimple_stmt_iterator *gsi, tree inner_type,
} }
/* Expand a complex multiplication or division to a libcall to the c99 /* Expand a complex multiplication or division to a libcall to the c99
compliant routines. */ compliant routines. TYPE is the complex type of the operation.
If INPLACE_P replace the statement at GSI with
the libcall and return NULL_TREE. Else insert the call, assign its
result to an output variable and return that variable. If INPLACE_P
is true then the statement being replaced should be an assignment
statement. */
static void static tree
expand_complex_libcall (gimple_stmt_iterator *gsi, tree ar, tree ai, expand_complex_libcall (gimple_stmt_iterator *gsi, tree type, tree ar, tree ai,
tree br, tree bi, enum tree_code code) tree br, tree bi, enum tree_code code, bool inplace_p)
{ {
machine_mode mode; machine_mode mode;
enum built_in_function bcode; enum built_in_function bcode;
tree fn, type, lhs; tree fn, lhs;
gimple *old_stmt;
gcall *stmt; gcall *stmt;
old_stmt = gsi_stmt (*gsi);
lhs = gimple_assign_lhs (old_stmt);
type = TREE_TYPE (lhs);
mode = TYPE_MODE (type); mode = TYPE_MODE (type);
gcc_assert (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT); gcc_assert (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT);
...@@ -1008,21 +1008,65 @@ expand_complex_libcall (gimple_stmt_iterator *gsi, tree ar, tree ai, ...@@ -1008,21 +1008,65 @@ expand_complex_libcall (gimple_stmt_iterator *gsi, tree ar, tree ai,
fn = builtin_decl_explicit (bcode); fn = builtin_decl_explicit (bcode);
stmt = gimple_build_call (fn, 4, ar, ai, br, bi); stmt = gimple_build_call (fn, 4, ar, ai, br, bi);
gimple_call_set_lhs (stmt, lhs);
update_stmt (stmt);
gsi_replace (gsi, stmt, false);
if (maybe_clean_or_replace_eh_stmt (old_stmt, stmt))
gimple_purge_dead_eh_edges (gsi_bb (*gsi));
if (gimple_in_ssa_p (cfun)) if (inplace_p)
{ {
gimple *old_stmt = gsi_stmt (*gsi);
lhs = gimple_assign_lhs (old_stmt);
gimple_call_set_lhs (stmt, lhs);
update_stmt (stmt);
gsi_replace (gsi, stmt, false);
if (maybe_clean_or_replace_eh_stmt (old_stmt, stmt))
gimple_purge_dead_eh_edges (gsi_bb (*gsi));
type = TREE_TYPE (type); type = TREE_TYPE (type);
update_complex_components (gsi, stmt, update_complex_components (gsi, stmt,
build1 (REALPART_EXPR, type, lhs), build1 (REALPART_EXPR, type, lhs),
build1 (IMAGPART_EXPR, type, lhs)); build1 (IMAGPART_EXPR, type, lhs));
SSA_NAME_DEF_STMT (lhs) = stmt; SSA_NAME_DEF_STMT (lhs) = stmt;
return NULL_TREE;
} }
lhs = create_tmp_var (type);
gimple_call_set_lhs (stmt, lhs);
lhs = make_ssa_name (lhs, stmt);
gimple_call_set_lhs (stmt, lhs);
update_stmt (stmt);
gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
return lhs;
}
/* Perform a complex multiplication on two complex constants A, B represented
by AR, AI, BR, BI of type TYPE.
The operation we want is: a * b = (ar*br - ai*bi) + i(ar*bi + br*ai).
Insert the GIMPLE statements into GSI. Store the real and imaginary
components of the result into RR and RI. */
static void
expand_complex_multiplication_components (gimple_stmt_iterator *gsi,
tree type, tree ar, tree ai,
tree br, tree bi,
tree *rr, tree *ri)
{
tree t1, t2, t3, t4;
t1 = gimplify_build2 (gsi, MULT_EXPR, type, ar, br);
t2 = gimplify_build2 (gsi, MULT_EXPR, type, ai, bi);
t3 = gimplify_build2 (gsi, MULT_EXPR, type, ar, bi);
/* Avoid expanding redundant multiplication for the common
case of squaring a complex number. */
if (ar == br && ai == bi)
t4 = t3;
else
t4 = gimplify_build2 (gsi, MULT_EXPR, type, ai, br);
*rr = gimplify_build2 (gsi, MINUS_EXPR, type, t1, t2);
*ri = gimplify_build2 (gsi, PLUS_EXPR, type, t3, t4);
} }
/* Expand complex multiplication to scalars: /* Expand complex multiplication to scalars:
...@@ -1030,11 +1074,12 @@ expand_complex_libcall (gimple_stmt_iterator *gsi, tree ar, tree ai, ...@@ -1030,11 +1074,12 @@ expand_complex_libcall (gimple_stmt_iterator *gsi, tree ar, tree ai,
*/ */
static void static void
expand_complex_multiplication (gimple_stmt_iterator *gsi, tree inner_type, expand_complex_multiplication (gimple_stmt_iterator *gsi, tree type,
tree ar, tree ai, tree br, tree bi, tree ar, tree ai, tree br, tree bi,
complex_lattice_t al, complex_lattice_t bl) complex_lattice_t al, complex_lattice_t bl)
{ {
tree rr, ri; tree rr, ri;
tree inner_type = TREE_TYPE (type);
if (al < bl) if (al < bl)
{ {
...@@ -1080,27 +1125,77 @@ expand_complex_multiplication (gimple_stmt_iterator *gsi, tree inner_type, ...@@ -1080,27 +1125,77 @@ expand_complex_multiplication (gimple_stmt_iterator *gsi, tree inner_type,
case PAIR (VARYING, VARYING): case PAIR (VARYING, VARYING):
if (flag_complex_method == 2 && SCALAR_FLOAT_TYPE_P (inner_type)) if (flag_complex_method == 2 && SCALAR_FLOAT_TYPE_P (inner_type))
{ {
expand_complex_libcall (gsi, ar, ai, br, bi, MULT_EXPR); /* If optimizing for size or not at all just do a libcall.
return; Same if there are exception-handling edges or signaling NaNs. */
} if (optimize == 0 || optimize_bb_for_size_p (gsi_bb (*gsi))
else || stmt_can_throw_internal (gsi_stmt (*gsi))
{ || flag_signaling_nans)
tree t1, t2, t3, t4; {
expand_complex_libcall (gsi, type, ar, ai, br, bi,
t1 = gimplify_build2 (gsi, MULT_EXPR, inner_type, ar, br); MULT_EXPR, true);
t2 = gimplify_build2 (gsi, MULT_EXPR, inner_type, ai, bi); return;
t3 = gimplify_build2 (gsi, MULT_EXPR, inner_type, ar, bi); }
/* Avoid expanding redundant multiplication for the common
case of squaring a complex number. */
if (ar == br && ai == bi)
t4 = t3;
else
t4 = gimplify_build2 (gsi, MULT_EXPR, inner_type, ai, br);
rr = gimplify_build2 (gsi, MINUS_EXPR, inner_type, t1, t2); /* Else, expand x = a * b into
ri = gimplify_build2 (gsi, PLUS_EXPR, inner_type, t3, t4); x = (ar*br - ai*bi) + i(ar*bi + br*ai);
if (isunordered (__real__ x, __imag__ x))
x = __muldc3 (a, b); */
tree tmpr, tmpi;
expand_complex_multiplication_components (gsi, inner_type, ar, ai,
br, bi, &tmpr, &tmpi);
gimple *check
= gimple_build_cond (UNORDERED_EXPR, tmpr, tmpi,
NULL_TREE, NULL_TREE);
basic_block orig_bb = gsi_bb (*gsi);
/* We want to keep track of the original complex multiplication
statement as we're going to modify it later in
update_complex_assignment. Make sure that insert_cond_bb leaves
that statement in the join block. */
gsi_prev (gsi);
basic_block cond_bb
= insert_cond_bb (gsi_bb (*gsi), gsi_stmt (*gsi), check,
profile_probability::very_unlikely ());
gimple_stmt_iterator cond_bb_gsi = gsi_last_bb (cond_bb);
gsi_insert_after (&cond_bb_gsi, gimple_build_nop (), GSI_NEW_STMT);
tree libcall_res
= expand_complex_libcall (&cond_bb_gsi, type, ar, ai, br,
bi, MULT_EXPR, false);
tree cond_real = gimplify_build1 (&cond_bb_gsi, REALPART_EXPR,
inner_type, libcall_res);
tree cond_imag = gimplify_build1 (&cond_bb_gsi, IMAGPART_EXPR,
inner_type, libcall_res);
basic_block join_bb = single_succ_edge (cond_bb)->dest;
*gsi = gsi_start_nondebug_after_labels_bb (join_bb);
/* We have a conditional block with some assignments in cond_bb.
Wire up the PHIs to wrap up. */
rr = make_ssa_name (inner_type);
ri = make_ssa_name (inner_type);
edge cond_to_join = single_succ_edge (cond_bb);
edge orig_to_join = find_edge (orig_bb, join_bb);
gphi *real_phi = create_phi_node (rr, gsi_bb (*gsi));
add_phi_arg (real_phi, cond_real, cond_to_join,
UNKNOWN_LOCATION);
add_phi_arg (real_phi, tmpr, orig_to_join, UNKNOWN_LOCATION);
gphi *imag_phi = create_phi_node (ri, gsi_bb (*gsi));
add_phi_arg (imag_phi, cond_imag, cond_to_join,
UNKNOWN_LOCATION);
add_phi_arg (imag_phi, tmpi, orig_to_join, UNKNOWN_LOCATION);
} }
else
/* If we are not worrying about NaNs expand to
(ar*br - ai*bi) + i(ar*bi + br*ai) directly. */
expand_complex_multiplication_components (gsi, inner_type, ar, ai,
br, bi, &rr, &ri);
break; break;
default: default:
...@@ -1308,13 +1403,14 @@ expand_complex_div_wide (gimple_stmt_iterator *gsi, tree inner_type, ...@@ -1308,13 +1403,14 @@ expand_complex_div_wide (gimple_stmt_iterator *gsi, tree inner_type,
/* Expand complex division to scalars. */ /* Expand complex division to scalars. */
static void static void
expand_complex_division (gimple_stmt_iterator *gsi, tree inner_type, expand_complex_division (gimple_stmt_iterator *gsi, tree type,
tree ar, tree ai, tree br, tree bi, tree ar, tree ai, tree br, tree bi,
enum tree_code code, enum tree_code code,
complex_lattice_t al, complex_lattice_t bl) complex_lattice_t al, complex_lattice_t bl)
{ {
tree rr, ri; tree rr, ri;
tree inner_type = TREE_TYPE (type);
switch (PAIR (al, bl)) switch (PAIR (al, bl))
{ {
case PAIR (ONLY_REAL, ONLY_REAL): case PAIR (ONLY_REAL, ONLY_REAL):
...@@ -1362,7 +1458,7 @@ expand_complex_division (gimple_stmt_iterator *gsi, tree inner_type, ...@@ -1362,7 +1458,7 @@ expand_complex_division (gimple_stmt_iterator *gsi, tree inner_type,
case 2: case 2:
if (SCALAR_FLOAT_TYPE_P (inner_type)) if (SCALAR_FLOAT_TYPE_P (inner_type))
{ {
expand_complex_libcall (gsi, ar, ai, br, bi, code); expand_complex_libcall (gsi, type, ar, ai, br, bi, code, true);
break; break;
} }
/* FALLTHRU */ /* FALLTHRU */
...@@ -1630,7 +1726,7 @@ expand_complex_operations_1 (gimple_stmt_iterator *gsi) ...@@ -1630,7 +1726,7 @@ expand_complex_operations_1 (gimple_stmt_iterator *gsi)
break; break;
case MULT_EXPR: case MULT_EXPR:
expand_complex_multiplication (gsi, inner_type, ar, ai, br, bi, al, bl); expand_complex_multiplication (gsi, type, ar, ai, br, bi, al, bl);
break; break;
case TRUNC_DIV_EXPR: case TRUNC_DIV_EXPR:
...@@ -1638,7 +1734,7 @@ expand_complex_operations_1 (gimple_stmt_iterator *gsi) ...@@ -1638,7 +1734,7 @@ expand_complex_operations_1 (gimple_stmt_iterator *gsi)
case FLOOR_DIV_EXPR: case FLOOR_DIV_EXPR:
case ROUND_DIV_EXPR: case ROUND_DIV_EXPR:
case RDIV_EXPR: case RDIV_EXPR:
expand_complex_division (gsi, inner_type, ar, ai, br, bi, code, al, bl); expand_complex_division (gsi, type, ar, ai, br, bi, code, al, bl);
break; break;
case NEGATE_EXPR: case NEGATE_EXPR:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment