Commit 0a78ebe4, authored and committed by Kyrylo Tkachov

[AArch64] Properly handle SHIFT ops and EXTEND in aarch64_rtx_mult_cost

	* config/aarch64/aarch64.c (aarch64_shift_p): New function.
	(aarch64_rtx_mult_cost): Update comment to reflect that it also handles
	combined arithmetic-shift ops.  Properly handle all shift and extend
	operations that can occur in combination with PLUS/MINUS.
	Rename maybe_fma to compound_p.
	(aarch64_rtx_costs): Use aarch64_shift_p when costing compound
	arithmetic and shift operations.

From-SVN: r222624
parent 2533c820
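
For readers unfamiliar with the instruction forms involved: the change below is about costing shifts and extends that are folded into an add or subtract, rather than costed as standalone operations. A minimal C illustration (not part of the commit; the function names are made up and the exact assembly depends on the tuning and options used) is:

/* Illustrative only: compound arithmetic+shift and arithmetic+extend
   patterns of the kind aarch64_rtx_mult_cost is taught to cost here.
   At -O2 GCC can typically fold the shift or extend into the ADD
   itself, e.g. "add x0, x0, x1, lsl 2" for add_shifted,
   "add x0, x0, w1, sxtw" for add_extended and
   "add x0, x0, w1, sxtw 2" for add_extended_shifted.  */

long
add_shifted (long a, long b)
{
  return a + (b << 2);          /* PLUS of an ASHIFT by an immediate.  */
}

long
add_extended (long a, int b)
{
  return a + b;                 /* PLUS of a SIGN_EXTEND.  */
}

long
add_extended_shifted (long a, int b)
{
  return a + ((long) b << 2);   /* PLUS of an ASHIFT of a SIGN_EXTEND.  */
}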
gcc/ChangeLog

 2015-04-30  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

+	* config/aarch64/aarch64.c (aarch64_shift_p): New function.
+	(aarch64_rtx_mult_cost): Update comment to reflect that it also handles
+	combined arithmetic-shift ops.  Properly handle all shift and extend
+	operations that can occur in combination with PLUS/MINUS.
+	Rename maybe_fma to compound_p.
+	(aarch64_rtx_costs): Use aarch64_shift_p when costing compound
+	arithmetic and shift operations.
+
+2015-04-30  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
 	* config/aarch64/aarch64.c (aarch64_rtx_costs): Use extend_arith
 	rather than arith_shift cost when costing ADD/MINUS of an
 	extended value.
gcc/config/aarch64/aarch64.c

@@ -5158,9 +5158,17 @@ aarch64_strip_extend (rtx x)
   return x;
 }
 
+/* Return true iff CODE is a shift supported in combination
+   with arithmetic instructions.  */
+static bool
+aarch64_shift_p (enum rtx_code code)
+{
+  return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
+}
+
 /* Helper function for rtx cost calculation.  Calculate the cost of
-   a MULT, which may be part of a multiply-accumulate rtx.  Return
-   the calculated cost of the expression, recursing manually in to
+   a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
+   Return the calculated cost of the expression, recursing manually in to
    operands where needed.  */
 
 static int
@@ -5170,7 +5178,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
   const struct cpu_cost_table *extra_cost
     = aarch64_tune_params->insn_extra_cost;
   int cost = 0;
-  bool maybe_fma = (outer == PLUS || outer == MINUS);
+  bool compound_p = (outer == PLUS || outer == MINUS);
   machine_mode mode = GET_MODE (x);
 
   gcc_checking_assert (code == MULT);
@@ -5185,18 +5193,35 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
   if (GET_MODE_CLASS (mode) == MODE_INT)
     {
       /* The multiply will be canonicalized as a shift, cost it as such.  */
-      if (CONST_INT_P (op1)
-          && exact_log2 (INTVAL (op1)) > 0)
+      if (aarch64_shift_p (GET_CODE (x))
+          || (CONST_INT_P (op1)
+              && exact_log2 (INTVAL (op1)) > 0))
        {
+         bool is_extend = GET_CODE (op0) == ZERO_EXTEND
+                          || GET_CODE (op0) == SIGN_EXTEND;
          if (speed)
            {
-             if (maybe_fma)
-               /* ADD (shifted register).  */
-               cost += extra_cost->alu.arith_shift;
+             if (compound_p)
+               {
+                 if (REG_P (op1))
+                   /* ARITH + shift-by-register.  */
+                   cost += extra_cost->alu.arith_shift_reg;
+                 else if (is_extend)
+                   /* ARITH + extended register.  We don't have a cost field
+                      for ARITH+EXTEND+SHIFT, so use extend_arith here.  */
+                   cost += extra_cost->alu.extend_arith;
+                 else
+                   /* ARITH + shift-by-immediate.  */
+                   cost += extra_cost->alu.arith_shift;
+               }
              else
                /* LSL (immediate).  */
                cost += extra_cost->alu.shift;
            }
+         /* Strip extends as we will have costed them in the case above.  */
+         if (is_extend)
+           op0 = aarch64_strip_extend (op0);
+
          cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
@@ -5214,7 +5239,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
          if (speed)
            {
-             if (maybe_fma)
+             if (compound_p)
                /* MADD/SMADDL/UMADDL.  */
                cost += extra_cost->mult[0].extend_add;
              else
@@ -5232,7 +5257,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
      if (speed)
        {
-         if (maybe_fma)
+         if (compound_p)
            /* MADD.  */
            cost += extra_cost->mult[mode == DImode].add;
          else
@@ -5253,7 +5278,7 @@ aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
      if (GET_CODE (op1) == NEG)
        op1 = XEXP (op1, 0);
 
-     if (maybe_fma)
+     if (compound_p)
        /* FMADD/FNMADD/FNMSUB/FMSUB.  */
        cost += extra_cost->fp[mode == DFmode].fma;
      else
@@ -5831,7 +5856,7 @@ cost_minus:
        /* Cost this as an FMA-alike operation.  */
        if ((GET_CODE (new_op1) == MULT
-            || GET_CODE (new_op1) == ASHIFT)
+            || aarch64_shift_p (GET_CODE (new_op1)))
            && code != COMPARE)
          {
            *cost += aarch64_rtx_mult_cost (new_op1, MULT,
@@ -5901,7 +5926,7 @@ cost_plus:
        new_op0 = aarch64_strip_extend (op0);
 
        if (GET_CODE (new_op0) == MULT
-           || GET_CODE (new_op0) == ASHIFT)
+           || aarch64_shift_p (GET_CODE (new_op0)))
          {
            *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
                                            speed);
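
As a side note on the new distinction in the shift case above (again illustrative, not taken from the commit): when the shift amount is a register rather than an immediate, the patch costs the compound operation with alu.arith_shift_reg instead of alu.arith_shift, since the AArch64 add-with-shifted-register form only encodes an immediate shift amount and a separate shift instruction is normally needed.

/* Illustrative only: a compound add whose shift amount is a register.
   GCC typically emits a separate shift followed by the add here,
   e.g. "lsl x1, x1, x2" then "add x0, x0, x1", which is why the patch
   gives this shape the (usually higher) alu.arith_shift_reg cost.  */

long
add_shifted_by_reg (long a, long b, long n)
{
  return a + (b << n);
}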