Commit b10f1009 by Andrew Pinski

aarch64-cost-tables.h (thunderx_extra_costs): Increment Arith_shift and Arith_shift_reg by 1.

2017-06-21  Andrew Pinski  <apinski@cavium.com>

        * config/aarch64/aarch64-cost-tables.h (thunderx_extra_costs):
        Increment Arith_shift and Arith_shift_reg by 1.
        * config/aarch64/aarch64-tuning-flags.def (cheap_shift_extend):
        New tuning flag.
        * config/aarch64/aarch64.c (thunderx_tunings): Enable
        AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND.
        (aarch64_strip_extend): Add new argument and test for it.
        (aarch64_cheap_mult_shift_p): New function.
        (aarch64_rtx_mult_cost): Call aarch64_cheap_mult_shift_p and don't
        add a cost if it is true.
        Update calls to aarch64_strip_extend.
        (aarch64_rtx_costs): Update calls to aarch64_strip_extend.

From-SVN: r249459
parent f1e247d0
2017-06-21  Andrew Pinski  <apinski@cavium.com>
* config/aarch64/aarch64-cost-tables.h (thunderx_extra_costs):
Increment Arith_shift and Arith_shift_reg by 1.
* config/aarch64/aarch64-tuning-flags.def (cheap_shift_extend):
New tuning flag.
* config/aarch64/aarch64.c (thunderx_tunings): Enable
AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND.
(aarch64_strip_extend): Add new argument and test for it.
(aarch64_cheap_mult_shift_p): New function.
(aarch64_rtx_mult_cost): Call aarch64_cheap_mult_shift_p and don't
add a cost if it is true.
Update calls to aarch64_strip_extend.
(aarch64_rtx_costs): Update calls to aarch64_strip_extend.
2017-06-21 Andrew Pinski <apinski@cavium.com>
* config/aarch64/aarch64-cores.def (thunderxt88p1): Use thunderxt88
tunings.
(thunderxt88): Likewise.
......
...@@ -136,8 +136,8 @@ const struct cpu_cost_table thunderx_extra_costs = ...@@ -136,8 +136,8 @@ const struct cpu_cost_table thunderx_extra_costs =
0, /* Logical. */ 0, /* Logical. */
0, /* Shift. */ 0, /* Shift. */
0, /* Shift_reg. */ 0, /* Shift_reg. */
COSTS_N_INSNS (1), /* Arith_shift. */ COSTS_N_INSNS (1)+1, /* Arith_shift. */
COSTS_N_INSNS (1), /* Arith_shift_reg. */ COSTS_N_INSNS (1)+1, /* Arith_shift_reg. */
COSTS_N_INSNS (1), /* UNUSED: Log_shift. */ COSTS_N_INSNS (1), /* UNUSED: Log_shift. */
COSTS_N_INSNS (1), /* UNUSED: Log_shift_reg. */ COSTS_N_INSNS (1), /* UNUSED: Log_shift_reg. */
0, /* Extend. */ 0, /* Extend. */
......
...@@ -35,4 +35,10 @@ two load/stores are not at least 8 byte aligned don't create load/store ...@@ -35,4 +35,10 @@ two load/stores are not at least 8 byte aligned don't create load/store
pairs. */ pairs. */
AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", SLOW_UNALIGNED_LDPW) AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", SLOW_UNALIGNED_LDPW)
/* Some of the optional shifts attached to arithmetic instructions are
   considered cheap.  A logical shift left by <= 4, with or without a
   zero extend, is considered cheap.  A sign extend, or a non-logical
   shift left, is not considered cheap.  */
AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND)
#undef AARCH64_EXTRA_TUNING_OPTION #undef AARCH64_EXTRA_TUNING_OPTION
...@@ -809,7 +809,8 @@ static const struct tune_params thunderx_tunings = ...@@ -809,7 +809,8 @@ static const struct tune_params thunderx_tunings =
2, /* min_div_recip_mul_df. */ 2, /* min_div_recip_mul_df. */
0, /* max_case_values. */ 0, /* max_case_values. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */ (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
| AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
&thunderx_prefetch_tune &thunderx_prefetch_tune
}; };
...@@ -6120,9 +6121,10 @@ aarch64_strip_shift (rtx x) ...@@ -6120,9 +6121,10 @@ aarch64_strip_shift (rtx x)
/* Helper function for rtx cost calculation. Strip an extend /* Helper function for rtx cost calculation. Strip an extend
expression from X. Returns the inner operand if successful, or the expression from X. Returns the inner operand if successful, or the
original expression on failure. We deal with a number of possible original expression on failure. We deal with a number of possible
canonicalization variations here. */ canonicalization variations here. If STRIP_SHIFT is true, then
we can strip off a shift also. */
static rtx static rtx
aarch64_strip_extend (rtx x) aarch64_strip_extend (rtx x, bool strip_shift)
{ {
rtx op = x; rtx op = x;
...@@ -6146,7 +6148,8 @@ aarch64_strip_extend (rtx x) ...@@ -6146,7 +6148,8 @@ aarch64_strip_extend (rtx x)
/* Now handle extended register, as this may also have an optional /* Now handle extended register, as this may also have an optional
left shift by 1..4. */ left shift by 1..4. */
if (GET_CODE (op) == ASHIFT if (strip_shift
&& GET_CODE (op) == ASHIFT
&& CONST_INT_P (XEXP (op, 1)) && CONST_INT_P (XEXP (op, 1))
&& ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4) && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
op = XEXP (op, 0); op = XEXP (op, 0);
...@@ -6170,6 +6173,39 @@ aarch64_shift_p (enum rtx_code code) ...@@ -6170,6 +6173,39 @@ aarch64_shift_p (enum rtx_code code)
return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT; return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
} }
/* Return true iff X is a cheap shift without a sign extend.  */

static bool
aarch64_cheap_mult_shift_p (rtx x)
{
  /* Nothing is cheap unless the tuning says so.  */
  if (!(aarch64_tune_params.extra_tuning_flags
	& AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
    return false;

  rtx inner = XEXP (x, 0);
  rtx amount = XEXP (x, 1);

  /* A sign extend is never cheap here.  */
  if (GET_CODE (inner) == SIGN_EXTEND)
    return false;

  /* Only constant shift amounts can be cheap.  */
  if (!CONST_INT_P (amount))
    return false;

  /* A plain left shift by at most 4 is cheap.  */
  if (GET_CODE (x) == ASHIFT)
    return UINTVAL (amount) <= 4;

  /* A MULT by a power of two in 2..16 is really a shift by 1..4.  */
  if (GET_CODE (x) == MULT)
    {
      HOST_WIDE_INT shift = exact_log2 (INTVAL (amount));
      return shift > 0 && shift <= 4;
    }

  return false;
}
/* Helper function for rtx cost calculation. Calculate the cost of /* Helper function for rtx cost calculation. Calculate the cost of
a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx. a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
Return the calculated cost of the expression, recursing manually in to Return the calculated cost of the expression, recursing manually in to
...@@ -6207,7 +6243,11 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed) ...@@ -6207,7 +6243,11 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
{ {
if (compound_p) if (compound_p)
{ {
if (REG_P (op1)) /* If the shift is considered cheap,
then don't add any cost. */
if (aarch64_cheap_mult_shift_p (x))
;
else if (REG_P (op1))
/* ARITH + shift-by-register. */ /* ARITH + shift-by-register. */
cost += extra_cost->alu.arith_shift_reg; cost += extra_cost->alu.arith_shift_reg;
else if (is_extend) else if (is_extend)
...@@ -6225,7 +6265,7 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed) ...@@ -6225,7 +6265,7 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
} }
/* Strip extends as we will have costed them in the case above. */ /* Strip extends as we will have costed them in the case above. */
if (is_extend) if (is_extend)
op0 = aarch64_strip_extend (op0); op0 = aarch64_strip_extend (op0, true);
cost += rtx_cost (op0, VOIDmode, code, 0, speed); cost += rtx_cost (op0, VOIDmode, code, 0, speed);
...@@ -7069,13 +7109,13 @@ cost_minus: ...@@ -7069,13 +7109,13 @@ cost_minus:
if (speed) if (speed)
*cost += extra_cost->alu.extend_arith; *cost += extra_cost->alu.extend_arith;
op1 = aarch64_strip_extend (op1); op1 = aarch64_strip_extend (op1, true);
*cost += rtx_cost (op1, VOIDmode, *cost += rtx_cost (op1, VOIDmode,
(enum rtx_code) GET_CODE (op1), 0, speed); (enum rtx_code) GET_CODE (op1), 0, speed);
return true; return true;
} }
rtx new_op1 = aarch64_strip_extend (op1); rtx new_op1 = aarch64_strip_extend (op1, false);
/* Cost this as an FMA-alike operation. */ /* Cost this as an FMA-alike operation. */
if ((GET_CODE (new_op1) == MULT if ((GET_CODE (new_op1) == MULT
...@@ -7148,7 +7188,7 @@ cost_plus: ...@@ -7148,7 +7188,7 @@ cost_plus:
if (speed) if (speed)
*cost += extra_cost->alu.extend_arith; *cost += extra_cost->alu.extend_arith;
op0 = aarch64_strip_extend (op0); op0 = aarch64_strip_extend (op0, true);
*cost += rtx_cost (op0, VOIDmode, *cost += rtx_cost (op0, VOIDmode,
(enum rtx_code) GET_CODE (op0), 0, speed); (enum rtx_code) GET_CODE (op0), 0, speed);
return true; return true;
...@@ -7156,7 +7196,7 @@ cost_plus: ...@@ -7156,7 +7196,7 @@ cost_plus:
/* Strip any extend, leave shifts behind as we will /* Strip any extend, leave shifts behind as we will
cost them through mult_cost. */ cost them through mult_cost. */
new_op0 = aarch64_strip_extend (op0); new_op0 = aarch64_strip_extend (op0, false);
if (GET_CODE (new_op0) == MULT if (GET_CODE (new_op0) == MULT
|| aarch64_shift_p (GET_CODE (new_op0))) || aarch64_shift_p (GET_CODE (new_op0)))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment