Commit b10f1009, authored and committed by Andrew Pinski

aarch64-cost-tables.h (thunderx_extra_costs): Increment Arith_shift and Arith_shift_reg by 1.

2017-06-21  Andrew Pinski  <apinski@cavium.com>

        * config/aarch64/aarch64-cost-tables.h (thunderx_extra_costs):
        Increment Arith_shift and Arith_shift_reg by 1.
        * config/aarch64/aarch64-tuning-flags.def (cheap_shift_extend):
        New tuning flag.
        * config/aarch64/aarch64.c (thunderx_tunings): Enable
        AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND.
        (aarch64_strip_extend): Add new argument and test for it.
        (aarch64_cheap_mult_shift_p): New function.
        (aarch64_rtx_mult_cost): Call aarch64_cheap_mult_shift_p and don't
        add a cost if it is true.
        Update calls to aarch64_strip_extend.
        (aarch64_rtx_costs): Update calls to aarch64_strip_extend.

From-SVN: r249459
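
To see the kind of code this affects: on ThunderX an arithmetic instruction can fold in a small left shift (optionally with a zero extend) essentially for free, e.g. "add x0, x0, x1, lsl 3". A minimal illustrative C function follows; the function name is hypothetical and the exact code generation depends on compiler version and options:

  /* With -mcpu=thunderx, the shift below can typically be folded into
     the add as a single "add x0, x0, x1, lsl 3" instruction; this patch
     teaches the cost model that such a fold is cheap.  */
  long
  add_shifted (long a, long b)
  {
    return a + (b << 3);   /* shift amount <= 4 */
  }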
@@ -136,8 +136,8 @@ const struct cpu_cost_table thunderx_extra_costs =
     0,				/* Logical.  */
     0,				/* Shift.  */
     0,				/* Shift_reg.  */
-    COSTS_N_INSNS (1),		/* Arith_shift.  */
-    COSTS_N_INSNS (1),		/* Arith_shift_reg.  */
+    COSTS_N_INSNS (1)+1,	/* Arith_shift.  */
+    COSTS_N_INSNS (1)+1,	/* Arith_shift_reg.  */
     COSTS_N_INSNS (1),		/* UNUSED: Log_shift.  */
     COSTS_N_INSNS (1),		/* UNUSED: Log_shift_reg.  */
     0,				/* Extend.  */
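
Worked cost arithmetic, to make the +1 concrete: COSTS_N_INSNS (N) expands to (N) * 4 in GCC's rtl.h, so these entries work out as follows (a sketch of the intent, not new semantics):

  COSTS_N_INSNS (1)     = 1 * 4 = 4   /* one full instruction */
  COSTS_N_INSNS (1) + 1 = 5           /* slightly worse than one instruction */
  COSTS_N_INSNS (2)     = 8           /* two instructions */

The +1 therefore prices a shifted arithmetic op a little above a plain one, without making it look as expensive as two separate instructions.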
@@ -35,4 +35,10 @@ two load/stores are not at least 8 byte aligned don't create load/store
    pairs.  */
 AARCH64_EXTRA_TUNING_OPTION ("slow_unaligned_ldpw", SLOW_UNALIGNED_LDPW)
 
+/* The optional shift on some arithmetic instructions is considered
+   cheap.  A logical shift left of <= 4, with or without a zero extend,
+   is considered cheap.  A sign extend, or a non-logical shift left, is
+   not considered cheap.  */
+AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND)
+
 #undef AARCH64_EXTRA_TUNING_OPTION
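
Concretely, the flag draws the cheap/expensive line roughly as in the sketch below (hypothetical function names; actual codegen may vary by compiler version and options):

  unsigned long
  cheap_zext_shift (unsigned long a, unsigned int b)
  {
    return a + ((unsigned long) b << 2);  /* zero extend + LSL #2: cheap */
  }

  long
  not_cheap_sext (long a, int b)
  {
    return a + ((long) b << 2);           /* sign extend: not cheap */
  }

  long
  not_cheap_big_shift (long a, long b)
  {
    return a + (b << 5);                  /* shift amount > 4: not cheap */
  }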
@@ -809,7 +809,8 @@ static const struct tune_params thunderx_tunings =
   2,	/* min_div_recip_mul_df.  */
   0,	/* max_case_values.  */
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW),	/* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW
+   | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND),	/* tune_flags.  */
   &thunderx_prefetch_tune
 };
@@ -6120,9 +6121,10 @@ aarch64_strip_shift (rtx x)
 /* Helper function for rtx cost calculation.  Strip an extend
    expression from X.  Returns the inner operand if successful, or the
    original expression on failure.  We deal with a number of possible
-   canonicalization variations here.  */
+   canonicalization variations here.  If STRIP_SHIFT is true, then
+   we can strip off a shift also.  */
 static rtx
-aarch64_strip_extend (rtx x)
+aarch64_strip_extend (rtx x, bool strip_shift)
 {
   rtx op = x;
@@ -6146,7 +6148,8 @@ aarch64_strip_extend (rtx x)
   /* Now handle extended register, as this may also have an optional
      left shift by 1..4.  */
-  if (GET_CODE (op) == ASHIFT
+  if (strip_shift
+      && GET_CODE (op) == ASHIFT
       && CONST_INT_P (XEXP (op, 1))
       && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (op, 1))) <= 4)
     op = XEXP (op, 0);
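
As a sketch of what the new argument controls, consider RTL of the following shape (hand-written for illustration, not taken from a dump):

  (plus:DI (ashift:DI (zero_extend:DI (reg:SI x1))
                      (const_int 2))
           (reg:DI x0))

With STRIP_SHIFT true the helper peels the ashift and then the zero_extend, returning the inner register; with STRIP_SHIFT false the shift is left in place (so nothing underneath it is stripped either), which is what callers want when they cost shifts separately through mult_cost.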
@@ -6170,6 +6173,39 @@ aarch64_shift_p (enum rtx_code code)
   return code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT;
 }
 
+/* Return true iff X is a cheap shift without a sign extend.  */
+
+static bool
+aarch64_cheap_mult_shift_p (rtx x)
+{
+  rtx op0, op1;
+
+  op0 = XEXP (x, 0);
+  op1 = XEXP (x, 1);
+
+  if (!(aarch64_tune_params.extra_tuning_flags
+	& AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND))
+    return false;
+
+  if (GET_CODE (op0) == SIGN_EXTEND)
+    return false;
+
+  if (GET_CODE (x) == ASHIFT && CONST_INT_P (op1)
+      && UINTVAL (op1) <= 4)
+    return true;
+
+  if (GET_CODE (x) != MULT || !CONST_INT_P (op1))
+    return false;
+
+  HOST_WIDE_INT l2 = exact_log2 (INTVAL (op1));
+  if (l2 > 0 && l2 <= 4)
+    return true;
+
+  return false;
+}
+
 /* Helper function for rtx cost calculation.  Calculate the cost of
    a MULT or ASHIFT, which may be part of a compound PLUS/MINUS rtx.
    Return the calculated cost of the expression, recursing manually in to
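
The exact_log2 case above handles multiplies that are really shifts: a constant power-of-two multiplier of 2, 4, 8 or 16 corresponds to a left shift by 1..4. A standalone sketch of that test, using a hypothetical helper name and __builtin_ctzl in place of GCC's internal exact_log2:

  #include <stdbool.h>

  /* Illustration only: true iff MULTIPLIER is 2, 4, 8 or 16, i.e. the
     multiply is equivalent to a left shift by 1..4.  */
  static bool
  mult_is_cheap_shift (long multiplier)
  {
    /* exact_log2 returns -1 for non-powers-of-two; emulate that here.  */
    int l2 = (multiplier > 0 && (multiplier & (multiplier - 1)) == 0)
	     ? __builtin_ctzl ((unsigned long) multiplier) : -1;
    return l2 > 0 && l2 <= 4;
  }

Note that a multiplier of 1 gives l2 == 0 and is rejected, matching the l2 > 0 test in the patch.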
@@ -6207,7 +6243,11 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
     {
       if (compound_p)
 	{
-	  if (REG_P (op1))
+	  /* If the shift is considered cheap,
+	     then don't add any cost.  */
+	  if (aarch64_cheap_mult_shift_p (x))
+	    ;
+	  else if (REG_P (op1))
 	    /* ARITH + shift-by-register.  */
 	    cost += extra_cost->alu.arith_shift_reg;
 	  else if (is_extend)
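
Worked through for a + (b << 3) on ThunderX: aarch64_cheap_mult_shift_p returns true, the empty statement falls through, and no extra cost is added. With the flag disabled, or with a shift amount above 4 or a sign extend, the old behaviour remains and extra_cost->alu.arith_shift — now COSTS_N_INSNS (1)+1 = 5 — is added on top of the base cost.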
@@ -6225,7 +6265,7 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
 	}
 
       /* Strip extends as we will have costed them in the case above.  */
       if (is_extend)
-	op0 = aarch64_strip_extend (op0);
+	op0 = aarch64_strip_extend (op0, true);
 
       cost += rtx_cost (op0, VOIDmode, code, 0, speed);
@@ -7069,13 +7109,13 @@ cost_minus:
 	  if (speed)
 	    *cost += extra_cost->alu.extend_arith;
 
-	  op1 = aarch64_strip_extend (op1);
+	  op1 = aarch64_strip_extend (op1, true);
 	  *cost += rtx_cost (op1, VOIDmode,
 			     (enum rtx_code) GET_CODE (op1), 0, speed);
 	  return true;
 	}
 
-      rtx new_op1 = aarch64_strip_extend (op1);
+      rtx new_op1 = aarch64_strip_extend (op1, false);
 
       /* Cost this as an FMA-alike operation.  */
       if ((GET_CODE (new_op1) == MULT
@@ -7148,7 +7188,7 @@ cost_plus:
 	  if (speed)
 	    *cost += extra_cost->alu.extend_arith;
 
-	  op0 = aarch64_strip_extend (op0);
+	  op0 = aarch64_strip_extend (op0, true);
 	  *cost += rtx_cost (op0, VOIDmode,
 			     (enum rtx_code) GET_CODE (op0), 0, speed);
 	  return true;
@@ -7156,7 +7196,7 @@ cost_plus:
       /* Strip any extend, leave shifts behind as we will
	 cost them through mult_cost.  */
-      new_op0 = aarch64_strip_extend (op0);
+      new_op0 = aarch64_strip_extend (op0, false);
 
       if (GET_CODE (new_op0) == MULT
	  || aarch64_shift_p (GET_CODE (new_op0)))