Commit d5a0a47b by Richard Earnshaw (committed by Richard Earnshaw)

arm.c (struct processors): Pass for speed down into cost helper functions.

	* arm.c (struct processors): Pass for speed down into cost helper
	functions.
	(const_ok_for_op): Handle COMPARE and inequality nodes.
	(arm_rtx_costs_1): Rewrite.
	(arm_size_rtx_costs): Update prototype.
	(arm_rtx_costs): Pass speed down to helper functions.
	(arm_slowmul_rtx_costs): Rework cost calculations.
	(arm_fastmul_rtx_costs, arm_xscale_rtx_costs): Likewise.
	(arm_9e_rtx_costs): Likewise.

From-SVN: r143338
parent 43b2b720
2009-01-13  Richard Earnshaw  <rearnsha@arm.com>

	* arm.c (struct processors): Pass for speed down into cost helper
	functions.
	(const_ok_for_op): Handle COMPARE and inequality nodes.
	(arm_rtx_costs_1): Rewrite.
	(arm_size_rtx_costs): Update prototype.
	(arm_rtx_costs): Pass speed down to helper functions.
	(arm_slowmul_rtx_costs): Rework cost calculations.
	(arm_fastmul_rtx_costs, arm_xscale_rtx_costs): Likewise.
	(arm_9e_rtx_costs): Likewise.

2009-01-13  Uros Bizjak  <ubizjak@gmail.com>

	* config/alpha/alpha.c (alpha_legitimate_address_p): Explicit
......
@@ -126,12 +126,12 @@ static bool arm_function_ok_for_sibcall (tree, tree);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
tree);
static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
static bool arm_size_rtx_costs (rtx, int, int, int *);
static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
static bool arm_xscale_rtx_costs (rtx, int, int, int *);
static bool arm_9e_rtx_costs (rtx, int, int, int *);
static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
static bool arm_rtx_costs (rtx, int, int, int *, bool);
static int arm_address_cost (rtx, bool);
static bool arm_memory_load_p (rtx);
@@ -619,7 +619,7 @@ struct processors
enum processor_type core;
const char *arch;
const unsigned long flags;
bool (* rtx_costs) (rtx, int, int, int *);
bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool);
};
/* Not all of these give usefully different compilation alternatives,
@@ -1883,6 +1883,24 @@ const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
switch (code)
{
case PLUS:
case COMPARE:
case EQ:
case NE:
case GT:
case LE:
case LT:
case GE:
case GEU:
case LTU:
case GTU:
case LEU:
case UNORDERED:
case ORDERED:
case UNEQ:
case UNGE:
case UNLT:
case UNGT:
case UNLE:
return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
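
The new comparison cases above return const_ok_for_arm (ARM_SIGN_EXTEND (-i)): a constant is acceptable for a comparison when its negation is a valid ARM immediate, because a CMP against #i can always be recast as a CMN against #-i (ARM_SIGN_EXTEND merely widens the negated 32-bit value on 64-bit hosts). A standalone sketch of the idea, using a simplified re-implementation of the immediate test rather than GCC's actual const_ok_for_arm:

#include <stdio.h>

/* Simplified model of the ARM immediate test: a 32-bit value is
   encodable as a data-processing operand if it is an 8-bit constant
   rotated right by an even amount.  */
static int is_arm_immediate (unsigned int v)
{
  for (int r = 0; r < 32; r += 2)
    {
      /* Rotate v left by r to undo a rotate-right-by-r encoding.  */
      unsigned int rot = r ? ((v << r) | (v >> (32 - r))) : v;
      if (rot <= 0xff)
        return 1;
    }
  return 0;
}

int main (void)
{
  unsigned int i = 0xffffff00u;          /* not encodable directly...  */
  printf ("%d %d\n", is_arm_immediate (i),
          is_arm_immediate (-i));        /* ...but -i == 0x100 is  */
  return 0;
}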
case MINUS: /* Should only occur with (MINUS I reg) => rsb */
@@ -4882,130 +4900,227 @@ thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
}
}
/* Worker routine for arm_rtx_costs. */
/* ??? This needs updating for thumb2. */
static inline int
arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
static inline bool
arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
enum rtx_code subcode;
rtx operand;
enum rtx_code code = GET_CODE (x);
int extra_cost;
*total = 0;
switch (code)
{
case MEM:
/* Memory costs quite a lot for the first word, but subsequent words
load at the equivalent of a single insn each. */
return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
+ (GET_CODE (x) == SYMBOL_REF
&& CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
*total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
return true;
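
The rewrite replaces the old raw figures with COSTS_N_INSNS units throughout. Assuming GCC's usual definitions (COSTS_N_INSNS (n) expands to n * 4, and ARM_NUM_REGS rounds the mode's size up to 32-bit words), the new MEM cost can be worked through concretely:

#include <stdio.h>

/* Hedged restatements of the macros used above, reproduced here only
   for the worked example.  */
#define COSTS_N_INSNS(n)  ((n) * 4)
#define NUM_REGS(bytes)   (((bytes) + 3) / 4)   /* ARM_NUM_REGS by size */

int main (void)
{
  /* SImode (4 bytes): 2 + 1 insns; DImode (8 bytes): 2 + 2 insns.  */
  printf ("SImode MEM: %d\n", COSTS_N_INSNS (2 + NUM_REGS (4)));  /* 12 */
  printf ("DImode MEM: %d\n", COSTS_N_INSNS (2 + NUM_REGS (8)));  /* 16 */
  return 0;
}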
case DIV:
case MOD:
case UDIV:
case UMOD:
return optimize_size ? COSTS_N_INSNS (2) : 100;
if (TARGET_HARD_FLOAT && mode == SFmode)
*total = COSTS_N_INSNS (2);
else if (TARGET_HARD_FLOAT && mode == DFmode)
*total = COSTS_N_INSNS (4);
else
*total = COSTS_N_INSNS (20);
return false;
case ROTATE:
if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
return 4;
if (GET_CODE (XEXP (x, 1)) == REG)
*total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
*total = rtx_cost (XEXP (x, 1), code, speed);
/* Fall through */
case ROTATERT:
if (mode != SImode)
return 8;
{
*total += COSTS_N_INSNS (4);
return true;
}
/* Fall through */
case ASHIFT: case LSHIFTRT: case ASHIFTRT:
*total += rtx_cost (XEXP (x, 0), code, speed);
if (mode == DImode)
return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
+ ((GET_CODE (XEXP (x, 0)) == REG
|| (GET_CODE (XEXP (x, 0)) == SUBREG
&& GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
? 0 : 8));
{
*total += COSTS_N_INSNS (3);
return true;
}
extra_cost = 1;
*total += COSTS_N_INSNS (1);
/* Increase the cost of complex shifts because they aren't any faster,
and reduce dual issue opportunities. */
if (arm_tune_cortex_a9
&& outer != SET && GET_CODE (XEXP (x, 1)) != CONST_INT)
extra_cost++;
return (extra_cost + ((GET_CODE (XEXP (x, 0)) == REG
|| (GET_CODE (XEXP (x, 0)) == SUBREG
&& GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
? 0 : 4)
+ ((GET_CODE (XEXP (x, 1)) == REG
|| (GET_CODE (XEXP (x, 1)) == SUBREG
&& GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
|| (GET_CODE (XEXP (x, 1)) == CONST_INT))
? 0 : 4));
++*total;
return true;
case MINUS:
if (GET_CODE (XEXP (x, 1)) == MULT && mode == SImode && arm_arch_thumb2)
if (TARGET_THUMB2)
{
extra_cost = rtx_cost (XEXP (x, 1), code, true);
if (!REG_OR_SUBREG_REG (XEXP (x, 0)))
extra_cost += 4 * ARM_NUM_REGS (mode);
return extra_cost;
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (20);
}
else
*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
/* Thumb2 does not have RSB, so all arguments must be
registers (subtracting a constant is canonicalized as
addition of the negated constant). */
return false;
}
if (mode == DImode)
return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
+ ((REG_OR_SUBREG_REG (XEXP (x, 0))
|| (GET_CODE (XEXP (x, 0)) == CONST_INT
&& const_ok_for_arm (INTVAL (XEXP (x, 0)))))
? 0 : 8));
{
*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
if (GET_CODE (XEXP (x, 0)) == CONST_INT
&& const_ok_for_arm (INTVAL (XEXP (x, 0))))
{
*total += rtx_cost (XEXP (x, 1), code, speed);
return true;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT
&& const_ok_for_arm (INTVAL (XEXP (x, 1))))
{
*total += rtx_cost (XEXP (x, 0), code, speed);
return true;
}
return false;
}
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
|| (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
&& arm_const_double_rtx (XEXP (x, 1))))
? 0 : 8)
+ ((REG_OR_SUBREG_REG (XEXP (x, 0))
|| (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
&& arm_const_double_rtx (XEXP (x, 0))))
? 0 : 8));
if (((GET_CODE (XEXP (x, 0)) == CONST_INT
&& const_ok_for_arm (INTVAL (XEXP (x, 0)))
&& REG_OR_SUBREG_REG (XEXP (x, 1))))
|| (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
|| subcode == ASHIFTRT || subcode == LSHIFTRT
|| subcode == ROTATE || subcode == ROTATERT
|| (subcode == MULT
&& GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
&& ((INTVAL (XEXP (XEXP (x, 1), 1)) &
(INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
&& REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
&& (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
|| GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
&& REG_OR_SUBREG_REG (XEXP (x, 0))))
return 1;
{
if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
{
*total = COSTS_N_INSNS (1);
if (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
&& arm_const_double_rtx (XEXP (x, 0)))
{
*total += rtx_cost (XEXP (x, 1), code, speed);
return true;
}
if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
&& arm_const_double_rtx (XEXP (x, 1)))
{
*total += rtx_cost (XEXP (x, 0), code, speed);
return true;
}
return false;
}
*total = COSTS_N_INSNS (20);
return false;
}
*total = COSTS_N_INSNS (1);
if (GET_CODE (XEXP (x, 0)) == CONST_INT
&& const_ok_for_arm (INTVAL (XEXP (x, 0))))
{
*total += rtx_cost (XEXP (x, 1), code, speed);
return true;
}
subcode = GET_CODE (XEXP (x, 1));
if (subcode == ASHIFT || subcode == ASHIFTRT
|| subcode == LSHIFTRT
|| subcode == ROTATE || subcode == ROTATERT)
{
*total += rtx_cost (XEXP (x, 0), code, speed);
*total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
return true;
}
if (subcode == MULT
&& GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
&& ((INTVAL (XEXP (XEXP (x, 1), 1)) &
(INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0))
{
*total += rtx_cost (XEXP (x, 0), code, speed);
*total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, speed);
return true;
}
if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
|| GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
{
*total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
if (GET_CODE (XEXP (XEXP (x, 1), 0)) == REG
&& REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
*total += COSTS_N_INSNS (1);
return true;
}
/* Fall through */
case PLUS:
if (arm_arch6 && mode == SImode
if (code == PLUS && arm_arch6 && mode == SImode
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
return 1 + (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM ? 10 : 0)
+ (GET_CODE (XEXP (x, 1)) == MEM ? 10 : 0);
{
*total = COSTS_N_INSNS (1);
*total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
speed);
*total += rtx_cost (XEXP (x, 1), code, speed);
return true;
}
if (GET_CODE (XEXP (x, 0)) == MULT)
/* MLA: All arguments must be registers. We filter out
multiplication by a power of two, so that we fall down into
the code below. */
if (GET_CODE (XEXP (x, 0)) == MULT
&& ! (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
&& ((INTVAL (XEXP (XEXP (x, 0), 1)) &
(INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
{
extra_cost = rtx_cost (XEXP (x, 0), code, true);
if (!REG_OR_SUBREG_REG (XEXP (x, 1)))
extra_cost += 4 * ARM_NUM_REGS (mode);
return extra_cost;
/* The cost comes from the cost of the multiply. */
return false;
}
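
The (INTVAL ... & (INTVAL ... - 1)) == 0 test used in the filter above, and repeatedly below, is the standard single-set-bit check: subtracting 1 borrows through the lowest set bit, so the AND is zero exactly when at most one bit is set. A MULT by such a constant is really a shift and is costed as one. As a standalone helper (note the form in the patch also accepts zero):

#include <stdio.h>

static int is_power_of_two (unsigned long i)
{
  return i != 0 && (i & (i - 1)) == 0;
}

int main (void)
{
  printf ("%d %d %d\n", is_power_of_two (8),     /* 1 */
          is_power_of_two (12),                  /* 0 */
          is_power_of_two (0));                  /* 0 */
  return 0;
}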
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
+ ((REG_OR_SUBREG_REG (XEXP (x, 1))
|| (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
&& arm_const_double_rtx (XEXP (x, 1))))
? 0 : 8));
{
if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
{
*total = COSTS_N_INSNS (1);
if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
&& arm_const_double_rtx (XEXP (x, 1)))
{
*total += rtx_cost (XEXP (x, 0), code, speed);
return true;
}
return false;
}
*total = COSTS_N_INSNS (20);
return false;
}
if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
|| GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
{
*total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, speed);
if (GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
&& REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
*total += COSTS_N_INSNS (1);
return true;
}
/* Fall through */
case AND: case XOR: case IOR:
extra_cost = 0;
@@ -5019,38 +5134,56 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
&& GET_CODE (XEXP (x, 1)) != CONST_INT)
|| (REG_OR_SUBREG_REG (XEXP (x, 0))
&& ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
extra_cost = 4;
*total = 4;
if (mode == DImode)
return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
+ ((REG_OR_SUBREG_REG (XEXP (x, 1))
|| (GET_CODE (XEXP (x, 1)) == CONST_INT
&& const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
? 0 : 8));
if (REG_OR_SUBREG_REG (XEXP (x, 0)))
return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
+ ((REG_OR_SUBREG_REG (XEXP (x, 1))
|| (GET_CODE (XEXP (x, 1)) == CONST_INT
&& const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
? 0 : 4));
else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
return (1 + extra_cost
+ ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
|| subcode == LSHIFTRT || subcode == ASHIFTRT
|| subcode == ROTATE || subcode == ROTATERT
|| (subcode == MULT
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
&& ((INTVAL (XEXP (XEXP (x, 0), 1)) &
(INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
&& (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
&& ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1))
&& !arm_tune_cortex_a9)
|| GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
? 0 : 4));
{
*total += COSTS_N_INSNS (2);
if (GET_CODE (XEXP (x, 1)) == CONST_INT
&& const_ok_for_op (INTVAL (XEXP (x, 1)), code))
{
*total += rtx_cost (XEXP (x, 0), code, speed);
return true;
}
return 8;
return false;
}
*total += COSTS_N_INSNS (1);
if (GET_CODE (XEXP (x, 1)) == CONST_INT
&& const_ok_for_op (INTVAL (XEXP (x, 1)), code))
{
*total += rtx_cost (XEXP (x, 0), code, speed);
return true;
}
subcode = GET_CODE (XEXP (x, 0));
if (subcode == ASHIFT || subcode == ASHIFTRT
|| subcode == LSHIFTRT
|| subcode == ROTATE || subcode == ROTATERT)
{
*total += rtx_cost (XEXP (x, 1), code, speed);
*total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
return true;
}
if (subcode == MULT
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
&& ((INTVAL (XEXP (XEXP (x, 0), 1)) &
(INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))
{
*total += rtx_cost (XEXP (x, 1), code, speed);
*total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
return true;
}
if (subcode == UMIN || subcode == UMAX
|| subcode == SMIN || subcode == SMAX)
{
*total = COSTS_N_INSNS (3);
return true;
}
return false;
case MULT:
/* This should have been handled by the CPU specific routines. */
@@ -5064,108 +5197,290 @@ arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
== GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
&& (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
return 8;
return 99;
{
*total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, speed);
return true;
}
*total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
return false;
case NEG:
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
{
if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
{
*total = COSTS_N_INSNS (1);
return false;
}
*total = COSTS_N_INSNS (2);
return false;
}
/* Fall through */
case NOT:
if (mode == DImode)
return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
*total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
if (mode == SImode && code == NOT)
{
subcode = GET_CODE (XEXP (x, 0));
if (subcode == ASHIFT || subcode == ASHIFTRT
|| subcode == LSHIFTRT
|| subcode == ROTATE || subcode == ROTATERT
|| (subcode == MULT
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
&& ((INTVAL (XEXP (XEXP (x, 0), 1)) &
(INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
{
*total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
/* Register shifts cost an extra cycle. */
if (GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
*total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
subcode, speed);
return true;
}
}
return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
return false;
case IF_THEN_ELSE:
if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
return 14;
return 2;
{
*total = COSTS_N_INSNS (4);
return true;
}
operand = XEXP (x, 0);
if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
|| GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
&& GET_CODE (XEXP (operand, 0)) == REG
&& REGNO (XEXP (operand, 0)) == CC_REGNUM))
*total += COSTS_N_INSNS (1);
*total += (rtx_cost (XEXP (x, 1), code, speed)
+ rtx_cost (XEXP (x, 2), code, speed));
return true;
case NE:
if (mode == SImode && XEXP (x, 1) == const0_rtx)
{
*total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
return true;
}
goto scc_insn;
case GE:
if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
&& mode == SImode && XEXP (x, 1) == const0_rtx)
{
*total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
return true;
}
goto scc_insn;
case LT:
if ((GET_CODE (XEXP (x, 0)) != REG || REGNO (XEXP (x, 0)) != CC_REGNUM)
&& mode == SImode && XEXP (x, 1) == const0_rtx)
{
*total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
return true;
}
goto scc_insn;
case EQ:
case GT:
case LE:
case GEU:
case LTU:
case GTU:
case LEU:
case UNORDERED:
case ORDERED:
case UNEQ:
case UNGE:
case UNLT:
case UNGT:
case UNLE:
scc_insn:
/* SCC insns.  When the comparison has already been performed, they
cost 2 instructions.  Otherwise they need an additional comparison
before them. */
*total = COSTS_N_INSNS (2);
if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
{
return true;
}
/* Fall through */
case COMPARE:
return 1;
if (GET_CODE (XEXP (x, 0)) == REG && REGNO (XEXP (x, 0)) == CC_REGNUM)
{
*total = 0;
return true;
}
*total += COSTS_N_INSNS (1);
if (GET_CODE (XEXP (x, 1)) == CONST_INT
&& const_ok_for_op (INTVAL (XEXP (x, 1)), code))
{
*total += rtx_cost (XEXP (x, 0), code, speed);
return true;
}
subcode = GET_CODE (XEXP (x, 0));
if (subcode == ASHIFT || subcode == ASHIFTRT
|| subcode == LSHIFTRT
|| subcode == ROTATE || subcode == ROTATERT)
{
*total += rtx_cost (XEXP (x, 1), code, speed);
*total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
return true;
}
if (subcode == MULT
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
&& ((INTVAL (XEXP (XEXP (x, 0), 1)) &
(INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0))
{
*total += rtx_cost (XEXP (x, 1), code, speed);
*total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, speed);
return true;
}
return false;
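
The scc_insn costing amounts to the classic two-instruction sequence (mov rD, #0; mov<cond> rD, #1), plus a compare when the flags are not already set by an earlier instruction. A minimal model of that decision, with hypothetical names:

#include <stdio.h>

#define COSTS_N_INSNS(n) ((n) * 4)

static int scc_cost (int flags_already_set, int compare_operand_cost)
{
  int total = COSTS_N_INSNS (2);        /* mov rD, #0; mov<cond> rD, #1 */
  if (!flags_already_set)
    total += COSTS_N_INSNS (1) + compare_operand_cost;   /* cmp first */
  return total;
}

int main (void)
{
  printf ("%d %d\n", scc_cost (1, 0), scc_cost (0, 0));  /* 8 12 */
  return 0;
}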
case UMIN:
case UMAX:
case SMIN:
case SMAX:
*total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, speed);
if (GET_CODE (XEXP (x, 1)) != CONST_INT
|| !const_ok_for_arm (INTVAL (XEXP (x, 1))))
*total += rtx_cost (XEXP (x, 1), code, speed);
return true;
case ABS:
return 4 + (mode == DImode ? 4 : 0);
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
{
*total = COSTS_N_INSNS (1);
return false;
}
*total = COSTS_N_INSNS (20);
return false;
}
*total = COSTS_N_INSNS (1);
if (mode == DImode)
*total += COSTS_N_INSNS (3);
return false;
case SIGN_EXTEND:
if (arm_arch_thumb2 && mode == SImode)
return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
if (GET_MODE_CLASS (mode) == MODE_INT)
{
*total = 0;
if (mode == DImode)
*total += COSTS_N_INSNS (1);
if (GET_MODE (XEXP (x, 0)) != SImode)
{
if (arm_arch6)
{
if (GET_CODE (XEXP (x, 0)) != MEM)
*total += COSTS_N_INSNS (1);
}
else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
*total += COSTS_N_INSNS (2);
}
return false;
}
if (GET_MODE (XEXP (x, 0)) == QImode)
return (4 + (mode == DImode ? 4 : 0)
+ (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
/* Fall through */
case ZERO_EXTEND:
if (arm_arch6 && mode == SImode)
return 1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0);
switch (GET_MODE (XEXP (x, 0)))
*total = 0;
if (GET_MODE_CLASS (mode) == MODE_INT)
{
case QImode:
return (1 + (mode == DImode ? 4 : 0)
+ (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
if (mode == DImode)
*total += COSTS_N_INSNS (1);
case HImode:
return (4 + (mode == DImode ? 4 : 0)
+ (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
if (GET_MODE (XEXP (x, 0)) != SImode)
{
if (arm_arch6)
{
if (GET_CODE (XEXP (x, 0)) != MEM)
*total += COSTS_N_INSNS (1);
}
else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
*total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ?
1 : 2);
}
case SImode:
return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
return false;
}
switch (GET_MODE (XEXP (x, 0)))
{
case V8QImode:
case V4HImode:
case V2SImode:
case V4QImode:
case V2HImode:
return 1;
*total = COSTS_N_INSNS (1);
return false;
default:
gcc_unreachable ();
}
gcc_unreachable ();
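
The extension cases encode three regimes: the extend folds into the load where the architecture has load-and-extend forms, costs one instruction with the v6 UXT/SXT family, and otherwise needs an AND (byte zero-extend) or a shift pair. A simplified standalone restatement of the ZERO_EXTEND branch, under those assumptions:

#include <stdio.h>

#define COSTS_N_INSNS(n) ((n) * 4)

static int zero_extend_cost (int operand_in_memory, int arch6, int arch4,
                             int from_byte)
{
  if (arch6)
    return operand_in_memory ? 0 : COSTS_N_INSNS (1);    /* UXTB/UXTH */
  if (arch4 && operand_in_memory)
    return 0;                                            /* LDRB/LDRH */
  return COSTS_N_INSNS (from_byte ? 1 : 2);  /* AND #255, or shift pair */
}

int main (void)
{
  /* v6 register extend, v4 load-extend, pre-v4 halfword extend.  */
  printf ("%d %d %d\n", zero_extend_cost (0, 1, 1, 1),
          zero_extend_cost (1, 0, 1, 0), zero_extend_cost (0, 0, 0, 0));
  return 0;
}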
case ZERO_EXTRACT:
case SIGN_EXTRACT:
*total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, speed);
return true;
case CONST_INT:
if (const_ok_for_arm (INTVAL (x)))
return outer == SET ? 2 : -1;
else if (outer == AND
&& const_ok_for_arm (~INTVAL (x)))
return -1;
else if ((outer == COMPARE
|| outer == PLUS || outer == MINUS)
&& const_ok_for_arm (-INTVAL (x)))
return -1;
if (const_ok_for_arm (INTVAL (x))
|| const_ok_for_arm (~INTVAL (x)))
*total = COSTS_N_INSNS (1);
else
return 5;
*total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
INTVAL (x), NULL_RTX,
NULL_RTX, 0, 0));
return true;
case CONST:
case LABEL_REF:
case SYMBOL_REF:
return 6;
*total = COSTS_N_INSNS (3);
return true;
case HIGH:
*total = COSTS_N_INSNS (1);
return true;
case LO_SUM:
return (outer == SET) ? 1 : -1;
*total = COSTS_N_INSNS (1);
*total += rtx_cost (XEXP (x, 0), code, speed);
return true;
case CONST_DOUBLE:
if (arm_const_double_rtx (x) || vfp3_const_double_rtx (x))
return outer == SET ? 2 : -1;
else if ((outer == COMPARE || outer == PLUS)
&& neg_const_double_rtx_ok_for_fpa (x))
return -1;
return 7;
if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x))
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (4);
return true;
default:
return 99;
*total = COSTS_N_INSNS (4);
return false;
}
}
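
As rewritten, arm_rtx_costs_1 follows the standard TARGET_RTX_COSTS contract: it stores its estimate in *total and returns true when that figure already covers the whole expression, or false when the caller should recurse and add the operands' costs. A toy, self-contained model of that contract (illustrative only, not GCC code):

#include <stdio.h>

#define COSTS_N_INSNS(n) ((n) * 4)

enum op { REG, CONST, ADD, MUL };
struct expr { enum op op; const struct expr *l, *r; };

/* Toy hook: true means *total is final, false means "add my operands".  */
static int hook (const struct expr *e, int *total)
{
  switch (e->op)
    {
    case REG:   *total = 0;                 return 1;
    case CONST: *total = COSTS_N_INSNS (1); return 1;
    case ADD:   *total = COSTS_N_INSNS (1); return 0;
    case MUL:   *total = COSTS_N_INSNS (4); return 0;
    }
  return 1;
}

static int cost (const struct expr *e)
{
  int total;
  if (hook (e, &total))
    return total;
  return total + (e->l ? cost (e->l) : 0) + (e->r ? cost (e->r) : 0);
}

int main (void)
{
  struct expr r = { REG, 0, 0 }, c = { CONST, 0, 0 };
  struct expr m = { MUL, &r, &c }, a = { ADD, &m, &r };
  printf ("cost: %d\n", cost (&a));   /* 4 + (16 + 0 + 4) + 0 = 24 */
  return 0;
}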
/* RTX costs when optimizing for size. */
static bool
arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
int *total)
{
enum machine_mode mode = GET_MODE (x);
if (TARGET_THUMB1)
{
/* XXX TBD. For now, use the standard costs. */
@@ -5395,19 +5710,22 @@ arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
/* RTX costs when optimizing for size. */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
arm_rtx_costs (rtx x, int code, int outer_code, int *total,
bool speed)
{
if (!speed)
return arm_size_rtx_costs (x, code, outer_code, total);
else
return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total);
return all_cores[(int)arm_tune].rtx_costs (x, code, outer_code, total,
speed);
}
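
arm_rtx_costs is now a pure dispatcher: size costing is shared across all cores, while speed costing indirects through the per-core rtx_costs pointer added to struct processors above. A self-contained sketch of that shape, with hypothetical names:

#include <stdbool.h>
#include <stdio.h>

typedef bool (*rtx_costs_fn) (const void *x, int code, int outer,
                              int *total, bool speed);

static bool shared_size_costs (const void *x, int code, int outer, int *total)
{
  (void) x; (void) code; (void) outer;
  *total = 4;                           /* placeholder size estimate */
  return true;
}

static bool slowmul_costs (const void *x, int code, int outer,
                           int *total, bool speed)
{
  (void) x; (void) code; (void) outer; (void) speed;
  *total = 20;                          /* placeholder speed estimate */
  return true;
}

struct core_desc { const char *name; rtx_costs_fn rtx_costs; };

static bool dispatch_rtx_costs (const struct core_desc *tune, const void *x,
                                int code, int outer, int *total, bool speed)
{
  if (!speed)
    return shared_size_costs (x, code, outer, total);
  return tune->rtx_costs (x, code, outer, total, speed);
}

int main (void)
{
  struct core_desc arm7 = { "arm7", slowmul_costs };
  int total;
  dispatch_rtx_costs (&arm7, 0, 0, 0, &total, true);
  printf ("%d\n", total);
  return 0;
}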
/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
supported on any "slowmul" cores, so it can be ignored. */
static bool
arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
@@ -5423,8 +5741,8 @@ arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
if (GET_MODE_CLASS (mode) == MODE_FLOAT
|| mode == DImode)
{
*total = 30;
return true;
*total = COSTS_N_INSNS (20);
return false;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
@@ -5440,20 +5758,19 @@ arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
for (j = 0; i && j < 32; j += booth_unit_size)
{
i >>= booth_unit_size;
cost += 2;
cost++;
}
*total = cost;
*total = COSTS_N_INSNS (cost);
*total += rtx_cost (XEXP (x, 0), code, speed);
return true;
}
*total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
+ (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
return true;
*total = COSTS_N_INSNS (20);
return false;
default:
*total = arm_rtx_costs_1 (x, code, outer_code);
return true;
return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
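
The constant-multiply cost comes from the Booth loop above: each iteration retires booth_unit_size bits of the constant, and the loop stops early once the remaining bits are all zero. Lifting the loop out on its own shows the effect, assuming the 2-bit unit a slow multiplier uses here and, for contrast, the 8-bit unit of the fastmul variant below:

#include <stdio.h>

/* The cost loop from above, as a standalone function.  */
static int booth_cycles (unsigned int i, int booth_unit_size)
{
  int cost = 0;
  for (int j = 0; i && j < 32; j += booth_unit_size)
    {
      i >>= booth_unit_size;
      cost++;
    }
  return cost;
}

int main (void)
{
  printf ("x * 0xff:       slow %2d cycles, fast %d\n",
          booth_cycles (0xff, 2), booth_cycles (0xff, 8));
  printf ("x * 0xffffffff: slow %2d cycles, fast %d\n",
          booth_cycles (0xffffffffu, 2), booth_cycles (0xffffffffu, 8));
  return 0;
}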
@@ -5461,7 +5778,8 @@ arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
/* RTX cost for cores with a fast multiply unit (M variants). */
static bool
arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
@@ -5482,16 +5800,15 @@ arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
*total = 8;
return true;
*total = COSTS_N_INSNS (2);
return false;
}
if (GET_MODE_CLASS (mode) == MODE_FLOAT
|| mode == DImode)
if (mode == DImode)
{
*total = 30;
return true;
*total = COSTS_N_INSNS (5);
return false;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
@@ -5507,20 +5824,34 @@ arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
for (j = 0; i && j < 32; j += booth_unit_size)
{
i >>= booth_unit_size;
cost += 2;
cost++;
}
*total = cost;
return true;
*total = COSTS_N_INSNS (cost);
return false;
}
*total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
+ (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
return true;
if (mode == SImode)
{
*total = COSTS_N_INSNS (4);
return false;
}
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
{
*total = COSTS_N_INSNS (1);
return false;
}
}
/* Requires a lib call */
*total = COSTS_N_INSNS (20);
return false;
default:
*total = arm_rtx_costs_1 (x, code, outer_code);
return true;
return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
@@ -5529,7 +5860,7 @@ arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
so it can be ignored. */
static bool
arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
@@ -5541,6 +5872,15 @@ arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
switch (code)
{
case COMPARE:
if (GET_CODE (XEXP (x, 0)) != MULT)
return arm_rtx_costs_1 (x, outer_code, total, speed);
/* A COMPARE of a MULT is slow on XScale; the muls instruction
will stall until the multiplication is complete. */
*total = COSTS_N_INSNS (3);
return false;
case MULT:
/* There is no point basing this on the tuning, since it is always the
fast variant if it exists at all. */
@@ -5549,60 +5889,58 @@ arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
*total = 8;
return true;
*total = COSTS_N_INSNS (2);
return false;
}
if (GET_MODE_CLASS (mode) == MODE_FLOAT
|| mode == DImode)
if (mode == DImode)
{
*total = 30;
return true;
*total = COSTS_N_INSNS (5);
return false;
}
if (GET_CODE (XEXP (x, 1)) == CONST_INT)
{
unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
& (unsigned HOST_WIDE_INT) 0xffffffff);
int cost, const_ok = const_ok_for_arm (i);
/* If operand 1 is a constant we can more accurately
calculate the cost of the multiply. The multiplier can
retire 15 bits on the first cycle and a further 12 on the
second. We do, of course, have to load the constant into
a register first. */
unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
/* There's a general overhead of one cycle. */
int cost = 1;
unsigned HOST_WIDE_INT masked_const;
/* The cost will be related to two insns.
First a load of the constant (MOV or LDR), then a multiply. */
cost = 2;
if (! const_ok)
cost += 1; /* LDR is probably more expensive because
of longer result latency. */
if (i & 0x80000000)
i = ~i;
i &= (unsigned HOST_WIDE_INT) 0xffffffff;
masked_const = i & 0xffff8000;
if (masked_const != 0 && masked_const != 0xffff8000)
if (masked_const != 0)
{
cost++;
masked_const = i & 0xf8000000;
if (masked_const == 0 || masked_const == 0xf8000000)
cost += 1;
else
cost += 2;
if (masked_const != 0)
cost++;
}
*total = cost;
return true;
*total = COSTS_N_INSNS (cost);
return false;
}
*total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
+ (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
return true;
if (mode == SImode)
{
*total = COSTS_N_INSNS (3);
return false;
}
case COMPARE:
/* A COMPARE of a MULT is slow on XScale; the muls instruction
will stall until the multiplication is complete. */
if (GET_CODE (XEXP (x, 0)) == MULT)
*total = 4 + rtx_cost (XEXP (x, 0), code, true);
else
*total = arm_rtx_costs_1 (x, code, outer_code);
return true;
/* Requires a lib call */
*total = COSTS_N_INSNS (20);
return false;
default:
*total = arm_rtx_costs_1 (x, code, outer_code);
return true;
return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
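
The comment above says the XScale multiplier can retire 15 bits of the constant on the first cycle and a further 12 on the second; the masked_const tests implement exactly that, with negative constants costed via their complement. A standalone restatement:

#include <stdio.h>

/* One base cycle, one more if bits above the low 15 survive, and a
   third if bits above the low 27 survive.  */
static int xscale_mul_cycles (unsigned int i)
{
  int cost = 1;
  if (i & 0x80000000u)
    i = ~i;                     /* negative constants recode cheaply */
  if (i & 0xffff8000u)
    {
      cost++;
      if (i & 0xf8000000u)
        cost++;
    }
  return cost;
}

int main (void)
{
  printf ("%d %d %d\n", xscale_mul_cycles (100),        /* 1 */
          xscale_mul_cycles (0x12345),                  /* 2 */
          xscale_mul_cycles (0x12345678));              /* 3 */
  return 0;
}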
@@ -5610,11 +5948,10 @@ arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
/* RTX costs for 9e (and later) cores. */
static bool
arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
int *total, bool speed)
{
enum machine_mode mode = GET_MODE (x);
int nonreg_cost;
int cost;
if (TARGET_THUMB1)
{
@@ -5640,35 +5977,37 @@ arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
*total = 3;
return true;
*total = COSTS_N_INSNS (2);
return false;
}
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
*total = 30;
return true;
}
if (mode == DImode)
{
cost = 7;
nonreg_cost = 8;
*total = COSTS_N_INSNS (5);
return false;
}
else
if (mode == SImode)
{
cost = 2;
nonreg_cost = 4;
*total = COSTS_N_INSNS (2);
return false;
}
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
{
*total = COSTS_N_INSNS (1);
return false;
}
}
*total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
+ (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
return true;
*total = COSTS_N_INSNS (20);
return false;
default:
*total = arm_rtx_costs_1 (x, code, outer_code);
return true;
return arm_rtx_costs_1 (x, outer_code, total, speed);
}
}
/* All address computations that can be done are free, but rtx cost returns
......