Commit 5bea0c6c by Kyrylo Tkachov, committed by Kyrylo Tkachov

aarch-common-protos.h (struct alu_cost_table): New.

[gcc/]
2013-10-03  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
            Richard Earnshaw  <richard.earnshaw@arm.com>

	* config/arm/aarch-common-protos.h (struct alu_cost_table): New.
	(struct mult_cost_table): Likewise.
	(struct mem_cost_table): Likewise.
	(struct fp_cost_table): Likewise.
	(struct vector_cost_table): Likewise.
	(cpu_cost_table): Likewise.
	* config/arm/arm.opt (mold-rtx-costs): New option.
	(mnew-generic-costs): Likewise.
	* config/arm/arm.c (generic_extra_costs): New table.
	(cortexa15_extra_costs): Likewise.
	(arm_slowmul_tune): Use NULL as new costs.
	(arm_fastmul_tune): Likewise.
	(arm_strongarm_tune): Likewise.
	(arm_xscale_tune): Likewise.
	(arm_9e_tune): Likewise.
	(arm_v6t2_tune): Likewise.
	(arm_cortex_a5_tune): Likewise.
	(arm_cortex_a9_tune): Likewise.
	(arm_v6m_tune): Likewise.
	(arm_fa726te_tune): Likewise.
	(arm_cortex_a15_tune): Use cortexa15_extra_costs.
	(arm_cortex_tune): Use generic_extra_costs.
	(shifter_op_p): New function.
	(arm_unspec_cost): Likewise.
	(LIBCALL_COST): Define.
	(arm_new_rtx_costs): New function.
	(arm_rtx_costs): Use arm_new_rtx_costs when core-specific
	table is available. Use old costs otherwise unless mnew-generic-costs
	is specified.
	* config/arm/arm-protos.h (tune_params): Add insn_extra_cost field.
	(cpu_cost_table): Declare.

Co-Authored-By: Richard Earnshaw <rearnsha@arm.com>

From-SVN: r203160
/* Function prototypes for instruction scheduling dependency routines,
defined in aarch-common.c
/* Functions and structures shared between arm and aarch64.
Copyright (C) 1991-2013 Free Software Foundation, Inc.
Contributed by ARM Ltd.
@@ -33,4 +32,102 @@ extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
extern int arm_no_early_mul_dep (rtx, rtx);
extern int arm_no_early_store_addr_dep (rtx, rtx);
/* RTX cost table definitions. These are used when tuning for speed rather
than for size and should reflect the _additional_ cost over the cost
of the fastest instruction in the machine, which is COSTS_N_INSNS (1).
Therefore it's okay for some costs to be 0.
Costs may not have a negative value. */
struct alu_cost_table
{
const int arith; /* ADD/SUB. */
const int logical; /* AND/ORR/EOR/BIC, etc. */
const int shift; /* Simple shift. */
const int shift_reg; /* Simple shift by reg. */
const int arith_shift; /* Additional when arith also shifts... */
const int arith_shift_reg; /* ... and when the shift is by a reg. */
const int log_shift; /* Additional when logic also shifts... */
const int log_shift_reg; /* ... and when the shift is by a reg. */
const int extnd; /* Zero/sign extension. */
const int extnd_arith; /* Extend and arith. */
const int bfi; /* Bit-field insert. */
const int bfx; /* Bit-field extraction. */
const int clz; /* Count Leading Zeros. */
const int non_exec; /* Extra cost when not executing insn. */
const bool non_exec_costs_exec; /* True if non-execution must add the exec
cost. */
};
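As a brief illustration of the convention described above (each table entry is the extra cost on top of the baseline COSTS_N_INSNS (1)), the hypothetical helper below shows how a cost routine is expected to combine the two; the function name and scenario are illustrative only, not part of this patch.

/* Hypothetical sketch, not part of the patch: combining the baseline
   insn cost with an ALU table entry for an ADD that also shifts.  */
static int
example_arith_shift_cost (const struct alu_cost_table *table, bool speed_p)
{
  int cost = COSTS_N_INSNS (1);	/* Every insn costs at least this much.  */

  if (speed_p)
    /* Extra cycles for the shifting ADD on this core; may be zero.  */
    cost += table->arith_shift;

  return cost;
}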
struct mult_cost_table
{
const int simple;
const int flag_setting; /* Additional cost if multiply sets flags. */
const int extend;
const int add;
const int extend_add;
const int idiv;
};
/* Calculations of LDM costs are complex. We assume an initial cost
(ldm_1st) which will load the number of registers mentioned in
ldm_regs_per_insn_1st registers; then each additional
ldm_regs_per_insn_subsequent registers cost one more insn.
Similarly for STM operations.
Therefore the ldm_regs_per_insn_1st/stm_regs_per_insn_1st and
ldm_regs_per_insn_subsequent/stm_regs_per_insn_subsequent fields indicate
the number of registers loaded/stored and are expressed by a simple integer
and not by a COSTS_N_INSNS (N) expression.
*/
struct mem_cost_table
{
const int load;
const int load_sign_extend; /* Additional to load cost. */
const int ldrd; /* Cost of LDRD. */
const int ldm_1st;
const int ldm_regs_per_insn_1st;
const int ldm_regs_per_insn_subsequent;
const int loadf; /* SFmode. */
const int loadd; /* DFmode. */
const int load_unaligned; /* Extra for unaligned loads. */
const int store;
const int strd;
const int stm_1st;
const int stm_regs_per_insn_1st;
const int stm_regs_per_insn_subsequent;
const int storef; /* SFmode. */
const int stored; /* DFmode. */
const int store_unaligned; /* Extra for unaligned stores. */
};
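To make the LDM/STM cost formula sketched in the comment above concrete, here is a hedged worked example using the generic table values that appear later in this patch (ldm_1st = COSTS_N_INSNS (2), ldm_regs_per_insn_1st = 1, ldm_regs_per_insn_subsequent = 1).

/* Worked example (illustrative only): a 4-register LDM under the generic
   costs evaluates the documented formula as

     ldm_1st + COSTS_N_INSNS ((MAX (4 - 1, 0) + 1 - 1) / 1)
   = COSTS_N_INSNS (2) + COSTS_N_INSNS (3)
   = COSTS_N_INSNS (5)

   i.e. the initial LDM covers the first register at cost COSTS_N_INSNS (2),
   and each additional register adds one more insn.  */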
struct fp_cost_table
{
const int div;
const int mult;
const int mult_addsub; /* Non-fused. */
const int fma; /* Fused. */
const int addsub;
const int fpconst; /* Immediate. */
const int neg; /* NEG and ABS. */
const int compare;
const int widen; /* Widen to this size. */
const int narrow; /* Narrow from this size. */
const int toint;
const int fromint;
const int roundint; /* V8 round to integral, remains FP format. */
};
struct vector_cost_table
{
const int alu;
};
struct cpu_cost_table
{
const struct alu_cost_table alu;
const struct mult_cost_table mult[2]; /* SImode and DImode. */
const struct mem_cost_table ldst;
const struct fp_cost_table fp[2]; /* SFmode and DFmode. */
const struct vector_cost_table vect;
};
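A minimal sketch (not part of the patch) of the indexing convention used by the two-element mult[] and fp[] arrays; the helper name is hypothetical, but the boolean-index pattern mirrors how the new cost code below selects the DFmode column with `mode != SFmode'.

/* Hypothetical accessor: fp[0] holds the SFmode costs, fp[1] the DFmode
   costs, so a boolean predicate picks the right column directly.  */
static int
example_fp_mult_extra (const struct cpu_cost_table *costs, bool is_double)
{
  return costs->fp[is_double].mult;
}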
#endif /* GCC_AARCH_COMMON_PROTOS_H */
@@ -246,9 +246,12 @@ struct cpu_vec_costs {
#ifdef RTX_CODE
/* This needs to be here because we need RTX_CODE and similar. */
struct cpu_cost_table;
struct tune_params
{
bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
const struct cpu_cost_table *insn_extra_cost;
bool (*sched_adjust_cost) (rtx, rtx, rtx, int *);
int constant_limit;
/* Maximum number of instructions to conditionalise. */
......
@@ -945,10 +945,212 @@ struct cpu_vec_costs arm_default_vec_cost = {
1, /* cond_not_taken_branch_cost. */
};
const struct cpu_cost_table generic_extra_costs =
{
/* ALU */
{
0, /* Arith. */
0, /* Logical. */
0, /* Shift. */
COSTS_N_INSNS (1), /* Shift_reg. */
0, /* Arith_shift. */
COSTS_N_INSNS (1), /* Arith_shift_reg. */
0, /* Log_shift. */
COSTS_N_INSNS (1), /* Log_shift_reg. */
0, /* Extend. */
COSTS_N_INSNS (1), /* Extend_arith. */
0, /* Bfi. */
0, /* Bfx. */
0, /* Clz. */
COSTS_N_INSNS (1), /* non_exec. */
false /* non_exec_costs_exec. */
},
/* MULT SImode */
{
{
COSTS_N_INSNS (2), /* Simple. */
COSTS_N_INSNS (1), /* Flag_setting. */
COSTS_N_INSNS (2), /* Extend. */
COSTS_N_INSNS (3), /* Add. */
COSTS_N_INSNS (3), /* Extend_add. */
COSTS_N_INSNS (8) /* Idiv. */
},
/* MULT DImode */
{
0, /* Simple (N/A). */
0, /* Flag_setting (N/A). */
COSTS_N_INSNS (2), /* Extend. */
0, /* Add (N/A). */
COSTS_N_INSNS (3), /* Extend_add. */
0 /* Idiv (N/A). */
}
},
/* LD/ST */
{
COSTS_N_INSNS (2), /* Load. */
COSTS_N_INSNS (2), /* Load_sign_extend. */
COSTS_N_INSNS (3), /* Ldrd. */
COSTS_N_INSNS (2), /* Ldm_1st. */
1, /* Ldm_regs_per_insn_1st. */
1, /* Ldm_regs_per_insn_subsequent. */
COSTS_N_INSNS (2), /* Loadf. */
COSTS_N_INSNS (3), /* Loadd. */
COSTS_N_INSNS (1), /* Load_unaligned. */
COSTS_N_INSNS (2), /* Store. */
COSTS_N_INSNS (3), /* Strd. */
COSTS_N_INSNS (2), /* Stm_1st. */
1, /* Stm_regs_per_insn_1st. */
1, /* Stm_regs_per_insn_subsequent. */
COSTS_N_INSNS (2), /* Storef. */
COSTS_N_INSNS (3), /* Stored. */
COSTS_N_INSNS (1) /* Store_unaligned. */
},
{
/* FP SFmode */
{
COSTS_N_INSNS (7), /* Div. */
COSTS_N_INSNS (2), /* Mult. */
COSTS_N_INSNS (3), /* Mult_addsub. */
COSTS_N_INSNS (3), /* Fma. */
COSTS_N_INSNS (1), /* Addsub. */
0, /* Fpconst. */
0, /* Neg. */
0, /* Compare. */
0, /* Widen. */
0, /* Narrow. */
0, /* Toint. */
0, /* Fromint. */
0 /* Roundint. */
},
/* FP DFmode */
{
COSTS_N_INSNS (15), /* Div. */
COSTS_N_INSNS (5), /* Mult. */
COSTS_N_INSNS (7), /* Mult_addsub. */
COSTS_N_INSNS (7), /* Fma. */
COSTS_N_INSNS (3), /* Addsub. */
0, /* Fpconst. */
0, /* Neg. */
0, /* Compare. */
0, /* Widen. */
0, /* Narrow. */
0, /* Toint. */
0, /* Fromint. */
0 /* Roundint. */
}
},
/* Vector */
{
COSTS_N_INSNS (1) /* Alu. */
}
};
const struct cpu_cost_table cortexa15_extra_costs =
{
/* ALU */
{
COSTS_N_INSNS (1), /* Arith. */
COSTS_N_INSNS (1), /* Logical. */
COSTS_N_INSNS (1), /* Shift. */
COSTS_N_INSNS (1), /* Shift_reg. */
COSTS_N_INSNS (1), /* Arith_shift. */
COSTS_N_INSNS (1), /* Arith_shift_reg. */
COSTS_N_INSNS (1), /* Log_shift. */
COSTS_N_INSNS (1), /* Log_shift_reg. */
COSTS_N_INSNS (1), /* Extend. */
COSTS_N_INSNS (2), /* Extend_arith. */
COSTS_N_INSNS (2), /* Bfi. */
COSTS_N_INSNS (1), /* Bfx. */
COSTS_N_INSNS (1), /* Clz. */
COSTS_N_INSNS (1), /* non_exec. */
true /* non_exec_costs_exec. */
},
/* MULT SImode */
{
{
COSTS_N_INSNS (3), /* Simple. */
COSTS_N_INSNS (4), /* Flag_setting. */
COSTS_N_INSNS (3), /* Extend. */
COSTS_N_INSNS (4), /* Add. */
COSTS_N_INSNS (4), /* Extend_add. */
COSTS_N_INSNS (19) /* Idiv. */
},
/* MULT DImode */
{
0, /* Simple (N/A). */
0, /* Flag_setting (N/A). */
COSTS_N_INSNS (4), /* Extend. */
0, /* Add (N/A). */
COSTS_N_INSNS (6), /* Extend_add. */
0 /* Idiv (N/A). */
}
},
/* LD/ST */
{
COSTS_N_INSNS (4), /* Load. */
COSTS_N_INSNS (4), /* Load_sign_extend. */
COSTS_N_INSNS (4), /* Ldrd. */
COSTS_N_INSNS (5), /* Ldm_1st. */
1, /* Ldm_regs_per_insn_1st. */
2, /* Ldm_regs_per_insn_subsequent. */
COSTS_N_INSNS (5), /* Loadf. */
COSTS_N_INSNS (5), /* Loadd. */
COSTS_N_INSNS (1), /* Load_unaligned. */
COSTS_N_INSNS (1), /* Store. */
COSTS_N_INSNS (1), /* Strd. */
COSTS_N_INSNS (2), /* Stm_1st. */
1, /* Stm_regs_per_insn_1st. */
2, /* Stm_regs_per_insn_subsequent. */
COSTS_N_INSNS (1), /* Storef. */
COSTS_N_INSNS (1), /* Stored. */
COSTS_N_INSNS (1) /* Store_unaligned. */
},
{
/* FP SFmode */
{
COSTS_N_INSNS (18), /* Div. */
COSTS_N_INSNS (5), /* Mult. */
COSTS_N_INSNS (3), /* Mult_addsub. */
COSTS_N_INSNS (13), /* Fma. */
COSTS_N_INSNS (5), /* Addsub. */
COSTS_N_INSNS (5), /* Fpconst. */
COSTS_N_INSNS (3), /* Neg. */
COSTS_N_INSNS (3), /* Compare. */
COSTS_N_INSNS (3), /* Widen. */
COSTS_N_INSNS (3), /* Narrow. */
COSTS_N_INSNS (3), /* Toint. */
COSTS_N_INSNS (3), /* Fromint. */
COSTS_N_INSNS (3) /* Roundint. */
},
/* FP DFmode */
{
COSTS_N_INSNS (32), /* Div. */
COSTS_N_INSNS (5), /* Mult. */
COSTS_N_INSNS (3), /* Mult_addsub. */
COSTS_N_INSNS (13), /* Fma. */
COSTS_N_INSNS (5), /* Addsub. */
COSTS_N_INSNS (3), /* Fpconst. */
COSTS_N_INSNS (3), /* Neg. */
COSTS_N_INSNS (3), /* Compare. */
COSTS_N_INSNS (3), /* Widen. */
COSTS_N_INSNS (3), /* Narrow. */
COSTS_N_INSNS (3), /* Toint. */
COSTS_N_INSNS (3), /* Fromint. */
COSTS_N_INSNS (3) /* Roundint. */
}
},
/* Vector */
{
COSTS_N_INSNS (1) /* Alu. */
}
};
const struct tune_params arm_slowmul_tune =
{
arm_slowmul_rtx_costs,
NULL,
NULL,
3, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -964,6 +1166,7 @@ const struct tune_params arm_fastmul_tune =
{
arm_fastmul_rtx_costs,
NULL,
NULL,
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -982,6 +1185,7 @@ const struct tune_params arm_strongarm_tune =
{
arm_fastmul_rtx_costs,
NULL,
NULL,
1, /* Constant limit. */
3, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -996,6 +1200,7 @@ const struct tune_params arm_strongarm_tune =
const struct tune_params arm_xscale_tune =
{
arm_xscale_rtx_costs,
NULL,
xscale_sched_adjust_cost,
2, /* Constant limit. */
3, /* Max cond insns. */
@@ -1012,6 +1217,7 @@ const struct tune_params arm_9e_tune =
{
arm_9e_rtx_costs,
NULL,
NULL,
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1027,6 +1233,7 @@ const struct tune_params arm_v6t2_tune =
{
arm_9e_rtx_costs,
NULL,
NULL,
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1042,6 +1249,7 @@ const struct tune_params arm_v6t2_tune =
const struct tune_params arm_cortex_tune =
{
arm_9e_rtx_costs,
&generic_extra_costs,
NULL,
1, /* Constant limit. */
5, /* Max cond insns. */
@@ -1057,6 +1265,7 @@ const struct tune_params arm_cortex_tune =
const struct tune_params arm_cortex_a15_tune =
{
arm_9e_rtx_costs,
&cortexa15_extra_costs,
NULL,
1, /* Constant limit. */
2, /* Max cond insns. */
@@ -1076,6 +1285,7 @@ const struct tune_params arm_cortex_a5_tune =
{
arm_9e_rtx_costs,
NULL,
NULL,
1, /* Constant limit. */
1, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1090,6 +1300,7 @@ const struct tune_params arm_cortex_a5_tune =
const struct tune_params arm_cortex_a9_tune =
{
arm_9e_rtx_costs,
NULL,
cortex_a9_sched_adjust_cost,
1, /* Constant limit. */
5, /* Max cond insns. */
@@ -1108,6 +1319,7 @@ const struct tune_params arm_v6m_tune =
{
arm_9e_rtx_costs,
NULL,
NULL,
1, /* Constant limit. */
5, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
@@ -1122,6 +1334,7 @@ const struct tune_params arm_v6m_tune =
const struct tune_params arm_fa726te_tune =
{
arm_9e_rtx_costs,
NULL,
fa726te_sched_adjust_cost,
1, /* Constant limit. */
5, /* Max cond insns. */
@@ -8291,18 +8504,1707 @@ arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
}
}
/* RTX costs when optimizing for size. */
/* Helper function for arm_rtx_costs. If the operand is a valid shift
operand, then return the operand that is being shifted. If the shift
is not by a constant, then set SHIFT_REG to point to the operand.
Return NULL if OP is not a shifter operand. */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
enum rtx_code code = GET_CODE (op);
if (code == MULT && CONST_INT_P (XEXP (op, 1))
&& exact_log2 (INTVAL (XEXP (op, 1))) > 0)
return XEXP (op, 0);
else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
return XEXP (op, 0);
else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
|| code == ASHIFTRT)
{
if (!CONST_INT_P (XEXP (op, 1)))
*shift_reg = XEXP (op, 1);
return XEXP (op, 0);
}
return NULL;
}
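Some illustrative operand shapes (not part of the patch) that shifter_op_p accepts or rejects, matching the cases handled above:

/* Examples (illustrative only):
     (ashift (reg r1) (const_int 3))  -> returns (reg r1), *SHIFT_REG untouched
     (ashift (reg r1) (reg r2))       -> returns (reg r1), *SHIFT_REG = (reg r2)
     (mult (reg r1) (const_int 8))    -> returns (reg r1); a power-of-two
                                         multiply is treated as a constant shift
     (plus (reg r1) (reg r2))         -> returns NULL, not a shifter operand.  */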
static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
gcc_assert (GET_CODE (x) == UNSPEC);
switch (XINT (x, 1))
{
case UNSPEC_UNALIGNED_LOAD:
/* We can only do unaligned loads into the integer unit, and we can't
use LDM or LDRD. */
*cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
if (speed_p)
*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
+ extra_cost->ldst.load_unaligned);
#ifdef NOT_YET
*cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
ADDR_SPACE_GENERIC, speed_p);
#endif
return true;
case UNSPEC_UNALIGNED_STORE:
*cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
if (speed_p)
*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
+ extra_cost->ldst.store_unaligned);
*cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
#ifdef NOT_YET
*cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
ADDR_SPACE_GENERIC, speed_p);
#endif
return true;
case UNSPEC_VRINTZ:
case UNSPEC_VRINTP:
case UNSPEC_VRINTM:
case UNSPEC_VRINTR:
case UNSPEC_VRINTX:
case UNSPEC_VRINTA:
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
return true;
default:
*cost = COSTS_N_INSNS (2);
break;
}
return false;
}
/* Cost of a libcall. We assume one insn per argument, an amount for the
call (one insn for -Os) and then one for processing the result. */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
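As a hedged worked example of the macro above (the DImode division case below uses it with N = 2):

/* LIBCALL_COST (2) expands to COSTS_N_INSNS (2 + 18) = COSTS_N_INSNS (20)
   when optimizing for speed, and to COSTS_N_INSNS (2 + 2) = COSTS_N_INSNS (4)
   at -Os: two argument moves, plus the call and the result handling.  */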
/* RTX costs. Make an estimate of the cost of executing the operation
X, which is contained with an operation with code OUTER_CODE.
SPEED_P indicates whether the cost desired is the performance cost,
or the size cost. The estimate is stored in COST and the return
value is TRUE if the cost calculation is final, or FALSE if the
caller should recurse through the operands of X to add additional
costs.
We currently make no attempt to model the size savings of Thumb-2
16-bit instructions. At the normal points in compilation where
this code is called we have no measure of whether the condition
flags are live or not, and thus no realistic way to determine what
the size will eventually be. */
static bool
arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
const struct cpu_cost_table *extra_cost,
int *cost, bool speed_p)
{
enum machine_mode mode = GET_MODE (x);
if (TARGET_THUMB1)
{
if (speed_p)
*cost = thumb1_rtx_costs (x, code, outer_code);
else
*cost = thumb1_size_rtx_costs (x, code, outer_code);
return true;
}
switch (code)
{
case SET:
*cost = 0;
if (REG_P (SET_SRC (x))
&& REG_P (SET_DEST (x)))
{
/* Assume that most copies can be done with a single insn,
unless we don't have HW FP, in which case everything
larger than word mode will require two insns. */
*cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
&& GET_MODE_SIZE (mode) > 4)
|| mode == DImode)
? 2 : 1);
/* Conditional register moves can be encoded
in 16 bits in Thumb mode. */
if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
*cost >>= 1;
}
if (CONST_INT_P (SET_SRC (x)))
{
/* Handle CONST_INT here, since the value doesn't have a mode
and we would otherwise be unable to work out the true cost. */
*cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
mode = GET_MODE (SET_DEST (x));
outer_code = SET;
/* Slightly lower the cost of setting a core reg to a constant.
This helps break up chains and allows for better scheduling. */
if (REG_P (SET_DEST (x))
&& REGNO (SET_DEST (x)) <= LR_REGNUM)
*cost -= 1;
x = SET_SRC (x);
/* Immediate moves with an immediate in the range [0, 255] can be
encoded in 16 bits in Thumb mode. */
if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
&& INTVAL (x) >= 0 && INTVAL (x) <=255)
*cost >>= 1;
goto const_int_cost;
}
return false;
case MEM:
/* A memory access costs 1 insn if the mode is small, or the address is
a single register, otherwise it costs one insn per word. */
if (REG_P (XEXP (x, 0)))
*cost = COSTS_N_INSNS (1);
else if (flag_pic
&& GET_CODE (XEXP (x, 0)) == PLUS
&& will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
/* This will be split into two instructions.
See arm.md:calculate_pic_address. */
*cost = COSTS_N_INSNS (2);
else
*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
/* For speed optimizations, add the costs of the address and
accessing memory. */
if (speed_p)
#ifdef NOT_YET
*cost += (extra_cost->ldst.load
+ arm_address_cost (XEXP (x, 0), mode,
ADDR_SPACE_GENERIC, speed_p));
#else
*cost += extra_cost->ldst.load;
#endif
return true;
case PARALLEL:
{
/* Calculations of LDM costs are complex. We assume an initial cost
(ldm_1st) which will load the number of registers mentioned in
ldm_regs_per_insn_1st registers; then each additional
ldm_regs_per_insn_subsequent registers cost one more insn. The
formula for N regs is thus:
ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
+ ldm_regs_per_insn_subsequent - 1)
/ ldm_regs_per_insn_subsequent).
Additional costs may also be added for addressing. A similar
formula is used for STM. */
bool is_ldm = load_multiple_operation (x, SImode);
bool is_stm = store_multiple_operation (x, SImode);
*cost = COSTS_N_INSNS (1);
if (is_ldm || is_stm)
{
if (speed_p)
{
HOST_WIDE_INT nregs = XVECLEN (x, 0);
HOST_WIDE_INT regs_per_insn_1st = is_ldm
? extra_cost->ldst.ldm_regs_per_insn_1st
: extra_cost->ldst.stm_regs_per_insn_1st;
HOST_WIDE_INT regs_per_insn_sub = is_ldm
? extra_cost->ldst.ldm_regs_per_insn_subsequent
: extra_cost->ldst.stm_regs_per_insn_subsequent;
*cost += regs_per_insn_1st
+ COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
+ regs_per_insn_sub - 1)
/ regs_per_insn_sub);
return true;
}
}
return false;
}
case DIV:
case UDIV:
if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
&& (mode == SFmode || !TARGET_VFP_SINGLE))
*cost = COSTS_N_INSNS (speed_p
? extra_cost->fp[mode != SFmode].div : 1);
else if (mode == SImode && TARGET_IDIV)
*cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
else
*cost = LIBCALL_COST (2);
return false; /* All arguments must be in registers. */
case MOD:
case UMOD:
*cost = LIBCALL_COST (2);
return false; /* All arguments must be in registers. */
case ROTATE:
if (mode == SImode && REG_P (XEXP (x, 1)))
{
*cost = (COSTS_N_INSNS (2)
+ rtx_cost (XEXP (x, 0), code, 0, speed_p));
if (speed_p)
*cost += extra_cost->alu.shift_reg;
return true;
}
/* Fall through */
case ROTATERT:
case ASHIFT:
case LSHIFTRT:
case ASHIFTRT:
if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
{
*cost = (COSTS_N_INSNS (3)
+ rtx_cost (XEXP (x, 0), code, 0, speed_p));
if (speed_p)
*cost += 2 * extra_cost->alu.shift;
return true;
}
else if (mode == SImode)
{
*cost = (COSTS_N_INSNS (1)
+ rtx_cost (XEXP (x, 0), code, 0, speed_p));
/* Slightly disparage register shifts at -Os, but not by much. */
if (!CONST_INT_P (XEXP (x, 1)))
*cost += (speed_p ? extra_cost->alu.shift_reg : 1
+ rtx_cost (XEXP (x, 1), code, 1, speed_p));
return true;
}
else if (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_SIZE (mode) < 4)
{
if (code == ASHIFT)
{
*cost = (COSTS_N_INSNS (1)
+ rtx_cost (XEXP (x, 0), code, 0, speed_p));
/* Slightly disparage register shifts at -Os, but not by
much. */
if (!CONST_INT_P (XEXP (x, 1)))
*cost += (speed_p ? extra_cost->alu.shift_reg : 1
+ rtx_cost (XEXP (x, 1), code, 1, speed_p));
}
else if (code == LSHIFTRT || code == ASHIFTRT)
{
if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
{
/* Can use SBFX/UBFX. */
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.bfx;
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
}
else
{
*cost = COSTS_N_INSNS (2);
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
if (speed_p)
{
if (CONST_INT_P (XEXP (x, 1)))
*cost += 2 * extra_cost->alu.shift;
else
*cost += (extra_cost->alu.shift
+ extra_cost->alu.shift_reg);
}
else
/* Slightly disparage register shifts. */
*cost += !CONST_INT_P (XEXP (x, 1));
}
}
else /* Rotates. */
{
*cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
if (speed_p)
{
if (CONST_INT_P (XEXP (x, 1)))
*cost += (2 * extra_cost->alu.shift
+ extra_cost->alu.log_shift);
else
*cost += (extra_cost->alu.shift
+ extra_cost->alu.shift_reg
+ extra_cost->alu.log_shift_reg);
}
}
return true;
}
*cost = LIBCALL_COST (2);
return false;
case MINUS:
if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
&& (mode == SFmode || !TARGET_VFP_SINGLE))
{
*cost = COSTS_N_INSNS (1);
if (GET_CODE (XEXP (x, 0)) == MULT
|| GET_CODE (XEXP (x, 1)) == MULT)
{
rtx mul_op0, mul_op1, sub_op;
if (speed_p)
*cost += extra_cost->fp[mode != SFmode].mult_addsub;
if (GET_CODE (XEXP (x, 0)) == MULT)
{
mul_op0 = XEXP (XEXP (x, 0), 0);
mul_op1 = XEXP (XEXP (x, 0), 1);
sub_op = XEXP (x, 1);
}
else
{
mul_op0 = XEXP (XEXP (x, 1), 0);
mul_op1 = XEXP (XEXP (x, 1), 1);
sub_op = XEXP (x, 0);
}
/* The first operand of the multiply may be optionally
negated. */
if (GET_CODE (mul_op0) == NEG)
mul_op0 = XEXP (mul_op0, 0);
*cost += (rtx_cost (mul_op0, code, 0, speed_p)
+ rtx_cost (mul_op1, code, 0, speed_p)
+ rtx_cost (sub_op, code, 0, speed_p));
return true;
}
if (speed_p)
*cost += extra_cost->fp[mode != SFmode].addsub;
return false;
}
if (mode == SImode)
{
rtx shift_by_reg = NULL;
rtx shift_op;
rtx non_shift_op;
*cost = COSTS_N_INSNS (1);
shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
if (shift_op == NULL)
{
shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
non_shift_op = XEXP (x, 0);
}
else
non_shift_op = XEXP (x, 1);
if (shift_op != NULL)
{
if (shift_by_reg != NULL)
{
if (speed_p)
*cost += extra_cost->alu.arith_shift_reg;
*cost += rtx_cost (shift_by_reg, code, 0, speed_p);
}
else if (speed_p)
*cost += extra_cost->alu.arith_shift;
*cost += (rtx_cost (shift_op, code, 0, speed_p)
+ rtx_cost (non_shift_op, code, 0, speed_p));
return true;
}
if (arm_arch_thumb2
&& GET_CODE (XEXP (x, 1)) == MULT)
{
/* MLS. */
if (speed_p)
*cost += extra_cost->mult[0].add;
*cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
+ rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
+ rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
return true;
}
if (CONST_INT_P (XEXP (x, 0)))
{
int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
INTVAL (XEXP (x, 0)), NULL_RTX,
NULL_RTX, 1, 0);
*cost = COSTS_N_INSNS (insns);
if (speed_p)
*cost += insns * extra_cost->alu.arith;
*cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
return true;
}
return false;
}
if (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_SIZE (mode) < 4)
{
/* Slightly disparage, as we might need to widen the result. */
*cost = 1 + COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.arith;
if (CONST_INT_P (XEXP (x, 0)))
{
*cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
return true;
}
return false;
}
if (mode == DImode)
{
*cost = COSTS_N_INSNS (2);
if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
{
rtx op1 = XEXP (x, 1);
if (speed_p)
*cost += 2 * extra_cost->alu.arith;
if (GET_CODE (op1) == ZERO_EXTEND)
*cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
else
*cost += rtx_cost (op1, MINUS, 1, speed_p);
*cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
0, speed_p);
return true;
}
else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
{
if (speed_p)
*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
*cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
0, speed_p)
+ rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
return true;
}
else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
{
if (speed_p)
*cost += (extra_cost->alu.arith
+ (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
? extra_cost->alu.arith
: extra_cost->alu.arith_shift));
*cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
+ rtx_cost (XEXP (XEXP (x, 1), 0),
GET_CODE (XEXP (x, 1)), 0, speed_p));
return true;
}
if (speed_p)
*cost += 2 * extra_cost->alu.arith;
return false;
}
/* Vector mode? */
*cost = LIBCALL_COST (2);
return false;
case PLUS:
if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
&& (mode == SFmode || !TARGET_VFP_SINGLE))
{
*cost = COSTS_N_INSNS (1);
if (GET_CODE (XEXP (x, 0)) == MULT)
{
rtx mul_op0, mul_op1, add_op;
if (speed_p)
*cost += extra_cost->fp[mode != SFmode].mult_addsub;
mul_op0 = XEXP (XEXP (x, 0), 0);
mul_op1 = XEXP (XEXP (x, 0), 1);
add_op = XEXP (x, 1);
*cost += (rtx_cost (mul_op0, code, 0, speed_p)
+ rtx_cost (mul_op1, code, 0, speed_p)
+ rtx_cost (add_op, code, 0, speed_p));
return true;
}
if (speed_p)
*cost += extra_cost->fp[mode != SFmode].addsub;
return false;
}
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
*cost = LIBCALL_COST (2);
return false;
}
if (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_SIZE (mode) < 4)
{
/* Narrow modes can be synthesized in SImode, but the range
of useful sub-operations is limited. */
if (CONST_INT_P (XEXP (x, 1)))
{
int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
INTVAL (XEXP (x, 1)), NULL_RTX,
NULL_RTX, 1, 0);
*cost = COSTS_N_INSNS (insns);
if (speed_p)
*cost += insns * extra_cost->alu.arith;
/* Slightly penalize a narrow operation as the result may
need widening. */
*cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
return true;
}
/* Slightly penalize a narrow operation as the result may
need widening. */
*cost = 1 + COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.arith;
return false;
}
if (mode == SImode)
{
rtx shift_op, shift_reg;
*cost = COSTS_N_INSNS (1);
if (TARGET_INT_SIMD
&& (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
{
/* UXTA[BH] or SXTA[BH]. */
if (speed_p)
*cost += extra_cost->alu.extnd_arith;
*cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
speed_p)
+ rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
return true;
}
shift_reg = NULL;
shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
if (shift_op != NULL)
{
if (shift_reg)
{
if (speed_p)
*cost += extra_cost->alu.arith_shift_reg;
*cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
}
else if (speed_p)
*cost += extra_cost->alu.arith_shift;
*cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
return true;
}
if (GET_CODE (XEXP (x, 0)) == MULT)
{
rtx mul_op = XEXP (x, 0);
*cost = COSTS_N_INSNS (1);
if (TARGET_DSP_MULTIPLY
&& ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
&& (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
|| (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
&& CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
&& INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
|| (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
&& CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
&& INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
&& (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
|| (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
&& CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
&& (INTVAL (XEXP (XEXP (mul_op, 1), 1))
== 16))))))
{
/* SMLA[BT][BT]. */
if (speed_p)
*cost += extra_cost->mult[0].extend_add;
*cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
SIGN_EXTEND, 0, speed_p)
+ rtx_cost (XEXP (XEXP (mul_op, 1), 0),
SIGN_EXTEND, 0, speed_p)
+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
return true;
}
if (speed_p)
*cost += extra_cost->mult[0].add;
*cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
+ rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
return true;
}
if (CONST_INT_P (XEXP (x, 1)))
{
int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
INTVAL (XEXP (x, 1)), NULL_RTX,
NULL_RTX, 1, 0);
*cost = COSTS_N_INSNS (insns);
if (speed_p)
*cost += insns * extra_cost->alu.arith;
*cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
return true;
}
return false;
}
if (mode == DImode)
{
if (arm_arch3m
&& GET_CODE (XEXP (x, 0)) == MULT
&& ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
|| (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->mult[1].extend_add;
*cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
ZERO_EXTEND, 0, speed_p)
+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
ZERO_EXTEND, 0, speed_p)
+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
return true;
}
*cost = COSTS_N_INSNS (2);
if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
|| GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
{
if (speed_p)
*cost += (extra_cost->alu.arith
+ (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
? extra_cost->alu.arith
: extra_cost->alu.arith_shift));
*cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
speed_p)
+ rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
return true;
}
if (speed_p)
*cost += 2 * extra_cost->alu.arith;
return false;
}
/* Vector mode? */
*cost = LIBCALL_COST (2);
return false;
case AND: case XOR: case IOR:
if (mode == SImode)
{
enum rtx_code subcode = GET_CODE (XEXP (x, 0));
rtx op0 = XEXP (x, 0);
rtx shift_op, shift_reg;
*cost = COSTS_N_INSNS (1);
if (subcode == NOT
&& (code == AND
|| (code == IOR && TARGET_THUMB2)))
op0 = XEXP (op0, 0);
shift_reg = NULL;
shift_op = shifter_op_p (op0, &shift_reg);
if (shift_op != NULL)
{
if (shift_reg)
{
if (speed_p)
*cost += extra_cost->alu.log_shift_reg;
*cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
}
else if (speed_p)
*cost += extra_cost->alu.log_shift;
*cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
+ rtx_cost (XEXP (x, 1), code, 1, speed_p));
return true;
}
if (CONST_INT_P (XEXP (x, 1)))
{
int insns = arm_gen_constant (code, SImode, NULL_RTX,
INTVAL (XEXP (x, 1)), NULL_RTX,
NULL_RTX, 1, 0);
*cost = COSTS_N_INSNS (insns);
if (speed_p)
*cost += insns * extra_cost->alu.logical;
*cost += rtx_cost (op0, code, 0, speed_p);
return true;
}
if (speed_p)
*cost += extra_cost->alu.logical;
*cost += (rtx_cost (op0, code, 0, speed_p)
+ rtx_cost (XEXP (x, 1), code, 1, speed_p));
return true;
}
if (mode == DImode)
{
rtx op0 = XEXP (x, 0);
enum rtx_code subcode = GET_CODE (op0);
*cost = COSTS_N_INSNS (2);
if (subcode == NOT
&& (code == AND
|| (code == IOR && TARGET_THUMB2)))
op0 = XEXP (op0, 0);
if (GET_CODE (op0) == ZERO_EXTEND)
{
if (speed_p)
*cost += 2 * extra_cost->alu.logical;
*cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
+ rtx_cost (XEXP (x, 1), code, 0, speed_p));
return true;
}
else if (GET_CODE (op0) == SIGN_EXTEND)
{
if (speed_p)
*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
*cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
+ rtx_cost (XEXP (x, 1), code, 0, speed_p));
return true;
}
if (speed_p)
*cost += 2 * extra_cost->alu.logical;
return true;
}
/* Vector mode? */
*cost = LIBCALL_COST (2);
return false;
case MULT:
if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
&& (mode == SFmode || !TARGET_VFP_SINGLE))
{
rtx op0 = XEXP (x, 0);
*cost = COSTS_N_INSNS (1);
if (GET_CODE (op0) == NEG)
op0 = XEXP (op0, 0);
if (speed_p)
*cost += extra_cost->fp[mode != SFmode].mult;
*cost += (rtx_cost (op0, MULT, 0, speed_p)
+ rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
return true;
}
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
*cost = LIBCALL_COST (2);
return false;
}
if (mode == SImode)
{
*cost = COSTS_N_INSNS (1);
if (TARGET_DSP_MULTIPLY
&& ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
&& (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
|| (GET_CODE (XEXP (x, 1)) == ASHIFTRT
&& CONST_INT_P (XEXP (XEXP (x, 1), 1))
&& INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
|| (GET_CODE (XEXP (x, 0)) == ASHIFTRT
&& CONST_INT_P (XEXP (XEXP (x, 0), 1))
&& INTVAL (XEXP (XEXP (x, 0), 1)) == 16
&& (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
|| (GET_CODE (XEXP (x, 1)) == ASHIFTRT
&& CONST_INT_P (XEXP (XEXP (x, 1), 1))
&& (INTVAL (XEXP (XEXP (x, 1), 1))
== 16))))))
{
/* SMUL[TB][TB]. */
if (speed_p)
*cost += extra_cost->mult[0].extend;
*cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
+ rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
return true;
}
if (speed_p)
*cost += extra_cost->mult[0].simple;
return false;
}
if (mode == DImode)
{
if (arm_arch3m
&& ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
&& GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
|| (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
&& GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->mult[1].extend;
*cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
ZERO_EXTEND, 0, speed_p)
+ rtx_cost (XEXP (XEXP (x, 1), 0),
ZERO_EXTEND, 0, speed_p));
return true;
}
*cost = LIBCALL_COST (2);
return false;
}
/* Vector mode? */
*cost = LIBCALL_COST (2);
return false;
case NEG:
if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
&& (mode == SFmode || !TARGET_VFP_SINGLE))
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->fp[mode != SFmode].neg;
return false;
}
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
*cost = LIBCALL_COST (1);
return false;
}
if (mode == SImode)
{
if (GET_CODE (XEXP (x, 0)) == ABS)
{
*cost = COSTS_N_INSNS (2);
/* Assume the non-flag-changing variant. */
if (speed_p)
*cost += (extra_cost->alu.log_shift
+ extra_cost->alu.arith_shift);
*cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
return true;
}
if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
|| GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
{
*cost = COSTS_N_INSNS (2);
/* No extra cost for MOV imm and MVN imm. */
/* If the comparison op is using the flags, there's no further
cost, otherwise we need to add the cost of the comparison. */
if (!(REG_P (XEXP (XEXP (x, 0), 0))
&& REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
&& XEXP (XEXP (x, 0), 1) == const0_rtx))
{
*cost += (COSTS_N_INSNS (1)
+ rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
speed_p)
+ rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
speed_p));
if (speed_p)
*cost += extra_cost->alu.arith;
}
return true;
}
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.arith;
return false;
}
if (GET_MODE_CLASS (mode) == MODE_INT
&& GET_MODE_SIZE (mode) < 4)
{
/* Slightly disparage, as we might need an extend operation. */
*cost = 1 + COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.arith;
return false;
}
if (mode == DImode)
{
*cost = COSTS_N_INSNS (2);
if (speed_p)
*cost += 2 * extra_cost->alu.arith;
return false;
}
/* Vector mode? */
*cost = LIBCALL_COST (1);
return false;
case NOT:
if (mode == SImode)
{
rtx shift_op;
rtx shift_reg = NULL;
*cost = COSTS_N_INSNS (1);
shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
if (shift_op)
{
if (shift_reg != NULL)
{
if (speed_p)
*cost += extra_cost->alu.log_shift_reg;
*cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
}
else if (speed_p)
*cost += extra_cost->alu.log_shift;
*cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
return true;
}
if (speed_p)
*cost += extra_cost->alu.logical;
return false;
}
if (mode == DImode)
{
*cost = COSTS_N_INSNS (2);
return false;
}
/* Vector mode? */
*cost += LIBCALL_COST (1);
return false;
case IF_THEN_ELSE:
{
if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
{
*cost = COSTS_N_INSNS (4);
return true;
}
int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
*cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
/* Assume that if one arm of the if_then_else is a register,
that it will be tied with the result and eliminate the
conditional insn. */
if (REG_P (XEXP (x, 1)))
*cost += op2cost;
else if (REG_P (XEXP (x, 2)))
*cost += op1cost;
else
{
if (speed_p)
{
if (extra_cost->alu.non_exec_costs_exec)
*cost += op1cost + op2cost + extra_cost->alu.non_exec;
else
*cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
}
else
*cost += op1cost + op2cost;
}
}
return true;
case COMPARE:
if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
*cost = 0;
else
{
enum machine_mode op0mode;
/* We'll mostly assume that the cost of a compare is the cost of the
LHS. However, there are some notable exceptions. */
/* Floating point compares are never done as side-effects. */
op0mode = GET_MODE (XEXP (x, 0));
if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
&& (op0mode == SFmode || !TARGET_VFP_SINGLE))
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->fp[op0mode != SFmode].compare;
if (XEXP (x, 1) == CONST0_RTX (op0mode))
{
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
return true;
}
return false;
}
else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
{
*cost = LIBCALL_COST (2);
return false;
}
/* DImode compares normally take two insns. */
if (op0mode == DImode)
{
*cost = COSTS_N_INSNS (2);
if (speed_p)
*cost += 2 * extra_cost->alu.arith;
return false;
}
if (op0mode == SImode)
{
rtx shift_op;
rtx shift_reg;
if (XEXP (x, 1) == const0_rtx
&& !(REG_P (XEXP (x, 0))
|| (GET_CODE (XEXP (x, 0)) == SUBREG
&& REG_P (SUBREG_REG (XEXP (x, 0))))))
{
*cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
/* Multiply operations that set the flags are often
significantly more expensive. */
if (speed_p
&& GET_CODE (XEXP (x, 0)) == MULT
&& !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
*cost += extra_cost->mult[0].flag_setting;
if (speed_p
&& GET_CODE (XEXP (x, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
&& !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
0), 1), mode))
*cost += extra_cost->mult[0].flag_setting;
return true;
}
shift_reg = NULL;
shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
if (shift_op != NULL)
{
*cost = COSTS_N_INSNS (1);
if (shift_reg != NULL)
{
*cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
if (speed_p)
*cost += extra_cost->alu.arith_shift_reg;
}
else if (speed_p)
*cost += extra_cost->alu.arith_shift;
*cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
+ rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
return true;
}
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.arith;
if (CONST_INT_P (XEXP (x, 1))
&& const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
{
*cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
return true;
}
return false;
}
/* Vector mode? */
*cost = LIBCALL_COST (2);
return false;
}
return true;
case EQ:
case NE:
case LT:
case LE:
case GT:
case GE:
case LTU:
case LEU:
case GEU:
case GTU:
case ORDERED:
case UNORDERED:
case UNEQ:
case UNLE:
case UNLT:
case UNGE:
case UNGT:
case LTGT:
if (outer_code == SET)
{
/* Is it a store-flag operation? */
if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
&& XEXP (x, 1) == const0_rtx)
{
/* Thumb also needs an IT insn. */
*cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
return true;
}
if (XEXP (x, 1) == const0_rtx)
{
switch (code)
{
case LT:
/* LSR Rd, Rn, #31. */
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.shift;
break;
case EQ:
/* RSBS T1, Rn, #0
ADC Rd, Rn, T1. */
case NE:
/* SUBS T1, Rn, #1
SBC Rd, Rn, T1. */
*cost = COSTS_N_INSNS (2);
break;
case LE:
/* RSBS T1, Rn, Rn, LSR #31
ADC Rd, Rn, T1. */
*cost = COSTS_N_INSNS (2);
if (speed_p)
*cost += extra_cost->alu.arith_shift;
break;
case GT:
/* RSB Rd, Rn, Rn, ASR #1
LSR Rd, Rd, #31. */
*cost = COSTS_N_INSNS (2);
if (speed_p)
*cost += (extra_cost->alu.arith_shift
+ extra_cost->alu.shift);
break;
case GE:
/* ASR Rd, Rn, #31
ADD Rd, Rn, #1. */
*cost = COSTS_N_INSNS (2);
if (speed_p)
*cost += extra_cost->alu.shift;
break;
default:
/* Remaining cases are either meaningless or would take
three insns anyway. */
*cost = COSTS_N_INSNS (3);
break;
}
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
return true;
}
else
{
*cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
if (CONST_INT_P (XEXP (x, 1))
&& const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
{
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
return true;
}
return false;
}
}
/* Not directly inside a set. If it involves the condition code
register it must be the condition for a branch, cond_exec or
I_T_E operation. Since the comparison is performed elsewhere
this is just the control part which has no additional
cost. */
else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
&& XEXP (x, 1) == const0_rtx)
{
*cost = 0;
return true;
}
case ABS:
if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
&& (mode == SFmode || !TARGET_VFP_SINGLE))
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->fp[mode != SFmode].neg;
return false;
}
else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
*cost = LIBCALL_COST (1);
return false;
}
if (mode == SImode)
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
return false;
}
/* Vector mode? */
*cost = LIBCALL_COST (1);
return false;
case SIGN_EXTEND:
if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
&& MEM_P (XEXP (x, 0)))
{
*cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
if (mode == DImode)
*cost += COSTS_N_INSNS (1);
if (!speed_p)
return true;
if (GET_MODE (XEXP (x, 0)) == SImode)
*cost += extra_cost->ldst.load;
else
*cost += extra_cost->ldst.load_sign_extend;
if (mode == DImode)
*cost += extra_cost->alu.shift;
return true;
}
/* Widening from less than 32-bits requires an extend operation. */
if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
{
/* We have SXTB/SXTH. */
*cost = COSTS_N_INSNS (1);
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
if (speed_p)
*cost += extra_cost->alu.extnd;
}
else if (GET_MODE (XEXP (x, 0)) != SImode)
{
/* Needs two shifts. */
*cost = COSTS_N_INSNS (2);
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
if (speed_p)
*cost += 2 * extra_cost->alu.shift;
}
/* Widening beyond 32-bits requires one more insn. */
if (mode == DImode)
{
*cost += COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.shift;
}
return true;
case ZERO_EXTEND:
if ((arm_arch4
|| GET_MODE (XEXP (x, 0)) == SImode
|| GET_MODE (XEXP (x, 0)) == QImode)
&& MEM_P (XEXP (x, 0)))
{
*cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
if (mode == DImode)
*cost += COSTS_N_INSNS (1); /* No speed penalty. */
return true;
}
/* Widening from less than 32-bits requires an extend operation. */
if (GET_MODE (XEXP (x, 0)) == QImode)
{
/* UXTB can be a shorter instruction in Thumb2, but it might
be slower than the AND Rd, Rn, #255 alternative. When
optimizing for speed it should never be slower to use
AND, and we don't really model 16-bit vs 32-bit insns
here. */
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.logical;
}
else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
{
/* We have UXTB/UXTH. */
*cost = COSTS_N_INSNS (1);
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
if (speed_p)
*cost += extra_cost->alu.extnd;
}
else if (GET_MODE (XEXP (x, 0)) != SImode)
{
/* Needs two shifts. It's marginally preferable to use
shifts rather than two BIC instructions as the second
shift may merge with a subsequent insn as a shifter
op. */
*cost = COSTS_N_INSNS (2);
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
if (speed_p)
*cost += 2 * extra_cost->alu.shift;
}
/* Widening beyond 32-bits requires one more insn. */
if (mode == DImode)
{
*cost += COSTS_N_INSNS (1); /* No speed penalty. */
}
return true;
case CONST_INT:
*cost = 0;
/* CONST_INT has no mode, so we cannot tell for sure how many
insns are really going to be needed. The best we can do is
look at the value passed. If it fits in SImode, then assume
that's the mode it will be used for. Otherwise assume it
will be used in DImode. */
if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
mode = SImode;
else
mode = DImode;
/* Avoid blowing up in arm_gen_constant (). */
if (!(outer_code == PLUS
|| outer_code == AND
|| outer_code == IOR
|| outer_code == XOR
|| outer_code == MINUS))
outer_code = SET;
const_int_cost:
if (mode == SImode)
{
*cost += 0;
*cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
INTVAL (x), NULL, NULL,
0, 0));
/* Extra costs? */
}
else
{
*cost += COSTS_N_INSNS (arm_gen_constant
(outer_code, SImode, NULL,
trunc_int_for_mode (INTVAL (x), SImode),
NULL, NULL, 0, 0)
+ arm_gen_constant (outer_code, SImode, NULL,
INTVAL (x) >> 32, NULL,
NULL, 0, 0));
/* Extra costs? */
}
return true;
case CONST:
case LABEL_REF:
case SYMBOL_REF:
if (speed_p)
{
if (arm_arch_thumb2 && !flag_pic)
*cost = COSTS_N_INSNS (2);
else
*cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
}
else
*cost = COSTS_N_INSNS (2);
if (flag_pic)
{
*cost += COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.arith;
}
return true;
case CONST_FIXED:
*cost = COSTS_N_INSNS (4);
/* Fixme. */
return true;
case CONST_DOUBLE:
if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
&& (mode == SFmode || !TARGET_VFP_SINGLE))
{
if (vfp3_const_double_rtx (x))
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->fp[mode == DFmode].fpconst;
return true;
}
if (speed_p)
{
*cost = COSTS_N_INSNS (1);
if (mode == DFmode)
*cost += extra_cost->ldst.loadd;
else
*cost += extra_cost->ldst.loadf;
}
else
*cost = COSTS_N_INSNS (2 + (mode == DFmode));
return true;
}
*cost = COSTS_N_INSNS (4);
return true;
case CONST_VECTOR:
/* Fixme. */
if (TARGET_NEON
&& TARGET_HARD_FLOAT
&& (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
&& neon_immediate_valid_for_move (x, mode, NULL, NULL))
*cost = COSTS_N_INSNS (1);
else
*cost = COSTS_N_INSNS (4);
return true;
case HIGH:
case LO_SUM:
*cost = COSTS_N_INSNS (1);
/* When optimizing for size, we prefer constant pool entries to
MOVW/MOVT pairs, so bump the cost of these slightly. */
if (!speed_p)
*cost += 1;
return true;
case CLZ:
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.clz;
return false;
case SMIN:
if (XEXP (x, 1) == const0_rtx)
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.log_shift;
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
return true;
}
/* Fall through. */
case SMAX:
case UMIN:
case UMAX:
*cost = COSTS_N_INSNS (2);
return false;
case TRUNCATE:
if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
&& CONST_INT_P (XEXP (XEXP (x, 0), 1))
&& INTVAL (XEXP (XEXP (x, 0), 1)) == 32
&& GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
&& ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
&& GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
|| (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
&& (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
== ZERO_EXTEND))))
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->mult[1].extend;
*cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
speed_p)
+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
0, speed_p));
return true;
}
*cost = LIBCALL_COST (1);
return false;
case UNSPEC:
return arm_unspec_cost (x, outer_code, speed_p, cost);
case PC:
/* Reading the PC is like reading any other register. Writing it
is more expensive, but we take that into account elsewhere. */
*cost = 0;
return true;
case ZERO_EXTRACT:
/* TODO: Simple zero_extract of bottom bits using AND. */
/* Fall through. */
case SIGN_EXTRACT:
if (arm_arch6
&& mode == SImode
&& CONST_INT_P (XEXP (x, 1))
&& CONST_INT_P (XEXP (x, 2)))
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->alu.bfx;
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
return true;
}
/* Without UBFX/SBFX, need to resort to shift operations. */
*cost = COSTS_N_INSNS (2);
if (speed_p)
*cost += 2 * extra_cost->alu.shift;
*cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
return true;
case FLOAT_EXTEND:
if (TARGET_HARD_FLOAT)
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->fp[mode == DFmode].widen;
if (!TARGET_FPU_ARMV8
&& GET_MODE (XEXP (x, 0)) == HFmode)
{
/* Pre v8, widening HF->DF is a two-step process, first
widening to SFmode. */
*cost += COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->fp[0].widen;
}
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
return true;
}
*cost = LIBCALL_COST (1);
return false;
case FLOAT_TRUNCATE:
if (TARGET_HARD_FLOAT)
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->fp[mode == DFmode].narrow;
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
return true;
/* Vector modes? */
}
*cost = LIBCALL_COST (1);
return false;
case FIX:
case UNSIGNED_FIX:
if (TARGET_HARD_FLOAT)
{
if (GET_MODE_CLASS (mode) == MODE_INT)
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
/* Strip off the 'cost' of rounding towards zero. */
if (GET_CODE (XEXP (x, 0)) == FIX)
*cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
else
*cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
/* ??? Increase the cost to deal with transferring from
FP -> CORE registers? */
return true;
}
else if (GET_MODE_CLASS (mode) == MODE_FLOAT
&& TARGET_FPU_ARMV8)
{
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->fp[mode == DFmode].roundint;
return false;
}
/* Vector costs? */
}
*cost = LIBCALL_COST (1);
return false;
case FLOAT:
case UNSIGNED_FLOAT:
if (TARGET_HARD_FLOAT)
{
/* ??? Increase the cost to deal with transferring from CORE
-> FP registers? */
*cost = COSTS_N_INSNS (1);
if (speed_p)
*cost += extra_cost->fp[mode == DFmode].fromint;
return false;
}
*cost = LIBCALL_COST (1);
return false;
case CALL:
*cost = COSTS_N_INSNS (1);
return true;
case ASM_OPERANDS:
/* Just a guess. Cost one insn per input. */
*cost = COSTS_N_INSNS (ASM_OPERANDS_INPUT_LENGTH (x));
return true;
default:
if (mode != VOIDmode)
*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
else
*cost = COSTS_N_INSNS (4); /* Who knows? */
return false;
}
}
/* RTX costs when optimizing for size. */
static bool
arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
int *total, bool speed)
{
bool result;
if (TARGET_OLD_RTX_COSTS
|| (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
{
/* Old way. (Deprecated.) */
if (!speed)
result = arm_size_rtx_costs (x, (enum rtx_code) code,
(enum rtx_code) outer_code, total);
else
result = current_tune->rtx_costs (x, (enum rtx_code) code,
(enum rtx_code) outer_code, total,
speed);
}
else
{
/* New way. */
if (current_tune->insn_extra_cost)
result = arm_new_rtx_costs (x, (enum rtx_code) code,
(enum rtx_code) outer_code,
current_tune->insn_extra_cost,
total, speed);
/* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
&& current_tune->insn_extra_cost != NULL */
else
result = arm_new_rtx_costs (x, (enum rtx_code) code,
(enum rtx_code) outer_code,
&generic_extra_costs, total, speed);
}
if (dump_file && (dump_flags & TDF_DETAILS))
{
print_rtl_single (dump_file, x);
fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
*total, result ? "final" : "partial");
}
return result;
}
/* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
......
@@ -243,6 +243,14 @@ mrestrict-it
Target Report Var(arm_restrict_it) Init(2)
Generate IT blocks appropriate for ARMv8.
mold-rtx-costs
Target Report Mask(OLD_RTX_COSTS)
Use the old RTX costing tables (transitional).
mnew-generic-costs
Target Report Mask(NEW_GENERIC_COSTS)
Use the new generic RTX cost tables if new core-specific cost table not available (transitional).
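Hedged usage note (derived from the arm_rtx_costs changes above): -mold-rtx-costs forces the pre-existing per-core rtx_costs hooks even when a cpu_cost_table is attached to the current tuning, while -mnew-generic-costs makes cores without their own table fall back to generic_extra_costs instead of the old hooks; both options are transitional aids while the new tables are rolled out.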
mfix-cortex-m3-ldrd
Target Report Var(fix_cm3_ldrd) Init(2)
Avoid overlapping destination and address registers on LDRD instructions
......