Commit 19c93809 by Kito Cheng Committed by Chung-Ju Wu

[NDS32] Rewrite cost model.

gcc/
	* config/nds32/nds32-cost.c (rtx_cost_model_t): New structure.
	(insn_size_16bit, insn_size_32bit): New variables for cost evaluation.
	(nds32_rtx_costs_impl): Simplify.
	(nds32_address_cost_impl): Simplify.
	(nds32_init_rtx_costs): New function.
	(nds32_rtx_costs_speed_prefer): Likewise.
	(nds32_rtx_costs_size_prefer): Likewise.
	(nds32_address_cost_speed_prefer): Likewise.
	(nds32_address_cost_speed_fwprop): Likewise.
	(nds32_address_cost_size_prefer): Likewise.
	* config/nds32/nds32-protos.h (nds32_init_rtx_costs): Declare.
	* config/nds32/nds32.c (nds32_option_override): Use
	nds32_init_rtx_costs function.

Co-Authored-By: Chung-Ju Wu <jasonwucj@gmail.com>

From-SVN: r260411
parent b5535ac2
2018-05-20 Kito Cheng <kito.cheng@gmail.com>
Chung-Ju Wu <jasonwucj@gmail.com>
* config/nds32/nds32-cost.c (rtx_cost_model_t): New structure.
(insn_size_16bit, insn_size_32bit): New variables for cost evaluation.
(nds32_rtx_costs_impl): Simplify.
(nds32_address_cost_impl): Simplify.
(nds32_init_rtx_costs): New function.
(nds32_rtx_costs_speed_prefer): Likewise.
(nds32_rtx_costs_size_prefer): Likewise.
(nds32_address_cost_speed_prefer): Likewise.
(nds32_address_cost_speed_fwprop): Likewise.
(nds32_address_cost_size_prefer): Likewise.
* config/nds32/nds32-protos.h (nds32_init_rtx_costs): Declare.
* config/nds32/nds32.c (nds32_option_override): Use
nds32_init_rtx_costs function.
2018-05-20 Chung-Ju Wu <jasonwucj@gmail.com>
* config/nds32/nds32.c (nds32_asm_file_start): Output pipeline model.
......
......@@ -34,66 +34,379 @@
#include "optabs.h" /* For GEN_FCN. */
#include "recog.h"
#include "tm-constrs.h"
#include "tree-pass.h"
/* ------------------------------------------------------------------------ */
bool
nds32_rtx_costs_impl (rtx x,
machine_mode mode ATTRIBUTE_UNUSED,
int outer_code,
int opno ATTRIBUTE_UNUSED,
int *total,
bool speed)
{
int code = GET_CODE (x);
typedef bool (*rtx_cost_func) (rtx, int, int, int, int*);
/* According to 'speed', goto suitable cost model section. */
if (speed)
goto performance_cost;
else
goto size_cost;
/* Dispatch table for the rtx cost hook: one callback per optimization
   goal.  Both slots are assigned in nds32_init_rtx_costs and called
   from nds32_rtx_costs_impl.  */
struct rtx_cost_model_t {
rtx_cost_func speed_prefer; /* Called when the hook's 'speed' flag is true.  */
rtx_cost_func size_prefer; /* Called when the hook's 'speed' flag is false.  */
};
/* The single file-local instance of the dispatch table.  */
static rtx_cost_model_t rtx_cost_model;
performance_cost:
/* This is section for performance cost model. */
static int insn_size_16bit; /* Initial at nds32_init_rtx_costs. */
static const int insn_size_32bit = 4;
static bool
nds32_rtx_costs_speed_prefer (rtx x ATTRIBUTE_UNUSED,
int code,
int outer_code ATTRIBUTE_UNUSED,
int opno ATTRIBUTE_UNUSED,
int *total)
{
rtx op0;
rtx op1;
machine_mode mode = GET_MODE (x);
/* Scale cost by mode size. */
int cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
/* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4.
We treat it as 4-cycle cost for each instruction
under performance consideration. */
switch (code)
{
case SET:
/* For 'SET' rtx, we need to return false
so that it can recursively calculate costs. */
return false;
case USE:
/* Used in combine.c as a marker. */
*total = 0;
break;
return true;
case CONST_INT:
/* When not optimizing for size, we care more about the cost
of hot code, and hot code is often in a loop. If a constant
operand needs to be forced into a register, we will often be
able to hoist the constant load out of the loop, so the load
should not contribute to the cost. */
if (outer_code == SET || outer_code == PLUS)
*total = satisfies_constraint_Is20 (x) ? 0 : 4;
else if (outer_code == AND || outer_code == IOR || outer_code == XOR
|| outer_code == MINUS)
*total = satisfies_constraint_Iu15 (x) ? 0 : 4;
else if (outer_code == ASHIFT || outer_code == ASHIFTRT
|| outer_code == LSHIFTRT)
*total = satisfies_constraint_Iu05 (x) ? 0 : 4;
else if (GET_RTX_CLASS (outer_code) == RTX_COMPARE
|| GET_RTX_CLASS (outer_code) == RTX_COMM_COMPARE)
*total = satisfies_constraint_Is16 (x) ? 0 : 4;
else
*total = COSTS_N_INSNS (1);
return true;
case CONST:
case LO_SUM:
case HIGH:
case SYMBOL_REF:
*total = COSTS_N_INSNS (1);
return true;
case MEM:
*total = COSTS_N_INSNS (1);
return true;
case SET:
op0 = SET_DEST (x);
op1 = SET_SRC (x);
mode = GET_MODE (op0);
/* Scale cost by mode size. */
cost = COSTS_N_INSNS (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
switch (GET_CODE (op1))
{
case REG:
case SUBREG:
/* Register move and Store instructions. */
if ((REG_P (op0) || MEM_P (op0))
&& GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode))
*total = COSTS_N_INSNS (1);
else
*total = cost;
return true;
case MEM:
/* Load instructions. */
if (REG_P (op0) && GET_MODE_SIZE (mode) <= GET_MODE_SIZE (DImode))
*total = COSTS_N_INSNS (1);
else
*total = cost;
return true;
case CONST_INT:
/* movi instruction. */
if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode))
{
if (satisfies_constraint_Is20 (op1))
*total = COSTS_N_INSNS (1) - 1;
else
*total = COSTS_N_INSNS (2);
}
else
*total = cost;
return true;
case CONST:
case SYMBOL_REF:
case LABEL_REF:
/* la instruction. */
if (REG_P (op0) && GET_MODE_SIZE (mode) < GET_MODE_SIZE (DImode))
*total = COSTS_N_INSNS (1) - 1;
else
*total = cost;
return true;
case VEC_SELECT:
*total = cost;
return true;
default:
*total = cost;
return true;
}
case PLUS:
op0 = XEXP (x, 0);
op1 = XEXP (x, 1);
if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
*total = cost;
else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT
|| GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT)
/* ALU_SHIFT */
*total = COSTS_N_INSNS (2);
else if ((GET_CODE (op1) == CONST_INT
&& satisfies_constraint_Is15 (op1))
|| REG_P (op1))
/* ADD instructions */
*total = COSTS_N_INSNS (1);
else
/* ADD instructions: IMM out of range. */
*total = COSTS_N_INSNS (2);
return true;
case MINUS:
op0 = XEXP (x, 0);
op1 = XEXP (x, 1);
if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
*total = cost;
else if (GET_CODE (op0) == MULT || GET_CODE (op0) == LSHIFTRT
|| GET_CODE (op1) == MULT || GET_CODE (op1) == LSHIFTRT)
/* ALU_SHIFT */
*total = COSTS_N_INSNS (2);
else if ((GET_CODE (op0) == CONST_INT
&& satisfies_constraint_Is15 (op0))
|| REG_P (op0))
/* SUB instructions */
*total = COSTS_N_INSNS (1);
else
/* SUB instructions: IMM out of range. */
*total = COSTS_N_INSNS (2);
return true;
case TRUNCATE:
/* TRUNCATE and AND behavior is same. */
*total = COSTS_N_INSNS (1);
return true;
case AND:
case IOR:
case XOR:
op0 = XEXP (x, 0);
op1 = XEXP (x, 1);
if (NDS32_EXT_DSP_P ())
{
/* We prefer (and (ior) (ior)) than (ior (and) (and)) for
synthetize pk** and insb instruction. */
if (code == AND && GET_CODE (op0) == IOR && GET_CODE (op1) == IOR)
return COSTS_N_INSNS (1);
if (code == IOR && GET_CODE (op0) == AND && GET_CODE (op1) == AND)
return COSTS_N_INSNS (10);
}
if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
*total = cost;
else if (GET_CODE (op0) == ASHIFT || GET_CODE (op0) == LSHIFTRT)
*total = COSTS_N_INSNS (2);
else if ((GET_CODE (op1) == CONST_INT
&& satisfies_constraint_Iu15 (op1))
|| REG_P (op1))
/* AND, OR, XOR instructions */
*total = COSTS_N_INSNS (1);
else if (code == AND || GET_CODE (op0) == NOT)
/* BITC instruction */
*total = COSTS_N_INSNS (1);
else
/* AND, OR, XOR instructions: IMM out of range. */
*total = COSTS_N_INSNS (2);
return true;
case MULT:
if (GET_MODE (x) == DImode
|| GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
|| GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
/* MUL instructions */
*total = COSTS_N_INSNS (1);
break;
else if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
*total = cost;
else if (outer_code == PLUS || outer_code == MINUS)
*total = COSTS_N_INSNS (2);
else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
&& satisfies_constraint_Iu05 (XEXP (x, 1)))
|| REG_P (XEXP (x, 1)))
/* MUL instructions */
*total = COSTS_N_INSNS (1);
else
/* MUL instructions: IMM out of range. */
*total = COSTS_N_INSNS (2);
if (TARGET_MUL_SLOW)
*total += COSTS_N_INSNS (4);
return true;
case LSHIFTRT:
if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
*total = cost;
else if (outer_code == PLUS || outer_code == MINUS
|| outer_code == AND || outer_code == IOR
|| outer_code == XOR)
*total = COSTS_N_INSNS (2);
else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
&& satisfies_constraint_Iu05 (XEXP (x, 1)))
|| REG_P (XEXP (x, 1)))
/* SRL instructions */
*total = COSTS_N_INSNS (1);
else
/* SRL instructions: IMM out of range. */
*total = COSTS_N_INSNS (2);
return true;
case ASHIFT:
if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
*total = cost;
else if (outer_code == AND || outer_code == IOR
|| outer_code == XOR)
*total = COSTS_N_INSNS (2);
else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
&& satisfies_constraint_Iu05 (XEXP (x, 1)))
|| REG_P (XEXP (x, 1)))
/* SLL instructions */
*total = COSTS_N_INSNS (1);
else
/* SLL instructions: IMM out of range. */
*total = COSTS_N_INSNS (2);
return true;
case ASHIFTRT:
case ROTATERT:
if (GET_MODE_SIZE (mode) >= GET_MODE_SIZE (DImode))
*total = cost;
else if ((GET_CODE (XEXP (x, 1)) == CONST_INT
&& satisfies_constraint_Iu05 (XEXP (x, 1)))
|| REG_P (XEXP (x, 1)))
/* ROTR, SLL instructions */
*total = COSTS_N_INSNS (1);
else
/* ROTR, SLL instructions: IMM out of range. */
*total = COSTS_N_INSNS (2);
return true;
case LT:
case LTU:
if (outer_code == SET)
{
if ((GET_CODE (XEXP (x, 1)) == CONST_INT
&& satisfies_constraint_Iu15 (XEXP (x, 1)))
|| REG_P (XEXP (x, 1)))
/* SLT, SLTI instructions */
*total = COSTS_N_INSNS (1);
else
/* SLT, SLT instructions: IMM out of range. */
*total = COSTS_N_INSNS (2);
}
else
/* branch */
*total = COSTS_N_INSNS (2);
return true;
case EQ:
case NE:
case GE:
case LE:
case GT:
/* branch */
*total = COSTS_N_INSNS (2);
return true;
case IF_THEN_ELSE:
if (GET_CODE (XEXP (x, 1)) == LABEL_REF)
/* branch */
*total = COSTS_N_INSNS (2);
else
/* cmovz, cmovn instructions */
*total = COSTS_N_INSNS (1);
return true;
case LABEL_REF:
if (outer_code == IF_THEN_ELSE)
/* branch */
*total = COSTS_N_INSNS (2);
else
*total = COSTS_N_INSNS (1);
return true;
case ZERO_EXTEND:
case SIGN_EXTEND:
if (MEM_P (XEXP (x, 0)))
/* Using memory access. */
*total = COSTS_N_INSNS (1);
else
/* Zero extend and sign extend instructions. */
*total = COSTS_N_INSNS (1);
return true;
case NEG:
case NOT:
*total = COSTS_N_INSNS (1);
return true;
case DIV:
case UDIV:
case MOD:
case UMOD:
*total = COSTS_N_INSNS (7);
break;
default:
*total = COSTS_N_INSNS (1);
break;
}
*total = COSTS_N_INSNS (20);
return true;
case CALL:
*total = COSTS_N_INSNS (2);
return true;
case CLZ:
case SMIN:
case SMAX:
case ZERO_EXTRACT:
if (TARGET_EXT_PERF)
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (3);
return true;
case VEC_SELECT:
*total = COSTS_N_INSNS (1);
return true;
size_cost:
/* This is section for size cost model. */
default:
*total = COSTS_N_INSNS (3);
return true;
}
}
static bool
nds32_rtx_costs_size_prefer (rtx x,
int code,
int outer_code,
int opno ATTRIBUTE_UNUSED,
int *total)
{
/* In gcc/rtl.h, the default value of COSTS_N_INSNS(N) is N*4.
We treat it as 4-byte cost for each instruction
under code size consideration. */
......@@ -118,85 +431,162 @@ size_cost:
(set X imm20s), use movi, 4-byte cost.
(set X BIG_INT), use sethi/ori, 8-byte cost. */
if (satisfies_constraint_Is05 (x))
*total = COSTS_N_INSNS (1) - 2;
*total = insn_size_16bit;
else if (satisfies_constraint_Is20 (x))
*total = COSTS_N_INSNS (1);
*total = insn_size_32bit;
else
*total = COSTS_N_INSNS (2);
*total = insn_size_32bit * 2;
}
else if (outer_code == PLUS || outer_code == MINUS)
{
/* Possible addi333/subi333 or subi45/addi45, 2-byte cost.
General case, cost 1 instruction with 4-byte. */
if (satisfies_constraint_Iu05 (x))
*total = COSTS_N_INSNS (1) - 2;
*total = insn_size_16bit;
else
*total = COSTS_N_INSNS (1);
*total = insn_size_32bit;
}
else if (outer_code == ASHIFT)
{
/* Possible slli333, 2-byte cost.
General case, cost 1 instruction with 4-byte. */
if (satisfies_constraint_Iu03 (x))
*total = COSTS_N_INSNS (1) - 2;
*total = insn_size_16bit;
else
*total = COSTS_N_INSNS (1);
*total = insn_size_32bit;
}
else if (outer_code == ASHIFTRT || outer_code == LSHIFTRT)
{
/* Possible srai45 or srli45, 2-byte cost.
General case, cost 1 instruction with 4-byte. */
if (satisfies_constraint_Iu05 (x))
*total = COSTS_N_INSNS (1) - 2;
*total = insn_size_16bit;
else
*total = COSTS_N_INSNS (1);
*total = insn_size_32bit;
}
else
{
/* For other cases, simply set it 4-byte cost. */
*total = COSTS_N_INSNS (1);
*total = insn_size_32bit;
}
break;
case CONST_DOUBLE:
/* It requires high part and low part processing, set it 8-byte cost. */
*total = COSTS_N_INSNS (2);
*total = insn_size_32bit * 2;
break;
case CONST:
case SYMBOL_REF:
*total = insn_size_32bit * 2;
break;
default:
/* For other cases, generally we set it 4-byte cost
and stop resurively traversing. */
*total = COSTS_N_INSNS (1);
*total = insn_size_32bit;
break;
}
return true;
}
int
nds32_address_cost_impl (rtx address,
/* Initialize the file-local cost state: the size charged for a 16-bit
   instruction and the two callbacks in rtx_cost_model.  */
void
nds32_init_rtx_costs (void)
{
  /* When TARGET_16_BIT is off, the 16-bit size is set to 4, which is
     the same value as insn_size_32bit.  */
  insn_size_16bit = TARGET_16_BIT ? 2 : 4;

  rtx_cost_model.size_prefer = nds32_rtx_costs_size_prefer;
  rtx_cost_model.speed_prefer = nds32_rtx_costs_speed_prefer;
}
/* This target hook describes the relative costs of RTL expressions.
   Return 'true' when all subexpressions of x have been processed.
   Return 'false' to sum the costs of sub-rtx, plus cost of this operation.
   Refer to gcc/rtlanal.c for more information.

   X is the expression being costed; its rtx code is extracted here and
   passed on together with OUTER_CODE, OPNO and TOTAL to the callback
   selected by SPEED.  MODE is not used.  The parameter list matches the
   extern declaration (rtx, machine_mode, int, int, int *, bool).  */
bool
nds32_rtx_costs_impl (rtx x,
                      machine_mode mode ATTRIBUTE_UNUSED,
                      int outer_code,
                      int opno,
                      int *total,
                      bool speed)
{
  int code = GET_CODE (x);

  /* According to 'speed', use suitable cost model section.  */
  if (speed)
    return rtx_cost_model.speed_prefer (x, code, outer_code, opno, total);
  else
    return rtx_cost_model.size_prefer (x, code, outer_code, opno, total);
}
/* Address cost model used by nds32_address_cost_impl when optimizing
   for speed and the current pass is not TV_FWPROP.

   ADDRESS is the address rtx; only its top-level code (and, for PLUS,
   the kind of its two operands) is inspected.  Returns the cost as an
   int expressed with COSTS_N_INSNS.  */
int nds32_address_cost_speed_prefer (rtx address)
{
  rtx plus0, plus1;
  enum rtx_code code;

  code = GET_CODE (address);

  switch (code)
    {
    case POST_MODIFY:
    case POST_INC:
    case POST_DEC:
      /* We encourage that rtx contains
         POST_MODIFY/POST_INC/POST_DEC behavior.  */
      return COSTS_N_INSNS (1) - 2;

    case SYMBOL_REF:
      /* We can have gp-relative load/store for symbol_ref.  */
      return COSTS_N_INSNS (2);

    case CONST:
      /* It is supposed to be the pattern
         (const (plus symbol_ref const_int)).  */
      return COSTS_N_INSNS (2);

    case REG:
      /* Plain register address: same cost as the post-update forms.  */
      return COSTS_N_INSNS (1) - 2;

    case PLUS:
      /* We do not need to check if the address is a legitimate address,
         because this hook is never called with an invalid address.
         The cost depends only on what kind of operands are added.  */
      plus0 = XEXP (address, 0);
      plus1 = XEXP (address, 1);

      if (REG_P (plus0) && CONST_INT_P (plus1))
        return COSTS_N_INSNS (1) - 2;
      else if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1))
        return COSTS_N_INSNS (1) - 1;
      else if (REG_P (plus0) && REG_P (plus1))
        return COSTS_N_INSNS (1);

      /* Any other 'plus' situation costs one full instruction.  */
      return COSTS_N_INSNS (1);

    default:
      break;
    }

  /* Every address form not handled above gets the highest cost.  */
  return COSTS_N_INSNS (4);
}
int nds32_address_cost_speed_fwprop (rtx address)
{
rtx plus0, plus1;
enum rtx_code code;
code = GET_CODE (address);
switch (code)
{
......@@ -210,12 +600,12 @@ size_cost:
case SYMBOL_REF:
/* We can have gp-relative load/store for symbol_ref.
Have it 4-byte cost. */
return COSTS_N_INSNS (1);
return COSTS_N_INSNS (2);
case CONST:
/* It is supposed to be the pattern (const (plus symbol_ref const_int)).
Have it 4-byte cost. */
return COSTS_N_INSNS (1);
return COSTS_N_INSNS (2);
case REG:
/* Simply return 4-byte costs. */
......@@ -233,11 +623,78 @@ size_cost:
{
/* If it is possible to be lwi333/swi333 form,
make it 2-byte cost. */
if (satisfies_constraint_Iu05 (plus1))
if (satisfies_constraint_Iu03 (plus1))
return (COSTS_N_INSNS (1) - 2);
else
return COSTS_N_INSNS (1);
}
if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1))
return COSTS_N_INSNS (1) - 2;
else if (REG_P (plus0) && REG_P (plus1))
return COSTS_N_INSNS (1);
/* For other 'plus' situation, make it cost 4-byte. */
return COSTS_N_INSNS (1);
default:
break;
}
return COSTS_N_INSNS (4);
}
int nds32_address_cost_size_prefer (rtx address)
{
rtx plus0, plus1;
enum rtx_code code;
code = GET_CODE (address);
switch (code)
{
case POST_MODIFY:
case POST_INC:
case POST_DEC:
/* We encourage that rtx contains
POST_MODIFY/POST_INC/POST_DEC behavior. */
return 0;
case SYMBOL_REF:
/* We can have gp-relative load/store for symbol_ref.
Have it 4-byte cost. */
return COSTS_N_INSNS (2);
case CONST:
/* It is supposed to be the pattern (const (plus symbol_ref const_int)).
Have it 4-byte cost. */
return COSTS_N_INSNS (2);
case REG:
/* Simply return 4-byte costs. */
return COSTS_N_INSNS (1) - 1;
case PLUS:
/* We do not need to check if the address is a legitimate address,
because this hook is never called with an invalid address.
But we better check the range of
const_int value for cost, if it exists. */
plus0 = XEXP (address, 0);
plus1 = XEXP (address, 1);
if (REG_P (plus0) && CONST_INT_P (plus1))
{
/* If it is possible to be lwi333/swi333 form,
make it 2-byte cost. */
if (satisfies_constraint_Iu03 (plus1))
return (COSTS_N_INSNS (1) - 2);
else
return COSTS_N_INSNS (1) - 1;
}
/* (plus (reg) (mult (reg) (const))) */
if (ARITHMETIC_P (plus0) || ARITHMETIC_P (plus1))
return (COSTS_N_INSNS (1) - 1);
/* For other 'plus' situation, make it cost 4-byte. */
return COSTS_N_INSNS (1);
......@@ -247,6 +704,23 @@ size_cost:
}
return COSTS_N_INSNS (4);
}
/* Address cost entry point: choose one of the three address cost
   models and return its result for ADDRESS.  MODE and AS are unused.
   SPEED_P false selects the size model; otherwise the model depends on
   whether the current pass's timevar id is TV_FWPROP.  */
int nds32_address_cost_impl (rtx address,
                             machine_mode mode ATTRIBUTE_UNUSED,
                             addr_space_t as ATTRIBUTE_UNUSED,
                             bool speed_p)
{
  /* Size optimization has a single model.  */
  if (!speed_p)
    return nds32_address_cost_size_prefer (address);

  /* For speed, the TV_FWPROP pass gets its own model.  */
  return (current_pass->tv_id == TV_FWPROP
          ? nds32_address_cost_speed_fwprop (address)
          : nds32_address_cost_speed_prefer (address));
}
/* ------------------------------------------------------------------------ */
......@@ -311,6 +311,7 @@ extern bool nds32_isr_function_p (tree);
/* Auxiliary functions for cost calculation. */
extern void nds32_init_rtx_costs (void);
extern bool nds32_rtx_costs_impl (rtx, machine_mode, int, int, int *, bool);
extern int nds32_address_cost_impl (rtx, machine_mode, addr_space_t, bool);
......
......@@ -4096,6 +4096,7 @@ nds32_option_override (void)
"must be enable '-mext-fpu-sp' or '-mext-fpu-dp'");
}
nds32_init_rtx_costs ();
nds32_register_passes ();
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment