Commit efd5897c by Sandra Loosemore Committed by Sandra Loosemore

nios2.c (nios2_rtx_costs): Make costs better reflect reality.

2017-10-23  Sandra Loosemore  <sandra@codesourcery.com>

	gcc/
	* config/nios2/nios2.c (nios2_rtx_costs): Make costs better
	reflect reality.
	(nios2_address_cost): Define.
	(nios2_legitimize_address): Recognize (exp + constant) directly.
	(TARGET_ADDRESS_COST): Define.

From-SVN: r254034
parent 82348675
2017-10-23 Sandra Loosemore <sandra@codesourcery.com> 2017-10-23 Sandra Loosemore <sandra@codesourcery.com>
* config/nios2/nios2.c (nios2_rtx_costs): Make costs better
reflect reality.
(nios2_address_cost): Define.
(nios2_legitimize_address): Recognize (exp + constant) directly.
(TARGET_ADDRESS_COST): Define.
2017-10-23 Sandra Loosemore <sandra@codesourcery.com>
* config/nios2/nios2-protos.h (nios2_large_constant_p): Declare. * config/nios2/nios2-protos.h (nios2_large_constant_p): Declare.
(nios2_symbolic_memory_operand_p): Declare. (nios2_symbolic_memory_operand_p): Declare.
(nios2_split_large_constant): Declare. (nios2_split_large_constant): Declare.
...@@ -62,6 +62,7 @@ static const char *nios2_unspec_reloc_name (int); ...@@ -62,6 +62,7 @@ static const char *nios2_unspec_reloc_name (int);
static void nios2_register_builtin_fndecl (unsigned, tree); static void nios2_register_builtin_fndecl (unsigned, tree);
static rtx nios2_ldst_parallel (bool, bool, bool, rtx, int, static rtx nios2_ldst_parallel (bool, bool, bool, rtx, int,
unsigned HOST_WIDE_INT, bool); unsigned HOST_WIDE_INT, bool);
static int nios2_address_cost (rtx, machine_mode, addr_space_t, bool);
/* Threshold for data being put into the small data/bss area, instead /* Threshold for data being put into the small data/bss area, instead
of the normal data area (references to the small data/bss area take of the normal data area (references to the small data/bss area take
...@@ -1430,29 +1431,25 @@ nios2_simple_const_p (const_rtx cst) ...@@ -1430,29 +1431,25 @@ nios2_simple_const_p (const_rtx cst)
cost has been computed, and false if subexpressions should be cost has been computed, and false if subexpressions should be
scanned. In either case, *TOTAL contains the cost result. */ scanned. In either case, *TOTAL contains the cost result. */
static bool static bool
nios2_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, nios2_rtx_costs (rtx x, machine_mode mode,
int outer_code ATTRIBUTE_UNUSED, int outer_code,
int opno ATTRIBUTE_UNUSED, int opno,
int *total, bool speed ATTRIBUTE_UNUSED) int *total, bool speed)
{ {
int code = GET_CODE (x); int code = GET_CODE (x);
switch (code) switch (code)
{ {
case CONST_INT: case CONST_INT:
if (INTVAL (x) == 0) if (INTVAL (x) == 0 || nios2_simple_const_p (x))
{ {
*total = COSTS_N_INSNS (0); *total = COSTS_N_INSNS (0);
return true; return true;
} }
else if (nios2_simple_const_p (x))
{
*total = COSTS_N_INSNS (2);
return true;
}
else else
{ {
*total = COSTS_N_INSNS (4); /* High + lo_sum. */
*total = COSTS_N_INSNS (1);
return true; return true;
} }
...@@ -1460,8 +1457,28 @@ nios2_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, ...@@ -1460,8 +1457,28 @@ nios2_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
case SYMBOL_REF: case SYMBOL_REF:
case CONST: case CONST:
case CONST_DOUBLE: case CONST_DOUBLE:
if (gprel_constant_p (x))
{
*total = COSTS_N_INSNS (1);
return true;
}
else
{
/* High + lo_sum. */
*total = COSTS_N_INSNS (1);
return true;
}
case HIGH:
{
/* This is essentially a constant. */
*total = COSTS_N_INSNS (0);
return true;
}
case LO_SUM:
{ {
*total = COSTS_N_INSNS (4); *total = COSTS_N_INSNS (0);
return true; return true;
} }
...@@ -1477,19 +1494,40 @@ nios2_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, ...@@ -1477,19 +1494,40 @@ nios2_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
return false; return false;
} }
/* For insns that have an execution latency (3 cycles), don't
penalize by the full amount since we can often schedule
to avoid it. */
case MULT: case MULT:
{ {
if (!TARGET_HAS_MUL)
*total = COSTS_N_INSNS (5); /* Guess? */
else if (speed)
*total = COSTS_N_INSNS (2); /* Latency adjustment. */
else
*total = COSTS_N_INSNS (1); *total = COSTS_N_INSNS (1);
return false; return false;
} }
case SIGN_EXTEND:
case DIV:
{ {
*total = COSTS_N_INSNS (3); if (!TARGET_HAS_DIV)
*total = COSTS_N_INSNS (5); /* Guess? */
else if (speed)
*total = COSTS_N_INSNS (2); /* Latency adjustment. */
else
*total = COSTS_N_INSNS (1);
return false; return false;
} }
case ZERO_EXTEND:
case ASHIFT:
case ASHIFTRT:
case LSHIFTRT:
case ROTATE:
{ {
if (!speed)
*total = COSTS_N_INSNS (1); *total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (2); /* Latency adjustment. */
return false; return false;
} }
...@@ -1501,6 +1539,39 @@ nios2_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, ...@@ -1501,6 +1539,39 @@ nios2_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
} }
return false; return false;
case SIGN_EXTEND:
{
if (MEM_P (XEXP (x, 0)))
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (3);
return false;
}
case MEM:
{
rtx addr = XEXP (x, 0);
/* Account for cost of different addressing modes. */
*total = nios2_address_cost (addr, mode, ADDR_SPACE_GENERIC, speed);
if (outer_code == SET && opno == 0)
/* Stores execute in 1 cycle accounted for by
the outer SET. */
;
else if (outer_code == SET || outer_code == SIGN_EXTEND
|| outer_code == ZERO_EXTEND)
/* Latency adjustment. */
{
if (speed)
*total += COSTS_N_INSNS (1);
}
else
/* This is going to have to be split into a load. */
*total += COSTS_N_INSNS (speed ? 2 : 1);
return true;
}
default: default:
return false; return false;
} }
...@@ -2090,6 +2161,37 @@ nios2_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, ...@@ -2090,6 +2161,37 @@ nios2_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
return false; return false;
} }
/* Implement TARGET_ADDRESS_COST.

   Return the extra cost of using ADDRESS as a memory address.  MODE, AS
   and SPEED are accepted for the hook signature but are not consulted.

   Two kinds of address are charged one instruction:
     - anything accepted by nios2_plus_symbolic_constant_p, and
     - a symbolic constant (per nios2_symbolic_constant_p) whose
       top-level code is CONST.
   Every other address, including a symbolic constant that is not
   wrapped in CONST, is free.

   Experimentation has shown that we get better code by penalizing the
   (plus reg symbolic_constant) and (plus reg (const ...)) forms while
   giving (plus reg symbol_ref) address modes the same cost as those
   that don't require splitting.  From a theoretical point of view:
     - This is in line with the recommendation in the GCC internals
       documentation to make address forms involving multiple
       registers more expensive than single-register forms.
     - OTOH it still encourages fwprop1 to propagate constants into
       address expressions more aggressively.
     - We should discourage splitting (symbol + offset) into hi/lo pairs
       to allow CSE'ing the symbol when it's used with more than one
       offset, but not so heavily as to avoid this addressing mode
       at all.  */
static int
nios2_address_cost (rtx address,
		    machine_mode mode ATTRIBUTE_UNUSED,
		    addr_space_t as ATTRIBUTE_UNUSED,
		    bool speed ATTRIBUTE_UNUSED)
{
  /* True when ADDRESS is one of the forms we want to discourage.  */
  bool penalize = nios2_plus_symbolic_constant_p (address);

  /* A bare symbolic constant is only penalized when it is a CONST
     expression; the predicates are queried in the same order as the
     cost classes are listed above.  */
  if (!penalize)
    penalize = (nios2_symbolic_constant_p (address)
		&& GET_CODE (address) == CONST);

  return penalize ? COSTS_N_INSNS (1) : COSTS_N_INSNS (0);
}
/* Return true if X is a MEM whose address expression involves a symbolic /* Return true if X is a MEM whose address expression involves a symbolic
constant. */ constant. */
bool bool
...@@ -2396,8 +2498,16 @@ nios2_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, ...@@ -2396,8 +2498,16 @@ nios2_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
op0 = XEXP (x, 0); op0 = XEXP (x, 0);
op1 = XEXP (x, 1); op1 = XEXP (x, 1);
/* Target-independent code turns (exp + constant) into plain
register indirect. Although subsequent optimization passes will
eventually sort that out, ivopts uses the unoptimized form for
computing its cost model, so we get better results by generating
the correct form from the start. */
if (nios2_valid_addr_offset_p (op1))
return gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), copy_rtx (op1));
/* We may need to split symbolic constants now. */ /* We may need to split symbolic constants now. */
if (nios2_symbolic_constant_p (op1)) else if (nios2_symbolic_constant_p (op1))
{ {
if (nios2_symbolic_constant_allowed ()) if (nios2_symbolic_constant_allowed ())
return gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), copy_rtx (op1)); return gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), copy_rtx (op1));
...@@ -5259,6 +5369,9 @@ nios2_adjust_reg_alloc_order (void) ...@@ -5259,6 +5369,9 @@ nios2_adjust_reg_alloc_order (void)
#undef TARGET_RTX_COSTS #undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS nios2_rtx_costs #define TARGET_RTX_COSTS nios2_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST nios2_address_cost
#undef TARGET_HAVE_TLS #undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS TARGET_LINUX_ABI #define TARGET_HAVE_TLS TARGET_LINUX_ABI
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment