Commit 18df6de9 by David S. Miller Committed by David S. Miller

sparc.h (processor_costs): Define.

2004-07-02  David S. Miller  <davem@nuts.davemloft.net>

	* config/sparc/sparc.h (processor_costs): Define.
	(sparc_costs): Declare.
	* config/sparc/sparc.c (cypress_costs, supersparc_costs,
	hypersparc_costs, sparclet_costs, ultrasparc_costs,
	ultrasparc3_costs): New.
	(sparc_override_options): Set sparc_costs as appropriate.
	(sparc_rtx_costs): Use sparc_costs instead of messy
	conditionals.

From-SVN: r84414
parent a3e8d8b4
2004-07-09 David S. Miller <davem@nuts.davemloft.net>
* config/sparc/sparc.h (processor_costs): Define.
(sparc_costs): Declare.
* config/sparc/sparc.c (cypress_costs, supersparc_costs,
hypersparc_costs, sparclet_costs, ultrasparc_costs,
ultrasparc3_costs): New.
(sparc_override_options): Set sparc_costs as appropriate.
(sparc_rtx_costs): Use sparc_costs instead of messy
conditionals.
2004-07-09 Steven Bosscher <stevenb@suse.de> 2004-07-09 Steven Bosscher <stevenb@suse.de>
* doc/md.texi (Processor pipeline description): Mention that * doc/md.texi (Processor pipeline description): Mention that
......
...@@ -49,6 +49,153 @@ Boston, MA 02111-1307, USA. */ ...@@ -49,6 +49,153 @@ Boston, MA 02111-1307, USA. */
#include "cfglayout.h" #include "cfglayout.h"
#include "tree-gimple.h" #include "tree-gimple.h"
/* Processor costs */
/* Cost table for Cypress.  sparc_override_options selects it for
   PROCESSOR_V7 and PROCESSOR_CYPRESS, and sparc_costs points at it by
   default.  The initializers are positional, so they must stay in the
   same order as the fields of struct processor_costs; each comment
   names the field being initialized.  */
static const
struct processor_costs cypress_costs = {
2, /* int_load: int load */
2, /* int_sload: int signed load */
2, /* int_zload: int zeroed load */
2, /* float_load: float load */
5, /* float_move: fmov, fneg, fabs */
5, /* float_plusminus: fadd, fsub */
1, /* float_cmp: fcmp */
1, /* float_cmove: float conditional move (fmov, fmovr) */
7, /* float_mul: fmul */
37, /* float_div_sf: fdivs */
37, /* float_div_df: fdivd */
63, /* float_sqrt_sf: fsqrts */
63, /* float_sqrt_df: fsqrtd */
1, /* int_mul: imul */
1, /* int_mulX: imulX */
0, /* int_mul_bit_factor: 0 means the multiply cost is fixed */
1, /* int_div: idiv */
1, /* int_divX: idivX */
1, /* int_cmove: movcc/movr */
0, /* shift_penalty: shift penalty */
};
/* Cost table for SuperSPARC.  sparc_override_options selects it for
   PROCESSOR_V8, PROCESSOR_SPARCLITE and PROCESSOR_SUPERSPARC.  The
   initializers are positional, so they must stay in the same order as
   the fields of struct processor_costs; each comment names the field
   being initialized.  */
static const
struct processor_costs supersparc_costs = {
1, /* int_load: int load */
1, /* int_sload: int signed load */
1, /* int_zload: int zeroed load */
0, /* float_load: float load */
3, /* float_move: fmov, fneg, fabs */
3, /* float_plusminus: fadd, fsub */
3, /* float_cmp: fcmp */
1, /* float_cmove: float conditional move (fmov, fmovr) */
3, /* float_mul: fmul */
6, /* float_div_sf: fdivs */
9, /* float_div_df: fdivd */
12, /* float_sqrt_sf: fsqrts */
12, /* float_sqrt_df: fsqrtd */
4, /* int_mul: imul */
4, /* int_mulX: imulX */
0, /* int_mul_bit_factor: 0 means the multiply cost is fixed */
4, /* int_div: idiv */
4, /* int_divX: idivX */
1, /* int_cmove: movcc/movr */
1, /* shift_penalty: shift penalty */
};
/* Cost table for HyperSPARC.  sparc_override_options selects it for
   PROCESSOR_F930, PROCESSOR_F934, PROCESSOR_HYPERSPARC and
   PROCESSOR_SPARCLITE86X.  The initializers are positional, so they
   must stay in the same order as the fields of struct processor_costs;
   each comment names the field being initialized.  */
static const
struct processor_costs hypersparc_costs = {
1, /* int_load: int load */
1, /* int_sload: int signed load */
1, /* int_zload: int zeroed load */
1, /* float_load: float load */
1, /* float_move: fmov, fneg, fabs */
1, /* float_plusminus: fadd, fsub */
1, /* float_cmp: fcmp */
1, /* float_cmove: float conditional move (fmov, fmovr) */
1, /* float_mul: fmul */
8, /* float_div_sf: fdivs */
12, /* float_div_df: fdivd */
17, /* float_sqrt_sf: fsqrts */
17, /* float_sqrt_df: fsqrtd */
17, /* int_mul: imul */
17, /* int_mulX: imulX */
0, /* int_mul_bit_factor: 0 means the multiply cost is fixed */
17, /* int_div: idiv */
17, /* int_divX: idivX */
1, /* int_cmove: movcc/movr */
0, /* shift_penalty: shift penalty */
};
/* Cost table for SPARClet.  sparc_override_options selects it for
   PROCESSOR_SPARCLET and PROCESSOR_TSC701.  The initializers are
   positional, so they must stay in the same order as the fields of
   struct processor_costs; each comment names the field being
   initialized.  */
static const
struct processor_costs sparclet_costs = {
3, /* int_load: int load */
3, /* int_sload: int signed load */
1, /* int_zload: int zeroed load */
1, /* float_load: float load */
1, /* float_move: fmov, fneg, fabs */
1, /* float_plusminus: fadd, fsub */
1, /* float_cmp: fcmp */
1, /* float_cmove: float conditional move (fmov, fmovr) */
1, /* float_mul: fmul */
1, /* float_div_sf: fdivs */
1, /* float_div_df: fdivd */
1, /* float_sqrt_sf: fsqrts */
1, /* float_sqrt_df: fsqrtd */
5, /* int_mul: imul */
5, /* int_mulX: imulX */
0, /* int_mul_bit_factor: 0 means the multiply cost is fixed */
5, /* int_div: idiv */
5, /* int_divX: idivX */
1, /* int_cmove: movcc/movr */
0, /* shift_penalty: shift penalty */
};
/* Cost table for UltraSPARC.  sparc_override_options selects it for
   PROCESSOR_V9 and PROCESSOR_ULTRASPARC.  This is the only table here
   with a nonzero int_mul_bit_factor, i.e. the only one for which the
   integer multiply cost is treated as variable.  The initializers are
   positional, so they must stay in the same order as the fields of
   struct processor_costs; each comment names the field being
   initialized.  */
static const
struct processor_costs ultrasparc_costs = {
2, /* int_load: int load */
3, /* int_sload: int signed load */
2, /* int_zload: int zeroed load */
2, /* float_load: float load */
1, /* float_move: fmov, fneg, fabs */
4, /* float_plusminus: fadd, fsub */
1, /* float_cmp: fcmp */
2, /* float_cmove: float conditional move (fmov, fmovr) */
4, /* float_mul: fmul */
13, /* float_div_sf: fdivs */
23, /* float_div_df: fdivd */
13, /* float_sqrt_sf: fsqrts */
23, /* float_sqrt_df: fsqrtd */
4, /* int_mul: imul */
4, /* int_mulX: imulX */
2, /* int_mul_bit_factor: divisor applied to the extra-bit count */
37, /* int_div: idiv */
68, /* int_divX: idivX */
2, /* int_cmove: movcc/movr */
2, /* shift_penalty: shift penalty */
};
/* Cost table for UltraSPARC-III.  sparc_override_options selects it
   for PROCESSOR_ULTRASPARC3.  The initializers are positional, so they
   must stay in the same order as the fields of struct processor_costs;
   each comment names the field being initialized.  */
static const
struct processor_costs ultrasparc3_costs = {
2, /* int_load: int load */
3, /* int_sload: int signed load */
3, /* int_zload: int zeroed load */
2, /* float_load: float load */
3, /* float_move: fmov, fneg, fabs */
4, /* float_plusminus: fadd, fsub */
5, /* float_cmp: fcmp */
3, /* float_cmove: float conditional move (fmov, fmovr) */
4, /* float_mul: fmul */
17, /* float_div_sf: fdivs */
20, /* float_div_df: fdivd */
20, /* float_sqrt_sf: fsqrts */
29, /* float_sqrt_df: fsqrtd */
6, /* int_mul: imul */
6, /* int_mulX: imulX */
0, /* int_mul_bit_factor: 0 means the multiply cost is fixed */
40, /* int_div: idiv */
71, /* int_divX: idivX */
2, /* int_cmove: movcc/movr */
0, /* shift_penalty: shift penalty */
};
/* The cost table currently in effect.  It starts out pointing at the
   Cypress table; sparc_override_options reassigns it according to
   sparc_cpu.  */
const struct processor_costs *sparc_costs = &cypress_costs;
#ifdef HAVE_AS_RELAX_OPTION #ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
"or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized. "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
...@@ -503,6 +650,36 @@ sparc_override_options (void) ...@@ -503,6 +650,36 @@ sparc_override_options (void)
/* Set up function hooks. */ /* Set up function hooks. */
init_machine_status = sparc_init_machine_status; init_machine_status = sparc_init_machine_status;
switch (sparc_cpu)
{
case PROCESSOR_V7:
case PROCESSOR_CYPRESS:
sparc_costs = &cypress_costs;
break;
case PROCESSOR_V8:
case PROCESSOR_SPARCLITE:
case PROCESSOR_SUPERSPARC:
sparc_costs = &supersparc_costs;
break;
case PROCESSOR_F930:
case PROCESSOR_F934:
case PROCESSOR_HYPERSPARC:
case PROCESSOR_SPARCLITE86X:
sparc_costs = &hypersparc_costs;
break;
case PROCESSOR_SPARCLET:
case PROCESSOR_TSC701:
sparc_costs = &sparclet_costs;
break;
case PROCESSOR_V9:
case PROCESSOR_ULTRASPARC:
sparc_costs = &ultrasparc_costs;
break;
case PROCESSOR_ULTRASPARC3:
sparc_costs = &ultrasparc3_costs;
break;
};
} }
/* Miscellaneous utilities. */ /* Miscellaneous utilities. */
...@@ -8071,362 +8248,181 @@ sparc_extra_constraint_check (rtx op, int c, int strict) ...@@ -8071,362 +8248,181 @@ sparc_extra_constraint_check (rtx op, int c, int strict)
static bool static bool
sparc_rtx_costs (rtx x, int code, int outer_code, int *total) sparc_rtx_costs (rtx x, int code, int outer_code, int *total)
{ {
enum machine_mode mode = GET_MODE (x);
bool float_mode_p = FLOAT_MODE_P (mode);
switch (code) switch (code)
{ {
case PLUS: case MINUS: case ABS: case NEG: case CONST_INT:
case FLOAT: case UNSIGNED_FLOAT: if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
case FIX: case UNSIGNED_FIX:
case FLOAT_EXTEND: case FLOAT_TRUNCATE:
if (FLOAT_MODE_P (GET_MODE (x)))
{
switch (sparc_cpu)
{
case PROCESSOR_ULTRASPARC:
case PROCESSOR_ULTRASPARC3:
*total = COSTS_N_INSNS (4);
return true;
case PROCESSOR_SUPERSPARC:
*total = COSTS_N_INSNS (3);
return true;
case PROCESSOR_CYPRESS:
*total = COSTS_N_INSNS (5);
return true;
case PROCESSOR_HYPERSPARC:
case PROCESSOR_SPARCLITE86X:
default:
*total = COSTS_N_INSNS (1);
return true;
}
}
*total = COSTS_N_INSNS (1);
return true;
case SQRT:
switch (sparc_cpu)
{ {
case PROCESSOR_ULTRASPARC: *total = 0;
if (GET_MODE (x) == SFmode)
*total = COSTS_N_INSNS (13);
else
*total = COSTS_N_INSNS (23);
return true;
case PROCESSOR_ULTRASPARC3:
if (GET_MODE (x) == SFmode)
*total = COSTS_N_INSNS (20);
else
*total = COSTS_N_INSNS (29);
return true;
case PROCESSOR_SUPERSPARC:
*total = COSTS_N_INSNS (12);
return true;
case PROCESSOR_CYPRESS:
*total = COSTS_N_INSNS (63);
return true;
case PROCESSOR_HYPERSPARC:
case PROCESSOR_SPARCLITE86X:
*total = COSTS_N_INSNS (17);
return true;
default:
*total = COSTS_N_INSNS (30);
return true; return true;
} }
/* FALLTHRU */
case COMPARE: case HIGH:
if (FLOAT_MODE_P (GET_MODE (x))) *total = 2;
{ return true;
switch (sparc_cpu)
{
case PROCESSOR_ULTRASPARC:
case PROCESSOR_ULTRASPARC3:
*total = COSTS_N_INSNS (1);
return true;
case PROCESSOR_SUPERSPARC:
*total = COSTS_N_INSNS (3);
return true;
case PROCESSOR_CYPRESS:
*total = COSTS_N_INSNS (5);
return true;
case PROCESSOR_HYPERSPARC:
case PROCESSOR_SPARCLITE86X:
default:
*total = COSTS_N_INSNS (1);
return true;
}
}
/* ??? Maybe mark integer compares as zero cost on case CONST:
??? all UltraSPARC processors because the result case LABEL_REF:
??? can be bypassed to a branch in the same group. */ case SYMBOL_REF:
*total = 4;
return true;
*total = COSTS_N_INSNS (1); case CONST_DOUBLE:
if (GET_MODE (x) == DImode
&& ((XINT (x, 3) == 0
&& (unsigned HOST_WIDE_INT) XINT (x, 2) < 0x1000)
|| (XINT (x, 3) == -1
&& XINT (x, 2) < 0
&& XINT (x, 2) >= -0x1000)))
*total = 0;
else
*total = 8;
return true; return true;
case MULT: case MEM:
if (FLOAT_MODE_P (GET_MODE (x))) /* If outer-code was a sign or zero extension, a cost
of COSTS_N_INSNS (1) was already added in. This is
why we are subtracting it back out. */
if (outer_code == ZERO_EXTEND)
{ {
switch (sparc_cpu) *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
{
case PROCESSOR_ULTRASPARC:
case PROCESSOR_ULTRASPARC3:
*total = COSTS_N_INSNS (4);
return true;
case PROCESSOR_SUPERSPARC:
*total = COSTS_N_INSNS (3);
return true;
case PROCESSOR_CYPRESS:
*total = COSTS_N_INSNS (7);
return true;
case PROCESSOR_HYPERSPARC:
case PROCESSOR_SPARCLITE86X:
*total = COSTS_N_INSNS (1);
return true;
default:
*total = COSTS_N_INSNS (5);
return true;
}
} }
else if (outer_code == SIGN_EXTEND)
/* The latency is actually variable for Ultra-I/II
And if one of the inputs have a known constant
value, we could calculate this precisely.
However, for that to be useful we would need to
add some machine description changes which would
make sure small constants ended up in rs1 of the
multiply instruction. This is because the multiply
latency is determined by the number of clear (or
set if the value is negative) bits starting from
the most significant bit of the first input.
The algorithm for computing num_cycles of a multiply
on Ultra-I/II is:
if (rs1 < 0)
highest_bit = highest_clear_bit(rs1);
else
highest_bit = highest_set_bit(rs1);
if (num_bits < 3)
highest_bit = 3;
num_cycles = 4 + ((highest_bit - 3) / 2);
If we did that we would have to also consider register
allocation issues that would result from forcing such
a value into a register.
There are other similar tricks we could play if we
knew, for example, that one input was an array index.
Since we do not play any such tricks currently the
safest thing to do is report the worst case latency. */
if (sparc_cpu == PROCESSOR_ULTRASPARC)
{ {
*total = (GET_MODE (x) == DImode *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
? COSTS_N_INSNS (34) : COSTS_N_INSNS (19));
return true;
} }
else if (float_mode_p)
/* Multiply latency on Ultra-III, fortunately, is constant. */
if (sparc_cpu == PROCESSOR_ULTRASPARC3)
{ {
*total = COSTS_N_INSNS (6); *total = sparc_costs->float_load;
return true;
} }
else
if (sparc_cpu == PROCESSOR_HYPERSPARC
|| sparc_cpu == PROCESSOR_SPARCLITE86X)
{ {
*total = COSTS_N_INSNS (17); *total = sparc_costs->int_load;
return true;
} }
*total = (TARGET_HARD_MUL ? COSTS_N_INSNS (5) : COSTS_N_INSNS (25));
return true; return true;
case DIV: case PLUS:
case UDIV: case MINUS:
case MOD: if (float_mode_p)
case UMOD: *total = sparc_costs->float_plusminus;
if (FLOAT_MODE_P (GET_MODE (x))) else
{ *total = COSTS_N_INSNS (1);
switch (sparc_cpu) return false;
{
case PROCESSOR_ULTRASPARC:
if (GET_MODE (x) == SFmode)
*total = COSTS_N_INSNS (13);
else
*total = COSTS_N_INSNS (23);
return true;
case PROCESSOR_ULTRASPARC3: case MULT:
if (GET_MODE (x) == SFmode) if (float_mode_p)
*total = COSTS_N_INSNS (17); *total = sparc_costs->float_mul;
else else
*total = COSTS_N_INSNS (20); {
return true; int bit_cost;
case PROCESSOR_SUPERSPARC: bit_cost = 0;
if (GET_MODE (x) == SFmode) if (sparc_costs->int_mul_bit_factor)
*total = COSTS_N_INSNS (6); {
else int nbits;
*total = COSTS_N_INSNS (9);
return true;
case PROCESSOR_HYPERSPARC: if (GET_CODE (XEXP (x, 1)) == CONST_INT)
case PROCESSOR_SPARCLITE86X: {
if (GET_MODE (x) == SFmode) unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
*total = COSTS_N_INSNS (8); for (nbits = 0; value != 0; value &= value - 1)
nbits++;
}
else if (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
&& GET_MODE (XEXP (x, 1)) == DImode)
{
rtx x1 = XEXP (x, 1);
unsigned HOST_WIDE_INT value1 = XINT (x1, 2);
unsigned HOST_WIDE_INT value2 = XINT (x1, 3);
for (nbits = 0; value1 != 0; value1 &= value1 - 1)
nbits++;
for (; value2 != 0; value2 &= value2 - 1)
nbits++;
}
else else
*total = COSTS_N_INSNS (12); nbits = 7;
return true;
default: if (nbits < 3)
*total = COSTS_N_INSNS (7); nbits = 3;
return true; bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
} }
if (mode == DImode)
*total = COSTS_N_INSNS (sparc_costs->int_mulX) + bit_cost;
else
*total = COSTS_N_INSNS (sparc_costs->int_mul) + bit_cost;
} }
return false;
if (sparc_cpu == PROCESSOR_ULTRASPARC) case ASHIFT:
*total = (GET_MODE (x) == DImode case ASHIFTRT:
? COSTS_N_INSNS (68) : COSTS_N_INSNS (37)); case LSHIFTRT:
else if (sparc_cpu == PROCESSOR_ULTRASPARC3) *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
*total = (GET_MODE (x) == DImode return false;
? COSTS_N_INSNS (71) : COSTS_N_INSNS (40));
else
*total = COSTS_N_INSNS (25);
return true;
case IF_THEN_ELSE: case DIV:
/* Conditional moves. */ case UDIV:
switch (sparc_cpu) case MOD:
case UMOD:
if (float_mode_p)
{ {
case PROCESSOR_ULTRASPARC: if (mode == DFmode)
*total = COSTS_N_INSNS (2); *total = sparc_costs->float_div_df;
return true;
case PROCESSOR_ULTRASPARC3:
if (FLOAT_MODE_P (GET_MODE (x)))
*total = COSTS_N_INSNS (3);
else else
*total = COSTS_N_INSNS (2); *total = sparc_costs->float_div_sf;
return true;
default:
*total = COSTS_N_INSNS (1);
return true;
} }
else
case MEM:
/* If outer-code is SIGN/ZERO extension we have to subtract
out COSTS_N_INSNS (1) from whatever we return in determining
the cost. */
switch (sparc_cpu)
{ {
case PROCESSOR_ULTRASPARC: if (mode == DImode)
if (outer_code == ZERO_EXTEND) *total = sparc_costs->int_divX;
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (2);
return true;
case PROCESSOR_ULTRASPARC3:
if (outer_code == ZERO_EXTEND)
{
if (GET_MODE (x) == QImode
|| GET_MODE (x) == HImode
|| outer_code == SIGN_EXTEND)
*total = COSTS_N_INSNS (2);
else
*total = COSTS_N_INSNS (1);
}
else
{
/* This handles sign extension (3 cycles)
and everything else (2 cycles). */
*total = COSTS_N_INSNS (2);
}
return true;
case PROCESSOR_SUPERSPARC:
if (FLOAT_MODE_P (GET_MODE (x))
|| outer_code == ZERO_EXTEND
|| outer_code == SIGN_EXTEND)
*total = COSTS_N_INSNS (0);
else
*total = COSTS_N_INSNS (1);
return true;
case PROCESSOR_TSC701:
if (outer_code == ZERO_EXTEND
|| outer_code == SIGN_EXTEND)
*total = COSTS_N_INSNS (2);
else else
*total = COSTS_N_INSNS (3); *total = sparc_costs->int_div;
return true;
case PROCESSOR_CYPRESS:
if (outer_code == ZERO_EXTEND
|| outer_code == SIGN_EXTEND)
*total = COSTS_N_INSNS (1);
else
*total = COSTS_N_INSNS (2);
return true;
case PROCESSOR_HYPERSPARC:
case PROCESSOR_SPARCLITE86X:
default:
if (outer_code == ZERO_EXTEND
|| outer_code == SIGN_EXTEND)
*total = COSTS_N_INSNS (0);
else
*total = COSTS_N_INSNS (1);
return true;
} }
return false;
case CONST_INT: case NEG:
if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000) if (! float_mode_p)
{ {
*total = 0; *total = COSTS_N_INSNS (1);
return true; return false;
} }
/* FALLTHRU */ /* FALLTHRU */
case HIGH: case ABS:
*total = 2; case FLOAT:
return true; case UNSIGNED_FLOAT:
case FIX:
case UNSIGNED_FIX:
case FLOAT_EXTEND:
case FLOAT_TRUNCATE:
*total = sparc_costs->float_move;
return false;
case CONST: case SQRT:
case LABEL_REF: if (mode == DFmode)
case SYMBOL_REF: *total = sparc_costs->float_sqrt_df;
*total = 4; else
return true; *total = sparc_costs->float_sqrt_sf;
return false;
case CONST_DOUBLE: case COMPARE:
if (GET_MODE (x) == DImode if (float_mode_p)
&& ((XINT (x, 3) == 0 *total = sparc_costs->float_cmp;
&& (unsigned HOST_WIDE_INT) XINT (x, 2) < 0x1000)
|| (XINT (x, 3) == -1
&& XINT (x, 2) < 0
&& XINT (x, 2) >= -0x1000)))
*total = 0;
else else
*total = 8; *total = COSTS_N_INSNS (1);
return true; return false;
case IF_THEN_ELSE:
if (float_mode_p)
*total = sparc_costs->float_cmove;
else
*total = sparc_costs->int_cmove;
return false;
default: default:
return false; return false;
......
...@@ -25,6 +25,84 @@ Boston, MA 02111-1307, USA. */ ...@@ -25,6 +25,84 @@ Boston, MA 02111-1307, USA. */
/* Note that some other tm.h files include this one and then override /* Note that some other tm.h files include this one and then override
whatever definitions are necessary. */ whatever definitions are necessary. */
/* Instruction costs for one specific cpu.  sparc.c defines one constant
   table of this type per processor family (cypress_costs,
   ultrasparc_costs, ...) using positional initializers, so fields must
   not be reordered or inserted without updating every table.
   NOTE(review): in sparc_rtx_costs as changed alongside this
   definition, int_mul and int_mulX are wrapped in COSTS_N_INSNS while
   the remaining fields are used as raw values, and the load costs have
   COSTS_N_INSNS (1) subtracted from them -- confirm the intended units.  */
struct processor_costs {
/* Integer load.  */
const int int_load;
/* Integer load that sign-extends the loaded value.  */
const int int_sload;
/* Integer load that zero-extends the loaded value.  */
const int int_zload;
/* Floating-point load.  */
const int float_load;
/* Floating-point move-class operations: fmov, fneg, fabs.  */
const int float_move;
/* Floating-point add and subtract: fadd, fsub.  */
const int float_plusminus;
/* Floating-point compare: fcmp.  */
const int float_cmp;
/* Floating-point conditional move: fmov, fmovr.  */
const int float_cmove;
/* Floating-point multiply: fmul.  */
const int float_mul;
/* Single-precision floating-point divide: fdivs.  */
const int float_div_sf;
/* Double-precision floating-point divide: fdivd.  */
const int float_div_df;
/* Single-precision floating-point square root: fsqrts.  */
const int float_sqrt_sf;
/* Double-precision floating-point square root: fsqrtd.  */
const int float_sqrt_df;
/* Integer multiply: umul/smul.  */
const int int_mul;
/* 64-bit integer multiply: mulX.  */
const int int_mulX;
/* Integer multiply cost for each bit set past the most
significant 3, so the formula for multiply cost becomes:
if (rs1 < 0)
highest_bit = highest_clear_bit(rs1);
else
highest_bit = highest_set_bit(rs1);
if (highest_bit < 3)
highest_bit = 3;
cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
A value of zero indicates that the multiply cost is fixed,
and not variable.
NOTE(review): sparc_rtx_costs, as visible with this change, counts the
set bits of a constant second operand (assuming 7 when the operand is
not constant) instead of locating the highest bit -- confirm which of
the two is intended.  */
const int int_mul_bit_factor;
/* Integer divide: udiv/sdiv.  */
const int int_div;
/* 64-bit integer divide: divX.  */
const int int_divX;
/* Integer conditional move: movcc, movr.  */
const int int_cmove;
/* Penalty added to the cost of a shift, due to scheduling rules etc.  */
const int shift_penalty;
};
/* Cost table for the cpu being compiled for; defined in sparc.c and
   assigned in sparc_override_options.  */
extern const struct processor_costs *sparc_costs;
/* Target CPU builtins. FIXME: Defining sparc is for the benefit of /* Target CPU builtins. FIXME: Defining sparc is for the benefit of
Solaris only; otherwise just define __sparc__. Sadly the headers Solaris only; otherwise just define __sparc__. Sadly the headers
are such a mess there is no Solaris-specific header. */ are such a mess there is no Solaris-specific header. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment