Commit bcbbac26 by Richard Henderson Committed by Richard Henderson

alpha.c (alpha_cpu_name): New variable.

* alpha.c (alpha_cpu_name): New variable.
(alpha_mlat_string): Likewise.
(alpha_memory_latency): Likewise.
(override_options): Handle -mmemory-latency.
(alpha_adjust_cost): Adjust load cost for latency.
* alpha.h (TARGET_OPTIONS): Add memory-latency.
(REGISTER_MOVE_COST): Define in terms of memory_latency.  Take
TARGET_CIX into account.
(MEMORY_MOVE_COST): Define in terms of memory_latency.
* invoke.texi (DEC Alpha Options): Document -mmemory-latency.
* alpha.h (ASM_COMMENT_START): New macro.

From-SVN: r17106
parent 1914f5da
Mon Dec 15 17:48:05 1997 Richard Henderson <rth@cygnus.com> Mon Dec 15 18:31:43 1997 Richard Henderson <rth@cygnus.com>
* alpha.c (alpha_cpu_name): New variable.
(alpha_mlat_string): Likewise.
(alpha_memory_latency): Likewise.
(override_options): Handle -mmemory-latency.
(alpha_adjust_cost): Adjust load cost for latency.
* alpha.h (TARGET_OPTIONS): Add memory-latency.
(REGISTER_MOVE_COST): Define in terms of memory_latency. Take
TARGET_CIX into account.
(MEMORY_MOVE_COST): Define in terms of memory_latency.
* invoke.texi (DEC Alpha Options): Document -mmemory-latency.
* alpha.h (ASM_COMMENT_START): New macro.
Mon Dec 15 17:48:05 1997 Richard Henderson <rth@cygnus.com>
* reload.h, reload1.c (eliminate_regs), caller-save.c, dbxout.c, * reload.h, reload1.c (eliminate_regs), caller-save.c, dbxout.c,
dwarfout.c, dwarf2out.c, reload.c, sdbout.c: Revert March 15 change. dwarfout.c, dwarf2out.c, reload.c, sdbout.c: Revert March 15 change.
......
...@@ -22,6 +22,7 @@ Boston, MA 02111-1307, USA. */ ...@@ -22,6 +22,7 @@ Boston, MA 02111-1307, USA. */
#include "config.h" #include "config.h"
#include <stdio.h> #include <stdio.h>
#include <ctype.h>
#include "rtl.h" #include "rtl.h"
#include "regs.h" #include "regs.h"
#include "hard-reg-set.h" #include "hard-reg-set.h"
...@@ -47,6 +48,10 @@ extern int rtx_equal_function_value_matters; ...@@ -47,6 +48,10 @@ extern int rtx_equal_function_value_matters;
/* Specify which cpu to schedule for. */ /* Specify which cpu to schedule for. */
enum processor_type alpha_cpu; enum processor_type alpha_cpu;
static char* const alpha_cpu_name[] =
{
"ev4", "ev5", "ev6"
};
/* Specify how accurate floating-point traps need to be. */ /* Specify how accurate floating-point traps need to be. */
...@@ -62,10 +67,11 @@ enum alpha_fp_trap_mode alpha_fptm; ...@@ -62,10 +67,11 @@ enum alpha_fp_trap_mode alpha_fptm;
/* Strings decoded into the above options. */ /* Strings decoded into the above options. */
char *alpha_cpu_string; /* -mcpu=ev[4|5] */ char *alpha_cpu_string; /* -mcpu= */
char *alpha_tp_string; /* -mtrap-precision=[p|s|i] */ char *alpha_tp_string; /* -mtrap-precision=[p|s|i] */
char *alpha_fprm_string; /* -mfp-rounding-mode=[n|m|c|d] */ char *alpha_fprm_string; /* -mfp-rounding-mode=[n|m|c|d] */
char *alpha_fptm_string; /* -mfp-trap-mode=[n|u|su|sui] */ char *alpha_fptm_string; /* -mfp-trap-mode=[n|u|su|sui] */
char *alpha_mlat_string; /* -mmemory-latency= */
/* Save information from a "cmpxx" operation until the branch or scc is /* Save information from a "cmpxx" operation until the branch or scc is
emitted. */ emitted. */
...@@ -91,6 +97,10 @@ int alpha_function_needs_gp; ...@@ -91,6 +97,10 @@ int alpha_function_needs_gp;
static rtx alpha_return_addr_rtx; static rtx alpha_return_addr_rtx;
/* The number of cycles of latency we should assume on memory reads. */
int alpha_memory_latency = 3;
/* Declarations of static functions. */ /* Declarations of static functions. */
static void alpha_set_memflags_1 PROTO((rtx, int, int, int)); static void alpha_set_memflags_1 PROTO((rtx, int, int, int));
static rtx alpha_emit_set_const_1 PROTO((rtx, enum machine_mode, static rtx alpha_emit_set_const_1 PROTO((rtx, enum machine_mode,
...@@ -243,6 +253,52 @@ override_options () ...@@ -243,6 +253,52 @@ override_options ()
alpha_fptm = ALPHA_FPTM_SU; alpha_fptm = ALPHA_FPTM_SU;
} }
} }
{
char *end;
int lat;
if (!alpha_mlat_string)
alpha_mlat_string = "L1";
if (isdigit (alpha_mlat_string[0])
&& (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
;
else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
&& isdigit (alpha_mlat_string[1])
&& alpha_mlat_string[2] == '\0')
{
static int const cache_latency[][4] =
{
{ 3, 30, -1 }, /* ev4 -- Bcache is a guess */
{ 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
{ 3, 12, -1 }, /* ev6 -- Ho hum, doesn't exist yet */
};
lat = alpha_mlat_string[1] - '0';
if (lat < 0 || lat > 3 || cache_latency[alpha_cpu][lat-1] == -1)
{
warning ("L%d cache latency unknown for %s",
lat, alpha_cpu_name[alpha_cpu]);
lat = 3;
}
else
lat = cache_latency[alpha_cpu][lat-1];
}
else if (! strcmp (alpha_mlat_string, "main"))
{
/* Most current memories have about 370ns latency. This is
a reasonable guess for a fast cpu. */
lat = 150;
}
else
{
warning ("bad value `%s' for -mmemory-latency", alpha_mlat_string);
lat = 3;
}
alpha_memory_latency = lat;
}
} }
/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */ /* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */
...@@ -1217,6 +1273,10 @@ alpha_adjust_cost (insn, link, dep_insn, cost) ...@@ -1217,6 +1273,10 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
insn_type = get_attr_type (insn); insn_type = get_attr_type (insn);
dep_insn_type = get_attr_type (dep_insn); dep_insn_type = get_attr_type (dep_insn);
/* Bring in the user-defined memory latency. */
if (dep_insn_type == TYPE_LD || dep_insn_type == TYPE_LDSYM)
cost += alpha_memory_latency-1;
if (alpha_cpu == PROCESSOR_EV5) if (alpha_cpu == PROCESSOR_EV5)
{ {
/* And the lord DEC saith: "A special bypass provides an effective /* And the lord DEC saith: "A special bypass provides an effective
......
...@@ -245,10 +245,11 @@ extern enum alpha_fp_trap_mode alpha_fptm; ...@@ -245,10 +245,11 @@ extern enum alpha_fp_trap_mode alpha_fptm;
extern char *m88k_short_data; extern char *m88k_short_data;
#define TARGET_OPTIONS { { "short-data-", &m88k_short_data } } */ #define TARGET_OPTIONS { { "short-data-", &m88k_short_data } } */
extern char *alpha_cpu_string; /* For -mcpu=ev[4|5] */ extern char *alpha_cpu_string; /* For -mcpu= */
extern char *alpha_fprm_string; /* For -mfp-rounding-mode=[n|m|c|d] */ extern char *alpha_fprm_string; /* For -mfp-rounding-mode=[n|m|c|d] */
extern char *alpha_fptm_string; /* For -mfp-trap-mode=[n|u|su|sui] */ extern char *alpha_fptm_string; /* For -mfp-trap-mode=[n|u|su|sui] */
extern char *alpha_tp_string; /* For -mtrap-precision=[p|f|i] */ extern char *alpha_tp_string; /* For -mtrap-precision=[p|f|i] */
extern char *alpha_mlat_string; /* For -mmemory-latency= */
#define TARGET_OPTIONS \ #define TARGET_OPTIONS \
{ \ { \
...@@ -256,6 +257,7 @@ extern char *alpha_tp_string; /* For -mtrap-precision=[p|f|i] */ ...@@ -256,6 +257,7 @@ extern char *alpha_tp_string; /* For -mtrap-precision=[p|f|i] */
{"fp-rounding-mode=", &alpha_fprm_string}, \ {"fp-rounding-mode=", &alpha_fprm_string}, \
{"fp-trap-mode=", &alpha_fptm_string}, \ {"fp-trap-mode=", &alpha_fptm_string}, \
{"trap-precision=", &alpha_tp_string}, \ {"trap-precision=", &alpha_tp_string}, \
{"memory-latency=", &alpha_mlat_string}, \
} }
/* Sometimes certain combinations of command options do not make sense /* Sometimes certain combinations of command options do not make sense
...@@ -792,15 +794,17 @@ enum reg_class { NO_REGS, GENERAL_REGS, FLOAT_REGS, ALL_REGS, ...@@ -792,15 +794,17 @@ enum reg_class { NO_REGS, GENERAL_REGS, FLOAT_REGS, ALL_REGS,
reduce the impact of not being able to allocate a pseudo to a reduce the impact of not being able to allocate a pseudo to a
hard register. */ hard register. */
#define REGISTER_MOVE_COST(CLASS1, CLASS2) \ #define REGISTER_MOVE_COST(CLASS1, CLASS2) \
(((CLASS1) == FLOAT_REGS) == ((CLASS2) == FLOAT_REGS) ? 2 : 20) (TARGET_CIX || ((CLASS1) == FLOAT_REGS) == ((CLASS2) == FLOAT_REGS) \
? 2 : 4+2*alpha_memory_latency)
/* A C expressions returning the cost of moving data of MODE from a register to /* A C expressions returning the cost of moving data of MODE from a register to
or from memory. or from memory.
On the Alpha, bump this up a bit. */ On the Alpha, bump this up a bit. */
#define MEMORY_MOVE_COST(MODE) 6 extern int alpha_memory_latency;
#define MEMORY_MOVE_COST(MODE) (2*alpha_memory_latency)
/* Provide the cost of a branch. Exact meaning under development. */ /* Provide the cost of a branch. Exact meaning under development. */
#define BRANCH_COST 5 #define BRANCH_COST 5
...@@ -1107,6 +1111,10 @@ extern int alpha_compare_fp_p; ...@@ -1107,6 +1111,10 @@ extern int alpha_compare_fp_p;
IS_LOCAL is 0 if name is used in call, 1 if name is used in definition. */ IS_LOCAL is 0 if name is used in call, 1 if name is used in definition. */
extern void alpha_need_linkage (); extern void alpha_need_linkage ();
/* This macro defines the start of an assembly comment. */
#define ASM_COMMENT_START " #"
/* This macro produces the initial definition of a function name. On the /* This macro produces the initial definition of a function name. On the
Alpha, we need to save the function name for the prologue and epilogue. */ Alpha, we need to save the function name for the prologue and epilogue. */
......
...@@ -45,11 +45,13 @@ ...@@ -45,11 +45,13 @@
;; the address, BBOX, used for branches, EBOX, used for integer ;; the address, BBOX, used for branches, EBOX, used for integer
;; operations, and FBOX, used for FP operations. ;; operations, and FBOX, used for FP operations.
;; Memory delivers its result in three cycles. ;; Memory delivers its result in three cycles. Actually return one and
;; take care of this in adjust_cost, since we want to handle user-defined
;; memory latencies.
(define_function_unit "ev4_abox" 1 0 (define_function_unit "ev4_abox" 1 0
(and (eq_attr "cpu" "ev4") (and (eq_attr "cpu" "ev4")
(eq_attr "type" "ld,ldsym,st")) (eq_attr "type" "ld,ldsym,st"))
3 1) 1 1)
;; Branches have no delay cost, but do tie up the unit for two cycles. ;; Branches have no delay cost, but do tie up the unit for two cycles.
(define_function_unit "ev4_bbox" 1 1 (define_function_unit "ev4_bbox" 1 1
...@@ -127,10 +129,11 @@ ...@@ -127,10 +129,11 @@
1 1) 1 1)
;; Memory takes at least 2 clocks, and load cannot dual issue with stores. ;; Memory takes at least 2 clocks, and load cannot dual issue with stores.
;; Return one from here and fix up with user-defined latencies in adjust_cost.
(define_function_unit "ev5_ebox" 2 0 (define_function_unit "ev5_ebox" 2 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
(eq_attr "type" "ld,ldsym")) (eq_attr "type" "ld,ldsym"))
2 1) 1 1)
(define_function_unit "ev5_e0" 1 0 (define_function_unit "ev5_e0" 1 0
(and (eq_attr "cpu" "ev5") (and (eq_attr "cpu" "ev5")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment