Commit b4e89e2d by Jan Hubicka Committed by Jan Hubicka

i386.c (pentium4_cost): New.

	* i386.c (pentium4_cost): New.
	(m_PENT4): New macro.
	(x86_push_memory, x86_movx, x86_cmove, x86_deep_branch, x86_use_sahf,
	x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8,
	x86_integer_DFmode_moves, x86_partial_reg_dependency,
	x86_memory_mismatch_stall): Add Pentium4.
	(x86_use_q_reg, x86_use_any_reg): Kill.
	(override_options): Add pentium4.
	(incdec_operand): Return 0 for pentium4.
	(ix86_issue_rate): Add PROCESSOR_PENTIUM4 and PROCESSOR_ATHLON.
	* i386.h (x86_use_q_reg, x86_use_any_reg): Kill.
	(TARGET_PENTIUM4): Define.
	(enum processor_type): Add PROCESSOR_PENTIUM4.
	(CPP_CPU_DEFAULT_SPEC): Add pentium4 support.
	* i386.md (attribute "cpu"): Add pentium4.
	* invoke.texi (march): Add pentium4.

From-SVN: r40134
parent 0073023d
Wed Feb 28 19:31:42 CET 2001 Jan Hubicka <jh@suse.cz>
* i386.c (pentium4_cost): New.
(m_PENT4): New macro.
(x86_push_memory, x86_movx, x86_cmove, x86_deep_branch, x86_use_sahf,
x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8,
x86_integer_DFmode_moves, x86_partial_reg_dependency,
x86_memory_mismatch_stall): Add Pentium4.
(x86_use_q_reg, x86_use_any_reg): Kill.
(override_options): Add pentium4.
(incdec_operand): Return 0 for pentium4.
(ix86_issue_rate): Add PROCESSOR_PENTIUM4 and PROCESSOR_ATHLON.
* i386.h (x86_use_q_reg, x86_use_any_reg): Kill.
(TARGET_PENTIUM4): Define.
(enum processor_type): Add PROCESSOR_PENTIUM4.
(CPP_CPU_DEFAULT_SPEC): Add pentium4 support.
* i386.md (attribute "cpu"): Add pentium4.
* invoke.texi (march): Add pentium4.
Wed Feb 28 19:28:06 CET 2001 Jan Hubicka <jh@suse.cz> Wed Feb 28 19:28:06 CET 2001 Jan Hubicka <jh@suse.cz>
* i386.md (sse_mov?fcc*): New patterns and splitters. * i386.md (sse_mov?fcc*): New patterns and splitters.
......
...@@ -239,6 +239,38 @@ struct processor_costs athlon_cost = { ...@@ -239,6 +239,38 @@ struct processor_costs athlon_cost = {
6 /* MMX or SSE register to integer */ 6 /* MMX or SSE register to integer */
}; };
/* Cost table for the Pentium 4.  It is referenced from the per-processor
   table in override_options.  The initializer is positional, so the order
   of the entries must not be changed.
   NOTE(review): the field layout comes from struct processor_costs, which
   is declared outside this view -- confirm the labels below against it.  */
struct processor_costs pentium4_cost = {
1, /* cost of an add instruction */
1, /* cost of a lea instruction */
8, /* variable shift costs */
8, /* constant shift costs */
30, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
112, /* cost of a divide/mod */
16, /* "large" insn */
6, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 3, 2}, /* cost of storing integer registers */
2, /* cost of reg,reg fld/fst */
{2, 2, 6}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
{4, 4, 6}, /* cost of storing fp registers in SFmode, DFmode and XFmode.
NOTE(review): this was labelled "loading integer registers",
which duplicates the integer-load entry above; judging by its
position after the fp-load costs it is presumably the fp-store
cost -- confirm against struct processor_costs. */
2, /* cost of moving MMX register */
{2, 2}, /* cost of loading MMX registers
in SImode and DImode */
{2, 2}, /* cost of storing MMX registers
in SImode and DImode */
12, /* cost of moving SSE register */
{12, 12, 12}, /* cost of loading SSE registers
in SImode, DImode and TImode */
{2, 2, 8}, /* cost of storing SSE registers
in SImode, DImode and TImode */
10, /* MMX or SSE register to integer */
};
struct processor_costs *ix86_cost = &pentium_cost; struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks. */ /* Processor feature/optimization bitmasks. */
...@@ -248,19 +280,18 @@ struct processor_costs *ix86_cost = &pentium_cost; ...@@ -248,19 +280,18 @@ struct processor_costs *ix86_cost = &pentium_cost;
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO) #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6) #define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON) #define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
const int x86_use_leave = m_386 | m_K6 | m_ATHLON; const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON; const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT; const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO /* m_386 | m_K6 */; const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386; const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386; const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6; const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_use_q_reg = m_PENT | m_PPRO | m_K6; const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_use_any_reg = m_486; const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_cmove = m_PPRO | m_ATHLON; const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON;
const int x86_use_sahf = m_PPRO | m_K6;
const int x86_partial_reg_stall = m_PPRO; const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6; const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT); const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
...@@ -270,18 +301,18 @@ const int x86_read_modify_write = ~m_PENT; ...@@ -270,18 +301,18 @@ const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO); const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO; const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486; const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486;
const int x86_single_stringop = m_386; const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0); const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0; const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO); const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO; const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO; const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486; const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6; const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486; const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~m_ATHLON; const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4);
const int x86_partial_reg_dependency = m_ATHLON; const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON; const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx)) #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
...@@ -577,7 +608,8 @@ override_options () ...@@ -577,7 +608,8 @@ override_options ()
{&pentium_cost, 0, 0, -4, -4, -4, 1}, {&pentium_cost, 0, 0, -4, -4, -4, 1},
{&pentiumpro_cost, 0, 0, 4, -4, 4, 1}, {&pentiumpro_cost, 0, 0, 4, -4, 4, 1},
{&k6_cost, 0, 0, -5, -5, 4, 1}, {&k6_cost, 0, 0, -5, -5, 4, 1},
{&athlon_cost, 0, 0, 4, -4, 4, 1} {&athlon_cost, 0, 0, 4, -4, 4, 1},
{&pentium4_cost, 0, 0, 2, 2, 2, 1}
}; };
static struct pta static struct pta
...@@ -595,6 +627,7 @@ override_options () ...@@ -595,6 +627,7 @@ override_options ()
{"pentiumpro", PROCESSOR_PENTIUMPRO}, {"pentiumpro", PROCESSOR_PENTIUMPRO},
{"k6", PROCESSOR_K6}, {"k6", PROCESSOR_K6},
{"athlon", PROCESSOR_ATHLON}, {"athlon", PROCESSOR_ATHLON},
{"pentium4", PROCESSOR_PENTIUM4},
}; };
int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta); int const pta_size = sizeof (processor_alias_table) / sizeof (struct pta);
...@@ -1202,6 +1235,10 @@ incdec_operand (op, mode) ...@@ -1202,6 +1235,10 @@ incdec_operand (op, mode)
register rtx op; register rtx op;
enum machine_mode mode; enum machine_mode mode;
{ {
/* On Pentium4, the inc and dec operations cause an extra dependency on the
flags register, since they leave the carry flag unmodified. */
if (TARGET_PENTIUM4 && !optimize_size)
return 0;
if (op == const1_rtx || op == constm1_rtx) if (op == const1_rtx || op == constm1_rtx)
return 1; return 1;
if (GET_CODE (op) != CONST_INT) if (GET_CODE (op) != CONST_INT)
...@@ -6900,6 +6937,8 @@ ix86_issue_rate () ...@@ -6900,6 +6937,8 @@ ix86_issue_rate ()
return 2; return 2;
case PROCESSOR_PENTIUMPRO: case PROCESSOR_PENTIUMPRO:
case PROCESSOR_PENTIUM4:
case PROCESSOR_ATHLON:
return 3; return 3;
default: default:
......
...@@ -184,11 +184,12 @@ extern int target_flags; ...@@ -184,11 +184,12 @@ extern int target_flags;
#define TARGET_PENTIUMPRO (ix86_cpu == PROCESSOR_PENTIUMPRO) #define TARGET_PENTIUMPRO (ix86_cpu == PROCESSOR_PENTIUMPRO)
#define TARGET_K6 (ix86_cpu == PROCESSOR_K6) #define TARGET_K6 (ix86_cpu == PROCESSOR_K6)
#define TARGET_ATHLON (ix86_cpu == PROCESSOR_ATHLON) #define TARGET_ATHLON (ix86_cpu == PROCESSOR_ATHLON)
#define TARGET_PENTIUM4 (ix86_cpu == PROCESSOR_PENTIUM4)
#define CPUMASK (1 << ix86_cpu) #define CPUMASK (1 << ix86_cpu)
extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and; extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
extern const int x86_use_bit_test, x86_cmove, x86_deep_branch; extern const int x86_use_bit_test, x86_cmove, x86_deep_branch;
extern const int x86_unroll_strlen, x86_use_q_reg, x86_use_any_reg; extern const int x86_unroll_strlen;
extern const int x86_double_with_add, x86_partial_reg_stall, x86_movx; extern const int x86_double_with_add, x86_partial_reg_stall, x86_movx;
extern const int x86_use_loop, x86_use_fiop, x86_use_mov0; extern const int x86_use_loop, x86_use_fiop, x86_use_mov0;
extern const int x86_use_cltd, x86_read_modify_write; extern const int x86_use_cltd, x86_read_modify_write;
...@@ -204,8 +205,6 @@ extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall; ...@@ -204,8 +205,6 @@ extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
#define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & CPUMASK) #define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & CPUMASK)
#define TARGET_USE_BIT_TEST (x86_use_bit_test & CPUMASK) #define TARGET_USE_BIT_TEST (x86_use_bit_test & CPUMASK)
#define TARGET_UNROLL_STRLEN (x86_unroll_strlen & CPUMASK) #define TARGET_UNROLL_STRLEN (x86_unroll_strlen & CPUMASK)
#define TARGET_USE_Q_REG (x86_use_q_reg & CPUMASK)
#define TARGET_USE_ANY_REG (x86_use_any_reg & CPUMASK)
/* For sane SSE instruction set generation we need fcomi instruction. It is /* For sane SSE instruction set generation we need fcomi instruction. It is
safe to enable all CMOVE instructions. */ safe to enable all CMOVE instructions. */
#define TARGET_CMOVE ((x86_cmove & (1 << ix86_arch)) || TARGET_SSE) #define TARGET_CMOVE ((x86_cmove & (1 << ix86_arch)) || TARGET_SSE)
...@@ -345,6 +344,7 @@ enum processor_type ...@@ -345,6 +344,7 @@ enum processor_type
PROCESSOR_PENTIUMPRO, PROCESSOR_PENTIUMPRO,
PROCESSOR_K6, PROCESSOR_K6,
PROCESSOR_ATHLON, PROCESSOR_ATHLON,
PROCESSOR_PENTIUM4,
PROCESSOR_max PROCESSOR_max
}; };
...@@ -431,6 +431,9 @@ extern int ix86_arch; ...@@ -431,6 +431,9 @@ extern int ix86_arch;
#if TARGET_CPU_DEFAULT == 5 #if TARGET_CPU_DEFAULT == 5
#define CPP_CPU_DEFAULT_SPEC "-D__tune_athlon__" #define CPP_CPU_DEFAULT_SPEC "-D__tune_athlon__"
#endif #endif
#if TARGET_CPU_DEFAULT == 6
#define CPP_CPU_DEFAULT_SPEC "-D__tune_pentium4__"
#endif
#ifndef CPP_CPU_DEFAULT_SPEC #ifndef CPP_CPU_DEFAULT_SPEC
#define CPP_CPU_DEFAULT_SPEC "-D__tune_i386__" #define CPP_CPU_DEFAULT_SPEC "-D__tune_i386__"
#endif #endif
...@@ -449,12 +452,14 @@ extern int ix86_arch; ...@@ -449,12 +452,14 @@ extern int ix86_arch;
%{!mcpu*:-D__tune_i686__ -D__tune_pentiumpro__ }}\ %{!mcpu*:-D__tune_i686__ -D__tune_pentiumpro__ }}\
%{march=k6:-D__k6 -D__k6__ %{!mcpu*:-D__tune_k6__ }}\ %{march=k6:-D__k6 -D__k6__ %{!mcpu*:-D__tune_k6__ }}\
%{march=athlon:-D__athlon -D__athlon__ %{!mcpu*:-D__tune_athlon__ }}\ %{march=athlon:-D__athlon -D__athlon__ %{!mcpu*:-D__tune_athlon__ }}\
%{mpentium4=pentium4:-D__pentium4 -D__pentium4__ %{!mcpu*:-D__tune_pentium4__ }}\
%{m386|mcpu=i386:-D__tune_i386__ }\ %{m386|mcpu=i386:-D__tune_i386__ }\
%{m486|mcpu=i486:-D__tune_i486__ }\ %{m486|mcpu=i486:-D__tune_i486__ }\
%{mpentium|mcpu=pentium|mcpu=i586:-D__tune_i586__ -D__tune_pentium__ }\ %{mpentium|mcpu=pentium|mcpu=i586:-D__tune_i586__ -D__tune_pentium__ }\
%{mpentiumpro|mcpu=pentiumpro|mcpu=i686:-D__tune_i686__ -D__tune_pentiumpro__ }\ %{mpentiumpro|mcpu=pentiumpro|mcpu=i686:-D__tune_i686__ -D__tune_pentiumpro__ }\
%{mcpu=k6:-D__tune_k6__ }\ %{mcpu=k6:-D__tune_k6__ }\
%{mcpu=athlon:-D__tune_athlon__ }\ %{mcpu=athlon:-D__tune_athlon__ }\
%{mcpu=pentium4:-D__tune_pentium4__ }\
%{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}" %{!march*:%{!mcpu*:%{!m386:%{!m486:%{!mpentium*:%(cpp_cpu_default)}}}}}"
#endif #endif
......
...@@ -97,7 +97,7 @@ ...@@ -97,7 +97,7 @@
;; Processor type. This attribute must exactly match the processor_type ;; Processor type. This attribute must exactly match the processor_type
;; enumeration in i386.h. ;; enumeration in i386.h.
(define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon" (define_attr "cpu" "i386,i486,pentium,pentiumpro,k6,athlon,pentium4"
(const (symbol_ref "ix86_cpu"))) (const (symbol_ref "ix86_cpu")))
;; A basic instruction type. Refinements due to arguments to be ;; A basic instruction type. Refinements due to arguments to be
......
...@@ -6534,14 +6534,14 @@ These @samp{-m} options are defined for the i386 family of computers: ...@@ -6534,14 +6534,14 @@ These @samp{-m} options are defined for the i386 family of computers:
Assume the defaults for the machine type @var{cpu type} when scheduling Assume the defaults for the machine type @var{cpu type} when scheduling
instructions. The choices for @var{cpu type} are @samp{i386}, instructions. The choices for @var{cpu type} are @samp{i386},
@samp{i486}, @samp{i586}, @samp{i686}, @samp{pentium}, @samp{i486}, @samp{i586}, @samp{i686}, @samp{pentium},
@samp{pentiumpro}, @samp{k6}, and @samp{athlon} @samp{pentiumpro}, @samp{pentium4}, @samp{k6}, and @samp{athlon}
While picking a specific @var{cpu type} will schedule things appropriately While picking a specific @var{cpu type} will schedule things appropriately
for that particular chip, the compiler will not generate any code that for that particular chip, the compiler will not generate any code that
does not run on the i386 without the @samp{-march=@var{cpu type}} option does not run on the i386 without the @samp{-march=@var{cpu type}} option
being used. @samp{i586} is equivalent to @samp{pentium} and @samp{i686} being used. @samp{i586} is equivalent to @samp{pentium} and @samp{i686}
is equivalent to @samp{pentiumpro}. @samp{k6} is the AMD chip as is equivalent to @samp{pentiumpro}. @samp{k6} and @samp{athlon} are the
opposed to the Intel ones. AMD chips as opposed to the Intel ones.
@item -march=@var{cpu type} @item -march=@var{cpu type}
Generate instructions for the machine type @var{cpu type}. The choices Generate instructions for the machine type @var{cpu type}. The choices
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment