Commit 0b5107cf by Jan Hubicka Committed by Jan Hubicka

i386.c (athlon_cost): Fix lea, divide and XFmode move costs.

	* i386.c (athlon_cost): Fix lea, divide and XFmode move costs.
	(x86_integer_DFmode_moves, x86_partial_reg_dependency,
	 x86_memory_mismatch_stall): New global variables.
	(ix86_adjust_cost): Handle MEMORY_BOTH on places MEMORY_STORE was only
	allowed; fix load penalties for Athlon.
	* i386.h (x86_integer_DFmode_moves, x86_partial_reg_dependency,
	x86_memory_mismatch_stall): Declare.
	(TARGET_INTEGER_DFMODE_MOVES, TARGET_PARTIAL_REG_DEPENDENCY,
	 TARGET_MEMORY_MISMATCH_STALL): New.
	* i386.md (athlon scheduling parameters): Fix latencies according to
	Athlon Optimization Manual.
	(sahf, xchg, fldcw, leave instruction patterns): Set athlon_decode to
	vector.
	(fsqrt instruction patterns): Set athlon_decode to direct.
	(movhi_1): Promote for TARGET_PARTIAL_REG_DEPENDENCY and for
	PARTIAL_REGISTER_STALL with !TARGET_HIMODE_MATH machines.
	(movqi_1): Handle promoting correctly for TARGET_PARTIAL_REG_DEPENDENCY
	and TARGET_PARTIAL_REGISTER_STALL machines.
	(pushdf_nointeger): New pattern.
	(pushdf_integer): Rename from pushdf.
	(movdf_nointeger): Enable for !TARGET_INTEGER_DFMODE_MOVES machines.
	(movdf_integer): Disable for !TARGET_INTEGER_DFMODE_MOVES machines.

From-SVN: r33215
parent 10f13594
Mon Apr 17 23:35:29 MET DST 2000 Jan Hubicka <jh@suse.cz>
* i386.c (athlon_cost): Fix lea, divide and XFmode move costs.
(x86_integer_DFmode_moves, x86_partial_reg_dependency,
x86_memory_mismatch_stall): New global variables.
(ix86_adjust_cost): Handle MEMORY_BOTH on places MEMORY_STORE was only
allowed; fix load penalties for Athlon.
* i386.h (x86_integer_DFmode_moves, x86_partial_reg_dependency,
x86_memory_mismatch_stall): Declare.
(TARGET_INTEGER_DFMODE_MOVES, TARGET_PARTIAL_REG_DEPENDENCY,
TARGET_MEMORY_MISMATCH_STALL): New.
* i386.md (athlon scheduling parameters): Fix latencies according to
Athlon Optimization Manual.
(sahf, xchg, fldcw, leave instruction patterns): Set athlon_decode to
vector.
(fsqrt instruction patterns): Set athlon_decode to direct.
(movhi_1): Promote for TARGET_PARTIAL_REG_DEPENDENCY and for
PARTIAL_REGISTER_STALL with !TARGET_HIMODE_MATH machines.
(movqi_1): Handle promoting correctly for TARGET_PARTIAL_REG_DEPENDENCY
and TARGET_PARTIAL_REGISTER_STALL machines.
(pushdf_nointeger): New pattern.
(pushdf_integer): Rename from pushdf.
(movdf_nointeger): Enable for !TARGET_INTEGER_DFMODE_MOVES machines.
(movdf_integer): Disable for !TARGET_INTEGER_DFMODE_MOVES machines.
2000-04-17 Richard Henderson <rth@cygnus.com> 2000-04-17 Richard Henderson <rth@cygnus.com>
* loop.c (canonicalize_condition): Add WANT_REG argument. * loop.c (canonicalize_condition): Add WANT_REG argument.
......
...@@ -163,12 +163,12 @@ struct processor_costs k6_cost = { ...@@ -163,12 +163,12 @@ struct processor_costs k6_cost = {
struct processor_costs athlon_cost = { struct processor_costs athlon_cost = {
1, /* cost of an add instruction */ 1, /* cost of an add instruction */
1, /* cost of a lea instruction */ 2, /* cost of a lea instruction */
1, /* variable shift costs */ 1, /* variable shift costs */
1, /* constant shift costs */ 1, /* constant shift costs */
5, /* cost of starting a multiply */ 5, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */ 0, /* cost of multiply per each bit set */
19, /* cost of a divide/mod */ 42, /* cost of a divide/mod */
8, /* "large" insn */ 8, /* "large" insn */
9, /* MOVE_RATIO */ 9, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */ 4, /* cost for loading QImode using movzbl */
...@@ -177,9 +177,9 @@ struct processor_costs athlon_cost = { ...@@ -177,9 +177,9 @@ struct processor_costs athlon_cost = {
Relative to reg-reg move (2). */ Relative to reg-reg move (2). */
{2, 3, 2}, /* cost of storing integer registers */ {2, 3, 2}, /* cost of storing integer registers */
4, /* cost of reg,reg fld/fst */ 4, /* cost of reg,reg fld/fst */
{6, 6, 6}, /* cost of loading fp registers {6, 6, 20}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */ in SFmode, DFmode and XFmode */
{4, 4, 4} /* cost of loading integer registers */ {4, 4, 16} /* cost of loading integer registers */
}; };
struct processor_costs *ix86_cost = &pentium_cost; struct processor_costs *ix86_cost = &pentium_cost;
...@@ -222,6 +222,9 @@ const int x86_sub_esp_4 = m_ATHLON | m_PPRO; ...@@ -222,6 +222,9 @@ const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486; const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
const int x86_add_esp_4 = m_ATHLON | m_K6; const int x86_add_esp_4 = m_ATHLON | m_K6;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486; const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
const int x86_integer_DFmode_moves = ~m_ATHLON;
const int x86_partial_reg_dependency = m_ATHLON;
const int x86_memory_mismatch_stall = m_ATHLON;
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx)) #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
...@@ -6287,6 +6290,7 @@ ix86_adjust_cost (insn, link, dep_insn, cost) ...@@ -6287,6 +6290,7 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
int cost; int cost;
{ {
enum attr_type insn_type, dep_insn_type; enum attr_type insn_type, dep_insn_type;
enum attr_memory memory;
rtx set, set2; rtx set, set2;
int dep_insn_code_number; int dep_insn_code_number;
...@@ -6334,7 +6338,8 @@ ix86_adjust_cost (insn, link, dep_insn, cost) ...@@ -6334,7 +6338,8 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
increase the cost here for non-imov insns. */ increase the cost here for non-imov insns. */
if (dep_insn_type != TYPE_IMOV if (dep_insn_type != TYPE_IMOV
&& dep_insn_type != TYPE_FMOV && dep_insn_type != TYPE_FMOV
&& get_attr_memory (dep_insn) == MEMORY_LOAD) && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
|| memory == MEMORY_BOTH))
cost += 1; cost += 1;
/* INT->FP conversion is expensive. */ /* INT->FP conversion is expensive. */
...@@ -6359,7 +6364,8 @@ ix86_adjust_cost (insn, link, dep_insn, cost) ...@@ -6359,7 +6364,8 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
/* Since we can't represent delayed latencies of load+operation, /* Since we can't represent delayed latencies of load+operation,
increase the cost here for non-imov insns. */ increase the cost here for non-imov insns. */
if (get_attr_memory (dep_insn) == MEMORY_LOAD) if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
|| memory == MEMORY_BOTH)
cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1; cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
/* INT->FP conversion is expensive. */ /* INT->FP conversion is expensive. */
...@@ -6368,19 +6374,15 @@ ix86_adjust_cost (insn, link, dep_insn, cost) ...@@ -6368,19 +6374,15 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
break; break;
case PROCESSOR_ATHLON: case PROCESSOR_ATHLON:
/* Address Generation Interlock cause problems on the Athlon CPU because if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
the loads and stores are done in order so once one load or store has || memory == MEMORY_BOTH)
to wait, others must too, so penalize the AGIs slightly by one cycle. {
We might experiment with this value later. */ if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
if (ix86_agi_dependant (insn, dep_insn, insn_type)) cost += 2;
cost += 1; else
cost += 3;
}
/* Since we can't represent delayed latencies of load+operation,
increase the cost here for non-imov insns. */
if (dep_insn_type != TYPE_IMOV
&& dep_insn_type != TYPE_FMOV
&& get_attr_memory (dep_insn) == MEMORY_LOAD)
cost += 2;
default: default:
break; break;
} }
......
...@@ -173,8 +173,9 @@ extern const int x86_use_cltd, x86_read_modify_write; ...@@ -173,8 +173,9 @@ extern const int x86_use_cltd, x86_read_modify_write;
extern const int x86_read_modify, x86_split_long_moves; extern const int x86_read_modify, x86_split_long_moves;
extern const int x86_promote_QImode, x86_single_stringop; extern const int x86_promote_QImode, x86_single_stringop;
extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs; extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs;
extern const int x86_promote_hi_regs; extern const int x86_promote_hi_regs, x86_integer_DFmode_moves;
extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8; extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
#define TARGET_USE_LEAVE (x86_use_leave & CPUMASK) #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
#define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK) #define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK)
...@@ -206,6 +207,9 @@ extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8; ...@@ -206,6 +207,9 @@ extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
#define TARGET_ADD_ESP_8 (x86_add_esp_8 & CPUMASK) #define TARGET_ADD_ESP_8 (x86_add_esp_8 & CPUMASK)
#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & CPUMASK) #define TARGET_SUB_ESP_4 (x86_sub_esp_4 & CPUMASK)
#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK) #define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK)
#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & CPUMASK)
#define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & CPUMASK)
#define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & CPUMASK)
#define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE) #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment