Commit 0b5107cf by Jan Hubicka Committed by Jan Hubicka

i386.c (athlon_cost): Fix lea, divide and XFmode move costs.

	* i386.c (athlon_cost): Fix lea, divide and XFmode move costs.
	(x86_integer_DFmode_moves, x86_partial_reg_dependency,
	 x86_memory_mismatch_stall): New global variables.
	(ix86_adjust_cost): Handle MEMORY_BOTH on places MEMORY_STORE was only
	allowed; fix load penalties for Athlon.
	* i386.h (x86_integer_DFmode_moves, x86_partial_reg_dependency,
	x86_memory_mismatch_stall): Declare.
	(TARGET_INTEGER_DFMODE_MOVES, TARGET_PARTIAL_REG_DEPENDENCY,
	 TARGET_MEMORY_MISMATCH_STALL): New.
	* i386.md (athlon scheduling parameters): Fix latencies according to
	Athlon Optimization Manual.
	(sahf, xchg, fldcw, leave instruction patterns): Set athlon_decode to
	vector.
	(fsqrt instruction patterns): Set athlon_decode to direct.
	(movhi_1): Promote for TARGET_PARTIAL_REG_DEPENDENCY and for
	PARTIAL_REGISTER_STALL with !TARGET_HIMODE_MATH machines.
	(movqi_1): Handle promoting correctly for TARGET_PARTIAL_REG_DEPENDENCY
	and TARGET_PARTIAL_REGISTER_STALL machines.
	(pushdf_nointeger): New pattern.
	(pushdf_integer): Rename from pushdf.
	(movdf_nointeger): Enable for !TARGET_INTEGER_DFMODE_MOVES machines.
	(movdf_integer): Disable for !TARGET_INTEGER_DFMODE_MOVES machines.

From-SVN: r33215
parent 10f13594
Mon Apr 17 23:35:29 MET DST 2000 Jan Hubicka <jh@suse.cz>
* i386.c (athlon_cost): Fix lea, divide and XFmode move costs.
(x86_integer_DFmode_moves, x86_partial_reg_dependency,
x86_memory_mismatch_stall): New global variables.
(ix86_adjust_cost): Handle MEMORY_BOTH on places MEMORY_STORE was only
allowed; fix load penalties for Athlon.
* i386.h (x86_integer_DFmode_moves, x86_partial_reg_dependency,
x86_memory_mismatch_stall): Declare.
(TARGET_INTEGER_DFMODE_MOVES, TARGET_PARTIAL_REG_DEPENDENCY,
TARGET_MEMORY_MISMATCH_STALL): New.
* i386.md (athlon scheduling parameters): Fix latencies according to
Athlon Optimization Manual.
(sahf, xchg, fldcw, leave instruction patterns): Set athlon_decode to
vector.
(fsqrt instruction patterns): Set athlon_decode to direct.
(movhi_1): Promote for TARGET_PARTIAL_REG_DEPENDENCY and for
PARTIAL_REGISTER_STALL with !TARGET_HIMODE_MATH machines.
(movqi_1): Handle promoting correctly for TARGET_PARTIAL_REG_DEPENDENCY
and TARGET_PARTIAL_REGISTER_STALL machines.
(pushdf_nointeger): New pattern.
(pushdf_integer): Rename from pushdf.
(movdf_nointeger): Enable for !TARGET_INTEGER_DFMODE_MOVES machines.
(movdf_integer): Disable for !TARGET_INTEGER_DFMODE_MOVES machines.
2000-04-17 Richard Henderson <rth@cygnus.com> 2000-04-17 Richard Henderson <rth@cygnus.com>
* loop.c (canonicalize_condition): Add WANT_REG argument. * loop.c (canonicalize_condition): Add WANT_REG argument.
......
...@@ -163,12 +163,12 @@ struct processor_costs k6_cost = { ...@@ -163,12 +163,12 @@ struct processor_costs k6_cost = {
struct processor_costs athlon_cost = { struct processor_costs athlon_cost = {
1, /* cost of an add instruction */ 1, /* cost of an add instruction */
1, /* cost of a lea instruction */ 2, /* cost of a lea instruction */
1, /* variable shift costs */ 1, /* variable shift costs */
1, /* constant shift costs */ 1, /* constant shift costs */
5, /* cost of starting a multiply */ 5, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */ 0, /* cost of multiply per each bit set */
19, /* cost of a divide/mod */ 42, /* cost of a divide/mod */
8, /* "large" insn */ 8, /* "large" insn */
9, /* MOVE_RATIO */ 9, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */ 4, /* cost for loading QImode using movzbl */
...@@ -177,9 +177,9 @@ struct processor_costs athlon_cost = { ...@@ -177,9 +177,9 @@ struct processor_costs athlon_cost = {
Relative to reg-reg move (2). */ Relative to reg-reg move (2). */
{2, 3, 2}, /* cost of storing integer registers */ {2, 3, 2}, /* cost of storing integer registers */
4, /* cost of reg,reg fld/fst */ 4, /* cost of reg,reg fld/fst */
{6, 6, 6}, /* cost of loading fp registers {6, 6, 20}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */ in SFmode, DFmode and XFmode */
{4, 4, 4} /* cost of loading integer registers */ {4, 4, 16} /* cost of loading integer registers */
}; };
struct processor_costs *ix86_cost = &pentium_cost; struct processor_costs *ix86_cost = &pentium_cost;
...@@ -222,6 +222,9 @@ const int x86_sub_esp_4 = m_ATHLON | m_PPRO; ...@@ -222,6 +222,9 @@ const int x86_sub_esp_4 = m_ATHLON | m_PPRO;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486; const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486;
const int x86_add_esp_4 = m_ATHLON | m_K6; const int x86_add_esp_4 = m_ATHLON | m_K6;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486; const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486;
const int x86_integer_DFmode_moves = ~m_ATHLON;
const int x86_partial_reg_dependency = m_ATHLON;
const int x86_memory_mismatch_stall = m_ATHLON;
#define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx)) #define AT_BP(mode) (gen_rtx_MEM ((mode), hard_frame_pointer_rtx))
...@@ -6287,6 +6290,7 @@ ix86_adjust_cost (insn, link, dep_insn, cost) ...@@ -6287,6 +6290,7 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
int cost; int cost;
{ {
enum attr_type insn_type, dep_insn_type; enum attr_type insn_type, dep_insn_type;
enum attr_memory memory;
rtx set, set2; rtx set, set2;
int dep_insn_code_number; int dep_insn_code_number;
...@@ -6334,7 +6338,8 @@ ix86_adjust_cost (insn, link, dep_insn, cost) ...@@ -6334,7 +6338,8 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
increase the cost here for non-imov insns. */ increase the cost here for non-imov insns. */
if (dep_insn_type != TYPE_IMOV if (dep_insn_type != TYPE_IMOV
&& dep_insn_type != TYPE_FMOV && dep_insn_type != TYPE_FMOV
&& get_attr_memory (dep_insn) == MEMORY_LOAD) && ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
|| memory == MEMORY_BOTH))
cost += 1; cost += 1;
/* INT->FP conversion is expensive. */ /* INT->FP conversion is expensive. */
...@@ -6359,7 +6364,8 @@ ix86_adjust_cost (insn, link, dep_insn, cost) ...@@ -6359,7 +6364,8 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
/* Since we can't represent delayed latencies of load+operation, /* Since we can't represent delayed latencies of load+operation,
increase the cost here for non-imov insns. */ increase the cost here for non-imov insns. */
if (get_attr_memory (dep_insn) == MEMORY_LOAD) if ((memory = get_attr_memory (dep_insn) == MEMORY_LOAD)
|| memory == MEMORY_BOTH)
cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1; cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
/* INT->FP conversion is expensive. */ /* INT->FP conversion is expensive. */
...@@ -6368,19 +6374,15 @@ ix86_adjust_cost (insn, link, dep_insn, cost) ...@@ -6368,19 +6374,15 @@ ix86_adjust_cost (insn, link, dep_insn, cost)
break; break;
case PROCESSOR_ATHLON: case PROCESSOR_ATHLON:
/* Address Generation Interlock cause problems on the Athlon CPU because if ((memory = get_attr_memory (dep_insn)) == MEMORY_LOAD
the loads and stores are done in order so once one load or store has || memory == MEMORY_BOTH)
to wait, others must too, so penalize the AGIs slightly by one cycle. {
We might experiment with this value later. */ if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
if (ix86_agi_dependant (insn, dep_insn, insn_type)) cost += 2;
cost += 1; else
cost += 3;
}
/* Since we can't represent delayed latencies of load+operation,
increase the cost here for non-imov insns. */
if (dep_insn_type != TYPE_IMOV
&& dep_insn_type != TYPE_FMOV
&& get_attr_memory (dep_insn) == MEMORY_LOAD)
cost += 2;
default: default:
break; break;
} }
......
...@@ -173,8 +173,9 @@ extern const int x86_use_cltd, x86_read_modify_write; ...@@ -173,8 +173,9 @@ extern const int x86_use_cltd, x86_read_modify_write;
extern const int x86_read_modify, x86_split_long_moves; extern const int x86_read_modify, x86_split_long_moves;
extern const int x86_promote_QImode, x86_single_stringop; extern const int x86_promote_QImode, x86_single_stringop;
extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs; extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs;
extern const int x86_promote_hi_regs; extern const int x86_promote_hi_regs, x86_integer_DFmode_moves;
extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8; extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
#define TARGET_USE_LEAVE (x86_use_leave & CPUMASK) #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK)
#define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK) #define TARGET_PUSH_MEMORY (x86_push_memory & CPUMASK)
...@@ -206,6 +207,9 @@ extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8; ...@@ -206,6 +207,9 @@ extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
#define TARGET_ADD_ESP_8 (x86_add_esp_8 & CPUMASK) #define TARGET_ADD_ESP_8 (x86_add_esp_8 & CPUMASK)
#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & CPUMASK) #define TARGET_SUB_ESP_4 (x86_sub_esp_4 & CPUMASK)
#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK) #define TARGET_SUB_ESP_8 (x86_sub_esp_8 & CPUMASK)
#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & CPUMASK)
#define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & CPUMASK)
#define TARGET_MEMORY_MISMATCH_STALL (x86_memory_mismatch_stall & CPUMASK)
#define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE) #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)
......
...@@ -738,7 +738,7 @@ ...@@ -738,7 +738,7 @@
;; communicates with all the execution units separately instead. ;; communicates with all the execution units separately instead.
(define_attr "athlon_decode" "direct,vector" (define_attr "athlon_decode" "direct,vector"
(cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str") (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,fcmov")
(const_string "vector") (const_string "vector")
(and (eq_attr "type" "push") (and (eq_attr "type" "push")
(match_operand 1 "memory_operand" "")) (match_operand 1 "memory_operand" ""))
...@@ -766,7 +766,7 @@ ...@@ -766,7 +766,7 @@
(define_function_unit "athlon_ieu" 3 0 (define_function_unit "athlon_ieu" 3 0
(and (eq_attr "cpu" "athlon") (and (eq_attr "cpu" "athlon")
(eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,imul,idiv,ibr,setcc,push,pop,call,callv,icmov,str,cld")) (eq_attr "type" "alu1,negnot,alu,icmp,imov,imovx,lea,incdec,ishift,ibr,call,callv,icmov,cld,pop,setcc,push,pop"))
1 1) 1 1)
(define_function_unit "athlon_ieu" 3 0 (define_function_unit "athlon_ieu" 3 0
...@@ -777,12 +777,12 @@ ...@@ -777,12 +777,12 @@
(define_function_unit "athlon_ieu" 3 0 (define_function_unit "athlon_ieu" 3 0
(and (eq_attr "cpu" "athlon") (and (eq_attr "cpu" "athlon")
(eq_attr "type" "imul")) (eq_attr "type" "imul"))
4 0) 5 0)
(define_function_unit "athlon_ieu" 3 0 (define_function_unit "athlon_ieu" 3 0
(and (eq_attr "cpu" "athlon") (and (eq_attr "cpu" "athlon")
(eq_attr "type" "idiv")) (eq_attr "type" "idiv"))
27 0) 42 0)
(define_function_unit "athlon_muldiv" 1 0 (define_function_unit "athlon_muldiv" 1 0
(and (eq_attr "cpu" "athlon") (and (eq_attr "cpu" "athlon")
...@@ -792,56 +792,118 @@ ...@@ -792,56 +792,118 @@
(define_function_unit "athlon_muldiv" 1 0 (define_function_unit "athlon_muldiv" 1 0
(and (eq_attr "cpu" "athlon") (and (eq_attr "cpu" "athlon")
(eq_attr "type" "idiv")) (eq_attr "type" "idiv"))
27 27) 42 42)
(define_attr "athlon_fpunits" "none,store,mul,add,muladd,all" (define_attr "athlon_fpunits" "none,store,mul,add,muladd,any"
(cond [(eq_attr "type" "fop,fop1,fcmp") (cond [(eq_attr "type" "fop,fop1,fcmp")
(const_string "add") (const_string "add")
(eq_attr "type" "fmul,fdiv,fpspc,fsgn") (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov")
(const_string "mul") (const_string "mul")
(and (eq_attr "type" "fmov") (eq_attr "memory" "!none")) (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))
(const_string "store") (const_string "store")
(and (eq_attr "type" "fmov") (eq_attr "memory" "load"))
(const_string "any")
(and (eq_attr "type" "fmov") (and (eq_attr "type" "fmov")
(ior (match_operand:SI 1 "register_operand" "") (ior (match_operand:SI 1 "register_operand" "")
(match_operand 1 "immediate_operand" ""))) (match_operand 1 "immediate_operand" "")))
(const_string "store") (const_string "store")
(eq_attr "type" "fmov") (eq_attr "type" "fmov")
(const_string "muladd") (const_string "muladd")]
(eq_attr "type" "fcmov")
(const_string "all")]
(const_string "none"))) (const_string "none")))
(define_function_unit "athlon_fp_mul" 1 0 ;; We use latencies 1 for definitions. This is OK to model collisions
;; in execution units. The real latencies are modeled in the "fp" pipeline.
;; fsin, fcos: 96-192
;; fsincos: 107-211
;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode.
(define_function_unit "athlon_fp" 3 0
(and (eq_attr "cpu" "athlon") (and (eq_attr "cpu" "athlon")
(eq_attr "athlon_fpunits" "mul,all")) (eq_attr "type" "fpspc"))
4 1) 100 1)
(define_function_unit "athlon_fp_add" 1 0 ;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode.
(define_function_unit "athlon_fp" 3 0
(and (eq_attr "cpu" "athlon") (and (eq_attr "cpu" "athlon")
(eq_attr "athlon_fpunits" "add,all")) (eq_attr "type" "fdiv"))
24 1)
(define_function_unit "athlon_fp" 3 0
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "fop,fop1,fmul"))
4 1) 4 1)
(define_function_unit "athlon_fp_muladd" 2 0 ;; XFmode loads are slow.
;; XFmode store is slow too (8 cycles), but we don't need to model it, because
;; there are no dependent instructions.
(define_function_unit "athlon_fp" 3 0
(and (eq_attr "cpu" "athlon") (and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fmov") (and (eq_attr "type" "fmov")
(eq_attr "athlon_fpunits" "muladd,mul,add,all"))) (match_operand:XF 1 "memory_operand" "")))
10 1)
(define_function_unit "athlon_fp" 3 0
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "fmov,fsgn"))
2 1) 2 1)
;; fcmp and ftst instructions
(define_function_unit "athlon_fp" 3 0
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fcmp")
(eq_attr "athlon_decode" "direct")))
3 1)
;; fcmpi instructions.
(define_function_unit "athlon_fp" 3 0
(and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "fcmp")
(eq_attr "athlon_decode" "vector")))
3 1)
(define_function_unit "athlon_fp" 3 0
(and (eq_attr "cpu" "athlon")
(eq_attr "type" "fcmov"))
7 1)
(define_function_unit "athlon_fp_mul" 1 0
(and (eq_attr "cpu" "athlon")
(eq_attr "athlon_fpunits" "mul"))
1 1)
(define_function_unit "athlon_fp_add" 1 0
(and (eq_attr "cpu" "athlon")
(eq_attr "athlon_fpunits" "add"))
1 1)
(define_function_unit "athlon_fp_muladd" 2 0 (define_function_unit "athlon_fp_muladd" 2 0
(and (eq_attr "cpu" "athlon") (and (eq_attr "cpu" "athlon")
(and (eq_attr "type" "!fmov") (eq_attr "athlon_fpunits" "muladd,mul,add"))
(eq_attr "athlon_fpunits" "muladd,mul,add,all"))) 1 1)
4 1)
(define_function_unit "athlon_fp_store" 1 0 (define_function_unit "athlon_fp_store" 1 0
(and (eq_attr "cpu" "athlon") (and (eq_attr "cpu" "athlon")
(eq_attr "athlon_fpunits" "store,all")) (eq_attr "athlon_fpunits" "store"))
1 1) 1 1)
(define_function_unit "athlon_agu" 3 0 ;; We don't need to model the Address Generation Unit, since we don't model
;; the re-order buffer yet and thus we never schedule more than three operations
;; at a time. Later we may want to experiment with MD_SCHED macros modeling the
;; decoders independently on the functional units.
;(define_function_unit "athlon_agu" 3 0
; (and (eq_attr "cpu" "athlon")
; (and (eq_attr "memory" "!none")
; (eq_attr "athlon_fpunits" "none")))
; 1 1)
;; Model load unit to avoid too long sequences of loads. We don't need to
;; model store queue, since it is hardly going to be bottleneck.
(define_function_unit "athlon_load" 2 0
(and (eq_attr "cpu" "athlon") (and (eq_attr "cpu" "athlon")
(and (eq_attr "memory" "!none") (eq_attr "memory" "load,both"))
(eq_attr "athlon_fpunits" "none")))
1 1) 1 1)
...@@ -1255,6 +1317,7 @@ ...@@ -1255,6 +1317,7 @@
"" ""
"sahf" "sahf"
[(set_attr "length" "1") [(set_attr "length" "1")
(set_attr "athlon_decode" "vector")
(set_attr "ppro_uops" "one")]) (set_attr "ppro_uops" "one")])
;; Pentium Pro can do steps 1 through 3 in one go. ;; Pentium Pro can do steps 1 through 3 in one go.
...@@ -1390,6 +1453,7 @@ ...@@ -1390,6 +1453,7 @@
"xchg{l}\\t%1, %0" "xchg{l}\\t%1, %0"
[(set_attr "type" "imov") [(set_attr "type" "imov")
(set_attr "pent_pair" "np") (set_attr "pent_pair" "np")
(set_attr "athlon_decode" "vector")
(set_attr "ppro_uops" "few")]) (set_attr "ppro_uops" "few")])
(define_expand "movhi" (define_expand "movhi"
...@@ -1437,8 +1501,10 @@ ...@@ -1437,8 +1501,10 @@
}" }"
[(set (attr "type") [(set (attr "type")
(cond [(and (eq_attr "alternative" "0") (cond [(and (eq_attr "alternative" "0")
(eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
(const_int 0))) (const_int 0))
(eq (symbol_ref "TARGET_HIMODE_MATH")
(const_int 0))))
(const_string "imov") (const_string "imov")
(and (eq_attr "alternative" "1,2") (and (eq_attr "alternative" "1,2")
(match_operand:HI 1 "aligned_operand" "")) (match_operand:HI 1 "aligned_operand" ""))
...@@ -1456,8 +1522,10 @@ ...@@ -1456,8 +1522,10 @@
(match_operand:HI 1 "aligned_operand" "")) (match_operand:HI 1 "aligned_operand" ""))
(const_string "0") (const_string "0")
(and (eq_attr "alternative" "0") (and (eq_attr "alternative" "0")
(eq (symbol_ref "TARGET_PARTIAL_REG_STALL") (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
(const_int 0))) (const_int 0))
(eq (symbol_ref "TARGET_HIMODE_MATH")
(const_int 0))))
(const_string "0") (const_string "0")
] ]
(const_string "1"))) (const_string "1")))
...@@ -1547,9 +1615,19 @@ ...@@ -1547,9 +1615,19 @@
[(set_attr "type" "pop") [(set_attr "type" "pop")
(set_attr "length_prefix" "1")]) (set_attr "length_prefix" "1")])
;; Situation is quite tricky about when to choose full sized (SImode) move
;; over QImode moves. For Q_REG -> Q_REG move we use full size only for
;; partial register dependency machines (such as AMD Athlon), where QImode
;; moves issue extra dependency and for partial register stalls machines
;; that don't use QImode patterns (and QImode move cause stall on the next
;; instruction).
;;
;; For loads of Q_REG to NONQ_REG we use full sized moves except for partial
;; register stall machines, where we use QImode instructions, since
;; partial register stall can be caused there. Then we use movzx.
(define_insn "*movqi_1" (define_insn "*movqi_1"
[(set (match_operand:QI 0 "nonimmediate_operand" "=q,q,r,?r,m") [(set (match_operand:QI 0 "nonimmediate_operand" "=q,q ,q ,r,r ,?r,m")
(match_operand:QI 1 "general_operand" "qn,qm,rn,qm,qn"))] (match_operand:QI 1 "general_operand" " q,qn,qm,q,rn,qm,qn"))]
"GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
"* "*
{ {
...@@ -1560,26 +1638,50 @@ ...@@ -1560,26 +1638,50 @@
abort (); abort ();
return \"movz{bl|x}\\t{%1, %k0|%k0, %1}\"; return \"movz{bl|x}\\t{%1, %k0|%k0, %1}\";
default: default:
if (which_alternative == 2) if (which_alternative == 4 || which_alternative == 3
|| (which_alternative == 1 && get_attr_length (insn) == 5)
|| (which_alternative == 0
&& ((TARGET_PARTIAL_REG_STALL && !TARGET_QIMODE_MATH)
|| TARGET_PARTIAL_REG_DEPENDENCY)))
return \"mov{l}\\t{%k1, %k0|%k0, %k1}\"; return \"mov{l}\\t{%k1, %k0|%k0, %k1}\";
else else
return \"mov{b}\\t{%1, %0|%0, %1}\"; return \"mov{b}\\t{%1, %0|%0, %1}\";
} }
}" }"
[(set (attr "type") [(set (attr "type")
(cond [(eq_attr "alternative" "3") (cond [(and (eq_attr "alternative" "3")
(ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
(const_int 0))
(eq (symbol_ref "TARGET_QIMODE_MATH")
(const_int 0))))
(const_string "imov")
(eq_attr "alternative" "3,5")
(const_string "imovx") (const_string "imovx")
(and (ne (symbol_ref "TARGET_MOVX") (and (ne (symbol_ref "TARGET_MOVX")
(const_int 0)) (const_int 0))
(eq_attr "alternative" "1")) (eq_attr "alternative" "2"))
(const_string "imovx") (const_string "imovx")
] ]
(const_string "imov"))) (const_string "imov")))
; There's no place to override just the immediate length ; There's no place to override just the immediate length
(set (attr "length") (set (attr "length")
(cond [(and (eq_attr "type" "imov") (cond [(and (eq_attr "type" "imov")
(and (eq_attr "alternative" "2") (and (match_operand:HI 1 "immediate_operand" "")
(match_operand:HI 1 "immediate_operand" ""))) (eq_attr "alternative" "4")))
(const_string "5")
;; Avoid extra dependency on partial register.
(and (eq_attr "type" "imov")
(and (eq_attr "alternative" "1")
(ne (symbol_ref "TARGET_PARTIAL_REG_DEPENDENCY")
(const_int 0))))
(const_string "5")
;; Avoid partial register stalls when not using QImode arithmetic
(and (eq_attr "type" "imov")
(and (eq_attr "alternative" "1")
(and (ne (symbol_ref "TARGET_PARTIAL_REG_STALL")
(const_int 0))
(eq (symbol_ref "TARGET_QIMODE_MATH")
(const_int 0)))))
(const_string "5") (const_string "5")
] ]
(const_string "*")))]) (const_string "*")))])
...@@ -1904,10 +2006,38 @@ ...@@ -1904,10 +2006,38 @@
;; On the average, pushdf using integers can be still shorter. Allow this ;; On the average, pushdf using integers can be still shorter. Allow this
;; pattern for optimize_size too. ;; pattern for optimize_size too.
(define_insn "*pushdf" (define_insn "*pushdf_nointeger"
[(set (match_operand:DF 0 "push_operand" "=<,<,<")
(match_operand:DF 1 "general_no_elim_operand" "f,Fo#f,*r#f"))]
"!TARGET_INTEGER_DFMODE_MOVES"
"*
{
switch (which_alternative)
{
case 0:
/* %%% We lose REG_DEAD notes for controlling pops if we split late. */
operands[0] = gen_rtx_MEM (DFmode, stack_pointer_rtx);
operands[2] = stack_pointer_rtx;
operands[3] = GEN_INT (8);
if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
return \"sub{l}\\t{%3, %2|%2, %3}\;fstp%z0\\t%y0\";
else
return \"sub{l}\\t{%3, %2|%2, %3}\;fst%z0\\t%y0\";
case 1:
case 2:
return \"#\";
default:
abort ();
}
}"
[(set_attr "type" "multi")])
(define_insn "*pushdf_integer"
[(set (match_operand:DF 0 "push_operand" "=<,<") [(set (match_operand:DF 0 "push_operand" "=<,<")
(match_operand:DF 1 "general_no_elim_operand" "f#r,rFo#f"))] (match_operand:DF 1 "general_no_elim_operand" "f#r,rFo#f"))]
"" "TARGET_INTEGER_DFMODE_MOVES"
"* "*
{ {
switch (which_alternative) switch (which_alternative)
...@@ -1955,7 +2085,7 @@ ...@@ -1955,7 +2085,7 @@
[(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,f,*r,o") [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,f,*r,o")
(match_operand:DF 1 "general_operand" "fm,f,G,*roF,F*r"))] (match_operand:DF 1 "general_operand" "fm,f,G,*roF,F*r"))]
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
&& optimize_size && (optimize_size || !TARGET_INTEGER_DFMODE_MOVES)
&& (reload_in_progress || reload_completed && (reload_in_progress || reload_completed
|| GET_CODE (operands[1]) != CONST_DOUBLE || GET_CODE (operands[1]) != CONST_DOUBLE
|| memory_operand (operands[0], DFmode))" || memory_operand (operands[0], DFmode))"
...@@ -2002,7 +2132,7 @@ ...@@ -2002,7 +2132,7 @@
[(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,m,f#r,r#f,o") [(set (match_operand:DF 0 "nonimmediate_operand" "=f#r,m,f#r,r#f,o")
(match_operand:DF 1 "general_operand" "fm#r,f#r,G,roF#f,Fr#f"))] (match_operand:DF 1 "general_operand" "fm#r,f#r,G,roF#f,Fr#f"))]
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) "(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)
&& !optimize_size && !optimize_size && TARGET_INTEGER_DFMODE_MOVES
&& (reload_in_progress || reload_completed && (reload_in_progress || reload_completed
|| GET_CODE (operands[1]) != CONST_DOUBLE || GET_CODE (operands[1]) != CONST_DOUBLE
|| memory_operand (operands[0], DFmode))" || memory_operand (operands[0], DFmode))"
...@@ -2304,7 +2434,8 @@ ...@@ -2304,7 +2434,8 @@
else else
return \"fxch\\t%0\"; return \"fxch\\t%0\";
}" }"
[(set_attr "type" "fxch")]) [(set_attr "type" "fxch")
(set_attr "athlon_decode" "vector")])
;; Zero extension instructions ;; Zero extension instructions
...@@ -3202,6 +3333,7 @@ ...@@ -3202,6 +3333,7 @@
"TARGET_80387" "TARGET_80387"
"fldcw\\t%0" "fldcw\\t%0"
[(set_attr "length_opcode" "2") [(set_attr "length_opcode" "2")
(set_attr "athlon_decode" "vector")
(set_attr "ppro_uops" "few")]) (set_attr "ppro_uops" "few")])
;; Conversion between fixed point and floating point. ;; Conversion between fixed point and floating point.
...@@ -7691,6 +7823,7 @@ ...@@ -7691,6 +7823,7 @@
"" ""
"leave" "leave"
[(set_attr "length" "1") [(set_attr "length" "1")
(set_attr "athlon_decode" "vector")
(set_attr "ppro_uops" "few")]) (set_attr "ppro_uops" "few")])
(define_expand "ffssi2" (define_expand "ffssi2"
...@@ -8123,7 +8256,8 @@ ...@@ -8123,7 +8256,8 @@
(sqrt:SF (match_operand:SF 1 "register_operand" "0")))] (sqrt:SF (match_operand:SF 1 "register_operand" "0")))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387" "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
"fsqrt" "fsqrt"
[(set_attr "type" "fpspc")]) [(set_attr "type" "fpspc")
(set_attr "athlon_decode" "direct")])
(define_insn "sqrtdf2" (define_insn "sqrtdf2"
[(set (match_operand:DF 0 "register_operand" "=f") [(set (match_operand:DF 0 "register_operand" "=f")
...@@ -8131,7 +8265,8 @@ ...@@ -8131,7 +8265,8 @@
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387 "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& (TARGET_IEEE_FP || flag_fast_math) " && (TARGET_IEEE_FP || flag_fast_math) "
"fsqrt" "fsqrt"
[(set_attr "type" "fpspc")]) [(set_attr "type" "fpspc")
(set_attr "athlon_decode" "direct")])
(define_insn "*sqrtextendsfdf2" (define_insn "*sqrtextendsfdf2"
[(set (match_operand:DF 0 "register_operand" "=f") [(set (match_operand:DF 0 "register_operand" "=f")
...@@ -8139,7 +8274,8 @@ ...@@ -8139,7 +8274,8 @@
(match_operand:SF 1 "register_operand" "0"))))] (match_operand:SF 1 "register_operand" "0"))))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387" "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
"fsqrt" "fsqrt"
[(set_attr "type" "fpspc")]) [(set_attr "type" "fpspc")
(set_attr "athlon_decode" "direct")])
(define_insn "sqrtxf2" (define_insn "sqrtxf2"
[(set (match_operand:XF 0 "register_operand" "=f") [(set (match_operand:XF 0 "register_operand" "=f")
...@@ -8147,7 +8283,8 @@ ...@@ -8147,7 +8283,8 @@
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387 "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
&& (TARGET_IEEE_FP || flag_fast_math) " && (TARGET_IEEE_FP || flag_fast_math) "
"fsqrt" "fsqrt"
[(set_attr "type" "fpspc")]) [(set_attr "type" "fpspc")
(set_attr "athlon_decode" "direct")])
(define_insn "*sqrtextenddfxf2" (define_insn "*sqrtextenddfxf2"
[(set (match_operand:XF 0 "register_operand" "=f") [(set (match_operand:XF 0 "register_operand" "=f")
...@@ -8155,7 +8292,8 @@ ...@@ -8155,7 +8292,8 @@
(match_operand:DF 1 "register_operand" "0"))))] (match_operand:DF 1 "register_operand" "0"))))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387" "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
"fsqrt" "fsqrt"
[(set_attr "type" "fpspc")]) [(set_attr "type" "fpspc")
(set_attr "athlon_decode" "direct")])
(define_insn "*sqrtextendsfxf2" (define_insn "*sqrtextendsfxf2"
[(set (match_operand:XF 0 "register_operand" "=f") [(set (match_operand:XF 0 "register_operand" "=f")
...@@ -8163,7 +8301,8 @@ ...@@ -8163,7 +8301,8 @@
(match_operand:SF 1 "register_operand" "0"))))] (match_operand:SF 1 "register_operand" "0"))))]
"! TARGET_NO_FANCY_MATH_387 && TARGET_80387" "! TARGET_NO_FANCY_MATH_387 && TARGET_80387"
"fsqrt" "fsqrt"
[(set_attr "type" "fpspc")]) [(set_attr "type" "fpspc")
(set_attr "athlon_decode" "direct")])
(define_insn "sindf2" (define_insn "sindf2"
[(set (match_operand:DF 0 "register_operand" "=f") [(set (match_operand:DF 0 "register_operand" "=f")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment