Commit 90c56b45 by Uros Bizjak Committed by Uros Bizjak

i386.md (DIRFLAG_REG): Remove constant.

	* config/i386/i386.md (DIRFLAG_REG): Remove constant.
	(type): Remove cld.
	(length_immediate): Do not depend on cld type attribute.
	(length_address): Ditto.
	(modrm): Ditto.
	(memory): Ditto.
	(cld): Remove insn pattern.
	(strmov_singleop): Do not use DIRFLAG_REG.
	(*strmovdi_rex_1): Ditto.
	(*strmovsi_1): Ditto.
	(*strmovsi_rex_1): Ditto.
	(*strmovhi_1): Ditto.
	(*strmovhi_rex_1): Ditto.
	(*strmovqi_1): Ditto.
	(*strmovqi_rex_1): Ditto.
	(rep_mov): Ditto.
	(*rep_movdi_rex64): Ditto.
	(*rep_movsi): Ditto.
	(*rep_movsi_rex64): Ditto.
	(*rep_movqi): Ditto.
	(*rep_movqi_rex64): Ditto.
	(strset_singleop): Ditto.
	(*strsetdi_rex_1): Ditto.
	(*strsetsi_1): Ditto.
	(*strsetsi_rex_1): Ditto.
	(*strsethi_1): Ditto.
	(*strsethi_rex_1): Ditto.
	(*strsetqi_1): Ditto.
	(*strsetqi_rex_1): Ditto.
	(rep_stos): Ditto.
	(*rep_stosdi_rex64): Ditto.
	(*rep_stossi): Ditto.
	(*rep_stossi_rex64): Ditto.
	(*rep_stosqi): Ditto.
	(*rep_stosqi_rex64): Ditto.
	(cmpstrnsi): Do not generate cld insn.
	(cmpstrnqi_nz_1): Do not use DIRFLAG_REG.
	(*cmpstrnqi_nz_1): Ditto.
	(*cmpstrnqi_nz_rex_1): Ditto.
	(cmpstrnqi_1): Ditto.
	(*cmpstrnqi_1): Ditto.
	(*cmpstrnqi_rex_1): Ditto.
	(strlenqi_1): Ditto.
	(*strlenqi_1): Ditto.
	(*strlenqi_rex_1): Ditto.

	* config/i386/geode.md (shift): Do not depend on cld type attribute.
	* config/i386/pentium.md (pent_cld): Remove insn reservation.
	* config/i386/athlon.md (athlon_decode): Do not depend on cld
	type attribute.
	* config/i386/ppro.md (ppro_cld): Remove insn reservation.
	* config/i386/k6.md (k6_alux_only): Do not depend on cld type
	attribute.
	(k6_alux_only_load): Ditto.
	(k6_alux_only_store): Ditto.

	* config/i386/i386.c (ix86_expand_movmem): Remove cld_done boolean.
	Do not emit cld instruction.
	(ix86_expand_setmem): Ditto.
	(ix86_expand_strlen): Do not emit cld instruction.

From-SVN: r119581
parent 4b53c508
2006-12-06 Uros Bizjak <ubizjak@gmail.com>
* config/i386/i386.md (DIRFLAG_REG): Remove constant.
(type): Remove cld.
(length_immediate): Do not depend on cld type attribute.
(length_address): Ditto.
(modrm): Ditto.
(memory): Ditto.
(cld): Remove insn pattern.
(strmov_singleop): Do not use DIRFLAG_REG.
(*strmovdi_rex_1): Ditto.
(*strmovsi_1): Ditto.
(*strmovsi_rex_1): Ditto.
(*strmovhi_1): Ditto.
(*strmovhi_rex_1): Ditto.
(*strmovqi_1): Ditto.
(*strmovqi_rex_1): Ditto.
(rep_mov): Ditto.
(*rep_movdi_rex64): Ditto.
(*rep_movsi): Ditto.
(*rep_movsi_rex64): Ditto.
(*rep_movqi): Ditto.
(*rep_movqi_rex64): Ditto.
(strset_singleop): Ditto.
(*strsetdi_rex_1): Ditto.
(*strsetsi_1): Ditto.
(*strsetsi_rex_1): Ditto.
(*strsethi_1): Ditto.
(*strsethi_rex_1): Ditto.
(*strsetqi_1): Ditto.
(*strsetqi_rex_1): Ditto.
(rep_stos): Ditto.
(*rep_stosdi_rex64): Ditto.
(*rep_stossi): Ditto.
(*rep_stossi_rex64): Ditto.
(*rep_stosqi): Ditto.
(*rep_stosqi_rex64): Ditto.
(cmpstrnsi): Do not generate cld insn.
(cmpstrnqi_nz_1): Do not use DIRFLAG_REG.
(*cmpstrnqi_nz_1): Ditto.
(*cmpstrnqi_nz_rex_1): Ditto.
(cmpstrnqi_1): Ditto.
(*cmpstrnqi_1): Ditto.
(*cmpstrnqi_rex_1): Ditto.
(strlenqi_1): Ditto.
(*strlenqi_1): Ditto.
(*strlenqi_rex_1): Ditto.
* config/i386/geode.md (shift): Do not depend on cld type attribute.
* config/i386/pentium.md (pent_cld): Remove insn reservation.
* config/i386/athlon.md (athlon_decode): Do not depend on cld
type attribute.
* config/i386/ppro.md (ppro_cld): Remove insn reservation.
* config/i386/k6.md (k6_alux_only): Do not depend on cld type
attribute.
(k6_alux_only_load): Ditto.
(k6_alux_only_store): Ditto.
* config/i386/i386.c (ix86_expand_movmem): Remove cld_done boolean.
Do not emit cld instruction.
(ix86_expand_setmem): Ditto.
(ix86_expand_strlen): Do not emit cld instruction.
2006-12-06 Bernd Schmidt <bernd.schmidt@analog.com>
* config/bfin/bfin.c (bfin_rtx_costs): Add a number of new cases, tweak
......
......@@ -18,7 +18,7 @@
;; communicates with all the execution units separately instead.
(define_attr "athlon_decode" "direct,vector,double"
(cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld,leave")
(cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,leave")
(const_string "vector")
(and (eq_attr "type" "push")
(match_operand 1 "memory_operand" ""))
......
......@@ -52,7 +52,7 @@
(define_insn_reservation "shift" 2
(and (eq_attr "cpu" "geode")
(eq_attr "type" "ishift,ishift1,rotate,rotate1,cld"))
(eq_attr "type" "ishift,ishift1,rotate,rotate1"))
"geode_issue,geode_alu*2")
(define_insn_reservation "imul" 7
......
......@@ -13622,10 +13622,6 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
int desired_align = 0;
enum stringop_alg alg;
int dynamic_check;
/* Precise placement on cld depends whether stringops will be emit in
prologue, main copying body or epilogue. This variable keeps track
if cld was already needed. */
bool cld_done = false;
if (GET_CODE (align_exp) == CONST_INT)
align = INTVAL (align_exp);
......@@ -13690,8 +13686,7 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
&& !count)
{
int size = MAX (size_needed - 1, desired_align - align);
if (TARGET_SINGLE_STRINGOP)
emit_insn (gen_cld ()), cld_done = true;
label = gen_label_rtx ();
emit_cmp_and_jump_insns (count_exp,
GEN_INT (size),
......@@ -13725,8 +13720,6 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
the info early. */
src = change_address (src, BLKmode, srcreg);
dst = change_address (dst, BLKmode, destreg);
if (TARGET_SINGLE_STRINGOP && !cld_done)
emit_insn (gen_cld ()), cld_done = true;
expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
desired_align);
}
......@@ -13759,20 +13752,14 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
expected_size);
break;
case rep_prefix_8_byte:
if (!cld_done)
emit_insn (gen_cld ()), cld_done = true;
expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
DImode);
break;
case rep_prefix_4_byte:
if (!cld_done)
emit_insn (gen_cld ()), cld_done = true;
expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
SImode);
break;
case rep_prefix_1_byte:
if (!cld_done)
emit_insn (gen_cld ()), cld_done = true;
expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
QImode);
break;
......@@ -13808,12 +13795,8 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
LABEL_NUSES (label) = 1;
}
if (count_exp != const0_rtx && size_needed > 1)
{
if (TARGET_SINGLE_STRINGOP && !cld_done)
emit_insn (gen_cld ()), cld_done = true;
expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
size_needed);
}
expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
size_needed);
if (jump_around_label)
emit_label (jump_around_label);
return 1;
......@@ -13907,10 +13890,6 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
int size_needed = 0;
int desired_align = 0;
enum stringop_alg alg;
/* Precise placement on cld depends whether stringops will be emit in
prologue, main copying body or epilogue. This variable keeps track
if cld was already needed. */
bool cld_done = false;
rtx promoted_val = val_exp;
bool force_loopy_epilogue = false;
int dynamic_check;
......@@ -13977,8 +13956,6 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
code, so we need to use QImode accesses in epilogue. */
if (GET_CODE (val_exp) != CONST_INT && size_needed > 1)
force_loopy_epilogue = true;
else if (TARGET_SINGLE_STRINGOP)
emit_insn (gen_cld ()), cld_done = true;
label = gen_label_rtx ();
emit_cmp_and_jump_insns (count_exp,
GEN_INT (size),
......@@ -14013,8 +13990,7 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
&& !count && !label)
{
int size = MAX (size_needed - 1, desired_align - align);
if (TARGET_SINGLE_STRINGOP)
emit_insn (gen_cld ()), cld_done = true;
label = gen_label_rtx ();
emit_cmp_and_jump_insns (count_exp,
GEN_INT (size),
......@@ -14031,8 +14007,6 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
the pain to maintain it for the first move, so throw away
the info early. */
dst = change_address (dst, BLKmode, destreg);
if (TARGET_SINGLE_STRINGOP && !cld_done)
emit_insn (gen_cld ()), cld_done = true;
expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
desired_align);
}
......@@ -14060,20 +14034,14 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
count_exp, Pmode, 4, expected_size);
break;
case rep_prefix_8_byte:
if (!cld_done)
emit_insn (gen_cld ()), cld_done = true;
expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
DImode);
break;
case rep_prefix_4_byte:
if (!cld_done)
emit_insn (gen_cld ()), cld_done = true;
expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
SImode);
break;
case rep_prefix_1_byte:
if (!cld_done)
emit_insn (gen_cld ()), cld_done = true;
expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
QImode);
break;
......@@ -14106,12 +14074,8 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
size_needed);
else
{
if (TARGET_SINGLE_STRINGOP && !cld_done)
emit_insn (gen_cld ()), cld_done = true;
expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
size_needed);
}
expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
size_needed);
}
if (jump_around_label)
emit_label (jump_around_label);
......@@ -14169,7 +14133,6 @@ ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
emit_move_insn (scratch3, addr);
eoschar = force_reg (QImode, eoschar);
emit_insn (gen_cld ());
src = replace_equiv_address_nv (src, scratch3);
/* If .md starts supporting :P, this can be done in .md. */
......
......@@ -80,19 +80,19 @@
;; Shift instructions and certain arithmetic are issued only on Integer X.
(define_insn_reservation "k6_alux_only" 1
(and (eq_attr "cpu" "k6")
(and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
(and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot")
(eq_attr "memory" "none")))
"k6_decode_short,k6_alux")
(define_insn_reservation "k6_alux_only_load" 3
(and (eq_attr "cpu" "k6")
(and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
(and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot")
(eq_attr "memory" "load")))
"k6_decode_short,k6_load,k6_alux")
(define_insn_reservation "k6_alux_only_store" 3
(and (eq_attr "cpu" "k6")
(and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot,cld")
(and (eq_attr "type" "ishift,ishift1,rotate,rotate1,alu1,negnot")
(eq_attr "memory" "store,both,unknown")))
"k6_decode_long,k6_load,k6_alux,k6_store")
......
......@@ -151,11 +151,6 @@
(eq_attr "type" "idiv"))
"pentium-np+pentium-fp")
(define_insn_reservation "pent_cld" 2
(and (eq_attr "cpu" "pentium")
(eq_attr "type" "cld"))
"pentium-np*2")
;; Moves usually have one cycle penalty, but there are exceptions.
(define_insn_reservation "pent_fmov" 1
(and (eq_attr "cpu" "pentium")
......
......@@ -60,8 +60,7 @@
;; This isn't necessary right now because we assume for every
;; instruction that it never blocks a decoder.
;; - Figure out where the p0 and p1 reservations come from. These
;; appear not to be in the manual (e.g. why is cld "(p0+p1)*2"
;; better than "(p0|p1)*4" ???)
;; appear not to be in the manual
;; - Lots more because I'm sure this is still far from optimal :-)
;; The ppro_idiv and ppro_fdiv automata are used to model issue
......@@ -196,10 +195,6 @@
(eq_attr "type" "ishift,ishift1,rotate,rotate1")))
"decoder0,p2+p0,p4+p3")
(define_insn_reservation "ppro_cld" 2
(and (eq_attr "cpu" "pentiumpro,generic32")
(eq_attr "type" "cld"))
"decoder0,(p0+p1)*2")
;; The P6 has a sophisticated branch prediction mechanism to minimize
;; latencies due to branching. In particular, it has a fast way to
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment