Commit 8acfdd43 by Richard Henderson Committed by Richard Henderson

i386.md (UNSPEC_BSF): Remove.

        * config/i386/i386.md (UNSPEC_BSF): Remove.
        (ffssi2): Split into cmove and no_cmove insns and splitters;
        lose pentium float trick for now.
        (ffssi_1): Add * to name; use CTZ instead of UNSPEC.
        (ctzsi2, clzsi2, bsr): New.

From-SVN: r62434
parent c407570a
2003-02-04 Richard Henderson <rth@redhat.com>
* config/i386/i386.md (UNSPEC_BSF): Remove.
(ffssi2): Split into cmove and no_cmove insns and splitters;
lose pentium float trick for now.
(ffssi_1): Add * to name; use CTZ instead of UNSPEC.
(ctzsi2, clzsi2, bsr): New.
2003-02-04 Richard Henderson <rth@redhat.com>
* config/ia64/ia64.c (rtx_needs_barrier): Handle POPCOUNT,
UNSPEC_GETF_EXP; remove UNSPEC_POPCNT.
* config/ia64/ia64.md (UNSPEC_POPCNT): Remove.
......
...@@ -80,7 +80,6 @@ ...@@ -80,7 +80,6 @@
(UNSPEC_SCAS 20) (UNSPEC_SCAS 20)
(UNSPEC_SIN 21) (UNSPEC_SIN 21)
(UNSPEC_COS 22) (UNSPEC_COS 22)
(UNSPEC_BSF 23)
(UNSPEC_FNSTSW 24) (UNSPEC_FNSTSW 24)
(UNSPEC_SAHF 25) (UNSPEC_SAHF 25)
(UNSPEC_FSTCW 26) (UNSPEC_FSTCW 26)
...@@ -14110,104 +14109,98 @@ ...@@ -14110,104 +14109,98 @@
[(set_attr "type" "leave")]) [(set_attr "type" "leave")])
(define_expand "ffssi2" (define_expand "ffssi2"
[(set (match_operand:SI 0 "nonimmediate_operand" "") [(parallel
(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))] [(set (match_operand:SI 0 "register_operand" "")
(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))
(clobber (match_scratch:SI 2 ""))
(clobber (reg:CC 17))])]
"" ""
{ "")
rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode);
rtx in = operands[1];
if (TARGET_CMOVE)
{
emit_move_insn (tmp, constm1_rtx);
emit_insn (gen_ffssi_1 (out, in));
emit_insn (gen_rtx_SET (VOIDmode, out,
gen_rtx_IF_THEN_ELSE (SImode,
gen_rtx_EQ (VOIDmode, gen_rtx_REG (CCZmode, FLAGS_REG),
const0_rtx),
tmp,
out)));
emit_insn (gen_addsi3 (out, out, const1_rtx));
emit_move_insn (operands[0], out);
}
/* Pentium bsf instruction is extremely slow. The following code is
recommended by the Intel Optimizing Manual as a reasonable replacement:
TEST EAX,EAX
JZ SHORT BS2
XOR ECX,ECX
MOV DWORD PTR [TEMP+4],ECX
SUB ECX,EAX
AND EAX,ECX
MOV DWORD PTR [TEMP],EAX
FILD QWORD PTR [TEMP]
FSTP QWORD PTR [TEMP]
WAIT ; WAIT only needed for compatibility with
; earlier processors
MOV ECX, DWORD PTR [TEMP+4]
SHR ECX,20
SUB ECX,3FFH
TEST EAX,EAX ; clear zero flag
BS2:
Following piece of code expand ffs to similar beast.
*/
else if (TARGET_PENTIUM && !optimize_size && TARGET_80387)
{
rtx label = gen_label_rtx ();
rtx lo, hi;
rtx mem = assign_386_stack_local (DImode, 0);
rtx fptmp = gen_reg_rtx (DFmode);
split_di (&mem, 1, &lo, &hi);
emit_move_insn (out, const0_rtx);
emit_cmp_and_jump_insns (in, const0_rtx, EQ, 0, SImode, 1, label);
emit_move_insn (hi, out);
emit_insn (gen_subsi3 (out, out, in));
emit_insn (gen_andsi3 (out, out, in));
emit_move_insn (lo, out);
emit_insn (gen_floatdidf2 (fptmp,mem));
emit_move_insn (gen_rtx_MEM (DFmode, XEXP (mem, 0)), fptmp);
emit_move_insn (out, hi);
emit_insn (gen_lshrsi3 (out, out, GEN_INT (20)));
emit_insn (gen_subsi3 (out, out, GEN_INT (0x3ff - 1)));
emit_label (label); (define_insn_and_split "*ffs_cmove"
LABEL_NUSES (label) = 1; [(set (match_operand:SI 0 "register_operand" "=r")
(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
(clobber (match_scratch:SI 2 "=&r"))
(clobber (reg:CC 17))]
"TARGET_CMOVE"
"#"
"&& reload_completed"
[(set (match_dup 2) (const_int -1))
(parallel [(set (reg:CCZ 17) (compare:CCZ (match_dup 1) (const_int 0)))
(set (match_dup 0) (ctz:SI (match_dup 1)))])
(set (match_dup 0) (if_then_else:SI
(eq (reg:CCZ 17) (const_int 0))
(match_dup 2)
(match_dup 0)))
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
(clobber (reg:CC 17))])]
"")
emit_move_insn (operands[0], out); (define_insn_and_split "*ffs_no_cmove"
} [(set (match_operand:SI 0 "nonimmediate_operand" "=r")
else (ffs:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
{ (clobber (match_scratch:SI 2 "=&r"))
emit_move_insn (tmp, const0_rtx); (clobber (reg:CC 17))]
emit_insn (gen_ffssi_1 (out, in)); ""
emit_insn (gen_rtx_SET (VOIDmode, "#"
gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (QImode, tmp)), "reload_completed"
gen_rtx_EQ (QImode, gen_rtx_REG (CCZmode, FLAGS_REG), [(parallel [(set (match_dup 2) (const_int 0))
const0_rtx))); (clobber (reg:CC 17))])
emit_insn (gen_negsi2 (tmp, tmp)); (parallel [(set (reg:CCZ 17) (compare:CCZ (match_dup 1) (const_int 0)))
emit_insn (gen_iorsi3 (out, out, tmp)); (set (match_dup 0) (ctz:SI (match_dup 1)))])
emit_insn (gen_addsi3 (out, out, const1_rtx)); (set (strict_low_part (match_dup 3))
emit_move_insn (operands[0], out); (eq:QI (reg:CCZ 17) (const_int 0)))
} (parallel [(set (match_dup 2) (neg:SI (match_dup 2)))
DONE; (clobber (reg:CC 17))])
(parallel [(set (match_dup 0) (ior:SI (match_dup 0) (match_dup 2)))
(clobber (reg:CC 17))])
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1)))
(clobber (reg:CC 17))])]
{
operands[3] = gen_lowpart (QImode, operands[2]);
}) })
(define_insn "ffssi_1" (define_insn "*ffssi_1"
[(set (reg:CCZ 17) [(set (reg:CCZ 17)
(compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm") (compare:CCZ (match_operand:SI 1 "nonimmediate_operand" "rm")
(const_int 0))) (const_int 0)))
(set (match_operand:SI 0 "register_operand" "=r") (set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI [(match_dup 1)] UNSPEC_BSF))] (ctz:SI (match_dup 1)))]
"" ""
"bsf{l}\t{%1, %0|%0, %1}" "bsf{l}\t{%1, %0|%0, %1}"
[(set_attr "prefix_0f" "1") [(set_attr "prefix_0f" "1")
(set_attr "ppro_uops" "few")]) (set_attr "ppro_uops" "few")])
;; ffshi2 is not useful -- 4 word prefix ops are needed, which is larger (define_insn "ctzsi2"
;; and slower than the two-byte movzx insn needed to do the work in SImode. [(set (match_operand:SI 0 "register_operand" "=r")
(ctz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
(clobber (reg:CC 17))]
""
"bsf{l}\t{%1, %0|%0, %1}"
[(set_attr "prefix_0f" "1")
(set_attr "ppro_uops" "few")])
;; Expand an SImode count-leading-zeros into two steps, each of which
;; declares the flags register (reg:CC 17) clobbered:
;;   1. operand 0 = 31 - clz (operand 1); this is the same RTL shape
;;      that the "*bsr" insn pattern matches.
;;   2. operand 0 = operand 0 ^ 31; for a value v in 0..31, v ^ 31 equals
;;      31 - v, so this converts the step-1 result back into clz.
;; The enabling condition and the preparation statements are both empty.
;; NOTE(review): a zero input is not special-cased here -- confirm what
;; callers expect for that case.
(define_expand "clzsi2"
[(parallel
[(set (match_operand:SI 0 "register_operand" "")
(minus:SI (const_int 31)
(clz:SI (match_operand:SI 1 "nonimmediate_operand" ""))))
(clobber (reg:CC 17))])
(parallel
[(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
(clobber (reg:CC 17))])]
""
"")
;; Match SImode "31 - clz (operand 1)" and emit it as a single bsr
;; instruction.  Operand 0 must be a register ("=r"); operand 1 may be
;; a register or memory ("rm").  The flags register (reg:CC 17) is
;; declared clobbered.  The pattern has no enabling condition.
;; The output template spells the operands in both assembler dialects:
;; "{%1, %0|%0, %1}" gives source-first and destination-first order.
(define_insn "*bsr"
[(set (match_operand:SI 0 "register_operand" "=r")
(minus:SI (const_int 31)
(clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
(clobber (reg:CC 17))]
""
"bsr{l}\t{%1, %0|%0, %1}"
[(set_attr "prefix_0f" "1")
(set_attr "ppro_uops" "few")])
;; Thread-local storage patterns for ELF. ;; Thread-local storage patterns for ELF.
;; ;;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment