Commit 628448b3 by Stan Cox

(strlensi): New pattern.

From-SVN: r10831
parent 3f803cd9
......@@ -4183,7 +4183,7 @@
(const_int 1)
(match_operand:SI 2 "general_operand" "r"))
(match_operand:SI 3 "const_int_operand" "n"))]
"TARGET_BIT_TEST && GET_CODE (operands[2]) != CONST_INT"
"TARGET_USE_BIT_TEST && GET_CODE (operands[2]) != CONST_INT"
"*
{
CC_STATUS_INIT;
......@@ -4201,7 +4201,7 @@
(xor:SI (ashift:SI (const_int 1)
(match_operand:SI 1 "general_operand" "r"))
(match_operand:SI 2 "general_operand" "0")))]
"TARGET_BIT_TEST && GET_CODE (operands[1]) != CONST_INT"
"TARGET_USE_BIT_TEST && GET_CODE (operands[1]) != CONST_INT"
"*
{
CC_STATUS_INIT;
......@@ -4214,7 +4214,7 @@
(xor:SI (match_operand:SI 1 "general_operand" "0")
(ashift:SI (const_int 1)
(match_operand:SI 2 "general_operand" "r"))))]
"TARGET_BIT_TEST && GET_CODE (operands[2]) != CONST_INT"
"TARGET_USE_BIT_TEST && GET_CODE (operands[2]) != CONST_INT"
"*
{
CC_STATUS_INIT;
......@@ -5742,17 +5742,58 @@
(define_expand "strlensi"
[(parallel [(set (match_dup 4)
(unspec:SI [(mem:BLK (match_operand:BLK 1 "general_operand" ""))
(match_operand:QI 2 "register_operand" "")
(match_operand:QI 2 "immediate_operand" "")
(match_operand:SI 3 "immediate_operand" "")] 0))
(clobber (match_dup 1))])
(set (match_dup 5)
(not:SI (match_dup 4)))
(set (match_operand:SI 0 "register_operand" "")
(minus:SI (match_dup 5)
(const_int 1)))]
(plus:SI (match_dup 5)
(const_int -1)))]
""
"
{
if (TARGET_UNROLL_STRLEN && operands[2] == const0_rtx && optimize > 1)
{
rtx address;
rtx scratch;
/* It seems that some optimizer does not combine the two strlen calls in
foo(strlen(bar), strlen(bar));
when the move and the subtraction are done here.  It does calculate
the length just once when these instructions are done inside of
output_strlen_unroll().  But since &bar[strlen(bar)] is often used,
and doing it here needs one fewer register for the lifetime of
output_strlen_unroll(), I think this is better.  */
scratch = gen_reg_rtx (SImode);
address = force_reg (SImode, XEXP (operands[1], 0));
/* Move the address to the scratch register.
This is done here because the i586 can execute this move
in the same cycle as the following move.  */
if (GET_CODE (operands[3]) != CONST_INT || INTVAL (operands[3]) < 4)
emit_insn (gen_movsi (scratch, address));
emit_insn (gen_movsi (operands[0], address));
if(TARGET_USE_Q_REG)
emit_insn (gen_strlensi_unroll5 (operands[0],
operands[3],
scratch,
operands[0]));
else
emit_insn (gen_strlensi_unroll4 (operands[0],
operands[3],
scratch,
operands[0]));
/* gen_strlensi_unroll[45] returns the address of the zero
at the end of the string, like memchr(), so compute the
length by subtracting the start address.  */
emit_insn (gen_subsi3 (operands[0], operands[0], address));
DONE;
}
operands[1] = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
operands[4] = gen_reg_rtx (SImode);
operands[5] = gen_reg_rtx (SImode);
......@@ -5765,7 +5806,7 @@
(define_insn ""
[(set (match_operand:SI 0 "register_operand" "=&c")
(unspec:SI [(mem:BLK (match_operand:SI 1 "address_operand" "D"))
(match_operand:QI 2 "register_operand" "a")
(match_operand:QI 2 "immediate_operand" "a")
(match_operand:SI 3 "immediate_operand" "i")] 0))
(clobber (match_dup 1))]
""
......@@ -5779,3 +5820,24 @@
output_asm_insn (AS2 (mov%L0,%1,%0), xops);
return \"repnz\;scas%B2\";
}")
;; The only difference between the following patterns is the register
;; preference.  On a Pentium, using a q-register saves one clock cycle per
;; 4 characters.
;; strlensi_unroll4: unrolled string-length scan whose assembly text is
;; produced by output_strlen_unroll (operands).
;;
;; Operands:
;;   0 - SImode result register.
;;   3 - SImode register used as the address of the BLKmode memory that is
;;       read; its "0" constraint ties it to the same register as operand 0.
;;   1 - SImode immediate.  The strlensi expander passes its own operand 3
;;       here; NOTE(review): presumably an alignment hint -- confirm against
;;       output_strlen_unroll.
;;   2 - SImode scratch register, early-clobbered and listed in the clobber.
;;       The first alternative requires class "q"; the second accepts any
;;       "r" register but is marked with "!" so it is chosen only reluctantly.
;;
;; The pattern is only available when TARGET_USE_ANY_REG is set and
;; optimize > 1.  The strlensi expander emits it when TARGET_USE_Q_REG is
;; false.
(define_insn "strlensi_unroll4"
[(set (match_operand:SI 0 "register_operand" "=r,r")
(unspec:SI [(mem:BLK (match_operand:SI 3 "register_operand" "0,0"))
(match_operand:SI 1 "immediate_operand" "i,i")
(match_operand:SI 2 "register_operand" "=&q,&!r")] 0))
(clobber (match_dup 2))]
"(TARGET_USE_ANY_REG && optimize > 1)"
"* return output_strlen_unroll (operands);")
;; strlensi_unroll5: unrolled string-length scan whose assembly text is
;; produced by output_strlen_unroll (operands).  Unlike strlensi_unroll4 it
;; has a single constraint alternative, so the scratch must be a "q" register.
;;
;; Operands:
;;   0 - SImode result register.
;;   3 - SImode register used as the address of the BLKmode memory that is
;;       read; its "0" constraint ties it to the same register as operand 0.
;;   1 - SImode immediate.  The strlensi expander passes its own operand 3
;;       here; NOTE(review): presumably an alignment hint -- confirm against
;;       output_strlen_unroll.
;;   2 - SImode scratch register of class "q", early-clobbered and listed in
;;       the clobber.
;;
;; The pattern is only available when TARGET_USE_Q_REG is set and
;; optimize > 1.  The strlensi expander emits it when TARGET_USE_Q_REG is
;; true.
(define_insn "strlensi_unroll5"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI [(mem:BLK (match_operand:SI 3 "register_operand" "0"))
(match_operand:SI 1 "immediate_operand" "i")
(match_operand:SI 2 "register_operand" "=&q")] 0))
(clobber (match_dup 2))]
"(TARGET_USE_Q_REG && optimize > 1)"
"* return output_strlen_unroll (operands);")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment