Commit 79f05c19 by Jan Hubicka

i386.md (movstrsi, clrstrsi): Support variable sized copies, align destination when needed.

	* i386.md (movstrsi, clrstrsi): Support variable sized copies, align
	destination when needed.
	(strmovsi, strsetsi): New expander.
	(strmovsi_1, strsetsi_1): New pattern.
	* i386.h (MASK_NO_ALIGN_STROPS, MASK_INLINE_ALL_STROPS,
	TARGET_ALIGN_STRINGOPS, TARGET_INLINE_ALL_STRINGOPS): New macros.
	(TARGET_SWITCHES): Add align-stringops and inline-all-stringops.
	* invoke.texi (align-stringops, inline-all-stringops): Document.

From-SVN: r31773
parent 31a72d3f
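
For context, a hedged illustration (not part of the commit) of the calls the patched movstrsi/clrstrsi expanders back on i386: memcpy and memset expansion. The function names and sizes below are invented for the example; -minline-all-stringops is the switch this commit adds.

    /* example.c -- illustrative only; exact codegen depends on target,
       optimization level, and what alignment GCC can prove.  */
    #include <string.h>

    char dst[100], src[100];

    void fixed_count (void)
    {
      /* Constant count: expanded as rep movsl plus a trailing
         movsw/movsb covering count & 3, as before this patch.  */
      memcpy (dst, src, 10);
    }

    void variable_count (char *d, const char *s, unsigned n)
    {
      /* Variable count: previously the expander FAILed to the library
         call; with this patch it can be inlined, aligning the
         destination at run time when its alignment is unknown
         (e.g. under -minline-all-stringops).  */
      memcpy (d, s, n);
      memset (d, 0, n);
    }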
gcc/ChangeLog
+Thu Feb  3 15:08:13 MET 2000  Jan Hubicka  <jh@suse.cz>
+
+	* i386.md (movstrsi, clrstrsi): Support variable sized copies, align
+	destination when needed.
+	(strmovsi, strsetsi): New expander.
+	(strmovsi_1, strsetsi_1): New pattern.
+	* i386.h (MASK_NO_ALIGN_STROPS, MASK_INLINE_ALL_STROPS,
+	TARGET_ALIGN_STRINGOPS, TARGET_INLINE_ALL_STRINGOPS): New macros.
+	(TARGET_SWITCHES): Add align-stringops and inline-all-stringops.
+	* invoke.texi (align-stringops, inline-all-stringops): Document.
+
 Wed Feb  2 23:04:47 2000  Krister Walfridsson  <cato@df.lth.se>

 	* i386/netbsd.h (INT_ASM_OP): Define.
gcc/config/i386/i386.h
@@ -101,6 +101,8 @@ extern int target_flags;
 #define MASK_NO_FANCY_MATH_387	0x00000040	/* Disable sin, cos, sqrt */
 #define MASK_OMIT_LEAF_FRAME_POINTER 0x080	/* omit leaf frame pointers */
 #define MASK_STACK_PROBE	0x00000100	/* Enable stack probing */
+#define MASK_NO_ALIGN_STROPS	0x00001000	/* Disable aligning of string ops.  */
+#define MASK_INLINE_ALL_STROPS	0x00002000	/* Inline stringops in all cases */

 /* Temporary codegen switches */
 #define MASK_INTEL_SYNTAX	0x00000200
@@ -190,6 +192,9 @@ extern const int x86_promote_QImode, x86_single_stringop;
 #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE)

+#define TARGET_ALIGN_STRINGOPS (!(target_flags & MASK_NO_ALIGN_STROPS))
+#define TARGET_INLINE_ALL_STRINGOPS (target_flags & MASK_INLINE_ALL_STROPS)
+
 #define ASSEMBLER_DIALECT ((target_flags & MASK_INTEL_SYNTAX) != 0)

 #define TARGET_SWITCHES \
@@ -238,6 +243,14 @@ extern const int x86_promote_QImode, x86_single_stringop;
   { "intel-syntax",		MASK_INTEL_SYNTAX,	\
     "Emit Intel syntax assembler opcodes" },		\
   { "no-intel-syntax",		-MASK_INTEL_SYNTAX, "" },	\
+  { "align-stringops",		-MASK_NO_ALIGN_STROPS,	\
+    "Align destination of the string operations" },	\
+  { "no-align-stringops",	MASK_NO_ALIGN_STROPS,	\
+    "Do not align destination of the string operations" },	\
+  { "inline-all-stringops",	MASK_INLINE_ALL_STROPS,	\
+    "Inline all known string operations" },		\
+  { "no-inline-all-stringops",	-MASK_INLINE_ALL_STROPS,	\
+    "Do not inline all known string operations" },	\
   SUBTARGET_SWITCHES					\
   { "", TARGET_DEFAULT, 0 }}
gcc/config/i386/i386.md
@@ -7838,49 +7838,208 @@
 (define_expand "movstrsi"
   [(use (match_operand:BLK 0 "memory_operand" ""))
    (use (match_operand:BLK 1 "memory_operand" ""))
-   (use (match_operand:SI 2 "const_int_operand" ""))
+   (use (match_operand:SI 2 "nonmemory_operand" ""))
    (use (match_operand:SI 3 "const_int_operand" ""))]
   ""
   "
 {
   rtx srcreg, destreg, countreg;
+  int align = 0;
+  int count = -1;
+
-  if (GET_CODE (operands[2]) != CONST_INT)
-    FAIL;
+  if (GET_CODE (operands[3]) == CONST_INT)
+    align = INTVAL (operands[3]);
+
+  /* This simple hack avoids all inlining code and simplifies code below.  */
+  if (!TARGET_ALIGN_STRINGOPS)
+    align = 32;
+
+  if (GET_CODE (operands[2]) == CONST_INT)
+    count = INTVAL (operands[2]);
   destreg = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
   srcreg = copy_to_mode_reg (Pmode, XEXP (operands[1], 0));

   emit_insn (gen_cld());

   /* When optimizing for size emit simple rep ; movsb instruction for
      counts not divisible by 4.  */
-  if ((!optimize || optimize_size) && (INTVAL (operands[2]) & 0x03))
+  if ((!optimize || optimize_size)
+      && (count < 0 || (count & 0x03)))
     {
       countreg = copy_to_mode_reg (SImode, operands[2]);
       emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
                                 destreg, srcreg, countreg));
     }
-  else
+  /* For constant aligned (or small unaligned) copies use rep movsl
+     followed by code copying the rest.  For PentiumPro ensure 8 byte
+     alignment to allow rep movsl acceleration.  */
+  else if (count >= 0
+           && (align >= 8
+               || (!TARGET_PENTIUMPRO && align >= 4)
+               || optimize_size || count < 64))
     {
-      if (INTVAL (operands[2]) & ~0x03)
+      if (count & ~0x03)
         {
           countreg = copy_to_mode_reg (SImode,
-                                       GEN_INT ((INTVAL (operands[2]) >> 2)
+                                       GEN_INT ((count >> 2)
                                                 & 0x3fffffff));
           emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
                                     destreg, srcreg, countreg));
         }
-      if (INTVAL (operands[2]) & 0x02)
+      if (count & 0x02)
        emit_insn (gen_strmovhi (destreg, srcreg));
-      if (INTVAL (operands[2]) & 0x01)
+      if (count & 0x01)
        emit_insn (gen_strmovqi (destreg, srcreg));
     }
+  /* The generic code based on the glibc implementation:
+     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
+       allowing accelerated copying there)
+     - copy the data using rep movsl
+     - copy the rest.  */
+  else
+    {
+      rtx countreg2;
+      rtx label = NULL;
+
+      /* In case we don't know anything about the alignment, default to
+         the library version, since it is usually equally fast and results
+         in shorter code.  */
+      if (!TARGET_INLINE_ALL_STRINGOPS && align < 4)
+        FAIL;
+
+      if (TARGET_SINGLE_STRINGOP)
+        emit_insn (gen_cld());
+
+      countreg2 = gen_reg_rtx (SImode);
+      countreg = copy_to_mode_reg (SImode, operands[2]);
+
+      /* We don't use loops to align destination and to copy parts smaller
+         than 4 bytes, because gcc is able to optimize such code better (in
+         the case the destination or the count really is aligned, gcc is
+         often able to predict the branches), and it is friendlier to the
+         hardware branch prediction.
+
+         Using loops is beneficial for the generic case, because we can
+         handle small counts using the loops.  Many CPUs (such as Athlon)
+         have large REP prefix setup costs.
+
+         This is quite costly.  Maybe we can revisit this decision later or
+         add some customizability to this code.  */
+
+      if (count < 0
+          && align < (TARGET_PENTIUMPRO && (count < 0 || count >= 260) ? 8 : 4))
+        {
+          label = gen_label_rtx ();
+          emit_cmp_and_jump_insns (countreg, GEN_INT (3),
+                                   LEU, 0, SImode, 1, 0, label);
+        }
+      if (align <= 1)
+        {
+          rtx label = gen_label_rtx ();
+          rtx tmpcount = gen_reg_rtx (SImode);
+          emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (1)));
+          emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
+                                   SImode, 1, 0, label);
+          emit_insn (gen_strmovqi (destreg, srcreg));
+          emit_insn (gen_addsi3 (countreg, countreg, constm1_rtx));
+          emit_label (label);
+          LABEL_NUSES (label) = 1;
+        }
+      if (align <= 2)
+        {
+          rtx label = gen_label_rtx ();
+          rtx tmpcount = gen_reg_rtx (SImode);
+          emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (2)));
+          emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
+                                   SImode, 1, 0, label);
+          emit_insn (gen_strmovhi (destreg, srcreg));
+          emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-2)));
+          emit_label (label);
+          LABEL_NUSES (label) = 1;
+        }
+      if (align <= 4 && TARGET_PENTIUMPRO && (count < 1 || count >= 260))
+        {
+          rtx label = gen_label_rtx ();
+          rtx tmpcount = gen_reg_rtx (SImode);
+          emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (4)));
+          emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
+                                   SImode, 1, 0, label);
+          emit_insn (gen_strmovsi (destreg, srcreg));
+          emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-4)));
+          emit_label (label);
+          LABEL_NUSES (label) = 1;
+        }
+      if (!TARGET_SINGLE_STRINGOP)
+        emit_insn (gen_cld());
+      emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
+      emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
+                                destreg, srcreg, countreg2));
+
+      if (label)
+        {
+          emit_label (label);
+          LABEL_NUSES (label) = 1;
+        }
+      if (align > 2 && count > 0 && (count & 2))
+        emit_insn (gen_strmovhi (destreg, srcreg));
+      if (align <= 2 || count < 0)
+        {
+          rtx label = gen_label_rtx ();
+          rtx tmpcount = gen_reg_rtx (SImode);
+          emit_insn (gen_andsi3 (tmpcount, countreg, GEN_INT (2)));
+          emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
+                                   SImode, 1, 0, label);
+          emit_insn (gen_strmovhi (destreg, srcreg));
+          emit_label (label);
+          LABEL_NUSES (label) = 1;
+        }
+      if (align > 1 && count > 0 && (count & 1))
+        emit_insn (gen_strmovqi (destreg, srcreg));
+      if (align <= 1 || count < 0)
+        {
+          rtx label = gen_label_rtx ();
+          rtx tmpcount = gen_reg_rtx (SImode);
+          emit_insn (gen_andsi3 (tmpcount, countreg, GEN_INT (1)));
+          emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
+                                   SImode, 1, 0, label);
+          emit_insn (gen_strmovqi (destreg, srcreg));
+          emit_label (label);
+          LABEL_NUSES (label) = 1;
+        }
+    }
   DONE;
 }")

 ;; Most CPUs don't like single string operations
 ;; Handle this case here to simplify previous expander.
(define_expand "strmovsi"
[(set (match_dup 2)
(mem:SI (match_operand:SI 1 "register_operand" "")))
(set (mem:SI (match_operand:SI 0 "register_operand" ""))
(match_dup 2))
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 4)))
(clobber (reg:CC 17))])
(parallel [(set (match_dup 1) (plus:SI (match_dup 1) (const_int 4)))
(clobber (reg:CC 17))])]
""
"
{
if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strmovsi_1 (operands[0], operands[1], operands[0],
operands[1]));
DONE;
}
else
operands[2] = gen_reg_rtx (SImode);
}")
(define_expand "strmovhi" (define_expand "strmovhi"
[(set (match_dup 2) [(set (match_dup 2)
(mem:HI (match_operand:SI 1 "register_operand" ""))) (mem:HI (match_operand:SI 1 "register_operand" "")))
...@@ -7925,6 +8084,21 @@ ...@@ -7925,6 +8084,21 @@
operands[2] = gen_reg_rtx (QImode); operands[2] = gen_reg_rtx (QImode);
}") }")
(define_insn "strmovsi_1"
[(set (mem:SI (match_operand:SI 2 "register_operand" "0"))
(mem:SI (match_operand:SI 3 "register_operand" "1")))
(set (match_operand:SI 0 "register_operand" "=D")
(plus:SI (match_dup 0)
(const_int 4)))
(set (match_operand:SI 1 "register_operand" "=S")
(plus:SI (match_dup 1)
(const_int 4)))
(use (reg:SI 19))]
"TARGET_SINGLE_STRINGOP || optimize_size"
"movsl"
[(set_attr "type" "str")
(set_attr "memory" "both")])
(define_insn "strmovhi_1" (define_insn "strmovhi_1"
[(set (mem:HI (match_operand:SI 2 "register_operand" "0")) [(set (mem:HI (match_operand:SI 2 "register_operand" "0"))
(mem:HI (match_operand:SI 3 "register_operand" "1"))) (mem:HI (match_operand:SI 3 "register_operand" "1")))
...@@ -7996,15 +8170,26 @@ ...@@ -7996,15 +8170,26 @@
(define_expand "clrstrsi" (define_expand "clrstrsi"
[(use (match_operand:BLK 0 "memory_operand" "")) [(use (match_operand:BLK 0 "memory_operand" ""))
(use (match_operand:SI 1 "const_int_operand" "")) (use (match_operand:SI 1 "nonmemory_operand" ""))
(use (match_operand:SI 2 "const_int_operand" ""))] (use (match_operand:SI 2 "const_int_operand" ""))]
"" ""
" "
{ {
+  /* See comments in movstr expanders.  The code is mostly identical.  */
+
   rtx destreg, zeroreg, countreg;
+  int align = 0;
+  int count = -1;
+
-  if (GET_CODE (operands[1]) != CONST_INT)
-    FAIL;
+  if (GET_CODE (operands[2]) == CONST_INT)
+    align = INTVAL (operands[2]);
+
+  /* This simple hack avoids all inlining code and simplifies code below.  */
+  if (!TARGET_ALIGN_STRINGOPS)
+    align = 32;
+
+  if (GET_CODE (operands[1]) == CONST_INT)
+    count = INTVAL (operands[1]);

   destreg = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
@@ -8012,14 +8197,19 @@
   /* When optimizing for size emit simple rep ; movsb instruction for
      counts not divisible by 4.  */
-  if ((!optimize || optimize_size) && (INTVAL (operands[1]) & 0x03))
+  if ((!optimize || optimize_size)
+      && (count < 0 || (count & 0x03)))
     {
       countreg = copy_to_mode_reg (SImode, operands[1]);
       zeroreg = copy_to_mode_reg (QImode, const0_rtx);
       emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
                                  destreg, countreg));
     }
-  else
+  else if (count >= 0
+           && (align >= 8
+               || (!TARGET_PENTIUMPRO && align >= 4)
+               || optimize_size || count < 64))
     {
       zeroreg = copy_to_mode_reg (SImode, const0_rtx);
       if (INTVAL (operands[1]) & ~0x03)
@@ -8037,12 +8227,133 @@
       emit_insn (gen_strsetqi (destreg,
                                gen_rtx_SUBREG (QImode, zeroreg, 0)));
     }
+  else
+    {
+      rtx countreg2;
+      rtx label = NULL;
+
+      /* In case we don't know anything about the alignment, default to
+         the library version, since it is usually equally fast and results
+         in shorter code.  */
+      if (!TARGET_INLINE_ALL_STRINGOPS && align < 4)
+        FAIL;
+
+      if (TARGET_SINGLE_STRINGOP)
+        emit_insn (gen_cld());
+
+      countreg2 = gen_reg_rtx (SImode);
+      countreg = copy_to_mode_reg (SImode, operands[1]);
+      zeroreg = copy_to_mode_reg (SImode, const0_rtx);
+
+      if (count < 0
+          && align < (TARGET_PENTIUMPRO && (count < 0 || count >= 260) ? 8 : 4))
+        {
+          label = gen_label_rtx ();
+          emit_cmp_and_jump_insns (countreg, GEN_INT (3),
+                                   LEU, 0, SImode, 1, 0, label);
+        }
+      if (align <= 1)
+        {
+          rtx label = gen_label_rtx ();
+          rtx tmpcount = gen_reg_rtx (SImode);
+          emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (1)));
+          emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
+                                   SImode, 1, 0, label);
+          emit_insn (gen_strsetqi (destreg,
+                                   gen_rtx_SUBREG (QImode, zeroreg, 0)));
+          emit_insn (gen_addsi3 (countreg, countreg, constm1_rtx));
+          emit_label (label);
+          LABEL_NUSES (label) = 1;
+        }
+      if (align <= 2)
+        {
+          rtx label = gen_label_rtx ();
+          rtx tmpcount = gen_reg_rtx (SImode);
+          emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (2)));
+          emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
+                                   SImode, 1, 0, label);
+          emit_insn (gen_strsethi (destreg,
+                                   gen_rtx_SUBREG (HImode, zeroreg, 0)));
+          emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-2)));
+          emit_label (label);
+          LABEL_NUSES (label) = 1;
+        }
+      if (align <= 4 && TARGET_PENTIUMPRO && (count < 1 || count >= 260))
+        {
+          rtx label = gen_label_rtx ();
+          rtx tmpcount = gen_reg_rtx (SImode);
+          emit_insn (gen_andsi3 (tmpcount, destreg, GEN_INT (4)));
+          emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
+                                   SImode, 1, 0, label);
+          emit_insn (gen_strsetsi (destreg, zeroreg));
+          emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-4)));
+          emit_label (label);
+          LABEL_NUSES (label) = 1;
+        }
+      if (!TARGET_SINGLE_STRINGOP)
+        emit_insn (gen_cld());
+      emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
+      emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
+                                 destreg, countreg2));
+
+      if (label)
+        {
+          emit_label (label);
+          LABEL_NUSES (label) = 1;
+        }
+      if (align > 2 && count > 0 && (count & 2))
+        emit_insn (gen_strsethi (destreg,
+                                 gen_rtx_SUBREG (HImode, zeroreg, 0)));
+      if (align <= 2 || count < 0)
+        {
+          rtx label = gen_label_rtx ();
+          rtx tmpcount = gen_reg_rtx (SImode);
+          emit_insn (gen_andsi3 (tmpcount, countreg, GEN_INT (2)));
+          emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
+                                   SImode, 1, 0, label);
+          emit_insn (gen_strsethi (destreg,
+                                   gen_rtx_SUBREG (HImode, zeroreg, 0)));
+          emit_label (label);
+          LABEL_NUSES (label) = 1;
+        }
+      if (align > 1 && count > 0 && (count & 1))
+        emit_insn (gen_strsetqi (destreg,
+                                 gen_rtx_SUBREG (QImode, zeroreg, 0)));
+      if (align <= 1 || count < 0)
+        {
+          rtx label = gen_label_rtx ();
+          rtx tmpcount = gen_reg_rtx (SImode);
+          emit_insn (gen_andsi3 (tmpcount, countreg, GEN_INT (1)));
+          emit_cmp_and_jump_insns (tmpcount, GEN_INT (0), EQ, 0,
+                                   SImode, 1, 0, label);
+          emit_insn (gen_strsetqi (destreg,
+                                   gen_rtx_SUBREG (QImode, zeroreg, 0)));
+          emit_label (label);
+          LABEL_NUSES (label) = 1;
+        }
+    }
   DONE;
 }")

 ;; Most CPUs don't like single string operations
 ;; Handle this case here to simplify previous expander.
(define_expand "strsetsi"
[(set (mem:SI (match_operand:SI 0 "register_operand" ""))
(match_operand:SI 1 "register_operand" ""))
(parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 4)))
(clobber (reg:CC 17))])]
""
"
{
if (TARGET_SINGLE_STRINGOP || optimize_size)
{
emit_insn (gen_strsetsi_1 (operands[0], operands[0], operands[1]));
DONE;
}
}")
(define_expand "strsethi" (define_expand "strsethi"
[(set (mem:HI (match_operand:SI 0 "register_operand" "")) [(set (mem:HI (match_operand:SI 0 "register_operand" ""))
(match_operand:HI 1 "register_operand" "")) (match_operand:HI 1 "register_operand" ""))
...@@ -8073,6 +8384,18 @@ ...@@ -8073,6 +8384,18 @@
} }
}") }")
(define_insn "strsetsi_1"
[(set (mem:SI (match_operand:SI 1 "register_operand" "0"))
(match_operand:SI 2 "register_operand" "a"))
(set (match_operand:SI 0 "register_operand" "=D")
(plus:SI (match_dup 0)
(const_int 4)))
(use (reg:SI 19))]
"TARGET_SINGLE_STRINGOP || optimize_size"
"stosl"
[(set_attr "type" "str")
(set_attr "memory" "store")])
(define_insn "strsethi_1" (define_insn "strsethi_1"
[(set (mem:HI (match_operand:SI 1 "register_operand" "0")) [(set (mem:HI (match_operand:SI 1 "register_operand" "0"))
(match_operand:HI 2 "register_operand" "a")) (match_operand:HI 2 "register_operand" "a"))
...@@ -8252,6 +8575,14 @@ ...@@ -8252,6 +8575,14 @@
{ {
rtx out, addr, eoschar, align, scratch1, scratch2, scratch3; rtx out, addr, eoschar, align, scratch1, scratch2, scratch3;
+
+  /* The generic case of the strlen expander is long.  Avoid expanding it
+     unless TARGET_INLINE_ALL_STRINGOPS.  */
+  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
+      && !TARGET_INLINE_ALL_STRINGOPS
+      && !optimize_size
+      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
+    FAIL;
+
   out = operands[0];
   addr = force_reg (Pmode, XEXP (operands[1], 0));
   eoschar = operands[2];
@@ -8271,6 +8602,7 @@
   if (GET_CODE (align) != CONST_INT || INTVAL (align) < 4)
     emit_move_insn (scratch1, addr);

   emit_move_insn (out, addr);
   ix86_expand_strlensi_unroll_1 (out, align, scratch1);
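
A hedged C rendering (not from the commit) of the code shape the new generic movstrsi path above emits for an unknown-alignment, variable-count copy; the RTL additionally branches straight to the tail when the count is 3 or less:

    /* Pseudo-C for the emitted sequence: align dst, rep movsl, tail.  */
    void inline_memcpy_shape (char *dst, const char *src, unsigned n)
    {
      if ((unsigned long) dst & 1)            /* align <= 1 prologue */
        { *dst++ = *src++; n--; }
      if ((unsigned long) dst & 2)            /* align <= 2 prologue */
        { *(short *) dst = *(const short *) src; dst += 2; src += 2; n -= 2; }
      for (unsigned i = 0; i < (n >> 2); i++) /* rep movsl: n >> 2 words */
        ((int *) dst)[i] = ((const int *) src)[i];
      dst += n & ~3U; src += n & ~3U;
      if (n & 2)                              /* tail: leftover halfword */
        { *(short *) dst = *(const short *) src; dst += 2; src += 2; }
      if (n & 1)                              /* tail: leftover byte */
        *dst = *src;
    }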
gcc/invoke.texi
@@ -360,7 +360,7 @@ in the following sections.
 -mreg-alloc=@var{list}  -mregparm=@var{num}
 -malign-jumps=@var{num}  -malign-loops=@var{num}
 -malign-functions=@var{num}  -mpreferred-stack-boundary=@var{num}
--mthreads
+-mthreads  -mno-align-stringops  -minline-all-stringops

 @emph{HPPA Options}
 -march=@var{architecture type}
@@ -5954,6 +5954,19 @@ on thread-safe exception handling must compile and link all code with the
 @samp{-mthreads} option.  When compiling, @samp{-mthreads} defines
 @samp{-D_MT}; when linking, it links in a special thread helper library
 @samp{-lmingwthrd} which cleans up per thread exception handling data.
+
+@item -mno-align-stringops
+@kindex -mno-align-stringops
+Do not align the destination of inlined string operations.  This switch
+reduces code size and improves performance when the destination is already
+aligned but GCC does not know about it.
+
+@item -minline-all-stringops
+@kindex -minline-all-stringops
+By default GCC inlines string operations only when the destination is known
+to be aligned to at least a 4 byte boundary.  This enables more inlining and
+increases code size, but may improve performance of code that depends on
+fast memcpy, strlen and memset for short lengths.
 @end table

 @node HPPA Options
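
A hedged usage sketch of the two switches documented above (the file name and the caller's alignment guarantee are invented for the example; the performance trade-offs are the ones the patch itself claims):

    /* clear.c -- the caller guarantees p is at least 4-byte aligned,
       but GCC cannot prove it from the types alone.  */
    #include <string.h>

    void clear (void *p, unsigned n)
    {
      /* Build with:
           gcc -O2 -minline-all-stringops -c clear.c
         to inline the memset even though the alignment is unknown, and add
           -mno-align-stringops
         to also drop the run-time alignment prologue when the caller's
         guarantee makes it redundant.  */
      memset (p, 0, n);
    }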