Commit 8c996513 by Jan Hubicka Committed by Jan Hubicka

expr.c (emit_block_move_via_libcall): Export.


	* expr.c (emit_block_move_via_libcall): Export.
	(clear_storage_via_libcall): Rename to ...
	(set_storage_via_libcall): ... this one; handle arbitrary value to set.
	(clear_storage): Update to set_storage_via_libcall.
	* expr.h (emit_block_move_via_libcall): Declare
	* config/i386/i386.h (stringop_alg): New enum.
	(MAX_STRINGOP_ALGS): New constant.
	(stringop_algs): New struct.
	(processor_costs): Add memcpy/memset descriptors.
	(TARGET_REP_MOVEL_OPTIMAL): Remove.
	* config/i386/i386.md (movsi_insv_1_rex64): New pattern.
	(strmovsi, strmovdi, strsetsi, strsetdi): Update; accept all operands
	for memset.
	* config/i386/i386.opt (minline-stringops-dynamically,
	mstringop-strategy): New
	parameters.
	* config/i386/i386-protos.h (ix86_expand_movmem): Update prototype.
	(ix86_expand_clrmem): Rename to ...
	(ix86_expand_setmem): ... this one; update prototype.
	* config/i386/i386.c (DUMMY_STRINGOP_ALGS): New constant.
	(size_cost, i386_cost, i486_cost, pentium_cost, pentiumpro_cost,
	geode_cost, k6_cost, athlon_cost, k8_cost, pentium4_cost,
	nocona_cost, core2_cost, generic_cost, generic64_cost,
	generic32_cost): Add memcpy/memset descriptors.
	(x86_rep_movl_optimal): Remove.
	(stringop_alg): New static variable.
	(ix86_expand_aligntest): Handle predictions.
	(override_options): Add stringop_alg handling.
	(predict_jump): New function.
	(scale_counter): New function.
	(expand_set_or_movmem_via_loop): New function.
	(expand_movmem_via_rep_mov): New function.
	(expand_setmem_via_rep_stots): New function.
	(emit_strmov): New function.
	(expand_movmem_epilogue): New function.
	(expand_setmem_epilogue_via_loop): New function.
	(expand_setmem_epilogue): New function.
	(expand_movmem_prologue): New function.
	(expand_setmem_prologue): New function.
	(decide_alg): New function.
	(decide_alignment): New function.
	(ix86_expand_movmem): Rewrite.
	(promote_duplicated_reg): New function.
	(ix86_expand_clrmem): Rename to ...
	(ix86_expand_setmem): ... this one. Rewrite.

	* invoke.texi (minline-stringops-dynamically): New command line option.
	(mstringop-strategy): Likewise.

From-SVN: r119252
parent d3d3d898
2006-11-27 Jan Hubicka <jh@suse.cz>
* expr.c (emit_block_move_via_libcall): Export.
(clear_storage_via_libcall): Rename to ...
(set_storage_via_libcall): ... this one; handle arbitrary value to set.
(clear_storage): Update to set_storage_via_libcall.
* expr.h (emit_block_move_via_libcall): Declare
* config/i386/i386.h (stringop_alg): New enum.
(MAX_STRINGOP_ALGS): New constant.
(stringop_algs): New struct.
(processor_costs): Add memcpy/memset descriptors.
(TARGET_REP_MOVEL_OPTIMAL): Remove.
* config/i386/i386.md (movsi_insv_1_rex64): New pattern.
(strmovsi, strmovdi, strsetsi, strsetdi): Update; accept all operands
for memset.
* config/i386/i386.opt (minline-stringops-dynamically,
mstringop-strategy): New
parameters.
* config/i386/i386-protos.h (ix86_expand_movmem): Update prototype.
(ix86_expand_clrmem): Rename to ...
(ix86_expand_setmem): ... this one; update prototype.
* config/i386/i386.c (DUMMY_STRINGOP_ALGS): New constant.
(size_cost, i386_cost, i486_cost, pentium_cost, pentiumpro_cost,
geode_cost, k6_cost, athlon_cost, k8_cost, pentium4_cost,
nocona_cost, core2_cost, generic_cost, generic64_cost,
generic32_cost): Add memcpy/memset descriptors.
(x86_rep_movl_optimal): Remove.
(stringop_alg): New static variable.
(ix86_expand_aligntest): Handle predictions.
(override_options): Add stringop_alg handling.
(predict_jump): New function.
(scale_counter): New function.
(expand_set_or_movmem_via_loop): New function.
(expand_movmem_via_rep_mov): New function.
(expand_setmem_via_rep_stots): New function.
(emit_strmov): New function.
(expand_movmem_epilogue): New function.
(expand_setmem_epilogue_via_loop): New function.
(expand_setmem_epilogue): New function.
(expand_movmem_prologue): New function.
(expand_setmem_prologue): New function.
(decide_alg): New function.
(decide_alignment): New function.
(ix86_expand_movmem): Rewrite.
(promote_duplicated_reg): New function.
(ix86_expand_clrmem): Rename to ...
(ix86_expand_setmem): ... this one. Rewrite.
* invoke.texi (minline-stringops-dynamically): New command line option.
(mstringop-strategy): Likewise.
2006-11-27 Jan Hubicka <jh@suse.cz>
* cfgexpand.c (construct_exit_block): Don't disturb end of last BB.
* rtl.texi (NOTE_INSN_FUNCTION_END): Remove.
* final.c (final_scan_insn): Don't scan NOTE_INSN_FUNCTION_END.
......
......@@ -50,8 +50,8 @@ extern bool x86_extended_QIreg_mentioned_p (rtx);
extern bool x86_extended_reg_mentioned_p (rtx);
extern enum machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
extern int ix86_expand_movmem (rtx, rtx, rtx, rtx);
extern int ix86_expand_clrmem (rtx, rtx, rtx);
extern int ix86_expand_movmem (rtx, rtx, rtx, rtx, rtx, rtx);
extern int ix86_expand_setmem (rtx, rtx, rtx, rtx, rtx, rtx);
extern int ix86_expand_strlen (rtx, rtx, rtx, rtx);
extern bool legitimate_constant_p (rtx);
......
......@@ -19,6 +19,38 @@ along with GCC; see the file COPYING. If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */
/* Algorithm to expand string function with.  */
enum stringop_alg
{
  /* NOTE(review): presumably "no algorithm selected"; confirm against
     the users of this value in i386.c.  */
  no_stringop,
  /* Always expand to a library call.  */
  libcall,
  /* Expand using the i386 rep prefix with 1-, 4- or 8-byte elements.  */
  rep_prefix_1_byte,
  rep_prefix_4_byte,
  rep_prefix_8_byte,
  /* NOTE(review): presumably an inline loop moving one byte per
     iteration; confirm against the expander.  */
  loop_1_byte,
  /* Expand as an inline loop.  */
  loop,
  /* Expand as an unrolled inline loop.  */
  unrolled_loop
};

/* Number of {MAX, ALG} entries in the SIZE table of struct
   stringop_algs.  */
#define MAX_STRINGOP_ALGS 4
/* Historical misspelling of MAX_STRINGOP_ALGS, kept as an alias so that
   existing users of the old name keep compiling.  */
#define NAX_STRINGOP_ALGS MAX_STRINGOP_ALGS

/* Specify what algorithm to use for stringops on known size.
   When size is unknown, the UNKNOWN_SIZE alg is used.  When size is
   known at compile time or estimated via feedback, the SIZE array
   is walked in order until MAX is greater than the estimate (or -1
   means infinity).  Corresponding ALG is used then.
   For example initializer:
     {{256, loop}, {-1, rep_prefix_4_byte}}
   will use loop for blocks smaller or equal to 256 bytes, rep prefix will
   be used otherwise.  */
struct stringop_algs
{
  /* Algorithm used when the block size is not known.  */
  const enum stringop_alg unknown_size;
  /* Size-ordered strategy table; see the comment above for how it is
     walked.  */
  const struct stringop_strategy {
    /* Upper bound of the block size this entry applies to, or -1 for
       "infinity".  */
    const int max;
    /* Algorithm to use for blocks covered by this entry.  */
    const enum stringop_alg alg;
  } size [MAX_STRINGOP_ALGS];
};
/* The purpose of this file is to define the characteristics of the i386,
independent of assembler syntax or operating system.
......@@ -84,6 +116,9 @@ struct processor_costs {
const int fabs; /* cost of FABS instruction. */
const int fchs; /* cost of FCHS instruction. */
const int fsqrt; /* cost of FSQRT instruction. */
/* Specify what algorithm
to use for stringops on unknown size. */
struct stringop_algs memcpy[2], memset[2];
};
extern const struct processor_costs *ix86_cost;
......@@ -217,7 +252,6 @@ extern int x86_prefetch_sse;
#define TARGET_PREFETCH_SSE (x86_prefetch_sse)
#define TARGET_SHIFT1 (x86_shift1 & TUNEMASK)
#define TARGET_USE_FFREEP (x86_use_ffreep & TUNEMASK)
#define TARGET_REP_MOVL_OPTIMAL (x86_rep_movl_optimal & TUNEMASK)
#define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & TUNEMASK)
#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & TUNEMASK)
#define TARGET_SCHEDULE (x86_schedule & TUNEMASK)
......
......@@ -1805,6 +1805,16 @@
[(set_attr "type" "imov")
(set_attr "mode" "QI")])
;; 64-bit-only (TARGET_64BIT) SImode byte insert: stores operand 1 into
;; the 8-bit field at bit offset 8 of operand 0 (a zero_extract of
;; width 8, position 8).  Emitted as a single byte move from the low
;; byte of operand 1 (%b1) into the high-byte register of operand 0
;; (%h0).  Operand 0 is read-modify-write ("+"), since only one byte of
;; it is replaced.
(define_insn "*movsi_insv_1_rex64"
[(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
(const_int 8)
(const_int 8))
(match_operand:SI 1 "nonmemory_operand" "Qn"))]
"TARGET_64BIT"
"mov{b}\t{%b1, %h0|%h0, %b1}"
[(set_attr "type" "imov")
(set_attr "mode" "QI")])
(define_insn "movdi_insv_1_rex64"
[(set (zero_extract:DI (match_operand 0 "ext_register_operand" "+Q")
(const_int 8)
......@@ -18160,9 +18170,10 @@
(use (match_operand:BLK 1 "memory_operand" ""))
(use (match_operand:SI 2 "nonmemory_operand" ""))
(use (match_operand:SI 3 "const_int_operand" ""))]
"! optimize_size || TARGET_INLINE_ALL_STRINGOPS"
""
{
if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3]))
if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3],
operands[3], constm1_rtx))
DONE;
else
FAIL;
......@@ -18175,7 +18186,8 @@
(use (match_operand:DI 3 "const_int_operand" ""))]
"TARGET_64BIT"
{
if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3]))
if (ix86_expand_movmem (operands[0], operands[1], operands[2], operands[3],
operands[3], constm1_rtx))
DONE;
else
FAIL;
......@@ -18450,11 +18462,9 @@
(use (match_operand 3 "const_int_operand" ""))]
""
{
/* If value to set is not zero, use the library routine. */
if (operands[2] != const0_rtx)
FAIL;
if (ix86_expand_clrmem (operands[0], operands[1], operands[3]))
if (ix86_expand_setmem (operands[0], operands[1],
operands[2], operands[3],
operands[3], constm1_rtx))
DONE;
else
FAIL;
......@@ -18464,14 +18474,14 @@
[(use (match_operand:BLK 0 "memory_operand" ""))
(use (match_operand:DI 1 "nonmemory_operand" ""))
(use (match_operand 2 "const_int_operand" ""))
(use (match_operand 3 "const_int_operand" ""))]
(use (match_operand 3 "const_int_operand" ""))
(use (match_operand 4 "const_int_operand" ""))
(use (match_operand 5 "const_int_operand" ""))]
"TARGET_64BIT"
{
/* If value to set is not zero, use the library routine. */
if (operands[2] != const0_rtx)
FAIL;
if (ix86_expand_clrmem (operands[0], operands[1], operands[3]))
if (ix86_expand_setmem (operands[0], operands[1],
operands[2], operands[3],
operands[3], constm1_rtx))
DONE;
else
FAIL;
......
......@@ -125,6 +125,10 @@ minline-all-stringops
Target Report Mask(INLINE_ALL_STRINGOPS)
Inline all known string operations
minline-stringops-dynamically
Target Report Mask(INLINE_STRINGOPS_DYNAMICALLY)
Inline memset/memcpy string operations, but perform inline version only for small blocks
mintel-syntax
Target Undocumented
;; Deprecated
......@@ -221,6 +225,10 @@ mstack-arg-probe
Target Report Mask(STACK_PROBE)
Enable stack probing
mstringop-strategy=
Target RejectNegative Joined Var(ix86_stringop_string)
Choose strategy to generate stringop using
mtls-dialect=
Target RejectNegative Joined Var(ix86_tls_dialect_string)
Use given thread-local storage dialect
......
......@@ -9721,6 +9721,19 @@ aligned at least to 4 byte boundary. This enables more inlining, increase code
size, but may improve performance of code that depends on fast memcpy, strlen
and memset for short lengths.
@item -minline-stringops-dynamically
@opindex minline-stringops-dynamically
For string operations of unknown size, inline runtime checks so that for small
blocks inline code is used, while for large blocks a library call is used.
@item -mstringop-strategy=@var{alg}
@opindex mstringop-strategy=@var{alg}
Override the internal decision heuristic about the particular algorithm to
inline string operations with.  The allowed values are @code{rep_byte},
@code{rep_4byte}, @code{rep_8byte} for expanding using the i386 @code{rep} prefix
of the specified size, @code{loop}, @code{unrolled_loop} for expanding an inline
loop, @code{libcall} for always expanding a library call.
@item -momit-leaf-frame-pointer
@opindex momit-leaf-frame-pointer
Don't keep the frame pointer in a register for leaf functions. This
......
......@@ -127,7 +127,6 @@ static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode,
struct move_by_pieces *);
static bool block_move_libcall_safe_for_call_parm (void);
static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned);
static rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool);
static tree emit_block_move_libcall_fn (int);
static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode);
......@@ -135,7 +134,6 @@ static void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int);
static void store_by_pieces_1 (struct store_by_pieces *, unsigned int);
static void store_by_pieces_2 (rtx (*) (rtx, ...), enum machine_mode,
struct store_by_pieces *);
static rtx clear_storage_via_libcall (rtx, rtx, bool);
static tree clear_storage_libcall_fn (int);
static rtx compress_float_constant (rtx, rtx);
static rtx get_subtarget (rtx);
......@@ -1336,7 +1334,7 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align)
/* A subroutine of emit_block_move. Expand a call to memcpy.
Return the return value from memcpy, 0 otherwise. */
static rtx
rtx
emit_block_move_via_libcall (rtx dst, rtx src, rtx size, bool tailcall)
{
rtx dst_addr, src_addr;
......@@ -2540,8 +2538,8 @@ clear_storage (rtx object, rtx size, enum block_op_methods method)
else if (set_storage_via_setmem (object, size, const0_rtx, align))
;
else
return clear_storage_via_libcall (object, size,
method == BLOCK_OP_TAILCALL);
return set_storage_via_libcall (object, size, const0_rtx,
method == BLOCK_OP_TAILCALL);
return NULL;
}
......@@ -2549,10 +2547,10 @@ clear_storage (rtx object, rtx size, enum block_op_methods method)
/* A subroutine of clear_storage. Expand a call to memset.
Return the return value of memset, 0 otherwise. */
static rtx
clear_storage_via_libcall (rtx object, rtx size, bool tailcall)
rtx
set_storage_via_libcall (rtx object, rtx size, rtx val, bool tailcall)
{
tree call_expr, arg_list, fn, object_tree, size_tree;
tree call_expr, arg_list, fn, object_tree, size_tree, val_tree;
enum machine_mode size_mode;
rtx retval;
......@@ -2572,11 +2570,14 @@ clear_storage_via_libcall (rtx object, rtx size, bool tailcall)
for returning pointers, we could end up generating incorrect code. */
object_tree = make_tree (ptr_type_node, object);
if (GET_CODE (val) != CONST_INT)
val = convert_to_mode (TYPE_MODE (integer_type_node), val, 1);
size_tree = make_tree (sizetype, size);
val_tree = make_tree (integer_type_node, val);
fn = clear_storage_libcall_fn (true);
arg_list = tree_cons (NULL_TREE, size_tree, NULL_TREE);
arg_list = tree_cons (NULL_TREE, integer_zero_node, arg_list);
arg_list = tree_cons (NULL_TREE, val_tree, arg_list);
arg_list = tree_cons (NULL_TREE, object_tree, arg_list);
/* Now we have to build up the CALL_EXPR itself. */
......@@ -2590,7 +2591,7 @@ clear_storage_via_libcall (rtx object, rtx size, bool tailcall)
return retval;
}
/* A subroutine of clear_storage_via_libcall. Create the tree node
/* A subroutine of set_storage_via_libcall. Create the tree node
for the function we use for block clears. The first time FOR_CALL
is true, we call assemble_external. */
......
......@@ -377,6 +377,7 @@ extern void init_block_move_fn (const char *);
extern void init_block_clear_fn (const char *);
extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods);
extern rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool);
/* Copy all or part of a value X into registers starting at REGNO.
The number of registers to be filled is NREGS. */
......@@ -423,6 +424,8 @@ extern void use_group_regs (rtx *, rtx);
/* Write zeros through the storage of OBJECT.
If OBJECT has BLKmode, SIZE is its length in bytes. */
extern rtx clear_storage (rtx, rtx, enum block_op_methods);
/* The same, but always output a library call. */
rtx set_storage_via_libcall (rtx, rtx, rtx, bool);
/* Expand a setmem pattern; return true if successful. */
extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int);
......
2006-11-27 Jan Hubicka <jh@suse.cz>
* gcc.target/i386/memcpy-1.c: Adjust size.
* testsuite/gcc.dg/visibility-11.c: Likewise.
2006-11-27 Richard Guenther <rguenther@suse.de>
PR middle-end/25620
......@@ -13,7 +13,7 @@
extern void* memcpy (void *, const void *, __SIZE_TYPE__);
#pragma GCC visibility pop
struct a { int a[1024]; };
struct a { int a[4096]; };
extern void *bar (struct a *, struct a *, int);
......
......@@ -9,9 +9,9 @@
/* A and B are aligned, but we used to lose track of it.
Ensure that memcpy is inlined and alignment prologue is missing. */
char a[900];
char b[900];
char a[2048];
char b[2048];
t()
{
__builtin_memcpy (a,b,900);
__builtin_memcpy (a,b,2048);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment