Commit 8148fe65, authored and committed by Jakub Jelinek

re PR middle-end/21265 (GCC fails to optimize tail call to memset)

	PR middle-end/21265
	* expr.h (enum block_op_methods): Add BLOCK_OP_TAILCALL.
	(clear_storage): Add argument.
	* expr.c (emit_block_move_via_libcall, clear_storage_via_libcall):
	Add tailcall argument, set CALL_EXPR_TAILCALL of the CALL_EXPR to
	tailcall.
	(emit_block_move): Handle BLOCK_OP_TAILCALL method.
	(clear_storage): Add method argument, handle BLOCK_OP_TAILCALL.
	(store_expr, store_constructor): Adjust callers.
	* builtins.c (expand_builtin_memcpy): Pass BLOCK_OP_TAILCALL
	to emit_block_move if CALL_EXPR_TAILCALL (exp).
	(expand_builtin_memmove): Add ORIG_EXP argument, copy
	CALL_EXPR_TAILCALL from ORIG_EXP to the new CALL_EXPR.
	(expand_builtin_bcopy): Replace ARGLIST and TYPE arguments
	with EXP.  Pass EXP to expand_builtin_memmove.
	(expand_builtin_memset): Add ORIG_EXP argument, pass
	BLOCK_OP_TAILCALL to clear_storage if CALL_EXPR_TAILCALL (orig_exp).
	(expand_builtin_bzero): Replace ARGLIST argument with EXP.
	Pass EXP to expand_builtin_memset.
	(expand_builtin_strcmp): Copy CALL_EXPR_TAILCALL from EXP to
	the new CALL_EXPR.
	(expand_builtin_strncmp): Likewise.
	(expand_builtin_printf): Replace ARGLIST argument with EXP.
	Copy CALL_EXPR_TAILCALL from EXP to the new CALL_EXPR.
	(expand_builtin_fprintf): Likewise.
	(expand_builtin): Adjust calls to
	expand_builtin_{memmove,bcopy,memset,bzero,{,f}printf}.

	* gcc.dg/20050503-1.c: New test.

From-SVN: r99187
parent 125886c7
2005-05-04 Jakub Jelinek <jakub@redhat.com>
PR middle-end/21265
* expr.h (enum block_op_methods): Add BLOCK_OP_TAILCALL.
(clear_storage): Add argument.
* expr.c (emit_block_move_via_libcall, clear_storage_via_libcall):
Add tailcall argument, set CALL_EXPR_TAILCALL of the CALL_EXPR to
tailcall.
(emit_block_move): Handle BLOCK_OP_TAILCALL method.
(clear_storage): Add method argument, handle BLOCK_OP_TAILCALL.
(store_expr, store_constructor): Adjust callers.
* builtins.c (expand_builtin_memcpy): Pass BLOCK_OP_TAILCALL
to emit_block_move if CALL_EXPR_TAILCALL (exp).
(expand_builtin_memmove): Add ORIG_EXP argument, copy
CALL_EXPR_TAILCALL from ORIG_EXP to the new CALL_EXPR.
(expand_builtin_bcopy): Replace ARGLIST and TYPE arguments
with EXP. Pass EXP to expand_builtin_memmove.
(expand_builtin_memset): Add ORIG_EXP argument, pass
BLOCK_OP_TAILCALL to clear_storage if CALL_EXPR_TAILCALL (orig_exp).
(expand_builtin_bzero): Replace ARGLIST argument with EXP.
Pass EXP to expand_builtin_memset.
(expand_builtin_strcmp): Copy CALL_EXPR_TAILCALL from EXP to
the new CALL_EXPR.
(expand_builtin_strncmp): Likewise.
(expand_builtin_printf): Replace ARGLIST argument with EXP.
Copy CALL_EXPR_TAILCALL from EXP to the new CALL_EXPR.
(expand_builtin_fprintf): Likewise.
(expand_builtin): Adjust calls to
expand_builtin_{memmove,bcopy,memset,bzero,{,f}printf}.
* config/i386/i386.c (ix86_expand_vector_set): Fix setting 3rd and 4th
item in V4SF mode.
......
......@@ -126,7 +126,7 @@ static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode,
struct move_by_pieces *);
static bool block_move_libcall_safe_for_call_parm (void);
static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned);
static rtx emit_block_move_via_libcall (rtx, rtx, rtx);
static rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool);
static tree emit_block_move_libcall_fn (int);
static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode);
......@@ -135,7 +135,7 @@ static void store_by_pieces_1 (struct store_by_pieces *, unsigned int);
static void store_by_pieces_2 (rtx (*) (rtx, ...), enum machine_mode,
struct store_by_pieces *);
static bool clear_storage_via_clrmem (rtx, rtx, unsigned);
static rtx clear_storage_via_libcall (rtx, rtx);
static rtx clear_storage_via_libcall (rtx, rtx, bool);
static tree clear_storage_libcall_fn (int);
static rtx compress_float_constant (rtx, rtx);
static rtx get_subtarget (rtx);
......@@ -1148,6 +1148,7 @@ emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method)
switch (method)
{
case BLOCK_OP_NORMAL:
case BLOCK_OP_TAILCALL:
may_use_call = true;
break;
......@@ -1196,7 +1197,8 @@ emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method)
else if (emit_block_move_via_movmem (x, y, size, align))
;
else if (may_use_call)
retval = emit_block_move_via_libcall (x, y, size);
retval = emit_block_move_via_libcall (x, y, size,
method == BLOCK_OP_TAILCALL);
else
emit_block_move_via_loop (x, y, size, align);
......@@ -1325,7 +1327,7 @@ emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align)
Return the return value from memcpy, 0 otherwise. */
static rtx
emit_block_move_via_libcall (rtx dst, rtx src, rtx size)
emit_block_move_via_libcall (rtx dst, rtx src, rtx size, bool tailcall)
{
rtx dst_addr, src_addr;
tree call_expr, arg_list, fn, src_tree, dst_tree, size_tree;
......@@ -1367,6 +1369,7 @@ emit_block_move_via_libcall (rtx dst, rtx src, rtx size)
call_expr = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fn)), fn);
call_expr = build3 (CALL_EXPR, TREE_TYPE (TREE_TYPE (fn)),
call_expr, arg_list, NULL_TREE);
CALL_EXPR_TAILCALL (call_expr) = tailcall;
retval = expand_expr (call_expr, NULL_RTX, VOIDmode, 0);
......@@ -2427,11 +2430,13 @@ store_by_pieces_2 (rtx (*genfun) (rtx, ...), enum machine_mode mode,
its length in bytes. */
rtx
clear_storage (rtx object, rtx size)
clear_storage (rtx object, rtx size, enum block_op_methods method)
{
enum machine_mode mode = GET_MODE (object);
unsigned int align;
gcc_assert (method == BLOCK_OP_NORMAL || method == BLOCK_OP_TAILCALL);
/* If OBJECT is not BLKmode and SIZE is the same size as its mode,
just move a zero. Otherwise, do this a piece at a time. */
if (mode != BLKmode
......@@ -2468,7 +2473,8 @@ clear_storage (rtx object, rtx size)
else if (clear_storage_via_clrmem (object, size, align))
;
else
return clear_storage_via_libcall (object, size);
return clear_storage_via_libcall (object, size,
method == BLOCK_OP_TAILCALL);
return NULL;
}
......@@ -2533,7 +2539,7 @@ clear_storage_via_clrmem (rtx object, rtx size, unsigned int align)
Return the return value of memset, 0 otherwise. */
static rtx
clear_storage_via_libcall (rtx object, rtx size)
clear_storage_via_libcall (rtx object, rtx size, bool tailcall)
{
tree call_expr, arg_list, fn, object_tree, size_tree;
enum machine_mode size_mode;
......@@ -2566,6 +2572,7 @@ clear_storage_via_libcall (rtx object, rtx size)
call_expr = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fn)), fn);
call_expr = build3 (CALL_EXPR, TREE_TYPE (TREE_TYPE (fn)),
call_expr, arg_list, NULL_TREE);
CALL_EXPR_TAILCALL (call_expr) = tailcall;
retval = expand_expr (call_expr, NULL_RTX, VOIDmode, 0);
......@@ -4305,7 +4312,7 @@ store_expr (tree exp, rtx target, int call_param_p)
}
if (size != const0_rtx)
clear_storage (target, size);
clear_storage (target, size, BLOCK_OP_NORMAL);
if (label)
emit_label (label);
......@@ -4659,7 +4666,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
&& ! CONSTRUCTOR_ELTS (exp))
/* If the constructor is empty, clear the union. */
{
clear_storage (target, expr_size (exp));
clear_storage (target, expr_size (exp), BLOCK_OP_NORMAL);
cleared = 1;
}
......@@ -4687,7 +4694,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
|| ((HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (target))
== size)))
{
clear_storage (target, GEN_INT (size));
clear_storage (target, GEN_INT (size), BLOCK_OP_NORMAL);
cleared = 1;
}
......@@ -4887,7 +4894,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
if (REG_P (target))
emit_move_insn (target, CONST0_RTX (GET_MODE (target)));
else
clear_storage (target, GEN_INT (size));
clear_storage (target, GEN_INT (size), BLOCK_OP_NORMAL);
cleared = 1;
}
......@@ -5132,7 +5139,7 @@ store_constructor (tree exp, rtx target, int cleared, HOST_WIDE_INT size)
if (REG_P (target))
emit_move_insn (target, CONST0_RTX (GET_MODE (target)));
else
clear_storage (target, GEN_INT (size));
clear_storage (target, GEN_INT (size), BLOCK_OP_NORMAL);
cleared = 1;
}
......
......@@ -367,7 +367,9 @@ enum block_op_methods
{
BLOCK_OP_NORMAL,
BLOCK_OP_NO_LIBCALL,
BLOCK_OP_CALL_PARM
BLOCK_OP_CALL_PARM,
/* Like BLOCK_OP_NORMAL, but the libcall can be tail call optimized. */
BLOCK_OP_TAILCALL
};
extern void init_block_move_fn (const char *);
......@@ -419,7 +421,7 @@ extern void use_group_regs (rtx *, rtx);
/* Write zeros through the storage of OBJECT.
If OBJECT has BLKmode, SIZE is its length in bytes. */
extern rtx clear_storage (rtx, rtx);
extern rtx clear_storage (rtx, rtx, enum block_op_methods);
/* Determine whether the LEN bytes can be moved by using several move
instructions. Return nonzero if a call to move_by_pieces should
......
2005-05-04 Jakub Jelinek <jakub@redhat.com>
PR middle-end/21265
* gcc.dg/20050503-1.c: New test.
PR rtl-optimization/21239
* gcc.dg/i386-sse-11.c: New test.
......
/* PR middle-end/21265
Test whether tail call information is propagated through builtin
expanders. */
/* { dg-do compile } */
/* { dg-options "-O2" } */
typedef __SIZE_TYPE__ size_t;
#define F(n, rettype, name, args, callargs) \
extern rettype name args; \
rettype test##n args \
{ \
return name callargs; \
}
#define F1(n, rettype, name, t1, callargs) \
F (n, rettype, name, (t1 a1), callargs)
#define F2(n, rettype, name, t1, t2, callargs) \
F (n, rettype, name, (t1 a1, t2 a2), callargs)
#define F3(n, rettype, name, t1, t2, t3, callargs) \
F (n, rettype, name, (t1 a1, t2 a2, t3 a3), callargs)
F3 (1a, void *, memcpy, void *, const void *, size_t, (a1, a2, a3))
F3 (1b, void *, memcpy, void *, const void *, size_t, (a1, a2, 10))
F3 (2a, void *, mempcpy, void *, const void *, size_t, (a1, a2, a3))
F3 (2b, void *, mempcpy, void *, const void *, size_t, (a1, a2, 10))
F3 (3a, void *, memmove, void *, const void *, size_t, (a1, a2, a3))
F3 (3b, void *, memmove, void *, const void *, size_t, (a1, "abcdefghijklmno", a3))
F3 (4a, void *, memset, void *, int, size_t, (a1, a2, a3))
F3 (4b, void *, memset, void *, int, size_t, (a1, a2, 156))
F3 (4c, void *, memset, void *, int, size_t, (a1, 0, a3))
F3 (4d, void *, memset, void *, int, size_t, (a1, 0, 10000))
F3 (5a, int, memcmp, const void *, const void *, size_t, (a1, a2, a3))
F3 (5b, int, memcmp, const void *, const void *, size_t, (a1, "abcdefghijkl", a3))
F2 (6, char *, strcpy, char *, const char *, (a1, a2))
F2 (7, char *, stpcpy, char *, const char *, (a1, a2))
F3 (8, char *, strncpy, char *, const char *, size_t, (a1, a2, a3))
F3 (9, char *, stpncpy, char *, const char *, size_t, (a1, a2, a3))
F2 (10, char *, strcat, char *, const char *, (a1, a2))
F3 (11, char *, strncat, char *, const char *, size_t, (a1, a2, a3))
F1 (12a, size_t, strlen, const char *, (a1))
F1 (12b, size_t, strlen, const char *, ("foobar"))
F2 (13a, int, strcmp, const char *, const char *, (a1, a2))
F2 (13b, int, strcmp, const char *, const char *, (a1, "abcdefghijklm"))
F3 (14a, int, strncmp, const char *, const char *, size_t, (a1, a2, a3))
F3 (14b, int, strncmp, const char *, const char *, size_t, (a1, "abcdefghijklm", 10))
F2 (15, char *, strchr, const char *, int, (a1, a2))
/* All the calls above should be tail call optimized on i?86/x86-64. */
/* { dg-final { scan-assembler-not "call" { target i?86-*-linux* x86_64-*-linux* } } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment