Commit 805903b5 by Jakub Jelinek Committed by Jakub Jelinek

re PR target/38488 (x86_64 generates much larger and slightly slower code for memset)

	* config/i386/i386.c (expand_setmem_via_rep_stos): Add ORIG_VALUE
	argument.  If ORIG_VALUE is const0_rtx and COUNT is constant,
	set MEM_SIZE on DESTMEM.
	(ix86_expand_setmem): Adjust callers.

	PR target/38488
	* expr.h (get_mem_align_offset): New prototype.
	* emit-rtl.c (get_mem_align_offset): New function.
	* config/i386/i386.c (expand_movmem_via_rep_mov): Set MEM_SIZE correctly.
	(expand_constant_movmem_prologue, expand_constant_setmem_prologue):
	New functions.
	(ix86_expand_movmem): Optimize if COUNT_EXP
	is constant, desired_align > align and dst & (desired_align - 1)
	is computable at compile time.
	(ix86_expand_setmem): Likewise.

	* builtins.c (get_memory_rtx): Try to derive MEM_ATTRS from not yet
	resolved SAVE_EXPR or POINTER_PLUS_EXPR.

From-SVN: r142891
parent d797a4ed
2008-12-23 Jakub Jelinek <jakub@redhat.com>
* config/i386/i386.c (expand_setmem_via_rep_stos): Add ORIG_VALUE
argument. If ORIG_VALUE is const0_rtx and COUNT is constant,
set MEM_SIZE on DESTMEM.
(ix86_expand_setmem): Adjust callers.
PR target/38488
* expr.h (get_mem_align_offset): New prototype.
* emit-rtl.c (get_mem_align_offset): New function.
* config/i386/i386.c (expand_movmem_via_rep_mov): Set MEM_SIZE correctly.
(expand_constant_movmem_prologue, expand_constant_setmem_prologue):
New functions.
(ix86_expand_movmem): Optimize if COUNT_EXP
is constant, desired_align > align and dst & (desired_align - 1)
is computable at compile time.
(ix86_expand_setmem): Likewise.
* builtins.c (get_memory_rtx): Try to derive MEM_ATTRS from not yet
resolved SAVE_EXPR or POINTER_PLUS_EXPR.
2008-12-22 Uros Bizjak <ubizjak@gmail.com>
* config/alpha/alpha.h (ASM_OUTPUT_EXTERNAL): New macro.
......@@ -1094,8 +1094,17 @@ expand_builtin_prefetch (tree exp)
static rtx
get_memory_rtx (tree exp, tree len)
{
rtx addr = expand_expr (exp, NULL_RTX, ptr_mode, EXPAND_NORMAL);
rtx mem = gen_rtx_MEM (BLKmode, memory_address (BLKmode, addr));
tree orig_exp = exp;
rtx addr, mem;
HOST_WIDE_INT off;
/* When EXP is not resolved SAVE_EXPR, MEM_ATTRS can be still derived
from its expression, for expr->a.b only <variable>.a.b is recorded. */
if (TREE_CODE (exp) == SAVE_EXPR && !SAVE_EXPR_RESOLVED_P (exp))
exp = TREE_OPERAND (exp, 0);
addr = expand_expr (orig_exp, NULL_RTX, ptr_mode, EXPAND_NORMAL);
mem = gen_rtx_MEM (BLKmode, memory_address (BLKmode, addr));
/* Get an expression we can use to find the attributes to assign to MEM.
If it is an ADDR_EXPR, use the operand. Otherwise, dereference it if
......@@ -1104,7 +1113,13 @@ get_memory_rtx (tree exp, tree len)
&& POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (exp, 0))))
exp = TREE_OPERAND (exp, 0);
if (TREE_CODE (exp) == ADDR_EXPR)
off = 0;
if (TREE_CODE (exp) == POINTER_PLUS_EXPR
&& TREE_CODE (TREE_OPERAND (exp, 0)) == ADDR_EXPR
&& host_integerp (TREE_OPERAND (exp, 1), 0)
&& (off = tree_low_cst (TREE_OPERAND (exp, 1), 0)) > 0)
exp = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
else if (TREE_CODE (exp) == ADDR_EXPR)
exp = TREE_OPERAND (exp, 0);
else if (POINTER_TYPE_P (TREE_TYPE (exp)))
exp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (exp)), exp);
......@@ -1118,6 +1133,9 @@ get_memory_rtx (tree exp, tree len)
{
set_mem_attributes (mem, exp, 0);
if (off)
mem = adjust_automodify_address_nv (mem, BLKmode, NULL, off);
/* Allow the string and memory builtins to overflow from one
field into another, see http://gcc.gnu.org/PR23561.
Thus avoid COMPONENT_REFs in MEM_EXPR unless we know the whole
......
......@@ -1490,6 +1490,90 @@ mem_expr_equal_p (const_tree expr1, const_tree expr2)
return 0;
}
/* Return OFFSET if XEXP (MEM, 0) - OFFSET is known to be ALIGN
bits aligned for 0 <= OFFSET < ALIGN / BITS_PER_UNIT, or
-1 if not known.
MEM must be a MEM rtx (this is asserted).  ALIGN is expressed in bits;
the returned offset is ALIGN / BITS_PER_UNIT granular, i.e. it is the
accumulated offset masked with ALIGN / BITS_PER_UNIT - 1.
The answer is derived from MEM_EXPR and MEM_OFFSET only: -1 is returned
when either is missing, when MEM_OFFSET is not a CONST_INT, when
MEM_EXPR is not a decl, an INDIRECT_REF or a COMPONENT_REF chain, or
when the object found is not known to be at least ALIGN aligned. */
int
get_mem_align_offset (rtx mem, int align)
{
tree expr;
unsigned HOST_WIDE_INT offset;
/* This function can't use
if (!MEM_EXPR (mem) || !MEM_OFFSET (mem)
|| !CONST_INT_P (MEM_OFFSET (mem))
|| (get_object_alignment (MEM_EXPR (mem), MEM_ALIGN (mem), align)
< align))
return -1;
else
return (- INTVAL (MEM_OFFSET (mem))) & (align / BITS_PER_UNIT - 1);
for two reasons:
- COMPONENT_REFs in MEM_EXPR can have NULL first operand,
for <variable>. get_inner_reference doesn't handle it and
even if it did, the alignment in that case needs to be determined
from DECL_FIELD_CONTEXT's TYPE_ALIGN.
- it would do suboptimal job for COMPONENT_REFs, even if MEM_EXPR
isn't sufficiently aligned, the object it is in might be. */
gcc_assert (MEM_P (mem));
expr = MEM_EXPR (mem);
/* Without an expression and a constant offset into it nothing can be
said about the alignment. */
if (expr == NULL_TREE
|| MEM_OFFSET (mem) == NULL_RTX
|| !CONST_INT_P (MEM_OFFSET (mem)))
return -1;
/* Start from the recorded offset of MEM within MEM_EXPR; field offsets
of enclosing COMPONENT_REFs are added to it below. */
offset = INTVAL (MEM_OFFSET (mem));
if (DECL_P (expr))
{
/* A declaration: its own DECL_ALIGN must be at least ALIGN. */
if (DECL_ALIGN (expr) < align)
return -1;
}
else if (INDIRECT_REF_P (expr))
{
/* A dereference: only the alignment of the referenced type is
consulted here. */
if (TYPE_ALIGN (TREE_TYPE (expr)) < (unsigned int) align)
return -1;
}
else if (TREE_CODE (expr) == COMPONENT_REF)
{
/* Walk outwards through nested COMPONENT_REFs, accumulating each
field's offset, until an operand is reached whose alignment can be
checked (a NULL operand or a declaration). */
while (1)
{
tree inner = TREE_OPERAND (expr, 0);
tree field = TREE_OPERAND (expr, 1);
tree byte_offset = component_ref_field_offset (expr);
tree bit_offset = DECL_FIELD_BIT_OFFSET (field);
/* Give up unless both parts of the field position are constants
that pass the host_integerp (..., 1) check. */
if (!byte_offset
|| !host_integerp (byte_offset, 1)
|| !host_integerp (bit_offset, 1))
return -1;
/* The bit offset is converted to units by division, so any bits
below BITS_PER_UNIT are discarded. */
offset += tree_low_cst (byte_offset, 1);
offset += tree_low_cst (bit_offset, 1) / BITS_PER_UNIT;
if (inner == NULL_TREE)
{
/* NULL first operand (the <variable> case described above): use
the alignment of the type containing the field. */
if (TYPE_ALIGN (DECL_FIELD_CONTEXT (field))
< (unsigned int) align)
return -1;
break;
}
else if (DECL_P (inner))
{
/* Reached the containing declaration: check its alignment. */
if (DECL_ALIGN (inner) < align)
return -1;
break;
}
else if (TREE_CODE (inner) != COMPONENT_REF)
/* Any other kind of containing expression is not handled. */
return -1;
/* Another COMPONENT_REF level: continue with the enclosing one. */
expr = inner;
}
}
else
return -1;
/* Reduce the accumulated offset modulo the alignment in units.
NOTE(review): the mask form assumes ALIGN / BITS_PER_UNIT is a nonzero
power of two -- confirm against callers. */
return offset & ((align / BITS_PER_UNIT) - 1);
}
/* Given REF (a MEM) and T, either the type of X or the expression
corresponding to REF, set the memory attributes. OBJECTP is nonzero
if we are making a new object of this type. BITPOS is nonzero if
......
......@@ -695,6 +695,11 @@ extern void set_mem_attributes (rtx, tree, int);
expecting that it'll be added back in later. */
extern void set_mem_attributes_minus_bitpos (rtx, tree, int, HOST_WIDE_INT);
/* Return OFFSET if XEXP (MEM, 0) - OFFSET is known to be ALIGN
bits aligned for 0 <= OFFSET < ALIGN / BITS_PER_UNIT, or
-1 if not known. */
extern int get_mem_align_offset (rtx, int);
/* Assemble the static constant template for function entry trampolines. */
extern rtx assemble_trampoline_template (void);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment