Commit 3c285765 by Nathan Froyd Committed by Nathan Froyd

re PR rtl-optimization/11001 (global register %edi versus string builtins)

gcc/
	PR 11001
	* config/i386/i386.md (strmov): Check for esi and edi usage.
	* config/i386/i386.c (decide_alg): Check whether we can use a
	rep prefix and adjust algorithm choice accordingly.
	(ix86_expand_strlen): Check for eax, ecx, and edi usage.

gcc/testsuite/
	PR 11001
	* gcc.target/i386/pr11001-strlen-1.c: New testcase.
	* gcc.target/i386/pr11001-strlen-2.c: New testcase.
	* gcc.target/i386/pr11001-strlen-3.c: New testcase.
	* gcc.target/i386/pr11001-memset-1.c: New testcase.
	* gcc.target/i386/pr11001-memset-2.c: New testcase.
	* gcc.target/i386/pr11001-memset-3.c: New testcase.
	* gcc.target/i386/pr11001-memcpy-1.c: New testcase.
	* gcc.target/i386/pr11001-memcpy-2.c: New testcase.
	* gcc.target/i386/pr11001-memcpy-3.c: New testcase.

From-SVN: r129265
parent ece84738
2007-10-12 Nathan Froyd <froydnj@codesourcery.com>
PR 11001
* config/i386/i386.md (strmov): Check for esi and edi usage.
* config/i386/i386.c (decide_alg): Check whether we can use a
rep prefix and adjust algorithm choice accordingly.
(ix86_expand_strlen): Check for eax, ecx, and edi usage.
2007-10-12 Kaveh R. Ghazi <ghazi@caip.rutgers.edu> 2007-10-12 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* config/c4x/c4x.md (mulqi3, mulhi3): Use optab_libfunc. * config/c4x/c4x.md (mulqi3, mulhi3): Use optab_libfunc.
...@@ -15056,21 +15056,32 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset, ...@@ -15056,21 +15056,32 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
int *dynamic_check) int *dynamic_check)
{ {
const struct stringop_algs * algs; const struct stringop_algs * algs;
/* Algorithms using the rep prefix want at least edi and ecx;
additionally, memset wants eax and memcpy wants esi. Don't
consider such algorithms if the user has appropriated those
registers for their own purposes. */
bool rep_prefix_usable = !(global_regs[2] || global_regs[5]
|| (memset ? global_regs[0] : global_regs[4]));
#define ALG_USABLE_P(alg) (rep_prefix_usable \
|| (alg != rep_prefix_1_byte \
&& alg != rep_prefix_4_byte \
&& alg != rep_prefix_8_byte))
*dynamic_check = -1; *dynamic_check = -1;
if (memset) if (memset)
algs = &ix86_cost->memset[TARGET_64BIT != 0]; algs = &ix86_cost->memset[TARGET_64BIT != 0];
else else
algs = &ix86_cost->memcpy[TARGET_64BIT != 0]; algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
if (stringop_alg != no_stringop) if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
return stringop_alg; return stringop_alg;
/* rep; movq or rep; movl is the smallest variant. */ /* rep; movq or rep; movl is the smallest variant. */
else if (optimize_size) else if (optimize_size)
{ {
if (!count || (count & 3)) if (!count || (count & 3))
return rep_prefix_1_byte; return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
else else
return rep_prefix_4_byte; return rep_prefix_usable ? rep_prefix_4_byte : loop;
} }
/* Very tiny blocks are best handled via the loop, REP is expensive to setup. /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
*/ */
...@@ -15082,27 +15093,34 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset, ...@@ -15082,27 +15093,34 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
enum stringop_alg alg = libcall; enum stringop_alg alg = libcall;
for (i = 0; i < NAX_STRINGOP_ALGS; i++) for (i = 0; i < NAX_STRINGOP_ALGS; i++)
{ {
gcc_assert (algs->size[i].max); /* We get here if the algorithms that were not libcall-based
were rep-prefix based and we are unable to use rep prefixes
based on global register usage. Break out of the loop and
use the heuristic below. */
if (algs->size[i].max == 0)
break;
if (algs->size[i].max >= expected_size || algs->size[i].max == -1) if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
{ {
if (algs->size[i].alg != libcall) enum stringop_alg candidate = algs->size[i].alg;
alg = algs->size[i].alg;
if (candidate != libcall && ALG_USABLE_P (candidate))
alg = candidate;
/* Honor TARGET_INLINE_ALL_STRINGOPS by picking /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
last non-libcall inline algorithm. */ last non-libcall inline algorithm. */
if (TARGET_INLINE_ALL_STRINGOPS) if (TARGET_INLINE_ALL_STRINGOPS)
{ {
/* When the current size is best to be copied by a libcall, /* When the current size is best to be copied by a libcall,
but we are still forced to inline, run the heuristic bellow but we are still forced to inline, run the heuristic below
that will pick code for medium sized blocks. */ that will pick code for medium sized blocks. */
if (alg != libcall) if (alg != libcall)
return alg; return alg;
break; break;
} }
else else if (ALG_USABLE_P (candidate))
return algs->size[i].alg; return candidate;
} }
} }
gcc_assert (TARGET_INLINE_ALL_STRINGOPS); gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
} }
/* When asked to inline the call anyway, try to pick meaningful choice. /* When asked to inline the call anyway, try to pick meaningful choice.
We look for maximal size of block that is faster to copy by hand and We look for maximal size of block that is faster to copy by hand and
...@@ -15112,15 +15130,32 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset, ...@@ -15112,15 +15130,32 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
If this turns out to be bad, we might simply specify the preferred If this turns out to be bad, we might simply specify the preferred
choice in ix86_costs. */ choice in ix86_costs. */
if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY) if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
&& algs->unknown_size == libcall) && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
{ {
int max = -1; int max = -1;
enum stringop_alg alg; enum stringop_alg alg;
int i; int i;
bool any_alg_usable_p = true;
for (i = 0; i < NAX_STRINGOP_ALGS; i++) for (i = 0; i < NAX_STRINGOP_ALGS; i++)
if (algs->size[i].alg != libcall && algs->size[i].alg) {
enum stringop_alg candidate = algs->size[i].alg;
any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
if (candidate != libcall && candidate
&& ALG_USABLE_P (candidate))
max = algs->size[i].max; max = algs->size[i].max;
}
/* If there aren't any usable algorithms, then recursing on
smaller sizes isn't going to find anything. Just return the
simple byte-at-a-time copy loop. */
if (!any_alg_usable_p)
{
/* Pick something reasonable. */
if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
*dynamic_check = 128;
return loop_1_byte;
}
if (max == -1) if (max == -1)
max = 4096; max = 4096;
alg = decide_alg (count, max / 2, memset, dynamic_check); alg = decide_alg (count, max / 2, memset, dynamic_check);
...@@ -15130,7 +15165,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset, ...@@ -15130,7 +15165,8 @@ decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
*dynamic_check = max; *dynamic_check = max;
return alg; return alg;
} }
return algs->unknown_size; return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
} }
/* Decide on alignment. We know that the operand is already aligned to ALIGN /* Decide on alignment. We know that the operand is already aligned to ALIGN
...@@ -15984,6 +16020,11 @@ ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) ...@@ -15984,6 +16020,11 @@ ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
else else
{ {
rtx unspec; rtx unspec;
/* Can't use this if the user has appropriated eax, ecx, or edi. */
if (global_regs[0] || global_regs[2] || global_regs[5])
return false;
scratch2 = gen_reg_rtx (Pmode); scratch2 = gen_reg_rtx (Pmode);
scratch3 = gen_reg_rtx (Pmode); scratch3 = gen_reg_rtx (Pmode);
scratch4 = force_reg (Pmode, constm1_rtx); scratch4 = force_reg (Pmode, constm1_rtx);
......
...@@ -18702,7 +18702,9 @@ ...@@ -18702,7 +18702,9 @@
operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust); operands[5] = gen_rtx_PLUS (Pmode, operands[0], adjust);
operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust); operands[6] = gen_rtx_PLUS (Pmode, operands[2], adjust);
if (TARGET_SINGLE_STRINGOP || optimize_size) /* Can't use this if the user has appropriated esi or edi. */
if ((TARGET_SINGLE_STRINGOP || optimize_size)
&& !(global_regs[4] || global_regs[5]))
{ {
emit_insn (gen_strmov_singleop (operands[0], operands[1], emit_insn (gen_strmov_singleop (operands[0], operands[1],
operands[2], operands[3], operands[2], operands[3],
......
2007-10-12 Nathan Froyd <froydnj@codesourcery.com>
PR 11001
* gcc.target/i386/pr11001-strlen-1.c: New testcase.
* gcc.target/i386/pr11001-strlen-2.c: New testcase.
* gcc.target/i386/pr11001-strlen-3.c: New testcase.
* gcc.target/i386/pr11001-memset-1.c: New testcase.
* gcc.target/i386/pr11001-memset-2.c: New testcase.
* gcc.target/i386/pr11001-memset-3.c: New testcase.
* gcc.target/i386/pr11001-memcpy-1.c: New testcase.
* gcc.target/i386/pr11001-memcpy-2.c: New testcase.
* gcc.target/i386/pr11001-memcpy-3.c: New testcase.
2007-10-12 Richard Guenther <rguenther@suse.de> 2007-10-12 Richard Guenther <rguenther@suse.de>
PR middle-end/26198 PR middle-end/26198
/* Ensure that we don't use 'rep movX' in the presence of register globals. */
/* { dg-do compile } */
/* { dg-options "-Os -m32" } */
extern void *memcpy (void *, const void *, __SIZE_TYPE__);
register int regvar asm("%esi");
int foo[10];
int bar[10];
char baz[15];
char quux[15];
/* Test stimulus: two constant-size copies compiled with -Os while %esi
   (the source-pointer register of 'rep movs') is reserved as a global
   register variable.  The dg-final directives below check that neither
   copy is expanded with a rep prefix.  */
void
do_copy ()
{
/* 10 ints: with 4-byte int under -m32 the byte count is a multiple of 4,
   the case that would otherwise be expanded as 'rep movsl'.  */
memcpy (foo, bar, sizeof foo);
/* 15 bytes: not a multiple of 4, the case that would otherwise be
   expanded as 'rep movsb'.  */
memcpy (baz, quux, sizeof baz);
}
/* { dg-final { scan-assembler-not "rep movsl" } } */
/* { dg-final { scan-assembler-not "rep movsb" } } */
/* Ensure that we don't use 'rep movX' in the presence of register globals. */
/* { dg-do compile } */
/* { dg-options "-Os -m32" } */
extern void *memcpy (void *, const void *, __SIZE_TYPE__);
register int regvar asm("%edi");
int foo[10];
int bar[10];
char baz[15];
char quux[15];
/* Test stimulus: two constant-size copies compiled with -Os while %edi
   (the destination-pointer register of 'rep movs') is reserved as a
   global register variable.  The dg-final directives below check that
   neither copy is expanded with a rep prefix.  */
void
do_copy ()
{
/* 10 ints: with 4-byte int under -m32 the byte count is a multiple of 4,
   the case that would otherwise be expanded as 'rep movsl'.  */
memcpy (foo, bar, sizeof foo);
/* 15 bytes: not a multiple of 4, the case that would otherwise be
   expanded as 'rep movsb'.  */
memcpy (baz, quux, sizeof baz);
}
/* { dg-final { scan-assembler-not "rep movsl" } } */
/* { dg-final { scan-assembler-not "rep movsb" } } */
/* Ensure that we don't use 'rep movX' in the presence of register globals. */
/* { dg-do compile } */
/* { dg-options "-Os -m32" } */
extern void *memcpy (void *, const void *, __SIZE_TYPE__);
register int regvar asm("%ecx"); /* { dg-warning "call-clobbered register" } */
int foo[10];
int bar[10];
char baz[15];
char quux[15];
/* Test stimulus: two constant-size copies compiled with -Os while %ecx
   (the count register of the rep prefix) is reserved as a global
   register variable.  The dg-final directives below check that neither
   copy is expanded with a rep prefix.  */
void
do_copy ()
{
/* 10 ints: with 4-byte int under -m32 the byte count is a multiple of 4,
   the case that would otherwise be expanded as 'rep movsl'.  */
memcpy (foo, bar, sizeof foo);
/* 15 bytes: not a multiple of 4, the case that would otherwise be
   expanded as 'rep movsb'.  */
memcpy (baz, quux, sizeof baz);
}
/* { dg-final { scan-assembler-not "rep movsl" } } */
/* { dg-final { scan-assembler-not "rep movsb" } } */
/* Ensure that we don't use 'rep stoX' in the presence of register globals. */
/* { dg-do compile } */
/* { dg-options "-Os -m32" } */
extern void *memset (void *, int, __SIZE_TYPE__);
register int regvar asm("%eax"); /* { dg-warning "call-clobbered register" } */
int foo[10];
int bar[10];
char baz[15];
char quux[15];
/* Test stimulus: two constant-size clears compiled with -Os while %eax
   (the value register of 'rep stos') is reserved as a global register
   variable.  The dg-final directives below check that neither clear is
   expanded with a rep prefix.  */
void
do_copy ()
{
/* 10 ints: with 4-byte int under -m32 the byte count is a multiple of 4,
   the case that would otherwise be expanded as 'rep stosl'.  */
memset (foo, 0, sizeof foo);
/* 15 bytes: not a multiple of 4, the case that would otherwise be
   expanded as 'rep stosb'.  */
memset (baz, 0, sizeof baz);
}
/* { dg-final { scan-assembler-not "rep stosl" } } */
/* { dg-final { scan-assembler-not "rep stosb" } } */
/* Ensure that we don't use 'rep stoX' in the presence of register globals. */
/* { dg-do compile } */
/* { dg-options "-Os -m32" } */
extern void *memset (void *, int, __SIZE_TYPE__);
register int regvar asm("%ecx"); /* { dg-warning "call-clobbered register" } */
int foo[10];
int bar[10];
char baz[15];
char quux[15];
/* Test stimulus: two constant-size clears compiled with -Os while %ecx
   (the count register of the rep prefix) is reserved as a global
   register variable.  The dg-final directives below check that neither
   clear is expanded with a rep prefix.  */
void
do_copy ()
{
/* 10 ints: with 4-byte int under -m32 the byte count is a multiple of 4,
   the case that would otherwise be expanded as 'rep stosl'.  */
memset (foo, 0, sizeof foo);
/* 15 bytes: not a multiple of 4, the case that would otherwise be
   expanded as 'rep stosb'.  */
memset (baz, 0, sizeof baz);
}
/* { dg-final { scan-assembler-not "rep stosl" } } */
/* { dg-final { scan-assembler-not "rep stosb" } } */
/* Ensure that we don't use 'rep stoX' in the presence of register globals. */
/* { dg-do compile } */
/* { dg-options "-Os -m32" } */
extern void *memset (void *, int, __SIZE_TYPE__);
register int regvar asm("%edi");
int foo[10];
int bar[10];
char baz[15];
char quux[15];
/* Test stimulus: two constant-size clears compiled with -Os while %edi
   (the destination-pointer register of 'rep stos') is reserved as a
   global register variable.  The dg-final directives below check that
   neither clear is expanded with a rep prefix.  */
void
do_copy ()
{
/* 10 ints: with 4-byte int under -m32 the byte count is a multiple of 4,
   the case that would otherwise be expanded as 'rep stosl'.  */
memset (foo, 0, sizeof foo);
/* 15 bytes: not a multiple of 4, the case that would otherwise be
   expanded as 'rep stosb'.  */
memset (baz, 0, sizeof baz);
}
/* { dg-final { scan-assembler-not "rep stosl" } } */
/* { dg-final { scan-assembler-not "rep stosb" } } */
/* Ensure that we don't use 'repnz scasb' in the presence of register globals. */
/* { dg-do compile } */
/* { dg-options "-O1 -m32" } */
extern __SIZE_TYPE__ strlen (const char *);
extern void *malloc (__SIZE_TYPE__);
register int regvar asm("%edi");
/* Test stimulus: a strlen call compiled at -O1 while %edi is reserved as
   a global register variable.  The dg-final directive below checks that
   the call is not expanded inline as 'repnz scasb'.  */
char *
do_copy (char *str)
{
/* The length feeds malloc, presumably so the strlen result is live and
   cannot be discarded -- TODO confirm intent.  */
return malloc (strlen (str) + 1);
}
/* { dg-final { scan-assembler-not "repnz scasb" } } */
/* Ensure that we don't use 'repnz scasb' in the presence of register globals. */
/* { dg-do compile } */
/* { dg-options "-O1 -m32" } */
extern __SIZE_TYPE__ strlen (const char *);
extern void *malloc (__SIZE_TYPE__);
register int regvar asm("%eax"); /* { dg-warning "call-clobbered register" } */
/* Test stimulus: a strlen call compiled at -O1 while %eax is reserved as
   a global register variable.  The dg-final directive below checks that
   the call is not expanded inline as 'repnz scasb'.  */
char *
do_copy (char *str)
{
/* The length feeds malloc, presumably so the strlen result is live and
   cannot be discarded -- TODO confirm intent.  */
return malloc (strlen (str) + 1);
}
/* { dg-final { scan-assembler-not "repnz scasb" } } */
/* Ensure that we don't use 'repnz scasb' in the presence of register globals. */
/* { dg-do compile } */
/* { dg-options "-O1 -m32" } */
extern __SIZE_TYPE__ strlen (const char *);
extern void *malloc (__SIZE_TYPE__);
register int regvar asm("%ecx"); /* { dg-warning "call-clobbered register" } */
/* Test stimulus: a strlen call compiled at -O1 while %ecx is reserved as
   a global register variable.  The dg-final directive below checks that
   the call is not expanded inline as 'repnz scasb'.  */
char *
do_copy (char *str)
{
/* The length feeds malloc, presumably so the strlen result is live and
   cannot be discarded -- TODO confirm intent.  */
return malloc (strlen (str) + 1);
}
/* { dg-final { scan-assembler-not "repnz scasb" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment