Commit 52d28530 by Jeff Law Committed by Jeff Law

re PR target/15184 (Direct access to byte inside word not working with -march=pentiumpro)

	PR target/15184
	* combine.c (try_combine): If I0 is a memory load and I3 a store
	to a related address, increase the "goodness" of doing a 4-insn
	combination with I0-I3.
	(make_field_assignment): Handle SUBREGs in the ior+and case.

	PR target/15184
	* gcc.target/i386/pr15184-1.c: New test.
	* gcc.target/i386/pr15184-2.c: New test.

From-SVN: r220249
parent b8aa7083
2015-01-29 Jeff Law <law@redhat.com>
PR target/15184
* combine.c (try_combine): If I0 is a memory load and I3 a store
to a related address, increase the "goodness" of doing a 4-insn
combination with I0-I3.
(make_field_assignment): Handle SUBREGs in the ior+and case.
2015-01-29 Yuri Rumyantsev <ysrumyan@gmail.com>
PR tree-optimization/64746
......
......@@ -2620,6 +2620,7 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
int i;
int ngood = 0;
int nshift = 0;
rtx set0, set3;
if (!flag_expensive_optimizations)
return 0;
......@@ -2643,6 +2644,34 @@ try_combine (rtx_insn *i3, rtx_insn *i2, rtx_insn *i1, rtx_insn *i0,
|| GET_CODE (src) == LSHIFTRT)
nshift++;
}
/* If I0 loads a memory and I3 sets the same memory, then I2 and I3
are likely manipulating its value. Ideally we'll be able to combine
all four insns into a bitfield insertion of some kind.
Note the source in I0 might be inside a sign/zero extension and the
memory modes in I0 and I3 might be different. So extract the address
from the destination of I3 and search for it in the source of I0.
In the event that there's a match but the source/dest do not actually
refer to the same memory, the worst that happens is we try some
combinations that we wouldn't have otherwise. */
if ((set0 = single_set (i0))
/* Ensure the source of SET0 is a MEM, possibly buried inside
an extension. */
&& (GET_CODE (SET_SRC (set0)) == MEM
|| ((GET_CODE (SET_SRC (set0)) == ZERO_EXTEND
|| GET_CODE (SET_SRC (set0)) == SIGN_EXTEND)
&& GET_CODE (XEXP (SET_SRC (set0), 0)) == MEM))
&& (set3 = single_set (i3))
/* Ensure the destination of SET3 is a MEM. */
&& GET_CODE (SET_DEST (set3)) == MEM
/* Would it be better to extract the base address for the MEM
in SET3 and look for that? I don't have cases where it matters
but I could envision such cases. */
&& rtx_referenced_p (XEXP (SET_DEST (set3), 0), SET_SRC (set0)))
ngood += 2;
if (ngood < 2 && nshift < 2)
return 0;
}
......@@ -9272,6 +9301,13 @@ make_field_assignment (rtx x)
to the appropriate position, force it to the required mode, and
make the extraction. Check for the AND in both operands. */
/* One or more SUBREGs might obscure the constant-position field
assignment. The first one we are likely to encounter is an outer
narrowing SUBREG, which we can just strip for the purposes of
identifying the constant-field assignment. */
if (GET_CODE (src) == SUBREG && subreg_lowpart_p (src))
src = SUBREG_REG (src);
if (GET_CODE (src) != IOR && GET_CODE (src) != XOR)
return x;
......@@ -9282,10 +9318,38 @@ make_field_assignment (rtx x)
&& CONST_INT_P (XEXP (rhs, 1))
&& rtx_equal_for_field_assignment_p (XEXP (rhs, 0), dest))
c1 = INTVAL (XEXP (rhs, 1)), other = lhs;
/* The second SUBREG that might get in the way is a paradoxical
SUBREG around the first operand of the AND. We want to
pretend the operand is as wide as the destination here. We
do this by creating a new MEM in the wider mode for the sole
purpose of the call to rtx_equal_for_field_assignment_p. Also
note this trick only works for MEMs. */
else if (GET_CODE (rhs) == AND
&& paradoxical_subreg_p (XEXP (rhs, 0))
&& GET_CODE (SUBREG_REG (XEXP (rhs, 0))) == MEM
&& CONST_INT_P (XEXP (rhs, 1))
&& rtx_equal_for_field_assignment_p (gen_rtx_MEM (GET_MODE (dest),
XEXP (SUBREG_REG (XEXP (rhs, 0)), 0)),
dest))
c1 = INTVAL (XEXP (rhs, 1)), other = lhs;
else if (GET_CODE (lhs) == AND
&& CONST_INT_P (XEXP (lhs, 1))
&& rtx_equal_for_field_assignment_p (XEXP (lhs, 0), dest))
c1 = INTVAL (XEXP (lhs, 1)), other = rhs;
/* The second SUBREG that might get in the way is a paradoxical
SUBREG around the first operand of the AND. We want to
pretend the operand is as wide as the destination here. We
do this by creating a new MEM in the wider mode for the sole
purpose of the call to rtx_equal_for_field_assignment_p. Also
note this trick only works for MEMs. */
else if (GET_CODE (lhs) == AND
&& paradoxical_subreg_p (XEXP (lhs, 0))
&& GET_CODE (SUBREG_REG (XEXP (lhs, 0))) == MEM
&& CONST_INT_P (XEXP (lhs, 1))
&& rtx_equal_for_field_assignment_p (gen_rtx_MEM (GET_MODE (dest),
XEXP (SUBREG_REG (XEXP (lhs, 0)), 0)),
dest))
c1 = INTVAL (XEXP (lhs, 1)), other = rhs;
else
return x;
......
2015-01-29 Jeff Law <law@redhat.com>
PR target/15184
* gcc.target/i386/pr15184-1.c: New test.
* gcc.target/i386/pr15184-2.c: New test.
2015-01-29 Yuri Rumyantsev <ysrumyan@gmail.com>
PR tree-optimization/64746
......
/* PR 15184 first two tests, plus two additional ones. */
/* { dg-do compile } */
/* { dg-options "-O2 -m32 -march=pentiumpro" } */
#define regparm __attribute__((__regparm__(3)))
extern unsigned int x;
extern unsigned short y;
/* Replace bits 0-7 of the global X with C.  The mask 0xFFFFFF00 keeps
   bits 8-31 of X and C is OR-ed into the cleared low byte.  The
   read-modify-write is spelled out as AND + IOR on purpose: that is the
   source form this test feeds to the compiler.  */
void regparm f0(unsigned char c)
{
x = (x & 0xFFFFFF00) | (unsigned int)c;
}
/* Replace bits 8-15 of the global X with C.  The mask 0xFFFF00FF clears
   only that byte; C is widened to unsigned int and shifted left by 8
   before being OR-ed in.  */
void regparm f1(unsigned char c)
{
x = (x & 0xFFFF00FF) | ((unsigned int)c << 8);
}
/* Replace bits 16-23 of the global X with C.  The mask 0xFF00FFFF clears
   only that byte; C is widened to unsigned int and shifted left by 16
   before being OR-ed in.  */
void regparm f2(unsigned char c)
{
x = (x & 0xFF00FFFF) | ((unsigned int)c << 16);
}
/* Replace bits 24-31 of the global X with C.  The mask 0x00FFFFFF clears
   only the top byte; C is widened to unsigned int and shifted left by 24
   before being OR-ed in.  */
void regparm f3(unsigned char c)
{
x = (x & 0x00FFFFFF) | ((unsigned int)c << 24);
}
/* Each function should compile down to a byte move from
the input register into x, possibly at an offset within x. */
/* { dg-final { scan-assembler-times "\tmovb\t%al, x" 4 } } */
/* PR 15184 second two tests. */
/* { dg-do compile } */
/* { dg-options "-O2 -m32 -march=pentiumpro" } */
#define regparm __attribute__((__regparm__(3)))
extern unsigned int x;
extern unsigned short y;
/* Replace bits 0-7 of the global Y with C.  The mask 0xFF00 keeps the
   upper byte of Y and C is OR-ed into the cleared low byte.  */
void regparm g0(unsigned char c)
{
y = (y & 0xFF00) | (unsigned short)c;
}
/* Replace bits 8-15 of the global Y with C.  The mask 0x00FF keeps the
   low byte of Y; C is shifted left by 8 and OR-ed into the cleared upper
   byte.  Note the cast to unsigned short is followed by the usual
   integer promotion, so the shift itself is done in int before the
   result is stored back into the unsigned short Y.  */
void regparm g1(unsigned char c)
{
y = (y & 0x00FF) | ((unsigned short)c << 8);
}
/* Each function should compile down to a byte move from
the input register into y, possibly at an offset within y. */
/* { dg-final { scan-assembler-times "\tmovb\t%al, y" 2 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment