Commit c272bbda by Matthew Malcomson Committed by Matthew Malcomson

[Patch] [arm] Fix 88714, Arm LDRD/STRD peepholes.

These peepholes match a pair of SImode loads or stores that can be
implemented with a single LDRD or STRD instruction.
When compiling for TARGET_ARM, these peepholes originally created a set
pattern in DI mode to be caught by movdi patterns.

This approach failed to take into account the possibility that the two
matched insns operated on memory with different aliasing information.
The peepholes lost the aliasing information on one of the insns, which
could then cause the scheduler to make an invalid transformation.

This patch changes the peepholes so they generate a PARALLEL expression
of the two relevant loads or stores, which means the aliasing
information of both is kept.  Such a PARALLEL pattern is what the
peepholes currently produce for TARGET_THUMB2.

In order to match these new insn patterns, we add two new define_insn's.  These
define_insn's use the same checks as the peepholes to find valid insns.

Note that the patterns now created by the peepholes for LDRD and STRD
are very similar to those created by the peepholes for LDM and STM.
Many patterns could be matched by the LDM and STM define_insns, which
means we rely on the order the define_insn patterns are defined in the
machine description, with those for LDRD/STRD defined before those for
LDM/STM.

The main difference between the peepholes for LDRD/STRD and those for
LDM/STM is that those for LDRD/STRD have some logic to ensure that the
two registers are consecutive and the first one is even.

Bootstrapped and regtested on arm-none-linux-gnu.
Demonstrated fix of bug 88714 by bootstrapping on armv7l.


gcc/ChangeLog:

2019-02-07  Matthew Malcomson  <matthew.malcomson@arm.com>
	    Jakub Jelinek  <jakub@redhat.com>

	PR bootstrap/88714
	* config/arm/arm-protos.h (valid_operands_ldrd_strd,
	arm_count_ldrdstrd_insns): New declarations.
	* config/arm/arm.c (mem_ok_for_ldrd_strd): Remove broken handling of
	MINUS.
	(valid_operands_ldrd_strd): New function.
	(arm_count_ldrdstrd_insns): New function.
	* config/arm/ldrdstrd.md: Change peepholes to generate PARALLEL SImode
	sets instead of single DImode set and define new insns to match this.

gcc/testsuite/ChangeLog:

2019-02-07  Matthew Malcomson  <matthew.malcomson@arm.com>
	    Jakub Jelinek  <jakub@redhat.com>

	PR bootstrap/88714
	* gcc.c-torture/execute/pr88714.c: New test.
	* gcc.dg/rtl/arm/ldrd-peepholes.c: New test.

Co-Authored-By: Jakub Jelinek <jakub@redhat.com>

From-SVN: r268644
parent 5eb9ac1e
2019-02-07 Matthew Malcomson <matthew.malcomson@arm.com>
Jakub Jelinek <jakub@redhat.com>
PR bootstrap/88714
* config/arm/arm-protos.h (valid_operands_ldrd_strd,
arm_count_ldrdstrd_insns): New declarations.
* config/arm/arm.c (mem_ok_for_ldrd_strd): Remove broken handling of
MINUS.
(valid_operands_ldrd_strd): New function.
(arm_count_ldrdstrd_insns): New function.
* config/arm/ldrdstrd.md: Change peepholes to generate PARALLEL SImode
sets instead of single DImode set and define new insns to match this.
2019-02-07 Tamar Christina <tamar.christina@arm.com> 2019-02-07 Tamar Christina <tamar.christina@arm.com>
* config/aarch64/aarch64-builtins.c (aarch64_fcmla_lane_builtin_data): * config/aarch64/aarch64-builtins.c (aarch64_fcmla_lane_builtin_data):
......
...@@ -125,6 +125,7 @@ extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *); ...@@ -125,6 +125,7 @@ extern rtx arm_gen_store_multiple (int *, int, rtx, int, rtx, HOST_WIDE_INT *);
extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT); extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool); extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool); extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
extern bool valid_operands_ldrd_strd (rtx *, bool);
extern int arm_gen_movmemqi (rtx *); extern int arm_gen_movmemqi (rtx *);
extern bool gen_movmem_ldrd_strd (rtx *); extern bool gen_movmem_ldrd_strd (rtx *);
extern machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx); extern machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
...@@ -146,6 +147,7 @@ extern const char *output_mov_long_double_arm_from_arm (rtx *); ...@@ -146,6 +147,7 @@ extern const char *output_mov_long_double_arm_from_arm (rtx *);
extern const char *output_move_double (rtx *, bool, int *count); extern const char *output_move_double (rtx *, bool, int *count);
extern const char *output_move_quad (rtx *); extern const char *output_move_quad (rtx *);
extern int arm_count_output_move_double_insns (rtx *); extern int arm_count_output_move_double_insns (rtx *);
extern int arm_count_ldrdstrd_insns (rtx *, bool);
extern const char *output_move_vfp (rtx *operands); extern const char *output_move_vfp (rtx *operands);
extern const char *output_move_neon (rtx *operands); extern const char *output_move_neon (rtx *operands);
extern int arm_attr_length_move_neon (rtx_insn *); extern int arm_attr_length_move_neon (rtx_insn *);
......
...@@ -15574,7 +15574,7 @@ mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align) ...@@ -15574,7 +15574,7 @@ mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
*base = addr; *base = addr;
return true; return true;
} }
else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS) else if (GET_CODE (addr) == PLUS)
{ {
*base = XEXP (addr, 0); *base = XEXP (addr, 0);
*offset = XEXP (addr, 1); *offset = XEXP (addr, 1);
...@@ -15739,7 +15739,7 @@ gen_operands_ldrd_strd (rtx *operands, bool load, ...@@ -15739,7 +15739,7 @@ gen_operands_ldrd_strd (rtx *operands, bool load,
} }
/* Make sure accesses are to consecutive memory locations. */ /* Make sure accesses are to consecutive memory locations. */
if (gap != 4) if (gap != GET_MODE_SIZE (SImode))
return false; return false;
if (!align_ok_ldrd_strd (align[0], offset)) if (!align_ok_ldrd_strd (align[0], offset))
...@@ -15820,6 +15820,55 @@ gen_operands_ldrd_strd (rtx *operands, bool load, ...@@ -15820,6 +15820,55 @@ gen_operands_ldrd_strd (rtx *operands, bool load,
} }
/* Decide whether two word-sized accesses, performed together, can be
   implemented by one LDRD/STRD instruction.  OPERANDS[0] and OPERANDS[1]
   are the register operands; OPERANDS[2] and OPERANDS[3] are the memory
   references paired with them.  LOAD is handed on unchanged to
   operands_ok_ldrd_strd.  Return false as soon as any requirement fails.  */
bool
valid_operands_ldrd_strd (rtx *operands, bool load)
{
  const int n_pairs = 2;
  HOST_WIDE_INT mem_off[2], mem_align[2];
  rtx common_base = NULL_RTX;
  int idx;

  /* Each memory reference must be accepted by mem_ok_for_ldrd_strd and
     both must use the same base register; neither register operand may
     be a SUBREG.  Record every reference's constant offset on the way.  */
  for (idx = 0; idx < n_pairs; idx++)
    {
      rtx this_base, this_offset;

      if (!mem_ok_for_ldrd_strd (operands[n_pairs + idx], &this_base,
                                 &this_offset, &mem_align[idx]))
        return false;

      if (idx == 0)
        common_base = this_base;
      else if (REGNO (common_base) != REGNO (this_base))
        return false;

      mem_off[idx] = INTVAL (this_offset);

      if (GET_CODE (operands[idx]) == SUBREG)
        return false;
    }

  /* The second access has to live above the first one ...  */
  if (mem_off[0] > mem_off[1])
    return false;

  /* ... and exactly one SImode word above it, i.e. the two locations
     must be consecutive.  */
  int gap = mem_off[1] - mem_off[0];
  if (gap != GET_MODE_SIZE (SImode))
    return false;

  /* Only the alignment recorded for the first reference is checked,
     together with the lower of the two offsets.  */
  HOST_WIDE_INT low_off = mem_off[0];
  if (!align_ok_ldrd_strd (mem_align[0], low_off))
    return false;

  return operands_ok_ldrd_strd (operands[0], operands[1], common_base,
                                low_off, false, load);
}
/* Print a symbolic form of X to the debug file, F. */ /* Print a symbolic form of X to the debug file, F. */
...@@ -28498,6 +28547,26 @@ arm_count_output_move_double_insns (rtx *operands) ...@@ -28498,6 +28547,26 @@ arm_count_output_move_double_insns (rtx *operands)
return count; return count;
} }
/* Counterpart of arm_count_output_move_double_insns for a register/memory
   pair given in SImode.  OPERANDS[0] must be the first register of the
   pair and OPERANDS[2] the first memory reference (the operand order of
   the arm_{ldrd,strd} patterns).  LOAD selects the direction: for a load
   the register becomes operand 0 and the memory operand 1 of the DImode
   move, for a store the other way round.  Return the count that
   output_move_double reports for that DImode move.  */
int
arm_count_ldrdstrd_insns (rtx *operands, bool load)
{
  const int mem_idx = load ? 1 : 0;
  const int reg_idx = load ? 0 : 1;
  rtx dword_ops[2];
  int n_insns;

  /* Re-express the SImode pair as a single DImode register and a DImode
     view of the first memory reference.  */
  dword_ops[reg_idx] = gen_rtx_REG (DImode, REGNO (operands[0]));
  dword_ops[mem_idx] = adjust_address (operands[2], DImode, 0);

  /* NOTE(review): the `false' argument presumably asks for counting only,
     without emitting assembly -- confirm against output_move_double.  */
  output_move_double (dword_ops, false, &n_insns);
  return n_insns;
}
int int
vfp3_const_double_for_fract_bits (rtx operand) vfp3_const_double_for_fract_bits (rtx operand)
{ {
......
2019-02-07 Matthew Malcomson <matthew.malcomson@arm.com>
Jakub Jelinek <jakub@redhat.com>
PR bootstrap/88714
* gcc.c-torture/execute/pr88714.c: New test.
* gcc.dg/rtl/arm/ldrd-peepholes.c: New test.
2019-02-07 Tamar Christina <tamar.christina@arm.com> 2019-02-07 Tamar Christina <tamar.christina@arm.com>
PR/target 88850 PR/target 88850
......
/* PR bootstrap/88714 */
/* Regression test data.  In struct S the members `c' and `d' are
   adjacent; foo below accesses both of them through two different
   pointers.  */
struct S { int a, b, c; int *d; };
/* Only member `g' of struct T is read by the test (via the global `t').  */
struct T { int *e, *f, *g; } *t = 0;
/* Destination of the store `*o = w' in foo; main points it at a[2].  */
int *o = 0;
/* Checker: abort unless the arguments match one of the two call shapes
   used by foo.
     w == -1 : x must be null and y, z must both be 0.
     else    : w must be 0, x must equal t->g, y must be 0, z must be 12.
   noipa keeps the call opaque to interprocedural optimization, so the
   values really have to be computed and passed by the caller.  */
__attribute__((noipa))
void bar (int *x, int y, int z, int w)
{
if (w == -1)
{
if (x != 0 || y != 0 || z != 0)
__builtin_abort ();
}
else if (w != 0 || x != t->g || y != 0 || z != 12)
__builtin_abort ();
}
/* Function under test.  main calls it with x == y, so the store to
   x->d and the following loads of y->d / y->c refer to the same object.
   The statement order here is what the test depends on; do not
   reorder.  */
__attribute__((noipa)) void
foo (struct S *x, struct S *y, int *z, int w)
{
*o = w;
if (w)
bar (0, 0, 0, -1);
/* Store through x ...  */
x->d = z;
/* ... then read d and c back through y.  The load of y->d must observe
   the store above when x and y alias.  */
if (y->d)
y->c = y->c + y->d[0];
/* With main's inputs y->c is now 3 + a[1] == 12, which bar verifies.  */
bar (t->g, 0, y->c, 0);
}
/* Driver: set up the globals, call foo with both struct pointers aliasing
   the same object, and verify the results.  */
int
main ()
{
int a[4] = { 8, 9, 10, 11 };
struct S s = { 1, 2, 3, &a[0] };
struct T u = { 0, 0, &a[3] };
o = &a[2];
t = &u;
/* x and y are the same struct; z == &a[1], so y->d[0] reads a[1] == 9
   once foo has stored z into s.d.  */
foo (&s, &s, &a[1], 5);
/* Expect s.c == 3 + 9 and s.d updated to &a[1].  */
if (s.c != 12 || s.d != &a[1])
__builtin_abort ();
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment