Commit 15b5c4c1 authored and committed by Richard Earnshaw

arm.c (adjacent_mem_locations): Reject volatile memory refs.

	* arm.c (adjacent_mem_locations): Reject volatile memory refs.
	Also reject cases where this pattern will cause load delay stalls
	unless optimizing for size and it will produce a shorter sequence.
	* arm.md (arith_adjacent_mem): Make better use of ldm addressing
	variants to avoid pre-adjusting the base when possible.

From-SVN: r97381
parent 7fac69e5
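
For reference, the three ldm addressing variants the updated pattern chooses between follow the standard ARM semantics; for a two-register transfer:

/* ldmia rN, {ra, rb}  =>  ra = [rN],    rb = [rN+4]   (increment after)
   ldmib rN, {ra, rb}  =>  ra = [rN+4],  rb = [rN+8]   (increment before)
   ldmda rN, {ra, rb}  =>  ra = [rN-4],  rb = [rN]     (decrement after)  */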
ChangeLog:

2005-04-01  Richard Earnshaw  <richard.earnshaw@arm.com>

	* arm.c (adjacent_mem_locations): Reject volatile memory refs.
	Also reject cases where this pattern will cause load delay stalls
	unless optimizing for size and it will produce a shorter sequence.
	* arm.md (arith_adjacent_mem): Make better use of ldm addressing
	variants to avoid pre-adjusting the base when possible.

2005-04-01  Richard Earnshaw  <richard.earnshaw@arm.com>

	* arm.md (minmax_arithsi): Reject all eliminable registers, not just
	the frame and argument pointers.
	(strqi_preinc, strqi_predec, loadqi_preinc, loadqi_predec): Likewise.
arm.c:
...
@@ -5139,6 +5139,10 @@ minmax_code (rtx x)
 int
 adjacent_mem_locations (rtx a, rtx b)
 {
+  /* We don't guarantee to preserve the order of these memory refs.  */
+  if (volatile_refs_p (a) || volatile_refs_p (b))
+    return 0;
+
   if ((GET_CODE (XEXP (a, 0)) == REG
        || (GET_CODE (XEXP (a, 0)) == PLUS
            && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
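
The volatile check guards against reordering: an ldm transfers the lowest-numbered register from the lowest address, regardless of the order the loads had in the source. A minimal sketch of the hazard, assuming a hypothetical memory-mapped device (DEVICE and its access-order requirement are illustrative):

/* Hypothetical device: the status word must be read before the data word.  */
volatile int *const DEVICE = (volatile int *) 0x40000000;

int read_device (void)
{
  int status = DEVICE[1];   /* program order: this load first */
  int data   = DEVICE[0];
  /* Merging the two loads into a single "ldmia" would access DEVICE[0]
     before DEVICE[1], breaking the device protocol; the new
     volatile_refs_p check rejects such pairs.  */
  return status ? data : -1;
}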
@@ -5178,6 +5182,17 @@ adjacent_mem_locations (rtx a, rtx b)
 	return 0;
 
       val_diff = val1 - val0;
+
+      if (arm_ld_sched)
+	{
+	  /* If the target has load delay slots, then there's no benefit
+	     to using an ldm instruction unless the offset is zero and
+	     we are optimizing for size.  */
+	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
+		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
+		  && (val_diff == 4 || val_diff == -4));
+	}
+
       return ((REGNO (reg0) == REGNO (reg1))
 	      && (val_diff == 4 || val_diff == -4));
 }
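
The arm_ld_sched branch encodes a size-versus-speed trade-off: on cores with load delay slots, two independently scheduled ldr instructions run at least as fast as one ldm, so merging only pays when it shrinks the code, i.e. when no separate add is needed because the low offset is 0 or 4. A condensed restatement of the new test, as a sketch rather than the GCC source:

/* Sketch: decide whether two adjacent word loads are worth merging into
   an ldm.  reg0/reg1 are base-register numbers, val0/val1 byte offsets.  */
static int
worth_merging (int ld_sched, int opt_size,
               int reg0, int reg1, int val0, int val1)
{
  int val_diff = val1 - val0;
  int adjacent = (reg0 == reg1) && (val_diff == 4 || val_diff == -4);

  if (!ld_sched)
    return adjacent;   /* no load delay slots: ldm is a clear win */

  /* With load delay slots, ldm helps only for size, and only when the low
     offset is 0 (plain ldmia) or 4 (ldmib), so no base adjustment needed.  */
  return opt_size && adjacent
         && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4);
}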
arm.md:
...
@@ -9246,7 +9246,8 @@
 {
   rtx ldm[3];
   rtx arith[4];
-  int val1 = 0, val2 = 0;
+  rtx base_reg;
+  HOST_WIDE_INT val1 = 0, val2 = 0;
 
   if (REGNO (operands[0]) > REGNO (operands[4]))
     {
@@ -9258,12 +9259,21 @@
       ldm[1] = operands[0];
       ldm[2] = operands[4];
     }
-  if (GET_CODE (XEXP (operands[2], 0)) != REG)
-    val1 = INTVAL (XEXP (XEXP (operands[2], 0), 1));
-  if (GET_CODE (XEXP (operands[3], 0)) != REG)
+
+  base_reg = XEXP (operands[2], 0);
+
+  if (!REG_P (base_reg))
+    {
+      val1 = INTVAL (XEXP (base_reg, 1));
+      base_reg = XEXP (base_reg, 0);
+    }
+
+  if (!REG_P (XEXP (operands[3], 0)))
     val2 = INTVAL (XEXP (XEXP (operands[3], 0), 1));
+
   arith[0] = operands[0];
   arith[3] = operands[1];
+
   if (val1 < val2)
     {
       arith[1] = ldm[1];
@@ -9274,21 +9284,30 @@
       arith[1] = ldm[2];
       arith[2] = ldm[1];
     }
-  if (val1 && val2)
+
+  ldm[0] = base_reg;
+
+  if (val1 != 0 && val2 != 0)
     {
-      rtx ops[3];
-      ldm[0] = ops[0] = operands[4];
-      ops[1] = XEXP (XEXP (operands[2], 0), 0);
-      ops[2] = XEXP (XEXP (operands[2], 0), 1);
-      output_add_immediate (ops);
-      if (val1 < val2)
-	output_asm_insn (\"ldm%?ia\\t%0, {%1, %2}\", ldm);
+      if (val1 == 4 || val2 == 4)
+	/* Other val must be 8, since we know they are adjacent and neither
+	   is zero.  */
+	output_asm_insn (\"ldm%?ib\\t%0, {%1, %2}\", ldm);
       else
-	output_asm_insn (\"ldm%?da\\t%0, {%1, %2}\", ldm);
+	{
+	  rtx ops[3];
+	  ldm[0] = ops[0] = operands[4];
+	  ops[1] = base_reg;
+	  ops[2] = GEN_INT (val1);
+	  output_add_immediate (ops);
+	  if (val1 < val2)
+	    output_asm_insn (\"ldm%?ia\\t%0, {%1, %2}\", ldm);
+	  else
+	    output_asm_insn (\"ldm%?da\\t%0, {%1, %2}\", ldm);
+	}
     }
-  else if (val1)
+  else if (val1 != 0)
     {
-      ldm[0] = XEXP (operands[3], 0);
       if (val1 < val2)
 	output_asm_insn (\"ldm%?da\\t%0, {%1, %2}\", ldm);
       else
@@ -9296,7 +9315,6 @@
     }
   else
     {
-      ldm[0] = XEXP (operands[2], 0);
       if (val1 < val2)
 	output_asm_insn (\"ldm%?ia\\t%0, {%1, %2}\", ldm);
       else
...
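
The ldm%?ib case is what implements "avoid pre-adjusting the base". A hypothetical before/after (register allocation illustrative):

/* Two adjacent word loads, at offsets 4 and 8, feeding one add.  */
int sum_pair (int *p)
{
  return p[1] + p[2];
}

/* Before: the base had to be advanced so ldmia could reach offset 4:
 *	add	r3, r0, #4
 *	ldmia	r3, {r2, r3}
 *	add	r0, r2, r3
 * After: the increment-before variant covers offsets 4 and 8 directly:
 *	ldmib	r0, {r2, r3}
 *	add	r0, r2, r3
 */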