Commit 55994b97 by Wilco Dijkstra

Improve Cortex-A53 shift bypass

The aarch_forward_to_shift_is_not_shifted_reg bypass always returns true
on AArch64 shifted instructions.  This causes the bypass to activate in
too many cases, resulting in slower execution on Cortex-A53, as reported
in PR79665.

This patch uses the arm_no_early_alu_shift_dep condition instead, which
improves the example in PR79665 by ~7%.  Given it is no longer used,
remove aarch_forward_to_shift_is_not_shifted_reg.  Also remove an
unnecessary REG_P check.
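
As an illustration (a hypothetical sequence, not the code from PR79665),
consider:

    add     x0, x1, x2
    add     x3, x4, x0, lsl #2

The first add feeds the shifted operand (x0) of the second, which
Cortex-A53 needs a cycle early, so the 1-cycle bypass must not apply.
The removed predicate nevertheless returned true here, so the scheduler
modelled a shorter latency than the core delivers.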

    gcc/
	PR target/79665
	* config/arm/aarch-common.c (arm_no_early_alu_shift_dep):
	Remove redundant if.
	(aarch_forward_to_shift_is_not_shifted_reg): Remove.
	* config/arm/aarch-common-protos.h
	(aarch_forward_to_shift_is_not_shifted_reg): Remove.
	* config/arm/cortex-a53.md: Use arm_no_early_alu_shift_dep in bypass.

From-SVN: r249740
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -25,7 +25,6 @@
 extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *);
 extern int aarch_crypto_can_dual_issue (rtx_insn *, rtx_insn *);
-extern int aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *, rtx_insn *);
 extern bool aarch_rev16_p (rtx);
 extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
 extern bool aarch_rev16_shright_mask_imm_p (rtx, machine_mode);
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -272,12 +272,7 @@ arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
     return 0;
 
   if ((early_op = arm_find_shift_sub_rtx (op)))
-    {
-      if (REG_P (early_op))
-	early_op = op;
-
-      return !reg_overlap_mentioned_p (value, early_op);
-    }
+    return !reg_overlap_mentioned_p (value, early_op);
 
   return 0;
 }
@@ -508,38 +503,6 @@ aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
   return (REGNO (dest) == REGNO (accumulator));
 }
 
-/* Return nonzero if the CONSUMER instruction is some sort of
-   arithmetic or logic + shift operation, and the register we are
-   writing in PRODUCER is not used in a register shift by register
-   operation.  */
-
-int
-aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *producer,
-					   rtx_insn *consumer)
-{
-  rtx value, op;
-  rtx early_op;
-
-  if (!arm_get_set_operands (producer, consumer, &value, &op))
-    return 0;
-
-  if ((early_op = arm_find_shift_sub_rtx (op)))
-    {
-      if (REG_P (early_op))
-	early_op = op;
-
-      /* Any other canonicalisation of a shift is a shift-by-constant
-	 so we don't care.  */
-      if (GET_CODE (early_op) == ASHIFT)
-	return (!REG_P (XEXP (early_op, 0))
-		|| !REG_P (XEXP (early_op, 1)));
-      else
-	return 1;
-    }
-
-  return 0;
-}
-
 /* Return non-zero if the consumer (a multiply-accumulate instruction)
    has an accumulator dependency on the result of the producer (a
    multiplication instruction) and no other dependency on that result.  */
--- a/gcc/config/arm/cortex-a53.md
+++ b/gcc/config/arm/cortex-a53.md
@@ -211,7 +211,7 @@
 (define_bypass 1 "cortex_a53_alu*"
 	       "cortex_a53_alu_shift*"
-	       "aarch_forward_to_shift_is_not_shifted_reg")
+	       "arm_no_early_alu_shift_dep")
 
 (define_bypass 2 "cortex_a53_alu*"
 	       "cortex_a53_alu_*,cortex_a53_shift*")