Commit 55994b97 by Wilco Dijkstra

Improve Cortex-A53 shift bypass

The aarch_forward_to_shift_is_not_shifted_reg bypass always returns true
on AArch64 shifted instructions.  This causes the bypass to activate in
too many cases, resulting in slower execution on Cortex-A53, as reported
in PR79665.

This patch uses the arm_no_early_alu_shift_dep condition instead, which
improves the example in PR79665 by ~7%.  Given it is no longer used,
remove aarch_forward_to_shift_is_not_shifted_reg.  Also remove an
unnecessary REG_P check.
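
As an illustration (a hypothetical sequence, not the code from PR79665),
consider:

    add     x0, x1, x2
    add     x3, x4, x0, lsl #2

The first add feeds the shifted operand (x0) of the second, which
Cortex-A53 needs a cycle early, so the 1-cycle bypass must not apply.
The removed predicate nevertheless returned true here, so the scheduler
modelled a shorter latency than the core delivers.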

    gcc/
	PR target/79665
	* config/arm/aarch-common.c (arm_no_early_alu_shift_dep):
	Remove redundant if.
	(aarch_forward_to_shift_is_not_shifted_reg): Remove.
	* config/arm/aarch-common-protos.h
	(aarch_forward_to_shift_is_not_shifted_reg): Remove.
	* config/arm/cortex-a53.md: Use arm_no_early_alu_shift_dep in bypass.

From-SVN: r249740
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -25,7 +25,6 @@
 extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *);
 extern int aarch_crypto_can_dual_issue (rtx_insn *, rtx_insn *);
-extern int aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *, rtx_insn *);
 extern bool aarch_rev16_p (rtx);
 extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
 extern bool aarch_rev16_shright_mask_imm_p (rtx, machine_mode);
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -272,12 +272,7 @@ arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
     return 0;
 
   if ((early_op = arm_find_shift_sub_rtx (op)))
-    {
-      if (REG_P (early_op))
-	early_op = op;
-
-      return !reg_overlap_mentioned_p (value, early_op);
-    }
+    return !reg_overlap_mentioned_p (value, early_op);
 
   return 0;
 }
@@ -508,38 +503,6 @@ aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
   return (REGNO (dest) == REGNO (accumulator));
 }
 
-/* Return nonzero if the CONSUMER instruction is some sort of
-   arithmetic or logic + shift operation, and the register we are
-   writing in PRODUCER is not used in a register shift by register
-   operation.  */
-
-int
-aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *producer,
-					   rtx_insn *consumer)
-{
-  rtx value, op;
-  rtx early_op;
-
-  if (!arm_get_set_operands (producer, consumer, &value, &op))
-    return 0;
-
-  if ((early_op = arm_find_shift_sub_rtx (op)))
-    {
-      if (REG_P (early_op))
-	early_op = op;
-
-      /* Any other canonicalisation of a shift is a shift-by-constant
-	 so we don't care.  */
-      if (GET_CODE (early_op) == ASHIFT)
-	return (!REG_P (XEXP (early_op, 0))
-		|| !REG_P (XEXP (early_op, 1)));
-      else
-	return 1;
-    }
-
-  return 0;
-}
-
 /* Return non-zero if the consumer (a multiply-accumulate instruction)
    has an accumulator dependency on the result of the producer (a
    multiplication instruction) and no other dependency on that result.  */
--- a/gcc/config/arm/cortex-a53.md
+++ b/gcc/config/arm/cortex-a53.md
@@ -211,7 +211,7 @@
 (define_bypass 1 "cortex_a53_alu*"
 	       "cortex_a53_alu_shift*"
-	       "aarch_forward_to_shift_is_not_shifted_reg")
+	       "arm_no_early_alu_shift_dep")
 
 (define_bypass 2 "cortex_a53_alu*"
 	       "cortex_a53_alu_*,cortex_a53_shift*")