Commit 55994b97 by Wilco Dijkstra

Improve Cortex-A53 shift bypass

The aarch_forward_to_shift_is_not_shifted_reg bypass always returns true
for AArch64 shifted instructions.  This causes the bypass to activate in
too many cases, resulting in slower execution on Cortex-A53, as reported
in PR79665.

This patch uses the arm_no_early_alu_shift_dep condition instead, which
improves the example in PR79665 by ~7%.  Since it is no longer used,
remove aarch_forward_to_shift_is_not_shifted_reg.  Also remove an
unnecessary REG_P check from arm_no_early_alu_shift_dep.
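
For background, an illustration (the RTL shape is assumed, not taken from
the commit) of why the removed predicate could never reject the bypass on
AArch64:

/* An AArch64 ALU-with-shift source, e.g. add x0, x1, x2, lsl #2, has
   the shape

     (plus:DI (ashift:DI (reg:DI x2) (const_int 2)) (reg:DI x1))

   The shift amount is a CONST_INT, never a REG, so the decisive test
   in the removed predicate,

     return (!REG_P (XEXP (early_op, 0))
             || !REG_P (XEXP (early_op, 1)));

   always succeeded via the second clause (and the else arm returned 1
   unconditionally), so the bypass guard was true for every
   ALU-with-shift consumer.  */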

    gcc/
	PR target/79665
	* config/arm/aarch-common.c (arm_no_early_alu_shift_dep):
	Remove redundant if.
	(aarch_forward_to_shift_is_not_shifted_reg): Remove.
	* config/arm/aarch-common-protos.h
	(aarch_forward_to_shift_is_not_shifted_reg): Remove.
	* config/arm/cortex-a53.md: Use arm_no_early_alu_shift_dep in bypass.

From-SVN: r249740

--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -25,7 +25,6 @@
 extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *);
 extern int aarch_crypto_can_dual_issue (rtx_insn *, rtx_insn *);
-extern int aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *, rtx_insn *);
 extern bool aarch_rev16_p (rtx);
 extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
 extern bool aarch_rev16_shright_mask_imm_p (rtx, machine_mode);

--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -272,12 +272,7 @@ arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
     return 0;
 
   if ((early_op = arm_find_shift_sub_rtx (op)))
-    {
-      if (REG_P (early_op))
-	early_op = op;
-
-      return !reg_overlap_mentioned_p (value, early_op);
-    }
+    return !reg_overlap_mentioned_p (value, early_op);
 
   return 0;
 }
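
For reference, the whole function after this hunk reads roughly as follows
(a reconstruction: the signature comes from the hunk header above, and the
local declarations are assumed to match the removed sibling function below):

int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
  rtx value, op;
  rtx early_op;

  /* Extract the producer's SET destination and the consumer's SET
     source; bail out if the insns do not have the expected shape.  */
  if (!arm_get_set_operands (producer, consumer, &value, &op))
    return 0;

  /* If the consumer contains a shift sub-expression, allow the bypass
     only when the produced value is not used inside that shift, i.e.
     the result is not needed a cycle early.  */
  if ((early_op = arm_find_shift_sub_rtx (op)))
    return !reg_overlap_mentioned_p (value, early_op);

  return 0;
}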

@@ -508,38 +503,6 @@ aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
   return (REGNO (dest) == REGNO (accumulator));
 }
 
-/* Return nonzero if the CONSUMER instruction is some sort of
-   arithmetic or logic + shift operation, and the register we are
-   writing in PRODUCER is not used in a register shift by register
-   operation.  */
-
-int
-aarch_forward_to_shift_is_not_shifted_reg (rtx_insn *producer,
-					   rtx_insn *consumer)
-{
-  rtx value, op;
-  rtx early_op;
-
-  if (!arm_get_set_operands (producer, consumer, &value, &op))
-    return 0;
-
-  if ((early_op = arm_find_shift_sub_rtx (op)))
-    {
-      if (REG_P (early_op))
-	early_op = op;
-
-      /* Any other canonicalisation of a shift is a shift-by-constant
-	 so we don't care.  */
-      if (GET_CODE (early_op) == ASHIFT)
-	return (!REG_P (XEXP (early_op, 0))
-		|| !REG_P (XEXP (early_op, 1)));
-      else
-	return 1;
-    }
-
-  return 0;
-}
-
 /* Return non-zero if the consumer (a multiply-accumulate instruction)
    has an accumulator dependency on the result of the producer (a
    multiplication instruction) and no other dependency on that result.  */

--- a/gcc/config/arm/cortex-a53.md
+++ b/gcc/config/arm/cortex-a53.md
@@ -211,7 +211,7 @@
 (define_bypass 1 "cortex_a53_alu*"
 		"cortex_a53_alu_shift*"
-		"aarch_forward_to_shift_is_not_shifted_reg")
+		"arm_no_early_alu_shift_dep")
 
 (define_bypass 2 "cortex_a53_alu*"
 		"cortex_a53_alu_*,cortex_a53_shift*")
 
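A define_bypass with latency 1 shortens the producer-to-consumer latency
to one cycle whenever the guard predicate returns nonzero.  For
illustration, a hypothetical instruction pair (assumed, not from the
commit) that the new guard distinguishes:

/* Illustrative AArch64 sequences:

     add  x1, x2, x3           producer (cortex_a53_alu)
     add  x0, x4, x1, lsl #2   consumer shifts x1, so the result is
                               needed early: the guard returns 0 and
                               the full latency applies.

     add  x1, x2, x3           producer
     add  x0, x1, x4, lsl #2   x1 is the non-shifted operand: the
                               guard returns 1 and the one-cycle
                               bypass applies.  */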