Commit a4f3fa71 by Wilco Dijkstra Committed by Wilco Dijkstra

[AArch64] Add support for fused compare and branch

Add support for fusing a compare with a conditional branch.  Rename the
existing AARCH64_FUSE_CMP_BRANCH to AARCH64_FUSE_ALU_BRANCH, and
AARCH64_FUSE_ALU_BRANCH to AARCH64_FUSE_ALU_CBZ, to make it clear what is
being fused.

    gcc/
	* config/aarch64/aarch64.c
	(thunderxt88_tunings): Use AARCH64_FUSE_ALU_BRANCH.
	(thunderx_tunings): Likewise.
	(tsv110_tunings): Use AARCH64_FUSE_ALU_BRANCH and AARCH64_FUSE_ALU_CBZ.
	(thunderx2t99_tunings): Likewise.
	(aarch_macro_fusion_pair_p): Add support for AARCH64_FUSE_CMP_BRANCH.
	* config/aarch64/aarch64-fusion-pairs.def: Add ALU_CBZ fusion.

From-SVN: r278966
parent 6d8d58a8
2019-12-04 Wilco Dijkstra <wdijkstr@arm.com>
* config/aarch64/aarch64.c
(thunderxt88_tunings): Use AARCH64_FUSE_ALU_BRANCH.
(thunderx_tunings): Likewise.
(tsv110_tunings): Use AARCH64_FUSE_ALU_BRANCH and AARCH64_FUSE_ALU_CBZ.
(thunderx2t99_tunings): Likewise.
(aarch_macro_fusion_pair_p): Add support for AARCH64_FUSE_CMP_BRANCH.
* config/aarch64/aarch64-fusion-pairs.def: Add ALU_CBZ fusion.
2019-12-04 Richard Biener <rguenther@suse.de> 2019-12-04 Richard Biener <rguenther@suse.de>
* tree-ssa-sccvn.c (vn_reference_lookup_3): Properly guard * tree-ssa-sccvn.c (vn_reference_lookup_3): Properly guard
...@@ -35,5 +35,6 @@ AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR) ...@@ -35,5 +35,6 @@ AARCH64_FUSION_PAIR ("adrp+ldr", ADRP_LDR)
AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH) AARCH64_FUSION_PAIR ("cmp+branch", CMP_BRANCH)
AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC) AARCH64_FUSION_PAIR ("aes+aesmc", AES_AESMC)
AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH) AARCH64_FUSION_PAIR ("alu+branch", ALU_BRANCH)
AARCH64_FUSION_PAIR ("alu+cbz", ALU_CBZ)
#undef AARCH64_FUSION_PAIR #undef AARCH64_FUSION_PAIR
...@@ -915,7 +915,7 @@ static const struct tune_params thunderxt88_tunings = ...@@ -915,7 +915,7 @@ static const struct tune_params thunderxt88_tunings =
SVE_NOT_IMPLEMENTED, /* sve_width */ SVE_NOT_IMPLEMENTED, /* sve_width */
6, /* memmov_cost */ 6, /* memmov_cost */
2, /* issue_rate */ 2, /* issue_rate */
AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */ AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */
"8", /* function_align. */ "8", /* function_align. */
"8", /* jump_align. */ "8", /* jump_align. */
"8", /* loop_align. */ "8", /* loop_align. */
...@@ -941,7 +941,7 @@ static const struct tune_params thunderx_tunings = ...@@ -941,7 +941,7 @@ static const struct tune_params thunderx_tunings =
SVE_NOT_IMPLEMENTED, /* sve_width */ SVE_NOT_IMPLEMENTED, /* sve_width */
6, /* memmov_cost */ 6, /* memmov_cost */
2, /* issue_rate */ 2, /* issue_rate */
AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */ AARCH64_FUSE_ALU_BRANCH, /* fusible_ops */
"8", /* function_align. */ "8", /* function_align. */
"8", /* jump_align. */ "8", /* jump_align. */
"8", /* loop_align. */ "8", /* loop_align. */
...@@ -968,8 +968,8 @@ static const struct tune_params tsv110_tunings = ...@@ -968,8 +968,8 @@ static const struct tune_params tsv110_tunings =
SVE_NOT_IMPLEMENTED, /* sve_width */ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */ 4, /* memmov_cost */
4, /* issue_rate */ 4, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
| AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
"16", /* function_align. */ "16", /* function_align. */
"4", /* jump_align. */ "4", /* jump_align. */
"8", /* loop_align. */ "8", /* loop_align. */
...@@ -1103,8 +1103,8 @@ static const struct tune_params thunderx2t99_tunings = ...@@ -1103,8 +1103,8 @@ static const struct tune_params thunderx2t99_tunings =
SVE_NOT_IMPLEMENTED, /* sve_width */ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost. */ 4, /* memmov_cost. */
4, /* issue_rate. */ 4, /* issue_rate. */
(AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC (AARCH64_FUSE_ALU_BRANCH | AARCH64_FUSE_AES_AESMC
| AARCH64_FUSE_ALU_BRANCH), /* fusible_ops */ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
"16", /* function_align. */ "16", /* function_align. */
"8", /* jump_align. */ "8", /* jump_align. */
"16", /* loop_align. */ "16", /* loop_align. */
...@@ -20396,7 +20396,16 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) ...@@ -20396,7 +20396,16 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
} }
} }
/* Fuse compare (CMP/CMN/TST/BICS) and conditional branch. */
if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH) if (aarch64_fusion_enabled_p (AARCH64_FUSE_CMP_BRANCH)
&& prev_set && curr_set && any_condjump_p (curr)
&& GET_CODE (SET_SRC (prev_set)) == COMPARE
&& SCALAR_INT_MODE_P (GET_MODE (XEXP (SET_SRC (prev_set), 0)))
&& reg_referenced_p (SET_DEST (prev_set), PATTERN (curr)))
return true;
/* Fuse flag-setting ALU instructions and conditional branch. */
if (aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH)
&& any_condjump_p (curr)) && any_condjump_p (curr))
{ {
unsigned int condreg1, condreg2; unsigned int condreg1, condreg2;
...@@ -20420,9 +20429,10 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr) ...@@ -20420,9 +20429,10 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
} }
} }
/* Fuse ALU instructions and CBZ/CBNZ. */
if (prev_set if (prev_set
&& curr_set && curr_set
&& aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_BRANCH) && aarch64_fusion_enabled_p (AARCH64_FUSE_ALU_CBZ)
&& any_condjump_p (curr)) && any_condjump_p (curr))
{ {
/* We're trying to match: /* We're trying to match:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment