Commit 6a569cdd, authored and committed by Kyrylo Tkachov

[AArch64][1/5] Implement TARGET_SCHED_MACRO_FUSION_PAIR_P

	* config/aarch64/aarch64-protos.h (struct tune_params): Add
	fuseable_ops field.
	* config/aarch64/aarch64.c (generic_tunings): Specify fuseable_ops.
	(cortexa53_tunings): Likewise.
	(cortexa57_tunings): Likewise.
	(thunderx_tunings): Likewise.
	(aarch64_macro_fusion_p): New function.
	(aarch_macro_fusion_pair_p): Likewise.
	(TARGET_SCHED_MACRO_FUSION_P): Define.
	(TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise.
	(AARCH64_FUSE_MOV_MOVK): Likewise.
	(AARCH64_FUSE_NOTHING): Likewise.

From-SVN: r218007
parent a3dc1a45
2014-11-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64-protos.h (struct tune_params): Add
fuseable_ops field.
* config/aarch64/aarch64.c (generic_tunings): Specify fuseable_ops.
(cortexa53_tunings): Likewise.
(cortexa57_tunings): Likewise.
(thunderx_tunings): Likewise.
(aarch64_macro_fusion_p): New function.
(aarch_macro_fusion_pair_p): Likewise.
(TARGET_SCHED_MACRO_FUSION_P): Define.
(TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise.
(AARCH64_FUSE_MOV_MOVK): Likewise.
(AARCH64_FUSE_NOTHING): Likewise.
2014-11-24 Martin Liska <mliska@suse.cz> 2014-11-24 Martin Liska <mliska@suse.cz>
PR lto/63968 PR lto/63968
...@@ -170,6 +170,7 @@ struct tune_params ...@@ -170,6 +170,7 @@ struct tune_params
const struct cpu_vector_cost *const vec_costs; const struct cpu_vector_cost *const vec_costs;
const int memmov_cost; const int memmov_cost;
const int issue_rate; const int issue_rate;
const unsigned int fuseable_ops;
}; };
HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
......
...@@ -304,6 +304,9 @@ static const struct cpu_vector_cost cortexa57_vector_cost = ...@@ -304,6 +304,9 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
NAMED_PARAM (cond_not_taken_branch_cost, 1) NAMED_PARAM (cond_not_taken_branch_cost, 1)
}; };
/* Bit flags for the fuseable_ops field of struct tune_params.
   AARCH64_FUSE_NOTHING is the empty mask; AARCH64_FUSE_MOV_MOVK occupies
   bit 0 and is the flag aarch_macro_fusion_pair_p tests before trying to
   match a MOV/MOVK pair.  */
#define AARCH64_FUSE_NOTHING (0)
#define AARCH64_FUSE_MOV_MOVK (1 << 0)
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007 #if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__ __extension__
#endif #endif
...@@ -314,7 +317,8 @@ static const struct tune_params generic_tunings = ...@@ -314,7 +317,8 @@ static const struct tune_params generic_tunings =
&generic_regmove_cost, &generic_regmove_cost,
&generic_vector_cost, &generic_vector_cost,
NAMED_PARAM (memmov_cost, 4), NAMED_PARAM (memmov_cost, 4),
NAMED_PARAM (issue_rate, 2) NAMED_PARAM (issue_rate, 2),
NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
}; };
static const struct tune_params cortexa53_tunings = static const struct tune_params cortexa53_tunings =
...@@ -324,7 +328,8 @@ static const struct tune_params cortexa53_tunings = ...@@ -324,7 +328,8 @@ static const struct tune_params cortexa53_tunings =
&cortexa53_regmove_cost, &cortexa53_regmove_cost,
&generic_vector_cost, &generic_vector_cost,
NAMED_PARAM (memmov_cost, 4), NAMED_PARAM (memmov_cost, 4),
NAMED_PARAM (issue_rate, 2) NAMED_PARAM (issue_rate, 2),
NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
}; };
static const struct tune_params cortexa57_tunings = static const struct tune_params cortexa57_tunings =
...@@ -334,7 +339,8 @@ static const struct tune_params cortexa57_tunings = ...@@ -334,7 +339,8 @@ static const struct tune_params cortexa57_tunings =
&cortexa57_regmove_cost, &cortexa57_regmove_cost,
&cortexa57_vector_cost, &cortexa57_vector_cost,
NAMED_PARAM (memmov_cost, 4), NAMED_PARAM (memmov_cost, 4),
NAMED_PARAM (issue_rate, 3) NAMED_PARAM (issue_rate, 3),
NAMED_PARAM (fuseable_ops, AARCH64_FUSE_MOV_MOVK)
}; };
static const struct tune_params thunderx_tunings = static const struct tune_params thunderx_tunings =
...@@ -344,7 +350,8 @@ static const struct tune_params thunderx_tunings = ...@@ -344,7 +350,8 @@ static const struct tune_params thunderx_tunings =
&thunderx_regmove_cost, &thunderx_regmove_cost,
&generic_vector_cost, &generic_vector_cost,
NAMED_PARAM (memmov_cost, 6), NAMED_PARAM (memmov_cost, 6),
NAMED_PARAM (issue_rate, 2) NAMED_PARAM (issue_rate, 2),
NAMED_PARAM (fuseable_ops, AARCH64_FUSE_NOTHING)
}; };
/* A processor implementing AArch64. */ /* A processor implementing AArch64. */
...@@ -10370,6 +10377,59 @@ aarch64_gen_ccmp_next (rtx prev, int cmp_code, rtx op0, rtx op1, int bit_code) ...@@ -10370,6 +10377,59 @@ aarch64_gen_ccmp_next (rtx prev, int cmp_code, rtx op0, rtx op1, int bit_code)
#undef TARGET_GEN_CCMP_NEXT #undef TARGET_GEN_CCMP_NEXT
#define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next #define TARGET_GEN_CCMP_NEXT aarch64_gen_ccmp_next
/* Implement TARGET_SCHED_MACRO_FUSION_P.  Report whether the tuning
   parameters currently in effect request any kind of instruction fusion,
   i.e. whether their fuseable_ops mask differs from AARCH64_FUSE_NOTHING.  */

static bool
aarch64_macro_fusion_p (void)
{
  const unsigned int fusible = aarch64_tune_params->fuseable_ops;

  return fusible != AARCH64_FUSE_NOTHING;
}
/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Decide whether PREV and CURR
   should be kept together during scheduling.  Returns true only when the
   current tuning has AARCH64_FUSE_MOV_MOVK set and the two insns match the
   MOV/MOVK shape below; returns false in every other case.

   The shape being matched is:
     prev (mov)  == (set (reg r0) (const_int imm16))
     curr (movk) == (set (zero_extract (reg r0)
                                       (const_int 16)
                                       (const_int 16))
                         (const_int imm16_1))

   NOTE(review): only operand 2 of the ZERO_EXTRACT is compared against 16;
   operand 1 is not inspected -- confirm that this is intended.  */

static bool
aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
{
  /* Nothing can be fused when the tuning requests no fusion at all.  */
  if (!aarch64_macro_fusion_p ())
    return false;

  /* MOV/MOVK is the only pair recognised by this hook.  */
  if (!(aarch64_tune_params->fuseable_ops & AARCH64_FUSE_MOV_MOVK))
    return false;

  /* Both insns must be single SETs and CURR must not be a conditional
     jump.  */
  rtx mov_set = single_set (prev);
  rtx movk_set = single_set (curr);
  if (!mov_set || !movk_set || any_condjump_p (curr))
    return false;

  /* CURR has to write through a ZERO_EXTRACT.  */
  rtx movk_dest = SET_DEST (movk_set);
  if (GET_CODE (movk_dest) != ZERO_EXTRACT)
    return false;

  /* Both insns must load an integer constant.  */
  if (!CONST_INT_P (SET_SRC (movk_set)) || !CONST_INT_P (SET_SRC (mov_set)))
    return false;

  /* Operand 2 of the ZERO_EXTRACT must be the constant 16.  */
  rtx extract_op2 = XEXP (movk_dest, 2);
  if (!CONST_INT_P (extract_op2) || INTVAL (extract_op2) != 16)
    return false;

  /* Both insns must target the same hard or pseudo register number.  */
  rtx movk_reg = XEXP (movk_dest, 0);
  rtx mov_reg = SET_DEST (mov_set);
  if (!REG_P (movk_reg) || !REG_P (mov_reg))
    return false;

  return REGNO (movk_reg) == REGNO (mov_reg);
}
#undef TARGET_ADDRESS_COST #undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost #define TARGET_ADDRESS_COST aarch64_address_cost
...@@ -10629,6 +10689,12 @@ aarch64_gen_ccmp_next (rtx prev, int cmp_code, rtx op0, rtx op1, int bit_code) ...@@ -10629,6 +10689,12 @@ aarch64_gen_ccmp_next (rtx prev, int cmp_code, rtx op0, rtx op1, int bit_code)
#undef TARGET_CAN_USE_DOLOOP_P #undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
/* Point the macro-fusion target hooks at the implementations defined
   earlier in this file.  Note that the pair hook's function is spelled
   "aarch_", not "aarch64_"; the name here must match its definition.  */
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P aarch64_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
struct gcc_target targetm = TARGET_INITIALIZER; struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-aarch64.h" #include "gt-aarch64.h"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment