Commit 8b898d4c by Kyrylo Tkachov Committed by Kyrylo Tkachov

[ARM] Implement TARGET_SCHED_MACRO_FUSION_PAIR_P

	* config/arm/arm-protos.h (tune_params): Add fuseable_ops field.
	* config/arm/arm.c (arm_macro_fusion_p): New function.
	(aarch_macro_fusion_pair_p): Likewise.
	(TARGET_SCHED_MACRO_FUSION_P): Define.
	(TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise.
	(ARM_FUSE_NOTHING): Likewise.
	(ARM_FUSE_MOVW_MOVT): Likewise.
	(arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune,
	arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune,
	arm_cortex_a8_tune, arm_cortex_a7_tune, arm_cortex_a15_tune,
	arm_cortex_a53_tune, arm_cortex_a57_tune, arm_cortex_a9_tune,
	arm_cortex_a12_tune, arm_v7m_tune, arm_v6m_tune, arm_fa726te_tune,
	arm_cortex_a5_tune, arm_cortex_m7_tune): Specify fuseable_ops value.

From-SVN: r219470
parent 49c8bc0c
2015-01-12 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/arm/arm-protos.h (tune_params): Add fuseable_ops field.
* config/arm/arm.c (arm_macro_fusion_p): New function.
(aarch_macro_fusion_pair_p): Likewise.
(TARGET_SCHED_MACRO_FUSION_P): Define.
(TARGET_SCHED_MACRO_FUSION_PAIR_P): Likewise.
(ARM_FUSE_NOTHING): Likewise.
(ARM_FUSE_MOVW_MOVT): Likewise.
(arm_slowmul_tune, arm_fastmul_tune, arm_strongarm_tune,
arm_xscale_tune, arm_9e_tune, arm_v6t2_tune, arm_cortex_tune,
arm_cortex_a8_tune, arm_cortex_a7_tune, arm_cortex_a15_tune,
arm_cortex_a53_tune, arm_cortex_a57_tune, arm_cortex_a9_tune,
arm_cortex_a12_tune, arm_v7m_tune, arm_v6m_tune, arm_fa726te_tune,
arm_cortex_a5_tune, arm_cortex_m7_tune): Specify fuseable_ops value.
2015-01-12 H.J. Lu <hongjiu.lu@intel.com> 2015-01-12 H.J. Lu <hongjiu.lu@intel.com>
PR bootstrap/64561 PR bootstrap/64561
......
...@@ -289,6 +289,8 @@ struct tune_params ...@@ -289,6 +289,8 @@ struct tune_params
bool string_ops_prefer_neon; bool string_ops_prefer_neon;
/* Maximum number of instructions to inline calls to memset. */ /* Maximum number of instructions to inline calls to memset. */
int max_insns_inline_memset; int max_insns_inline_memset;
/* Bitfield encoding the fuseable pairs of instructions. */
unsigned int fuseable_ops;
}; };
extern const struct tune_params *current_tune; extern const struct tune_params *current_tune;
......
...@@ -257,6 +257,7 @@ static void arm_expand_builtin_va_start (tree, rtx); ...@@ -257,6 +257,7 @@ static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void); static void arm_option_override (void);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode); static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *); static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void); static int arm_issue_rate (void);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
...@@ -297,6 +298,8 @@ static int arm_cortex_m_branch_cost (bool, bool); ...@@ -297,6 +298,8 @@ static int arm_cortex_m_branch_cost (bool, bool);
static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode, static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
const unsigned char *sel); const unsigned char *sel);
static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
tree vectype, tree vectype,
int misalign ATTRIBUTE_UNUSED); int misalign ATTRIBUTE_UNUSED);
...@@ -404,6 +407,12 @@ static const struct attribute_spec arm_attribute_table[] = ...@@ -404,6 +407,12 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_COMP_TYPE_ATTRIBUTES #undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
/* Scheduler macro-fusion hooks: arm_macro_fusion_p reports whether the
   current tuning fuses anything at all, and aarch_macro_fusion_pair_p
   decides whether a given pair of insns should be fused.  */
#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
...@@ -1647,6 +1656,9 @@ const struct cpu_cost_table v7m_extra_costs = ...@@ -1647,6 +1656,9 @@ const struct cpu_cost_table v7m_extra_costs =
} }
}; };
/* Bit flags stored in the fuseable_ops field of struct tune_params.
   ARM_FUSE_NOTHING is the empty set: arm_macro_fusion_p returns false
   when a tuning's fuseable_ops equals it.  ARM_FUSE_MOVW_MOVT requests
   fusion of a movw with the following movt that sets the same
   register.  */
#define ARM_FUSE_NOTHING (0)
#define ARM_FUSE_MOVW_MOVT (1 << 0)
const struct tune_params arm_slowmul_tune = const struct tune_params arm_slowmul_tune =
{ {
arm_slowmul_rtx_costs, arm_slowmul_rtx_costs,
...@@ -1663,7 +1675,8 @@ const struct tune_params arm_slowmul_tune = ...@@ -1663,7 +1675,8 @@ const struct tune_params arm_slowmul_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
const struct tune_params arm_fastmul_tune = const struct tune_params arm_fastmul_tune =
...@@ -1682,7 +1695,8 @@ const struct tune_params arm_fastmul_tune = ...@@ -1682,7 +1695,8 @@ const struct tune_params arm_fastmul_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
/* StrongARM has early execution of branches, so a sequence that is worth /* StrongARM has early execution of branches, so a sequence that is worth
...@@ -1704,7 +1718,8 @@ const struct tune_params arm_strongarm_tune = ...@@ -1704,7 +1718,8 @@ const struct tune_params arm_strongarm_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
const struct tune_params arm_xscale_tune = const struct tune_params arm_xscale_tune =
...@@ -1723,7 +1738,8 @@ const struct tune_params arm_xscale_tune = ...@@ -1723,7 +1738,8 @@ const struct tune_params arm_xscale_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
const struct tune_params arm_9e_tune = const struct tune_params arm_9e_tune =
...@@ -1742,7 +1758,8 @@ const struct tune_params arm_9e_tune = ...@@ -1742,7 +1758,8 @@ const struct tune_params arm_9e_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
const struct tune_params arm_v6t2_tune = const struct tune_params arm_v6t2_tune =
...@@ -1761,7 +1778,8 @@ const struct tune_params arm_v6t2_tune = ...@@ -1761,7 +1778,8 @@ const struct tune_params arm_v6t2_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
/* Generic Cortex tuning. Use more specific tunings if appropriate. */ /* Generic Cortex tuning. Use more specific tunings if appropriate. */
...@@ -1781,7 +1799,8 @@ const struct tune_params arm_cortex_tune = ...@@ -1781,7 +1799,8 @@ const struct tune_params arm_cortex_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
const struct tune_params arm_cortex_a8_tune = const struct tune_params arm_cortex_a8_tune =
...@@ -1800,7 +1819,8 @@ const struct tune_params arm_cortex_a8_tune = ...@@ -1800,7 +1819,8 @@ const struct tune_params arm_cortex_a8_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
true, /* Prefer Neon for stringops. */ true, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
const struct tune_params arm_cortex_a7_tune = const struct tune_params arm_cortex_a7_tune =
...@@ -1819,7 +1839,8 @@ const struct tune_params arm_cortex_a7_tune = ...@@ -1819,7 +1839,8 @@ const struct tune_params arm_cortex_a7_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
true, /* Prefer Neon for stringops. */ true, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
const struct tune_params arm_cortex_a15_tune = const struct tune_params arm_cortex_a15_tune =
...@@ -1838,7 +1859,8 @@ const struct tune_params arm_cortex_a15_tune = ...@@ -1838,7 +1859,8 @@ const struct tune_params arm_cortex_a15_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
true, true, /* Prefer 32-bit encodings. */ true, true, /* Prefer 32-bit encodings. */
true, /* Prefer Neon for stringops. */ true, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
const struct tune_params arm_cortex_a53_tune = const struct tune_params arm_cortex_a53_tune =
...@@ -1857,7 +1879,8 @@ const struct tune_params arm_cortex_a53_tune = ...@@ -1857,7 +1879,8 @@ const struct tune_params arm_cortex_a53_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */
}; };
const struct tune_params arm_cortex_a57_tune = const struct tune_params arm_cortex_a57_tune =
...@@ -1876,7 +1899,8 @@ const struct tune_params arm_cortex_a57_tune = ...@@ -1876,7 +1899,8 @@ const struct tune_params arm_cortex_a57_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
true, true, /* Prefer 32-bit encodings. */ true, true, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */
}; };
/* Branches can be dual-issued on Cortex-A5, so conditional execution is /* Branches can be dual-issued on Cortex-A5, so conditional execution is
...@@ -1898,7 +1922,8 @@ const struct tune_params arm_cortex_a5_tune = ...@@ -1898,7 +1922,8 @@ const struct tune_params arm_cortex_a5_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
true, /* Prefer Neon for stringops. */ true, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
const struct tune_params arm_cortex_a9_tune = const struct tune_params arm_cortex_a9_tune =
...@@ -1917,7 +1942,8 @@ const struct tune_params arm_cortex_a9_tune = ...@@ -1917,7 +1942,8 @@ const struct tune_params arm_cortex_a9_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
const struct tune_params arm_cortex_a12_tune = const struct tune_params arm_cortex_a12_tune =
...@@ -1936,7 +1962,8 @@ const struct tune_params arm_cortex_a12_tune = ...@@ -1936,7 +1962,8 @@ const struct tune_params arm_cortex_a12_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
true, /* Prefer Neon for stringops. */ true, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_MOVW_MOVT /* Fuseable pairs of instructions. */
}; };
/* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
...@@ -1962,7 +1989,8 @@ const struct tune_params arm_v7m_tune = ...@@ -1962,7 +1989,8 @@ const struct tune_params arm_v7m_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
/* Cortex-M7 tuning. */ /* Cortex-M7 tuning. */
...@@ -1983,7 +2011,8 @@ const struct tune_params arm_cortex_m7_tune = ...@@ -1983,7 +2011,8 @@ const struct tune_params arm_cortex_m7_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
...@@ -2004,7 +2033,8 @@ const struct tune_params arm_v6m_tune = ...@@ -2004,7 +2033,8 @@ const struct tune_params arm_v6m_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
const struct tune_params arm_fa726te_tune = const struct tune_params arm_fa726te_tune =
...@@ -2023,7 +2053,8 @@ const struct tune_params arm_fa726te_tune = ...@@ -2023,7 +2053,8 @@ const struct tune_params arm_fa726te_tune =
false, /* Prefer Neon for 64-bits bitops. */ false, /* Prefer Neon for 64-bits bitops. */
false, false, /* Prefer 32-bit encodings. */ false, false, /* Prefer 32-bit encodings. */
false, /* Prefer Neon for stringops. */ false, /* Prefer Neon for stringops. */
8 /* Maximum insns to inline memset. */ 8, /* Maximum insns to inline memset. */
ARM_FUSE_NOTHING /* Fuseable pairs of instructions. */
}; };
...@@ -29157,6 +29188,73 @@ arm_gen_setmem (rtx *operands) ...@@ -29157,6 +29188,73 @@ arm_gen_setmem (rtx *operands)
return arm_block_set_aligned_non_vect (dstbase, length, value, align); return arm_block_set_aligned_non_vect (dstbase, length, value, align);
} }
/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if the tuning
   currently in effect enables at least one kind of instruction fusion,
   i.e. its fuseable_ops mask is not ARM_FUSE_NOTHING.  */
static bool
arm_macro_fusion_p (void)
{
  const unsigned int enabled_fusions = current_tune->fuseable_ops;

  return enabled_fusions != ARM_FUSE_NOTHING;
}
/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and
   CURR form a pair of instructions that the current tuning wants kept
   together.  Only the movw/movt pair (ARM_FUSE_MOVW_MOVT) is
   recognised.  */
static bool
aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
{
  rtx prev_set = single_set (prev);
  rtx curr_set = single_set (curr);

  /* Both insns must consist of exactly one SET.  */
  if (!prev_set || !curr_set)
    return false;

  /* A conditional jump is never the second half of a fused pair.  */
  if (any_condjump_p (curr))
    return false;

  /* Nothing to do when the tuning fuses nothing at all.  */
  if (!arm_macro_fusion_p ())
    return false;

  /* movw/movt is the only fusion handled here.  */
  if (!(current_tune->fuseable_ops & ARM_FUSE_MOVW_MOVT))
    return false;

  rtx prev_dest = SET_DEST (prev_set);
  rtx prev_src = SET_SRC (prev_set);
  rtx curr_dest = SET_DEST (curr_set);
  rtx curr_src = SET_SRC (curr_set);

  /* A movw/movt pair builds a 32-bit value.  */
  if (GET_MODE (curr_dest) != SImode)
    return false;

  /* Immediate form:
       prev (movw) == (set (reg r0) (const_int imm16))
       curr (movt) == (set (zero_extract (reg r0)
					 (const_int 16)
					 (const_int 16))
			   (const_int imm16_1))
     Once CURR writes a ZERO_EXTRACT, the symbolic form below is not
     considered.  */
  if (GET_CODE (curr_dest) == ZERO_EXTRACT)
    return (CONST_INT_P (curr_src)
	    && CONST_INT_P (prev_src)
	    && REG_P (XEXP (curr_dest, 0))
	    && REG_P (prev_dest)
	    && REGNO (XEXP (curr_dest, 0)) == REGNO (prev_dest));

  /* Symbolic form:
       prev (movw) == (set (reg r0) (high (symbol_ref ("SYM"))))
       curr (movt) == (set (reg r0) (lo_sum (reg ...)
					    (symbol_ref ("SYM"))))
     Only the two destination registers are compared; the operands of
     the LO_SUM itself are not inspected.  */
  return (GET_CODE (curr_src) == LO_SUM
	  && REG_P (curr_dest)
	  && REG_P (prev_dest)
	  && GET_CODE (prev_src) == HIGH
	  && REGNO (curr_dest) == REGNO (prev_dest));
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
static unsigned HOST_WIDE_INT static unsigned HOST_WIDE_INT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment