Commit a98824ac by Siddhesh Poyarekar Committed by Siddhesh Poyarekar

[aarch64] Avoid tag collisions for loads falkor

This is a rewrite of the tag collision avoidance patch that Kugan had
written as a machine reorg pass back in February.

The falkor hardware prefetching system uses a combination of the
source, destination and offset to decide which prefetcher unit to
train with the load.  This is great when loads in a loop are
sequential but sub-optimal if there are unrelated loads in a loop that
tag to the same prefetcher unit.

This pass attempts to rename the desination register of such colliding
loads using routines available in regrename.c so that their tags do
not collide.  This shows some performance gains with mcf and xalancbmk
(~5% each) and will be tweaked further.  The pass is placed near the
fag end of the pass list so that subsequent passes don't inadvertantly
end up undoing the renames.

2018-07-02  Siddhesh Poyarekar  <siddhesh@sourceware.org>
            Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

	* config/aarch64/falkor-tag-collision-avoidance.c: New file.
	* config.gcc (extra_objs): Build it.
	* config/aarch64/t-aarch64 (falkor-tag-collision-avoidance.o):
	Likewise.
	* config/aarch64/aarch64-passes.def
	(pass_tag_collision_avoidance): New pass.
	* config/aarch64/aarch64.c (qdf24xx_tunings): Add
	AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS to tuning_flags.
	(aarch64_classify_address): Remove static qualifier.
	(aarch64_address_info, aarch64_address_type): Move to...
	* config/aarch64/aarch64-protos.h: ... here.
	(make_pass_tag_collision_avoidance): New function.
	* config/aarch64/aarch64-tuning-flags.def (rename_load_regs):
	New tuning flag.													


Co-Authored-By: Kugan Vivekanandarajah <kuganv@linaro.org>

From-SVN: r264115
parent 41fbb3ec
2018-09-05 Siddhesh Poyarekar <siddhesh@sourceware.org>
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
* config/aarch64/falkor-tag-collision-avoidance.c: New file.
* config.gcc (extra_objs): Build it.
* config/aarch64/t-aarch64 (falkor-tag-collision-avoidance.o):
Likewise.
* config/aarch64/aarch64-passes.def
(pass_tag_collision_avoidance): New pass.
* config/aarch64/aarch64.c (qdf24xx_tunings): Add
AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS to tuning_flags.
(aarch64_classify_address): Remove static qualifier.
(aarch64_address_info, aarch64_address_type): Move to...
* config/aarch64/aarch64-protos.h: ... here.
(make_pass_tag_collision_avoidance): New function.
* config/aarch64/aarch64-tuning-flags.def (rename_load_regs):
New tuning flag.
2018-09-05 Martin Liska <mliska@suse.cz> 2018-09-05 Martin Liska <mliska@suse.cz>
* doc/gcov.texi: Update documentation of humar * doc/gcov.texi: Update documentation of humar
......
...@@ -304,7 +304,7 @@ aarch64*-*-*) ...@@ -304,7 +304,7 @@ aarch64*-*-*)
extra_headers="arm_fp16.h arm_neon.h arm_acle.h" extra_headers="arm_fp16.h arm_neon.h arm_acle.h"
c_target_objs="aarch64-c.o" c_target_objs="aarch64-c.o"
cxx_target_objs="aarch64-c.o" cxx_target_objs="aarch64-c.o"
extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o aarch64-speculation.o" extra_objs="aarch64-builtins.o aarch-common.o cortex-a57-fma-steering.o aarch64-speculation.o falkor-tag-collision-avoidance.o"
target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c" target_gtfiles="\$(srcdir)/config/aarch64/aarch64-builtins.c"
target_has_targetm_common=yes target_has_targetm_common=yes
;; ;;
......
...@@ -20,3 +20,4 @@ ...@@ -20,3 +20,4 @@
INSERT_PASS_AFTER (pass_regrename, 1, pass_fma_steering); INSERT_PASS_AFTER (pass_regrename, 1, pass_fma_steering);
INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation); INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation);
INSERT_PASS_AFTER (pass_machine_reorg, 1, pass_tag_collision_avoidance);
...@@ -288,6 +288,49 @@ struct tune_params ...@@ -288,6 +288,49 @@ struct tune_params
const struct cpu_prefetch_tune *prefetch; const struct cpu_prefetch_tune *prefetch;
}; };
/* Classifies an address.
ADDRESS_REG_IMM
A simple base register plus immediate offset.
ADDRESS_REG_WB
A base register indexed by immediate offset with writeback.
ADDRESS_REG_REG
A base register indexed by (optionally scaled) register.
ADDRESS_REG_UXTW
A base register indexed by (optionally scaled) zero-extended register.
ADDRESS_REG_SXTW
A base register indexed by (optionally scaled) sign-extended register.
ADDRESS_LO_SUM
A LO_SUM rtx with a base register and "LO12" symbol relocation.
ADDRESS_SYMBOLIC:
A constant symbolic address, in pc-relative literal pool. */
enum aarch64_address_type {
ADDRESS_REG_IMM,
ADDRESS_REG_WB,
ADDRESS_REG_REG,
ADDRESS_REG_UXTW,
ADDRESS_REG_SXTW,
ADDRESS_LO_SUM,
ADDRESS_SYMBOLIC
};
/* Address information. */
struct aarch64_address_info {
enum aarch64_address_type type;
rtx base;
rtx offset;
poly_int64 const_offset;
int shift;
enum aarch64_symbol_type symbol_type;
};
#define AARCH64_FUSION_PAIR(x, name) \ #define AARCH64_FUSION_PAIR(x, name) \
AARCH64_FUSE_##name##_index, AARCH64_FUSE_##name##_index,
/* Supported fusion operations. */ /* Supported fusion operations. */
...@@ -561,6 +604,11 @@ void aarch64_swap_ldrstr_operands (rtx *, bool); ...@@ -561,6 +604,11 @@ void aarch64_swap_ldrstr_operands (rtx *, bool);
extern void aarch64_asm_output_pool_epilogue (FILE *, const char *, extern void aarch64_asm_output_pool_epilogue (FILE *, const char *,
tree, HOST_WIDE_INT); tree, HOST_WIDE_INT);
extern bool aarch64_classify_address (struct aarch64_address_info *, rtx,
machine_mode, bool,
aarch64_addr_query_type = ADDR_QUERY_M);
/* Defined in common/config/aarch64-common.c. */ /* Defined in common/config/aarch64-common.c. */
bool aarch64_handle_option (struct gcc_options *, struct gcc_options *, bool aarch64_handle_option (struct gcc_options *, struct gcc_options *,
const struct cl_decoded_option *, location_t); const struct cl_decoded_option *, location_t);
...@@ -572,6 +620,7 @@ std::string aarch64_get_extension_string_for_isa_flags (unsigned long, ...@@ -572,6 +620,7 @@ std::string aarch64_get_extension_string_for_isa_flags (unsigned long,
rtl_opt_pass *make_pass_fma_steering (gcc::context *); rtl_opt_pass *make_pass_fma_steering (gcc::context *);
rtl_opt_pass *make_pass_track_speculation (gcc::context *); rtl_opt_pass *make_pass_track_speculation (gcc::context *);
rtl_opt_pass *make_pass_tag_collision_avoidance (gcc::context *);
poly_uint64 aarch64_regmode_natural_size (machine_mode); poly_uint64 aarch64_regmode_natural_size (machine_mode);
......
...@@ -44,4 +44,6 @@ AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND) ...@@ -44,4 +44,6 @@ AARCH64_EXTRA_TUNING_OPTION ("cheap_shift_extend", CHEAP_SHIFT_EXTEND)
/* Disallow load/store pair instructions on Q-registers. */ /* Disallow load/store pair instructions on Q-registers. */
AARCH64_EXTRA_TUNING_OPTION ("no_ldp_stp_qregs", NO_LDP_STP_QREGS) AARCH64_EXTRA_TUNING_OPTION ("no_ldp_stp_qregs", NO_LDP_STP_QREGS)
AARCH64_EXTRA_TUNING_OPTION ("rename_load_regs", RENAME_LOAD_REGS)
#undef AARCH64_EXTRA_TUNING_OPTION #undef AARCH64_EXTRA_TUNING_OPTION
...@@ -78,48 +78,6 @@ ...@@ -78,48 +78,6 @@
/* Defined for convenience. */ /* Defined for convenience. */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
/* Classifies an address.
ADDRESS_REG_IMM
A simple base register plus immediate offset.
ADDRESS_REG_WB
A base register indexed by immediate offset with writeback.
ADDRESS_REG_REG
A base register indexed by (optionally scaled) register.
ADDRESS_REG_UXTW
A base register indexed by (optionally scaled) zero-extended register.
ADDRESS_REG_SXTW
A base register indexed by (optionally scaled) sign-extended register.
ADDRESS_LO_SUM
A LO_SUM rtx with a base register and "LO12" symbol relocation.
ADDRESS_SYMBOLIC:
A constant symbolic address, in pc-relative literal pool. */
enum aarch64_address_type {
ADDRESS_REG_IMM,
ADDRESS_REG_WB,
ADDRESS_REG_REG,
ADDRESS_REG_UXTW,
ADDRESS_REG_SXTW,
ADDRESS_LO_SUM,
ADDRESS_SYMBOLIC
};
struct aarch64_address_info {
enum aarch64_address_type type;
rtx base;
rtx offset;
poly_int64 const_offset;
int shift;
enum aarch64_symbol_type symbol_type;
};
/* Information about a legitimate vector immediate operand. */ /* Information about a legitimate vector immediate operand. */
struct simd_immediate_info struct simd_immediate_info
{ {
...@@ -927,7 +885,7 @@ static const struct tune_params qdf24xx_tunings = ...@@ -927,7 +885,7 @@ static const struct tune_params qdf24xx_tunings =
2, /* min_div_recip_mul_df. */ 2, /* min_div_recip_mul_df. */
0, /* max_case_values. */ 0, /* max_case_values. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS, /* tune_flags. */
&qdf24xx_prefetch_tune &qdf24xx_prefetch_tune
}; };
...@@ -5671,10 +5629,10 @@ virt_or_elim_regno_p (unsigned regno) ...@@ -5671,10 +5629,10 @@ virt_or_elim_regno_p (unsigned regno)
If it is, fill in INFO appropriately. STRICT_P is true if If it is, fill in INFO appropriately. STRICT_P is true if
REG_OK_STRICT is in effect. */ REG_OK_STRICT is in effect. */
static bool bool
aarch64_classify_address (struct aarch64_address_info *info, aarch64_classify_address (struct aarch64_address_info *info,
rtx x, machine_mode mode, bool strict_p, rtx x, machine_mode mode, bool strict_p,
aarch64_addr_query_type type = ADDR_QUERY_M) aarch64_addr_query_type type)
{ {
enum rtx_code code = GET_CODE (x); enum rtx_code code = GET_CODE (x);
rtx op0, op1; rtx op0, op1;
......
...@@ -77,6 +77,16 @@ aarch64-speculation.o: $(srcdir)/config/aarch64/aarch64-speculation.cc \ ...@@ -77,6 +77,16 @@ aarch64-speculation.o: $(srcdir)/config/aarch64/aarch64-speculation.cc \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_SPPFLAGS) $(INCLUDES) \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_SPPFLAGS) $(INCLUDES) \
$(srcdir)/config/aarch64/aarch64-speculation.cc $(srcdir)/config/aarch64/aarch64-speculation.cc
falkor-tag-collision-avoidance.o: \
$(srcdir)/config/aarch64/falkor-tag-collision-avoidance.c \
$(CONFIG_H) $(SYSTEM_H) $(TM_H) $(REGS_H) insn-config.h $(RTL_BASE_H) \
dominance.h cfg.h cfganal.h $(BASIC_BLOCK_H) $(INSN_ATTR_H) $(RECOG_H) \
output.h hash-map.h $(DF_H) $(OBSTACK_H) $(TARGET_H) $(RTL_H) \
$(CONTEXT_H) $(TREE_PASS_H) regrename.h \
$(srcdir)/config/aarch64/aarch64-protos.h
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/aarch64/falkor-tag-collision-avoidance.c
comma=, comma=,
MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG)))) MULTILIB_OPTIONS = $(subst $(comma),/, $(patsubst %, mabi=%, $(subst $(comma),$(comma)mabi=,$(TM_MULTILIB_CONFIG))))
MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG)) MULTILIB_DIRNAMES = $(subst $(comma), ,$(TM_MULTILIB_CONFIG))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment