Commit 9d2c6e2e by Maxim Kuvyrkov Committed by Maxim Kuvyrkov

Add prefetch configuration to aarch64 backend.

	* config/aarch64/aarch64-protos.h (struct cpu_prefetch_tune):
	New tune structure.
	(struct tune_params): Use cpu_prefetch_tune instead of cache_line_size.
	[Unrelated to main purpose of the patch] Place the pointer field last
	to enable type checking errors when tune structures are wrongly merged.
	* config/aarch64/aarch64.c (generic_prefetch_tune,)
	(exynosm1_prefetch_tune, qdf24xx_prefetch_tune,)
	(thunderx2t99_prefetch_tune): New tune constants.
	(tune_params *_tunings): Update all tunings (no functional change).
	(aarch64_override_options_internal): Set PARAM_SIMULTANEOUS_PREFETCHES,
	PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE, and PARAM_L2_CACHE_SIZE
	from tunings structures.

Change-Id: I1ddbac1863dcf078a2e5b14dd904debc76a7da94

From-SVN: r249240
parent b783399a
2017-06-16 Maxim Kuvyrkov <maxim.kuvyrkov@linaro.org>
* config/aarch64/aarch64-protos.h (struct cpu_prefetch_tune):
New tune structure.
(struct tune_params): Use cpu_prefetch_tune instead of cache_line_size.
[Unrelated to main purpose of the patch] Place the pointer field last
to enable type checking errors when tune structures are wrongly merged.
* config/aarch64/aarch64.c (generic_prefetch_tune,)
(exynosm1_prefetch_tune, qdf24xx_prefetch_tune,)
(thunderx2t99_prefetch_tune): New tune constants.
(tune_params *_tunings): Update all tunings (no functional change).
(aarch64_override_options_internal): Set PARAM_SIMULTANEOUS_PREFETCHES,
PARAM_L1_CACHE_SIZE, PARAM_L1_CACHE_LINE_SIZE, and PARAM_L2_CACHE_SIZE
from tunings structures.
2017-06-16 Jakub Jelinek <jakub@redhat.com> 2017-06-16 Jakub Jelinek <jakub@redhat.com>
PR sanitizer/81094 PR sanitizer/81094
......
...@@ -203,6 +203,15 @@ struct cpu_approx_modes ...@@ -203,6 +203,15 @@ struct cpu_approx_modes
const unsigned int recip_sqrt; /* Reciprocal square root. */ const unsigned int recip_sqrt; /* Reciprocal square root. */
}; };
/* Cache prefetch settings for prefetch-loop-arrays.
   Each field supplies the value for one PARAM_* setting.
   aarch64_override_options_internal passes a field on to
   maybe_set_param_value only when the field satisfies the check noted
   on it below; otherwise that param is not set from this structure.
   Keep the field order stable: the *_prefetch_tune constants are
   initialized positionally. */
struct cpu_prefetch_tune
{
/* Value for PARAM_SIMULTANEOUS_PREFETCHES; used only when > 0. */
const int num_slots;
/* Value for PARAM_L1_CACHE_SIZE; used only when >= 0. */
const int l1_cache_size;
/* Value for PARAM_L1_CACHE_LINE_SIZE; used only when >= 0. */
const int l1_cache_line_size;
/* Value for PARAM_L2_CACHE_SIZE; used only when >= 0. */
const int l2_cache_size;
};
struct tune_params struct tune_params
{ {
const struct cpu_cost_table *insn_extra_cost; const struct cpu_cost_table *insn_extra_cost;
...@@ -224,9 +233,6 @@ struct tune_params ...@@ -224,9 +233,6 @@ struct tune_params
int min_div_recip_mul_df; int min_div_recip_mul_df;
/* Value for aarch64_case_values_threshold; or 0 for the default. */ /* Value for aarch64_case_values_threshold; or 0 for the default. */
unsigned int max_case_values; unsigned int max_case_values;
/* Value for PARAM_L1_CACHE_LINE_SIZE; or 0 to use the default. */
unsigned int cache_line_size;
/* An enum specifying how to take into account CPU autoprefetch capabilities /* An enum specifying how to take into account CPU autoprefetch capabilities
during instruction scheduling: during instruction scheduling:
- AUTOPREFETCHER_OFF: Do not take autoprefetch capabilities into account. - AUTOPREFETCHER_OFF: Do not take autoprefetch capabilities into account.
...@@ -244,6 +250,10 @@ struct tune_params ...@@ -244,6 +250,10 @@ struct tune_params
} autoprefetcher_model; } autoprefetcher_model;
unsigned int extra_tuning_flags; unsigned int extra_tuning_flags;
/* Place prefetch struct pointer at the end to enable type checking
errors when tune_params misses elements (e.g., from erroneous merges). */
const struct cpu_prefetch_tune *prefetch;
}; };
#define AARCH64_FUSION_PAIR(x, name) \ #define AARCH64_FUSION_PAIR(x, name) \
......
...@@ -526,6 +526,39 @@ static const cpu_approx_modes xgene1_approx_modes = ...@@ -526,6 +526,39 @@ static const cpu_approx_modes xgene1_approx_modes =
AARCH64_APPROX_ALL /* recip_sqrt */ AARCH64_APPROX_ALL /* recip_sqrt */
}; };
/* Generic prefetch settings (which disable prefetch).
   num_slots is 0 and every cache field is -1, so none of the checks in
   aarch64_override_options_internal succeed and no prefetch-related
   param is set from this structure.  Shared by every tuning that has
   no CPU-specific prefetch data. */
static const cpu_prefetch_tune generic_prefetch_tune =
{
0, /* num_slots */
-1, /* l1_cache_size */
-1, /* l1_cache_line_size */
-1 /* l2_cache_size */
};
/* Prefetch settings referenced by exynosm1_tunings.
   Only the L1 cache line size (64) is specified, carrying over the value
   previously held in the tuning's cache_line_size field.  The other
   fields do not satisfy the checks in aarch64_override_options_internal,
   so their params are not set from this structure. */
static const cpu_prefetch_tune exynosm1_prefetch_tune =
{
0, /* num_slots */
-1, /* l1_cache_size */
64, /* l1_cache_line_size */
-1 /* l2_cache_size */
};
/* Prefetch settings referenced by qdf24xx_tunings.
   Only the L1 cache line size (64) is specified, carrying over the value
   previously held in the tuning's cache_line_size field.  The other
   fields do not satisfy the checks in aarch64_override_options_internal,
   so their params are not set from this structure. */
static const cpu_prefetch_tune qdf24xx_prefetch_tune =
{
0, /* num_slots */
-1, /* l1_cache_size */
64, /* l1_cache_line_size */
-1 /* l2_cache_size */
};
/* Prefetch settings referenced by thunderx2t99_tunings.
   Only the L1 cache line size (64) is specified, carrying over the value
   previously held in the tuning's cache_line_size field.  The other
   fields do not satisfy the checks in aarch64_override_options_internal,
   so their params are not set from this structure. */
static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
{
0, /* num_slots */
-1, /* l1_cache_size */
64, /* l1_cache_line_size */
-1 /* l2_cache_size */
};
static const struct tune_params generic_tunings = static const struct tune_params generic_tunings =
{ {
&cortexa57_extra_costs, &cortexa57_extra_costs,
...@@ -546,9 +579,9 @@ static const struct tune_params generic_tunings = ...@@ -546,9 +579,9 @@ static const struct tune_params generic_tunings =
2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */ 2, /* min_div_recip_mul_df. */
0, /* max_case_values. */ 0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&generic_prefetch_tune
}; };
static const struct tune_params cortexa35_tunings = static const struct tune_params cortexa35_tunings =
...@@ -572,9 +605,9 @@ static const struct tune_params cortexa35_tunings = ...@@ -572,9 +605,9 @@ static const struct tune_params cortexa35_tunings =
2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */ 2, /* min_div_recip_mul_df. */
0, /* max_case_values. */ 0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&generic_prefetch_tune
}; };
static const struct tune_params cortexa53_tunings = static const struct tune_params cortexa53_tunings =
...@@ -598,9 +631,9 @@ static const struct tune_params cortexa53_tunings = ...@@ -598,9 +631,9 @@ static const struct tune_params cortexa53_tunings =
2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */ 2, /* min_div_recip_mul_df. */
0, /* max_case_values. */ 0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&generic_prefetch_tune
}; };
static const struct tune_params cortexa57_tunings = static const struct tune_params cortexa57_tunings =
...@@ -624,9 +657,9 @@ static const struct tune_params cortexa57_tunings = ...@@ -624,9 +657,9 @@ static const struct tune_params cortexa57_tunings =
2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */ 2, /* min_div_recip_mul_df. */
0, /* max_case_values. */ 0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags. */ (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS), /* tune_flags. */
&generic_prefetch_tune
}; };
static const struct tune_params cortexa72_tunings = static const struct tune_params cortexa72_tunings =
...@@ -650,9 +683,9 @@ static const struct tune_params cortexa72_tunings = ...@@ -650,9 +683,9 @@ static const struct tune_params cortexa72_tunings =
2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */ 2, /* min_div_recip_mul_df. */
0, /* max_case_values. */ 0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&generic_prefetch_tune
}; };
static const struct tune_params cortexa73_tunings = static const struct tune_params cortexa73_tunings =
...@@ -676,11 +709,13 @@ static const struct tune_params cortexa73_tunings = ...@@ -676,11 +709,13 @@ static const struct tune_params cortexa73_tunings =
2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */ 2, /* min_div_recip_mul_df. */
0, /* max_case_values. */ 0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&generic_prefetch_tune
}; };
static const struct tune_params exynosm1_tunings = static const struct tune_params exynosm1_tunings =
{ {
&exynosm1_extra_costs, &exynosm1_extra_costs,
...@@ -701,9 +736,9 @@ static const struct tune_params exynosm1_tunings = ...@@ -701,9 +736,9 @@ static const struct tune_params exynosm1_tunings =
2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */ 2, /* min_div_recip_mul_df. */
48, /* max_case_values. */ 48, /* max_case_values. */
64, /* cache_line_size. */
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&exynosm1_prefetch_tune
}; };
static const struct tune_params thunderx_tunings = static const struct tune_params thunderx_tunings =
...@@ -726,9 +761,9 @@ static const struct tune_params thunderx_tunings = ...@@ -726,9 +761,9 @@ static const struct tune_params thunderx_tunings =
2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */ 2, /* min_div_recip_mul_df. */
0, /* max_case_values. */ 0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW) /* tune_flags. */ (AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW), /* tune_flags. */
&generic_prefetch_tune
}; };
static const struct tune_params xgene1_tunings = static const struct tune_params xgene1_tunings =
...@@ -751,9 +786,9 @@ static const struct tune_params xgene1_tunings = ...@@ -751,9 +786,9 @@ static const struct tune_params xgene1_tunings =
2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */ 2, /* min_div_recip_mul_df. */
0, /* max_case_values. */ 0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&generic_prefetch_tune
}; };
static const struct tune_params qdf24xx_tunings = static const struct tune_params qdf24xx_tunings =
...@@ -777,9 +812,9 @@ static const struct tune_params qdf24xx_tunings = ...@@ -777,9 +812,9 @@ static const struct tune_params qdf24xx_tunings =
2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */ 2, /* min_div_recip_mul_df. */
0, /* max_case_values. */ 0, /* max_case_values. */
64, /* cache_line_size. */
tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */ tune_params::AUTOPREFETCHER_STRONG, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&qdf24xx_prefetch_tune
}; };
static const struct tune_params thunderx2t99_tunings = static const struct tune_params thunderx2t99_tunings =
...@@ -802,9 +837,9 @@ static const struct tune_params thunderx2t99_tunings = ...@@ -802,9 +837,9 @@ static const struct tune_params thunderx2t99_tunings =
2, /* min_div_recip_mul_sf. */ 2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */ 2, /* min_div_recip_mul_df. */
0, /* max_case_values. */ 0, /* max_case_values. */
64, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */ (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */
&thunderx2t99_prefetch_tune
}; };
/* Support for fine-grained override of the tuning structures. */ /* Support for fine-grained override of the tuning structures. */
...@@ -8747,10 +8782,27 @@ aarch64_override_options_internal (struct gcc_options *opts) ...@@ -8747,10 +8782,27 @@ aarch64_override_options_internal (struct gcc_options *opts)
opts->x_param_values, opts->x_param_values,
global_options_set.x_param_values); global_options_set.x_param_values);
/* Set the L1 cache line size. */ /* Set up parameters to be used in prefetching algorithm. Do not
if (selected_cpu->tune->cache_line_size != 0) override the defaults unless we are tuning for a core we have
researched values for. */
if (aarch64_tune_params.prefetch->num_slots > 0)
maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
aarch64_tune_params.prefetch->num_slots,
opts->x_param_values,
global_options_set.x_param_values);
if (aarch64_tune_params.prefetch->l1_cache_size >= 0)
maybe_set_param_value (PARAM_L1_CACHE_SIZE,
aarch64_tune_params.prefetch->l1_cache_size,
opts->x_param_values,
global_options_set.x_param_values);
if (aarch64_tune_params.prefetch->l1_cache_line_size >= 0)
maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
selected_cpu->tune->cache_line_size, aarch64_tune_params.prefetch->l1_cache_line_size,
opts->x_param_values,
global_options_set.x_param_values);
if (aarch64_tune_params.prefetch->l2_cache_size >= 0)
maybe_set_param_value (PARAM_L2_CACHE_SIZE,
aarch64_tune_params.prefetch->l2_cache_size,
opts->x_param_values, opts->x_param_values,
global_options_set.x_param_values); global_options_set.x_param_values);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment