Commit d2ff35c0 by Luis Machado Committed by Luis Machado

[Patch 02/02] Introduce prefetch-dynamic-strides option

The following patch adds an option to control software prefetching of memory
references with non-constant/unknown strides.

Currently we prefetch these references if the pass thinks there is benefit to
doing so. But, since this is all based on heuristics, it's not always the case
that we end up with better performance.

For Falkor there is also the problem of conflicts with the hardware prefetcher,
so we need to be more conservative in terms of what we issue software prefetch
hints for.

This also aligns GCC with what LLVM does for Falkor.

Similarly to the previous patch, the defaults guarantee no change in behavior
for other targets and architectures.

gcc/ChangeLog:

2018-05-23  Luis Machado  <luis.machado@linaro.org>

	* config/aarch64/aarch64-protos.h (cpu_prefetch_tune)
	<prefetch_dynamic_strides>: New const bool field.
	* config/aarch64/aarch64.c (generic_prefetch_tune): Update to include
	prefetch_dynamic_strides.
	(exynosm1_prefetch_tune): Likewise.
	(thunderxt88_prefetch_tune): Likewise.
	(thunderx_prefetch_tune): Likewise.
	(thunderx2t99_prefetch_tune): Likewise.
	(qdf24xx_prefetch_tune): Likewise. Set prefetch_dynamic_strides to
	false.
	(aarch64_override_options_internal): Update to set
	PARAM_PREFETCH_DYNAMIC_STRIDES.
	* doc/invoke.texi (prefetch-dynamic-strides): Document new option.
	* params.def (PARAM_PREFETCH_DYNAMIC_STRIDES): New.
	* params.h (PARAM_PREFETCH_DYNAMIC_STRIDES): Define.
	* tree-ssa-loop-prefetch.c (should_issue_prefetch_p): Account for
	prefetch-dynamic-strides setting.

From-SVN: r260618
parent 59100dfc
2018-05-23 Luis Machado <luis.machado@linaro.org> 2018-05-23 Luis Machado <luis.machado@linaro.org>
* config/aarch64/aarch64-protos.h (cpu_prefetch_tune) * config/aarch64/aarch64-protos.h (cpu_prefetch_tune)
<prefetch_dynamic_strides>: New const bool field.
* config/aarch64/aarch64.c (generic_prefetch_tune): Update to include
prefetch_dynamic_strides.
(exynosm1_prefetch_tune): Likewise.
(thunderxt88_prefetch_tune): Likewise.
(thunderx_prefetch_tune): Likewise.
(thunderx2t99_prefetch_tune): Likewise.
(qdf24xx_prefetch_tune): Likewise. Set prefetch_dynamic_strides to
false.
(aarch64_override_options_internal): Update to set
PARAM_PREFETCH_DYNAMIC_STRIDES.
* doc/invoke.texi (prefetch-dynamic-strides): Document new option.
* params.def (PARAM_PREFETCH_DYNAMIC_STRIDES): New.
* params.h (PARAM_PREFETCH_DYNAMIC_STRIDES): Define.
* tree-ssa-loop-prefetch.c (should_issue_prefetch_p): Account for
prefetch-dynamic-strides setting.
2018-05-23 Luis Machado <luis.machado@linaro.org>
* config/aarch64/aarch64-protos.h (cpu_prefetch_tune)
<minimum_stride>: New const int field. <minimum_stride>: New const int field.
* config/aarch64/aarch64.c (generic_prefetch_tune): Update to include * config/aarch64/aarch64.c (generic_prefetch_tune): Update to include
minimum_stride field defaulting to -1. minimum_stride field defaulting to -1.
......
...@@ -230,6 +230,9 @@ struct cpu_prefetch_tune ...@@ -230,6 +230,9 @@ struct cpu_prefetch_tune
const int l1_cache_size; const int l1_cache_size;
const int l1_cache_line_size; const int l1_cache_line_size;
const int l2_cache_size; const int l2_cache_size;
/* Whether software prefetch hints should be issued for non-constant
strides. */
const bool prefetch_dynamic_strides;
/* The minimum constant stride beyond which we should use prefetch /* The minimum constant stride beyond which we should use prefetch
hints for. */ hints for. */
const int minimum_stride; const int minimum_stride;
......
...@@ -550,6 +550,7 @@ static const cpu_prefetch_tune generic_prefetch_tune = ...@@ -550,6 +550,7 @@ static const cpu_prefetch_tune generic_prefetch_tune =
-1, /* l1_cache_size */ -1, /* l1_cache_size */
-1, /* l1_cache_line_size */ -1, /* l1_cache_line_size */
-1, /* l2_cache_size */ -1, /* l2_cache_size */
true, /* prefetch_dynamic_strides */
-1, /* minimum_stride */ -1, /* minimum_stride */
-1 /* default_opt_level */ -1 /* default_opt_level */
}; };
...@@ -560,6 +561,7 @@ static const cpu_prefetch_tune exynosm1_prefetch_tune = ...@@ -560,6 +561,7 @@ static const cpu_prefetch_tune exynosm1_prefetch_tune =
-1, /* l1_cache_size */ -1, /* l1_cache_size */
64, /* l1_cache_line_size */ 64, /* l1_cache_line_size */
-1, /* l2_cache_size */ -1, /* l2_cache_size */
true, /* prefetch_dynamic_strides */
-1, /* minimum_stride */ -1, /* minimum_stride */
-1 /* default_opt_level */ -1 /* default_opt_level */
}; };
...@@ -570,6 +572,7 @@ static const cpu_prefetch_tune qdf24xx_prefetch_tune = ...@@ -570,6 +572,7 @@ static const cpu_prefetch_tune qdf24xx_prefetch_tune =
32, /* l1_cache_size */ 32, /* l1_cache_size */
64, /* l1_cache_line_size */ 64, /* l1_cache_line_size */
512, /* l2_cache_size */ 512, /* l2_cache_size */
false, /* prefetch_dynamic_strides */
2048, /* minimum_stride */ 2048, /* minimum_stride */
3 /* default_opt_level */ 3 /* default_opt_level */
}; };
...@@ -580,6 +583,7 @@ static const cpu_prefetch_tune thunderxt88_prefetch_tune = ...@@ -580,6 +583,7 @@ static const cpu_prefetch_tune thunderxt88_prefetch_tune =
32, /* l1_cache_size */ 32, /* l1_cache_size */
128, /* l1_cache_line_size */ 128, /* l1_cache_line_size */
16*1024, /* l2_cache_size */ 16*1024, /* l2_cache_size */
true, /* prefetch_dynamic_strides */
-1, /* minimum_stride */ -1, /* minimum_stride */
3 /* default_opt_level */ 3 /* default_opt_level */
}; };
...@@ -590,6 +594,7 @@ static const cpu_prefetch_tune thunderx_prefetch_tune = ...@@ -590,6 +594,7 @@ static const cpu_prefetch_tune thunderx_prefetch_tune =
32, /* l1_cache_size */ 32, /* l1_cache_size */
128, /* l1_cache_line_size */ 128, /* l1_cache_line_size */
-1, /* l2_cache_size */ -1, /* l2_cache_size */
true, /* prefetch_dynamic_strides */
-1, /* minimum_stride */ -1, /* minimum_stride */
-1 /* default_opt_level */ -1 /* default_opt_level */
}; };
...@@ -600,6 +605,7 @@ static const cpu_prefetch_tune thunderx2t99_prefetch_tune = ...@@ -600,6 +605,7 @@ static const cpu_prefetch_tune thunderx2t99_prefetch_tune =
32, /* l1_cache_size */ 32, /* l1_cache_size */
64, /* l1_cache_line_size */ 64, /* l1_cache_line_size */
256, /* l2_cache_size */ 256, /* l2_cache_size */
true, /* prefetch_dynamic_strides */
-1, /* minimum_stride */ -1, /* minimum_stride */
-1 /* default_opt_level */ -1 /* default_opt_level */
}; };
...@@ -10635,6 +10641,11 @@ aarch64_override_options_internal (struct gcc_options *opts) ...@@ -10635,6 +10641,11 @@ aarch64_override_options_internal (struct gcc_options *opts)
aarch64_tune_params.prefetch->l2_cache_size, aarch64_tune_params.prefetch->l2_cache_size,
opts->x_param_values, opts->x_param_values,
global_options_set.x_param_values); global_options_set.x_param_values);
if (!aarch64_tune_params.prefetch->prefetch_dynamic_strides)
maybe_set_param_value (PARAM_PREFETCH_DYNAMIC_STRIDES,
0,
opts->x_param_values,
global_options_set.x_param_values);
if (aarch64_tune_params.prefetch->minimum_stride >= 0) if (aarch64_tune_params.prefetch->minimum_stride >= 0)
maybe_set_param_value (PARAM_PREFETCH_MINIMUM_STRIDE, maybe_set_param_value (PARAM_PREFETCH_MINIMUM_STRIDE,
aarch64_tune_params.prefetch->minimum_stride, aarch64_tune_params.prefetch->minimum_stride,
......
...@@ -10734,6 +10734,16 @@ The size of L1 cache, in kilobytes. ...@@ -10734,6 +10734,16 @@ The size of L1 cache, in kilobytes.
@item l2-cache-size @item l2-cache-size
The size of L2 cache, in kilobytes. The size of L2 cache, in kilobytes.
@item prefetch-dynamic-strides
Whether the loop array prefetch pass should issue software prefetch hints
for strides that are non-constant. In some cases this may be
beneficial, though the fact that the stride is non-constant may make it
hard to predict when there is a clear benefit to issuing these hints.
Set to 1, the default, if the prefetch hints should be issued for non-constant
strides. Set to 0 if prefetch hints should be issued only for strides that
are known to be constant and not below @option{prefetch-minimum-stride}.
@item prefetch-minimum-stride @item prefetch-minimum-stride
Minimum constant stride, in bytes, to start using prefetch hints for. If Minimum constant stride, in bytes, to start using prefetch hints for. If
the stride is less than this threshold, prefetch hints will not be issued. the stride is less than this threshold, prefetch hints will not be issued.
...@@ -795,6 +795,15 @@ DEFPARAM (PARAM_L2_CACHE_SIZE, ...@@ -795,6 +795,15 @@ DEFPARAM (PARAM_L2_CACHE_SIZE,
"The size of L2 cache.", "The size of L2 cache.",
512, 0, 0) 512, 0, 0)
/* Whether software prefetch hints should be issued for non-constant
strides. */
DEFPARAM (PARAM_PREFETCH_DYNAMIC_STRIDES,
"prefetch-dynamic-strides",
"Whether software prefetch hints should be issued for non-constant "
"strides.",
1, 0, 1)
/* The minimum constant stride beyond which we should use prefetch hints /* The minimum constant stride beyond which we should use prefetch hints
for. */ for. */
......
...@@ -196,6 +196,8 @@ extern void init_param_values (int *params); ...@@ -196,6 +196,8 @@ extern void init_param_values (int *params);
PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE) PARAM_VALUE (PARAM_L1_CACHE_LINE_SIZE)
#define L2_CACHE_SIZE \ #define L2_CACHE_SIZE \
PARAM_VALUE (PARAM_L2_CACHE_SIZE) PARAM_VALUE (PARAM_L2_CACHE_SIZE)
#define PREFETCH_DYNAMIC_STRIDES \
PARAM_VALUE (PARAM_PREFETCH_DYNAMIC_STRIDES)
#define PREFETCH_MINIMUM_STRIDE \ #define PREFETCH_MINIMUM_STRIDE \
PARAM_VALUE (PARAM_PREFETCH_MINIMUM_STRIDE) PARAM_VALUE (PARAM_PREFETCH_MINIMUM_STRIDE)
#define USE_CANONICAL_TYPES \ #define USE_CANONICAL_TYPES \
......
...@@ -992,6 +992,16 @@ prune_by_reuse (struct mem_ref_group *groups) ...@@ -992,6 +992,16 @@ prune_by_reuse (struct mem_ref_group *groups)
static bool static bool
should_issue_prefetch_p (struct mem_ref *ref) should_issue_prefetch_p (struct mem_ref *ref)
{ {
/* Do we want to issue prefetches for non-constant strides? */
if (!cst_and_fits_in_hwi (ref->group->step) && PREFETCH_DYNAMIC_STRIDES == 0)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file,
"Skipping non-constant step for reference %u:%u\n",
ref->group->uid, ref->uid);
return false;
}
/* Some processors may have a hardware prefetcher that may conflict with /* Some processors may have a hardware prefetcher that may conflict with
prefetch hints for a range of strides. Make sure we don't issue prefetch hints for a range of strides. Make sure we don't issue
prefetches for such cases if the stride is within this particular prefetches for such cases if the stride is within this particular
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment