Commit 76a34e3f by Richard Sandiford Committed by Richard Sandiford

Add an empty_mask_is_expensive hook

This patch adds a hook to control whether we avoid executing masked
(predicated) stores when the mask is all false.  We don't want to do
that by default for SVE.

2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* target.def (empty_mask_is_expensive): New hook.
	* doc/tm.texi.in (TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE): New hook.
	* doc/tm.texi: Regenerate.
	* targhooks.h (default_empty_mask_is_expensive): Declare.
	* targhooks.c (default_empty_mask_is_expensive): New function.
	* tree-vectorizer.c (vectorize_loops): Only call optimize_mask_stores
	if the target says that empty masks are expensive.
	* config/aarch64/aarch64.c (aarch64_empty_mask_is_expensive):
	New function.
	(TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE): Redefine.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r256631
parent 535e7c11
...@@ -2,6 +2,21 @@ ...@@ -2,6 +2,21 @@
Alan Hayward <alan.hayward@arm.com> Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com> David Sherwood <david.sherwood@arm.com>
* target.def (empty_mask_is_expensive): New hook.
* doc/tm.texi.in (TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE): New hook.
* doc/tm.texi: Regenerate.
* targhooks.h (default_empty_mask_is_expensive): Declare.
* targhooks.c (default_empty_mask_is_expensive): New function.
* tree-vectorizer.c (vectorize_loops): Only call optimize_mask_stores
if the target says that empty masks are expensive.
* config/aarch64/aarch64.c (aarch64_empty_mask_is_expensive):
New function.
(TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE): Redefine.
2018-01-13 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
* tree-vectorizer.h (_loop_vec_info::mask_skip_niters): New field. * tree-vectorizer.h (_loop_vec_info::mask_skip_niters): New field.
(LOOP_VINFO_MASK_SKIP_NITERS): New macro. (LOOP_VINFO_MASK_SKIP_NITERS): New macro.
(vect_use_loop_mask_for_alignment_p): New function. (vect_use_loop_mask_for_alignment_p): New function.
......
...@@ -16875,6 +16875,16 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load, ...@@ -16875,6 +16875,16 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
return true; return true;
} }
/* Implement TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE.  The internal
   function code is ignored: for now, no masked operation (masked stores
   included) is treated as costly enough with an all-false mask to be
   worth branching around.  */

static bool
aarch64_empty_mask_is_expensive (unsigned /* ifn */)
{
  return false;
}
/* Return 1 if pseudo register should be created and used to hold /* Return 1 if pseudo register should be created and used to hold
GOT address for PIC code. */ GOT address for PIC code. */
...@@ -17499,6 +17509,9 @@ aarch64_libgcc_floating_mode_supported_p ...@@ -17499,6 +17509,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_VECTORIZE_GET_MASK_MODE #undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode #define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
#define TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE \
aarch64_empty_mask_is_expensive
#undef TARGET_INIT_LIBFUNCS #undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs #define TARGET_INIT_LIBFUNCS aarch64_init_libfuncs
......
...@@ -5924,6 +5924,12 @@ is @var{length} bytes long and that contains @var{nunits} elements, ...@@ -5924,6 +5924,12 @@ is @var{length} bytes long and that contains @var{nunits} elements,
if such a mode exists. if such a mode exists.
@end deftypefn @end deftypefn
@deftypefn {Target Hook} bool TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE (unsigned @var{ifn})
This hook returns true if masked internal function @var{ifn} (really of
type @code{internal_fn}) should be considered expensive when the mask is
all zeros. GCC can then try to branch around the instruction instead.
@end deftypefn
@deftypefn {Target Hook} {void *} TARGET_VECTORIZE_INIT_COST (struct loop *@var{loop_info}) @deftypefn {Target Hook} {void *} TARGET_VECTORIZE_INIT_COST (struct loop *@var{loop_info})
This hook should initialize target-specific data structures in preparation for modeling the costs of vectorizing a loop or basic block. The default allocates three unsigned integers for accumulating costs for the prologue, body, and epilogue of the loop or basic block. If @var{loop_info} is non-NULL, it identifies the loop being vectorized; otherwise a single block is being vectorized. This hook should initialize target-specific data structures in preparation for modeling the costs of vectorizing a loop or basic block. The default allocates three unsigned integers for accumulating costs for the prologue, body, and epilogue of the loop or basic block. If @var{loop_info} is non-NULL, it identifies the loop being vectorized; otherwise a single block is being vectorized.
@end deftypefn @end deftypefn
......
...@@ -4108,6 +4108,8 @@ address; but often a machine-dependent strategy can generate better code. ...@@ -4108,6 +4108,8 @@ address; but often a machine-dependent strategy can generate better code.
@hook TARGET_VECTORIZE_GET_MASK_MODE @hook TARGET_VECTORIZE_GET_MASK_MODE
@hook TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
@hook TARGET_VECTORIZE_INIT_COST @hook TARGET_VECTORIZE_INIT_COST
@hook TARGET_VECTORIZE_ADD_STMT_COST @hook TARGET_VECTORIZE_ADD_STMT_COST
......
...@@ -1933,6 +1933,17 @@ if such a mode exists.", ...@@ -1933,6 +1933,17 @@ if such a mode exists.",
(poly_uint64 nunits, poly_uint64 length), (poly_uint64 nunits, poly_uint64 length),
default_get_mask_mode) default_get_mask_mode)
/* Function to say whether a masked operation is expensive when the
mask is all zeros.  The operation is identified by IFN, which is passed
as unsigned although it is really an internal_fn.  A true result tells
GCC that it can try to branch around the instruction instead.  The
default implementation is default_empty_mask_is_expensive.  */
DEFHOOK
(empty_mask_is_expensive,
"This hook returns true if masked internal function @var{ifn} (really of\n\
type @code{internal_fn}) should be considered expensive when the mask is\n\
all zeros. GCC can then try to branch around the instruction instead.",
bool,
(unsigned ifn),
default_empty_mask_is_expensive)
/* Target builtin that implements vector gather operation. */ /* Target builtin that implements vector gather operation. */
DEFHOOK DEFHOOK
(builtin_gather, (builtin_gather,
......
...@@ -1319,6 +1319,14 @@ default_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size) ...@@ -1319,6 +1319,14 @@ default_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size)
return opt_machine_mode (); return opt_machine_mode ();
} }
/* Default implementation of the empty_mask_is_expensive hook: only
   IFN_MASK_STORE is reported as expensive; every other value of IFN
   yields false.  */

bool
default_empty_mask_is_expensive (unsigned ifn)
{
  switch (ifn)
    {
    case IFN_MASK_STORE:
      return true;

    default:
      return false;
    }
}
/* By default, the cost model accumulates three separate costs (prologue, /* By default, the cost model accumulates three separate costs (prologue,
loop body, and epilogue) for a vectorized loop or block. So allocate an loop body, and epilogue) for a vectorized loop or block. So allocate an
array of three unsigned ints, set it to zero, and return its address. */ array of three unsigned ints, set it to zero, and return its address. */
......
...@@ -111,6 +111,7 @@ extern machine_mode default_preferred_simd_mode (scalar_mode mode); ...@@ -111,6 +111,7 @@ extern machine_mode default_preferred_simd_mode (scalar_mode mode);
extern machine_mode default_split_reduction (machine_mode); extern machine_mode default_split_reduction (machine_mode);
extern void default_autovectorize_vector_sizes (vector_sizes *); extern void default_autovectorize_vector_sizes (vector_sizes *);
extern opt_machine_mode default_get_mask_mode (poly_uint64, poly_uint64); extern opt_machine_mode default_get_mask_mode (poly_uint64, poly_uint64);
extern bool default_empty_mask_is_expensive (unsigned);
extern void *default_init_cost (struct loop *); extern void *default_init_cost (struct loop *);
extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt, extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt,
struct _stmt_vec_info *, int, struct _stmt_vec_info *, int,
......
...@@ -826,7 +826,8 @@ vectorize_loops (void) ...@@ -826,7 +826,8 @@ vectorize_loops (void)
if (loop_vinfo) if (loop_vinfo)
has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo); has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo);
delete loop_vinfo; delete loop_vinfo;
if (has_mask_store) if (has_mask_store
&& targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE))
optimize_mask_stores (loop); optimize_mask_stores (loop);
loop->aux = NULL; loop->aux = NULL;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment