Commit 1f3cb663, authored and committed by Richard Sandiford

Handle vector boolean types when calculating the SLP unroll factor

The SLP unrolling factor is calculated by finding the smallest
scalar type for each SLP statement and taking the number of required
lanes from the vector versions of those scalar types.  E.g. for an
int32->int64 conversion, it's the vector of int32s rather than the
vector of int64s that determines the unroll factor.

We rely on tree-vect-patterns.c to replace boolean operations like:

   bool a, b, c;
   a = b & c;

with integer operations of whatever the best size is in context.
E.g. if b and c are fed by comparisons of ints, a, b and c will become
the appropriate size for an int comparison.  For most targets this means
that a, b and c will end up as int-sized themselves, but on targets like
SVE and AVX512 with packed vector booleans, they'll instead become a
small bitfield like :1, padded to a byte for memory purposes.
The SLP code would then take these scalar types and try to calculate
the vector type for them, causing the unroll factor to be much higher
than necessary.

This patch tries to make the SLP code use the same approach as the
loop vectorizer, by splitting out the code that calculates the
statement vector type and the vector type that should be used for
the number of units.

2018-05-16  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* tree-vectorizer.h (vect_get_vector_types_for_stmt): Declare.
	(vect_get_mask_type_for_stmt): Likewise.
	* tree-vect-slp.c (vect_two_operations_perm_ok_p): New function,
	split out from...
	(vect_build_slp_tree_1): ...here.  Use vect_get_vector_types_for_stmt
	to determine the statement's vector type and the vector type that
	should be used for calculating nunits.  Deal with cases in which
	the type has to be deferred.
	(vect_slp_analyze_node_operations): Use vect_get_vector_types_for_stmt
	and vect_get_mask_type_for_stmt to calculate STMT_VINFO_VECTYPE.
	* tree-vect-loop.c (vect_determine_vf_for_stmt_1)
	(vect_determine_vf_for_stmt): New functions, split out from...
	(vect_determine_vectorization_factor): ...here.
	* tree-vect-stmts.c (vect_get_vector_types_for_stmt)
	(vect_get_mask_type_for_stmt): New functions, split out from
	vect_determine_vectorization_factor.

gcc/testsuite/
	* gcc.target/aarch64/sve/vcond_10.c: New test.
	* gcc.target/aarch64/sve/vcond_10_run.c: Likewise.
	* gcc.target/aarch64/sve/vcond_11.c: Likewise.
	* gcc.target/aarch64/sve/vcond_11_run.c: Likewise.

From-SVN: r260287
parent c448fede
2018-05-16 Richard Sandiford <richard.sandiford@linaro.org>
* tree-vectorizer.h (vect_get_vector_types_for_stmt): Declare.
(vect_get_mask_type_for_stmt): Likewise.
* tree-vect-slp.c (vect_two_operations_perm_ok_p): New function,
split out from...
(vect_build_slp_tree_1): ...here. Use vect_get_vector_types_for_stmt
to determine the statement's vector type and the vector type that
should be used for calculating nunits. Deal with cases in which
the type has to be deferred.
(vect_slp_analyze_node_operations): Use vect_get_vector_types_for_stmt
and vect_get_mask_type_for_stmt to calculate STMT_VINFO_VECTYPE.
* tree-vect-loop.c (vect_determine_vf_for_stmt_1)
(vect_determine_vf_for_stmt): New functions, split out from...
(vect_determine_vectorization_factor): ...here.
* tree-vect-stmts.c (vect_get_vector_types_for_stmt)
(vect_get_mask_type_for_stmt): New functions, split out from
vect_determine_vectorization_factor.
2018-05-16 Richard Biener <rguenther@suse.de>
* tree-cfg.c (verify_gimple_assign_ternary): Properly
......
2018-05-16 Richard Sandiford <richard.sandiford@linaro.org>
* gcc.target/aarch64/sve/vcond_10.c: New test.
* gcc.target/aarch64/sve/vcond_10_run.c: Likewise.
* gcc.target/aarch64/sve/vcond_11.c: Likewise.
* gcc.target/aarch64/sve/vcond_11_run.c: Likewise.
2018-05-15 Martin Sebor <msebor@redhat.com>
PR tree-optimization/85753
......
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */

#include <stdint.h>

/* Two interleaved lanes, each selecting between a pair of invariant
   values under a compound boolean condition.  The "&&" of two
   comparisons exercises the vectorizer's handling of scalar boolean
   operations in SLP (see the commit this test belongs to: the SLP
   unroll factor must not blow up for packed-boolean targets).  */
#define DEF_LOOP(TYPE) \
void __attribute__ ((noinline, noclone)) \
test_##TYPE (TYPE *a, TYPE a1, TYPE a2, TYPE a3, TYPE a4, int n) \
{ \
  for (int i = 0; i < n; i += 2) \
    { \
      a[i] = a[i] >= 1 && a[i] != 3 ? a1 : a2; \
      a[i + 1] = a[i + 1] >= 1 && a[i + 1] != 3 ? a3 : a4; \
    } \
}

/* Instantiate the loop for each supported element type.  */
#define FOR_EACH_TYPE(T) \
T (int8_t) \
T (uint8_t) \
T (int16_t) \
T (uint16_t) \
T (int32_t) \
T (uint32_t) \
T (int64_t) \
T (uint64_t) \
T (_Float16) \
T (float) \
T (double)

FOR_EACH_TYPE (DEF_LOOP)

/* One load per function of each element size (2 x 8-bit, 3 x 16-bit
   including _Float16, 3 x 32-bit including float, 3 x 64-bit including
   double — 11 functions total) and one predicated select each.  */
/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */
/* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]} 11 } } */
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */

#include "vcond_10.c"

#define N 133

/* Fill A with i % 7, run test_TYPE with selector values 10..13 and
   verify every element: even lanes select between 10 and 11, odd lanes
   between 12 and 13, with the larger value chosen exactly when the
   DEF_LOOP condition is false, i.e. when the original a[i] (== i % 7)
   was 0 or 3.

   DEF_LOOP steps its index by 2, so with N odd its final iteration
   also writes a[N].  Give the array one element of slack (and
   initialize it) so that access stays in bounds; previously this
   overran a[N - 1].  */
#define TEST_LOOP(TYPE) \
  { \
    TYPE a[N + 1]; \
    for (int i = 0; i < N + 1; ++i) \
      a[i] = i % 7; \
    test_##TYPE (a, 10, 11, 12, 13, N); \
    for (int i = 0; i < N; ++i) \
      if (a[i] != 10 + (i & 1) * 2 + (i % 7 == 0 || i % 7 == 3)) \
	__builtin_abort (); \
  }

int
main (void)
{
  FOR_EACH_TYPE (TEST_LOOP);
  return 0;
}
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */
#include <stdint.h>
#define DEF_LOOP(TYPE) \
void __attribute__ ((noinline, noclone)) \
test_##TYPE (int *restrict a, TYPE *restrict b, int a1, int a2, \
int a3, int a4, int n) \
{ \
for (int i = 0; i < n; i += 2) \
{ \
a[i] = a[i] >= 1 & b[i] != 3 ? a1 : a2; \
a[i + 1] = a[i + 1] >= 1 & b[i + 1] != 3 ? a3 : a4; \
} \
}
#define FOR_EACH_TYPE(T) \
T (int8_t) \
T (uint8_t) \
T (int16_t) \
T (uint16_t) \
T (int64_t) \
T (uint64_t) \
T (double)
FOR_EACH_TYPE (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */
/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */
/* 4 for each 8-bit function, 2 for each 16-bit function, 1 for
each 64-bit function. */
/* { dg-final { scan-assembler-times {\tld1w\t} 15 } } */
/* 3 64-bit functions * 2 64-bit vectors per 32-bit vector. */
/* { dg-final { scan-assembler-times {\tld1d\t} 6 } } */
/* { dg-final { scan-assembler-times {\tsel\tz[0-9]} 15 } } */
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O2 -ftree-vectorize -march=armv8-a+sve" } */

#include "vcond_11.c"

#define N 133

/* Fill A with i % 5 and B with i % 7, run test_TYPE with selector
   values 10..13 and verify every element: even lanes select between
   10 and 11, odd lanes between 12 and 13, with the larger value
   chosen exactly when the DEF_LOOP condition is false, i.e. when
   i % 5 == 0 (a[i] < 1) or i % 7 == 3 (b[i] == 3).

   DEF_LOOP steps its index by 2, so with N odd its final iteration
   also touches a[N] and b[N].  Give both arrays one element of slack
   (and initialize it) so that access stays in bounds; previously this
   overran the arrays.  */
#define TEST_LOOP(TYPE) \
  { \
    int a[N + 1]; \
    TYPE b[N + 1]; \
    for (int i = 0; i < N + 1; ++i) \
      { \
	a[i] = i % 5; \
	b[i] = i % 7; \
      } \
    test_##TYPE (a, b, 10, 11, 12, 13, N); \
    for (int i = 0; i < N; ++i) \
      if (a[i] != 10 + (i & 1) * 2 + (i % 5 == 0 || i % 7 == 3)) \
	__builtin_abort (); \
  }

int
main (void)
{
  FOR_EACH_TYPE (TEST_LOOP);
  return 0;
}
...@@ -608,6 +608,33 @@ vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size, ...@@ -608,6 +608,33 @@ vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
return true; return true;
} }
/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */

static bool
vect_two_operations_perm_ok_p (vec<gimple *> stmts, unsigned int group_size,
			       tree vectype, tree_code alt_stmt_code)
{
  /* A constant permute vector needs a constant number of lanes.  */
  unsigned HOST_WIDE_INT nelts;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nelts))
    return false;

  /* Build a blend selector: lane LANE comes from the first vector
     unless statement LANE % GROUP_SIZE uses ALT_STMT_CODE, in which
     case it comes from the same lane of the second vector.  */
  vec_perm_builder sel (nelts, nelts, 1);
  for (unsigned int lane = 0; lane < nelts; ++lane)
    {
      bool use_alt
	= gimple_assign_rhs_code (stmts[lane % group_size]) == alt_stmt_code;
      sel.quick_push (use_alt ? lane + nelts : lane);
    }

  /* Ask the target whether it can do this two-input permute.  */
  vec_perm_indices indices (sel, 2, nelts);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}
/* Verify if the scalar stmts STMTS are isomorphic, require data /* Verify if the scalar stmts STMTS are isomorphic, require data
permutation or are of unsupported types of operation. Return permutation or are of unsupported types of operation. Return
true if they are, otherwise return false and indicate in *MATCHES true if they are, otherwise return false and indicate in *MATCHES
...@@ -636,17 +663,17 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, ...@@ -636,17 +663,17 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
enum tree_code first_cond_code = ERROR_MARK; enum tree_code first_cond_code = ERROR_MARK;
tree lhs; tree lhs;
bool need_same_oprnds = false; bool need_same_oprnds = false;
tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE; tree vectype = NULL_TREE, first_op1 = NULL_TREE;
optab optab; optab optab;
int icode; int icode;
machine_mode optab_op2_mode; machine_mode optab_op2_mode;
machine_mode vec_mode; machine_mode vec_mode;
HOST_WIDE_INT dummy;
gimple *first_load = NULL, *prev_first_load = NULL; gimple *first_load = NULL, *prev_first_load = NULL;
/* For every stmt in NODE find its def stmt/s. */ /* For every stmt in NODE find its def stmt/s. */
FOR_EACH_VEC_ELT (stmts, i, stmt) FOR_EACH_VEC_ELT (stmts, i, stmt)
{ {
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
swap[i] = 0; swap[i] = 0;
matches[i] = false; matches[i] = false;
...@@ -685,15 +712,19 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, ...@@ -685,15 +712,19 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
return false; return false;
} }
scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy); tree nunits_vectype;
vectype = get_vectype_for_scalar_type (scalar_type); if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype, &nunits_vectype)
max_nunits)) || (nunits_vectype
&& !vect_record_max_nunits (vinfo, stmt, group_size,
nunits_vectype, max_nunits)))
{ {
/* Fatal mismatch. */ /* Fatal mismatch. */
matches[0] = false; matches[0] = false;
return false; return false;
} }
gcc_assert (vectype);
if (gcall *call_stmt = dyn_cast <gcall *> (stmt)) if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
{ {
...@@ -730,6 +761,17 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, ...@@ -730,6 +761,17 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
|| rhs_code == LROTATE_EXPR || rhs_code == LROTATE_EXPR
|| rhs_code == RROTATE_EXPR) || rhs_code == RROTATE_EXPR)
{ {
if (vectype == boolean_type_node)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Build SLP failed: shift of a"
" boolean.\n");
/* Fatal mismatch. */
matches[0] = false;
return false;
}
vec_mode = TYPE_MODE (vectype); vec_mode = TYPE_MODE (vectype);
/* First see if we have a vector/vector shift. */ /* First see if we have a vector/vector shift. */
...@@ -973,29 +1015,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, ...@@ -973,29 +1015,12 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
/* If we allowed a two-operation SLP node verify the target can cope /* If we allowed a two-operation SLP node verify the target can cope
with the permute we are going to use. */ with the permute we are going to use. */
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
if (alt_stmt_code != ERROR_MARK if (alt_stmt_code != ERROR_MARK
&& TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
{ {
unsigned HOST_WIDE_INT count; if (vectype == boolean_type_node
if (!nunits.is_constant (&count)) || !vect_two_operations_perm_ok_p (stmts, group_size,
{ vectype, alt_stmt_code))
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Build SLP failed: different operations "
"not allowed with variable-length SLP.\n");
return false;
}
vec_perm_builder sel (count, count, 1);
for (i = 0; i < count; ++i)
{
unsigned int elt = i;
if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
elt += count;
sel.quick_push (elt);
}
vec_perm_indices indices (sel, 2, count);
if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
{ {
for (i = 0; i < group_size; ++i) for (i = 0; i < group_size; ++i)
if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code) if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
...@@ -2759,36 +2784,18 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, ...@@ -2759,36 +2784,18 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
if (bb_vinfo if (bb_vinfo
&& ! STMT_VINFO_DATA_REF (stmt_info)) && ! STMT_VINFO_DATA_REF (stmt_info))
{ {
gcc_assert (PURE_SLP_STMT (stmt_info)); tree vectype, nunits_vectype;
if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); &nunits_vectype))
if (dump_enabled_p ()) /* We checked this when building the node. */
{ gcc_unreachable ();
dump_printf_loc (MSG_NOTE, vect_location, if (vectype == boolean_type_node)
"get vectype for scalar type: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
dump_printf (MSG_NOTE, "\n");
}
tree vectype = get_vectype_for_scalar_type (scalar_type);
if (!vectype)
{
if (dump_enabled_p ())
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"not SLPed: unsupported data-type ");
dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
scalar_type);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
return false;
}
if (dump_enabled_p ())
{ {
dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); vectype = vect_get_mask_type_for_stmt (stmt_info);
dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype); if (!vectype)
dump_printf (MSG_NOTE, "\n"); /* vect_get_mask_type_for_stmt has already explained the
failure. */
return false;
} }
gimple *sstmt; gimple *sstmt;
......
...@@ -10520,3 +10520,311 @@ vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index, ...@@ -10520,3 +10520,311 @@ vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
gimple_seq_add_stmt (seq, call); gimple_seq_add_stmt (seq, call);
return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp); return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
} }
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - boolean_type_node if the statement is a boolean operation whose
       vector type can only be determined once all the other vector types
       are known; and
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

bool
vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out)
{
  gimple *stmt = stmt_info->stmt;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     it really needs can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return true;
	}

      /* Anything else without an lhs can't be vectorized.  */
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: irregular stmt.");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}
      return false;
    }

  /* Statements that already operate on a vector mode can't be
     (re)vectorized.  */
  if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: vector stmt in loop:");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	}
      return false;
    }

  tree vectype;
  tree scalar_type = NULL_TREE;
  if (STMT_VINFO_VECTYPE (stmt_info))
    /* The vector type was already computed earlier (the assert below
       implies this is always the case for data references).  */
    *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info);
  else
    {
      gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
      if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	/* Take the type from argument 3 (the stored value).  */
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      /* Pure bool ops don't participate in number-of-units computation.
	 For comparisons use the types being compared.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
	  && is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) != COND_EXPR)
	{
	  *stmt_vectype_out = boolean_type_node;

	  tree rhs1 = gimple_assign_rhs1 (stmt);
	  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
	      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	    scalar_type = TREE_TYPE (rhs1);
	  else
	    {
	      /* A boolean operation on booleans: the vector type has to
		 be deferred until the operands' types are known, so
		 succeed with *NUNITS_VECTYPE_OUT left as NULL_TREE.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "pure bool operation.\n");
	      return true;
	    }
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not vectorized: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return false;
	}

      /* Don't override boolean_type_node set above.  */
      if (!*stmt_vectype_out)
	*stmt_vectype_out = vectype;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
	  dump_printf (MSG_NOTE, "\n");
	}
    }

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype;
  if (VECTOR_BOOLEAN_TYPE_P (vectype))
    nunits_vectype = vectype;
  else
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      if (*stmt_vectype_out != boolean_type_node)
	{
	  HOST_WIDE_INT dummy;
	  scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
	}
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}
      nunits_vectype = get_vectype_for_scalar_type (scalar_type);
    }
  if (!nunits_vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: unsupported data-type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return false;
    }

  /* Only one vector size is supported per vectorization, so the two
     vector types must occupy the same number of bytes.  */
  if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		GET_MODE_SIZE (TYPE_MODE (nunits_vectype))))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: different sized vector "
			   "types in statement, ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
	  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, nunits_vectype);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, nunits_vectype);
      dump_printf (MSG_NOTE, "\n");

      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return true;
}
/* Try to determine the correct vector type for STMT_INFO, which is a
   statement that produces a scalar boolean result.  Return the vector
   type on success, otherwise return NULL_TREE.  */

tree
vect_get_mask_type_for_stmt (stmt_vec_info stmt_info)
{
  gimple *stmt = stmt_info->stmt;
  tree mask_type = NULL;
  tree vectype, scalar_type;

  if (is_gimple_assign (stmt)
      && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
      && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt))))
    {
      /* A comparison of non-boolean operands: derive the mask type
	 from the type being compared.  */
      scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
      mask_type = get_mask_type_for_scalar_type (scalar_type);

      if (!mask_type)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: unsupported mask\n");
	  return NULL_TREE;
	}
    }
  else
    {
      /* Otherwise work the mask type out from the vector types of the
	 statement's SSA operands, requiring them all to agree.  */
      tree rhs;
      ssa_op_iter iter;
      gimple *def_stmt;
      enum vect_def_type dt;

      FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
	{
	  if (!vect_is_simple_use (rhs, stmt_info->vinfo,
				   &def_stmt, &dt, &vectype))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "not vectorized: can't compute mask type "
				   "for statement, ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
				    0);
		}
	      return NULL_TREE;
	    }

	  /* No vectype probably means external definition.
	     Allow it in case there is another operand which
	     allows to determine mask type.  */
	  if (!vectype)
	    continue;

	  if (!mask_type)
	    /* First operand with a vector type: adopt it.  */
	    mask_type = vectype;
	  else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
			     TYPE_VECTOR_SUBPARTS (vectype)))
	    {
	      /* Operands with different numbers of lanes can't be
		 combined into a single mask.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "not vectorized: different sized masks "
				   "types in statement, ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				     mask_type);
		  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				     vectype);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return NULL_TREE;
	    }
	  else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
		   != VECTOR_BOOLEAN_TYPE_P (vectype))
	    {
	      /* Mixing mask and non-mask operand types isn't
		 supported either.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "not vectorized: mixed mask and "
				   "nonmask vector types in statement, ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				     mask_type);
		  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				     vectype);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return NULL_TREE;
	    }
	}

      /* We may compare boolean value loaded as vector of integers.
	 Fix mask_type in such case.  */
      if (mask_type
	  && !VECTOR_BOOLEAN_TYPE_P (mask_type)
	  && gimple_code (stmt) == GIMPLE_ASSIGN
	  && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
	mask_type = build_same_sized_truth_vector_type (mask_type);
    }

  /* No mask_type should mean loop invariant predicate.
     This is probably a subject for optimization in if-conversion.  */
  if (!mask_type && dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "not vectorized: can't compute mask type "
		       "for statement, ");
      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
    }
  return mask_type;
}
...@@ -1467,6 +1467,8 @@ extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &); ...@@ -1467,6 +1467,8 @@ extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &);
extern void optimize_mask_stores (struct loop*); extern void optimize_mask_stores (struct loop*);
extern gcall *vect_gen_while (tree, tree, tree); extern gcall *vect_gen_while (tree, tree, tree);
extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree); extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
extern bool vect_get_vector_types_for_stmt (stmt_vec_info, tree *, tree *);
extern tree vect_get_mask_type_for_stmt (stmt_vec_info);
/* In tree-vect-data-refs.c. */ /* In tree-vect-data-refs.c. */
extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment