Commit c3a8f964 by Richard Sandiford, committed by Richard Sandiford

Make vectorizable_load/store handle IFN_MASK_LOAD/STORE

After the previous patches, it's easier to see that the remaining
inlined transform code in vectorizable_mask_load_store is just a
cut-down version of the VMAT_CONTIGUOUS handling in vectorizable_load
and vectorizable_store.  This patch therefore makes those functions
handle masked loads and stores instead.

This makes it easier to handle more forms of masked load and store
without duplicating logic from the unmasked forms.  It also helps with
support for fully-masked loops.
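
For illustration (not part of the patch; the function and variable names
below are made up), the masked accesses in question come from if-converted
conditional loops such as:

	/* Illustrative only: on a target with masked vector loads and
	   stores (e.g. AVX-512 or SVE), the vectorizer if-converts the
	   conditional body and accesses src[i] and dst[i] through
	   masked operations.  */
	void
	f (int *restrict dst, const int *restrict src,
	   const int *restrict cond, int n)
	{
	  for (int i = 0; i < n; i++)
	    if (cond[i])
	      dst[i] = src[i] + 1;
	}

After if-conversion such accesses appear as internal calls of the form
MASK_LOAD (addr, align, mask) and MASK_STORE (addr, align, mask, value),
with the mask as argument 2 and the stored value as argument 3 (the layout
vect_get_store_rhs relies on below); these calls are what vectorizable_load
and vectorizable_store now transform directly.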

2018-01-03  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* tree-vect-stmts.c (vect_get_store_rhs): New function.
	(vectorizable_mask_load_store): Delete.
	(vectorizable_call): Return false for masked loads and stores.
	(vectorizable_store): Handle IFN_MASK_STORE.  Use vect_get_store_rhs
	instead of gimple_assign_rhs1.
	(vectorizable_load): Handle IFN_MASK_LOAD.
	(vect_transform_stmt): Don't set is_store for call_vec_info_type.

From-SVN: r256216
@@ -1726,6 +1726,26 @@ perm_mask_for_reverse (tree vectype)
return vect_gen_perm_mask_checked (vectype, indices);
}
/* STMT is either a masked or unconditional store. Return the value
being stored. */
static tree
vect_get_store_rhs (gimple *stmt)
{
if (gassign *assign = dyn_cast <gassign *> (stmt))
{
gcc_assert (gimple_assign_single_p (assign));
return gimple_assign_rhs1 (assign);
}
if (gcall *call = dyn_cast <gcall *> (stmt))
{
internal_fn ifn = gimple_call_internal_fn (call);
gcc_assert (ifn == IFN_MASK_STORE);
return gimple_call_arg (stmt, 3);
}
gcc_unreachable ();
}
/* A subroutine of get_load_store_type, with a subset of the same
arguments.  Handle the case where STMT is part of a grouped load
or store.
@@ -2398,251 +2418,6 @@ vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
}
}
/* Function vectorizable_mask_load_store.
Check if STMT performs a conditional load or store that can be vectorized.
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
stmt to replace it, put it in VEC_STMT, and insert it at GSI.
Return FALSE if not a vectorizable STMT, TRUE otherwise. */
static bool
vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
gimple **vec_stmt, slp_tree slp_node)
{
tree vec_dest = NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
stmt_vec_info prev_stmt_info;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree rhs_vectype = NULL_TREE;
tree mask_vectype;
tree elem_type;
gimple *new_stmt;
tree dummy;
tree dataref_ptr = NULL_TREE;
gimple *ptr_incr;
int ncopies;
int i;
bool inv_p;
gather_scatter_info gs_info;
vec_load_store_type vls_type;
tree mask;
gimple *def_stmt;
enum vect_def_type dt;
if (slp_node != NULL)
return false;
ncopies = vect_get_num_copies (loop_vinfo, vectype);
gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop && ncopies > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"multiple types in nested loop.");
return false;
}
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
&& ! vec_stmt)
return false;
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
mask = gimple_call_arg (stmt, 2);
if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
return false;
elem_type = TREE_TYPE (vectype);
if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
{
tree rhs = gimple_call_arg (stmt, 3);
if (!vect_check_store_rhs (stmt, rhs, &rhs_vectype, &vls_type))
return false;
}
else
vls_type = VLS_LOAD;
vect_memory_access_type memory_access_type;
if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
&memory_access_type, &gs_info))
return false;
if (memory_access_type == VMAT_GATHER_SCATTER)
{
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
tree masktype
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
if (TREE_CODE (masktype) == INTEGER_TYPE)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"masked gather with integer mask not supported.");
return false;
}
}
else if (memory_access_type != VMAT_CONTIGUOUS)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unsupported access type for masked %s.\n",
vls_type == VLS_LOAD ? "load" : "store");
return false;
}
else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
|| !can_vec_mask_load_store_p (TYPE_MODE (vectype),
TYPE_MODE (mask_vectype),
vls_type == VLS_LOAD))
return false;
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
if (vls_type == VLS_LOAD)
vect_model_load_cost (stmt_info, ncopies, memory_access_type,
NULL, NULL, NULL);
else
vect_model_store_cost (stmt_info, ncopies, memory_access_type,
vls_type, NULL, NULL, NULL);
return true;
}
gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
/* Transform. */
if (memory_access_type == VMAT_GATHER_SCATTER)
{
vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask);
return true;
}
else if (vls_type != VLS_LOAD)
{
tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
prev_stmt_info = NULL;
LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
for (i = 0; i < ncopies; i++)
{
unsigned align, misalign;
if (i == 0)
{
tree rhs = gimple_call_arg (stmt, 3);
vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
vec_mask = vect_get_vec_def_for_operand (mask, stmt,
mask_vectype);
/* We should have catched mismatched types earlier. */
gcc_assert (useless_type_conversion_p (vectype,
TREE_TYPE (vec_rhs)));
dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
NULL_TREE, &dummy, gsi,
&ptr_incr, false, &inv_p);
gcc_assert (!inv_p);
}
else
{
vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
TYPE_SIZE_UNIT (vectype));
}
align = DR_TARGET_ALIGNMENT (dr);
if (aligned_access_p (dr))
misalign = 0;
else if (DR_MISALIGNMENT (dr) == -1)
{
align = TYPE_ALIGN_UNIT (elem_type);
misalign = 0;
}
else
misalign = DR_MISALIGNMENT (dr);
set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
misalign);
tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
misalign ? least_bit_hwi (misalign) : align);
gcall *call
= gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
ptr, vec_mask, vec_rhs);
gimple_call_set_nothrow (call, true);
new_stmt = call;
vect_finish_stmt_generation (stmt, new_stmt, gsi);
if (i == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
prev_stmt_info = vinfo_for_stmt (new_stmt);
}
}
else
{
tree vec_mask = NULL_TREE;
prev_stmt_info = NULL;
vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
for (i = 0; i < ncopies; i++)
{
unsigned align, misalign;
if (i == 0)
{
vec_mask = vect_get_vec_def_for_operand (mask, stmt,
mask_vectype);
dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
NULL_TREE, &dummy, gsi,
&ptr_incr, false, &inv_p);
gcc_assert (!inv_p);
}
else
{
vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
TYPE_SIZE_UNIT (vectype));
}
align = DR_TARGET_ALIGNMENT (dr);
if (aligned_access_p (dr))
misalign = 0;
else if (DR_MISALIGNMENT (dr) == -1)
{
align = TYPE_ALIGN_UNIT (elem_type);
misalign = 0;
}
else
misalign = DR_MISALIGNMENT (dr);
set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
misalign);
tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
misalign ? least_bit_hwi (misalign) : align);
gcall *call
= gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
ptr, vec_mask);
gimple_call_set_lhs (call, make_ssa_name (vec_dest));
gimple_call_set_nothrow (call, true);
vect_finish_stmt_generation (stmt, call, gsi);
if (i == 0)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = call;
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = call;
prev_stmt_info = vinfo_for_stmt (call);
}
}
return true;
}
/* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
static bool
@@ -2833,8 +2608,8 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
if (gimple_call_internal_p (stmt)
&& (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
|| gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
slp_node);
/* Handled by vectorizable_load and vectorizable_store. */
return false;
if (gimple_call_lhs (stmt) == NULL_TREE
|| TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
@@ -5851,7 +5626,6 @@ static bool
vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
slp_tree slp_node)
{
tree scalar_dest;
tree data_ref;
tree op;
tree vec_oprnd = NULL_TREE;
@@ -5900,28 +5674,48 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
/* Is vectorizable store? */
if (!is_gimple_assign (stmt))
return false;
scalar_dest = gimple_assign_lhs (stmt);
if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
&& is_pattern_stmt_p (stmt_info))
scalar_dest = TREE_OPERAND (scalar_dest, 0);
if (TREE_CODE (scalar_dest) != ARRAY_REF
&& TREE_CODE (scalar_dest) != BIT_FIELD_REF
&& TREE_CODE (scalar_dest) != INDIRECT_REF
&& TREE_CODE (scalar_dest) != COMPONENT_REF
&& TREE_CODE (scalar_dest) != IMAGPART_EXPR
&& TREE_CODE (scalar_dest) != REALPART_EXPR
&& TREE_CODE (scalar_dest) != MEM_REF)
return false;
tree mask = NULL_TREE, mask_vectype = NULL_TREE;
if (is_gimple_assign (stmt))
{
tree scalar_dest = gimple_assign_lhs (stmt);
if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
&& is_pattern_stmt_p (stmt_info))
scalar_dest = TREE_OPERAND (scalar_dest, 0);
if (TREE_CODE (scalar_dest) != ARRAY_REF
&& TREE_CODE (scalar_dest) != BIT_FIELD_REF
&& TREE_CODE (scalar_dest) != INDIRECT_REF
&& TREE_CODE (scalar_dest) != COMPONENT_REF
&& TREE_CODE (scalar_dest) != IMAGPART_EXPR
&& TREE_CODE (scalar_dest) != REALPART_EXPR
&& TREE_CODE (scalar_dest) != MEM_REF)
return false;
}
else
{
gcall *call = dyn_cast <gcall *> (stmt);
if (!call || !gimple_call_internal_p (call, IFN_MASK_STORE))
return false;
if (slp_node != NULL)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"SLP of masked stores not supported.\n");
return false;
}
ref_type = TREE_TYPE (gimple_call_arg (call, 1));
mask = gimple_call_arg (call, 2);
if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
return false;
}
op = vect_get_store_rhs (stmt);
/* Cannot have hybrid store SLP -- that would mean storing to the
same location twice. */
gcc_assert (slp == PURE_SLP_STMT (stmt_info));
gcc_assert (gimple_assign_single_p (stmt));
tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
@@ -5952,18 +5746,12 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
return false;
}
op = gimple_assign_rhs1 (stmt);
if (!vect_check_store_rhs (stmt, op, &rhs_vectype, &vls_type))
return false;
elem_type = TREE_TYPE (vectype);
vec_mode = TYPE_MODE (vectype);
/* FORNOW. In some cases can vectorize even if data-type not supported
(e.g. - array initialization with 0). */
if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
return false;
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
@@ -5972,6 +5760,28 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
&memory_access_type, &gs_info))
return false;
if (mask)
{
if (memory_access_type != VMAT_CONTIGUOUS)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unsupported access type for masked store.\n");
return false;
}
if (!VECTOR_MODE_P (vec_mode)
|| !can_vec_mask_load_store_p (vec_mode, TYPE_MODE (mask_vectype),
false))
return false;
}
else
{
/* FORNOW. In some cases can vectorize even if data-type not supported
(e.g. - array initialization with 0). */
if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
return false;
}
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
@@ -5990,7 +5800,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
if (memory_access_type == VMAT_GATHER_SCATTER)
{
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
tree ptr, mask, var, scale, perm_mask = NULL_TREE;
@@ -6069,7 +5879,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
if (j == 0)
{
src = vec_oprnd1
= vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
= vect_get_vec_def_for_operand (op, stmt);
op = vec_oprnd0
= vect_get_vec_def_for_operand (gs_info.offset, stmt);
}
@@ -6169,7 +5979,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
op = gimple_assign_rhs1 (first_stmt);
op = vect_get_store_rhs (first_stmt);
}
else
/* VEC_NUM is the number of vect stmts to be created for this
@@ -6338,7 +6148,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
for (j = 0; j < ncopies; j++)
{
/* We've set op and dt above, from gimple_assign_rhs1(stmt),
/* We've set op and dt above, from vect_get_store_rhs,
and first_stmt == stmt. */
if (j == 0)
{
@@ -6350,8 +6160,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
}
else
{
gcc_assert (gimple_assign_single_p (next_stmt));
op = gimple_assign_rhs1 (next_stmt);
op = vect_get_store_rhs (next_stmt);
vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
}
}
@@ -6438,8 +6247,9 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
gcc_assert (alignment_support_scheme);
/* Targets with store-lane instructions must not require explicit
realignment. */
realignment. vect_supportable_dr_alignment always returns either
dr_aligned or dr_unaligned_supported for masked operations. */
gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES && !mask)
|| alignment_support_scheme == dr_aligned
|| alignment_support_scheme == dr_unaligned_supported);
@@ -6452,6 +6262,9 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
else
aggr_type = vectype;
if (mask)
LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
/* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
more than one vector stmt - i.e - we need to "unroll" the
@@ -6492,6 +6305,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
*/
prev_stmt_info = NULL;
tree vec_mask = NULL_TREE;
for (j = 0; j < ncopies; j++)
{
@@ -6522,15 +6336,15 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
Therefore, NEXT_STMT can't be NULL_TREE. In case that
there is no interleaving, GROUP_SIZE is 1, and only one
iteration of the loop will be executed. */
gcc_assert (next_stmt
&& gimple_assign_single_p (next_stmt));
op = gimple_assign_rhs1 (next_stmt);
op = vect_get_store_rhs (next_stmt);
vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
dr_chain.quick_push (vec_oprnd);
oprnds.quick_push (vec_oprnd);
next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
}
if (mask)
vec_mask = vect_get_vec_def_for_operand (mask, stmt,
mask_vectype);
}
/* We should have catched mismatched types earlier. */
@@ -6575,6 +6389,11 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
dr_chain[i] = vec_oprnd;
oprnds[i] = vec_oprnd;
}
if (mask)
{
vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
}
if (dataref_offset)
dataref_offset
= int_const_binop (PLUS_EXPR, dataref_offset,
@@ -6635,11 +6454,6 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
vect_permute_store_chain(). */
vec_oprnd = result_chain[i];
data_ref = fold_build2 (MEM_REF, vectype,
dataref_ptr,
dataref_offset
? dataref_offset
: build_int_cst (ref_type, 0));
align = DR_TARGET_ALIGNMENT (first_dr);
if (aligned_access_p (first_dr))
misalign = 0;
@@ -6647,17 +6461,9 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
{
align = dr_alignment (vect_dr_behavior (first_dr));
misalign = 0;
TREE_TYPE (data_ref)
= build_aligned_type (TREE_TYPE (data_ref),
align * BITS_PER_UNIT);
}
else
{
TREE_TYPE (data_ref)
= build_aligned_type (TREE_TYPE (data_ref),
TYPE_ALIGN (elem_type));
misalign = DR_MISALIGNMENT (first_dr);
}
misalign = DR_MISALIGNMENT (first_dr);
if (dataref_offset == NULL_TREE
&& TREE_CODE (dataref_ptr) == SSA_NAME)
set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
@@ -6667,7 +6473,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
{
tree perm_mask = perm_mask_for_reverse (vectype);
tree perm_dest
= vect_create_destination_var (gimple_assign_rhs1 (stmt),
= vect_create_destination_var (vect_get_store_rhs (stmt),
vectype);
tree new_temp = make_ssa_name (perm_dest);
@@ -6682,7 +6488,36 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
}
/* Arguments are ready. Create the new vector stmt. */
new_stmt = gimple_build_assign (data_ref, vec_oprnd);
if (mask)
{
align = least_bit_hwi (misalign | align);
tree ptr = build_int_cst (ref_type, align);
gcall *call
= gimple_build_call_internal (IFN_MASK_STORE, 4,
dataref_ptr, ptr,
vec_mask, vec_oprnd);
gimple_call_set_nothrow (call, true);
new_stmt = call;
}
else
{
data_ref = fold_build2 (MEM_REF, vectype,
dataref_ptr,
dataref_offset
? dataref_offset
: build_int_cst (ref_type, 0));
if (aligned_access_p (first_dr))
;
else if (DR_MISALIGNMENT (first_dr) == -1)
TREE_TYPE (data_ref)
= build_aligned_type (TREE_TYPE (data_ref),
align * BITS_PER_UNIT);
else
TREE_TYPE (data_ref)
= build_aligned_type (TREE_TYPE (data_ref),
TYPE_ALIGN (elem_type));
new_stmt = gimple_build_assign (data_ref, vec_oprnd);
}
vect_finish_stmt_generation (stmt, new_stmt, gsi);
if (slp)
@@ -6865,7 +6700,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
int vec_num;
bool slp = (slp_node != NULL);
bool slp_perm = false;
enum tree_code code;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
poly_uint64 vf;
tree aggr_type;
@@ -6880,24 +6714,46 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
&& ! vec_stmt)
return false;
/* Is vectorizable load? */
if (!is_gimple_assign (stmt))
return false;
scalar_dest = gimple_assign_lhs (stmt);
if (TREE_CODE (scalar_dest) != SSA_NAME)
return false;
code = gimple_assign_rhs_code (stmt);
if (code != ARRAY_REF
&& code != BIT_FIELD_REF
&& code != INDIRECT_REF
&& code != COMPONENT_REF
&& code != IMAGPART_EXPR
&& code != REALPART_EXPR
&& code != MEM_REF
&& TREE_CODE_CLASS (code) != tcc_declaration)
return false;
tree mask = NULL_TREE, mask_vectype = NULL_TREE;
if (is_gimple_assign (stmt))
{
scalar_dest = gimple_assign_lhs (stmt);
if (TREE_CODE (scalar_dest) != SSA_NAME)
return false;
tree_code code = gimple_assign_rhs_code (stmt);
if (code != ARRAY_REF
&& code != BIT_FIELD_REF
&& code != INDIRECT_REF
&& code != COMPONENT_REF
&& code != IMAGPART_EXPR
&& code != REALPART_EXPR
&& code != MEM_REF
&& TREE_CODE_CLASS (code) != tcc_declaration)
return false;
}
else
{
gcall *call = dyn_cast <gcall *> (stmt);
if (!call || !gimple_call_internal_p (call, IFN_MASK_LOAD))
return false;
scalar_dest = gimple_call_lhs (call);
if (!scalar_dest)
return false;
if (slp_node != NULL)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"SLP of masked loads not supported.\n");
return false;
}
mask = gimple_call_arg (call, 2);
if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
return false;
}
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
@@ -7008,6 +6864,38 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
&memory_access_type, &gs_info))
return false;
if (mask)
{
if (memory_access_type == VMAT_CONTIGUOUS)
{
if (!VECTOR_MODE_P (TYPE_MODE (vectype))
|| !can_vec_mask_load_store_p (TYPE_MODE (vectype),
TYPE_MODE (mask_vectype), true))
return false;
}
else if (memory_access_type == VMAT_GATHER_SCATTER)
{
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
tree masktype
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
if (TREE_CODE (masktype) == INTEGER_TYPE)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"masked gather with integer mask not"
" supported.");
return false;
}
}
else
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unsupported access type for masked load.\n");
return false;
}
}
if (!vec_stmt) /* transformation not required. */
{
if (!slp)
@@ -7034,7 +6922,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
if (memory_access_type == VMAT_GATHER_SCATTER)
{
vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, NULL_TREE);
vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask);
return true;
}
@@ -7471,6 +7359,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
else
aggr_type = vectype;
tree vec_mask = NULL_TREE;
prev_stmt_info = NULL;
poly_uint64 group_elt = 0;
for (j = 0; j < ncopies; j++)
@@ -7518,13 +7407,26 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
offset, &dummy, gsi, &ptr_incr,
simd_lane_access_p, &inv_p,
byte_offset);
if (mask)
vec_mask = vect_get_vec_def_for_operand (mask, stmt,
mask_vectype);
}
else if (dataref_offset)
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
TYPE_SIZE_UNIT (aggr_type));
else
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
TYPE_SIZE_UNIT (aggr_type));
else
{
if (dataref_offset)
dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
TYPE_SIZE_UNIT (aggr_type));
else
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
TYPE_SIZE_UNIT (aggr_type));
if (mask)
{
gimple *def_stmt;
vect_def_type dt;
vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
}
}
if (grouped_load || slp_perm)
dr_chain.create (vec_num);
@@ -7572,11 +7474,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
{
unsigned int align, misalign;
data_ref
= fold_build2 (MEM_REF, vectype, dataref_ptr,
dataref_offset
? dataref_offset
: build_int_cst (ref_type, 0));
align = DR_TARGET_ALIGNMENT (dr);
if (alignment_support_scheme == dr_aligned)
{
@@ -7587,21 +7484,44 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
{
align = dr_alignment (vect_dr_behavior (first_dr));
misalign = 0;
TREE_TYPE (data_ref)
= build_aligned_type (TREE_TYPE (data_ref),
align * BITS_PER_UNIT);
}
else
{
TREE_TYPE (data_ref)
= build_aligned_type (TREE_TYPE (data_ref),
TYPE_ALIGN (elem_type));
misalign = DR_MISALIGNMENT (first_dr);
}
misalign = DR_MISALIGNMENT (first_dr);
if (dataref_offset == NULL_TREE
&& TREE_CODE (dataref_ptr) == SSA_NAME)
set_ptr_info_alignment (get_ptr_info (dataref_ptr),
align, misalign);
if (mask)
{
align = least_bit_hwi (misalign | align);
tree ptr = build_int_cst (ref_type, align);
gcall *call
= gimple_build_call_internal (IFN_MASK_LOAD, 3,
dataref_ptr, ptr,
vec_mask);
gimple_call_set_nothrow (call, true);
new_stmt = call;
data_ref = NULL_TREE;
}
else
{
data_ref
= fold_build2 (MEM_REF, vectype, dataref_ptr,
dataref_offset
? dataref_offset
: build_int_cst (ref_type, 0));
if (alignment_support_scheme == dr_aligned)
;
else if (DR_MISALIGNMENT (first_dr) == -1)
TREE_TYPE (data_ref)
= build_aligned_type (TREE_TYPE (data_ref),
align * BITS_PER_UNIT);
else
TREE_TYPE (data_ref)
= build_aligned_type (TREE_TYPE (data_ref),
TYPE_ALIGN (elem_type));
}
break;
}
case dr_explicit_realign:
@@ -7677,9 +7597,11 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
gcc_unreachable ();
}
vec_dest = vect_create_destination_var (scalar_dest, vectype);
new_stmt = gimple_build_assign (vec_dest, data_ref);
/* DATA_REF is null if we've already built the statement. */
if (data_ref)
new_stmt = gimple_build_assign (vec_dest, data_ref);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
gimple_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* 3. Handle explicit realignment if necessary/supported.
@@ -8898,8 +8820,6 @@ vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
case call_vec_info_type:
done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
stmt = gsi_stmt (*gsi);
if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
is_store = true;
break;
case call_simd_clone_vec_info_type: