Commit c48d2d35 by Richard Sandiford Committed by Richard Sandiford

Split gather load handling out of vectorizable_{mask_load_store,load}

vectorizable_mask_load_store and vectorizable_load used the same
code to build a gather load call, except that the former also
vectorised a mask argument and used it for both the merge and mask
inputs.  The latter instead used a merge input of zero and a mask
input of all-ones.  This patch splits it out into a subroutine.

2018-01-03  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* tree-vect-stmts.c (vect_build_gather_load_calls): New function,
	split out from...
	(vectorizable_mask_load_store): ...here.
	(vectorizable_load): ...and here.

From-SVN: r256215
parent bc9587eb
2018-01-03 Richard Sandiford <richard.sandiford@linaro.org> 2018-01-03 Richard Sandiford <richard.sandiford@linaro.org>
* tree-vect-stmts.c (vect_build_gather_load_calls): New function,
split out from...
(vectorizable_mask_load_store): ...here.
(vectorizable_load): ...and here.
2018-01-03 Richard Sandiford <richard.sandiford@linaro.org>
* tree-vect-stmts.c (vect_build_all_ones_mask) * tree-vect-stmts.c (vect_build_all_ones_mask)
(vect_build_zero_merge_argument): New functions, split out from... (vect_build_zero_merge_argument): New functions, split out from...
(vectorizable_load): ...here. (vectorizable_load): ...here.
......
...@@ -2194,154 +2194,39 @@ vect_build_zero_merge_argument (gimple *stmt, tree vectype) ...@@ -2194,154 +2194,39 @@ vect_build_zero_merge_argument (gimple *stmt, tree vectype)
return vect_init_vector (stmt, merge, vectype, NULL); return vect_init_vector (stmt, merge, vectype, NULL);
} }
/* Function vectorizable_mask_load_store. /* Build a gather load call while vectorizing STMT. Insert new instructions
before GSI and add them to VEC_STMT. GS_INFO describes the gather load
operation. If the load is conditional, MASK is the unvectorized
condition, otherwise MASK is null. */
Check if STMT performs a conditional load or store that can be vectorized. static void
If VEC_STMT is also passed, vectorize the STMT: create a vectorized vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
stmt to replace it, put it in VEC_STMT, and insert it at GSI. gimple **vec_stmt, gather_scatter_info *gs_info,
Return FALSE if not a vectorizable STMT, TRUE otherwise. */ tree mask)
static bool
vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
gimple **vec_stmt, slp_tree slp_node)
{ {
tree vec_dest = NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
stmt_vec_info prev_stmt_info;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
tree vectype = STMT_VINFO_VECTYPE (stmt_info); tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree rhs_vectype = NULL_TREE;
tree mask_vectype;
tree elem_type;
gimple *new_stmt;
tree dummy;
tree dataref_ptr = NULL_TREE;
gimple *ptr_incr;
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
int ncopies; int ncopies = vect_get_num_copies (loop_vinfo, vectype);
int i, j;
bool inv_p;
gather_scatter_info gs_info;
vec_load_store_type vls_type;
tree mask;
gimple *def_stmt;
enum vect_def_type dt;
if (slp_node != NULL)
return false;
ncopies = vect_get_num_copies (loop_vinfo, vectype);
gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop && ncopies > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"multiple types in nested loop.");
return false;
}
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
&& ! vec_stmt)
return false;
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
mask = gimple_call_arg (stmt, 2);
if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
return false;
elem_type = TREE_TYPE (vectype);
if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
{
tree rhs = gimple_call_arg (stmt, 3);
if (!vect_check_store_rhs (stmt, rhs, &rhs_vectype, &vls_type))
return false;
}
else
vls_type = VLS_LOAD;
vect_memory_access_type memory_access_type;
if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
&memory_access_type, &gs_info))
return false;
if (memory_access_type == VMAT_GATHER_SCATTER)
{
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
tree masktype
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
if (TREE_CODE (masktype) == INTEGER_TYPE)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"masked gather with integer mask not supported.");
return false;
}
}
else if (memory_access_type != VMAT_CONTIGUOUS)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unsupported access type for masked %s.\n",
vls_type == VLS_LOAD ? "load" : "store");
return false;
}
else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
|| !can_vec_mask_load_store_p (TYPE_MODE (vectype),
TYPE_MODE (mask_vectype),
vls_type == VLS_LOAD))
return false;
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
if (vls_type == VLS_LOAD)
vect_model_load_cost (stmt_info, ncopies, memory_access_type,
NULL, NULL, NULL);
else
vect_model_store_cost (stmt_info, ncopies, memory_access_type,
vls_type, NULL, NULL, NULL);
return true;
}
gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
/* Transform. */
if (memory_access_type == VMAT_GATHER_SCATTER)
{
tree vec_oprnd0 = NULL_TREE, op;
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
tree mask_perm_mask = NULL_TREE;
edge pe = loop_preheader_edge (loop); edge pe = loop_preheader_edge (loop);
gimple_seq seq;
basic_block new_bb;
enum { NARROW, NONE, WIDEN } modifier; enum { NARROW, NONE, WIDEN } modifier;
poly_uint64 gather_off_nunits poly_uint64 gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype); = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
rettype = TREE_TYPE (TREE_TYPE (gs_info.decl)); tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
scaletype = TREE_VALUE (arglist); tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
tree scaletype = TREE_VALUE (arglist);
gcc_checking_assert (types_compatible_p (srctype, rettype) gcc_checking_assert (types_compatible_p (srctype, rettype)
&& types_compatible_p (srctype, masktype)); && (!mask || types_compatible_p (srctype, masktype)));
tree perm_mask = NULL_TREE;
tree mask_perm_mask = NULL_TREE;
if (known_eq (nunits, gather_off_nunits)) if (known_eq (nunits, gather_off_nunits))
modifier = NONE; modifier = NONE;
else if (known_eq (nunits * 2, gather_off_nunits)) else if (known_eq (nunits * 2, gather_off_nunits))
...@@ -2352,11 +2237,11 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -2352,11 +2237,11 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
fixed-length vectors. */ fixed-length vectors. */
int count = gather_off_nunits.to_constant (); int count = gather_off_nunits.to_constant ();
vec_perm_builder sel (count, count, 1); vec_perm_builder sel (count, count, 1);
for (i = 0; i < count; ++i) for (int i = 0; i < count; ++i)
sel.quick_push (i | (count / 2)); sel.quick_push (i | (count / 2));
vec_perm_indices indices (sel, 1, count); vec_perm_indices indices (sel, 1, count);
perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
indices); indices);
} }
else if (known_eq (nunits, gather_off_nunits * 2)) else if (known_eq (nunits, gather_off_nunits * 2))
...@@ -2368,44 +2253,64 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -2368,44 +2253,64 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
int count = nunits.to_constant (); int count = nunits.to_constant ();
vec_perm_builder sel (count, count, 1); vec_perm_builder sel (count, count, 1);
sel.quick_grow (count); sel.quick_grow (count);
for (i = 0; i < count; ++i) for (int i = 0; i < count; ++i)
sel[i] = i < count / 2 ? i : i + count / 2; sel[i] = i < count / 2 ? i : i + count / 2;
vec_perm_indices indices (sel, 2, count); vec_perm_indices indices (sel, 2, count);
perm_mask = vect_gen_perm_mask_checked (vectype, indices); perm_mask = vect_gen_perm_mask_checked (vectype, indices);
ncopies *= 2; ncopies *= 2;
for (i = 0; i < count; ++i)
if (mask)
{
for (int i = 0; i < count; ++i)
sel[i] = i | (count / 2); sel[i] = i | (count / 2);
indices.new_vector (sel, 2, count); indices.new_vector (sel, 2, count);
mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices); mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
} }
}
else else
gcc_unreachable (); gcc_unreachable ();
vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype); tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
vectype);
ptr = fold_convert (ptrtype, gs_info.base); tree ptr = fold_convert (ptrtype, gs_info->base);
if (!is_gimple_min_invariant (ptr)) if (!is_gimple_min_invariant (ptr))
{ {
gimple_seq seq;
ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE); ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
gcc_assert (!new_bb); gcc_assert (!new_bb);
} }
scale = build_int_cst (scaletype, gs_info.scale); tree scale = build_int_cst (scaletype, gs_info->scale);
prev_stmt_info = NULL; tree vec_oprnd0 = NULL_TREE;
for (j = 0; j < ncopies; ++j) tree vec_mask = NULL_TREE;
tree src_op = NULL_TREE;
tree mask_op = NULL_TREE;
tree prev_res = NULL_TREE;
stmt_vec_info prev_stmt_info = NULL;
if (!mask)
{
src_op = vect_build_zero_merge_argument (stmt, rettype);
mask_op = vect_build_all_ones_mask (stmt, masktype);
}
for (int j = 0; j < ncopies; ++j)
{ {
tree op, var;
gimple *new_stmt;
if (modifier == WIDEN && (j & 1)) if (modifier == WIDEN && (j & 1))
op = permute_vec_elements (vec_oprnd0, vec_oprnd0, op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
perm_mask, stmt, gsi); perm_mask, stmt, gsi);
else if (j == 0) else if (j == 0)
op = vec_oprnd0 op = vec_oprnd0
= vect_get_vec_def_for_operand (gs_info.offset, stmt); = vect_get_vec_def_for_operand (gs_info->offset, stmt);
else else
op = vec_oprnd0 op = vec_oprnd0
= vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0); = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
if (!useless_type_conversion_p (idxtype, TREE_TYPE (op))) if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
{ {
...@@ -2413,12 +2318,13 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -2413,12 +2318,13 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
TYPE_VECTOR_SUBPARTS (idxtype))); TYPE_VECTOR_SUBPARTS (idxtype)));
var = vect_get_new_ssa_name (idxtype, vect_simple_var); var = vect_get_new_ssa_name (idxtype, vect_simple_var);
op = build1 (VIEW_CONVERT_EXPR, idxtype, op); op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
new_stmt new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
vect_finish_stmt_generation (stmt, new_stmt, gsi); vect_finish_stmt_generation (stmt, new_stmt, gsi);
op = var; op = var;
} }
if (mask)
{
if (mask_perm_mask && (j & 1)) if (mask_perm_mask && (j & 1))
mask_op = permute_vec_elements (mask_op, mask_op, mask_op = permute_vec_elements (mask_op, mask_op,
mask_perm_mask, stmt, gsi); mask_perm_mask, stmt, gsi);
...@@ -2428,6 +2334,8 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -2428,6 +2334,8 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
vec_mask = vect_get_vec_def_for_operand (mask, stmt); vec_mask = vect_get_vec_def_for_operand (mask, stmt);
else else
{ {
gimple *def_stmt;
enum vect_def_type dt;
vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt); vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask); vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
} }
...@@ -2440,16 +2348,17 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -2440,16 +2348,17 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
TYPE_VECTOR_SUBPARTS (masktype))); TYPE_VECTOR_SUBPARTS (masktype)));
var = vect_get_new_ssa_name (masktype, vect_simple_var); var = vect_get_new_ssa_name (masktype, vect_simple_var);
mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op); mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
new_stmt new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op); mask_op);
vect_finish_stmt_generation (stmt, new_stmt, gsi); vect_finish_stmt_generation (stmt, new_stmt, gsi);
mask_op = var; mask_op = var;
} }
} }
src_op = mask_op;
}
new_stmt new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
= gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op, mask_op, scale);
scale);
if (!useless_type_conversion_p (vectype, rettype)) if (!useless_type_conversion_p (vectype, rettype))
{ {
...@@ -2477,8 +2386,7 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -2477,8 +2386,7 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
prev_res = var; prev_res = var;
continue; continue;
} }
var = permute_vec_elements (prev_res, var, var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
perm_mask, stmt, gsi);
new_stmt = SSA_NAME_DEF_STMT (var); new_stmt = SSA_NAME_DEF_STMT (var);
} }
...@@ -2488,6 +2396,134 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi, ...@@ -2488,6 +2396,134 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
prev_stmt_info = vinfo_for_stmt (new_stmt); prev_stmt_info = vinfo_for_stmt (new_stmt);
} }
}
/* Function vectorizable_mask_load_store.
Check if STMT performs a conditional load or store that can be vectorized.
If VEC_STMT is also passed, vectorize the STMT: create a vectorized
stmt to replace it, put it in VEC_STMT, and insert it at GSI.
Return FALSE if not a vectorizable STMT, TRUE otherwise. */
static bool
vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
gimple **vec_stmt, slp_tree slp_node)
{
tree vec_dest = NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
stmt_vec_info prev_stmt_info;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree rhs_vectype = NULL_TREE;
tree mask_vectype;
tree elem_type;
gimple *new_stmt;
tree dummy;
tree dataref_ptr = NULL_TREE;
gimple *ptr_incr;
int ncopies;
int i;
bool inv_p;
gather_scatter_info gs_info;
vec_load_store_type vls_type;
tree mask;
gimple *def_stmt;
enum vect_def_type dt;
if (slp_node != NULL)
return false;
ncopies = vect_get_num_copies (loop_vinfo, vectype);
gcc_assert (ncopies >= 1);
/* FORNOW. This restriction should be relaxed. */
if (nested_in_vect_loop && ncopies > 1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"multiple types in nested loop.");
return false;
}
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
&& ! vec_stmt)
return false;
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
mask = gimple_call_arg (stmt, 2);
if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
return false;
elem_type = TREE_TYPE (vectype);
if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
{
tree rhs = gimple_call_arg (stmt, 3);
if (!vect_check_store_rhs (stmt, rhs, &rhs_vectype, &vls_type))
return false;
}
else
vls_type = VLS_LOAD;
vect_memory_access_type memory_access_type;
if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
&memory_access_type, &gs_info))
return false;
if (memory_access_type == VMAT_GATHER_SCATTER)
{
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
tree masktype
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
if (TREE_CODE (masktype) == INTEGER_TYPE)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"masked gather with integer mask not supported.");
return false;
}
}
else if (memory_access_type != VMAT_CONTIGUOUS)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"unsupported access type for masked %s.\n",
vls_type == VLS_LOAD ? "load" : "store");
return false;
}
else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
|| !can_vec_mask_load_store_p (TYPE_MODE (vectype),
TYPE_MODE (mask_vectype),
vls_type == VLS_LOAD))
return false;
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
if (vls_type == VLS_LOAD)
vect_model_load_cost (stmt_info, ncopies, memory_access_type,
NULL, NULL, NULL);
else
vect_model_store_cost (stmt_info, ncopies, memory_access_type,
vls_type, NULL, NULL, NULL);
return true;
}
gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
/* Transform. */
if (memory_access_type == VMAT_GATHER_SCATTER)
{
vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask);
return true; return true;
} }
else if (vls_type != VLS_LOAD) else if (vls_type != VLS_LOAD)
...@@ -6998,142 +7034,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt, ...@@ -6998,142 +7034,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
if (memory_access_type == VMAT_GATHER_SCATTER) if (memory_access_type == VMAT_GATHER_SCATTER)
{ {
tree vec_oprnd0 = NULL_TREE, op; vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, NULL_TREE);
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
edge pe = loop_preheader_edge (loop);
gimple_seq seq;
basic_block new_bb;
enum { NARROW, NONE, WIDEN } modifier;
poly_uint64 gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
if (known_eq (nunits, gather_off_nunits))
modifier = NONE;
else if (known_eq (nunits * 2, gather_off_nunits))
{
modifier = WIDEN;
/* Currently widening gathers are only supported for
fixed-length vectors. */
int count = gather_off_nunits.to_constant ();
vec_perm_builder sel (count, count, 1);
for (i = 0; i < count; ++i)
sel.quick_push (i | (count / 2));
vec_perm_indices indices (sel, 1, count);
perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
indices);
}
else if (known_eq (nunits, gather_off_nunits * 2))
{
modifier = NARROW;
/* Currently narrowing gathers are only supported for
fixed-length vectors. */
int count = nunits.to_constant ();
vec_perm_builder sel (count, count, 1);
for (i = 0; i < count; ++i)
sel.quick_push (i < count / 2 ? i : i + count / 2);
vec_perm_indices indices (sel, 2, count);
perm_mask = vect_gen_perm_mask_checked (vectype, indices);
ncopies *= 2;
}
else
gcc_unreachable ();
rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
scaletype = TREE_VALUE (arglist);
gcc_checking_assert (types_compatible_p (srctype, rettype));
vec_dest = vect_create_destination_var (scalar_dest, vectype);
ptr = fold_convert (ptrtype, gs_info.base);
if (!is_gimple_min_invariant (ptr))
{
ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
gcc_assert (!new_bb);
}
/* Currently we support only unconditional gather loads,
so mask should be all ones. */
mask = vect_build_all_ones_mask (stmt, masktype);
scale = build_int_cst (scaletype, gs_info.scale);
merge = vect_build_zero_merge_argument (stmt, rettype);
prev_stmt_info = NULL;
for (j = 0; j < ncopies; ++j)
{
if (modifier == WIDEN && (j & 1))
op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
perm_mask, stmt, gsi);
else if (j == 0)
op = vec_oprnd0
= vect_get_vec_def_for_operand (gs_info.offset, stmt);
else
op = vec_oprnd0
= vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
{
gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
TYPE_VECTOR_SUBPARTS (idxtype)));
var = vect_get_new_ssa_name (idxtype, vect_simple_var);
op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
new_stmt
= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
op = var;
}
new_stmt
= gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
if (!useless_type_conversion_p (vectype, rettype))
{
gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
TYPE_VECTOR_SUBPARTS (rettype)));
op = vect_get_new_ssa_name (rettype, vect_simple_var);
gimple_call_set_lhs (new_stmt, op);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
var = make_ssa_name (vec_dest);
op = build1 (VIEW_CONVERT_EXPR, vectype, op);
new_stmt
= gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
}
else
{
var = make_ssa_name (vec_dest, new_stmt);
gimple_call_set_lhs (new_stmt, var);
}
vect_finish_stmt_generation (stmt, new_stmt, gsi);
if (modifier == NARROW)
{
if ((j & 1) == 0)
{
prev_res = var;
continue;
}
var = permute_vec_elements (prev_res, var,
perm_mask, stmt, gsi);
new_stmt = SSA_NAME_DEF_STMT (var);
}
if (prev_stmt_info == NULL)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
else
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
prev_stmt_info = vinfo_for_stmt (new_stmt);
}
return true; return true;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment