Commit 48b63a31 by Jakub Jelinek Committed by Jakub Jelinek

tree-vect-stmts.c (enum scan_store_kind): New type.

	* tree-vect-stmts.c (enum scan_store_kind): New type.
	(scan_store_can_perm_p): Change last argument from int * to
	vec<enum scan_store_kind> *, record precisely which permutations
	need whole vector left shift or that plus VEC_COND_EXPR.
	(vectorizable_scan_store): Adjust caller, use whole vector left shift
	and additional VEC_COND_EXPR only for those iterations that need it.

From-SVN: r272497
parent 170f2d18
2019-06-20 Jakub Jelinek <jakub@redhat.com>
* tree-vect-stmts.c (enum scan_store_kind): New type.
(scan_store_can_perm_p): Change last argument from int * to
vec<enum scan_store_kind> *, record precisely which permutations
need whole vector left shift or that plus VEC_COND_EXPR.
(vectorizable_scan_store): Adjust caller, use whole vector left shift
and additional VEC_COND_EXPR only for those iterations that need it.
2019-06-20 Alexandre Oliva <oliva@adacore.com>
* config.gcc: Fix ARM --with-fpu checking and error message.
......
...@@ -6354,13 +6354,27 @@ scan_operand_equal_p (tree ref1, tree ref2) ...@@ -6354,13 +6354,27 @@ scan_operand_equal_p (tree ref1, tree ref2)
} }
/* Which operation is used for one step of a scan store.  */
enum scan_store_kind {
  /* The step is done with an ordinary permutation.  */
  scan_store_kind_perm = 0,

  /* The step is done with a whole vector left shift; the shift brings in
     zeros, which is sufficient when the initializer is an all-zero
     constant.  */
  scan_store_kind_lshift_zero = 1,

  /* The step is done with a whole vector left shift followed by a
     VEC_COND_EXPR, because the initializer is not an all-zero constant
     and the shifted-in zero elements have to be replaced.  */
  scan_store_kind_lshift_cond = 2
};
/* Function check_scan_store. /* Function check_scan_store.
Verify if we can perform the needed permutations or whole vector shifts. Verify if we can perform the needed permutations or whole vector shifts.
Return -1 on failure, otherwise exact log2 of vectype's nunits. */ Return -1 on failure, otherwise exact log2 of vectype's nunits.
USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
to do at each step. */
static int static int
scan_store_can_perm_p (tree vectype, tree init, int *use_whole_vector_p = NULL) scan_store_can_perm_p (tree vectype, tree init,
vec<enum scan_store_kind> *use_whole_vector = NULL)
{ {
enum machine_mode vec_mode = TYPE_MODE (vectype); enum machine_mode vec_mode = TYPE_MODE (vectype);
unsigned HOST_WIDE_INT nunits; unsigned HOST_WIDE_INT nunits;
...@@ -6371,50 +6385,59 @@ scan_store_can_perm_p (tree vectype, tree init, int *use_whole_vector_p = NULL) ...@@ -6371,50 +6385,59 @@ scan_store_can_perm_p (tree vectype, tree init, int *use_whole_vector_p = NULL)
return -1; return -1;
int i; int i;
enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
for (i = 0; i <= units_log2; ++i) for (i = 0; i <= units_log2; ++i)
{ {
unsigned HOST_WIDE_INT j, k; unsigned HOST_WIDE_INT j, k;
enum scan_store_kind kind = scan_store_kind_perm;
vec_perm_builder sel (nunits, nunits, 1); vec_perm_builder sel (nunits, nunits, 1);
sel.quick_grow (nunits); sel.quick_grow (nunits);
if (i == 0) if (i == units_log2)
{ {
for (j = 0; j < nunits; ++j) for (j = 0; j < nunits; ++j)
sel[j] = nunits - 1; sel[j] = nunits - 1;
} }
else else
{ {
for (j = 0; j < (HOST_WIDE_INT_1U << (i - 1)); ++j) for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
sel[j] = j; sel[j] = j;
for (k = 0; j < nunits; ++j, ++k) for (k = 0; j < nunits; ++j, ++k)
sel[j] = nunits + k; sel[j] = nunits + k;
} }
vec_perm_indices indices (sel, i == 0 ? 1 : 2, nunits); vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
if (!can_vec_perm_const_p (vec_mode, indices)) if (!can_vec_perm_const_p (vec_mode, indices))
break;
}
if (i == 0)
return -1;
if (i <= units_log2)
{
if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
return -1;
int kind = 1;
/* Whole vector shifts shift in zeros, so if init is all zero constant,
there is no need to do anything further. */
if ((TREE_CODE (init) != INTEGER_CST
&& TREE_CODE (init) != REAL_CST)
|| !initializer_zerop (init))
{ {
tree masktype = build_same_sized_truth_vector_type (vectype); if (i == units_log2)
if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
return -1; return -1;
kind = 2;
if (whole_vector_shift_kind == scan_store_kind_perm)
{
if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
return -1;
whole_vector_shift_kind = scan_store_kind_lshift_zero;
/* Whole vector shifts shift in zeros, so if init is all zero
constant, there is no need to do anything further. */
if ((TREE_CODE (init) != INTEGER_CST
&& TREE_CODE (init) != REAL_CST)
|| !initializer_zerop (init))
{
tree masktype = build_same_sized_truth_vector_type (vectype);
if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
return -1;
whole_vector_shift_kind = scan_store_kind_lshift_cond;
}
}
kind = whole_vector_shift_kind;
}
if (use_whole_vector)
{
if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
use_whole_vector->safe_grow_cleared (i);
if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
use_whole_vector->safe_push (kind);
} }
if (use_whole_vector_p)
*use_whole_vector_p = kind;
} }
return units_log2; return units_log2;
} }
...@@ -6726,11 +6749,12 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6726,11 +6749,12 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
unsigned HOST_WIDE_INT nunits; unsigned HOST_WIDE_INT nunits;
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
gcc_unreachable (); gcc_unreachable ();
int use_whole_vector_p = 0; auto_vec<enum scan_store_kind, 16> use_whole_vector;
int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector_p); int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
gcc_assert (units_log2 > 0); gcc_assert (units_log2 > 0);
auto_vec<tree, 16> perms; auto_vec<tree, 16> perms;
perms.quick_grow (units_log2 + 1); perms.quick_grow (units_log2 + 1);
tree zero_vec = NULL_TREE, masktype = NULL_TREE;
for (int i = 0; i <= units_log2; ++i) for (int i = 0; i <= units_log2; ++i)
{ {
unsigned HOST_WIDE_INT j, k; unsigned HOST_WIDE_INT j, k;
...@@ -6739,23 +6763,28 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6739,23 +6763,28 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
if (i == units_log2) if (i == units_log2)
for (j = 0; j < nunits; ++j) for (j = 0; j < nunits; ++j)
sel[j] = nunits - 1; sel[j] = nunits - 1;
else else
{ {
for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
sel[j] = j; sel[j] = j;
for (k = 0; j < nunits; ++j, ++k) for (k = 0; j < nunits; ++j, ++k)
sel[j] = nunits + k; sel[j] = nunits + k;
} }
vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
if (use_whole_vector_p && i < units_log2) if (!use_whole_vector.is_empty ()
perms[i] = vect_gen_perm_mask_any (vectype, indices); && use_whole_vector[i] != scan_store_kind_perm)
{
if (zero_vec == NULL_TREE)
zero_vec = build_zero_cst (vectype);
if (masktype == NULL_TREE
&& use_whole_vector[i] == scan_store_kind_lshift_cond)
masktype = build_same_sized_truth_vector_type (vectype);
perms[i] = vect_gen_perm_mask_any (vectype, indices);
}
else else
perms[i] = vect_gen_perm_mask_checked (vectype, indices); perms[i] = vect_gen_perm_mask_checked (vectype, indices);
} }
tree zero_vec = use_whole_vector_p ? build_zero_cst (vectype) : NULL_TREE;
tree masktype = (use_whole_vector_p == 2
? build_same_sized_truth_vector_type (vectype) : NULL_TREE);
stmt_vec_info prev_stmt_info = NULL; stmt_vec_info prev_stmt_info = NULL;
tree vec_oprnd1 = NULL_TREE; tree vec_oprnd1 = NULL_TREE;
tree vec_oprnd2 = NULL_TREE; tree vec_oprnd2 = NULL_TREE;
...@@ -6788,7 +6817,10 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6788,7 +6817,10 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
{ {
tree new_temp = make_ssa_name (vectype); tree new_temp = make_ssa_name (vectype);
gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR, gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
zero_vec ? zero_vec : vec_oprnd1, v, (zero_vec
&& (use_whole_vector[i]
!= scan_store_kind_perm))
? zero_vec : vec_oprnd1, v,
perms[i]); perms[i]);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
if (prev_stmt_info == NULL) if (prev_stmt_info == NULL)
...@@ -6797,7 +6829,7 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6797,7 +6829,7 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info; prev_stmt_info = new_stmt_info;
if (use_whole_vector_p == 2) if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
{ {
/* Whole vector shift shifted in zero bits, but if *init /* Whole vector shift shifted in zero bits, but if *init
is not initializer_zerop, we need to replace those elements is not initializer_zerop, we need to replace those elements
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment