Commit cf7aa6a3 by Alan Lawrence Committed by Alan Lawrence

[Vectorizer] Use a VEC_PERM_EXPR instead of VEC_RSHIFT_EXPR; expand appropriate…

[Vectorizer] Use a VEC_PERM_EXPR instead of VEC_RSHIFT_EXPR; expand appropriate VEC_PERM_EXPRs using vec_shr_optab

	* optabs.c (can_vec_perm_p): Update comment, does not consider vec_shr.
	(shift_amt_for_vec_perm_mask): New.
	(expand_vec_perm_1): Use vec_shr_optab if second vector is const0_rtx
	and mask appropriate.

	* tree-vect-loop.c (calc_vec_perm_mask_for_shift): New.
	(have_whole_vector_shift): New.
	(vect_model_reduction_cost): Call have_whole_vector_shift instead of
	looking for vec_shr_optab.
	(vect_create_epilog_for_reduction): Likewise; also rename local variable
	have_whole_vector_shift to reduce_with_shift; output VEC_PERM_EXPRs
	instead of VEC_RSHIFT_EXPRs.

	* tree-vect-stmts.c (vect_gen_perm_mask_checked): Extend comment.

From-SVN: r217509
parent 557be5a8
2014-11-13 Alan Lawrence <alan.lawrence@arm.com>
* optabs.c (can_vec_perm_p): Update comment, does not consider vec_shr.
(shift_amt_for_vec_perm_mask): New.
(expand_vec_perm_1): Use vec_shr_optab if second vector is const0_rtx
and mask appropriate.
* tree-vect-loop.c (calc_vec_perm_mask_for_shift): New.
(have_whole_vector_shift): New.
(vect_model_reduction_cost): Call have_whole_vector_shift instead of
looking for vec_shr_optab.
(vect_create_epilog_for_reduction): Likewise; also rename local variable
have_whole_vector_shift to reduce_with_shift; output VEC_PERM_EXPRs
instead of VEC_RSHIFT_EXPRs.
* tree-vect-stmts.c (vect_gen_perm_mask_checked): Extend comment.
2014-11-13 Alan Lawrence <alan.lawrence@arm.com>
* tree-vectorizer.h (vect_gen_perm_mask): Remove.
(vect_gen_perm_mask_checked, vect_gen_perm_mask_any): New.
...@@ -6567,8 +6567,11 @@ vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1, ...@@ -6567,8 +6567,11 @@ vector_compare_rtx (enum tree_code tcode, tree t_op0, tree t_op1,
return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value); return gen_rtx_fmt_ee (rcode, VOIDmode, ops[0].value, ops[1].value);
} }
/* Return true if VEC_PERM_EXPR can be expanded using SIMD extensions /* Return true if VEC_PERM_EXPR of arbitrary input vectors can be expanded using
of the CPU. SEL may be NULL, which stands for an unknown constant. */ SIMD extensions of the CPU. SEL may be NULL, which stands for an unknown
constant. Note that additional permutations representing whole-vector shifts
may also be handled via the vec_shr optab, but only where the second input
vector is entirely constant zeroes; this case is not dealt with here. */
bool bool
can_vec_perm_p (machine_mode mode, bool variable, can_vec_perm_p (machine_mode mode, bool variable,
...@@ -6621,6 +6624,36 @@ can_vec_perm_p (machine_mode mode, bool variable, ...@@ -6621,6 +6624,36 @@ can_vec_perm_p (machine_mode mode, bool variable,
return true; return true;
} }
/* Checks if vec_perm mask SEL is a constant equivalent to a shift of the first
   vec_perm operand, assuming the second operand is a constant vector of zeroes.
   Return the shift distance in bits if so, or NULL_RTX if the vec_perm is not a
   shift.  */
static rtx
shift_amt_for_vec_perm_mask (rtx sel)
{
  unsigned int i, first, nelt = GET_MODE_NUNITS (GET_MODE (sel));
  unsigned int bitsize = GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (sel)));
  /* Only a compile-time-constant selector can be analysed here.  */
  if (GET_CODE (sel) != CONST_VECTOR)
    return NULL_RTX;
  /* The first selected element fixes the candidate shift amount (in
     elements).  It must index into one of the two inputs, i.e. be below
     2*nelt.  */
  first = INTVAL (CONST_VECTOR_ELT (sel, 0));
  if (first >= 2*nelt)
    return NULL_RTX;
  /* Every later element must continue the sequence first+1, first+2, ...
     modulo 2*nelt, i.e. the mask selects a contiguous run of lanes.  */
  for (i = 1; i < nelt; i++)
    {
      int idx = INTVAL (CONST_VECTOR_ELT (sel, i));
      unsigned int expected = (i + first) & (2 * nelt - 1);
      /* Indices into the second vector are all equivalent.  */
      /* (The second operand is all zeroes, so any index >= nelt reads a
	 zero lane; MIN clamps both sides to treat them as interchangeable.)  */
      if (idx < 0 || (MIN (nelt, (unsigned) idx) != MIN (nelt, expected)))
	return NULL_RTX;
    }
  /* NOTE(review): on big-endian targets lane numbering is reversed, so the
     element offset is inverted here; for first < nelt this yields an amount
     larger than the vector width — confirm against the target's vec_shr
     semantics.  */
  if (BYTES_BIG_ENDIAN)
    first = (2 * nelt) - first;
  /* Convert the element count into a bit count, as vec_shr expects.  */
  return GEN_INT (first * bitsize);
}
/* A subroutine of expand_vec_perm for expanding one vec_perm insn. */ /* A subroutine of expand_vec_perm for expanding one vec_perm insn. */
static rtx static rtx
...@@ -6649,6 +6682,17 @@ expand_vec_perm_1 (enum insn_code icode, rtx target, ...@@ -6649,6 +6682,17 @@ expand_vec_perm_1 (enum insn_code icode, rtx target,
else else
{ {
create_input_operand (&ops[1], v0, tmode); create_input_operand (&ops[1], v0, tmode);
/* See if this can be handled with a vec_shr. We only do this if the
second vector is all zeroes. */
enum insn_code shift_code = optab_handler (vec_shr_optab, GET_MODE (v0));
if (v1 == CONST0_RTX (GET_MODE (v1)) && shift_code)
if (rtx shift_amt = shift_amt_for_vec_perm_mask (sel))
{
create_convert_operand_from_type (&ops[2], shift_amt,
sizetype_tab[(int) stk_sizetype]);
if (maybe_expand_insn (shift_code, 3, ops))
return ops[0].value;
}
create_input_operand (&ops[2], v1, tmode); create_input_operand (&ops[2], v1, tmode);
} }
......
...@@ -3083,6 +3083,41 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, ...@@ -3083,6 +3083,41 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
*ret_min_profitable_estimate = min_profitable_estimate; *ret_min_profitable_estimate = min_profitable_estimate;
} }
/* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
   vector elements (not bits) for a vector of mode MODE.  */
static void
calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
			      unsigned char *sel)
{
  unsigned int nelt = GET_MODE_NUNITS (mode);
  unsigned int wrap_mask = 2 * nelt - 1;
  unsigned int lane;
  for (lane = 0; lane < nelt; lane++)
    {
      /* Lane numbering runs the other way on big-endian targets.  */
      unsigned int src = BYTES_BIG_ENDIAN ? lane - offset : lane + offset;
      /* Keep the index within the 0 .. 2*nelt-1 range a two-input
	 vec_perm selector allows (unsigned wraparound is intended).  */
      sel[lane] = src & wrap_mask;
    }
}
/* Checks whether the target supports whole-vector shifts for vectors of mode
   MODE.  This is the case if _either_ the platform handles vec_shr_optab, _or_
   it supports vec_perm_const with masks for all necessary shift amounts.  */
static bool
have_whole_vector_shift (enum machine_mode mode)
{
  /* A direct vec_shr pattern covers every shift amount at once.  */
  if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
    return true;

  /* Without constant-permute support there is no fallback.  */
  if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
    return false;

  unsigned int nelt = GET_MODE_NUNITS (mode);
  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
  unsigned int shift;

  /* The reduction epilogue shifts by nelt/2, nelt/4, ..., 1 elements;
     each of those masks must be an expandable permute.  */
  for (shift = nelt / 2; shift > 0; shift /= 2)
    {
      calc_vec_perm_mask_for_shift (mode, shift, sel);
      if (!can_vec_perm_p (mode, false, sel))
	return false;
    }
  return true;
}
/* TODO: Close dependency between vect_model_*_cost and vectorizable_* /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
functions. Design better to avoid maintenance issues. */ functions. Design better to avoid maintenance issues. */
...@@ -3185,7 +3220,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, ...@@ -3185,7 +3220,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
/* We have a whole vector shift available. */ /* We have a whole vector shift available. */
if (VECTOR_MODE_P (mode) if (VECTOR_MODE_P (mode)
&& optab_handler (optab, mode) != CODE_FOR_nothing && optab_handler (optab, mode) != CODE_FOR_nothing
&& optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing) && have_whole_vector_shift (mode))
{ {
/* Final reduction via vector shifts and the reduction operator. /* Final reduction via vector shifts and the reduction operator.
Also requires scalar extract. */ Also requires scalar extract. */
...@@ -3788,7 +3823,6 @@ get_initial_def_for_reduction (gimple stmt, tree init_val, ...@@ -3788,7 +3823,6 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
return init_def; return init_def;
} }
/* Function vect_create_epilog_for_reduction /* Function vect_create_epilog_for_reduction
Create code at the loop-epilog to finalize the result of a reduction Create code at the loop-epilog to finalize the result of a reduction
...@@ -4212,18 +4246,11 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt, ...@@ -4212,18 +4246,11 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
} }
else else
{ {
enum tree_code shift_code = ERROR_MARK; bool reduce_with_shift = have_whole_vector_shift (mode);
bool have_whole_vector_shift = true;
int bit_offset;
int element_bitsize = tree_to_uhwi (bitsize); int element_bitsize = tree_to_uhwi (bitsize);
int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
tree vec_temp; tree vec_temp;
if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
shift_code = VEC_RSHIFT_EXPR;
else
have_whole_vector_shift = false;
/* Regardless of whether we have a whole vector shift, if we're /* Regardless of whether we have a whole vector shift, if we're
emulating the operation via tree-vect-generic, we don't want emulating the operation via tree-vect-generic, we don't want
to use it. Only the first round of the reduction is likely to use it. Only the first round of the reduction is likely
...@@ -4231,18 +4258,24 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt, ...@@ -4231,18 +4258,24 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
/* ??? It might be better to emit a reduction tree code here, so that /* ??? It might be better to emit a reduction tree code here, so that
tree-vect-generic can expand the first round via bit tricks. */ tree-vect-generic can expand the first round via bit tricks. */
if (!VECTOR_MODE_P (mode)) if (!VECTOR_MODE_P (mode))
have_whole_vector_shift = false; reduce_with_shift = false;
else else
{ {
optab optab = optab_for_tree_code (code, vectype, optab_default); optab optab = optab_for_tree_code (code, vectype, optab_default);
if (optab_handler (optab, mode) == CODE_FOR_nothing) if (optab_handler (optab, mode) == CODE_FOR_nothing)
have_whole_vector_shift = false; reduce_with_shift = false;
} }
if (have_whole_vector_shift && !slp_reduc) if (reduce_with_shift && !slp_reduc)
{ {
int nelements = vec_size_in_bits / element_bitsize;
unsigned char *sel = XALLOCAVEC (unsigned char, nelements);
int elt_offset;
tree zero_vec = build_zero_cst (vectype);
/*** Case 2: Create: /*** Case 2: Create:
for (offset = VS/2; offset >= element_size; offset/=2) for (offset = nelements/2; offset >= 1; offset/=2)
{ {
Create: va' = vec_shift <va, offset> Create: va' = vec_shift <va, offset>
Create: va = vop <va, va'> Create: va = vop <va, va'>
...@@ -4254,14 +4287,15 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt, ...@@ -4254,14 +4287,15 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
vec_dest = vect_create_destination_var (scalar_dest, vectype); vec_dest = vect_create_destination_var (scalar_dest, vectype);
new_temp = new_phi_result; new_temp = new_phi_result;
for (bit_offset = vec_size_in_bits/2; for (elt_offset = nelements / 2;
bit_offset >= element_bitsize; elt_offset >= 1;
bit_offset /= 2) elt_offset /= 2)
{ {
tree bitpos = size_int (bit_offset); calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
tree mask = vect_gen_perm_mask_any (vectype, sel);
epilog_stmt = gimple_build_assign_with_ops (shift_code, epilog_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR,
vec_dest, new_temp, bitpos); vec_dest, new_temp,
zero_vec, mask);
new_name = make_ssa_name (vec_dest, epilog_stmt); new_name = make_ssa_name (vec_dest, epilog_stmt);
gimple_assign_set_lhs (epilog_stmt, new_name); gimple_assign_set_lhs (epilog_stmt, new_name);
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
...@@ -4277,8 +4311,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt, ...@@ -4277,8 +4311,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
} }
else else
{ {
tree rhs;
/*** Case 3: Create: /*** Case 3: Create:
s = extract_field <v_out2, 0> s = extract_field <v_out2, 0>
for (offset = element_size; for (offset = element_size;
...@@ -4296,11 +4328,12 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt, ...@@ -4296,11 +4328,12 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
FOR_EACH_VEC_ELT (new_phis, i, new_phi) FOR_EACH_VEC_ELT (new_phis, i, new_phi)
{ {
int bit_offset;
if (gimple_code (new_phi) == GIMPLE_PHI) if (gimple_code (new_phi) == GIMPLE_PHI)
vec_temp = PHI_RESULT (new_phi); vec_temp = PHI_RESULT (new_phi);
else else
vec_temp = gimple_assign_lhs (new_phi); vec_temp = gimple_assign_lhs (new_phi);
rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
bitsize_zero_node); bitsize_zero_node);
epilog_stmt = gimple_build_assign (new_scalar_dest, rhs); epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
......
...@@ -5495,7 +5495,8 @@ vect_gen_perm_mask_any (tree vectype, const unsigned char *sel) ...@@ -5495,7 +5495,8 @@ vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
return mask_vec; return mask_vec;
} }
/* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p. */ /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
i.e. that the target supports the pattern _for arbitrary input vectors_. */
tree tree
vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel) vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment