Commit 1a1c441d by Richard Sandiford Committed by Richard Sandiford

Rework VEC_PERM_EXPR folding

This patch reworks the VEC_PERM_EXPR folding so that more of it
works for variable-length vectors.  E.g. it means that we can
now recognise variable-length permutes that reduce to a single
vector, or cases in which a variable-length permute only needs
one input.  There should be no functional change for fixed-length
vectors.

2018-01-02  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* selftest.h (selftest::vec_perm_indices_c_tests): Declare.
	* selftest-run-tests.c (selftest::run_tests): Call it.
	* vector-builder.h (vector_builder::operator ==): New function.
	(vector_builder::operator !=): Likewise.
	* vec-perm-indices.h (vec_perm_indices::series_p): Declare.
	(vec_perm_indices::all_from_input_p): New function.
	* vec-perm-indices.c (vec_perm_indices::series_p): Likewise.
	(test_vec_perm_12, selftest::vec_perm_indices_c_tests): Likewise.
	* fold-const.c (fold_ternary_loc): Use tree_to_vec_perm_builder
	instead of reading the VECTOR_CST directly.  Detect whether both
	vector inputs are the same before constructing the vec_perm_indices,
	and update the number of inputs argument accordingly.  Use the
	utility functions added above.  Only construct sel2 if we need to.

From-SVN: r256098
parent d980067b
2018-01-02 Richard Sandiford <richard.sandiford@linaro.org> 2018-01-02 Richard Sandiford <richard.sandiford@linaro.org>
* selftest.h (selftest::vec_perm_indices_c_tests): Declare.
* selftest-run-tests.c (selftest::run_tests): Call it.
* vector-builder.h (vector_builder::operator ==): New function.
(vector_builder::operator !=): Likewise.
* vec-perm-indices.h (vec_perm_indices::series_p): Declare.
(vec_perm_indices::all_from_input_p): New function.
* vec-perm-indices.c (vec_perm_indices::series_p): Likewise.
(test_vec_perm_12, selftest::vec_perm_indices_c_tests): Likewise.
* fold-const.c (fold_ternary_loc): Use tree_to_vec_perm_builder
instead of reading the VECTOR_CST directly. Detect whether both
vector inputs are the same before constructing the vec_perm_indices,
and update the number of inputs argument accordingly. Use the
utility functions added above. Only construct sel2 if we need to.
2018-01-02 Richard Sandiford <richard.sandiford@linaro.org>
* optabs.c (expand_vec_perm_var): Use an explicit encoding for * optabs.c (expand_vec_perm_var): Use an explicit encoding for
the broadcast of the low byte. the broadcast of the low byte.
(expand_mult_highpart): Use an explicit encoding for the permutes. (expand_mult_highpart): Use an explicit encoding for the permutes.
...@@ -11705,99 +11705,65 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type, ...@@ -11705,99 +11705,65 @@ fold_ternary_loc (location_t loc, enum tree_code code, tree type,
case VEC_PERM_EXPR: case VEC_PERM_EXPR:
if (TREE_CODE (arg2) == VECTOR_CST) if (TREE_CODE (arg2) == VECTOR_CST)
{ {
unsigned int nelts = VECTOR_CST_NELTS (arg2), i, mask, mask2; /* Build a vector of integers from the tree mask. */
bool need_mask_canon = false; vec_perm_builder builder;
bool need_mask_canon2 = false; if (!tree_to_vec_perm_builder (&builder, arg2))
bool all_in_vec0 = true; return NULL_TREE;
bool all_in_vec1 = true;
bool maybe_identity = true;
bool single_arg = (op0 == op1);
bool changed = false;
mask2 = 2 * nelts - 1;
mask = single_arg ? (nelts - 1) : mask2;
gcc_assert (nelts == TYPE_VECTOR_SUBPARTS (type));
vec_perm_builder sel (nelts, nelts, 1);
vec_perm_builder sel2 (nelts, nelts, 1);
for (i = 0; i < nelts; i++)
{
tree val = VECTOR_CST_ELT (arg2, i);
if (TREE_CODE (val) != INTEGER_CST)
return NULL_TREE;
/* Make sure that the perm value is in an acceptable
range. */
wi::tree_to_wide_ref t = wi::to_wide (val);
need_mask_canon |= wi::gtu_p (t, mask);
need_mask_canon2 |= wi::gtu_p (t, mask2);
unsigned int elt = t.to_uhwi () & mask;
unsigned int elt2 = t.to_uhwi () & mask2;
if (elt < nelts)
all_in_vec1 = false;
else
all_in_vec0 = false;
if ((elt & (nelts - 1)) != i)
maybe_identity = false;
sel.quick_push (elt); /* Create a vec_perm_indices for the integer vector. */
sel2.quick_push (elt2); unsigned int nelts = TYPE_VECTOR_SUBPARTS (type);
} bool single_arg = (op0 == op1);
vec_perm_indices sel (builder, single_arg ? 1 : 2, nelts);
if (maybe_identity) /* Check for cases that fold to OP0 or OP1 in their original
{ element order. */
if (all_in_vec0) if (sel.series_p (0, 1, 0, 1))
return op0; return op0;
if (all_in_vec1) if (sel.series_p (0, 1, nelts, 1))
return op1; return op1;
}
if (all_in_vec0) if (!single_arg)
op1 = op0;
else if (all_in_vec1)
{ {
op0 = op1; if (sel.all_from_input_p (0))
for (i = 0; i < nelts; i++) op1 = op0;
sel[i] -= nelts; else if (sel.all_from_input_p (1))
need_mask_canon = true; {
op0 = op1;
sel.rotate_inputs (1);
}
} }
vec_perm_indices indices (sel, 2, nelts);
if ((TREE_CODE (op0) == VECTOR_CST if ((TREE_CODE (op0) == VECTOR_CST
|| TREE_CODE (op0) == CONSTRUCTOR) || TREE_CODE (op0) == CONSTRUCTOR)
&& (TREE_CODE (op1) == VECTOR_CST && (TREE_CODE (op1) == VECTOR_CST
|| TREE_CODE (op1) == CONSTRUCTOR)) || TREE_CODE (op1) == CONSTRUCTOR))
{ {
tree t = fold_vec_perm (type, op0, op1, indices); tree t = fold_vec_perm (type, op0, op1, sel);
if (t != NULL_TREE) if (t != NULL_TREE)
return t; return t;
} }
if (op0 == op1 && !single_arg) bool changed = (op0 == op1 && !single_arg);
changed = true;
/* Some targets are deficient and fail to expand a single
argument permutation while still allowing an equivalent
2-argument version. */
if (need_mask_canon && arg2 == op2
&& !can_vec_perm_const_p (TYPE_MODE (type), indices, false)
&& can_vec_perm_const_p (TYPE_MODE (type),
vec_perm_indices (sel2, 2, nelts),
false))
{
need_mask_canon = need_mask_canon2;
sel.truncate (0);
sel.splice (sel2);
}
if (need_mask_canon && arg2 == op2) /* Generate a canonical form of the selector. */
if (arg2 == op2 && sel.encoding () != builder)
{ {
tree eltype = TREE_TYPE (TREE_TYPE (arg2)); /* Some targets are deficient and fail to expand a single
tree_vector_builder tsel (TREE_TYPE (arg2), nelts, 1); argument permutation while still allowing an equivalent
for (i = 0; i < nelts; i++) 2-argument version. */
tsel.quick_push (build_int_cst (eltype, sel[i])); if (sel.ninputs () == 2
op2 = tsel.build (); || can_vec_perm_const_p (TYPE_MODE (type), sel, false))
op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
else
{
vec_perm_indices sel2 (builder, 2, nelts);
if (can_vec_perm_const_p (TYPE_MODE (type), sel2, false))
op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel2);
else
/* Not directly supported with either encoding,
so use the preferred form. */
op2 = vec_perm_indices_to_tree (TREE_TYPE (arg2), sel);
}
changed = true; changed = true;
} }
......
...@@ -73,6 +73,7 @@ selftest::run_tests () ...@@ -73,6 +73,7 @@ selftest::run_tests ()
/* Mid-level data structures. */ /* Mid-level data structures. */
input_c_tests (); input_c_tests ();
vec_perm_indices_c_tests ();
tree_c_tests (); tree_c_tests ();
gimple_c_tests (); gimple_c_tests ();
rtl_tests_c_tests (); rtl_tests_c_tests ();
......
...@@ -215,6 +215,7 @@ extern void vec_c_tests (); ...@@ -215,6 +215,7 @@ extern void vec_c_tests ();
extern void wide_int_cc_tests (); extern void wide_int_cc_tests ();
extern void predict_c_tests (); extern void predict_c_tests ();
extern void simplify_rtx_c_tests (); extern void simplify_rtx_c_tests ();
extern void vec_perm_indices_c_tests ();
extern int num_passes; extern int num_passes;
......
...@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -28,6 +28,7 @@ along with GCC; see the file COPYING3. If not see
#include "rtl.h" #include "rtl.h"
#include "memmodel.h" #include "memmodel.h"
#include "emit-rtl.h" #include "emit-rtl.h"
#include "selftest.h"
/* Switch to a new permutation vector that selects between NINPUTS vector /* Switch to a new permutation vector that selects between NINPUTS vector
inputs that have NELTS_PER_INPUT elements each. Take the elements of the inputs that have NELTS_PER_INPUT elements each. Take the elements of the
...@@ -85,6 +86,54 @@ vec_perm_indices::rotate_inputs (int delta) ...@@ -85,6 +86,54 @@ vec_perm_indices::rotate_inputs (int delta)
m_encoding[i] = clamp (m_encoding[i] + element_delta); m_encoding[i] = clamp (m_encoding[i] + element_delta);
} }
/* Return true if index OUT_BASE + I * OUT_STEP selects input
element IN_BASE + I * IN_STEP. */
bool
vec_perm_indices::series_p (unsigned int out_base, unsigned int out_step,
element_type in_base, element_type in_step) const
{
/* Check the base value. */
if (clamp (m_encoding.elt (out_base)) != clamp (in_base))
return false;
unsigned int full_nelts = m_encoding.full_nelts ();
unsigned int npatterns = m_encoding.npatterns ();
/* Calculate which multiple of OUT_STEP elements we need to get
back to the same pattern. */
unsigned int cycle_length = least_common_multiple (out_step, npatterns);
/* Check the steps. */
in_step = clamp (in_step);
out_base += out_step;
unsigned int limit = 0;
for (;;)
{
/* Succeed if we've checked all the elements in the vector. */
if (out_base >= full_nelts)
return true;
if (out_base >= npatterns)
{
/* We've got to the end of the "foreground" values. Check
2 elements from each pattern in the "background" values. */
if (limit == 0)
limit = out_base + cycle_length * 2;
else if (out_base >= limit)
return true;
}
element_type v0 = m_encoding.elt (out_base - out_step);
element_type v1 = m_encoding.elt (out_base);
if (clamp (v1 - v0) != in_step)
return false;
out_base += out_step;
}
return true;
}
/* Return true if all elements of the permutation vector are in the range /* Return true if all elements of the permutation vector are in the range
[START, START + SIZE). */ [START, START + SIZE). */
...@@ -180,3 +229,52 @@ vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices) ...@@ -180,3 +229,52 @@ vec_perm_indices_to_rtx (machine_mode mode, const vec_perm_indices &indices)
RTVEC_ELT (v, i) = gen_int_mode (indices[i], GET_MODE_INNER (mode)); RTVEC_ELT (v, i) = gen_int_mode (indices[i], GET_MODE_INNER (mode));
return gen_rtx_CONST_VECTOR (mode, v); return gen_rtx_CONST_VECTOR (mode, v);
} }
#if CHECKING_P
namespace selftest {
/* Test a 12-element vector. */
static void
test_vec_perm_12 (void)
{
vec_perm_builder builder (12, 12, 1);
for (unsigned int i = 0; i < 4; ++i)
{
builder.quick_push (i * 5);
builder.quick_push (3 + i);
builder.quick_push (2 + 3 * i);
}
vec_perm_indices indices (builder, 1, 12);
ASSERT_TRUE (indices.series_p (0, 3, 0, 5));
ASSERT_FALSE (indices.series_p (0, 3, 3, 5));
ASSERT_FALSE (indices.series_p (0, 3, 0, 8));
ASSERT_TRUE (indices.series_p (1, 3, 3, 1));
ASSERT_TRUE (indices.series_p (2, 3, 2, 3));
ASSERT_TRUE (indices.series_p (0, 4, 0, 4));
ASSERT_FALSE (indices.series_p (1, 4, 3, 4));
ASSERT_TRUE (indices.series_p (0, 6, 0, 10));
ASSERT_FALSE (indices.series_p (0, 6, 0, 100));
ASSERT_FALSE (indices.series_p (1, 10, 3, 7));
ASSERT_TRUE (indices.series_p (1, 10, 3, 8));
ASSERT_TRUE (indices.series_p (0, 12, 0, 10));
ASSERT_TRUE (indices.series_p (0, 12, 0, 11));
ASSERT_TRUE (indices.series_p (0, 12, 0, 100));
}
/* Run selftests for this file. */
void
vec_perm_indices_c_tests ()
{
test_vec_perm_12 ();
}
} // namespace selftest
#endif
...@@ -77,7 +77,9 @@ public: ...@@ -77,7 +77,9 @@ public:
element_type clamp (element_type) const; element_type clamp (element_type) const;
element_type operator[] (unsigned int i) const; element_type operator[] (unsigned int i) const;
bool series_p (unsigned int, unsigned int, element_type, element_type) const;
bool all_in_range_p (element_type, element_type) const; bool all_in_range_p (element_type, element_type) const;
bool all_from_input_p (unsigned int) const;
private: private:
vec_perm_indices (const vec_perm_indices &); vec_perm_indices (const vec_perm_indices &);
...@@ -134,4 +136,13 @@ vec_perm_indices::operator[] (unsigned int i) const ...@@ -134,4 +136,13 @@ vec_perm_indices::operator[] (unsigned int i) const
return clamp (m_encoding.elt (i)); return clamp (m_encoding.elt (i));
} }
/* Return true if the permutation vector only selects elements from
input I. */
inline bool
vec_perm_indices::all_from_input_p (unsigned int i) const
{
return all_in_range_p (i * m_nelts_per_input, m_nelts_per_input);
}
#endif #endif
...@@ -97,6 +97,9 @@ public: ...@@ -97,6 +97,9 @@ public:
bool encoded_full_vector_p () const; bool encoded_full_vector_p () const;
T elt (unsigned int) const; T elt (unsigned int) const;
bool operator == (const Derived &) const;
bool operator != (const Derived &x) const { return !operator == (x); }
void finalize (); void finalize ();
protected: protected:
...@@ -168,6 +171,26 @@ vector_builder<T, Derived>::new_vector (unsigned int full_nelts, ...@@ -168,6 +171,26 @@ vector_builder<T, Derived>::new_vector (unsigned int full_nelts,
this->truncate (0); this->truncate (0);
} }
/* Return true if this vector and OTHER have the same elements and
are encoded in the same way. */
template<typename T, typename Derived>
bool
vector_builder<T, Derived>::operator == (const Derived &other) const
{
if (m_full_nelts != other.m_full_nelts
|| m_npatterns != other.m_npatterns
|| m_nelts_per_pattern != other.m_nelts_per_pattern)
return false;
unsigned int nelts = encoded_nelts ();
for (unsigned int i = 0; i < nelts; ++i)
if (!derived ()->equal_p ((*this)[i], other[i]))
return false;
return true;
}
/* Return the value of vector element I, which might or might not be /* Return the value of vector element I, which might or might not be
encoded explicitly. */ encoded explicitly. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment