Commit 4ce6ab68 by Richard Sandiford, committed by Richard Sandiford

Implement more rtx vector folds on variable-length vectors

This patch extends the tree-level folding of variable-length vectors
so that it can also be used on rtxes.  The first step is to move
the tree_vector_builder new_unary/binary_operation routines to the
parent vector_builder class (which in turn means adding a new
template parameter).  The second step is to make simplify-rtx.c
use a direct rtx analogue of the VECTOR_CST handling in fold-const.c.

2019-07-29  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* vector-builder.h (vector_builder): Add a shape template parameter.
	(vector_builder::new_unary_operation): New function, generalizing
	the old tree_vector_builder function.
	(vector_builder::new_binary_operation): Likewise.
	(vector_builder::binary_encoded_nelts): Likewise.
	* int-vector-builder.h (int_vector_builder): Update template
	parameters to vector_builder.
	(int_vector_builder::shape_nelts): New function.
	* rtx-vector-builder.h (rtx_vector_builder): Update template
	parameters to vector_builder.
	(rtx_vector_builder::shape_nelts): New function.
	(rtx_vector_builder::nelts_of): Likewise.
	(rtx_vector_builder::npatterns_of): Likewise.
	(rtx_vector_builder::nelts_per_pattern_of): Likewise.
	* tree-vector-builder.h (tree_vector_builder): Update template
	parameters to vector_builder.
	(tree_vector_builder::shape_nelts): New function.
	(tree_vector_builder::nelts_of): Likewise.
	(tree_vector_builder::npatterns_of): Likewise.
	(tree_vector_builder::nelts_per_pattern_of): Likewise.
	* tree-vector-builder.c (tree_vector_builder::new_unary_operation)
	(tree_vector_builder::new_binary_operation): Delete.
	(tree_vector_builder::binary_encoded_nelts): Likewise.
	* simplify-rtx.c: Include rtx-vector-builder.h.
	(distributes_over_addition_p): New function.
	(simplify_const_unary_operation)
	(simplify_const_binary_operation): Generalize handling of vector
	constants to include variable-length vectors.
	(test_vector_ops_series): Add more tests.

From-SVN: r273867
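
As a rough illustration of the first step, here is a self-contained sketch
of the generalized CRTP interface (toy names and simplified signatures, not
the real vector-builder.h): the base class now takes a Shape type alongside
the element and derived types, and can implement new_unary_operation
generically by delegating shape queries to static hooks on the derived class.

  #include <vector>

  template<typename T, typename Shape, typename Derived>
  class toy_vector_builder
  {
  public:
    /* Record an encoding of NPATTERNS patterns with NELTS_PER_PATTERN
       elements each; the elements themselves arrive via quick_push.  */
    void new_vector (Shape shape, unsigned int npatterns,
                     unsigned int nelts_per_pattern)
    {
      m_shape = shape;
      m_npatterns = npatterns;
      m_nelts_per_pattern = nelts_per_pattern;
      m_elts.clear ();
    }

    /* Start building the result of a unary operation on SRC.  If the
       operation cannot handle stepped encodings (ALLOW_STEPPED_P is
       false), fall back to a full, one-element-per-pattern encoding.
       In GCC that fallback needs a compile-time element count; in this
       toy the count is always constant.  */
    bool new_unary_operation (Shape shape, const Derived &src,
                              bool allow_stepped_p)
    {
      unsigned int npatterns = Derived::npatterns_of (src);
      unsigned int nelts_per_pattern = Derived::nelts_per_pattern_of (src);
      if (!allow_stepped_p && nelts_per_pattern > 2)
        {
          npatterns = Derived::shape_nelts (shape);
          nelts_per_pattern = 1;
        }
      new_vector (shape, npatterns, nelts_per_pattern);
      return true;
    }

    unsigned int encoded_nelts () const
    { return m_npatterns * m_nelts_per_pattern; }
    void quick_push (T elt) { m_elts.push_back (elt); }

  protected:
    Shape m_shape {};
    unsigned int m_npatterns = 1, m_nelts_per_pattern = 1;
    std::vector<T> m_elts;
  };

  /* Fixed-length integer vectors: the shape is simply the element
     count, as in int-vector-builder.h.  */
  struct toy_int_builder
    : toy_vector_builder<int, unsigned int, toy_int_builder>
  {
    static unsigned int shape_nelts (unsigned int n) { return n; }
    static unsigned int npatterns_of (const toy_int_builder &b)
    { return b.m_npatterns; }
    static unsigned int nelts_per_pattern_of (const toy_int_builder &b)
    { return b.m_nelts_per_pattern; }
  };

  int main ()
  {
    toy_int_builder src, dst;
    src.new_vector (8, 1, 3);   /* stepped encoding of 8 elements */
    dst.new_unary_operation (8, src, /*allow_stepped_p=*/false);
    return dst.encoded_nelts () == 8 ? 0 : 1;   /* falls back to full */
  }

GCC's real base class instantiates the shape as poly_uint64, machine_mode
or tree, as the header changes below show.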
--- a/gcc/int-vector-builder.h
+++ b/gcc/int-vector-builder.h
@@ -26,10 +26,11 @@ along with GCC; see the file COPYING3.  If not see
    encoding as tree and rtx constants.  See vector_builder for more
    details.  */
 template<typename T>
-class int_vector_builder : public vector_builder<T, int_vector_builder<T> >
+class int_vector_builder : public vector_builder<T, poly_uint64,
+                                                 int_vector_builder<T> >
 {
-  typedef vector_builder<T, int_vector_builder> parent;
-  friend class vector_builder<T, int_vector_builder>;
+  typedef vector_builder<T, poly_uint64, int_vector_builder> parent;
+  friend class vector_builder<T, poly_uint64, int_vector_builder>;
 
 public:
   int_vector_builder () {}
@@ -45,6 +46,8 @@ private:
   T apply_step (T, unsigned int, T) const;
   bool can_elide_p (T) const { return true; }
   void note_representative (T *, T) {}
+
+  static poly_uint64 shape_nelts (poly_uint64 x) { return x; }
 };
 
 /* Create a new builder for a vector with FULL_NELTS elements.
--- a/gcc/rtx-vector-builder.h
+++ b/gcc/rtx-vector-builder.h
@@ -24,10 +24,11 @@ along with GCC; see the file COPYING3.  If not see
 /* This class is used to build VECTOR_CSTs from a sequence of elements.
    See vector_builder for more details.  */
-class rtx_vector_builder : public vector_builder<rtx, rtx_vector_builder>
+class rtx_vector_builder : public vector_builder<rtx, machine_mode,
+                                                 rtx_vector_builder>
 {
-  typedef vector_builder<rtx, rtx_vector_builder> parent;
-  friend class vector_builder<rtx, rtx_vector_builder>;
+  typedef vector_builder<rtx, machine_mode, rtx_vector_builder> parent;
+  friend class vector_builder<rtx, machine_mode, rtx_vector_builder>;
 
 public:
   rtx_vector_builder () : m_mode (VOIDmode) {}
@@ -48,6 +49,15 @@ private:
   bool can_elide_p (rtx) const { return true; }
   void note_representative (rtx *, rtx) {}
 
+  static poly_uint64 shape_nelts (machine_mode mode)
+  { return GET_MODE_NUNITS (mode); }
+  static poly_uint64 nelts_of (const_rtx x)
+  { return CONST_VECTOR_NUNITS (x); }
+  static unsigned int npatterns_of (const_rtx x)
+  { return CONST_VECTOR_NPATTERNS (x); }
+  static unsigned int nelts_per_pattern_of (const_rtx x)
+  { return CONST_VECTOR_NELTS_PER_PATTERN (x); }
+
   rtx find_cached_value ();
 
   machine_mode m_mode;
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -35,6 +35,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "flags.h"
 #include "selftest.h"
 #include "selftest-rtl.h"
+#include "rtx-vector-builder.h"
 
 /* Simplification and canonicalization of RTL.  */
@@ -1753,27 +1754,23 @@ simplify_const_unary_operation (enum rtx_code code, machine_mode mode,
   if (VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
     {
-      unsigned int n_elts;
-      if (!CONST_VECTOR_NUNITS (op).is_constant (&n_elts))
-        return NULL_RTX;
-
-      machine_mode opmode = GET_MODE (op);
-      gcc_assert (known_eq (GET_MODE_NUNITS (mode), n_elts));
-      gcc_assert (known_eq (GET_MODE_NUNITS (opmode), n_elts));
-      rtvec v = rtvec_alloc (n_elts);
-      unsigned int i;
+      gcc_assert (GET_MODE (op) == op_mode);
 
-      for (i = 0; i < n_elts; i++)
+      rtx_vector_builder builder;
+      if (!builder.new_unary_operation (mode, op, false))
+        return 0;
+
+      unsigned int count = builder.encoded_nelts ();
+      for (unsigned int i = 0; i < count; i++)
        {
          rtx x = simplify_unary_operation (code, GET_MODE_INNER (mode),
                                            CONST_VECTOR_ELT (op, i),
-                                           GET_MODE_INNER (opmode));
+                                           GET_MODE_INNER (op_mode));
          if (!x || !valid_for_const_vector_p (mode, x))
            return 0;
-         RTVEC_ELT (v, i) = x;
+         builder.quick_push (x);
        }
-      return gen_rtx_CONST_VECTOR (mode, v);
+      return builder.build ();
     }
 
   /* The order of these tests is critical so that, for example, we don't
@@ -4059,6 +4056,27 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
   return 0;
 }
 
+/* Return true if binary operation OP distributes over addition in operand
+   OPNO, with the other operand being held constant.  OPNO counts from 1.  */
+
+static bool
+distributes_over_addition_p (rtx_code op, int opno)
+{
+  switch (op)
+    {
+    case PLUS:
+    case MINUS:
+    case MULT:
+      return true;
+
+    case ASHIFT:
+      return opno == 1;
+
+    default:
+      return false;
+    }
+}
+
 rtx
 simplify_const_binary_operation (enum rtx_code code, machine_mode mode,
                                  rtx op0, rtx op1)
@@ -4068,26 +4086,45 @@ simplify_const_binary_operation (enum rtx_code code, machine_mode mode,
       && GET_CODE (op0) == CONST_VECTOR
       && GET_CODE (op1) == CONST_VECTOR)
     {
-      unsigned int n_elts;
-      if (!CONST_VECTOR_NUNITS (op0).is_constant (&n_elts))
-        return NULL_RTX;
-
-      gcc_assert (known_eq (n_elts, CONST_VECTOR_NUNITS (op1)));
-      gcc_assert (known_eq (n_elts, GET_MODE_NUNITS (mode)));
-      rtvec v = rtvec_alloc (n_elts);
-      unsigned int i;
+      bool step_ok_p;
+      if (CONST_VECTOR_STEPPED_P (op0)
+          && CONST_VECTOR_STEPPED_P (op1))
+        /* We can operate directly on the encoding if:
+
+             a3 - a2 == a2 - a1 && b3 - b2 == b2 - b1
+           implies
+             (a3 op b3) - (a2 op b2) == (a2 op b2) - (a1 op b1)
+
+           Addition and subtraction are the supported operators
+           for which this is true.  */
+        step_ok_p = (code == PLUS || code == MINUS);
+      else if (CONST_VECTOR_STEPPED_P (op0))
+        /* We can operate directly on stepped encodings if:
+
+             a3 - a2 == a2 - a1
+           implies:
+             (a3 op c) - (a2 op c) == (a2 op c) - (a1 op c)
+
+           which is true if (x -> x op c) distributes over addition.  */
+        step_ok_p = distributes_over_addition_p (code, 1);
+      else
+        /* Similarly in reverse.  */
+        step_ok_p = distributes_over_addition_p (code, 2);
+
+      rtx_vector_builder builder;
+      if (!builder.new_binary_operation (mode, op0, op1, step_ok_p))
+        return 0;
 
-      for (i = 0; i < n_elts; i++)
+      unsigned int count = builder.encoded_nelts ();
+      for (unsigned int i = 0; i < count; i++)
        {
          rtx x = simplify_binary_operation (code, GET_MODE_INNER (mode),
                                             CONST_VECTOR_ELT (op0, i),
                                             CONST_VECTOR_ELT (op1, i));
          if (!x || !valid_for_const_vector_p (mode, x))
            return 0;
-         RTVEC_ELT (v, i) = x;
+         builder.quick_push (x);
        }
-      return gen_rtx_CONST_VECTOR (mode, v);
+      return builder.build ();
     }
 
   if (VECTOR_MODE_P (mode)
@@ -7012,6 +7049,58 @@ test_vector_ops_series (machine_mode mode, rtx scalar_reg)
   ASSERT_RTX_EQ (series_0_m1,
                  simplify_binary_operation (VEC_SERIES, mode, const0_rtx,
                                             constm1_rtx));
+
+  /* Test NEG on constant vector series.  */
+  ASSERT_RTX_EQ (series_0_m1,
+                 simplify_unary_operation (NEG, mode, series_0_1, mode));
+  ASSERT_RTX_EQ (series_0_1,
+                 simplify_unary_operation (NEG, mode, series_0_m1, mode));
+
+  /* Test PLUS and MINUS on constant vector series.  */
+  rtx scalar2 = gen_int_mode (2, inner_mode);
+  rtx scalar3 = gen_int_mode (3, inner_mode);
+  rtx series_1_1 = gen_const_vec_series (mode, const1_rtx, const1_rtx);
+  rtx series_0_2 = gen_const_vec_series (mode, const0_rtx, scalar2);
+  rtx series_1_3 = gen_const_vec_series (mode, const1_rtx, scalar3);
+  ASSERT_RTX_EQ (series_1_1,
+                 simplify_binary_operation (PLUS, mode, series_0_1,
+                                            CONST1_RTX (mode)));
+  ASSERT_RTX_EQ (series_0_m1,
+                 simplify_binary_operation (PLUS, mode, CONST0_RTX (mode),
+                                            series_0_m1));
+  ASSERT_RTX_EQ (series_1_3,
+                 simplify_binary_operation (PLUS, mode, series_1_1,
+                                            series_0_2));
+  ASSERT_RTX_EQ (series_0_1,
+                 simplify_binary_operation (MINUS, mode, series_1_1,
+                                            CONST1_RTX (mode)));
+  ASSERT_RTX_EQ (series_1_1,
+                 simplify_binary_operation (MINUS, mode, CONST1_RTX (mode),
+                                            series_0_m1));
+  ASSERT_RTX_EQ (series_1_1,
+                 simplify_binary_operation (MINUS, mode, series_1_3,
+                                            series_0_2));
+
+  /* Test MULT between constant vectors.  */
+  rtx vec2 = gen_const_vec_duplicate (mode, scalar2);
+  rtx vec3 = gen_const_vec_duplicate (mode, scalar3);
+  rtx scalar9 = gen_int_mode (9, inner_mode);
+  rtx series_3_9 = gen_const_vec_series (mode, scalar3, scalar9);
+  ASSERT_RTX_EQ (series_0_2,
+                 simplify_binary_operation (MULT, mode, series_0_1, vec2));
+  ASSERT_RTX_EQ (series_3_9,
+                 simplify_binary_operation (MULT, mode, vec3, series_1_3));
+  if (!GET_MODE_NUNITS (mode).is_constant ())
+    ASSERT_FALSE (simplify_binary_operation (MULT, mode, series_0_1,
+                                             series_0_1));
+
+  /* Test ASHIFT between constant vectors.  */
+  ASSERT_RTX_EQ (series_0_2,
+                 simplify_binary_operation (ASHIFT, mode, series_0_1,
+                                            CONST1_RTX (mode)));
+  if (!GET_MODE_NUNITS (mode).is_constant ())
+    ASSERT_FALSE (simplify_binary_operation (ASHIFT, mode, CONST1_RTX (mode),
+                                             series_0_1));
 }
 
 /* Verify simplify_merge_mask works correctly.  */
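
The distributivity test above can be checked numerically.  In a stepped
encoding the tail of each pattern continues with a constant step, so an
elementwise operation against a duplicated constant can reuse the encoding
exactly when the operation distributes over the additions that generate the
tail.  A self-contained sketch (toy expansion helper, not GCC code):

  #include <cassert>
  #include <cstdio>
  #include <vector>

  /* Expand a single-pattern stepped encoding { E0, E1, E2, ... } to N
     elements: the tail continues with step E2 - E1.  */
  static std::vector<long>
  expand (long e0, long e1, long e2, unsigned int n)
  {
    std::vector<long> v = { e0, e1, e2 };
    long step = e2 - e1;
    while (v.size () < n)
      v.push_back (v.back () + step);
    v.resize (n);
    return v;
  }

  int main ()
  {
    const unsigned int n = 8;
    std::vector<long> series = expand (0, 1, 2, n);   /* { 0, 1, 2, ... } */

    /* MULT by a constant distributes over addition, so transforming just
       the three encoded elements and re-expanding matches transforming
       every element of the full vector.  */
    std::vector<long> direct = expand (0 * 3, 1 * 3, 2 * 3, n);
    for (unsigned int i = 0; i < n; ++i)
      assert (direct[i] == series[i] * 3);

    /* ASHIFT with the shift amount stepped (opno == 2) does not:
       1 << { 0, 1, 2, ... } is { 1, 2, 4, ... }, which is no longer an
       arithmetic series, so the encoding cannot be reused.  */
    long s0 = 1L << series[0], s1 = 1L << series[1], s2 = 1L << series[2];
    assert (s2 - s1 != s1 - s0);

    printf ("ok\n");
    return 0;
  }

This is also why the new selftests expect simplify_binary_operation to fail
for variable-length MULT and for ASHIFT with a stepped shift amount.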
--- a/gcc/tree-vector-builder.c
+++ b/gcc/tree-vector-builder.c
@@ -24,103 +24,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "fold-const.h"
 #include "tree-vector-builder.h"
 
-/* Try to start building a new vector of type TYPE that holds the result of
-   a unary operation on VECTOR_CST T.  ALLOW_STEPPED_P is true if the
-   operation can handle stepped encodings directly, without having to
-   expand the full sequence.
-
-   Return true if the operation is possible, which it always is when
-   ALLOW_STEPPED_P is true.  Leave the builder unchanged otherwise.  */
-
-bool
-tree_vector_builder::new_unary_operation (tree type, tree t,
-                                          bool allow_stepped_p)
-{
-  poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type);
-  gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t))));
-  unsigned int npatterns = VECTOR_CST_NPATTERNS (t);
-  unsigned int nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (t);
-  if (!allow_stepped_p && nelts_per_pattern > 2)
-    {
-      if (!full_nelts.is_constant ())
-        return false;
-      npatterns = full_nelts.to_constant ();
-      nelts_per_pattern = 1;
-    }
-  new_vector (type, npatterns, nelts_per_pattern);
-  return true;
-}
-
-/* Try to start building a new vector of type TYPE that holds the result of
-   a binary operation on VECTOR_CSTs T1 and T2.  ALLOW_STEPPED_P is true if
-   the operation can handle stepped encodings directly, without having to
-   expand the full sequence.
-
-   Return true if the operation is possible.  Leave the builder unchanged
-   otherwise.  */
-
-bool
-tree_vector_builder::new_binary_operation (tree type, tree t1, tree t2,
-                                           bool allow_stepped_p)
-{
-  poly_uint64 full_nelts = TYPE_VECTOR_SUBPARTS (type);
-  gcc_assert (known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1)))
-              && known_eq (full_nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2))));
-  /* Conceptually we split the patterns in T1 and T2 until we have
-     an equal number for both.  Each split pattern requires the same
-     number of elements per pattern as the original.  E.g. splitting:
-
-       { 1, 2, 3, ... }
-
-     into two gives:
-
-       { 1, 3, 5, ... }
-       { 2, 4, 6, ... }
-
-     while splitting:
-
-       { 1, 0, ... }
-
-     into two gives:
-
-       { 1, 0, ... }
-       { 0, 0, ... }.  */
-  unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1),
-                                                  VECTOR_CST_NPATTERNS (t2));
-  unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1),
-                                        VECTOR_CST_NELTS_PER_PATTERN (t2));
-  if (!allow_stepped_p && nelts_per_pattern > 2)
-    {
-      if (!full_nelts.is_constant ())
-        return false;
-      npatterns = full_nelts.to_constant ();
-      nelts_per_pattern = 1;
-    }
-  new_vector (type, npatterns, nelts_per_pattern);
-  return true;
-}
-
-/* Return the number of elements that the caller needs to operate on in
-   order to handle a binary operation on VECTOR_CSTs T1 and T2.  This static
-   function is used instead of new_binary_operation if the result of the
-   operation is not a VECTOR_CST.  */
-
-unsigned int
-tree_vector_builder::binary_encoded_nelts (tree t1, tree t2)
-{
-  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (t1));
-  gcc_assert (known_eq (nelts, TYPE_VECTOR_SUBPARTS (TREE_TYPE (t2))));
-  /* See new_binary_operation for details.  */
-  unsigned int npatterns = least_common_multiple (VECTOR_CST_NPATTERNS (t1),
-                                                  VECTOR_CST_NPATTERNS (t2));
-  unsigned int nelts_per_pattern = MAX (VECTOR_CST_NELTS_PER_PATTERN (t1),
-                                        VECTOR_CST_NELTS_PER_PATTERN (t2));
-  unsigned HOST_WIDE_INT const_nelts;
-  if (nelts.is_constant (&const_nelts))
-    return MIN (npatterns * nelts_per_pattern, const_nelts);
-  return npatterns * nelts_per_pattern;
-}
-
 /* Return a vector element with the value BASE + FACTOR * STEP.  */
 
 tree
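
The splitting rule in the deleted comment reduces to a simple formula, now
hosted in the parent class: a binary operation needs lcm (n1, n2) patterns
of max (e1, e2) elements each.  A toy version of the calculation (C++17,
illustrative types only, not the GCC functions):

  #include <algorithm>
  #include <cassert>
  #include <numeric>

  /* Encoding of a VECTOR_CST-style constant: NPATTERNS interleaved
     patterns, each represented by its first NELTS_PER_PATTERN
     elements.  */
  struct toy_encoding
  {
    unsigned int npatterns;
    unsigned int nelts_per_pattern;
  };

  /* Number of encoded elements a binary operation on A and B must
     process: split each input until both have the same number of
     patterns (the lcm), keeping each input's elements per pattern
     (so take the max).  */
  static unsigned int
  toy_binary_encoded_nelts (const toy_encoding &a, const toy_encoding &b)
  {
    unsigned int npatterns = std::lcm (a.npatterns, b.npatterns);
    unsigned int nelts_per_pattern = std::max (a.nelts_per_pattern,
                                               b.nelts_per_pattern);
    return npatterns * nelts_per_pattern;
  }

  int main ()
  {
    toy_encoding stepped = { 1, 3 };      /* { 1, 2, 3, ... } */
    toy_encoding alternating = { 2, 1 };  /* { 1, 0, 1, 0, ... } */
    /* lcm (1, 2) == 2 patterns of max (3, 1) == 3 elements each.  */
    assert (toy_binary_encoded_nelts (stepped, alternating) == 6);
    return 0;
  }

The rtx-level code reaches the same calculation through the shared parent
class, which is why the tree-specific copies above can be deleted.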
--- a/gcc/tree-vector-builder.h
+++ b/gcc/tree-vector-builder.h
@@ -24,10 +24,11 @@ along with GCC; see the file COPYING3.  If not see
 /* This class is used to build VECTOR_CSTs from a sequence of elements.
    See vector_builder for more details.  */
-class tree_vector_builder : public vector_builder<tree, tree_vector_builder>
+class tree_vector_builder : public vector_builder<tree, tree,
+                                                  tree_vector_builder>
 {
-  typedef vector_builder<tree, tree_vector_builder> parent;
-  friend class vector_builder<tree, tree_vector_builder>;
+  typedef vector_builder<tree, tree, tree_vector_builder> parent;
+  friend class vector_builder<tree, tree, tree_vector_builder>;
 
 public:
   tree_vector_builder () : m_type (0) {}
@@ -37,10 +38,6 @@ public:
   tree type () const { return m_type; }
 
   void new_vector (tree, unsigned int, unsigned int);
-  bool new_unary_operation (tree, tree, bool);
-  bool new_binary_operation (tree, tree, tree, bool);
-
-  static unsigned int binary_encoded_nelts (tree, tree);
 
 private:
   bool equal_p (const_tree, const_tree) const;
@@ -51,6 +48,15 @@ private:
   bool can_elide_p (const_tree) const;
   void note_representative (tree *, tree);
 
+  static poly_uint64 shape_nelts (const_tree t)
+  { return TYPE_VECTOR_SUBPARTS (t); }
+  static poly_uint64 nelts_of (const_tree t)
+  { return VECTOR_CST_NELTS (t); }
+  static unsigned int npatterns_of (const_tree t)
+  { return VECTOR_CST_NPATTERNS (t); }
+  static unsigned int nelts_per_pattern_of (const_tree t)
+  { return VECTOR_CST_NELTS_PER_PATTERN (t); }
+
   tree m_type;
 };