Commit 9b1de7e2 by Richard Sandiford Committed by Richard Sandiford

Add more vec_duplicate simplifications

This patch adds a vec_duplicate_p helper that tests for constant
or non-constant vector duplicates.  Together with the existing
const_vec_duplicate_p, this complements the gen_vec_duplicate
and gen_const_vec_duplicate added by a previous patch.

The patch uses the new routines to add more rtx simplifications
involving vector duplicates.  These mirror simplifications that
we already do for CONST_VECTOR broadcasts and are needed for
variable-length SVE, which uses:

  (const:M (vec_duplicate:M X))

to represent constant broadcasts instead.  The simplifications do
trigger on the testsuite for variable duplicates too, and in each
case I saw the change was an improvement.

The best way of testing the new simplifications seemed to be
via selftests.  The patch cribs part of David's patch here:
https://gcc.gnu.org/ml/gcc-patches/2016-07/msg00270.html .

2017-11-01  Richard Sandiford  <richard.sandiford@linaro.org>
	    David Malcolm  <dmalcolm@redhat.com>
	    Alan Hayward  <alan.hayward@arm.com>
	    David Sherwood  <david.sherwood@arm.com>

gcc/
	* rtl.h (vec_duplicate_p): New function.
	* selftest-rtl.c (assert_rtx_eq_at): New function.
	* selftest-rtl.h (ASSERT_RTX_EQ): New macro.
	(assert_rtx_eq_at): Declare.
	* selftest.h (selftest::simplify_rtx_c_tests): Declare.
	* selftest-run-tests.c (selftest::run_tests): Call it.
	* simplify-rtx.c: Include selftest.h and selftest-rtl.h.
	(simplify_unary_operation_1): Recursively handle vector duplicates.
	(simplify_binary_operation_1): Likewise.  Handle VEC_SELECTs of
	vector duplicates.
	(simplify_subreg): Handle subregs of vector duplicates.
	(make_test_reg, test_vector_ops_duplicate, test_vector_ops)
	(selftest::simplify_rtx_c_tests): New functions.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Malcolm <dmalcolm@redhat.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>

From-SVN: r254294
parent 59d06c05
2017-11-01 Richard Sandiford <richard.sandiford@linaro.org> 2017-11-01 Richard Sandiford <richard.sandiford@linaro.org>
David Malcolm <dmalcolm@redhat.com>
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
* rtl.h (vec_duplicate_p): New function.
* selftest-rtl.c (assert_rtx_eq_at): New function.
* selftest-rtl.h (ASSERT_RTX_EQ): New macro.
(assert_rtx_eq_at): Declare.
* selftest.h (selftest::simplify_rtx_c_tests): Declare.
* selftest-run-tests.c (selftest::run_tests): Call it.
* simplify-rtx.c: Include selftest.h and selftest-rtl.h.
(simplify_unary_operation_1): Recursively handle vector duplicates.
(simplify_binary_operation_1): Likewise. Handle VEC_SELECTs of
vector duplicates.
(simplify_subreg): Handle subregs of vector duplicates.
(make_test_reg, test_vector_ops_duplicate, test_vector_ops)
(selftest::simplify_rtx_c_tests): New functions.
2017-11-01 Richard Sandiford <richard.sandiford@linaro.org>
Alan Hayward <alan.hayward@arm.com> Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com> David Sherwood <david.sherwood@arm.com>
...@@ -2772,6 +2772,21 @@ const_vec_duplicate_p (T x, T *elt) ...@@ -2772,6 +2772,21 @@ const_vec_duplicate_p (T x, T *elt)
return false; return false;
} }
/* Test whether X is a vector whose elements all share a single value,
   regardless of whether that value is constant.  On success, store the
   shared element in *ELT and return true.  */

template <typename T>
inline bool
vec_duplicate_p (T x, T *elt)
{
  /* Anything that is not an explicit VEC_DUPLICATE is left to the
     constant-only check.  */
  if (GET_CODE (x) != VEC_DUPLICATE)
    return const_vec_duplicate_p (x, elt);

  /* The sole operand of a VEC_DUPLICATE is the element being repeated.  */
  *elt = XEXP (x, 0);
  return true;
}
/* If X is a vector constant with a duplicated element value, return that /* If X is a vector constant with a duplicated element value, return that
element value, otherwise return X. */ element value, otherwise return X. */
......
...@@ -35,6 +35,29 @@ along with GCC; see the file COPYING3. If not see ...@@ -35,6 +35,29 @@ along with GCC; see the file COPYING3. If not see
namespace selftest { namespace selftest {
/* Check that rtx EXPECTED and rtx ACTUAL are equal according to
   rtx_equal_p.  Record a pass via ::selftest::pass when they are;
   otherwise print both expressions to stderr and abort.  LOC is the
   effective location of the assertion and MSG is its description.  */

void
assert_rtx_eq_at (const location &loc, const char *msg,
                  rtx expected, rtx actual)
{
  if (!rtx_equal_p (expected, actual))
    {
      /* Report where the assertion failed, then dump both operands so
         the mismatch can be inspected before aborting.  */
      fprintf (stderr, "%s:%i: %s: FAIL: %s\n", loc.m_file, loc.m_line,
               loc.m_function, msg);
      fprintf (stderr, " expected: ");
      print_rtl (stderr, expected);
      fprintf (stderr, "\n actual: ");
      print_rtl (stderr, actual);
      fprintf (stderr, "\n");
      abort ();
    }

  ::selftest::pass (loc, msg);
}
/* Compare rtx EXPECTED and ACTUAL by pointer equality, calling /* Compare rtx EXPECTED and ACTUAL by pointer equality, calling
::selftest::pass if they are equal, aborting if they are non-equal. ::selftest::pass if they are equal, aborting if they are non-equal.
LOC is the effective location of the assertion, MSG describes it. */ LOC is the effective location of the assertion, MSG describes it. */
......
...@@ -47,6 +47,15 @@ assert_rtl_dump_eq (const location &loc, const char *expected_dump, rtx x, ...@@ -47,6 +47,15 @@ assert_rtl_dump_eq (const location &loc, const char *expected_dump, rtx x,
assert_rtl_dump_eq (SELFTEST_LOCATION, (EXPECTED_DUMP), (RTX), \ assert_rtl_dump_eq (SELFTEST_LOCATION, (EXPECTED_DUMP), (RTX), \
(REUSE_MANAGER)) (REUSE_MANAGER))
/* Evaluate rtx EXPECTED and ACTUAL and pass them to
   ::selftest::assert_rtx_eq_at, together with SELFTEST_LOCATION and a
   description string built by stringifying the two macro arguments.  */
#define ASSERT_RTX_EQ(EXPECTED, ACTUAL) \
SELFTEST_BEGIN_STMT \
const char *desc = "ASSERT_RTX_EQ (" #EXPECTED ", " #ACTUAL ")"; \
::selftest::assert_rtx_eq_at (SELFTEST_LOCATION, desc, (EXPECTED), \
(ACTUAL)); \
SELFTEST_END_STMT
/* Function called by ASSERT_RTX_EQ.  The arguments are, in order, the
   location of the assertion, its description, the expected rtx and the
   actual rtx.  */
extern void assert_rtx_eq_at (const location &, const char *, rtx, rtx);
/* Evaluate rtx EXPECTED and ACTUAL and compare them with == /* Evaluate rtx EXPECTED and ACTUAL and compare them with ==
(i.e. pointer equality), calling ::selftest::pass if they are (i.e. pointer equality), calling ::selftest::pass if they are
equal, aborting if they are non-equal. */ equal, aborting if they are non-equal. */
......
...@@ -95,6 +95,7 @@ selftest::run_tests () ...@@ -95,6 +95,7 @@ selftest::run_tests ()
store_merging_c_tests (); store_merging_c_tests ();
predict_c_tests (); predict_c_tests ();
simplify_rtx_c_tests ();
/* Run any lang-specific selftests. */ /* Run any lang-specific selftests. */
lang_hooks.run_lang_selftests (); lang_hooks.run_lang_selftests ();
......
...@@ -199,6 +199,7 @@ extern void unique_ptr_tests_cc_tests (); ...@@ -199,6 +199,7 @@ extern void unique_ptr_tests_cc_tests ();
extern void vec_c_tests (); extern void vec_c_tests ();
extern void wide_int_cc_tests (); extern void wide_int_cc_tests ();
extern void predict_c_tests (); extern void predict_c_tests ();
extern void simplify_rtx_c_tests ();
extern int num_passes; extern int num_passes;
......
...@@ -33,6 +33,8 @@ along with GCC; see the file COPYING3. If not see ...@@ -33,6 +33,8 @@ along with GCC; see the file COPYING3. If not see
#include "diagnostic-core.h" #include "diagnostic-core.h"
#include "varasm.h" #include "varasm.h"
#include "flags.h" #include "flags.h"
#include "selftest.h"
#include "selftest-rtl.h"
/* Simplification and canonicalization of RTL. */ /* Simplification and canonicalization of RTL. */
...@@ -925,7 +927,7 @@ static rtx ...@@ -925,7 +927,7 @@ static rtx
simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op)
{ {
enum rtx_code reversed; enum rtx_code reversed;
rtx temp; rtx temp, elt;
scalar_int_mode inner, int_mode, op_mode, op0_mode; scalar_int_mode inner, int_mode, op_mode, op0_mode;
switch (code) switch (code)
...@@ -1684,6 +1686,28 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op) ...@@ -1684,6 +1686,28 @@ simplify_unary_operation_1 (enum rtx_code code, machine_mode mode, rtx op)
break; break;
} }
if (VECTOR_MODE_P (mode) && vec_duplicate_p (op, &elt))
{
/* Try applying the operator to ELT and see if that simplifies.
We can duplicate the result if so.
The reason we don't use simplify_gen_unary is that it isn't
necessarily a win to convert things like:
(neg:V (vec_duplicate:V (reg:S R)))
to:
(vec_duplicate:V (neg:S (reg:S R)))
The first might be done entirely in vector registers while the
second might need a move between register files. */
temp = simplify_unary_operation (code, GET_MODE_INNER (mode),
elt, GET_MODE_INNER (GET_MODE (op)));
if (temp)
return gen_vec_duplicate (mode, temp);
}
return 0; return 0;
} }
...@@ -2141,7 +2165,7 @@ static rtx ...@@ -2141,7 +2165,7 @@ static rtx
simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
rtx op0, rtx op1, rtx trueop0, rtx trueop1) rtx op0, rtx op1, rtx trueop0, rtx trueop1)
{ {
rtx tem, reversed, opleft, opright; rtx tem, reversed, opleft, opright, elt0, elt1;
HOST_WIDE_INT val; HOST_WIDE_INT val;
scalar_int_mode int_mode, inner_mode; scalar_int_mode int_mode, inner_mode;
...@@ -3484,6 +3508,9 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, ...@@ -3484,6 +3508,9 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
gcc_assert (XVECLEN (trueop1, 0) == 1); gcc_assert (XVECLEN (trueop1, 0) == 1);
gcc_assert (CONST_INT_P (XVECEXP (trueop1, 0, 0))); gcc_assert (CONST_INT_P (XVECEXP (trueop1, 0, 0)));
if (vec_duplicate_p (trueop0, &elt0))
return elt0;
if (GET_CODE (trueop0) == CONST_VECTOR) if (GET_CODE (trueop0) == CONST_VECTOR)
return CONST_VECTOR_ELT (trueop0, INTVAL (XVECEXP return CONST_VECTOR_ELT (trueop0, INTVAL (XVECEXP
(trueop1, 0, 0))); (trueop1, 0, 0)));
...@@ -3566,9 +3593,6 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, ...@@ -3566,9 +3593,6 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
tmp_op, gen_rtx_PARALLEL (VOIDmode, vec)); tmp_op, gen_rtx_PARALLEL (VOIDmode, vec));
return tmp; return tmp;
} }
if (GET_CODE (trueop0) == VEC_DUPLICATE
&& GET_MODE (XEXP (trueop0, 0)) == mode)
return XEXP (trueop0, 0);
} }
else else
{ {
...@@ -3577,6 +3601,11 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, ...@@ -3577,6 +3601,11 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
== GET_MODE_INNER (GET_MODE (trueop0))); == GET_MODE_INNER (GET_MODE (trueop0)));
gcc_assert (GET_CODE (trueop1) == PARALLEL); gcc_assert (GET_CODE (trueop1) == PARALLEL);
if (vec_duplicate_p (trueop0, &elt0))
/* It doesn't matter which elements are selected by trueop1,
because they are all the same. */
return gen_vec_duplicate (mode, elt0);
if (GET_CODE (trueop0) == CONST_VECTOR) if (GET_CODE (trueop0) == CONST_VECTOR)
{ {
int elt_size = GET_MODE_UNIT_SIZE (mode); int elt_size = GET_MODE_UNIT_SIZE (mode);
...@@ -3877,6 +3906,32 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode, ...@@ -3877,6 +3906,32 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
gcc_unreachable (); gcc_unreachable ();
} }
if (mode == GET_MODE (op0)
&& mode == GET_MODE (op1)
&& vec_duplicate_p (op0, &elt0)
&& vec_duplicate_p (op1, &elt1))
{
/* Try applying the operator to ELT and see if that simplifies.
We can duplicate the result if so.
The reason we don't use simplify_gen_binary is that it isn't
necessarily a win to convert things like:
(plus:V (vec_duplicate:V (reg:S R1))
(vec_duplicate:V (reg:S R2)))
to:
(vec_duplicate:V (plus:S (reg:S R1) (reg:S R2)))
The first might be done entirely in vector registers while the
second might need a move between register files. */
tem = simplify_binary_operation (code, GET_MODE_INNER (mode),
elt0, elt1);
if (tem)
return gen_vec_duplicate (mode, tem);
}
return 0; return 0;
} }
...@@ -6025,6 +6080,20 @@ simplify_subreg (machine_mode outermode, rtx op, ...@@ -6025,6 +6080,20 @@ simplify_subreg (machine_mode outermode, rtx op,
if (outermode == innermode && !byte) if (outermode == innermode && !byte)
return op; return op;
if (byte % GET_MODE_UNIT_SIZE (innermode) == 0)
{
rtx elt;
if (VECTOR_MODE_P (outermode)
&& GET_MODE_INNER (outermode) == GET_MODE_INNER (innermode)
&& vec_duplicate_p (op, &elt))
return gen_vec_duplicate (outermode, elt);
if (outermode == GET_MODE_INNER (innermode)
&& vec_duplicate_p (op, &elt))
return elt;
}
if (CONST_SCALAR_INT_P (op) if (CONST_SCALAR_INT_P (op)
|| CONST_DOUBLE_AS_FLOAT_P (op) || CONST_DOUBLE_AS_FLOAT_P (op)
|| GET_CODE (op) == CONST_FIXED || GET_CODE (op) == CONST_FIXED
...@@ -6330,3 +6399,125 @@ simplify_rtx (const_rtx x) ...@@ -6330,3 +6399,125 @@ simplify_rtx (const_rtx x)
} }
return NULL; return NULL;
} }
#if CHECKING_P
namespace selftest {
/* Return a fresh pseudo REG of mode MODE for selftests to use.  Each
   call hands out the next register number, starting just above
   LAST_VIRTUAL_REGISTER.  */

static rtx
make_test_reg (machine_mode mode)
{
  static int next_regno = LAST_VIRTUAL_REGISTER + 1;

  /* Claim the current number, then advance the counter for the next
     caller.  */
  int regno = next_regno;
  ++next_regno;
  return gen_rtx_REG (mode, regno);
}
/* Test vector simplifications involving VEC_DUPLICATE in which the
   operands and result have vector mode MODE.  SCALAR_REG is a pseudo
   register that holds one element of MODE.

   Each check compares the simplifier's result against a hand-built
   expected rtx.  The NOT/NEG/PLUS/MINUS checks only run when MODE is a
   MODE_VECTOR_INT mode; the VEC_SELECT and subreg checks run for every
   MODE passed in.  */
static void
test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg)
{
scalar_mode inner_mode = GET_MODE_INNER (mode);
/* The broadcast of SCALAR_REG that the checks below operate on.  */
rtx duplicate = gen_rtx_VEC_DUPLICATE (mode, scalar_reg);
unsigned int nunits = GET_MODE_NUNITS (mode);
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
{
/* Test some simple unary cases with VEC_DUPLICATE arguments.
   NOT applied to a broadcast of (not SCALAR_REG) is expected to give
   back the plain broadcast of SCALAR_REG.  */
rtx not_scalar_reg = gen_rtx_NOT (inner_mode, scalar_reg);
rtx duplicate_not = gen_rtx_VEC_DUPLICATE (mode, not_scalar_reg);
ASSERT_RTX_EQ (duplicate,
simplify_unary_operation (NOT, mode,
duplicate_not, mode));
/* Likewise NEG applied to a broadcast of (neg SCALAR_REG).  */
rtx neg_scalar_reg = gen_rtx_NEG (inner_mode, scalar_reg);
rtx duplicate_neg = gen_rtx_VEC_DUPLICATE (mode, neg_scalar_reg);
ASSERT_RTX_EQ (duplicate,
simplify_unary_operation (NEG, mode,
duplicate_neg, mode));
/* Test some simple binary cases with VEC_DUPLICATE arguments.
   Adding or subtracting the zero vector is expected to leave the
   broadcast unchanged, and subtracting the broadcast from itself is
   expected to return CONST0_RTX (MODE) itself (checked with the
   pointer-comparing ASSERT_RTX_PTR_EQ).  */
ASSERT_RTX_EQ (duplicate,
simplify_binary_operation (PLUS, mode, duplicate,
CONST0_RTX (mode)));
ASSERT_RTX_EQ (duplicate,
simplify_binary_operation (MINUS, mode, duplicate,
CONST0_RTX (mode)));
ASSERT_RTX_PTR_EQ (CONST0_RTX (mode),
simplify_binary_operation (MINUS, mode, duplicate,
duplicate));
}
/* Test a scalar VEC_SELECT of a VEC_DUPLICATE.  Selecting element 0
   is expected to return SCALAR_REG itself.  */
rtx zero_par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
ASSERT_RTX_PTR_EQ (scalar_reg,
simplify_binary_operation (VEC_SELECT, inner_mode,
duplicate, zero_par));
/* And again with the final element (index GET_MODE_NUNITS (MODE) - 1).  */
rtx last_index = gen_int_mode (GET_MODE_NUNITS (mode) - 1, word_mode);
rtx last_par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, last_index));
ASSERT_RTX_PTR_EQ (scalar_reg,
simplify_binary_operation (VEC_SELECT, inner_mode,
duplicate, last_par));
/* Test a scalar subreg of a VEC_DUPLICATE, taken at the lowpart
   offset of INNER_MODE within MODE.  */
unsigned int offset = subreg_lowpart_offset (inner_mode, mode);
ASSERT_RTX_EQ (scalar_reg,
simplify_gen_subreg (inner_mode, duplicate,
mode, offset));
/* The remaining checks need a two-element vector of INNER_MODE that is
   narrower than MODE, so they are skipped unless MODE has more than two
   elements and mode_for_vector yields a vector mode.  */
machine_mode narrower_mode;
if (nunits > 2
&& mode_for_vector (inner_mode, 2).exists (&narrower_mode)
&& VECTOR_MODE_P (narrower_mode))
{
/* Test VEC_SELECT of a vector.  The selector picks elements 1 and 0,
   in that order; the expected result is a NARROWER_MODE broadcast of
   SCALAR_REG.  */
rtx vec_par
= gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, const1_rtx, const0_rtx));
rtx narrower_duplicate
= gen_rtx_VEC_DUPLICATE (narrower_mode, scalar_reg);
ASSERT_RTX_EQ (narrower_duplicate,
simplify_binary_operation (VEC_SELECT, narrower_mode,
duplicate, vec_par));
/* Test a vector subreg of a VEC_DUPLICATE.
   NOTE: this OFFSET shadows the OFFSET declared above for the scalar
   subreg check; it is the lowpart offset of NARROWER_MODE in MODE.  */
unsigned int offset = subreg_lowpart_offset (narrower_mode, mode);
ASSERT_RTX_EQ (narrower_duplicate,
simplify_gen_subreg (narrower_mode, duplicate,
mode, offset));
}
}
/* Verify some simplifications involving vectors. */
static void
test_vector_ops ()
{
for (unsigned int i = 0; i < NUM_MACHINE_MODES; ++i)
{
machine_mode mode = (machine_mode) i;
if (VECTOR_MODE_P (mode))
{
rtx scalar_reg = make_test_reg (GET_MODE_INNER (mode));
test_vector_ops_duplicate (mode, scalar_reg);
}
}
}
/* Run all of the selftests within this file.  At present that is only
   test_vector_ops.  */
void
simplify_rtx_c_tests ()
{
test_vector_ops ();
}
} // namespace selftest
#endif /* CHECKING_P */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment