Commit 13c247d6 by Richard Sandiford Committed by Richard Sandiford

Handle VIEW_CONVERT_EXPR for variable-length vectors

This patch handles VIEW_CONVERT_EXPRs of variable-length VECTOR_CSTs
by adding tree-level versions of native_decode_vector_rtx and
simplify_const_vector_subreg.  It uses the same code for fixed-length
vectors, both to get more coverage and because operating directly on
the compressed encoding should be more efficient for longer vectors
with a regular pattern.

The structure and comments are very similar between the tree and
rtx routines.

2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* fold-const.c (native_encode_vector): Turn into a wrapper function,
	splitting the main code out into...
	(native_encode_vector_part): ...this new function.
	(native_decode_vector_tree): New function.
	(fold_view_convert_vector_encoding): Likewise.
	(fold_view_convert_expr): Use it for converting VECTOR_CSTs
	to VECTOR_TYPEs.

gcc/testsuite/
	* gcc.target/aarch64/sve/acle/general/temporaries_1.c: New test.

From-SVN: r278410
parent 8489e1f4
2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
* fold-const.c (native_encode_vector): Turn into a wrapper function,
splitting the main code out into...
(native_encode_vector_part): ...this new function.
(native_decode_vector_tree): New function.
(fold_view_convert_vector_encoding): Likewise.
(fold_view_convert_expr): Use it for converting VECTOR_CSTs
to VECTOR_TYPEs.
2019-11-18 Richard Sandiford <richard.sandiford@arm.com>
* tree-data-ref.c (create_intersect_range_checks_index): If the
alias pair describes simple WAW and WAR dependencies, just check
whether the first B access overlaps later A accesses.
...@@ -7715,22 +7715,18 @@ native_encode_complex (const_tree expr, unsigned char *ptr, int len, int off) ...@@ -7715,22 +7715,18 @@ native_encode_complex (const_tree expr, unsigned char *ptr, int len, int off)
return rsize + isize; return rsize + isize;
} }
/* Like native_encode_vector, but only encode the first COUNT elements.
/* Subroutine of native_encode_expr. Encode the VECTOR_CST The other arguments are as for native_encode_vector. */
specified by EXPR into the buffer PTR of length LEN bytes.
Return the number of bytes placed in the buffer, or zero
upon failure. */
static int static int
native_encode_vector (const_tree expr, unsigned char *ptr, int len, int off) native_encode_vector_part (const_tree expr, unsigned char *ptr, int len,
int off, unsigned HOST_WIDE_INT count)
{ {
unsigned HOST_WIDE_INT i, count; unsigned HOST_WIDE_INT i;
int size, offset; int size, offset;
tree itype, elem; tree itype, elem;
offset = 0; offset = 0;
if (!VECTOR_CST_NELTS (expr).is_constant (&count))
return 0;
itype = TREE_TYPE (TREE_TYPE (expr)); itype = TREE_TYPE (TREE_TYPE (expr));
size = GET_MODE_SIZE (SCALAR_TYPE_MODE (itype)); size = GET_MODE_SIZE (SCALAR_TYPE_MODE (itype));
for (i = 0; i < count; i++) for (i = 0; i < count; i++)
...@@ -7754,6 +7750,20 @@ native_encode_vector (const_tree expr, unsigned char *ptr, int len, int off) ...@@ -7754,6 +7750,20 @@ native_encode_vector (const_tree expr, unsigned char *ptr, int len, int off)
return offset; return offset;
} }
/* Subroutine of native_encode_expr. Encode the VECTOR_CST
specified by EXPR into the buffer PTR of length LEN bytes.
Return the number of bytes placed in the buffer, or zero
upon failure. */
/* Subroutine of native_encode_expr.  Encode the VECTOR_CST
   specified by EXPR into the buffer PTR of length LEN bytes.
   Return the number of bytes placed in the buffer, or zero
   upon failure.  */

static int
native_encode_vector (const_tree expr, unsigned char *ptr, int len, int off)
{
  /* Whole-vector encoding is only possible when the number of
     elements is a compile-time constant; report failure (zero)
     for variable-length vectors.  */
  unsigned HOST_WIDE_INT nelts;
  if (VECTOR_CST_NELTS (expr).is_constant (&nelts))
    return native_encode_vector_part (expr, ptr, len, off, nelts);
  return 0;
}
/* Subroutine of native_encode_expr. Encode the STRING_CST /* Subroutine of native_encode_expr. Encode the STRING_CST
specified by EXPR into the buffer PTR of length LEN bytes. specified by EXPR into the buffer PTR of length LEN bytes.
...@@ -8049,6 +8059,113 @@ can_native_interpret_type_p (tree type) ...@@ -8049,6 +8059,113 @@ can_native_interpret_type_p (tree type)
} }
} }
/* Read a vector of type TYPE from the target memory image given by BYTES,
starting at byte FIRST_BYTE. The vector is known to be encodable using
NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each,
and BYTES is known to have enough bytes to supply NPATTERNS *
NELTS_PER_PATTERN vector elements. Each element of BYTES contains
BITS_PER_UNIT bits and the bytes are in target memory order.
Return the vector on success, otherwise return null. */
/* Read a vector of type TYPE from the target memory image given by BYTES,
   starting at byte FIRST_BYTE.  The vector is known to be encodable using
   NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each,
   and BYTES is known to have enough bytes to supply NPATTERNS *
   NELTS_PER_PATTERN vector elements.  Each element of BYTES contains
   BITS_PER_UNIT bits and the bytes are in target memory order.

   Return the vector on success, otherwise return null (which happens
   when an element's byte pattern cannot be interpreted as a constant
   of the element type).  */

static tree
native_decode_vector_tree (tree type, vec<unsigned char> bytes,
			   unsigned int first_byte, unsigned int npatterns,
			   unsigned int nelts_per_pattern)
{
  /* Only the encoded elements need to be decoded; the builder
     extrapolates the rest of the vector from them.  */
  tree_vector_builder builder (type, npatterns, nelts_per_pattern);
  tree elt_type = TREE_TYPE (type);
  unsigned int elt_bits = tree_to_uhwi (TYPE_SIZE (elt_type));
  if (VECTOR_BOOLEAN_TYPE_P (type) && elt_bits <= BITS_PER_UNIT)
    {
      /* This is the only case in which elements can be smaller than a byte.
	 Element 0 is always in the lsb of the containing byte.  */
      elt_bits = TYPE_PRECISION (elt_type);
      for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
	{
	  /* Locate the single bit that holds boolean element I.  */
	  unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits;
	  unsigned int byte_index = bit_index / BITS_PER_UNIT;
	  unsigned int lsb = bit_index % BITS_PER_UNIT;
	  /* A set bit decodes to all-ones (true), a clear bit to zero.  */
	  builder.quick_push (bytes[byte_index] & (1 << lsb)
			      ? build_all_ones_cst (elt_type)
			      : build_zero_cst (elt_type));
	}
    }
  else
    {
      /* Elements occupy a whole number of bytes; decode them one at a
	 time, advancing FIRST_BYTE past each element.  */
      unsigned int elt_bytes = elt_bits / BITS_PER_UNIT;
      for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
	{
	  tree elt = native_interpret_expr (elt_type, &bytes[first_byte],
					    elt_bytes);
	  if (!elt)
	    return NULL_TREE;
	  builder.quick_push (elt);
	  first_byte += elt_bytes;
	}
    }
  return builder.build ();
}
/* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating
directly on the VECTOR_CST encoding, in a way that works for variable-
length vectors. Return the resulting VECTOR_CST on success or null
on failure. */
/* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating
   directly on the VECTOR_CST encoding, in a way that works for variable-
   length vectors.  Return the resulting VECTOR_CST on success or null
   on failure.  */

static tree
fold_view_convert_vector_encoding (tree type, tree expr)
{
  tree expr_type = TREE_TYPE (expr);
  poly_uint64 type_bits, expr_bits;
  /* Both vector sizes must be (poly-int) compile-time constants.  */
  if (!poly_int_tree_p (TYPE_SIZE (type), &type_bits)
      || !poly_int_tree_p (TYPE_SIZE (expr_type), &expr_bits))
    return NULL_TREE;

  poly_uint64 type_units = TYPE_VECTOR_SUBPARTS (type);
  poly_uint64 expr_units = TYPE_VECTOR_SUBPARTS (expr_type);
  unsigned int type_elt_bits = vector_element_size (type_bits, type_units);
  unsigned int expr_elt_bits = vector_element_size (expr_bits, expr_units);

  /* We can only preserve the semantics of a stepped pattern if the new
     vector element is an integer of the same size.
     NOTE(review): TYPE here is the new VECTOR_TYPE itself, not its
     element type, so INTEGRAL_TYPE_P (type) looks like it should be
     testing TREE_TYPE (type) instead -- confirm against upstream.  */
  if (VECTOR_CST_STEPPED_P (expr)
      && (!INTEGRAL_TYPE_P (type) || type_elt_bits != expr_elt_bits))
    return NULL_TREE;

  /* The number of bits needed to encode one element from every pattern
     of the original vector.  */
  unsigned int expr_sequence_bits
    = VECTOR_CST_NPATTERNS (expr) * expr_elt_bits;

  /* The number of bits needed to encode one element from every pattern
     of the result.  */
  unsigned int type_sequence_bits
    = least_common_multiple (expr_sequence_bits, type_elt_bits);

  /* Don't try to read more bytes than are available, which can happen
     for constant-sized vectors if TYPE has larger elements than EXPR_TYPE.
     The general VIEW_CONVERT handling can cope with that case, so there's
     no point complicating things here.  */
  unsigned int nelts_per_pattern = VECTOR_CST_NELTS_PER_PATTERN (expr);
  unsigned int buffer_bytes = CEIL (nelts_per_pattern * type_sequence_bits,
				    BITS_PER_UNIT);
  unsigned int buffer_bits = buffer_bytes * BITS_PER_UNIT;
  if (known_gt (buffer_bits, expr_bits))
    return NULL_TREE;

  /* Get enough bytes of EXPR to form the new encoding.  */
  auto_vec<unsigned char, 128> buffer (buffer_bytes);
  buffer.quick_grow (buffer_bytes);
  if (native_encode_vector_part (expr, buffer.address (), buffer_bytes, 0,
				 buffer_bits / expr_elt_bits)
      != (int) buffer_bytes)
    return NULL_TREE;

  /* Reencode the bytes as TYPE, using one interleaved pattern per
     element of the new sequence.  */
  unsigned int type_npatterns = type_sequence_bits / type_elt_bits;
  return native_decode_vector_tree (type, buffer, 0, type_npatterns,
				    nelts_per_pattern);
}
/* Fold a VIEW_CONVERT_EXPR of a constant expression EXPR to type /* Fold a VIEW_CONVERT_EXPR of a constant expression EXPR to type
TYPE at compile-time. If we're unable to perform the conversion TYPE at compile-time. If we're unable to perform the conversion
...@@ -8065,6 +8182,10 @@ fold_view_convert_expr (tree type, tree expr) ...@@ -8065,6 +8182,10 @@ fold_view_convert_expr (tree type, tree expr)
if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
return NULL_TREE; return NULL_TREE;
if (VECTOR_TYPE_P (type) && TREE_CODE (expr) == VECTOR_CST)
if (tree res = fold_view_convert_vector_encoding (type, expr))
return res;
len = native_encode_expr (expr, buffer, sizeof (buffer)); len = native_encode_expr (expr, buffer, sizeof (buffer));
if (len == 0) if (len == 0)
return NULL_TREE; return NULL_TREE;
......
2019-11-18  Richard Sandiford  <richard.sandiford@arm.com>
* gcc.target/aarch64/sve/acle/general/temporaries_1.c: New test.
2019-11-18 Richard Sandiford <richard.sandiford@arm.com>
* gcc.dg/vect/vect-alias-check-8.c: Expect WAR/WAW checks to be used.
* gcc.dg/vect/vect-alias-check-14.c: Likewise.
* gcc.dg/vect/vect-alias-check-15.c: Likewise.
......
/* { dg-do compile } */
/* { dg-options "-O" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include <arm_sve.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
** test_s8:
** ptrue (p[0-7])\.b, all
** ld1b (z[0-9]+\.b), \1/z, \[x0\]
** add \2, \2, #1
** st1b \2, \1, \[x1\]
** ret
*/
void
test_s8 (int8_t *x, int8_t *y)
{
  /* Round-trip the loaded vector through two stack temporaries, adding 1
     in between.  The asm pattern above expects the stores to TMP1/TMP2 to
     be forwarded away, leaving a single load/add/store sequence.  */
  int8_t tmp1[32], tmp2[32];
  svbool_t pg = svptrue_b8 ();
  svst1 (pg, tmp1, svld1 (pg, x));
  svst1 (pg, tmp2, svadd_x (pg, svld1 (pg, tmp1), 1));
  svst1 (pg, y, svld1 (pg, tmp2));
}
/*
** test_s32_b8:
** ptrue (p[0-7])\.b, all
** ld1w (z[0-9]+\.s), \1/z, \[x0\]
** add \2, \2, #1
** st1w \2, \1, \[x1\]
** ret
*/
void
test_s32_b8 (int32_t *x, int32_t *y)
{
  /* Same as test_s8 but with 32-bit elements under an all-true byte
     predicate; the asm pattern above still expects TMP1/TMP2 to be
     optimized away.  */
  int32_t tmp1[8], tmp2[8];
  svbool_t pg = svptrue_b8 ();
  svst1 (pg, tmp1, svld1 (pg, x));
  svst1 (pg, tmp2, svadd_x (pg, svld1 (pg, tmp1), 1));
  svst1 (pg, y, svld1 (pg, tmp2));
}
/*
** test_s32_b32:
** ptrue (p[0-7])\.b, all
** ld1w (z[0-9]+\.s), \1/z, \[x0\]
** add \2, \2, #1
** st1w \2, \1, \[x1\]
** ret
*/
void
test_s32_b32 (int32_t *x, int32_t *y)
{
  /* Same as test_s32_b8 but with the predicate constructed per 32-bit
     element (svptrue_b32); the asm pattern above still expects the
     temporaries to be optimized away.  */
  int32_t tmp1[8], tmp2[8];
  svbool_t pg = svptrue_b32 ();
  svst1 (pg, tmp1, svld1 (pg, x));
  svst1 (pg, tmp2, svadd_x (pg, svld1 (pg, tmp1), 1));
  svst1 (pg, y, svld1 (pg, tmp2));
}
#ifdef __cplusplus
}
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment