Commit f24f4c15 by Richard Sandiford

Rework constant subreg folds and handle more variable-length cases

This patch rewrites the way simplify_subreg handles constants.
It uses native_encode/native_decode routines similar to those used for
the tree-level handling of VIEW_CONVERT_EXPR, so that we can convert
between rtx constants and their target memory image.
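
As a rough illustration of how the new routines compose (a minimal sketch,
not code from the patch; the helper name is hypothetical and the usual
GCC-internal context is assumed), a fold of an SImode constant to SFmode
can go through the byte image:

/* Hypothetical helper, for illustration only: fold an SImode constant
   to SFmode by round-tripping through the target memory image, in the
   same way that the new simplify_immed_subreg does.  */
static rtx
fold_si_to_sf (rtx x)
{
  unsigned int num_bytes = GET_MODE_SIZE (SImode);
  auto_vec<target_unit, 128> buffer (num_bytes);
  /* Write all bytes of the SImode constant in target memory order...  */
  if (!native_encode_rtx (SImode, x, buffer, 0, num_bytes))
    return NULL_RTX;
  /* ...and reinterpret the same bytes as an SFmode constant.  */
  return native_decode_rtx (SFmode, buffer, 0);
}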

The main point of this patch is to support subregs that take a
constant-length slice of a variable-length (VLA) vector, beyond the very
simple cases that were already handled.  For variable-length vectors,
many of the new tests failed before the patch.

The boolean side is tested more by the upcoming SVE ACLE work.

2019-09-19  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* defaults.h (TARGET_UNIT): New macro.
	(target_unit): New type.
	* rtl.h (native_encode_rtx, native_decode_rtx)
	(native_decode_vector_rtx, subreg_size_lsb): Declare.
	(subreg_lsb_1): Turn into an inline wrapper around subreg_size_lsb.
	* rtlanal.c (subreg_lsb_1): Delete.
	(subreg_size_lsb): New function.
	* simplify-rtx.c: Include rtx-vector-builder.h
	(simplify_immed_subreg): Delete.
	(native_encode_rtx, native_decode_vector_rtx, native_decode_rtx)
	(simplify_const_vector_byte_offset, simplify_const_vector_subreg): New
	functions.
	(simplify_subreg): Use them.
	(test_vector_subregs_modes, test_vector_subregs_repeating)
	(test_vector_subregs_fore_back, test_vector_subregs_stepped)
	(test_vector_subregs): New functions.
	(test_vector_ops): Call test_vector_subregs for integer vector
	modes with at least 2 elements.

From-SVN: r275959
gcc/defaults.h
...@@ -1459,4 +1459,18 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
#define DWARF_GNAT_ENCODINGS_DEFAULT DWARF_GNAT_ENCODINGS_GDB
#endif
#ifndef USED_FOR_TARGET
/* Done this way to keep gengtype happy. */
#if BITS_PER_UNIT == 8
#define TARGET_UNIT uint8_t
#elif BITS_PER_UNIT == 16
#define TARGET_UNIT uint16_t
#elif BITS_PER_UNIT == 32
#define TARGET_UNIT uint32_t
#else
#error Unknown BITS_PER_UNIT
#endif
typedef TARGET_UNIT target_unit;
#endif
#endif /* ! GCC_DEFAULTS_H */
gcc/rtl.h
...@@ -2406,12 +2406,30 @@ extern int rtx_cost (rtx, machine_mode, enum rtx_code, int, bool);
extern int address_cost (rtx, machine_mode, addr_space_t, bool);
extern void get_full_rtx_cost (rtx, machine_mode, enum rtx_code, int,
			       struct full_rtx_costs *);
extern bool native_encode_rtx (machine_mode, rtx, vec<target_unit> &,
unsigned int, unsigned int);
extern rtx native_decode_rtx (machine_mode, vec<target_unit>,
unsigned int);
extern rtx native_decode_vector_rtx (machine_mode, vec<target_unit>,
unsigned int, unsigned int, unsigned int);
extern poly_uint64 subreg_lsb (const_rtx);
extern poly_uint64 subreg_size_lsb (poly_uint64, poly_uint64, poly_uint64);
extern poly_uint64 subreg_size_offset_from_lsb (poly_uint64, poly_uint64,
						poly_uint64);
extern bool read_modify_subreg_p (const_rtx);
/* Given a subreg's OUTER_MODE, INNER_MODE, and SUBREG_BYTE, return the
bit offset at which the subreg begins (counting from the least significant
bit of the operand). */
inline poly_uint64
subreg_lsb_1 (machine_mode outer_mode, machine_mode inner_mode,
poly_uint64 subreg_byte)
{
return subreg_size_lsb (GET_MODE_SIZE (outer_mode),
GET_MODE_SIZE (inner_mode), subreg_byte);
}
/* Return the subreg byte offset for a subreg whose outer mode is
   OUTER_MODE, whose inner mode is INNER_MODE, and where there are
   LSB_SHIFT *bits* between the lsb of the outer value and the lsb of
......
gcc/rtlanal.c
...@@ -3637,23 +3637,31 @@ loc_mentioned_in_p (rtx *loc, const_rtx in)
  return 0;
}
/* Reinterpret a subreg as a bit extraction from an integer and return
   the position of the least significant bit of the extracted value.
   In other words, if the extraction were performed as a shift right
   and mask, return the number of bits to shift right.

   The outer value of the subreg has OUTER_BYTES bytes and starts at
   byte offset SUBREG_BYTE within an inner value of INNER_BYTES bytes.  */

poly_uint64
subreg_size_lsb (poly_uint64 outer_bytes,
		 poly_uint64 inner_bytes,
		 poly_uint64 subreg_byte)
{
  poly_uint64 subreg_end, trailing_bytes, byte_pos;

  /* A paradoxical subreg begins at bit position 0.  */
  gcc_checking_assert (ordered_p (outer_bytes, inner_bytes));
  if (maybe_gt (outer_bytes, inner_bytes))
    {
      gcc_checking_assert (known_eq (subreg_byte, 0U));
      return 0;
    }

  subreg_end = subreg_byte + outer_bytes;
  trailing_bytes = inner_bytes - subreg_end;
  if (WORDS_BIG_ENDIAN && BYTES_BIG_ENDIAN)
    byte_pos = trailing_bytes;
  else if (!WORDS_BIG_ENDIAN && !BYTES_BIG_ENDIAN)
......
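
As a quick sanity check on the new interface (illustrative only, not part
of the patch): on a target where neither WORDS_BIG_ENDIAN nor
BYTES_BIG_ENDIAN is set, byte 4 of an 8-byte value holds bits [32, 63], so
an SImode subreg at byte 4 of a DImode value starts at bit 32; with both
flags set it starts at bit 0.

/* Illustrative check only; assumes the usual GCC-internal context.  */
static void
check_subreg_size_lsb_example ()
{
  if (!WORDS_BIG_ENDIAN && !BYTES_BIG_ENDIAN)
    gcc_checking_assert (known_eq (subreg_size_lsb (4, 8, 4), 32U));
  else if (WORDS_BIG_ENDIAN && BYTES_BIG_ENDIAN)
    gcc_checking_assert (known_eq (subreg_size_lsb (4, 8, 4), 0U));
}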
gcc/simplify-rtx.c
...@@ -6130,342 +6130,466 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
  return 0;
}

/* Try to calculate NUM_BYTES bytes of the target memory image of X,
   starting at byte FIRST_BYTE.  Return true on success and add the
   bytes to BYTES, such that each byte has BITS_PER_UNIT bits and such
   that the bytes follow target memory order.  Leave BYTES unmodified
   on failure.

   MODE is the mode of X.  The caller must reserve NUM_BYTES bytes in
   BYTES before calling this function.  */

bool
native_encode_rtx (machine_mode mode, rtx x, vec<target_unit> &bytes,
		   unsigned int first_byte, unsigned int num_bytes)
{
  /* Check the mode is sensible.  */
  gcc_assert (GET_MODE (x) == VOIDmode
	      ? is_a <scalar_int_mode> (mode)
	      : mode == GET_MODE (x));

  if (GET_CODE (x) == CONST_VECTOR)
    {
      /* CONST_VECTOR_ELT follows target memory order, so no shuffling
	 is necessary.  The only complication is that MODE_VECTOR_BOOL
	 vectors can have several elements per byte.  */
      unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
						   GET_MODE_NUNITS (mode));
      unsigned int elt = first_byte * BITS_PER_UNIT / elt_bits;
      if (elt_bits < BITS_PER_UNIT)
	{
	  /* This is the only case in which elements can be smaller than
	     a byte.  */
	  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
	  for (unsigned int i = 0; i < num_bytes; ++i)
	    {
	      target_unit value = 0;
	      for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits)
		{
		  value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & 1) << j;
		  elt += 1;
		}
	      bytes.quick_push (value);
	    }
	  return true;
	}

      unsigned int start = bytes.length ();
      unsigned int elt_bytes = GET_MODE_UNIT_SIZE (mode);
      /* Make FIRST_BYTE relative to ELT.  */
      first_byte %= elt_bytes;
      while (num_bytes > 0)
	{
	  /* Work out how many bytes we want from element ELT.  */
	  unsigned int chunk_bytes = MIN (num_bytes, elt_bytes - first_byte);
	  if (!native_encode_rtx (GET_MODE_INNER (mode),
				  CONST_VECTOR_ELT (x, elt), bytes,
				  first_byte, chunk_bytes))
	    {
	      bytes.truncate (start);
	      return false;
	    }
	  elt += 1;
	  first_byte = 0;
	  num_bytes -= chunk_bytes;
	}
      return true;
    }

  /* All subsequent cases are limited to scalars.  */
  scalar_mode smode;
  if (!is_a <scalar_mode> (mode, &smode))
    return false;

  /* Make sure that the region is in range.  */
  unsigned int end_byte = first_byte + num_bytes;
  unsigned int mode_bytes = GET_MODE_SIZE (smode);
  gcc_assert (end_byte <= mode_bytes);

  if (CONST_SCALAR_INT_P (x))
    {
      /* The target memory layout is affected by both BYTES_BIG_ENDIAN
	 and WORDS_BIG_ENDIAN.  Use the subreg machinery to get the lsb
	 position of each byte.  */
      rtx_mode_t value (x, smode);
      wide_int_ref value_wi (value);
      for (unsigned int byte = first_byte; byte < end_byte; ++byte)
	{
	  /* Always constant because the inputs are.  */
	  unsigned int lsb
	    = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
	  /* Operate directly on the encoding rather than using
	     wi::extract_uhwi, so that we preserve the sign or zero
	     extension for modes that are not a whole number of bits in
	     size.  (Zero extension is only used for the combination of
	     innermode == BImode && STORE_FLAG_VALUE == 1).  */
	  unsigned int elt = lsb / HOST_BITS_PER_WIDE_INT;
	  unsigned int shift = lsb % HOST_BITS_PER_WIDE_INT;
	  unsigned HOST_WIDE_INT uhwi = value_wi.elt (elt);
	  bytes.quick_push (uhwi >> shift);
	}
      return true;
    }

  if (CONST_DOUBLE_P (x))
    {
      /* real_to_target produces an array of integers in target memory order.
	 All integers before the last one have 32 bits; the last one may
	 have 32 bits or fewer, depending on whether the mode bitsize
	 is divisible by 32.  Each of these integers is then laid out
	 in target memory as any other integer would be.  */
      long el32[MAX_BITSIZE_MODE_ANY_MODE / 32];
      real_to_target (el32, CONST_DOUBLE_REAL_VALUE (x), smode);

      /* The (maximum) number of target bytes per element of el32.  */
      unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT;
      gcc_assert (bytes_per_el32 != 0);

      /* Build up the integers in a similar way to the CONST_SCALAR_INT_P
	 handling above.  */
      for (unsigned int byte = first_byte; byte < end_byte; ++byte)
	{
	  unsigned int index = byte / bytes_per_el32;
	  unsigned int subbyte = byte % bytes_per_el32;
	  unsigned int int_bytes = MIN (bytes_per_el32,
					mode_bytes - index * bytes_per_el32);
	  /* Always constant because the inputs are.  */
	  unsigned int lsb
	    = subreg_size_lsb (1, int_bytes, subbyte).to_constant ();
	  bytes.quick_push ((unsigned long) el32[index] >> lsb);
	}
      return true;
    }

  if (GET_CODE (x) == CONST_FIXED)
    {
      for (unsigned int byte = first_byte; byte < end_byte; ++byte)
	{
	  /* Always constant because the inputs are.  */
	  unsigned int lsb
	    = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
	  unsigned HOST_WIDE_INT piece = CONST_FIXED_VALUE_LOW (x);
	  if (lsb >= HOST_BITS_PER_WIDE_INT)
	    {
	      lsb -= HOST_BITS_PER_WIDE_INT;
	      piece = CONST_FIXED_VALUE_HIGH (x);
	    }
	  bytes.quick_push (piece >> lsb);
	}
      return true;
    }

  return false;
}

/* Read a vector of mode MODE from the target memory image given by BYTES,
   starting at byte FIRST_BYTE.  The vector is known to be encodable using
   NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each,
   and BYTES is known to have enough bytes to supply NPATTERNS *
   NELTS_PER_PATTERN vector elements.  Each element of BYTES contains
   BITS_PER_UNIT bits and the bytes are in target memory order.

   Return the vector on success, otherwise return NULL_RTX.  */

rtx
native_decode_vector_rtx (machine_mode mode, vec<target_unit> bytes,
			  unsigned int first_byte, unsigned int npatterns,
			  unsigned int nelts_per_pattern)
{
  rtx_vector_builder builder (mode, npatterns, nelts_per_pattern);

  unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
					       GET_MODE_NUNITS (mode));
  if (elt_bits < BITS_PER_UNIT)
    {
      /* This is the only case in which elements can be smaller than a byte.
	 Element 0 is always in the lsb of the containing byte.  */
      gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
      for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
	{
	  unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits;
	  unsigned int byte_index = bit_index / BITS_PER_UNIT;
	  unsigned int lsb = bit_index % BITS_PER_UNIT;
	  builder.quick_push (bytes[byte_index] & (1 << lsb)
			      ? CONST1_RTX (BImode)
			      : CONST0_RTX (BImode));
	}
    }
  else
    {
      for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
	{
	  rtx x = native_decode_rtx (GET_MODE_INNER (mode), bytes, first_byte);
	  if (!x)
	    return NULL_RTX;
	  builder.quick_push (x);
	  first_byte += elt_bits / BITS_PER_UNIT;
	}
    }
  return builder.build ();
}

/* Read an rtx of mode MODE from the target memory image given by BYTES,
   starting at byte FIRST_BYTE.  Each element of BYTES contains BITS_PER_UNIT
   bits and the bytes are in target memory order.  The image has enough
   values to specify all bytes of MODE.

   Return the rtx on success, otherwise return NULL_RTX.  */

rtx
native_decode_rtx (machine_mode mode, vec<target_unit> bytes,
		   unsigned int first_byte)
{
  if (VECTOR_MODE_P (mode))
    {
      /* If we know at compile time how many elements there are,
	 pull each element directly from BYTES.  */
      unsigned int nelts;
      if (GET_MODE_NUNITS (mode).is_constant (&nelts))
	return native_decode_vector_rtx (mode, bytes, first_byte, nelts, 1);
      return NULL_RTX;
    }

  scalar_int_mode imode;
  if (is_a <scalar_int_mode> (mode, &imode)
      && GET_MODE_PRECISION (imode) <= MAX_BITSIZE_MODE_ANY_INT)
    {
      /* Pull the bytes msb first, so that we can use simple
	 shift-and-insert wide_int operations.  */
      unsigned int size = GET_MODE_SIZE (imode);
      wide_int result (wi::zero (GET_MODE_PRECISION (imode)));
      for (unsigned int i = 0; i < size; ++i)
	{
	  unsigned int lsb = (size - i - 1) * BITS_PER_UNIT;
	  /* Always constant because the inputs are.  */
	  unsigned int subbyte
	    = subreg_size_offset_from_lsb (1, size, lsb).to_constant ();
	  result <<= BITS_PER_UNIT;
	  result |= bytes[first_byte + subbyte];
	}
      return immed_wide_int_const (result, imode);
    }

  scalar_float_mode fmode;
  if (is_a <scalar_float_mode> (mode, &fmode))
    {
      /* We need to build an array of integers in target memory order.
	 All integers before the last one have 32 bits; the last one may
	 have 32 bits or fewer, depending on whether the mode bitsize
	 is divisible by 32.  */
      long el32[MAX_BITSIZE_MODE_ANY_MODE / 32];
      unsigned int num_el32 = CEIL (GET_MODE_BITSIZE (fmode), 32);
      memset (el32, 0, num_el32 * sizeof (long));

      /* The (maximum) number of target bytes per element of el32.  */
      unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT;
      gcc_assert (bytes_per_el32 != 0);

      unsigned int mode_bytes = GET_MODE_SIZE (fmode);
      for (unsigned int byte = 0; byte < mode_bytes; ++byte)
	{
	  unsigned int index = byte / bytes_per_el32;
	  unsigned int subbyte = byte % bytes_per_el32;
	  unsigned int int_bytes = MIN (bytes_per_el32,
					mode_bytes - index * bytes_per_el32);
	  /* Always constant because the inputs are.  */
	  unsigned int lsb
	    = subreg_size_lsb (1, int_bytes, subbyte).to_constant ();
	  el32[index] |= (unsigned long) bytes[first_byte + byte] << lsb;
	}

      REAL_VALUE_TYPE r;
      real_from_target (&r, el32, fmode);
      return const_double_from_real_value (r, fmode);
    }

  if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
    {
      scalar_mode smode = as_a <scalar_mode> (mode);
      FIXED_VALUE_TYPE f;
      f.data.low = 0;
      f.data.high = 0;
      f.mode = smode;

      unsigned int mode_bytes = GET_MODE_SIZE (smode);
      for (unsigned int byte = 0; byte < mode_bytes; ++byte)
	{
	  /* Always constant because the inputs are.  */
	  unsigned int lsb
	    = subreg_size_lsb (1, mode_bytes, byte).to_constant ();
	  unsigned HOST_WIDE_INT unit = bytes[first_byte + byte];
	  if (lsb >= HOST_BITS_PER_WIDE_INT)
	    f.data.high |= unit << (lsb - HOST_BITS_PER_WIDE_INT);
	  else
	    f.data.low |= unit << lsb;
	}
      return CONST_FIXED_FROM_FIXED_VALUE (f, mode);
    }

  return NULL_RTX;
}

/* Simplify a byte offset BYTE into CONST_VECTOR X.  The main purpose
   is to convert a runtime BYTE value into a constant one.  */

static poly_uint64
simplify_const_vector_byte_offset (rtx x, poly_uint64 byte)
{
  /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes.  */
  machine_mode mode = GET_MODE (x);
  unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
					       GET_MODE_NUNITS (mode));
  /* The number of bits needed to encode one element from each pattern.  */
  unsigned int sequence_bits = CONST_VECTOR_NPATTERNS (x) * elt_bits;

  /* Identify the start point in terms of a sequence number and a byte offset
     within that sequence.  */
  poly_uint64 first_sequence;
  unsigned HOST_WIDE_INT subbit;
  if (can_div_trunc_p (byte * BITS_PER_UNIT, sequence_bits,
		       &first_sequence, &subbit))
    {
      unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
      if (nelts_per_pattern == 1)
	/* This is a duplicated vector, so the value of FIRST_SEQUENCE
	   doesn't matter.  */
	byte = subbit / BITS_PER_UNIT;
      else if (nelts_per_pattern == 2 && known_gt (first_sequence, 0U))
	{
	  /* The subreg drops the first element from each pattern and
	     only uses the second element.  Find the first sequence
	     that starts on a byte boundary.  */
	  subbit += least_common_multiple (sequence_bits, BITS_PER_UNIT);
	  byte = subbit / BITS_PER_UNIT;
	}
    }
  return byte;
}
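
As a worked example (illustrative, not from the patch): if X is a
variable-length duplicate of { 1, 2 } with 32-bit elements, i.e. two
patterns with one element per pattern, then sequence_bits is 64, so a
runtime offset of the form 8*N + 4 bytes leaves subbit == 32 and
simplifies to the constant offset 4, which the element-based paths in
simplify_subreg can then handle.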

/* Subroutine of simplify_subreg in which:

   - X is known to be a CONST_VECTOR
   - OUTERMODE is known to be a vector mode

   Try to handle the subreg by operating on the CONST_VECTOR encoding
   rather than on each individual element of the CONST_VECTOR.

   Return the simplified subreg on success, otherwise return NULL_RTX.  */

static rtx
simplify_const_vector_subreg (machine_mode outermode, rtx x,
			      machine_mode innermode, unsigned int first_byte)
{
  /* Paradoxical subregs of vectors have dubious semantics.  */
  if (paradoxical_subreg_p (outermode, innermode))
    return NULL_RTX;

  /* We can only preserve the semantics of a stepped pattern if the new
     vector element is the same as the original one.  */
  if (CONST_VECTOR_STEPPED_P (x)
      && GET_MODE_INNER (outermode) != GET_MODE_INNER (innermode))
    return NULL_RTX;

  /* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes.  */
  unsigned int x_elt_bits
    = vector_element_size (GET_MODE_BITSIZE (innermode),
			   GET_MODE_NUNITS (innermode));
  unsigned int out_elt_bits
    = vector_element_size (GET_MODE_BITSIZE (outermode),
			   GET_MODE_NUNITS (outermode));

  /* The number of bits needed to encode one element from every pattern
     of the original vector.  */
  unsigned int x_sequence_bits = CONST_VECTOR_NPATTERNS (x) * x_elt_bits;

  /* The number of bits needed to encode one element from every pattern
     of the result.  */
  unsigned int out_sequence_bits
    = least_common_multiple (x_sequence_bits, out_elt_bits);

  /* Work out the number of interleaved patterns in the output vector
     and the number of encoded elements per pattern.  */
  unsigned int out_npatterns = out_sequence_bits / out_elt_bits;
  unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);

  /* The encoding scheme requires the number of elements to be a multiple
     of the number of patterns, so that each pattern appears at least once
     and so that the same number of elements appear from each pattern.  */
  bool ok_p = multiple_p (GET_MODE_NUNITS (outermode), out_npatterns);
  unsigned int const_nunits;
  if (GET_MODE_NUNITS (outermode).is_constant (&const_nunits)
      && (!ok_p || out_npatterns * nelts_per_pattern > const_nunits))
    {
      /* Either the encoding is invalid, or applying it would give us
	 more elements than we need.  Just encode each element directly.  */
      out_npatterns = const_nunits;
      nelts_per_pattern = 1;
    }
  else if (!ok_p)
    return NULL_RTX;

  /* Get enough bytes of X to form the new encoding.  */
  unsigned int buffer_bits = out_npatterns * nelts_per_pattern * out_elt_bits;
  unsigned int buffer_bytes = CEIL (buffer_bits, BITS_PER_UNIT);
  auto_vec<target_unit, 128> buffer (buffer_bytes);
  if (!native_encode_rtx (innermode, x, buffer, first_byte, buffer_bytes))
    return NULL_RTX;

  /* Reencode the bytes as OUTERMODE.  */
  return native_decode_vector_rtx (outermode, buffer, 0, out_npatterns,
				   nelts_per_pattern);
}

/* Try to simplify a subreg of a constant by encoding the subreg region
   as a sequence of target bytes and reading them back in the new mode.
   Return the new value on success, otherwise return null.

   The subreg has outer mode OUTERMODE, inner mode INNERMODE, inner value X
   and byte offset FIRST_BYTE.  */

static rtx
simplify_immed_subreg (fixed_size_mode outermode, rtx x,
		       machine_mode innermode, unsigned int first_byte)
{
  unsigned int buffer_bytes = GET_MODE_SIZE (outermode);
  auto_vec<target_unit, 128> buffer (buffer_bytes);

  /* Some ports misuse CCmode.  */
  if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (x))
    return x;

  /* Paradoxical subregs read undefined values for bytes outside of the
     inner value.  However, we have traditionally always sign-extended
     integer constants and zero-extended others.  */
  unsigned int inner_bytes = buffer_bytes;
  if (paradoxical_subreg_p (outermode, innermode))
    {
      if (!GET_MODE_SIZE (innermode).is_constant (&inner_bytes))
	return NULL_RTX;

      target_unit filler = 0;
      if (CONST_SCALAR_INT_P (x) && wi::neg_p (rtx_mode_t (x, innermode)))
	filler = -1;

      /* Add any leading bytes due to big-endian layout.  The number of
	 bytes must be constant because both modes have constant size.  */
      unsigned int leading_bytes
	= -byte_lowpart_offset (outermode, innermode).to_constant ();
      for (unsigned int i = 0; i < leading_bytes; ++i)
	buffer.quick_push (filler);

      if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes))
	return NULL_RTX;

      /* Add any trailing bytes due to little-endian layout.  */
      while (buffer.length () < buffer_bytes)
	buffer.quick_push (filler);
    }
  else
    {
      if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes))
	return NULL_RTX;
    }
  return native_decode_rtx (outermode, buffer, 0);
}
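
For instance (a hedged sketch, assuming the usual GCC-internal context):
because the filler bytes follow the sign of integer constants, a
paradoxical HImode subreg of the QImode constant -1 still folds to -1.

/* Illustrative only: the extra byte is filled with the sign of the
   QImode constant, so this folds to (const_int -1) in HImode.  */
rtx x = gen_int_mode (-1, QImode);
rtx folded = simplify_subreg (HImode, x, QImode, 0);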

/* Simplify SUBREG:OUTERMODE(OP:INNERMODE, BYTE)
...@@ -6494,6 +6618,9 @@ simplify_subreg (machine_mode outermode, rtx op,
  if (outermode == innermode && known_eq (byte, 0U))
    return op;

  if (GET_CODE (op) == CONST_VECTOR)
    byte = simplify_const_vector_byte_offset (op, byte);

  if (multiple_p (byte, GET_MODE_UNIT_SIZE (innermode)))
    {
      rtx elt;
...@@ -6513,30 +6640,21 @@ simplify_subreg (machine_mode outermode, rtx op,
	  || CONST_FIXED_P (op)
	  || GET_CODE (op) == CONST_VECTOR)
    {
      unsigned HOST_WIDE_INT cbyte;
      if (byte.is_constant (&cbyte))
	{
	  if (GET_CODE (op) == CONST_VECTOR && VECTOR_MODE_P (outermode))
	    {
	      rtx tmp = simplify_const_vector_subreg (outermode, op,
						      innermode, cbyte);
	      if (tmp)
		return tmp;
	    }

	  fixed_size_mode fs_outermode;
	  if (is_a <fixed_size_mode> (outermode, &fs_outermode))
	    return simplify_immed_subreg (fs_outermode, op, innermode, cbyte);
	}
    }

  /* Changing mode twice with SUBREG => just change it once,
...@@ -7179,6 +7297,165 @@ test_vec_merge (machine_mode mode)
		 simplify_rtx (nvm));
}
/* Test subregs of integer vector constant X, trying elements in
the range [ELT_BIAS, ELT_BIAS + constant_lower_bound (NELTS)),
where NELTS is the number of elements in X. Subregs involving
elements [ELT_BIAS, ELT_BIAS + FIRST_VALID) are expected to fail. */
static void
test_vector_subregs_modes (rtx x, poly_uint64 elt_bias = 0,
unsigned int first_valid = 0)
{
machine_mode inner_mode = GET_MODE (x);
scalar_mode int_mode = GET_MODE_INNER (inner_mode);
for (unsigned int modei = 0; modei < NUM_MACHINE_MODES; ++modei)
{
machine_mode outer_mode = (machine_mode) modei;
if (!VECTOR_MODE_P (outer_mode))
continue;
unsigned int outer_nunits;
if (GET_MODE_INNER (outer_mode) == int_mode
&& GET_MODE_NUNITS (outer_mode).is_constant (&outer_nunits)
&& multiple_p (GET_MODE_NUNITS (inner_mode), outer_nunits))
{
/* Test subregs in which the outer mode is a smaller,
constant-sized vector of the same element type. */
unsigned int limit
= constant_lower_bound (GET_MODE_NUNITS (inner_mode));
for (unsigned int elt = 0; elt < limit; elt += outer_nunits)
{
rtx expected = NULL_RTX;
if (elt >= first_valid)
{
rtx_vector_builder builder (outer_mode, outer_nunits, 1);
for (unsigned int i = 0; i < outer_nunits; ++i)
builder.quick_push (CONST_VECTOR_ELT (x, elt + i));
expected = builder.build ();
}
poly_uint64 byte = (elt_bias + elt) * GET_MODE_SIZE (int_mode);
ASSERT_RTX_EQ (expected,
simplify_subreg (outer_mode, x,
inner_mode, byte));
}
}
else if (known_eq (GET_MODE_SIZE (outer_mode),
GET_MODE_SIZE (inner_mode))
&& known_eq (elt_bias, 0U)
&& (GET_MODE_CLASS (outer_mode) != MODE_VECTOR_BOOL
|| known_eq (GET_MODE_BITSIZE (outer_mode),
GET_MODE_NUNITS (outer_mode)))
&& (!FLOAT_MODE_P (outer_mode)
|| (FLOAT_MODE_FORMAT (outer_mode)->ieee_bits
== GET_MODE_UNIT_PRECISION (outer_mode)))
&& (GET_MODE_SIZE (inner_mode).is_constant ()
|| !CONST_VECTOR_STEPPED_P (x)))
{
/* Try converting to OUTER_MODE and back. */
rtx outer_x = simplify_subreg (outer_mode, x, inner_mode, 0);
ASSERT_TRUE (outer_x != NULL_RTX);
ASSERT_RTX_EQ (x, simplify_subreg (inner_mode, outer_x,
outer_mode, 0));
}
}
if (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
{
/* Test each byte in the element range. */
unsigned int limit
= constant_lower_bound (GET_MODE_SIZE (inner_mode));
for (unsigned int i = 0; i < limit; ++i)
{
unsigned int elt = i / GET_MODE_SIZE (int_mode);
rtx expected = NULL_RTX;
if (elt >= first_valid)
{
unsigned int byte_shift = i % GET_MODE_SIZE (int_mode);
if (BYTES_BIG_ENDIAN)
byte_shift = GET_MODE_SIZE (int_mode) - byte_shift - 1;
rtx_mode_t vec_elt (CONST_VECTOR_ELT (x, elt), int_mode);
wide_int shifted_elt
= wi::lrshift (vec_elt, byte_shift * BITS_PER_UNIT);
expected = immed_wide_int_const (shifted_elt, QImode);
}
poly_uint64 byte = elt_bias * GET_MODE_SIZE (int_mode) + i;
ASSERT_RTX_EQ (expected,
simplify_subreg (QImode, x, inner_mode, byte));
}
}
}
/* Test constant subregs of integer vector mode INNER_MODE, using 1
element per pattern. */
static void
test_vector_subregs_repeating (machine_mode inner_mode)
{
poly_uint64 nunits = GET_MODE_NUNITS (inner_mode);
unsigned int min_nunits = constant_lower_bound (nunits);
scalar_mode int_mode = GET_MODE_INNER (inner_mode);
unsigned int count = gcd (min_nunits, 8);
rtx_vector_builder builder (inner_mode, count, 1);
for (unsigned int i = 0; i < count; ++i)
builder.quick_push (gen_int_mode (8 - i, int_mode));
rtx x = builder.build ();
test_vector_subregs_modes (x);
if (!nunits.is_constant ())
test_vector_subregs_modes (x, nunits - min_nunits);
}
/* Test constant subregs of integer vector mode INNER_MODE, using 2
elements per pattern. */
static void
test_vector_subregs_fore_back (machine_mode inner_mode)
{
poly_uint64 nunits = GET_MODE_NUNITS (inner_mode);
unsigned int min_nunits = constant_lower_bound (nunits);
scalar_mode int_mode = GET_MODE_INNER (inner_mode);
unsigned int count = gcd (min_nunits, 4);
rtx_vector_builder builder (inner_mode, count, 2);
for (unsigned int i = 0; i < count; ++i)
builder.quick_push (gen_int_mode (i, int_mode));
for (unsigned int i = 0; i < count; ++i)
builder.quick_push (gen_int_mode (-(int) i, int_mode));
rtx x = builder.build ();
test_vector_subregs_modes (x);
if (!nunits.is_constant ())
test_vector_subregs_modes (x, nunits - min_nunits, count);
}
/* Test constant subregs of integer vector mode INNER_MODE, using 3
elements per pattern. */
static void
test_vector_subregs_stepped (machine_mode inner_mode)
{
/* Build { 0, 1, 2, 3, ... }. */
scalar_mode int_mode = GET_MODE_INNER (inner_mode);
rtx_vector_builder builder (inner_mode, 1, 3);
for (unsigned int i = 0; i < 3; ++i)
builder.quick_push (gen_int_mode (i, int_mode));
rtx x = builder.build ();
test_vector_subregs_modes (x);
}
/* Test constant subregs of integer vector mode INNER_MODE. */
static void
test_vector_subregs (machine_mode inner_mode)
{
test_vector_subregs_repeating (inner_mode);
test_vector_subregs_fore_back (inner_mode);
test_vector_subregs_stepped (inner_mode);
}
/* Verify some simplifications involving vectors.  */

static void
...@@ -7193,7 +7470,10 @@ test_vector_ops ()
      test_vector_ops_duplicate (mode, scalar_reg);
      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  && maybe_gt (GET_MODE_NUNITS (mode), 2))
	{
	  test_vector_ops_series (mode, scalar_reg);
	  test_vector_subregs (mode);
	}
      test_vec_merge (mode);
    }
}
......