Commit f24f4c15 by Richard Sandiford, committed by Richard Sandiford

Rework constant subreg folds and handle more variable-length cases

This patch rewrites the way simplify_subreg handles constants.
It uses native_encode/native_decode routines similar to those used for
the tree-level handling of VIEW_CONVERT_EXPR, meaning that we can
move between rtx constants and their target memory image.

The main point of this patch is to support taking constant-length
vector subregs of VLA (variable-length) vectors, beyond the very simple
cases that were already handled.  Many of the new tests failed for
variable-length vectors before the patch.

The boolean side is tested more by the upcoming SVE ACLE work.
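
As a rough illustration of the encode/decode idea, here is a standalone
sketch (not the GCC routines themselves; the real code works on rtx
constants, vec<target_unit> buffers and arbitrary modes):

#include <cassert>
#include <cstdint>
#include <vector>

/* Standalone sketch: encode a 32-bit constant into "target bytes" in a
   chosen byte order, then decode it back.  The real native_encode_rtx and
   native_decode_rtx do the same for integer, floating-point, fixed-point
   and vector constants of any mode.  */

static std::vector<uint8_t>
encode32 (uint32_t value, bool big_endian)
{
  std::vector<uint8_t> bytes (4);
  for (unsigned int i = 0; i < 4; ++i)
    {
      unsigned int lsb = i * 8;			  /* bit position of byte I */
      unsigned int pos = big_endian ? 3 - i : i;  /* its place in memory */
      bytes[pos] = (value >> lsb) & 0xff;
    }
  return bytes;
}

static uint32_t
decode32 (const std::vector<uint8_t> &bytes, bool big_endian)
{
  uint32_t value = 0;
  for (unsigned int i = 0; i < 4; ++i)
    {
      unsigned int pos = big_endian ? 3 - i : i;
      value |= uint32_t (bytes[pos]) << (i * 8);
    }
  return value;
}

int
main ()
{
  /* The memory image of a constant fully determines its value, so the
     round trip is lossless in either byte order.  */
  assert (decode32 (encode32 (0x12345678, true), true) == 0x12345678);
  assert (decode32 (encode32 (0x12345678, false), false) == 0x12345678);
  return 0;
}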

2019-09-19  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* defaults.h (TARGET_UNIT): New macro.
	(target_unit): New type.
	* rtl.h (native_encode_rtx, native_decode_rtx)
	(native_decode_vector_rtx, subreg_size_lsb): Declare.
	(subreg_lsb_1): Turn into an inline wrapper around subreg_size_lsb.
	* rtlanal.c (subreg_lsb_1): Delete.
	(subreg_size_lsb): New function.
	* simplify-rtx.c: Include rtx-vector-builder.h.
	(simplify_immed_subreg): Delete.
	(native_encode_rtx, native_decode_vector_rtx, native_decode_rtx)
	(simplify_const_vector_byte_offset, simplify_const_vector_subreg): New
	functions.
	(simplify_subreg): Use them.
	(test_vector_subregs_modes, test_vector_subregs_repeating)
	(test_vector_subregs_fore_back, test_vector_subregs_stepped)
	(test_vector_subregs): New functions.
	(test_vector_ops): Call test_vector_subregs for integer vector
	modes with at least 2 elements.

From-SVN: r275959
@@ -1459,4 +1459,18 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define DWARF_GNAT_ENCODINGS_DEFAULT DWARF_GNAT_ENCODINGS_GDB
#endif
#ifndef USED_FOR_TARGET
/* Done this way to keep gengtype happy. */
#if BITS_PER_UNIT == 8
#define TARGET_UNIT uint8_t
#elif BITS_PER_UNIT == 16
#define TARGET_UNIT uint16_t
#elif BITS_PER_UNIT == 32
#define TARGET_UNIT uint32_t
#else
#error Unknown BITS_PER_UNIT
#endif
typedef TARGET_UNIT target_unit;
#endif
#endif /* ! GCC_DEFAULTS_H */
@@ -2406,12 +2406,30 @@ extern int rtx_cost (rtx, machine_mode, enum rtx_code, int, bool);
extern int address_cost (rtx, machine_mode, addr_space_t, bool);
extern void get_full_rtx_cost (rtx, machine_mode, enum rtx_code, int,
struct full_rtx_costs *);
extern bool native_encode_rtx (machine_mode, rtx, vec<target_unit> &,
unsigned int, unsigned int);
extern rtx native_decode_rtx (machine_mode, vec<target_unit>,
unsigned int);
extern rtx native_decode_vector_rtx (machine_mode, vec<target_unit>,
unsigned int, unsigned int, unsigned int);
extern poly_uint64 subreg_lsb (const_rtx);
extern poly_uint64 subreg_lsb_1 (machine_mode, machine_mode, poly_uint64);
extern poly_uint64 subreg_size_lsb (poly_uint64, poly_uint64, poly_uint64);
extern poly_uint64 subreg_size_offset_from_lsb (poly_uint64, poly_uint64,
poly_uint64);
extern bool read_modify_subreg_p (const_rtx);
/* Given a subreg's OUTER_MODE, INNER_MODE, and SUBREG_BYTE, return the
bit offset at which the subreg begins (counting from the least significant
bit of the operand). */
inline poly_uint64
subreg_lsb_1 (machine_mode outer_mode, machine_mode inner_mode,
poly_uint64 subreg_byte)
{
return subreg_size_lsb (GET_MODE_SIZE (outer_mode),
GET_MODE_SIZE (inner_mode), subreg_byte);
}
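
As a worked example of the subreg_size_lsb semantics, here is a standalone
sketch that assumes BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN and 8-bit bytes;
the real function also handles the mixed-endian word layout and
poly_uint64 sizes:

#include <cassert>

/* Standalone sketch: bit position of the lsb of an OUTER_BYTES-byte subreg
   taken at byte SUBREG_BYTE of an INNER_BYTES-byte value, for a uniformly
   little-endian or uniformly big-endian target.  */

static unsigned int
sketch_subreg_lsb (unsigned int outer_bytes, unsigned int inner_bytes,
		   unsigned int subreg_byte, bool big_endian)
{
  if (outer_bytes > inner_bytes)
    /* Paradoxical subregs always start at bit 0.  */
    return 0;
  unsigned int byte_pos = (big_endian
			   ? inner_bytes - (subreg_byte + outer_bytes)
			   : subreg_byte);
  return byte_pos * 8;
}

int
main ()
{
  /* An SImode subreg of a DImode value at byte offset 4 reads bits
     32..63 on little-endian but bits 0..31 on big-endian.  */
  assert (sketch_subreg_lsb (4, 8, 4, false) == 32);
  assert (sketch_subreg_lsb (4, 8, 4, true) == 0);
  return 0;
}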
/* Return the subreg byte offset for a subreg whose outer mode is
OUTER_MODE, whose inner mode is INNER_MODE, and where there are
LSB_SHIFT *bits* between the lsb of the outer value and the lsb of
......
@@ -3637,23 +3637,31 @@ loc_mentioned_in_p (rtx *loc, const_rtx in)
return 0;
}
/* Helper function for subreg_lsb. Given a subreg's OUTER_MODE, INNER_MODE,
and SUBREG_BYTE, return the bit offset where the subreg begins
(counting from the least significant bit of the operand). */
/* Reinterpret a subreg as a bit extraction from an integer and return
the position of the least significant bit of the extracted value.
In other words, if the extraction were performed as a shift right
and mask, return the number of bits to shift right.
The outer value of the subreg has OUTER_BYTES bytes and starts at
byte offset SUBREG_BYTE within an inner value of INNER_BYTES bytes. */
poly_uint64
subreg_lsb_1 (machine_mode outer_mode,
machine_mode inner_mode,
poly_uint64 subreg_byte)
subreg_size_lsb (poly_uint64 outer_bytes,
poly_uint64 inner_bytes,
poly_uint64 subreg_byte)
{
poly_uint64 subreg_end, trailing_bytes, byte_pos;
/* A paradoxical subreg begins at bit position 0. */
if (paradoxical_subreg_p (outer_mode, inner_mode))
return 0;
gcc_checking_assert (ordered_p (outer_bytes, inner_bytes));
if (maybe_gt (outer_bytes, inner_bytes))
{
gcc_checking_assert (known_eq (subreg_byte, 0U));
return 0;
}
subreg_end = subreg_byte + GET_MODE_SIZE (outer_mode);
trailing_bytes = GET_MODE_SIZE (inner_mode) - subreg_end;
subreg_end = subreg_byte + outer_bytes;
trailing_bytes = inner_bytes - subreg_end;
if (WORDS_BIG_ENDIAN && BYTES_BIG_ENDIAN)
byte_pos = trailing_bytes;
else if (!WORDS_BIG_ENDIAN && !BYTES_BIG_ENDIAN)
......
@@ -6130,342 +6130,466 @@ simplify_ternary_operation (enum rtx_code code, machine_mode mode,
return 0;
}
/* Evaluate a SUBREG of a CONST_INT or CONST_WIDE_INT or CONST_DOUBLE
or CONST_FIXED or CONST_VECTOR, returning another CONST_INT or
CONST_WIDE_INT or CONST_DOUBLE or CONST_FIXED or CONST_VECTOR.
/* Try to calculate NUM_BYTES bytes of the target memory image of X,
starting at byte FIRST_BYTE. Return true on success and add the
bytes to BYTES, such that each byte has BITS_PER_UNIT bits and such
that the bytes follow target memory order. Leave BYTES unmodified
on failure.
Works by unpacking INNER_BYTES bytes of OP into a collection of 8-bit values
represented as a little-endian array of 'unsigned char', selecting by BYTE,
and then repacking them again for OUTERMODE. If OP is a CONST_VECTOR,
FIRST_ELEM is the number of the first element to extract, otherwise
FIRST_ELEM is ignored. */
MODE is the mode of X. The caller must reserve NUM_BYTES bytes in
BYTES before calling this function. */
static rtx
simplify_immed_subreg (fixed_size_mode outermode, rtx op,
machine_mode innermode, unsigned int byte,
unsigned int first_elem, unsigned int inner_bytes)
bool
native_encode_rtx (machine_mode mode, rtx x, vec<target_unit> &bytes,
unsigned int first_byte, unsigned int num_bytes)
{
enum {
value_bit = 8,
value_mask = (1 << value_bit) - 1
};
unsigned char value[MAX_BITSIZE_MODE_ANY_MODE / value_bit];
int value_start;
int i;
int elem;
int num_elem;
rtx * elems;
int elem_bitsize;
rtx result_s = NULL;
rtvec result_v = NULL;
enum mode_class outer_class;
scalar_mode outer_submode;
int max_bitsize;
/* Check the mode is sensible. */
gcc_assert (GET_MODE (x) == VOIDmode
? is_a <scalar_int_mode> (mode)
: mode == GET_MODE (x));
/* Some ports misuse CCmode. */
if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (op))
return op;
if (GET_CODE (x) == CONST_VECTOR)
{
/* CONST_VECTOR_ELT follows target memory order, so no shuffling
is necessary. The only complication is that MODE_VECTOR_BOOL
vectors can have several elements per byte. */
unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
GET_MODE_NUNITS (mode));
unsigned int elt = first_byte * BITS_PER_UNIT / elt_bits;
if (elt_bits < BITS_PER_UNIT)
{
/* This is the only case in which elements can be smaller than
a byte. */
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
for (unsigned int i = 0; i < num_bytes; ++i)
{
target_unit value = 0;
for (unsigned int j = 0; j < BITS_PER_UNIT; j += elt_bits)
{
value |= (INTVAL (CONST_VECTOR_ELT (x, elt)) & 1) << j;
elt += 1;
}
bytes.quick_push (value);
}
return true;
}
/* We have no way to represent a complex constant at the rtl level. */
if (COMPLEX_MODE_P (outermode))
return NULL_RTX;
unsigned int start = bytes.length ();
unsigned int elt_bytes = GET_MODE_UNIT_SIZE (mode);
/* Make FIRST_BYTE relative to ELT. */
first_byte %= elt_bytes;
while (num_bytes > 0)
{
/* Work out how many bytes we want from element ELT. */
unsigned int chunk_bytes = MIN (num_bytes, elt_bytes - first_byte);
if (!native_encode_rtx (GET_MODE_INNER (mode),
CONST_VECTOR_ELT (x, elt), bytes,
first_byte, chunk_bytes))
{
bytes.truncate (start);
return false;
}
elt += 1;
first_byte = 0;
num_bytes -= chunk_bytes;
}
return true;
}
/* We support any size mode. */
max_bitsize = MAX (GET_MODE_BITSIZE (outermode),
inner_bytes * BITS_PER_UNIT);
/* All subsequent cases are limited to scalars. */
scalar_mode smode;
if (!is_a <scalar_mode> (mode, &smode))
return false;
/* Unpack the value. */
/* Make sure that the region is in range. */
unsigned int end_byte = first_byte + num_bytes;
unsigned int mode_bytes = GET_MODE_SIZE (smode);
gcc_assert (end_byte <= mode_bytes);
if (GET_CODE (op) == CONST_VECTOR)
if (CONST_SCALAR_INT_P (x))
{
num_elem = CEIL (inner_bytes, GET_MODE_UNIT_SIZE (innermode));
elem_bitsize = GET_MODE_UNIT_BITSIZE (innermode);
/* The target memory layout is affected by both BYTES_BIG_ENDIAN
and WORDS_BIG_ENDIAN. Use the subreg machinery to get the lsb
position of each byte. */
rtx_mode_t value (x, smode);
wide_int_ref value_wi (value);
for (unsigned int byte = first_byte; byte < end_byte; ++byte)
{
/* Always constant because the inputs are. */
unsigned int lsb
= subreg_size_lsb (1, mode_bytes, byte).to_constant ();
/* Operate directly on the encoding rather than using
wi::extract_uhwi, so that we preserve the sign or zero
extension for modes that are not a whole number of bits in
size. (Zero extension is only used for the combination of
innermode == BImode && STORE_FLAG_VALUE == 1). */
unsigned int elt = lsb / HOST_BITS_PER_WIDE_INT;
unsigned int shift = lsb % HOST_BITS_PER_WIDE_INT;
unsigned HOST_WIDE_INT uhwi = value_wi.elt (elt);
bytes.quick_push (uhwi >> shift);
}
return true;
}
else
if (CONST_DOUBLE_P (x))
{
num_elem = 1;
elem_bitsize = max_bitsize;
/* real_to_target produces an array of integers in target memory order.
All integers before the last one have 32 bits; the last one may
have 32 bits or fewer, depending on whether the mode bitsize
is divisible by 32. Each of these integers is then laid out
in target memory as any other integer would be. */
long el32[MAX_BITSIZE_MODE_ANY_MODE / 32];
real_to_target (el32, CONST_DOUBLE_REAL_VALUE (x), smode);
/* The (maximum) number of target bytes per element of el32. */
unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT;
gcc_assert (bytes_per_el32 != 0);
/* Build up the integers in a similar way to the CONST_SCALAR_INT_P
handling above. */
for (unsigned int byte = first_byte; byte < end_byte; ++byte)
{
unsigned int index = byte / bytes_per_el32;
unsigned int subbyte = byte % bytes_per_el32;
unsigned int int_bytes = MIN (bytes_per_el32,
mode_bytes - index * bytes_per_el32);
/* Always constant because the inputs are. */
unsigned int lsb
= subreg_size_lsb (1, int_bytes, subbyte).to_constant ();
bytes.quick_push ((unsigned long) el32[index] >> lsb);
}
return true;
}
/* If this asserts, it is too complicated; reducing value_bit may help. */
gcc_assert (BITS_PER_UNIT % value_bit == 0);
/* I don't know how to handle endianness of sub-units. */
gcc_assert (elem_bitsize % BITS_PER_UNIT == 0);
for (elem = 0; elem < num_elem; elem++)
if (GET_CODE (x) == CONST_FIXED)
{
unsigned char * vp;
rtx el = (GET_CODE (op) == CONST_VECTOR
? CONST_VECTOR_ELT (op, first_elem + elem)
: op);
for (unsigned int byte = first_byte; byte < end_byte; ++byte)
{
/* Always constant because the inputs are. */
unsigned int lsb
= subreg_size_lsb (1, mode_bytes, byte).to_constant ();
unsigned HOST_WIDE_INT piece = CONST_FIXED_VALUE_LOW (x);
if (lsb >= HOST_BITS_PER_WIDE_INT)
{
lsb -= HOST_BITS_PER_WIDE_INT;
piece = CONST_FIXED_VALUE_HIGH (x);
}
bytes.quick_push (piece >> lsb);
}
return true;
}
/* Vectors are kept in target memory order. (This is probably
a mistake.) */
{
unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT;
unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize)
/ BITS_PER_UNIT);
unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
unsigned bytele = (subword_byte % UNITS_PER_WORD
+ (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
vp = value + (bytele * BITS_PER_UNIT) / value_bit;
}
return false;
}
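
The MODE_VECTOR_BOOL case of native_encode_rtx above packs several sub-byte
elements into each target byte, with element 0 in the least significant bit
of byte 0.  A standalone sketch of that packing for 1-bit elements (the real
code also copes with 2- and 4-bit elements and pushes into a
vec<target_unit>):

#include <cassert>
#include <cstdint>
#include <vector>

/* Standalone sketch: pack 1-bit boolean vector elements into bytes,
   lsb first within each byte.  */

static std::vector<uint8_t>
pack_bool_vector (const std::vector<int> &elts)
{
  std::vector<uint8_t> bytes;
  for (size_t i = 0; i < elts.size (); i += 8)
    {
      uint8_t value = 0;
      for (unsigned int j = 0; j < 8 && i + j < elts.size (); ++j)
	value |= (elts[i + j] & 1) << j;
      bytes.push_back (value);
    }
  return bytes;
}

int
main ()
{
  /* { 1, 0, 1, 1, 0, 0, 0, 0 } packs to 0b00001101 == 0x0d.  */
  std::vector<int> elts = { 1, 0, 1, 1, 0, 0, 0, 0 };
  assert (pack_bool_vector (elts).size () == 1);
  assert (pack_bool_vector (elts)[0] == 0x0d);
  return 0;
}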
switch (GET_CODE (el))
{
case CONST_INT:
for (i = 0;
i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize;
i += value_bit)
*vp++ = INTVAL (el) >> i;
/* CONST_INTs are always logically sign-extended. */
for (; i < elem_bitsize; i += value_bit)
*vp++ = INTVAL (el) < 0 ? -1 : 0;
break;
/* Read a vector of mode MODE from the target memory image given by BYTES,
starting at byte FIRST_BYTE. The vector is known to be encodable using
NPATTERNS interleaved patterns with NELTS_PER_PATTERN elements each,
and BYTES is known to have enough bytes to supply NPATTERNS *
NELTS_PER_PATTERN vector elements. Each element of BYTES contains
BITS_PER_UNIT bits and the bytes are in target memory order.
case CONST_WIDE_INT:
{
rtx_mode_t val = rtx_mode_t (el, GET_MODE_INNER (innermode));
unsigned char extend = wi::sign_mask (val);
int prec = wi::get_precision (val);
for (i = 0; i < prec && i < elem_bitsize; i += value_bit)
*vp++ = wi::extract_uhwi (val, i, value_bit);
for (; i < elem_bitsize; i += value_bit)
*vp++ = extend;
}
break;
Return the vector on success, otherwise return NULL_RTX. */
case CONST_DOUBLE:
if (TARGET_SUPPORTS_WIDE_INT == 0 && GET_MODE (el) == VOIDmode)
{
unsigned char extend = 0;
/* If this triggers, someone should have generated a
CONST_INT instead. */
gcc_assert (elem_bitsize > HOST_BITS_PER_WIDE_INT);
for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit)
*vp++ = CONST_DOUBLE_LOW (el) >> i;
while (i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize)
{
*vp++
= CONST_DOUBLE_HIGH (el) >> (i - HOST_BITS_PER_WIDE_INT);
i += value_bit;
}
rtx
native_decode_vector_rtx (machine_mode mode, vec<target_unit> bytes,
unsigned int first_byte, unsigned int npatterns,
unsigned int nelts_per_pattern)
{
rtx_vector_builder builder (mode, npatterns, nelts_per_pattern);
if (CONST_DOUBLE_HIGH (el) >> (HOST_BITS_PER_WIDE_INT - 1))
extend = -1;
for (; i < elem_bitsize; i += value_bit)
*vp++ = extend;
}
else
{
/* This is big enough for anything on the platform. */
long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32];
scalar_float_mode el_mode;
unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
GET_MODE_NUNITS (mode));
if (elt_bits < BITS_PER_UNIT)
{
/* This is the only case in which elements can be smaller than a byte.
Element 0 is always in the lsb of the containing byte. */
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL);
for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
{
unsigned int bit_index = first_byte * BITS_PER_UNIT + i * elt_bits;
unsigned int byte_index = bit_index / BITS_PER_UNIT;
unsigned int lsb = bit_index % BITS_PER_UNIT;
builder.quick_push (bytes[byte_index] & (1 << lsb)
? CONST1_RTX (BImode)
: CONST0_RTX (BImode));
}
}
else
{
for (unsigned int i = 0; i < builder.encoded_nelts (); ++i)
{
rtx x = native_decode_rtx (GET_MODE_INNER (mode), bytes, first_byte);
if (!x)
return NULL_RTX;
builder.quick_push (x);
first_byte += elt_bits / BITS_PER_UNIT;
}
}
return builder.build ();
}
el_mode = as_a <scalar_float_mode> (GET_MODE (el));
int bitsize = GET_MODE_BITSIZE (el_mode);
/* Read an rtx of mode MODE from the target memory image given by BYTES,
starting at byte FIRST_BYTE. Each element of BYTES contains BITS_PER_UNIT
bits and the bytes are in target memory order. The image has enough
values to specify all bytes of MODE.
gcc_assert (bitsize <= elem_bitsize);
gcc_assert (bitsize % value_bit == 0);
Return the rtx on success, otherwise return NULL_RTX. */
real_to_target (tmp, CONST_DOUBLE_REAL_VALUE (el),
GET_MODE (el));
rtx
native_decode_rtx (machine_mode mode, vec<target_unit> bytes,
unsigned int first_byte)
{
if (VECTOR_MODE_P (mode))
{
/* If we know at compile time how many elements there are,
pull each element directly from BYTES. */
unsigned int nelts;
if (GET_MODE_NUNITS (mode).is_constant (&nelts))
return native_decode_vector_rtx (mode, bytes, first_byte, nelts, 1);
return NULL_RTX;
}
/* real_to_target produces its result in words affected by
FLOAT_WORDS_BIG_ENDIAN. However, we ignore this,
and use WORDS_BIG_ENDIAN instead; see the documentation
of SUBREG in rtl.texi. */
for (i = 0; i < bitsize; i += value_bit)
{
int ibase;
if (WORDS_BIG_ENDIAN)
ibase = bitsize - 1 - i;
else
ibase = i;
*vp++ = tmp[ibase / 32] >> i % 32;
}
scalar_int_mode imode;
if (is_a <scalar_int_mode> (mode, &imode)
&& GET_MODE_PRECISION (imode) <= MAX_BITSIZE_MODE_ANY_INT)
{
/* Pull the bytes msb first, so that we can use simple
shift-and-insert wide_int operations. */
unsigned int size = GET_MODE_SIZE (imode);
wide_int result (wi::zero (GET_MODE_PRECISION (imode)));
for (unsigned int i = 0; i < size; ++i)
{
unsigned int lsb = (size - i - 1) * BITS_PER_UNIT;
/* Always constant because the inputs are. */
unsigned int subbyte
= subreg_size_offset_from_lsb (1, size, lsb).to_constant ();
result <<= BITS_PER_UNIT;
result |= bytes[first_byte + subbyte];
}
return immed_wide_int_const (result, imode);
}
/* It shouldn't matter what's done here, so fill it with
zero. */
for (; i < elem_bitsize; i += value_bit)
*vp++ = 0;
}
break;
scalar_float_mode fmode;
if (is_a <scalar_float_mode> (mode, &fmode))
{
/* We need to build an array of integers in target memory order.
All integers before the last one have 32 bits; the last one may
have 32 bits or fewer, depending on whether the mode bitsize
is divisible by 32. */
long el32[MAX_BITSIZE_MODE_ANY_MODE / 32];
unsigned int num_el32 = CEIL (GET_MODE_BITSIZE (fmode), 32);
memset (el32, 0, num_el32 * sizeof (long));
/* The (maximum) number of target bytes per element of el32. */
unsigned int bytes_per_el32 = 32 / BITS_PER_UNIT;
gcc_assert (bytes_per_el32 != 0);
unsigned int mode_bytes = GET_MODE_SIZE (fmode);
for (unsigned int byte = 0; byte < mode_bytes; ++byte)
{
unsigned int index = byte / bytes_per_el32;
unsigned int subbyte = byte % bytes_per_el32;
unsigned int int_bytes = MIN (bytes_per_el32,
mode_bytes - index * bytes_per_el32);
/* Always constant because the inputs are. */
unsigned int lsb
= subreg_size_lsb (1, int_bytes, subbyte).to_constant ();
el32[index] |= (unsigned long) bytes[first_byte + byte] << lsb;
}
REAL_VALUE_TYPE r;
real_from_target (&r, el32, fmode);
return const_double_from_real_value (r, fmode);
}
case CONST_FIXED:
if (elem_bitsize <= HOST_BITS_PER_WIDE_INT)
{
for (i = 0; i < elem_bitsize; i += value_bit)
*vp++ = CONST_FIXED_VALUE_LOW (el) >> i;
}
if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
{
scalar_mode smode = as_a <scalar_mode> (mode);
FIXED_VALUE_TYPE f;
f.data.low = 0;
f.data.high = 0;
f.mode = smode;
unsigned int mode_bytes = GET_MODE_SIZE (smode);
for (unsigned int byte = 0; byte < mode_bytes; ++byte)
{
/* Always constant because the inputs are. */
unsigned int lsb
= subreg_size_lsb (1, mode_bytes, byte).to_constant ();
unsigned HOST_WIDE_INT unit = bytes[first_byte + byte];
if (lsb >= HOST_BITS_PER_WIDE_INT)
f.data.high |= unit << (lsb - HOST_BITS_PER_WIDE_INT);
else
{
for (i = 0; i < HOST_BITS_PER_WIDE_INT; i += value_bit)
*vp++ = CONST_FIXED_VALUE_LOW (el) >> i;
for (; i < HOST_BITS_PER_DOUBLE_INT && i < elem_bitsize;
i += value_bit)
*vp++ = CONST_FIXED_VALUE_HIGH (el)
>> (i - HOST_BITS_PER_WIDE_INT);
for (; i < elem_bitsize; i += value_bit)
*vp++ = 0;
}
break;
default:
gcc_unreachable ();
f.data.low |= unit << lsb;
}
return CONST_FIXED_FROM_FIXED_VALUE (f, mode);
}
/* Now, pick the right byte to start with. */
/* Renumber BYTE so that the least-significant byte is byte 0. A special
case is paradoxical SUBREGs, which shouldn't be adjusted since they
will already have offset 0. */
if (inner_bytes >= GET_MODE_SIZE (outermode))
return NULL_RTX;
}
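
For scalar integers, native_decode_rtx visits the bytes most-significant
first so that a simple shift-and-insert loop rebuilds the value.  A
standalone sketch of the same idea for a 32-bit little-endian image (the
real code uses wide_int and subreg_size_offset_from_lsb so that any byte
order and precision work):

#include <cassert>
#include <cstdint>
#include <vector>

/* Standalone sketch: rebuild a 32-bit value from a little-endian byte
   image by visiting the bytes msb-first and shifting the accumulator
   left each time.  */

static uint32_t
decode_le32 (const std::vector<uint8_t> &bytes, unsigned int first_byte)
{
  uint32_t result = 0;
  for (unsigned int i = 0; i < 4; ++i)
    {
      /* On little-endian the byte holding the most significant remaining
	 bits is the one with the highest index.  */
      unsigned int subbyte = 3 - i;
      result = (result << 8) | bytes[first_byte + subbyte];
    }
  return result;
}

int
main ()
{
  std::vector<uint8_t> bytes = { 0x78, 0x56, 0x34, 0x12 };
  assert (decode_le32 (bytes, 0) == 0x12345678);
  return 0;
}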
/* Simplify a byte offset BYTE into CONST_VECTOR X. The main purpose
is to convert a runtime BYTE value into a constant one. */
static poly_uint64
simplify_const_vector_byte_offset (rtx x, poly_uint64 byte)
{
/* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes. */
machine_mode mode = GET_MODE (x);
unsigned int elt_bits = vector_element_size (GET_MODE_BITSIZE (mode),
GET_MODE_NUNITS (mode));
/* The number of bits needed to encode one element from each pattern. */
unsigned int sequence_bits = CONST_VECTOR_NPATTERNS (x) * elt_bits;
/* Identify the start point in terms of a sequence number and a byte offset
within that sequence. */
poly_uint64 first_sequence;
unsigned HOST_WIDE_INT subbit;
if (can_div_trunc_p (byte * BITS_PER_UNIT, sequence_bits,
&first_sequence, &subbit))
{
unsigned ibyte = inner_bytes - GET_MODE_SIZE (outermode) - byte;
unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
byte = (subword_byte % UNITS_PER_WORD
+ (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
if (nelts_per_pattern == 1)
/* This is a duplicated vector, so the value of FIRST_SEQUENCE
doesn't matter. */
byte = subbit / BITS_PER_UNIT;
else if (nelts_per_pattern == 2 && known_gt (first_sequence, 0U))
{
/* The subreg drops the first element from each pattern and
only uses the second element. Find the first sequence
that starts on a byte boundary. */
subbit += least_common_multiple (sequence_bits, BITS_PER_UNIT);
byte = subbit / BITS_PER_UNIT;
}
}
return byte;
}
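
For a duplicated vector (one element per pattern) the constant value
repeats every CONST_VECTOR_NPATTERNS (x) elements, so a runtime byte
offset can simply be reduced modulo the byte size of one repeating
sequence.  A standalone sketch with hypothetical numbers:

#include <cassert>

int
main ()
{
  /* Standalone sketch: a duplicated vector with 2 patterns of 4-byte
     elements repeats every 8 bytes, so a runtime offset of N * 8 + 4
     selects the same bytes as the constant offset 4.  */
  unsigned int sequence_bytes = 2 * 4;
  unsigned int runtime_byte = 3 * sequence_bytes + 4;
  unsigned int reduced_byte = runtime_byte % sequence_bytes;
  assert (reduced_byte == 4);
  return 0;
}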
/* Subroutine of simplify_subreg in which:
- X is known to be a CONST_VECTOR
- OUTERMODE is known to be a vector mode
/* BYTE should still be inside OP. (Note that BYTE is unsigned,
so if it's become negative it will instead be very large.) */
gcc_assert (byte < inner_bytes);
Try to handle the subreg by operating on the CONST_VECTOR encoding
rather than on each individual element of the CONST_VECTOR.
/* Convert from bytes to chunks of size value_bit. */
value_start = byte * (BITS_PER_UNIT / value_bit);
Return the simplified subreg on success, otherwise return NULL_RTX. */
static rtx
simplify_const_vector_subreg (machine_mode outermode, rtx x,
machine_mode innermode, unsigned int first_byte)
{
/* Paradoxical subregs of vectors have dubious semantics. */
if (paradoxical_subreg_p (outermode, innermode))
return NULL_RTX;
/* Re-pack the value. */
num_elem = GET_MODE_NUNITS (outermode);
/* We can only preserve the semantics of a stepped pattern if the new
vector element is the same as the original one. */
if (CONST_VECTOR_STEPPED_P (x)
&& GET_MODE_INNER (outermode) != GET_MODE_INNER (innermode))
return NULL_RTX;
if (VECTOR_MODE_P (outermode))
/* Cope with MODE_VECTOR_BOOL by operating on bits rather than bytes. */
unsigned int x_elt_bits
= vector_element_size (GET_MODE_BITSIZE (innermode),
GET_MODE_NUNITS (innermode));
unsigned int out_elt_bits
= vector_element_size (GET_MODE_BITSIZE (outermode),
GET_MODE_NUNITS (outermode));
/* The number of bits needed to encode one element from every pattern
of the original vector. */
unsigned int x_sequence_bits = CONST_VECTOR_NPATTERNS (x) * x_elt_bits;
/* The number of bits needed to encode one element from every pattern
of the result. */
unsigned int out_sequence_bits
= least_common_multiple (x_sequence_bits, out_elt_bits);
/* Work out the number of interleaved patterns in the output vector
and the number of encoded elements per pattern. */
unsigned int out_npatterns = out_sequence_bits / out_elt_bits;
unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
/* The encoding scheme requires the number of elements to be a multiple
of the number of patterns, so that each pattern appears at least once
and so that the same number of elements appear from each pattern. */
bool ok_p = multiple_p (GET_MODE_NUNITS (outermode), out_npatterns);
unsigned int const_nunits;
if (GET_MODE_NUNITS (outermode).is_constant (&const_nunits)
&& (!ok_p || out_npatterns * nelts_per_pattern > const_nunits))
{
result_v = rtvec_alloc (num_elem);
elems = &RTVEC_ELT (result_v, 0);
/* Either the encoding is invalid, or applying it would give us
more elements than we need. Just encode each element directly. */
out_npatterns = const_nunits;
nelts_per_pattern = 1;
}
else
elems = &result_s;
else if (!ok_p)
return NULL_RTX;
outer_submode = GET_MODE_INNER (outermode);
outer_class = GET_MODE_CLASS (outer_submode);
elem_bitsize = GET_MODE_BITSIZE (outer_submode);
/* Get enough bytes of X to form the new encoding. */
unsigned int buffer_bits = out_npatterns * nelts_per_pattern * out_elt_bits;
unsigned int buffer_bytes = CEIL (buffer_bits, BITS_PER_UNIT);
auto_vec<target_unit, 128> buffer (buffer_bytes);
if (!native_encode_rtx (innermode, x, buffer, first_byte, buffer_bytes))
return NULL_RTX;
gcc_assert (elem_bitsize % value_bit == 0);
gcc_assert (elem_bitsize + value_start * value_bit <= max_bitsize);
/* Reencode the bytes as OUTERMODE. */
return native_decode_vector_rtx (outermode, buffer, 0, out_npatterns,
nelts_per_pattern);
}
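
The number of patterns in the re-encoded vector comes from the least
common multiple of the two sequence sizes.  A standalone sketch of that
arithmetic with hypothetical element sizes (the real code works in bits so
that sub-byte MODE_VECTOR_BOOL elements are handled too):

#include <cassert>
#include <numeric>

int
main ()
{
  /* Standalone sketch: the original vector has 2 patterns of 16-bit
     elements, so one element from every pattern occupies 32 bits.  */
  unsigned int x_npatterns = 2, x_elt_bits = 16;
  unsigned int x_sequence_bits = x_npatterns * x_elt_bits;

  /* Re-encoding as a vector of 8-bit elements: one element from every
     result pattern must cover a whole number of input sequences, so the
     result sequence is lcm (32, 8) = 32 bits, i.e. 4 patterns.  */
  unsigned int out_elt_bits = 8;
  unsigned int out_sequence_bits = std::lcm (x_sequence_bits, out_elt_bits);
  unsigned int out_npatterns = out_sequence_bits / out_elt_bits;
  assert (out_npatterns == 4);
  return 0;
}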
for (elem = 0; elem < num_elem; elem++)
{
unsigned char *vp;
/* Try to simplify a subreg of a constant by encoding the subreg region
as a sequence of target bytes and reading them back in the new mode.
Return the new value on success, otherwise return null.
/* Vectors are stored in target memory order. (This is probably
a mistake.) */
{
unsigned byte = (elem * elem_bitsize) / BITS_PER_UNIT;
unsigned ibyte = (((num_elem - 1 - elem) * elem_bitsize)
/ BITS_PER_UNIT);
unsigned word_byte = WORDS_BIG_ENDIAN ? ibyte : byte;
unsigned subword_byte = BYTES_BIG_ENDIAN ? ibyte : byte;
unsigned bytele = (subword_byte % UNITS_PER_WORD
+ (word_byte / UNITS_PER_WORD) * UNITS_PER_WORD);
vp = value + value_start + (bytele * BITS_PER_UNIT) / value_bit;
}
The subreg has outer mode OUTERMODE, inner mode INNERMODE, inner value X
and byte offset FIRST_BYTE. */
switch (outer_class)
{
case MODE_INT:
case MODE_PARTIAL_INT:
{
int u;
int base = 0;
int units
= (GET_MODE_BITSIZE (outer_submode) + HOST_BITS_PER_WIDE_INT - 1)
/ HOST_BITS_PER_WIDE_INT;
HOST_WIDE_INT tmp[MAX_BITSIZE_MODE_ANY_INT / HOST_BITS_PER_WIDE_INT];
wide_int r;
if (GET_MODE_PRECISION (outer_submode) > MAX_BITSIZE_MODE_ANY_INT)
return NULL_RTX;
for (u = 0; u < units; u++)
{
unsigned HOST_WIDE_INT buf = 0;
for (i = 0;
i < HOST_BITS_PER_WIDE_INT && base + i < elem_bitsize;
i += value_bit)
buf |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i;
tmp[u] = buf;
base += HOST_BITS_PER_WIDE_INT;
}
r = wide_int::from_array (tmp, units,
GET_MODE_PRECISION (outer_submode));
#if TARGET_SUPPORTS_WIDE_INT == 0
/* Make sure r will fit into CONST_INT or CONST_DOUBLE. */
if (wi::min_precision (r, SIGNED) > HOST_BITS_PER_DOUBLE_INT)
return NULL_RTX;
#endif
elems[elem] = immed_wide_int_const (r, outer_submode);
}
break;
static rtx
simplify_immed_subreg (fixed_size_mode outermode, rtx x,
machine_mode innermode, unsigned int first_byte)
{
unsigned int buffer_bytes = GET_MODE_SIZE (outermode);
auto_vec<target_unit, 128> buffer (buffer_bytes);
case MODE_FLOAT:
case MODE_DECIMAL_FLOAT:
{
REAL_VALUE_TYPE r;
long tmp[MAX_BITSIZE_MODE_ANY_MODE / 32] = { 0 };
/* real_from_target wants its input in words affected by
FLOAT_WORDS_BIG_ENDIAN. However, we ignore this,
and use WORDS_BIG_ENDIAN instead; see the documentation
of SUBREG in rtl.texi. */
for (i = 0; i < elem_bitsize; i += value_bit)
{
int ibase;
if (WORDS_BIG_ENDIAN)
ibase = elem_bitsize - 1 - i;
else
ibase = i;
tmp[ibase / 32] |= (*vp++ & value_mask) << i % 32;
}
/* Some ports misuse CCmode. */
if (GET_MODE_CLASS (outermode) == MODE_CC && CONST_INT_P (x))
return x;
real_from_target (&r, tmp, outer_submode);
elems[elem] = const_double_from_real_value (r, outer_submode);
}
break;
/* Paradoxical subregs read undefined values for bytes outside of the
inner value. However, we have traditionally always sign-extended
integer constants and zero-extended others. */
unsigned int inner_bytes = buffer_bytes;
if (paradoxical_subreg_p (outermode, innermode))
{
if (!GET_MODE_SIZE (innermode).is_constant (&inner_bytes))
return NULL_RTX;
case MODE_FRACT:
case MODE_UFRACT:
case MODE_ACCUM:
case MODE_UACCUM:
{
FIXED_VALUE_TYPE f;
f.data.low = 0;
f.data.high = 0;
f.mode = outer_submode;
for (i = 0;
i < HOST_BITS_PER_WIDE_INT && i < elem_bitsize;
i += value_bit)
f.data.low |= (unsigned HOST_WIDE_INT)(*vp++ & value_mask) << i;
for (; i < elem_bitsize; i += value_bit)
f.data.high |= ((unsigned HOST_WIDE_INT)(*vp++ & value_mask)
<< (i - HOST_BITS_PER_WIDE_INT));
elems[elem] = CONST_FIXED_FROM_FIXED_VALUE (f, outer_submode);
}
break;
target_unit filler = 0;
if (CONST_SCALAR_INT_P (x) && wi::neg_p (rtx_mode_t (x, innermode)))
filler = -1;
default:
gcc_unreachable ();
}
/* Add any leading bytes due to big-endian layout. The number of
bytes must be constant because both modes have constant size. */
unsigned int leading_bytes
= -byte_lowpart_offset (outermode, innermode).to_constant ();
for (unsigned int i = 0; i < leading_bytes; ++i)
buffer.quick_push (filler);
if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes))
return NULL_RTX;
/* Add any trailing bytes due to little-endian layout. */
while (buffer.length () < buffer_bytes)
buffer.quick_push (filler);
}
if (VECTOR_MODE_P (outermode))
return gen_rtx_CONST_VECTOR (outermode, result_v);
else
return result_s;
{
if (!native_encode_rtx (innermode, x, buffer, first_byte, inner_bytes))
return NULL_RTX;
}
return native_decode_rtx (outermode, buffer, 0);
}
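
A standalone sketch of the filler behaviour for a paradoxical subreg on a
little-endian target (hypothetical values; the real code uses
byte_lowpart_offset to place the filler bytes correctly for big-endian
layouts as well):

#include <cassert>
#include <cstdint>
#include <vector>

int
main ()
{
  /* Standalone sketch: widen the 2-byte little-endian image of a negative
     16-bit constant to 4 bytes, sign-extending with 0xff filler bytes as
     the traditional constant-subreg folding did.  */
  int16_t inner = -2;
  uint16_t bits = (uint16_t) inner;		/* 0xfffe */
  uint8_t filler = inner < 0 ? 0xff : 0x00;

  std::vector<uint8_t> buffer;
  buffer.push_back (bits & 0xff);		/* 0xfe */
  buffer.push_back (bits >> 8);			/* 0xff */
  while (buffer.size () < 4)
    buffer.push_back (filler);			/* trailing filler bytes */

  uint32_t outer = 0;
  for (unsigned int i = 0; i < 4; ++i)
    outer |= uint32_t (buffer[i]) << (i * 8);
  assert (outer == 0xfffffffe);			/* sign-extended value */
  return 0;
}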
/* Simplify SUBREG:OUTERMODE(OP:INNERMODE, BYTE)
@@ -6494,6 +6618,9 @@ simplify_subreg (machine_mode outermode, rtx op,
if (outermode == innermode && known_eq (byte, 0U))
return op;
if (GET_CODE (op) == CONST_VECTOR)
byte = simplify_const_vector_byte_offset (op, byte);
if (multiple_p (byte, GET_MODE_UNIT_SIZE (innermode)))
{
rtx elt;
@@ -6513,30 +6640,21 @@ simplify_subreg (machine_mode outermode, rtx op,
|| CONST_FIXED_P (op)
|| GET_CODE (op) == CONST_VECTOR)
{
/* simplify_immed_subreg deconstructs OP into bytes and constructs
the result from bytes, so it only works if the sizes of the modes
and the value of the offset are known at compile time. Cases that
apply to general modes and offsets should be handled here
before calling simplify_immed_subreg. */
fixed_size_mode fs_outermode, fs_innermode;
unsigned HOST_WIDE_INT cbyte;
if (is_a <fixed_size_mode> (outermode, &fs_outermode)
&& is_a <fixed_size_mode> (innermode, &fs_innermode)
&& byte.is_constant (&cbyte))
return simplify_immed_subreg (fs_outermode, op, fs_innermode, cbyte,
0, GET_MODE_SIZE (fs_innermode));
/* Handle constant-sized outer modes and variable-sized inner modes. */
unsigned HOST_WIDE_INT first_elem;
if (GET_CODE (op) == CONST_VECTOR
&& is_a <fixed_size_mode> (outermode, &fs_outermode)
&& constant_multiple_p (byte, GET_MODE_UNIT_SIZE (innermode),
&first_elem))
return simplify_immed_subreg (fs_outermode, op, innermode, 0,
first_elem,
GET_MODE_SIZE (fs_outermode));
if (byte.is_constant (&cbyte))
{
if (GET_CODE (op) == CONST_VECTOR && VECTOR_MODE_P (outermode))
{
rtx tmp = simplify_const_vector_subreg (outermode, op,
innermode, cbyte);
if (tmp)
return tmp;
}
return NULL_RTX;
fixed_size_mode fs_outermode;
if (is_a <fixed_size_mode> (outermode, &fs_outermode))
return simplify_immed_subreg (fs_outermode, op, innermode, cbyte);
}
}
/* Changing mode twice with SUBREG => just change it once,
@@ -7179,6 +7297,165 @@ test_vec_merge (machine_mode mode)
simplify_rtx (nvm));
}
/* Test subregs of integer vector constant X, trying elements in
the range [ELT_BIAS, ELT_BIAS + constant_lower_bound (NELTS)),
where NELTS is the number of elements in X. Subregs involving
elements [ELT_BIAS, ELT_BIAS + FIRST_VALID) are expected to fail. */
static void
test_vector_subregs_modes (rtx x, poly_uint64 elt_bias = 0,
unsigned int first_valid = 0)
{
machine_mode inner_mode = GET_MODE (x);
scalar_mode int_mode = GET_MODE_INNER (inner_mode);
for (unsigned int modei = 0; modei < NUM_MACHINE_MODES; ++modei)
{
machine_mode outer_mode = (machine_mode) modei;
if (!VECTOR_MODE_P (outer_mode))
continue;
unsigned int outer_nunits;
if (GET_MODE_INNER (outer_mode) == int_mode
&& GET_MODE_NUNITS (outer_mode).is_constant (&outer_nunits)
&& multiple_p (GET_MODE_NUNITS (inner_mode), outer_nunits))
{
/* Test subregs in which the outer mode is a smaller,
constant-sized vector of the same element type. */
unsigned int limit
= constant_lower_bound (GET_MODE_NUNITS (inner_mode));
for (unsigned int elt = 0; elt < limit; elt += outer_nunits)
{
rtx expected = NULL_RTX;
if (elt >= first_valid)
{
rtx_vector_builder builder (outer_mode, outer_nunits, 1);
for (unsigned int i = 0; i < outer_nunits; ++i)
builder.quick_push (CONST_VECTOR_ELT (x, elt + i));
expected = builder.build ();
}
poly_uint64 byte = (elt_bias + elt) * GET_MODE_SIZE (int_mode);
ASSERT_RTX_EQ (expected,
simplify_subreg (outer_mode, x,
inner_mode, byte));
}
}
else if (known_eq (GET_MODE_SIZE (outer_mode),
GET_MODE_SIZE (inner_mode))
&& known_eq (elt_bias, 0U)
&& (GET_MODE_CLASS (outer_mode) != MODE_VECTOR_BOOL
|| known_eq (GET_MODE_BITSIZE (outer_mode),
GET_MODE_NUNITS (outer_mode)))
&& (!FLOAT_MODE_P (outer_mode)
|| (FLOAT_MODE_FORMAT (outer_mode)->ieee_bits
== GET_MODE_UNIT_PRECISION (outer_mode)))
&& (GET_MODE_SIZE (inner_mode).is_constant ()
|| !CONST_VECTOR_STEPPED_P (x)))
{
/* Try converting to OUTER_MODE and back. */
rtx outer_x = simplify_subreg (outer_mode, x, inner_mode, 0);
ASSERT_TRUE (outer_x != NULL_RTX);
ASSERT_RTX_EQ (x, simplify_subreg (inner_mode, outer_x,
outer_mode, 0));
}
}
if (BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
{
/* Test each byte in the element range. */
unsigned int limit
= constant_lower_bound (GET_MODE_SIZE (inner_mode));
for (unsigned int i = 0; i < limit; ++i)
{
unsigned int elt = i / GET_MODE_SIZE (int_mode);
rtx expected = NULL_RTX;
if (elt >= first_valid)
{
unsigned int byte_shift = i % GET_MODE_SIZE (int_mode);
if (BYTES_BIG_ENDIAN)
byte_shift = GET_MODE_SIZE (int_mode) - byte_shift - 1;
rtx_mode_t vec_elt (CONST_VECTOR_ELT (x, elt), int_mode);
wide_int shifted_elt
= wi::lrshift (vec_elt, byte_shift * BITS_PER_UNIT);
expected = immed_wide_int_const (shifted_elt, QImode);
}
poly_uint64 byte = elt_bias * GET_MODE_SIZE (int_mode) + i;
ASSERT_RTX_EQ (expected,
simplify_subreg (QImode, x, inner_mode, byte));
}
}
}
/* Test constant subregs of integer vector mode INNER_MODE, using 1
element per pattern. */
static void
test_vector_subregs_repeating (machine_mode inner_mode)
{
poly_uint64 nunits = GET_MODE_NUNITS (inner_mode);
unsigned int min_nunits = constant_lower_bound (nunits);
scalar_mode int_mode = GET_MODE_INNER (inner_mode);
unsigned int count = gcd (min_nunits, 8);
rtx_vector_builder builder (inner_mode, count, 1);
for (unsigned int i = 0; i < count; ++i)
builder.quick_push (gen_int_mode (8 - i, int_mode));
rtx x = builder.build ();
test_vector_subregs_modes (x);
if (!nunits.is_constant ())
test_vector_subregs_modes (x, nunits - min_nunits);
}
/* Test constant subregs of integer vector mode INNER_MODE, using 2
elements per pattern. */
static void
test_vector_subregs_fore_back (machine_mode inner_mode)
{
poly_uint64 nunits = GET_MODE_NUNITS (inner_mode);
unsigned int min_nunits = constant_lower_bound (nunits);
scalar_mode int_mode = GET_MODE_INNER (inner_mode);
unsigned int count = gcd (min_nunits, 4);
rtx_vector_builder builder (inner_mode, count, 2);
for (unsigned int i = 0; i < count; ++i)
builder.quick_push (gen_int_mode (i, int_mode));
for (unsigned int i = 0; i < count; ++i)
builder.quick_push (gen_int_mode (-(int) i, int_mode));
rtx x = builder.build ();
test_vector_subregs_modes (x);
if (!nunits.is_constant ())
test_vector_subregs_modes (x, nunits - min_nunits, count);
}
/* Test constant subregs of integer vector mode INNER_MODE, using 3
elements per pattern. */
static void
test_vector_subregs_stepped (machine_mode inner_mode)
{
/* Build { 0, 1, 2, 3, ... }. */
scalar_mode int_mode = GET_MODE_INNER (inner_mode);
rtx_vector_builder builder (inner_mode, 1, 3);
for (unsigned int i = 0; i < 3; ++i)
builder.quick_push (gen_int_mode (i, int_mode));
rtx x = builder.build ();
test_vector_subregs_modes (x);
}
/* Test constant subregs of integer vector mode INNER_MODE. */
static void
test_vector_subregs (machine_mode inner_mode)
{
test_vector_subregs_repeating (inner_mode);
test_vector_subregs_fore_back (inner_mode);
test_vector_subregs_stepped (inner_mode);
}
/* Verify some simplifications involving vectors. */
static void
@@ -7193,7 +7470,10 @@ test_vector_ops ()
test_vector_ops_duplicate (mode, scalar_reg);
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
&& maybe_gt (GET_MODE_NUNITS (mode), 2))
test_vector_ops_series (mode, scalar_reg);
{
test_vector_ops_series (mode, scalar_reg);
test_vector_subregs (mode);
}
test_vec_merge (mode);
}
}
......