Commit 8aba425f by Jakub Jelinek

sccvn: Handle bitfields in vn_reference_lookup_3 [PR93582]

The following patch is the first step towards fixing PR93582.
vn_reference_lookup_3 right now punts on anything that isn't byte aligned,
so to be able to look up a constant bitfield store, one needs to use
the exact same COMPONENT_REF, otherwise it isn't found.

This patch lifts that restriction if the bits to be loaded are
covered by a single store of a constant (it keeps the restriction so far
for the multiple store case; that can be tweaked incrementally, but I think
for bisection etc. it is worth doing it one step at a time).

2020-02-13  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/93582
	* fold-const.h (shift_bytes_in_array_left,
	shift_bytes_in_array_right): Declare.
	* fold-const.c (shift_bytes_in_array_left,
	shift_bytes_in_array_right): New functions, moved from
	gimple-ssa-store-merging.c, no longer static.
	* gimple-ssa-store-merging.c (shift_bytes_in_array): Move
	to fold-const.c and rename to shift_bytes_in_array_left.
	(shift_bytes_in_array_right): Move to fold-const.c.
	(encode_tree_to_bitpos): Use shift_bytes_in_array_left instead of
	shift_bytes_in_array.
	(verify_shift_bytes_in_array): Rename to ...
	(verify_shift_bytes_in_array_left): ... this.  Use
	shift_bytes_in_array_left instead of shift_bytes_in_array.
	(store_merging_c_tests): Call verify_shift_bytes_in_array_left
	instead of verify_shift_bytes_in_array.
	* tree-ssa-sccvn.c (vn_reference_lookup_3): For native_encode_expr
	/ native_interpret_expr where the store covers all needed bits,
	punt on PDP-endian, otherwise allow all involved offsets and sizes
	not to be byte-aligned.

	* gcc.dg/tree-ssa/pr93582-1.c: New test.
	* gcc.dg/tree-ssa/pr93582-2.c: New test.
	* gcc.dg/tree-ssa/pr93582-3.c: New test.
parent 8ea884b8
2020-02-13 Jakub Jelinek <jakub@redhat.com> 2020-02-13 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/93582
* fold-const.h (shift_bytes_in_array_left,
shift_bytes_in_array_right): Declare.
* fold-const.c (shift_bytes_in_array_left,
shift_bytes_in_array_right): New functions, moved from
gimple-ssa-store-merging.c, no longer static.
* gimple-ssa-store-merging.c (shift_bytes_in_array): Move
to fold-const.c and rename to shift_bytes_in_array_left.
(shift_bytes_in_array_right): Move to fold-const.c.
(encode_tree_to_bitpos): Use shift_bytes_in_array_left instead of
shift_bytes_in_array.
(verify_shift_bytes_in_array): Rename to ...
(verify_shift_bytes_in_array_left): ... this. Use
shift_bytes_in_array_left instead of shift_bytes_in_array.
(store_merging_c_tests): Call verify_shift_bytes_in_array_left
instead of verify_shift_bytes_in_array.
* tree-ssa-sccvn.c (vn_reference_lookup_3): For native_encode_expr
/ native_interpret_expr where the store covers all needed bits,
punt on PDP-endian, otherwise allow all involved offsets and sizes
not to be byte-aligned.
PR target/93673 PR target/93673
* config/i386/sse.md (k<code><mode>): Drop mode from last operand and * config/i386/sse.md (k<code><mode>): Drop mode from last operand and
use const_0_to_255_operand predicate instead of immediate_operand. use const_0_to_255_operand predicate instead of immediate_operand.
......
...@@ -8354,6 +8354,70 @@ can_native_interpret_type_p (tree type) ...@@ -8354,6 +8354,70 @@ can_native_interpret_type_p (tree type)
} }
} }
/* Routines for manipulation of native_encode_expr encoded data if the encoded
or extracted constant positions and/or sizes aren't byte aligned. */
/* Shift the SZ-element byte array PTR left by AMNT bits, treating PTR[0]
   as the least significant element: the bits pushed out of the top of
   each element enter the bottom of the following one.  Bits pushed out
   of PTR[SZ - 1] are discarded and the low AMNT bits of PTR[0] become
   zero.  AMNT should be within [0, BITS_PER_UNIT); a zero AMNT leaves
   the array untouched.
   Example, AMNT = 2:
   00011111|11100000 << 2 = 01111111|10000000
   PTR[1] | PTR[0] PTR[1] | PTR[0]. */
void
shift_bytes_in_array_left (unsigned char *ptr, unsigned int sz,
                           unsigned int amnt)
{
  if (amnt == 0)
    return;

  /* Distance the spilled bits travel to reach the bottom of a byte.  */
  const unsigned int spill_shift = BITS_PER_UNIT - amnt;
  /* The AMNT topmost bits of an element, i.e. the ones that spill.  */
  const unsigned char high_bits = (~0U) << (unsigned char) spill_shift;
  /* Everything except the AMNT lowest bits of an element.  */
  const unsigned char keep_bits = (~0U) << amnt;

  /* Bits handed over by the previous (less significant) element.  */
  unsigned char incoming = 0U;

  for (unsigned int idx = 0; idx < sz; idx++)
    {
      /* Grab the spill before the shift destroys it.  */
      const unsigned char outgoing = (ptr[idx] & high_bits) >> spill_shift;

      ptr[idx] <<= amnt;
      if (idx != 0)
        ptr[idx] = (ptr[idx] & keep_bits) | incoming;

      incoming = outgoing;
    }
}
/* Big-endian counterpart of shift_bytes_in_array_left.
   Shift the SZ-element byte array PTR right by AMNT bits, treating PTR[0]
   as the most significant element: the bits pushed out of the bottom of
   each element enter the top of the following one.  Bits pushed out of
   PTR[SZ - 1] are discarded and the high AMNT bits of PTR[0] become zero.
   AMNT should be within [0, BITS_PER_UNIT); a zero AMNT leaves the array
   untouched.
   Example, AMNT = 2:
   00011111|11100000 >> 2 = 00000111|11111000
   PTR[0] | PTR[1] PTR[0] | PTR[1]. */
void
shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
                            unsigned int amnt)
{
  if (amnt == 0)
    return;

  /* Distance the spilled bits travel to reach the top of a byte.  */
  const unsigned int spill_shift = (unsigned char) BITS_PER_UNIT - amnt;
  /* The AMNT lowest bits of an element, i.e. the ones that spill.  */
  const unsigned char low_bits = ~(~0U << amnt);

  /* Bits handed over by the previous (more significant) element, already
     positioned at the top of a byte.  */
  unsigned char incoming = 0U;

  for (unsigned int idx = 0; idx < sz; idx++)
    {
      /* Grab the spill before the shift destroys it.  */
      unsigned char outgoing = ptr[idx] & low_bits;
      outgoing <<= spill_shift;

      ptr[idx] >>= amnt;
      ptr[idx] |= incoming;

      incoming = outgoing;
    }
}
/* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating /* Try to view-convert VECTOR_CST EXPR to VECTOR_TYPE TYPE by operating
directly on the VECTOR_CST encoding, in a way that works for variable- directly on the VECTOR_CST encoding, in a way that works for variable-
length vectors. Return the resulting VECTOR_CST on success or null length vectors. Return the resulting VECTOR_CST on success or null
......
...@@ -30,6 +30,10 @@ extern int native_encode_initializer (tree, unsigned char *, int, ...@@ -30,6 +30,10 @@ extern int native_encode_initializer (tree, unsigned char *, int,
int off = -1); int off = -1);
extern tree native_interpret_expr (tree, const unsigned char *, int); extern tree native_interpret_expr (tree, const unsigned char *, int);
extern bool can_native_interpret_type_p (tree); extern bool can_native_interpret_type_p (tree);
extern void shift_bytes_in_array_left (unsigned char *, unsigned int,
unsigned int);
extern void shift_bytes_in_array_right (unsigned char *, unsigned int,
unsigned int);
/* Fold constants as much as possible in an expression. /* Fold constants as much as possible in an expression.
Returns the simplified expression. Returns the simplified expression.
......
...@@ -1475,66 +1475,6 @@ dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len) ...@@ -1475,66 +1475,6 @@ dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
fprintf (fd, "\n"); fprintf (fd, "\n");
} }
/* Shift the SZ-element byte array PTR left by AMNT bits, treating PTR[0]
   as the least significant element: the bits pushed out of the top of
   each element enter the bottom of the following one.  Bits pushed out
   of PTR[SZ - 1] are discarded and the low AMNT bits of PTR[0] become
   zero.  AMNT should be within [0, BITS_PER_UNIT); a zero AMNT leaves
   the array untouched.
   Example, AMNT = 2:
   00011111|11100000 << 2 = 01111111|10000000
   PTR[1] | PTR[0] PTR[1] | PTR[0]. */
static void
shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
{
  if (amnt == 0)
    return;

  /* Distance the spilled bits travel to reach the bottom of a byte.  */
  const unsigned int spill_shift = BITS_PER_UNIT - amnt;
  /* The AMNT topmost bits of an element, i.e. the ones that spill.  */
  const unsigned char high_bits = (~0U) << (unsigned char) spill_shift;
  /* Everything except the AMNT lowest bits of an element.  */
  const unsigned char keep_bits = (~0U) << amnt;

  /* Bits handed over by the previous (less significant) element.  */
  unsigned char incoming = 0U;

  for (unsigned int idx = 0; idx < sz; idx++)
    {
      /* Grab the spill before the shift destroys it.  */
      const unsigned char outgoing = (ptr[idx] & high_bits) >> spill_shift;

      ptr[idx] <<= amnt;
      if (idx != 0)
        ptr[idx] = (ptr[idx] & keep_bits) | incoming;

      incoming = outgoing;
    }
}
/* Big-endian counterpart of shift_bytes_in_array.
   Shift the SZ-element byte array PTR right by AMNT bits, treating PTR[0]
   as the most significant element: the bits pushed out of the bottom of
   each element enter the top of the following one.  Bits pushed out of
   PTR[SZ - 1] are discarded and the high AMNT bits of PTR[0] become zero.
   AMNT should be within [0, BITS_PER_UNIT); a zero AMNT leaves the array
   untouched.
   Example, AMNT = 2:
   00011111|11100000 >> 2 = 00000111|11111000
   PTR[0] | PTR[1] PTR[0] | PTR[1]. */
static void
shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
                            unsigned int amnt)
{
  if (amnt == 0)
    return;

  /* Distance the spilled bits travel to reach the top of a byte.  */
  const unsigned int spill_shift = (unsigned char) BITS_PER_UNIT - amnt;
  /* The AMNT lowest bits of an element, i.e. the ones that spill.  */
  const unsigned char low_bits = ~(~0U << amnt);

  /* Bits handed over by the previous (more significant) element, already
     positioned at the top of a byte.  */
  unsigned char incoming = 0U;

  for (unsigned int idx = 0; idx < sz; idx++)
    {
      /* Grab the spill before the shift destroys it.  */
      unsigned char outgoing = ptr[idx] & low_bits;
      outgoing <<= spill_shift;

      ptr[idx] >>= amnt;
      ptr[idx] |= incoming;

      incoming = outgoing;
    }
}
/* Clear out LEN bits starting from bit START in the byte array /* Clear out LEN bits starting from bit START in the byte array
PTR. This clears the bits to the *right* from START. PTR. This clears the bits to the *right* from START.
START must be within [0, BITS_PER_UNIT) and counts starting from START must be within [0, BITS_PER_UNIT) and counts starting from
...@@ -1793,7 +1733,7 @@ encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos, ...@@ -1793,7 +1733,7 @@ encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
/* Create the shifted version of EXPR. */ /* Create the shifted version of EXPR. */
if (!BYTES_BIG_ENDIAN) if (!BYTES_BIG_ENDIAN)
{ {
shift_bytes_in_array (tmpbuf, byte_size, shift_amnt); shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
if (shift_amnt == 0) if (shift_amnt == 0)
byte_size--; byte_size--;
} }
...@@ -5092,11 +5032,11 @@ verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n) ...@@ -5092,11 +5032,11 @@ verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
} }
} }
/* Test shift_bytes_in_array and that it carries bits across between /* Test shift_bytes_in_array_left and that it carries bits across between
bytes correctly. */ bytes correctly. */
static void static void
verify_shift_bytes_in_array (void) verify_shift_bytes_in_array_left (void)
{ {
/* byte 1 | byte 0 /* byte 1 | byte 0
00011111 | 11100000. */ 00011111 | 11100000. */
...@@ -5105,13 +5045,13 @@ verify_shift_bytes_in_array (void) ...@@ -5105,13 +5045,13 @@ verify_shift_bytes_in_array (void)
memcpy (in, orig, sizeof orig); memcpy (in, orig, sizeof orig);
unsigned char expected[2] = { 0x80, 0x7f }; unsigned char expected[2] = { 0x80, 0x7f };
shift_bytes_in_array (in, sizeof (in), 2); shift_bytes_in_array_left (in, sizeof (in), 2);
verify_array_eq (in, expected, sizeof (in)); verify_array_eq (in, expected, sizeof (in));
memcpy (in, orig, sizeof orig); memcpy (in, orig, sizeof orig);
memcpy (expected, orig, sizeof orig); memcpy (expected, orig, sizeof orig);
/* Check that shifting by zero doesn't change anything. */ /* Check that shifting by zero doesn't change anything. */
shift_bytes_in_array (in, sizeof (in), 0); shift_bytes_in_array_left (in, sizeof (in), 0);
verify_array_eq (in, expected, sizeof (in)); verify_array_eq (in, expected, sizeof (in));
} }
...@@ -5196,7 +5136,7 @@ verify_clear_bit_region_be (void) ...@@ -5196,7 +5136,7 @@ verify_clear_bit_region_be (void)
void void
store_merging_c_tests (void) store_merging_c_tests (void)
{ {
verify_shift_bytes_in_array (); verify_shift_bytes_in_array_left ();
verify_shift_bytes_in_array_right (); verify_shift_bytes_in_array_right ();
verify_clear_bit_region (); verify_clear_bit_region ();
verify_clear_bit_region_be (); verify_clear_bit_region_be ();
......
2020-02-13 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/93582
* gcc.dg/tree-ssa/pr93582-1.c: New test.
* gcc.dg/tree-ssa/pr93582-2.c: New test.
* gcc.dg/tree-ssa/pr93582-3.c: New test.
2020-02-13 Richard Biener <rguenther@suse.de> 2020-02-13 Richard Biener <rguenther@suse.de>
PR testsuite/93717 PR testsuite/93717
......
/* PR tree-optimization/93582 */
/* { dg-do compile { target int32 } } */
/* { dg-options "-O2 -fdump-tree-fre1" } */
/* { dg-final { scan-tree-dump "return 1;" "fre1" } } */
/* Two overlapping bit-field views of the same 32-bit word.  S::b (4 bits
   after the 1-bit a) and T::e (2 bits after the 2-bit d) are distinct
   COMPONENT_REFs, and neither starts on a byte boundary; e lies entirely
   within the bits of b.  */
union U {
struct S { int a : 1, b : 4, c : 27; } s;
struct T { int d : 2; int e : 2; int f : 28; } t;
};
/* Store a constant through one view and read a sub-range of it back
   through the other.  Assuming bit-fields are allocated in declaration
   order (from the least significant bit on little-endian targets, from
   the most significant bit on big-endian ones), e corresponds to bits
   1-2 of b in either layout, so the load yields (10 >> 1) & 3 == 1,
   which is what the dg-final scan for "return 1;" expects.  */
int
foo (void)
{
union U u;
/* 10 is binary 1010 in the 4-bit field.  */
u.s.b = 10;
/* The two bits read here are fully covered by the store above.  */
return u.t.e;
}
/* PR tree-optimization/93582 */
/* { dg-do compile { target int32 } } */
/* { dg-options "-O2 -fdump-tree-fre1" } */
/* { dg-final { scan-tree-dump "return 593;" "fre1" } } */
/* Two overlapping bit-field views of the same 32-bit word.  S::b is 14
   bits wide and follows a 1-bit field; T::e is 12 bits wide and follows
   a 2-bit field, so e lies entirely within the bits of b and spans more
   than one byte.  */
union U {
struct S { int a : 1, b : 14, c : 17; } s;
struct T { int d : 2; int e : 12; int f : 18; } t;
};
/* Store a constant through one view and read a sub-range of it back
   through the other.  Assuming bit-fields are allocated in declaration
   order (from the least significant bit on little-endian targets, from
   the most significant bit on big-endian ones), e corresponds to bits
   1-12 of b in either layout.  -7005 truncated to 14 bits is 9379
   (0x24a3), and (9379 >> 1) & 0xfff == 593, which is what the dg-final
   scan for "return 593;" expects.  */
int
foo (void)
{
union U u;
u.s.b = -7005;
/* The twelve bits read here are fully covered by the store above.  */
return u.t.e;
}
/* PR tree-optimization/93582 */
/* { dg-do compile { target int32 } } */
/* { dg-options "-O2 -fdump-tree-fre1" } */
/* { dg-final { scan-tree-dump "return 1;" "fre1" { target be } } } */
/* { dg-final { scan-tree-dump "return 2;" "fre1" { target le } } } */
/* Two overlapping bit-field views of the same 32-bit word.  S::b is 14
   bits wide and follows a 1-bit field; T::e is 4 bits wide and follows a
   10-bit field, so e lies entirely within the bits of b but which bits of
   b it selects depends on the bit-field allocation order.  */
union U {
struct S { int a : 1, b : 14, c : 17; } s;
struct T { int d : 10; int e : 4; int f : 18; } t;
};
/* Store a constant through one view and read a sub-range of it back
   through the other.  -7005 truncated to 14 bits is 9379 (0x24a3).
   Assuming bit-fields are allocated in declaration order from the least
   significant bit on little-endian targets, e is bits 9-12 of b:
   (9379 >> 9) & 0xf == 2.  Assuming allocation from the most significant
   bit on big-endian targets, e is bits 1-4 of b: (9379 >> 1) & 0xf == 1.
   These match the separate "le" and "be" dg-final scans.  */
int
foo (void)
{
union U u;
u.s.b = -7005;
/* The four bits read here are fully covered by the store above.  */
return u.t.e;
}
...@@ -2586,13 +2586,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, ...@@ -2586,13 +2586,13 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
&& is_gimple_reg_type (vr->type) && is_gimple_reg_type (vr->type)
&& !contains_storage_order_barrier_p (vr->operands) && !contains_storage_order_barrier_p (vr->operands)
&& gimple_assign_single_p (def_stmt) && gimple_assign_single_p (def_stmt)
&& CHAR_BIT == 8 && BITS_PER_UNIT == 8 && CHAR_BIT == 8
&& BITS_PER_UNIT == 8
&& BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
/* native_encode and native_decode operate on arrays of bytes /* native_encode and native_decode operate on arrays of bytes
and so fundamentally need a compile-time size and offset. */ and so fundamentally need a compile-time size and offset. */
&& maxsize.is_constant (&maxsizei) && maxsize.is_constant (&maxsizei)
&& maxsizei % BITS_PER_UNIT == 0
&& offset.is_constant (&offseti) && offset.is_constant (&offseti)
&& offseti % BITS_PER_UNIT == 0
&& (is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)) && (is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt))
|| (TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME || (TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
&& is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt)))))) && is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt))))))
...@@ -2617,8 +2617,6 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, ...@@ -2617,8 +2617,6 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
&& !reverse && !reverse
&& !storage_order_barrier_p (lhs) && !storage_order_barrier_p (lhs)
&& known_eq (maxsize2, size2) && known_eq (maxsize2, size2)
&& multiple_p (size2, BITS_PER_UNIT)
&& multiple_p (offset2, BITS_PER_UNIT)
&& adjust_offsets_for_equal_base_address (base, &offset, && adjust_offsets_for_equal_base_address (base, &offset,
base2, &offset2) base2, &offset2)
&& offset.is_constant (&offseti) && offset.is_constant (&offseti)
...@@ -2629,37 +2627,80 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, ...@@ -2629,37 +2627,80 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
&& known_subrange_p (offseti, maxsizei, offset2, size2)) && known_subrange_p (offseti, maxsizei, offset2, size2))
{ {
/* We support up to 512-bit values (for V8DFmode). */ /* We support up to 512-bit values (for V8DFmode). */
unsigned char buffer[64]; unsigned char buffer[65];
int len; int len;
tree rhs = gimple_assign_rhs1 (def_stmt); tree rhs = gimple_assign_rhs1 (def_stmt);
if (TREE_CODE (rhs) == SSA_NAME) if (TREE_CODE (rhs) == SSA_NAME)
rhs = SSA_VAL (rhs); rhs = SSA_VAL (rhs);
unsigned pad = 0;
if (BYTES_BIG_ENDIAN
&& is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
{
/* On big-endian the padding is at the 'front' so
just skip the initial bytes. */
fixed_size_mode mode
= as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (rhs)));
pad = GET_MODE_SIZE (mode) - size2i / BITS_PER_UNIT;
}
len = native_encode_expr (rhs, len = native_encode_expr (rhs,
buffer, sizeof (buffer), buffer, sizeof (buffer) - 1,
((offseti - offset2i) / BITS_PER_UNIT (offseti - offset2i) / BITS_PER_UNIT);
+ pad));
if (len > 0 && len * BITS_PER_UNIT >= maxsizei) if (len > 0 && len * BITS_PER_UNIT >= maxsizei)
{ {
tree type = vr->type; tree type = vr->type;
unsigned char *buf = buffer;
unsigned int amnt = 0;
/* Make sure to interpret in a type that has a range /* Make sure to interpret in a type that has a range
covering the whole access size. */ covering the whole access size. */
if (INTEGRAL_TYPE_P (vr->type) if (INTEGRAL_TYPE_P (vr->type)
&& maxsizei != TYPE_PRECISION (vr->type)) && maxsizei != TYPE_PRECISION (vr->type))
type = build_nonstandard_integer_type (maxsizei, type = build_nonstandard_integer_type (maxsizei,
TYPE_UNSIGNED (type)); TYPE_UNSIGNED (type));
tree val = native_interpret_expr (type, buffer, if (BYTES_BIG_ENDIAN)
maxsizei / BITS_PER_UNIT); {
/* For big-endian native_encode_expr stored the rhs
such that the LSB of it is the LSB of buffer[len - 1].
That bit is stored into memory at position
offset2 + size2 - 1, i.e. in byte
base + (offset2 + size2 - 1) / BITS_PER_UNIT.
E.g. for offset2 1 and size2 14, rhs -1 and memory
previously cleared that is:
0 1
01111111|11111110
Now, if we want to extract offset 2 and size 12 from
it using native_interpret_expr (which actually works
for integral bitfield types in terms of byte size of
the mode), the native_encode_expr stored the value
into buffer as
XX111111|11111111
and returned len 2 (the X bits are outside of
precision).
Let sz be maxsize / BITS_PER_UNIT if not extracting
a bitfield, and GET_MODE_SIZE otherwise.
We need to align the LSB of the value we want to
extract as the LSB of buf[sz - 1].
The LSB from memory we need to read is at position
offset + maxsize - 1. */
HOST_WIDE_INT sz = maxsizei / BITS_PER_UNIT;
if (INTEGRAL_TYPE_P (type))
sz = GET_MODE_SIZE (SCALAR_INT_TYPE_MODE (type));
amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
- offseti - maxsizei) % BITS_PER_UNIT;
if (amnt)
shift_bytes_in_array_right (buffer, len, amnt);
amnt = ((unsigned HOST_WIDE_INT) offset2i + size2i
- offseti - maxsizei - amnt) / BITS_PER_UNIT;
if ((unsigned HOST_WIDE_INT) sz + amnt > (unsigned) len)
len = 0;
else
{
buf = buffer + len - sz - amnt;
len -= (buf - buffer);
}
}
else
{
amnt = ((unsigned HOST_WIDE_INT) offset2i
- offseti) % BITS_PER_UNIT;
if (amnt)
{
buffer[len] = 0;
shift_bytes_in_array_left (buffer, len + 1, amnt);
buf = buffer + 1;
}
}
tree val = native_interpret_expr (type, buf, len);
/* If we chop off bits because the types precision doesn't /* If we chop off bits because the types precision doesn't
match the memory access size this is ok when optimizing match the memory access size this is ok when optimizing
reads but not when called from the DSE code during reads but not when called from the DSE code during
...@@ -2677,7 +2718,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, ...@@ -2677,7 +2718,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_,
return data->finish (get_alias_set (lhs), val); return data->finish (get_alias_set (lhs), val);
} }
} }
else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i)) else if (ranges_known_overlap_p (offseti, maxsizei, offset2i,
size2i)
&& maxsizei % BITS_PER_UNIT == 0
&& offseti % BITS_PER_UNIT == 0
&& size2i % BITS_PER_UNIT == 0
&& offset2i % BITS_PER_UNIT == 0)
{ {
pd_data pd; pd_data pd;
tree rhs = gimple_assign_rhs1 (def_stmt); tree rhs = gimple_assign_rhs1 (def_stmt);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment