Commit a6fbd154 by Jakub Jelinek, committed by Jakub Jelinek

re PR tree-optimization/78821 (GCC7: Copying whole 32 bits structure field by field not optimised into copying whole 32 bits at once)

	PR tree-optimization/78821
	* gimple-ssa-store-merging.c (compatible_load_p): Don't require
	that bit_not_p is the same.
	(imm_store_chain_info::coalesce_immediate_stores): Likewise.
	(split_group): Count precisely bit_not_p bits in each statement.
	(invert_op): New function.
	(imm_store_chain_info::output_merged_store): Use invert_op to
	emit BIT_XOR_EXPR with a xor_mask instead of BIT_NOT_EXPR if some
	but not all orig_stores have BIT_NOT_EXPR in the corresponding spots.

	* gcc.dg/store_merging_15.c: New test.

From-SVN: r254679
parent a1b5dd18
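For context, PR78821 is about sequences like the one in the new test below: adjacent narrow fields copied one by one, with only some of them bitwise-negated. With this change the store-merging pass can still combine such stores, emitting a BIT_XOR_EXPR with a constant mask when only some of the merged stores were negated. A minimal hand-written C sketch of the intended effect for the first four bytes of f1 follows; this is not the pass's actual output, and the little-endian layout and the 0xffff00ff mask value are assumptions made for illustration only.

#include <stdint.h>
#include <string.h>

struct S { unsigned char a, b; unsigned short c;
           unsigned char d, e, f, g; unsigned long long h; };

/* Roughly what the merged code for p->a = ~q->a; p->b = q->b; p->c = ~q->c;
   can look like: one 32-bit load, one xor with a constant mask (0xff in the
   bytes that were negated, 0 elsewhere), one 32-bit store.  */
void
f1_first_word_sketch (struct S *__restrict p, struct S *__restrict q)
{
  uint32_t w;
  memcpy (&w, q, sizeof w);   /* single 32-bit load of a, b, c */
  w ^= 0xffff00ffu;           /* assumed LE mask: invert a and c, keep b */
  memcpy (p, &w, sizeof w);   /* single 32-bit store */
}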
2017-11-13 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/78821
* gimple-ssa-store-merging.c (compatible_load_p): Don't require
that bit_not_p is the same.
(imm_store_chain_info::coalesce_immediate_stores): Likewise.
(split_group): Count precisely bit_not_p bits in each statement.
(invert_op): New function.
(imm_store_chain_info::output_merged_store): Use invert_op to
emit BIT_XOR_EXPR with a xor_mask instead of BIT_NOT_EXPR if some
but not all orig_stores have BIT_NOT_EXPR in the corresponding spots.
2017-11-13  Martin Liska  <mliska@suse.cz>

	* gcov.c (struct coverage_info): Remove typedef of coverage_t.
gcc/gimple-ssa-store-merging.c:

@@ -1039,7 +1039,6 @@ compatible_load_p (merged_store_group *merged_store,
     {
       store_immediate_info *infof = merged_store->stores[0];
       if (!info->ops[idx].base_addr
-	  || info->ops[idx].bit_not_p != infof->ops[idx].bit_not_p
 	  || (info->ops[idx].bitpos - infof->ops[idx].bitpos
 	      != info->bitpos - infof->bitpos)
 	  || !operand_equal_p (info->ops[idx].base_addr,
@@ -1179,8 +1178,7 @@ imm_store_chain_info::coalesce_immediate_stores ()
 	 Merge it into the current store group.  There can be gaps in between
 	 the stores, but there can't be gaps in between bitregions.  */
       else if (info->bitregion_start <= merged_store->bitregion_end
-	       && info->rhs_code == merged_store->stores[0]->rhs_code
-	       && info->bit_not_p == merged_store->stores[0]->bit_not_p)
+	       && info->rhs_code == merged_store->stores[0]->rhs_code)
 	{
 	  store_immediate_info *infof = merged_store->stores[0];
@@ -1501,16 +1499,14 @@ split_group (merged_store_group *group, bool allow_unaligned_store,
       total_orig[0] = 1; /* The orig store.  */
       info = group->stores[0];
       if (info->ops[0].base_addr)
-	total_orig[0] += 1 + info->ops[0].bit_not_p;
+	total_orig[0]++;
       if (info->ops[1].base_addr)
-	total_orig[0] += 1 + info->ops[1].bit_not_p;
+	total_orig[0]++;
       switch (info->rhs_code)
 	{
 	case BIT_AND_EXPR:
 	case BIT_IOR_EXPR:
 	case BIT_XOR_EXPR:
-	  if (info->bit_not_p)
-	    total_orig[0]++; /* The orig BIT_NOT_EXPR stmt.  */
 	  total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
 	  break;
 	default:
@@ -1519,7 +1515,12 @@ split_group (merged_store_group *group, bool allow_unaligned_store,
       total_orig[0] *= group->stores.length ();

       FOR_EACH_VEC_ELT (group->stores, i, info)
-	total_new[0] += count_multiple_uses (info);
+	{
+	  total_new[0] += count_multiple_uses (info);
+	  total_orig[0] += (info->bit_not_p
+			    + info->ops[0].bit_not_p
+			    + info->ops[1].bit_not_p);
+	}
     }

   if (!allow_unaligned_load)
@@ -1659,13 +1660,13 @@ split_group (merged_store_group *group, bool allow_unaligned_store,
   if (total_orig)
     {
+      unsigned int i;
+      struct split_store *store;
       /* If we are reusing some original stores and any of the
	 original SSA_NAMEs had multiple uses, we need to subtract
	 those now before we add the new ones.  */
       if (total_new[0] && any_orig)
	{
-	  unsigned int i;
-	  struct split_store *store;
	  FOR_EACH_VEC_ELT (*split_stores, i, store)
	    if (store->orig)
	      total_new[0] -= count_multiple_uses (store->orig_stores[0]);
@@ -1673,26 +1674,105 @@ split_group (merged_store_group *group, bool allow_unaligned_store,
       total_new[0] += ret; /* The new store.  */
       store_immediate_info *info = group->stores[0];
       if (info->ops[0].base_addr)
-	total_new[0] += ret * (1 + info->ops[0].bit_not_p);
+	total_new[0] += ret;
       if (info->ops[1].base_addr)
-	total_new[0] += ret * (1 + info->ops[1].bit_not_p);
+	total_new[0] += ret;
       switch (info->rhs_code)
	{
	case BIT_AND_EXPR:
	case BIT_IOR_EXPR:
	case BIT_XOR_EXPR:
-	  if (info->bit_not_p)
-	    total_new[0] += ret; /* The new BIT_NOT_EXPR stmt.  */
	  total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
	  break;
	default:
	  break;
	}
+      FOR_EACH_VEC_ELT (*split_stores, i, store)
+	{
+	  unsigned int j;
+	  bool bit_not_p[3] = { false, false, false };
+	  /* If all orig_stores have certain bit_not_p set, then
+	     we'd use a BIT_NOT_EXPR stmt and need to account for it.
+	     If some orig_stores have certain bit_not_p set, then
+	     we'd use a BIT_XOR_EXPR with a mask and need to account for
+	     it.  */
+	  FOR_EACH_VEC_ELT (store->orig_stores, j, info)
+	    {
+	      if (info->ops[0].bit_not_p)
+		bit_not_p[0] = true;
+	      if (info->ops[1].bit_not_p)
+		bit_not_p[1] = true;
+	      if (info->bit_not_p)
+		bit_not_p[2] = true;
+	    }
+	  total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
+	}
     }

   return ret;
 }
+
+/* Return the operation through which the operand IDX (if < 2) or
+   result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
+   is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
+   the bits should be xored with mask.  */
+
+static enum tree_code
+invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
+{
+  unsigned int i;
+  store_immediate_info *info;
+  unsigned int cnt = 0;
+  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
+    {
+      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
+      if (bit_not_p)
+	++cnt;
+    }
+  mask = NULL_TREE;
+  if (cnt == 0)
+    return NOP_EXPR;
+  if (cnt == split_store->orig_stores.length ())
+    return BIT_NOT_EXPR;
+  unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
+  unsigned buf_size = split_store->size / BITS_PER_UNIT;
+  unsigned char *buf
+    = XALLOCAVEC (unsigned char, buf_size);
+  memset (buf, ~0U, buf_size);
+  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
+    {
+      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
+      if (!bit_not_p)
+	continue;
+      /* Clear regions with bit_not_p and invert afterwards, rather than
+	 clear regions with !bit_not_p, so that gaps in between stores aren't
+	 set in the mask.  */
+      unsigned HOST_WIDE_INT bitsize = info->bitsize;
+      unsigned int pos_in_buffer = 0;
+      if (info->bitpos < try_bitpos)
+	{
+	  gcc_assert (info->bitpos + bitsize > try_bitpos);
+	  bitsize -= (try_bitpos - info->bitpos);
+	}
+      else
+	pos_in_buffer = info->bitpos - try_bitpos;
+      if (pos_in_buffer + bitsize > split_store->size)
+	bitsize = split_store->size - pos_in_buffer;
+      unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
+      if (BYTES_BIG_ENDIAN)
+	clear_bit_region_be (p, (BITS_PER_UNIT - 1
+				 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
+      else
+	clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
+    }
+  for (unsigned int i = 0; i < buf_size; ++i)
+    buf[i] = ~buf[i];
+  mask = native_interpret_expr (int_type, buf, buf_size);
+  return BIT_XOR_EXPR;
+}
+
 /* Given a merged store group GROUP output the widened version of it.
    The store chain is against the base object BASE.
    Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
@@ -1899,10 +1979,13 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
		  gimple_seq_add_stmt_without_update (&seq, stmt);
		}
	      ops[j] = gimple_assign_lhs (stmt);
-	      if (op.bit_not_p)
+	      tree xor_mask;
+	      enum tree_code inv_op
+		= invert_op (split_store, j, int_type, xor_mask);
+	      if (inv_op != NOP_EXPR)
		{
		  stmt = gimple_build_assign (make_ssa_name (int_type),
-					      BIT_NOT_EXPR, ops[j]);
+					      inv_op, ops[j], xor_mask);
		  gimple_set_location (stmt, load_loc);
		  ops[j] = gimple_assign_lhs (stmt);
@@ -1952,10 +2035,13 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
	  else
	    gimple_seq_add_stmt_without_update (&seq, stmt);
	  src = gimple_assign_lhs (stmt);
-	  if (split_store->orig_stores[0]->bit_not_p)
+	  tree xor_mask;
+	  enum tree_code inv_op;
+	  inv_op = invert_op (split_store, 2, int_type, xor_mask);
+	  if (inv_op != NOP_EXPR)
	    {
	      stmt = gimple_build_assign (make_ssa_name (int_type),
-					  BIT_NOT_EXPR, src);
+					  inv_op, src, xor_mask);
	      gimple_set_location (stmt, bit_loc);
	      if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
		gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
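The new invert_op decides, per split store, whether to emit nothing (NOP_EXPR), a full BIT_NOT_EXPR (every original store in the split was negated), or a BIT_XOR_EXPR with a constant mask (only some were negated). Below is a minimal standalone sketch of that mask construction at byte granularity, assuming byte-aligned one-byte stores with no gaps; the real invert_op works at bit granularity through clear_bit_region/clear_bit_region_be and produces the constant with native_interpret_expr. The function name is made up for illustration.

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Build the xor mask for a merged 4-byte store: 0xff in every byte whose
   original store had bit_not_p set, 0 everywhere else, so bytes that were
   not negated (and any gaps) are left unchanged by the xor.  */
uint32_t
build_xor_mask (const bool bit_not_p[4])
{
  unsigned char buf[4];
  for (int i = 0; i < 4; i++)
    buf[i] = bit_not_p[i] ? 0xff : 0x00;
  uint32_t mask;
  memcpy (&mask, buf, sizeof mask);  /* native byte order */
  return mask;
}

For f1 in the new test, bytes 0 (a), 2 and 3 (c) are negated while byte 1 (b) is not, so on a little-endian target the mask would come out as 0xffff00ff and the merged store becomes a single xor-and-store of 32 bits.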
gcc/testsuite/ChangeLog:

2017-11-13  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/78821
	* gcc.dg/store_merging_15.c: New test.

	PR tree-optimization/82954
	* gcc.c-torture/execute/pr82954.c: New test.

gcc/testsuite/gcc.dg/store_merging_15.c (new file):
/* { dg-do compile } */
/* { dg-require-effective-target store_merge } */
/* { dg-options "-O2 -fdump-tree-store-merging" } */
struct S { unsigned char a, b; unsigned short c; unsigned char d, e, f, g; unsigned long long h; };
__attribute__((noipa)) void
f1 (struct S *__restrict p, struct S *__restrict q)
{
p->a = ~q->a;
p->b = q->b;
p->c = ~q->c;
p->d = ~q->d;
p->e = q->e;
p->f = ~q->f;
p->g = ~q->g;
}
__attribute__((noipa)) void
f2 (struct S *__restrict p, struct S *__restrict q)
{
p->a = ~(unsigned char) (p->a & q->a);
p->b = ((unsigned char) ~p->b) & q->b;
p->c = p->c & (unsigned short) ~q->c;
p->d = p->d & q->d;
p->e = p->e & (unsigned char) ~q->e;
p->f = p->f & (unsigned char) ~q->f;
p->g = ~(unsigned char) (p->g & q->g);
}
struct S s = { 20, 21, 22, 23, 24, 25, 26, 27 };
struct S u = { 28, 29, 30, 31, 32, 33, 34, 35 };
struct S v = { 36, 37, 38, 39, 40, 41, 42, 43 };
int
main ()
{
asm volatile ("" : : : "memory");
f1 (&s, &u);
asm volatile ("" : : : "memory");
if (s.a != (unsigned char) ~28 || s.b != 29
|| s.c != (unsigned short) ~30 || s.d != (unsigned char) ~31
|| s.e != 32 || s.f != (unsigned char) ~33 || s.g != (unsigned char) ~34
|| s.h != 27)
__builtin_abort ();
f2 (&u, &v);
asm volatile ("" : : : "memory");
if (u.a != (unsigned char) ~(28 & 36) || u.b != (((unsigned char) ~29) & 37)
|| u.c != (30 & (unsigned short) ~38) || u.d != (31 & 39)
|| u.e != (32 & (unsigned char) ~40) || u.f != (33 & (unsigned char) ~41)
|| u.g != (unsigned char) ~(34 & 42) || u.h != 35)
__builtin_abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "Merging successful" 2 "store-merging" } } */