Commit 4b84d9b8 by Jakub Jelinek, committed by Jakub Jelinek

re PR tree-optimization/78821 (GCC7: Copying whole 32 bits structure field by field not optimised into copying whole 32 bits at once)

	PR tree-optimization/78821
	* gimple-ssa-store-merging.c (find_bswap_or_nop_load): Give up
	if base is TARGET_MEM_REF.  If base is not MEM_REF, set base_addr
	to the address of the base rather than the base itself.
	(find_bswap_or_nop_1): Just use pointer comparison for vuse check.
	(find_bswap_or_nop_finalize): New function.
	(find_bswap_or_nop): Use it.
	(bswap_replace): Return a tree rather than bool, change first
	argument from gimple * to gimple_stmt_iterator, allow inserting
	into an empty sequence, allow ins_stmt to be NULL - then emit
	all stmts into gsi.  Fix up MEM_REF address gimplification.
	(pass_optimize_bswap::execute): Adjust bswap_replace caller.
	(struct store_immediate_info): Add N and INS_STMT non-static
	data members.
	(store_immediate_info::store_immediate_info): Initialize them
	from newly added ctor args.
	(merged_store_group::apply_stores): Formatting fixes.  Sort by
	bitpos at the end.
	(stmts_may_clobber_ref_p): For stores call also
	refs_anti_dependent_p.
	(gather_bswap_load_refs): New function.
	(imm_store_chain_info::try_coalesce_bswap): New method.
	(imm_store_chain_info::coalesce_immediate_stores): Use it.
	(split_group): Handle LROTATE_EXPR and NOP_EXPR rhs_code specially.
	(imm_store_chain_info::output_merged_store): Fail if number of
	new estimated stmts is bigger or equal than old.  Handle LROTATE_EXPR
	and NOP_EXPR rhs_code.
	(pass_store_merging::process_store): Compute n and ins_stmt, if
	ins_stmt is non-NULL and the store rhs is otherwise invalid, use
	LROTATE_EXPR rhs_code.  Pass n and ins_stmt to store_immediate_info
	ctor.
	(pass_store_merging::execute): Calculate dominators.

	* gcc.dg/store_merging_16.c: New test.

From-SVN: r254948
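For context, a minimal sketch of the kind of source this patch targets (the function name here is invented for illustration; the shapes actually exercised are in the new gcc.dg/store_merging_16.c test shown further down):

/* Illustrative only: four single-byte stores assembling a big-endian
   32-bit value.  With this patch the store-merging pass can recognize
   the group through the bswap machinery and, on a little-endian target
   with a bswapsi2 pattern, emit a single 32-bit store of
   __builtin_bswap32 (x) instead of four byte stores.  */
void
store_be32 (unsigned char *p, unsigned int x)
{
  p[0] = x >> 24;
  p[1] = x >> 16;
  p[2] = x >> 8;
  p[3] = x;
}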
@@ -369,7 +369,10 @@ find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
&unsignedp, &reversep, &volatilep);
if (TREE_CODE (base_addr) == MEM_REF)
if (TREE_CODE (base_addr) == TARGET_MEM_REF)
/* Do not rewrite TARGET_MEM_REF. */
return false;
else if (TREE_CODE (base_addr) == MEM_REF)
{
offset_int bit_offset = 0;
tree off = TREE_OPERAND (base_addr, 1);
@@ -401,6 +404,8 @@ find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
bitpos += bit_offset.to_shwi ();
}
else
base_addr = build_fold_addr_expr (base_addr);
if (bitpos % BITS_PER_UNIT)
return false;
@@ -743,8 +748,7 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
return NULL;
if (!n1.vuse != !n2.vuse
|| (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0)))
if (n1.vuse != n2.vuse)
return NULL;
source_stmt
@@ -765,39 +769,21 @@ find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
return NULL;
}
/* Check if STMT completes a bswap implementation or a read in a given
endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
accordingly. It also sets N to represent the kind of operations
performed: size of the resulting expression and whether it works on
a memory source, and if so alias-set and vuse. At last, the
function returns a stmt whose rhs's first tree is the source
expression. */
/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
*CMPXCHG, *CMPNOP and adjust *N. */
gimple *
find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
void
find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
uint64_t *cmpnop)
{
unsigned rsize;
uint64_t tmpn, mask;
/* The number which the find_bswap_or_nop_1 result should match in order
to have a full byte swap. The number is shifted to the right
according to the size of the symbolic number before using it. */
uint64_t cmpxchg = CMPXCHG;
uint64_t cmpnop = CMPNOP;
gimple *ins_stmt;
int limit;
/* The last parameter determines the depth search limit. It usually
correlates directly to the number n of bytes to be touched. We
increase that number by log2(n) + 1 here in order to also
cover signed -> unsigned conversions of the src operand as can be seen
in libgcc, and for initial shift/and operation of the src operand. */
limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
if (!ins_stmt)
return NULL;
/* The number which the find_bswap_or_nop_1 result should match in order
to have a full byte swap. The number is shifted to the right
according to the size of the symbolic number before using it. */
*cmpxchg = CMPXCHG;
*cmpnop = CMPNOP;
/* Find real size of result (highest non-zero byte). */
if (n->base_addr)
@@ -810,8 +796,8 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
if (n->range < (int) sizeof (int64_t))
{
mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
cmpnop &= mask;
*cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
*cmpnop &= mask;
}
/* Zero out the bits corresponding to unused bytes in the result of the
@@ -821,18 +807,47 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
if (BYTES_BIG_ENDIAN)
{
mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
cmpxchg &= mask;
cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
*cmpxchg &= mask;
*cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
}
else
{
mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
cmpnop &= mask;
*cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
*cmpnop &= mask;
}
n->range = rsize;
}
n->range *= BITS_PER_UNIT;
}
/* Check if STMT completes a bswap implementation or a read in a given
endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
accordingly. It also sets N to represent the kind of operations
performed: size of the resulting expression and whether it works on
a memory source, and if so alias-set and vuse. At last, the
function returns a stmt whose rhs's first tree is the source
expression. */
gimple *
find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
{
/* The last parameter determines the depth search limit. It usually
correlates directly to the number n of bytes to be touched. We
increase that number by log2(n) + 1 here in order to also
cover signed -> unsigned conversions of the src operand as can be seen
in libgcc, and for initial shift/and operation of the src operand. */
int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
if (!ins_stmt)
return NULL;
uint64_t cmpxchg, cmpnop;
find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
/* A complete byte swap should make the symbolic number to start with
the largest digit in the highest order byte. Unchanged symbolic
number indicates a read with same endianness as target architecture. */
@@ -847,7 +862,6 @@ find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
return NULL;
n->range *= BITS_PER_UNIT;
return ins_stmt;
}
@@ -882,68 +896,89 @@ public:
}; // class pass_optimize_bswap
/* Perform the bswap optimization: replace the expression computed in the rhs
of CUR_STMT by an equivalent bswap, load or load + bswap expression.
of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
bswap, load or load + bswap expression.
Which of these alternatives replace the rhs is given by N->base_addr (non
null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
load to perform are also given in N while the builtin bswap invoke is given
in FNDEL. Finally, if a load is involved, SRC_STMT refers to one of the
load statements involved to construct the rhs in CUR_STMT and N->range gives
the size of the rhs expression for maintaining some statistics.
in FNDEL. Finally, if a load is involved, INS_STMT refers to one of the
load statements involved to construct the rhs in gsi_stmt (GSI) and
N->range gives the size of the rhs expression for maintaining some
statistics.
Note that if the replacement involve a load, CUR_STMT is moved just after
SRC_STMT to do the load with the same VUSE which can lead to CUR_STMT
changing of basic block. */
Note that if the replacement involve a load and if gsi_stmt (GSI) is
non-NULL, that stmt is moved just after INS_STMT to do the load with the
same VUSE which can lead to gsi_stmt (GSI) changing of basic block. */
bool
bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
tree
bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
tree bswap_type, tree load_type, struct symbolic_number *n,
bool bswap)
{
gimple_stmt_iterator gsi;
tree src, tmp, tgt;
tree src, tmp, tgt = NULL_TREE;
gimple *bswap_stmt;
gsi = gsi_for_stmt (cur_stmt);
gimple *cur_stmt = gsi_stmt (gsi);
src = n->src;
tgt = gimple_assign_lhs (cur_stmt);
if (cur_stmt)
tgt = gimple_assign_lhs (cur_stmt);
/* Need to load the value from memory first. */
if (n->base_addr)
{
gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt);
gimple_stmt_iterator gsi_ins = gsi;
if (ins_stmt)
gsi_ins = gsi_for_stmt (ins_stmt);
tree addr_expr, addr_tmp, val_expr, val_tmp;
tree load_offset_ptr, aligned_load_type;
gimple *addr_stmt, *load_stmt;
unsigned align;
gimple *load_stmt;
unsigned align = get_object_alignment (src);
HOST_WIDE_INT load_offset = 0;
basic_block ins_bb, cur_bb;
ins_bb = gimple_bb (ins_stmt);
cur_bb = gimple_bb (cur_stmt);
if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
return false;
align = get_object_alignment (src);
/* Move cur_stmt just before one of the load of the original
to ensure it has the same VUSE. See PR61517 for what could
go wrong. */
if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
gsi_move_before (&gsi, &gsi_ins);
gsi = gsi_for_stmt (cur_stmt);
if (cur_stmt)
{
basic_block ins_bb = gimple_bb (ins_stmt);
basic_block cur_bb = gimple_bb (cur_stmt);
if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
return NULL_TREE;
/* Move cur_stmt just before one of the load of the original
to ensure it has the same VUSE. See PR61517 for what could
go wrong. */
if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
gsi_move_before (&gsi, &gsi_ins);
gsi = gsi_for_stmt (cur_stmt);
}
else
gsi = gsi_ins;
/* Compute address to load from and cast according to the size
of the load. */
addr_expr = build_fold_addr_expr (unshare_expr (src));
addr_expr = build_fold_addr_expr (src);
if (is_gimple_mem_ref_addr (addr_expr))
addr_tmp = addr_expr;
addr_tmp = unshare_expr (addr_expr);
else
{
addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL,
"load_src");
addr_stmt = gimple_build_assign (addr_tmp, addr_expr);
gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT);
addr_tmp = unshare_expr (n->base_addr);
if (!is_gimple_mem_ref_addr (addr_tmp))
addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
is_gimple_mem_ref_addr,
NULL_TREE, true,
GSI_SAME_STMT);
load_offset = n->bytepos;
if (n->offset)
{
tree off
= force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
true, NULL_TREE, true,
GSI_SAME_STMT);
gimple *stmt
= gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
POINTER_PLUS_EXPR, addr_tmp, off);
gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
addr_tmp = gimple_assign_lhs (stmt);
}
}
/* Perform the load. */
@@ -967,7 +1002,7 @@ bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
}
/* Convert the result of load if necessary. */
if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type))
if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
{
val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
"load_dst");
@@ -975,13 +1010,21 @@ bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
gimple_set_vuse (load_stmt, n->vuse);
gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
update_stmt (cur_stmt);
}
else
else if (cur_stmt)
{
gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
gimple_set_vuse (cur_stmt, n->vuse);
update_stmt (cur_stmt);
}
else
{
tgt = make_ssa_name (load_type);
cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
gimple_set_vuse (cur_stmt, n->vuse);
gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
}
update_stmt (cur_stmt);
if (dump_file)
{
@@ -990,7 +1033,7 @@ bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
(int) n->range);
print_gimple_stmt (dump_file, cur_stmt, 0);
}
return true;
return tgt;
}
else
{
@@ -1003,15 +1046,17 @@ bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
}
else if (!bswap)
{
gimple *g;
if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
gimple *g = NULL;
if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
{
if (!is_gimple_val (src))
return false;
return NULL_TREE;
g = gimple_build_assign (tgt, NOP_EXPR, src);
}
else
else if (cur_stmt)
g = gimple_build_assign (tgt, src);
else
tgt = src;
if (n->range == 16)
nop_stats.found_16bit++;
else if (n->range == 32)
@@ -1026,10 +1071,17 @@ bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
fprintf (dump_file,
"%d bit reshuffle in target endianness found at: ",
(int) n->range);
print_gimple_stmt (dump_file, cur_stmt, 0);
if (cur_stmt)
print_gimple_stmt (dump_file, cur_stmt, 0);
else
{
print_generic_expr (dump_file, tgt, 0);
fprintf (dump_file, "\n");
}
}
gsi_replace (&gsi, g, true);
return true;
if (cur_stmt)
gsi_replace (&gsi, g, true);
return tgt;
}
else if (TREE_CODE (src) == BIT_FIELD_REF)
src = TREE_OPERAND (src, 0);
@@ -1069,6 +1121,8 @@ bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
else
bswap_stmt = gimple_build_call (fndecl, 1, tmp);
if (tgt == NULL_TREE)
tgt = make_ssa_name (bswap_type);
tmp = tgt;
/* Convert the result if necessary. */
@@ -1087,12 +1141,23 @@ bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl,
{
fprintf (dump_file, "%d bit bswap implementation found at: ",
(int) n->range);
print_gimple_stmt (dump_file, cur_stmt, 0);
if (cur_stmt)
print_gimple_stmt (dump_file, cur_stmt, 0);
else
{
print_generic_expr (dump_file, tgt, 0);
fprintf (dump_file, "\n");
}
}
gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
gsi_remove (&gsi, true);
return true;
if (cur_stmt)
{
gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
gsi_remove (&gsi, true);
}
else
gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
return tgt;
}
/* Find manual byte swap implementations as well as load in a given
@@ -1211,8 +1276,8 @@ pass_optimize_bswap::execute (function *fun)
if (bswap && !fndecl && n.range != 16)
continue;
if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type,
&n, bswap))
if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
bswap_type, load_type, &n, bswap))
changed = true;
}
}
@@ -1281,8 +1346,15 @@ struct store_immediate_info
gimple *stmt;
unsigned int order;
/* INTEGER_CST for constant stores, MEM_REF for memory copy or
BIT_*_EXPR for logical bitwise operation. */
BIT_*_EXPR for logical bitwise operation.
LROTATE_EXPR if it can be only bswap optimized and
ops are not really meaningful.
NOP_EXPR if bswap optimization detected identity, ops
are not meaningful. */
enum tree_code rhs_code;
/* Two fields for bswap optimization purposes. */
struct symbolic_number n;
gimple *ins_stmt;
/* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
bool bit_not_p;
/* True if ops have been swapped and thus ops[1] represents
@@ -1293,7 +1365,8 @@ struct store_immediate_info
store_operand_info ops[2];
store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
gimple *, unsigned int, enum tree_code, bool,
gimple *, unsigned int, enum tree_code,
struct symbolic_number &, gimple *, bool,
const store_operand_info &,
const store_operand_info &);
};
@@ -1305,12 +1378,14 @@ store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
gimple *st,
unsigned int ord,
enum tree_code rhscode,
struct symbolic_number &nr,
gimple *ins_stmtp,
bool bitnotp,
const store_operand_info &op0r,
const store_operand_info &op1r)
: bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
stmt (st), order (ord), rhs_code (rhscode), bit_not_p (bitnotp),
ops_swapped_p (false)
stmt (st), order (ord), rhs_code (rhscode), n (nr),
ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false)
#if __cplusplus >= 201103L
, ops { op0r, op1r }
{
@@ -1884,13 +1959,13 @@ merged_store_group::apply_stores ()
fprintf (dump_file, "After writing ");
print_generic_expr (dump_file, cst, 0);
fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
" at position %d the merged region contains:\n",
info->bitsize, pos_in_buffer);
" at position %d the merged region contains:\n",
info->bitsize, pos_in_buffer);
dump_char_array (dump_file, val, buf_size);
}
else
fprintf (dump_file, "Failed to merge stores\n");
}
}
if (!ret)
return false;
unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
@@ -1901,6 +1976,7 @@ merged_store_group::apply_stores ()
else
clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
}
stores.qsort (sort_by_bitpos);
return true;
}
@@ -1937,6 +2013,7 @@ struct imm_store_chain_info
}
}
bool terminate_and_process_chain ();
bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
bool coalesce_immediate_stores ();
bool output_merged_store (merged_store_group *);
bool output_merged_stores ();
@@ -2075,7 +2152,8 @@ pass_store_merging::terminate_and_release_chain (imm_store_chain_info *chain_inf
/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
may clobber REF. FIRST and LAST must be in the same basic block and
have non-NULL vdef. */
have non-NULL vdef. We want to be able to sink load of REF across
stores between FIRST and LAST, up to right before LAST. */
bool
stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
@@ -2092,6 +2170,9 @@ stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
stmt = SSA_NAME_DEF_STMT (vop);
if (stmt_may_clobber_ref_p_1 (stmt, &r))
return true;
if (gimple_store_p (stmt)
&& refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
return true;
/* Avoid quadratic compile time by bounding the number of checks
we perform. */
if (++count > MAX_STORE_ALIAS_CHECKS)
@@ -2192,6 +2273,252 @@ compatible_load_p (merged_store_group *merged_store,
return true;
}
/* Add all refs loaded to compute VAL to REFS vector. */
void
gather_bswap_load_refs (vec<tree> *refs, tree val)
{
if (TREE_CODE (val) != SSA_NAME)
return;
gimple *stmt = SSA_NAME_DEF_STMT (val);
if (!is_gimple_assign (stmt))
return;
if (gimple_assign_load_p (stmt))
{
refs->safe_push (gimple_assign_rhs1 (stmt));
return;
}
switch (gimple_assign_rhs_class (stmt))
{
case GIMPLE_BINARY_RHS:
gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
/* FALLTHRU */
case GIMPLE_UNARY_RHS:
gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
break;
default:
gcc_unreachable ();
}
}
/* Return true if m_store_info[first] and at least one following store
form a group which store try_size bitsize value which is byte swapped
from a memory load or some value, or identity from some value.
This uses the bswap pass APIs. */
bool
imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
unsigned int first,
unsigned int try_size)
{
unsigned int len = m_store_info.length (), last = first;
unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
if (width >= try_size)
return false;
for (unsigned int i = first + 1; i < len; ++i)
{
if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
|| m_store_info[i]->ins_stmt == NULL)
return false;
width += m_store_info[i]->bitsize;
if (width >= try_size)
{
last = i;
break;
}
}
if (width != try_size)
return false;
bool allow_unaligned
= !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
/* Punt if the combined store would not be aligned and we need alignment. */
if (!allow_unaligned)
{
unsigned int align = merged_store->align;
unsigned HOST_WIDE_INT align_base = merged_store->align_base;
for (unsigned int i = first + 1; i <= last; ++i)
{
unsigned int this_align;
unsigned HOST_WIDE_INT align_bitpos = 0;
get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
&this_align, &align_bitpos);
if (this_align > align)
{
align = this_align;
align_base = m_store_info[i]->bitpos - align_bitpos;
}
}
unsigned HOST_WIDE_INT align_bitpos
= (m_store_info[first]->bitpos - align_base) & (align - 1);
if (align_bitpos)
align = least_bit_hwi (align_bitpos);
if (align < try_size)
return false;
}
tree type;
switch (try_size)
{
case 16: type = uint16_type_node; break;
case 32: type = uint32_type_node; break;
case 64: type = uint64_type_node; break;
default: gcc_unreachable ();
}
struct symbolic_number n;
gimple *ins_stmt = NULL;
int vuse_store = -1;
unsigned int first_order = merged_store->first_order;
unsigned int last_order = merged_store->last_order;
gimple *first_stmt = merged_store->first_stmt;
gimple *last_stmt = merged_store->last_stmt;
store_immediate_info *infof = m_store_info[first];
for (unsigned int i = first; i <= last; ++i)
{
store_immediate_info *info = m_store_info[i];
struct symbolic_number this_n = info->n;
this_n.type = type;
if (!this_n.base_addr)
this_n.range = try_size / BITS_PER_UNIT;
unsigned int bitpos = info->bitpos - infof->bitpos;
if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
BYTES_BIG_ENDIAN
? try_size - info->bitsize - bitpos
: bitpos))
return false;
if (n.base_addr && vuse_store)
{
unsigned int j;
for (j = first; j <= last; ++j)
if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
break;
if (j > last)
{
if (vuse_store == 1)
return false;
vuse_store = 0;
}
}
if (i == first)
{
n = this_n;
ins_stmt = info->ins_stmt;
}
else
{
if (n.base_addr)
{
if (n.vuse != this_n.vuse)
{
if (vuse_store == 0)
return false;
vuse_store = 1;
}
if (info->order > last_order)
{
last_order = info->order;
last_stmt = info->stmt;
}
else if (info->order < first_order)
{
first_order = info->order;
first_stmt = info->stmt;
}
}
ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
&this_n, &n);
if (ins_stmt == NULL)
return false;
}
}
uint64_t cmpxchg, cmpnop;
find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
/* A complete byte swap should make the symbolic number to start with
the largest digit in the highest order byte. Unchanged symbolic
number indicates a read with same endianness as target architecture. */
if (n.n != cmpnop && n.n != cmpxchg)
return false;
if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
return false;
/* Don't handle memory copy this way if normal non-bswap processing
would handle it too. */
if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
{
unsigned int i;
for (i = first; i <= last; ++i)
if (m_store_info[i]->rhs_code != MEM_REF)
break;
if (i == last + 1)
return false;
}
if (n.n == cmpxchg)
switch (try_size)
{
case 16:
/* Will emit LROTATE_EXPR. */
break;
case 32:
if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
&& optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
break;
return false;
case 64:
if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
&& optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
break;
return false;
default:
gcc_unreachable ();
}
if (!allow_unaligned && n.base_addr)
{
unsigned int align = get_object_alignment (n.src);
if (align < try_size)
return false;
}
/* If each load has vuse of the corresponding store, need to verify
the loads can be sunk right before the last store. */
if (vuse_store == 1)
{
auto_vec<tree, 64> refs;
for (unsigned int i = first; i <= last; ++i)
gather_bswap_load_refs (&refs,
gimple_assign_rhs1 (m_store_info[i]->stmt));
unsigned int i;
tree ref;
FOR_EACH_VEC_ELT (refs, i, ref)
if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
return false;
n.vuse = NULL_TREE;
}
infof->n = n;
infof->ins_stmt = ins_stmt;
for (unsigned int i = first; i <= last; ++i)
{
m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
m_store_info[i]->ops[0].base_addr = NULL_TREE;
m_store_info[i]->ops[1].base_addr = NULL_TREE;
if (i != first)
merged_store->merge_into (m_store_info[i]);
}
return true;
}
/* Go through the candidate stores recorded in m_store_info and merge them
into merged_store_group objects recorded into m_merged_store_groups
representing the widened stores. Return true if coalescing was successful
@@ -2210,7 +2537,7 @@ imm_store_chain_info::coalesce_immediate_stores ()
m_store_info.length ());
store_immediate_info *info;
unsigned int i;
unsigned int i, ignore = 0;
/* Order the stores by the bitposition they write to. */
m_store_info.qsort (sort_by_bitpos);
@@ -2229,14 +2556,41 @@ imm_store_chain_info::coalesce_immediate_stores ()
fprintf (dump_file, "\n------------\n");
}
if (i == 0)
if (i <= ignore)
continue;
/* First try to handle group of stores like:
p[0] = data >> 24;
p[1] = data >> 16;
p[2] = data >> 8;
p[3] = data;
using the bswap framework. */
if (info->bitpos == merged_store->start + merged_store->width
&& merged_store->stores.length () == 1
&& merged_store->stores[0]->ins_stmt != NULL
&& info->ins_stmt != NULL)
{
unsigned int try_size;
for (try_size = 64; try_size >= 16; try_size >>= 1)
if (try_coalesce_bswap (merged_store, i - 1, try_size))
break;
if (try_size >= 16)
{
ignore = i + merged_store->stores.length () - 1;
m_merged_store_groups.safe_push (merged_store);
if (ignore < m_store_info.length ())
merged_store = new merged_store_group (m_store_info[ignore]);
else
merged_store = NULL;
continue;
}
}
/* |---store 1---|
|---store 2---|
Overlapping stores. */
unsigned HOST_WIDE_INT start = info->bitpos;
if (IN_RANGE (start, merged_store->start,
Overlapping stores. */
if (IN_RANGE (info->bitpos, merged_store->start,
merged_store->start + merged_store->width - 1))
{
/* Only allow overlapping stores of constants. */
@@ -2251,7 +2605,8 @@ imm_store_chain_info::coalesce_immediate_stores ()
This store is consecutive to the previous one.
Merge it into the current store group. There can be gaps in between
the stores, but there can't be gaps in between bitregions. */
else if (info->bitregion_start <= merged_store->bitregion_end
else if (info->rhs_code != LROTATE_EXPR
&& info->bitregion_start <= merged_store->bitregion_end
&& info->rhs_code == merged_store->stores[0]->rhs_code)
{
store_immediate_info *infof = merged_store->stores[0];
@@ -2297,10 +2652,13 @@ imm_store_chain_info::coalesce_immediate_stores ()
}
/* Record or discard the last store group. */
if (!merged_store->apply_stores ())
delete merged_store;
else
m_merged_store_groups.safe_push (merged_store);
if (merged_store)
{
if (!merged_store->apply_stores ())
delete merged_store;
else
m_merged_store_groups.safe_push (merged_store);
}
gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
bool success
@@ -2560,9 +2918,41 @@ split_group (merged_store_group *group, bool allow_unaligned_store,
gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
if (group->stores[0]->rhs_code == LROTATE_EXPR
|| group->stores[0]->rhs_code == NOP_EXPR)
{
/* For bswap framework using sets of stores, all the checking
has been done earlier in try_coalesce_bswap and needs to be
emitted as a single store. */
if (total_orig)
{
/* Avoid the old/new stmt count heuristics. It should be
always beneficial. */
total_new[0] = 1;
total_orig[0] = 2;
}
if (split_stores)
{
unsigned HOST_WIDE_INT align_bitpos
= (group->start - align_base) & (group_align - 1);
unsigned HOST_WIDE_INT align = group_align;
if (align_bitpos)
align = least_bit_hwi (align_bitpos);
bytepos = group->start / BITS_PER_UNIT;
struct split_store *store
= new split_store (bytepos, group->width, align);
unsigned int first = 0;
find_constituent_stores (group, &store->orig_stores,
&first, group->start, group->width);
split_stores->safe_push (store);
}
return 1;
}
unsigned int ret = 0, first = 0;
unsigned HOST_WIDE_INT try_pos = bytepos;
group->stores.qsort (sort_by_bitpos);
if (total_orig)
{
@@ -2904,6 +3294,7 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
" not larger than estimated number of new"
" stmts (%u).\n",
total_orig, total_new);
return false;
}
gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
@@ -2911,13 +3302,63 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
tree last_vdef, new_vuse;
last_vdef = gimple_vdef (group->last_stmt);
new_vuse = gimple_vuse (group->last_stmt);
tree bswap_res = NULL_TREE;
if (group->stores[0]->rhs_code == LROTATE_EXPR
|| group->stores[0]->rhs_code == NOP_EXPR)
{
tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
gimple *ins_stmt = group->stores[0]->ins_stmt;
struct symbolic_number *n = &group->stores[0]->n;
bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
switch (n->range)
{
case 16:
load_type = bswap_type = uint16_type_node;
break;
case 32:
load_type = uint32_type_node;
if (bswap)
{
fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
}
break;
case 64:
load_type = uint64_type_node;
if (bswap)
{
fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
}
break;
default:
gcc_unreachable ();
}
/* If the loads have each vuse of the corresponding store,
we've checked the aliasing already in try_coalesce_bswap and
we want to sink the need load into seq. So need to use new_vuse
on the load. */
if (n->base_addr && n->vuse == NULL)
{
n->vuse = new_vuse;
ins_stmt = NULL;
}
bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
bswap_type, load_type, n, bswap);
gcc_assert (bswap_res);
}
gimple *stmt = NULL;
split_store *split_store;
unsigned int i;
auto_vec<gimple *, 32> orig_stmts;
tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &seq,
gimple_seq this_seq;
tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
is_gimple_mem_ref_addr, NULL_TREE);
gimple_seq_add_seq_without_update (&seq, this_seq);
tree load_addr[2] = { NULL_TREE, NULL_TREE };
gimple_seq load_seq[2] = { NULL, NULL };
@@ -2941,7 +3382,6 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
load_addr[j] = addr;
else
{
gimple_seq this_seq;
load_addr[j]
= force_gimple_operand_1 (unshare_expr (op.base_addr),
&this_seq, is_gimple_mem_ref_addr,
@@ -2988,10 +3428,12 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
MR_DEPENDENCE_BASE (dest) = base;
}
tree mask
= native_interpret_expr (int_type,
group->mask + try_pos - start_byte_pos,
group->buf_size);
tree mask = integer_zero_node;
if (!bswap_res)
mask = native_interpret_expr (int_type,
group->mask + try_pos
- start_byte_pos,
group->buf_size);
tree ops[2];
for (int j = 0;
@@ -2999,7 +3441,9 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
++j)
{
store_operand_info &op = split_store->orig_stores[0]->ops[j];
if (op.base_addr)
if (bswap_res)
ops[j] = bswap_res;
else if (op.base_addr)
{
FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
orig_stmts.safe_push (info->ops[j].stmt);
@@ -3124,6 +3568,24 @@ imm_store_chain_info::output_merged_store (merged_store_group *group)
src = gimple_assign_lhs (stmt);
}
break;
case LROTATE_EXPR:
case NOP_EXPR:
src = ops[0];
if (!is_gimple_val (src))
{
stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
src);
gimple_seq_add_stmt_without_update (&seq, stmt);
src = gimple_assign_lhs (stmt);
}
if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
{
stmt = gimple_build_assign (make_ssa_name (int_type),
NOP_EXPR, src);
gimple_seq_add_stmt_without_update (&seq, stmt);
src = gimple_assign_lhs (stmt);
}
break;
default:
src = ops[0];
break;
@@ -3494,6 +3956,8 @@ pass_store_merging::process_store (gimple *stmt)
&& (TREE_CODE (rhs) != INTEGER_CST)));
enum tree_code rhs_code = ERROR_MARK;
bool bit_not_p = false;
struct symbolic_number n;
gimple *ins_stmt = NULL;
store_operand_info ops[2];
if (invalid)
;
@@ -3558,6 +4022,35 @@ pass_store_merging::process_store (gimple *stmt)
invalid = true;
break;
}
if ((bitsize % BITS_PER_UNIT) == 0
&& (bitpos % BITS_PER_UNIT) == 0
&& bitsize <= 64
&& BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
{
ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
if (ins_stmt)
{
uint64_t nn = n.n;
for (unsigned HOST_WIDE_INT i = 0;
i < bitsize; i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
if ((nn & MARKER_MASK) == 0
|| (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
{
ins_stmt = NULL;
break;
}
if (ins_stmt)
{
if (invalid)
{
rhs_code = LROTATE_EXPR;
ops[0].base_addr = NULL_TREE;
ops[1].base_addr = NULL_TREE;
}
invalid = false;
}
}
}
}
if (invalid)
......@@ -3566,6 +4059,9 @@ pass_store_merging::process_store (gimple *stmt)
return;
}
if (!ins_stmt)
memset (&n, 0, sizeof (n));
struct imm_store_chain_info **chain_info = NULL;
if (base_addr)
chain_info = m_stores.get (base_addr);
@@ -3576,6 +4072,7 @@ pass_store_merging::process_store (gimple *stmt)
unsigned int ord = (*chain_info)->m_store_info.length ();
info = new store_immediate_info (bitsize, bitpos, bitregion_start,
bitregion_end, stmt, ord, rhs_code,
n, ins_stmt,
bit_not_p, ops[0], ops[1]);
if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -3604,6 +4101,7 @@ pass_store_merging::process_store (gimple *stmt)
= new imm_store_chain_info (m_stores_head, base_addr);
info = new store_immediate_info (bitsize, bitpos, bitregion_start,
bitregion_end, stmt, 0, rhs_code,
n, ins_stmt,
bit_not_p, ops[0], ops[1]);
new_chain->m_store_info.safe_push (info);
m_stores.put (base_addr, new_chain);
@@ -3628,6 +4126,8 @@ pass_store_merging::execute (function *fun)
basic_block bb;
hash_set<gimple *> orig_stmts;
calculate_dominance_info (CDI_DOMINATORS);
FOR_EACH_BB_FN (bb, fun)
{
gimple_stmt_iterator gsi;
/* Only test on some 64-bit targets which do have bswap{si,di}2 patterns and
are either big or little endian (not pdp endian). */
/* { dg-do compile { target { lp64 && { i?86-*-* x86_64-*-* powerpc*-*-* aarch64*-*-* } } } } */
/* { dg-require-effective-target store_merge } */
/* { dg-options "-O2 -fdump-tree-store-merging" } */
__attribute__((noipa)) void
f1 (unsigned char *p, unsigned long long q)
{
p[0] = q;
p[1] = q >> 8;
p[2] = q >> 16;
p[3] = q >> 24;
p[4] = q >> 32;
p[5] = q >> 40;
p[6] = q >> 48;
p[7] = q >> 56;
}
__attribute__((noipa)) void
f2 (unsigned char *p, unsigned long long q)
{
p[0] = q >> 56;
p[1] = q >> 48;
p[2] = q >> 40;
p[3] = q >> 32;
p[4] = q >> 24;
p[5] = q >> 16;
p[6] = q >> 8;
p[7] = q;
}
__attribute__((noipa)) void
f3 (unsigned char *__restrict p, unsigned char *__restrict q)
{
unsigned char q3 = q[3];
unsigned char q2 = q[2];
unsigned char q1 = q[1];
unsigned char q0 = q[0];
p[0] = q3;
p[1] = q2;
p[2] = q1;
p[3] = q0;
}
__attribute__((noipa)) void
f4 (unsigned char *__restrict p, unsigned char *__restrict q)
{
p[0] = q[3];
p[1] = q[2];
p[2] = q[1];
p[3] = q[0];
}
struct S { unsigned char a, b; unsigned short c; };
__attribute__((noipa)) void
f5 (struct S *__restrict p, struct S *__restrict q)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
unsigned char pa = q->c >> 8;
unsigned char pb = q->c;
unsigned short pc = (q->a << 8) | q->b;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
unsigned char pa = q->c;
unsigned char pb = q->c >> 8;
unsigned short pc = q->a | (q->b << 8);
#endif
p->a = pa;
p->b = pb;
p->c = pc;
}
__attribute__((noipa)) void
f6 (struct S *__restrict p, struct S *__restrict q)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
p->a = q->c >> 8;
p->b = q->c;
p->c = (q->a << 8) | q->b;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
p->a = q->c;
p->b = q->c >> 8;
p->c = q->a | (q->b << 8);
#endif
}
struct T { unsigned long long a : 8, b : 8, c : 8, d : 8, e : 8, f : 8, g : 8, h : 8; };
__attribute__((noipa)) void
f7 (struct T *__restrict p, struct T *__restrict q)
{
p->a = q->h;
p->b = q->g;
p->c = q->f;
p->d = q->e;
p->e = q->d;
p->f = q->c;
p->g = q->b;
p->h = q->a;
}
struct S b = { 0x11, 0x12,
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
0x1413
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
0x1314
#endif
};
struct T e = { 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28 };
int
main ()
{
unsigned char a[8];
int i;
struct S b, c, d;
f1 (a, 0x0102030405060708ULL);
for (i = 0; i < 8; ++i)
if (a[i] != 8 - i)
__builtin_abort ();
f2 (a, 0x0102030405060708ULL);
for (i = 0; i < 8; ++i)
if (a[i] != 1 + i)
__builtin_abort ();
f3 (a, a + 4);
for (i = 0; i < 8; ++i)
if (a[i] != (i < 4 ? 8 - i : 1 + i))
__builtin_abort ();
f2 (a, 0x090a0b0c0d0e0f10ULL);
f4 (a + 4, a);
for (i = 0; i < 8; ++i)
if (a[i] != (i < 4 ? 9 + i : 16 - i))
__builtin_abort ();
f5 (&c, &b);
if (c.a != 0x14 || c.b != 0x13
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|| c.c != 0x1112
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|| c.c != 0x1211
#endif
)
__builtin_abort ();
f6 (&d, &c);
if (d.a != 0x11 || d.b != 0x12 || d.c != b.c)
__builtin_abort ();
struct T f;
f7 (&f, &e);
if (f.a != 0x28 || f.b != 0x27 || f.c != 0x26 || f.d != 0x25
|| f.e != 0x24 || f.f != 0x23 || f.g != 0x22 || f.h != 0x21)
__builtin_abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "Merging successful" 7 "store-merging" } } */
/* { dg-final { scan-tree-dump-times "__builtin_bswap64" 2 "store-merging" } } */
/* { dg-final { scan-tree-dump-times "__builtin_bswap32" 4 "store-merging" } } */