Commit 97e73bd2 by Richard Henderson (author and committer)

tree-sra.c: Rewrite from scratch.

        * tree-sra.c: Rewrite from scratch.  Handle nested aggregates.
        * gcc.dg/tree-ssa/20040430-1.c: Expect zero if's.

From-SVN: r83858
2004-06-29  Richard Henderson  <rth@redhat.com>

	* tree-sra.c: Rewrite from scratch.  Handle nested aggregates.

2004-06-29  Nathan Sidwell  <nathan@codesourcery.com>

	* vec.h (VEC_T_safe_push, VEC_T_safe_insert): Tweak for when
	...

2004-06-29  Richard Henderson  <rth@redhat.com>

	* gcc.dg/tree-ssa/20040430-1.c: Expect zero if's.

2004-06-29  Paul Brook  <paul@codesourcery.com>

	* g++.old-deja/g++.abi/arraynew.C: Handle ARM EABI cookies.
	...
gcc/testsuite/gcc.dg/tree-ssa/20040430-1.c

/* PR middle-end/14470.  Similar to
   gcc.c-torture/execute/20040313-1.c, but with a compile time test to
   make sure the second if() is removed.  */
/* Update: We now remove both ifs.  Whee.  */
/* { dg-do run } */
/* { dg-options "-O2 -fdump-tree-optimized" } */
@@ -22,4 +22,4 @@ int main()
  return 0;
}
/* { dg-final { scan-tree-dump-times "if " 0 "optimized"} } */
gcc/tree-sra.c

@@ -44,26 +44,34 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
#include "timevar.h"
#include "flags.h"
#include "bitmap.h"
#include "obstack.h"
#include "target.h"
/* The object of this pass is to replace a non-addressable aggregate with a
   set of independent variables.  Most of the time, all of these variables
   will be scalars.  But a secondary objective is to break up larger
   aggregates into smaller aggregates.  In the process we may find that some
   bits of the larger aggregate can be deleted as unreferenced.

   This substitution is done globally.  More localized substitutions would
   be the purview of a load-store motion pass.

   The optimization proceeds in phases:

     (1) Identify variables that have types that are candidates for
	 decomposition.

     (2) Scan the function looking for the ways these variables are used.
	 In particular we're interested in the number of times a variable
	 (or member) is needed as a complete unit, and the number of times
	 a variable (or member) is copied.

     (3) Based on the usage profile, instantiate substitution variables.

     (4) Scan the function making replacements.  */
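To make the phases concrete, here is a small before/after sketch of our own
(not part of this commit); the p$x, p$y names follow the dump convention the
pass uses for replacement variables:

  /* Before SRA: 'p' is a non-addressable aggregate.  */
  struct point { int x; int y; };

  int
  f (void)
  {
    struct point p;
    p.x = 3;
    p.y = 4;
    return p.x + p.y;
  }

  /* After SRA, conceptually: each field becomes an independent scalar
     (p$x, p$y), which constant propagation then folds to 'return 7'.  */
  int
  f_scalarized (void)
  {
    int p_x = 3;   /* stands in for p$x */
    int p_y = 4;   /* stands in for p$y */
    return p_x + p_y;
  }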
/* The set of aggregate variables that are candidates for scalarization. */
static bitmap sra_candidates;
@@ -72,85 +80,68 @@ static bitmap sra_candidates;
beginning of the function. */
static bitmap needs_copy_in;
/* Sets of bit pairs that cache type decomposition and instantiation.  */
static bitmap sra_type_decomp_cache;
static bitmap sra_type_inst_cache;

/* One of these structures is created for each candidate aggregate
   and each (accessed) member of such an aggregate.  */
struct sra_elt
{
  /* A tree of the elements.  Used when we want to traverse everything.  */
  struct sra_elt *parent;
  struct sra_elt *children;
  struct sra_elt *sibling;

  /* If this element is a root, then this is the VAR_DECL.  If this is
     a sub-element, this is some token used to identify the reference.
     In the case of COMPONENT_REF, this is the FIELD_DECL.  In the case
     of an ARRAY_REF, this is the (constant) index.  In the case of a
     complex number, this is a zero or one.  */
  tree element;

  /* The type of the element.  */
  tree type;

  /* A VAR_DECL, for any sub-element we've decided to replace.  */
  tree replacement;

  /* The number of times the element is referenced as a whole.  I.e.
     given "a.b.c", this would be incremented for C, but not for A or B.  */
  unsigned int n_uses;

  /* The number of times the element is copied to or from another
     scalarizable element.  */
  unsigned int n_copies;

  /* True if TYPE is scalar.  */
  bool is_scalar;

  /* True if we saw something about this element that prevents scalarization,
     such as non-constant indexing.  */
  bool cannot_scalarize;

  /* True if we've decided that structure-to-structure assignment
     should happen via memcpy and not per-element.  */
  bool use_block_copy;

  /* A flag for use with/after random access traversals.  */
  bool visited;
};

/* Random access to the child of a parent is performed by hashing.
   This prevents quadratic behaviour, and allows SRA to function
   reasonably on larger records.  */
static htab_t sra_map;

/* All structures are allocated out of the following obstack.  */
static struct obstack sra_obstack;

/* Debugging functions.  */
static void dump_sra_elt_name (FILE *, struct sra_elt *);
extern void debug_sra_elt_name (struct sra_elt *);
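To illustrate the element tree with a nested aggregate (our example, not from
the commit):

  struct inner { int a; int b; };
  struct outer { struct inner i; _Complex float c; };
  struct outer v;

The root sra_elt for 'v' has children keyed by the FIELD_DECLs 'i' and 'c';
the child for 'i' has its own children for 'a' and 'b'; and the child for 'c'
has children keyed by integer_zero_node and integer_one_node, naming the real
and imaginary parts as described for 'element' above.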
/* Return true if DECL is an SRA candidate. */
static bool
@@ -159,157 +150,102 @@ is_sra_candidate_decl (tree decl)
return DECL_P (decl) && bitmap_bit_p (sra_candidates, var_ann (decl)->uid);
}
/* Return true if TYPE is a scalar type.  */

static bool
is_sra_scalar_type (tree type)
{
  enum tree_code code = TREE_CODE (type);
  return (code == INTEGER_TYPE || code == REAL_TYPE || code == VECTOR_TYPE
	  || code == ENUMERAL_TYPE || code == BOOLEAN_TYPE
	  || code == CHAR_TYPE || code == POINTER_TYPE || code == OFFSET_TYPE
	  || code == REFERENCE_TYPE);
}
/* Return true if TYPE can be decomposed into a set of independent variables.

   Note that this doesn't imply that all elements of TYPE can be
   instantiated, just that if we decide to break up the type into
   separate pieces that it can be done.  */

static bool
type_can_be_decomposed_p (tree type)
{
  unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
  tree t;

  /* Avoid searching the same type twice.  */
  if (bitmap_bit_p (sra_type_decomp_cache, cache+0))
    return true;
  if (bitmap_bit_p (sra_type_decomp_cache, cache+1))
    return false;

  /* The type must have a definite non-zero size.  */
  if (TYPE_SIZE (type) == NULL || integer_zerop (TYPE_SIZE (type)))
    goto fail;

  /* The type must be a non-union aggregate.  */
  switch (TREE_CODE (type))
    {
    case RECORD_TYPE:
      {
	bool saw_one_field = false;

	for (t = TYPE_FIELDS (type); t ; t = TREE_CHAIN (t))
	  if (TREE_CODE (t) == FIELD_DECL)
	    {
	      /* Reject incorrectly represented bit fields.  */
	      if (DECL_BIT_FIELD (t)
		  && (tree_low_cst (DECL_SIZE (t), 1)
		      != TYPE_PRECISION (TREE_TYPE (t))))
		goto fail;

	      saw_one_field = true;
	    }

	/* Record types must have at least one field.  */
	if (!saw_one_field)
	  goto fail;
      }
      break;

    case ARRAY_TYPE:
      /* Array types must have a fixed lower and upper bound.  */
      t = TYPE_DOMAIN (type);
      if (t == NULL)
	goto fail;
      if (TYPE_MIN_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MIN_VALUE (t)))
	goto fail;
      if (TYPE_MAX_VALUE (t) == NULL || !TREE_CONSTANT (TYPE_MAX_VALUE (t)))
	goto fail;
      break;

    /* Similarly for the parts of a complex type.  */
    case COMPLEX_TYPE:
      break;

    default:
      goto fail;
    }

  bitmap_set_bit (sra_type_decomp_cache, cache+0);
  return true;

 fail:
  bitmap_set_bit (sra_type_decomp_cache, cache+1);
  return false;
}
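A few examples of how this predicate classifies types (ours, for
illustration):

  struct ok   { int a; float b[4]; };  /* decomposable                   */
  union un    { int i; float f; };     /* fails: not a non-union record  */
  struct flex { int n; int tail[]; };  /* fails: the array lacks a fixed
                                          upper bound                    */
  struct bf   { int x : 3; };          /* fails: DECL_SIZE is 3 but the
                                          TYPE_PRECISION of int is 32 on
                                          a typical target               */

Each verdict is cached in sra_type_decomp_cache as a bit pair at
TYPE_UID * 2 (even bit for yes, odd bit for no), so any given type is
analyzed only once.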
/* Return true if DECL can be decomposed into a set of independent
   (though not necessarily scalar) variables.  */

static bool
decl_can_be_decomposed_p (tree var)
{
  /* Early out for scalars.  */
  if (is_sra_scalar_type (TREE_TYPE (var)))
    return false;

  /* The variable must not be aliased.  */
  if (!is_gimple_non_addressable (var))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
@@ -321,6 +257,7 @@ can_be_scalarized_p (tree var)
      return false;
    }

  /* The variable must not be volatile.  */
  if (TREE_THIS_VOLATILE (var))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
@@ -332,879 +269,1701 @@ can_be_scalarized_p (tree var)
      return false;
    }

  /* We must be able to decompose the variable's type.  */
  if (!type_can_be_decomposed_p (TREE_TYPE (var)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Cannot scalarize variable ");
	  print_generic_expr (dump_file, var, dump_flags);
	  fprintf (dump_file, " because its type cannot be decomposed\n");
	}
      return false;
    }

  return true;
}

/* Return true if TYPE can be *completely* decomposed into scalars.  */

static bool
type_can_instantiate_all_elements (tree type)
{
  if (is_sra_scalar_type (type))
    return true;
  if (!type_can_be_decomposed_p (type))
    return false;

  switch (TREE_CODE (type))
    {
    case RECORD_TYPE:
      {
	unsigned int cache = TYPE_UID (TYPE_MAIN_VARIANT (type)) * 2;
	tree f;

	if (bitmap_bit_p (sra_type_inst_cache, cache+0))
	  return true;
	if (bitmap_bit_p (sra_type_inst_cache, cache+1))
	  return false;

	for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
	  if (TREE_CODE (f) == FIELD_DECL)
	    {
	      if (!type_can_instantiate_all_elements (TREE_TYPE (f)))
		{
		  bitmap_set_bit (sra_type_inst_cache, cache+1);
		  return false;
		}
	    }

	bitmap_set_bit (sra_type_inst_cache, cache+0);
	return true;
      }

    case ARRAY_TYPE:
      return type_can_instantiate_all_elements (TREE_TYPE (type));

    case COMPLEX_TYPE:
      return true;

    default:
      abort ();
    }
}
/* Test whether ELT or some sub-element cannot be scalarized.  */

static bool
can_completely_scalarize_p (struct sra_elt *elt)
{
  struct sra_elt *c;

  if (elt->cannot_scalarize)
    return false;

  for (c = elt->children; c ; c = c->sibling)
    if (!can_completely_scalarize_p (c))
      return false;

  return true;
}
/* A simplified tree hashing algorithm that only handles the types of
   trees we expect to find in sra_elt->element.  */

static hashval_t
sra_hash_tree (tree t)
{
  switch (TREE_CODE (t))
    {
    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
    case FIELD_DECL:
      return DECL_UID (t);
    case INTEGER_CST:
      return TREE_INT_CST_LOW (t) ^ TREE_INT_CST_HIGH (t);
    default:
      abort ();
    }
}
/* Hash function for type SRA_PAIR.  */

static hashval_t
sra_elt_hash (const void *x)
{
  const struct sra_elt *e = x;
  const struct sra_elt *p;
  hashval_t h;

  h = sra_hash_tree (e->element);

  /* Take into account everything back up the chain.  Given that chain
     lengths are rarely very long, this should be acceptable.  If we
     truly identify this as a performance problem, it should work to
     hash the pointer value "e->parent".  */
  for (p = e->parent; p ; p = p->parent)
    h = (h * 65521) ^ sra_hash_tree (p->element);

  return h;
}

/* Equality function for type SRA_PAIR.  */

static int
sra_elt_eq (const void *x, const void *y)
{
  const struct sra_elt *a = x;
  const struct sra_elt *b = y;

  if (a->parent != b->parent)
    return false;

  /* All the field/decl stuff is unique.  */
  if (a->element == b->element)
    return true;

  /* The only thing left is integer equality.  */
  if (TREE_CODE (a->element) == INTEGER_CST
      && TREE_CODE (b->element) == INTEGER_CST)
    return tree_int_cst_equal (a->element, b->element);
  else
    return false;
}
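For instance (our sketch), the element for a reference chain a.b.c hashes
roughly as:

  hashval_t h = DECL_UID (c);
  h = (h * 65521) ^ DECL_UID (b);   /* parent      */
  h = (h * 65521) ^ DECL_UID (a);   /* grandparent */

Because every parent was itself uniquified through this same table,
sra_elt_eq can compare parents by pointer identity and only needs value
comparison for the element itself.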
/* Create or return the SRA_ELT structure for CHILD in PARENT.  PARENT
   may be null, in which case CHILD must be a DECL.  */

static struct sra_elt *
lookup_element (struct sra_elt *parent, tree child, tree type,
		enum insert_option insert)
{
  struct sra_elt dummy;
  struct sra_elt **slot;
  struct sra_elt *elt;

  dummy.parent = parent;
  dummy.element = child;

  slot = (struct sra_elt **) htab_find_slot (sra_map, &dummy, insert);
  if (!slot && insert == NO_INSERT)
    return NULL;

  elt = *slot;
  if (!elt && insert == INSERT)
    {
      *slot = elt = obstack_alloc (&sra_obstack, sizeof (*elt));
      memset (elt, 0, sizeof (*elt));

      elt->parent = parent;
      elt->element = child;
      elt->type = type;
      elt->is_scalar = is_sra_scalar_type (type);

      if (parent)
	{
	  elt->sibling = parent->children;
	  parent->children = elt;
	}

      /* If this is a parameter, then if we want to scalarize, we have
	 one copy from the true function parameter.  Count it now.  */
      if (TREE_CODE (child) == PARM_DECL)
	{
	  elt->n_copies = 1;
	  bitmap_set_bit (needs_copy_in, var_ann (child)->uid);
	}
    }

  return elt;
}
/* Return true if the ARRAY_REF in EXPR is a constant, in bounds access.  */

static bool
is_valid_const_index (tree expr)
{
  tree dom, t, index = TREE_OPERAND (expr, 1);

  if (TREE_CODE (index) != INTEGER_CST)
    return false;

  /* Watch out for stupid user tricks, indexing outside the array.

     Careful, we're not called only on scalarizable types, so do not
     assume constant array bounds.  We needn't do anything with such
     cases, since they'll be referring to objects that we should have
     already rejected for scalarization, so returning false is fine.  */

  dom = TYPE_DOMAIN (TREE_TYPE (TREE_OPERAND (expr, 0)));
  if (dom == NULL)
    return false;

  t = TYPE_MIN_VALUE (dom);
  if (!t || TREE_CODE (t) != INTEGER_CST)
    return false;
  if (tree_int_cst_lt (index, t))
    return false;

  t = TYPE_MAX_VALUE (dom);
  if (!t || TREE_CODE (t) != INTEGER_CST)
    return false;
  if (tree_int_cst_lt (t, index))
    return false;

  return true;
}
/* Create or return the SRA_ELT structure for EXPR if the expression
   refers to a scalarizable variable.  */

static struct sra_elt *
maybe_lookup_element_for_expr (tree expr)
{
  struct sra_elt *elt;
  tree child;

  switch (TREE_CODE (expr))
    {
    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      if (is_sra_candidate_decl (expr))
	return lookup_element (NULL, expr, TREE_TYPE (expr), INSERT);
      return NULL;

    case ARRAY_REF:
      /* We can't scalarize variable array indices.  */
      if (is_valid_const_index (expr))
	child = TREE_OPERAND (expr, 1);
      else
	return NULL;
      break;

    case COMPONENT_REF:
      /* Don't look through unions.  */
      if (TREE_CODE (TREE_TYPE (TREE_OPERAND (expr, 0))) != RECORD_TYPE)
	return NULL;
      child = TREE_OPERAND (expr, 1);
      break;

    case REALPART_EXPR:
      child = integer_zero_node;
      break;
    case IMAGPART_EXPR:
      child = integer_one_node;
      break;

    default:
      return NULL;
    }

  elt = maybe_lookup_element_for_expr (TREE_OPERAND (expr, 0));
  if (elt)
    return lookup_element (elt, child, TREE_TYPE (expr), INSERT);
  return NULL;
}
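For example (our illustration): given

  struct s { int v[2]; } x;

the reference x.v[1] resolves bottom-up: the VAR_DECL 'x' yields the root
element, the COMPONENT_REF appends the child keyed by FIELD_DECL 'v', and
the ARRAY_REF appends a grandchild keyed by the INTEGER_CST 1.  By
contrast, x.v[i] with a non-constant 'i' returns NULL at the ARRAY_REF
case, so the access is treated as touching a larger piece of the structure.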
/* Functions to walk just enough of the tree to see all scalarizable
references, and categorize them. */
/* A set of callbacks for phases 2 and 4. They'll be invoked for the
various kinds of references seen. In all cases, *BSI is an iterator
pointing to the statement being processed. */
struct sra_walk_fns
{
/* Invoked when ELT is required as a unit. Note that ELT might refer to
a leaf node, in which case this is a simple scalar reference. *EXPR_P
points to the location of the expression. IS_OUTPUT is true if this
is a left-hand-side reference. */
void (*use) (struct sra_elt *elt, tree *expr_p,
block_stmt_iterator *bsi, bool is_output);
/* Invoked when we have a copy between two scalarizable references. */
void (*copy) (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
block_stmt_iterator *bsi);
/* Invoked when ELT is initialized from a constant. VALUE may be NULL,
in which case it should be treated as an empty CONSTRUCTOR. */
void (*init) (struct sra_elt *elt, tree value, block_stmt_iterator *bsi);
/* Invoked when we have a copy between one scalarizable reference ELT
and one non-scalarizable reference OTHER. IS_OUTPUT is true if ELT
is on the left-hand side. */
void (*ldst) (struct sra_elt *elt, tree other,
block_stmt_iterator *bsi, bool is_output);
/* True during phase 2, false during phase 4. */
/* ??? This is a hack. */
bool initial_scan;
};
#ifdef ENABLE_CHECKING
/* Invoked via walk_tree, if *TP contains a candidate decl, return it.  */
static tree
sra_find_candidate_decl (tree *tp, int *walk_subtrees,
void *data ATTRIBUTE_UNUSED)
{
tree t = *tp;
enum tree_code code = TREE_CODE (t);
if (code == VAR_DECL || code == PARM_DECL || code == RESULT_DECL)
{
*walk_subtrees = 0;
if (is_sra_candidate_decl (t))
return t;
}
else if (TYPE_P (t))
*walk_subtrees = 0;
return NULL;
}
#endif
/* Walk most expressions looking for a scalarizable aggregate.
If we find one, invoke FNS->USE. */
static void
sra_walk_expr (tree *expr_p, block_stmt_iterator *bsi, bool is_output,
const struct sra_walk_fns *fns)
{
tree expr = *expr_p;
tree inner = expr;
/* We're looking to collect a reference expression between EXPR and INNER,
such that INNER is a scalarizable decl and all other nodes through EXPR
are references that we can scalarize. If we come across something that
we can't scalarize, we reset EXPR. This has the effect of making it
appear that we're referring to the larger expression as a whole. */
while (1)
switch (TREE_CODE (inner))
{
case VAR_DECL:
case PARM_DECL:
case RESULT_DECL:
/* If there is a scalarizable decl at the bottom, then process it. */
if (is_sra_candidate_decl (inner))
{
struct sra_elt *elt = maybe_lookup_element_for_expr (expr);
fns->use (elt, expr_p, bsi, is_output);
}
return;
case ARRAY_REF:
/* Non-constant index means any member may be accessed. Prevent the
expression from being scalarized. If we were to treat this as a
reference to the whole array, we can wind up with a single dynamic
index reference inside a loop being overridden by several constant
index references during loop setup. It's possible that this could
be avoided by using dynamic usage counts based on BB trip counts
(based on loop analysis or profiling), but that hardly seems worth
the effort. */
/* ??? Hack. Figure out how to push this into the scan routines
without duplicating too much code. */
if (!is_valid_const_index (inner))
{
if (fns->initial_scan)
{
struct sra_elt *elt
= maybe_lookup_element_for_expr (TREE_OPERAND (inner, 0));
if (elt)
elt->cannot_scalarize = true;
}
return;
}
/* ??? Are we assured that non-constant bounds and stride will have
the same value everywhere? I don't think Fortran will... */
if (TREE_OPERAND (inner, 2) || TREE_OPERAND (inner, 3))
goto use_all;
inner = TREE_OPERAND (inner, 0);
break;
case COMPONENT_REF:
/* A reference to a union member constitutes a reference to the
entire union. */
if (TREE_CODE (TREE_TYPE (TREE_OPERAND (inner, 0))) != RECORD_TYPE)
goto use_all;
/* ??? See above re non-constant stride. */
if (TREE_OPERAND (inner, 2))
goto use_all;
inner = TREE_OPERAND (inner, 0);
break;
case REALPART_EXPR:
case IMAGPART_EXPR:
inner = TREE_OPERAND (inner, 0);
break;
case BIT_FIELD_REF:
/* A bit field reference (access to *multiple* fields simultaneously)
is not currently scalarized. Consider this an access to the
complete outer element, to which walk_tree will bring us next. */
goto use_all;
case ARRAY_RANGE_REF:
/* Similarly, a subrange reference is used to modify indexing, which
means that the canonical element names that we have won't work. */
goto use_all;
case VIEW_CONVERT_EXPR:
case NOP_EXPR:
/* Similarly, a view/nop explicitly wants to look at an object in a
type other than the one we've scalarized. */
goto use_all;
use_all:
expr_p = &TREE_OPERAND (inner, 0);
inner = expr = *expr_p;
break;
default:
#ifdef ENABLE_CHECKING
/* Validate that we're not missing any references. */
if (walk_tree (&inner, sra_find_candidate_decl, NULL, NULL))
abort ();
#endif
return;
}
}
/* Walk a TREE_LIST of values looking for scalarizable aggregates.
If we find one, invoke FNS->USE. */
static void
sra_walk_tree_list (tree list, block_stmt_iterator *bsi, bool is_output,
const struct sra_walk_fns *fns)
{
tree op;
for (op = list; op ; op = TREE_CHAIN (op))
sra_walk_expr (&TREE_VALUE (op), bsi, is_output, fns);
}
/* Walk the arguments of a CALL_EXPR looking for scalarizable aggregates.
If we find one, invoke FNS->USE. */
static void
sra_walk_call_expr (tree expr, block_stmt_iterator *bsi,
const struct sra_walk_fns *fns)
{
sra_walk_tree_list (TREE_OPERAND (expr, 1), bsi, false, fns);
}
/* Walk the inputs and outputs of an ASM_EXPR looking for scalarizable
aggregates. If we find one, invoke FNS->USE. */
static void
sra_walk_asm_expr (tree expr, block_stmt_iterator *bsi,
const struct sra_walk_fns *fns)
{
sra_walk_tree_list (ASM_INPUTS (expr), bsi, false, fns);
sra_walk_tree_list (ASM_OUTPUTS (expr), bsi, true, fns);
}
/* Walk a MODIFY_EXPR and categorize the assignment appropriately. */
static void
sra_walk_modify_expr (tree expr, block_stmt_iterator *bsi,
const struct sra_walk_fns *fns)
{
struct sra_elt *lhs_elt, *rhs_elt;
tree lhs, rhs;
lhs = TREE_OPERAND (expr, 0);
rhs = TREE_OPERAND (expr, 1);
lhs_elt = maybe_lookup_element_for_expr (lhs);
rhs_elt = maybe_lookup_element_for_expr (rhs);
/* If both sides are scalarizable, this is a COPY operation. */
if (lhs_elt && rhs_elt)
{
fns->copy (lhs_elt, rhs_elt, bsi);
return;
}
if (lhs_elt)
{
/* If this is an assignment from a constant, or constructor, then
we have access to all of the elements individually. Invoke INIT. */
if (TREE_CODE (rhs) == COMPLEX_EXPR
|| TREE_CODE (rhs) == COMPLEX_CST
|| TREE_CODE (rhs) == CONSTRUCTOR)
fns->init (lhs_elt, rhs, bsi);
/* If this is an assignment from read-only memory, treat this as if
we'd been passed the constructor directly. Invoke INIT. */
else if (TREE_CODE (rhs) == VAR_DECL
&& TREE_STATIC (rhs)
&& TREE_READONLY (rhs)
&& targetm.binds_local_p (rhs))
{
if (DECL_INITIAL (rhs) != error_mark_node)
fns->init (lhs_elt, DECL_INITIAL (rhs), bsi);
}
/* If this is a copy from a non-scalarizable lvalue, invoke LDST.
The lvalue requirement prevents us from trying to directly scalarize
the result of a function call. Which would result in trying to call
the function multiple times, and other evil things. */
else if (!lhs_elt->is_scalar && is_gimple_addr_expr_arg (rhs))
fns->ldst (lhs_elt, rhs, bsi, true);
/* Otherwise we're being used in some context that requires the
aggregate to be seen as a whole. Invoke USE. */
else
fns->use (lhs_elt, &TREE_OPERAND (expr, 0), bsi, true);
}
else
{
/* LHS_ELT being null only means that the LHS as a whole is not a
scalarizable reference. There may be occurrences of scalarizable
variables within, which implies a USE. */
sra_walk_expr (&TREE_OPERAND (expr, 0), bsi, true, fns);
}
/* Likewise for the right-hand side. The only difference here is that
we don't have to handle constants, and the RHS may be a call. */
if (rhs_elt)
{
if (!rhs_elt->is_scalar)
fns->ldst (rhs_elt, lhs, bsi, false);
else
fns->use (rhs_elt, &TREE_OPERAND (expr, 1), bsi, false);
}
else if (TREE_CODE (rhs) == CALL_EXPR)
sra_walk_call_expr (rhs, bsi, fns);
else
sra_walk_expr (&TREE_OPERAND (expr, 1), bsi, false, fns);
}
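Assuming 'a' and 'b' are scalarizable structures and 'g' is an aliased
(non-candidate) structure of the same type, the dispatch above plays out
like this (our examples):

  a = b;              /* COPY: both sides scalarizable                 */
  a = (struct s){0};  /* INIT: CONSTRUCTOR on the right-hand side      */
  a = g;              /* LDST with is_output true: load into ELT       */
  g = a;              /* LDST with is_output false: store out of ELT   */
  a.f = x;            /* USE of the element for a.f, as an output      */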
/* Entry point to the walk functions. Search the entire function,
invoking the callbacks in FNS on each of the references to
scalarizable variables. */
static void
sra_walk_function (const struct sra_walk_fns *fns)
{
basic_block bb;
block_stmt_iterator si;
/* ??? Phase 4 could derive some benefit from walking the function in
dominator tree order. */
FOR_EACH_BB (bb)
for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
{
tree stmt, t;
stmt_ann_t ann;
stmt = bsi_stmt (si);
ann = stmt_ann (stmt);
/* If the statement has no virtual operands, then it doesn't
make any structure references that we care about. */
if (NUM_V_MAY_DEFS (V_MAY_DEF_OPS (ann)) == 0
&& NUM_VUSES (VUSE_OPS (ann)) == 0
&& NUM_V_MUST_DEFS (V_MUST_DEF_OPS (ann)) == 0)
continue;
switch (TREE_CODE (stmt))
{
case RETURN_EXPR:
/* If we have "return <retval>" then the return value is
already exposed for our pleasure. Walk it as a USE to
force all the components back in place for the return.
If we have an embedded assignment, then <retval> is of
a type that gets returned in registers in this ABI, and
we do not wish to extend their lifetimes. Treat this
as a USE of the variable on the RHS of this assignment. */
t = TREE_OPERAND (stmt, 0);
if (TREE_CODE (t) == MODIFY_EXPR)
sra_walk_expr (&TREE_OPERAND (t, 1), &si, false, fns);
else
sra_walk_expr (&TREE_OPERAND (stmt, 0), &si, false, fns);
break;
case MODIFY_EXPR:
sra_walk_modify_expr (stmt, &si, fns);
break;
case CALL_EXPR:
sra_walk_call_expr (stmt, &si, fns);
break;
case ASM_EXPR:
sra_walk_asm_expr (stmt, &si, fns);
break;
default:
break;
}
}
}
/* Phase One: Scan all referenced variables in the program looking for
structures that could be decomposed. */
static bool
find_candidates_for_sra (void)
{
size_t i;
bool any_set = false;
for (i = 0; i < num_referenced_vars; i++)
{
tree var = referenced_var (i);
if (decl_can_be_decomposed_p (var))
{
bitmap_set_bit (sra_candidates, var_ann (var)->uid);
any_set = true;
}
}
return any_set;
}
/* Phase Two: Scan all references to scalarizable variables. Count the
number of times they are used or copied respectively. */
/* Callbacks to fill in SRA_WALK_FNS. Everything but USE is
considered a copy, because we can decompose the reference such that
the sub-elements needn't be contiguous. */
static void
scan_use (struct sra_elt *elt, tree *expr_p ATTRIBUTE_UNUSED,
block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
bool is_output ATTRIBUTE_UNUSED)
{
elt->n_uses += 1;
}
static void
scan_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
{
lhs_elt->n_copies += 1;
rhs_elt->n_copies += 1;
}
static void
scan_init (struct sra_elt *lhs_elt, tree rhs ATTRIBUTE_UNUSED,
block_stmt_iterator *bsi ATTRIBUTE_UNUSED)
{
lhs_elt->n_copies += 1;
}
static void
scan_ldst (struct sra_elt *elt, tree other ATTRIBUTE_UNUSED,
block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
bool is_output ATTRIBUTE_UNUSED)
{
elt->n_copies += 1;
}
/* Dump the values we collected during the scanning phase. */
static void
scan_dump (struct sra_elt *elt)
{
struct sra_elt *c;
dump_sra_elt_name (dump_file, elt);
fprintf (dump_file, ": n_uses=%u n_copies=%u\n", elt->n_uses, elt->n_copies);
for (c = elt->children; c ; c = c->sibling)
scan_dump (c);
}
/* Entry point to phase 2. Scan the entire function, building up
scalarization data structures, recording copies and uses. */
static void
scan_function (void)
{
static const struct sra_walk_fns fns = {
scan_use, scan_copy, scan_init, scan_ldst, true
};
sra_walk_function (&fns);
if (dump_file && (dump_flags & TDF_DETAILS))
{
size_t i;
fputs ("\nScan results:\n", dump_file);
EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i,
{
tree var = referenced_var (i);
struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
if (elt)
scan_dump (elt);
});
fputc ('\n', dump_file);
}
}
/* Phase Three: Make decisions about which variables to scalarize, if any.
All elements to be scalarized have replacement variables made for them. */
/* A subroutine of build_element_name. Recursively build the element
name on the obstack. */
static void
build_element_name_1 (struct sra_elt *elt)
{
tree t;
char buffer[32];
if (elt->parent)
{
build_element_name_1 (elt->parent);
obstack_1grow (&sra_obstack, '$');
if (TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
{
if (elt->element == integer_zero_node)
obstack_grow (&sra_obstack, "real", 4);
else
obstack_grow (&sra_obstack, "imag", 4);
return;
}
}
t = elt->element;
if (TREE_CODE (t) == INTEGER_CST)
{
/* ??? Eh. Don't bother doing double-wide printing. */
sprintf (buffer, HOST_WIDE_INT_PRINT_DEC, TREE_INT_CST_LOW (t));
obstack_grow (&sra_obstack, buffer, strlen (buffer));
}
else
{
tree name = DECL_NAME (t);
if (name)
obstack_grow (&sra_obstack, IDENTIFIER_POINTER (name),
IDENTIFIER_LENGTH (name));
else
{
sprintf (buffer, "D%u", DECL_UID (t));
obstack_grow (&sra_obstack, buffer, strlen (buffer));
}
}
}
/* Construct a pretty variable name for an element's replacement variable.
The name is built on the obstack. */
static char *
build_element_name (struct sra_elt *elt)
{
build_element_name_1 (elt);
obstack_1grow (&sra_obstack, '\0');
return obstack_finish (&sra_obstack);
}
/* Instantiate an element as an independent variable. */
static void
instantiate_element (struct sra_elt *elt)
{
struct sra_elt *base_elt;
tree var, base;
for (base_elt = elt; base_elt->parent; base_elt = base_elt->parent)
continue;
base = base_elt->element;
elt->replacement = var = make_rename_temp (elt->type, "SR");
DECL_SOURCE_LOCATION (var) = DECL_SOURCE_LOCATION (base);
TREE_NO_WARNING (var) = TREE_NO_WARNING (base);
DECL_ARTIFICIAL (var) = DECL_ARTIFICIAL (base);
if (DECL_NAME (base) && !DECL_IGNORED_P (base))
{
char *pretty_name = build_element_name (elt);
DECL_NAME (var) = get_identifier (pretty_name);
obstack_free (&sra_obstack, pretty_name);
}
if (dump_file)
{
fputs (" ", dump_file);
dump_sra_elt_name (dump_file, elt);
fputs (" -> ", dump_file);
print_generic_expr (dump_file, var, dump_flags);
fputc ('\n', dump_file);
}
}
/* Make one pass across an element tree deciding whether or not it's
profitable to instantiate individual leaf scalars.
PARENT_USES and PARENT_COPIES are the sum of the N_USES and N_COPIES
fields all the way up the tree. */
static void
decide_instantiation_1 (struct sra_elt *elt, unsigned int parent_uses,
unsigned int parent_copies)
{
if (dump_file && !elt->parent)
{
fputs ("Initial instantiation for ", dump_file);
dump_sra_elt_name (dump_file, elt);
fputc ('\n', dump_file);
}
if (elt->cannot_scalarize)
return;
if (elt->is_scalar)
{
/* The decision is simple: instantiate if we're used more frequently
than the parent needs to be seen as a complete unit. */
if (elt->n_uses + elt->n_copies + parent_copies > parent_uses)
instantiate_element (elt);
}
else
{
struct sra_elt *c;
unsigned int this_uses = elt->n_uses + parent_uses;
unsigned int this_copies = elt->n_copies + parent_copies;
for (c = elt->children; c ; c = c->sibling)
decide_instantiation_1 (c, this_uses, this_copies);
}
}
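A worked instance of the inequality (ours): for a.c where the root 'a' has
n_uses = 2 and n_copies = 0, a leaf 'c' with n_uses = 3 and n_copies = 1 is
tested as 3 + 1 + 0 > 2, which holds, so 'c' is instantiated.  If 'a' were
instead used as a whole 5 times, 3 + 1 + 0 > 5 fails and 'c' is left alone
here, though decide_block_copy below may still instantiate it when it opts
for element copies.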
/* Compute the size and number of all instantiated elements below ELT.
We will only care about this if the size of the complete structure
fits in a HOST_WIDE_INT, so we don't have to worry about overflow. */
static unsigned int
sum_instantiated_sizes (struct sra_elt *elt, unsigned HOST_WIDE_INT *sizep)
{
if (elt->replacement)
{
*sizep += TREE_INT_CST_LOW (TYPE_SIZE_UNIT (elt->type));
return 1;
}
else
{
struct sra_elt *c;
unsigned int count = 0;
for (c = elt->children; c ; c = c->sibling)
count += sum_instantiated_sizes (c, sizep);
return count;
}
}
/* Instantiate fields in ELT->TYPE that are not currently present as
children of ELT. */
static void instantiate_missing_elements (struct sra_elt *elt);
static void
instantiate_missing_elements_1 (struct sra_elt *elt, tree child, tree type)
{
struct sra_elt *sub = lookup_element (elt, child, type, INSERT);
if (sub->is_scalar)
{
if (sub->replacement == NULL)
instantiate_element (sub);
}
else
instantiate_missing_elements (sub);
}
static void
instantiate_missing_elements (struct sra_elt *elt)
{
tree type = elt->type;
switch (TREE_CODE (type))
{
case RECORD_TYPE:
{
tree f;
for (f = TYPE_FIELDS (type); f ; f = TREE_CHAIN (f))
if (TREE_CODE (f) == FIELD_DECL)
instantiate_missing_elements_1 (elt, f, TREE_TYPE (f));
break;
}
case ARRAY_TYPE:
{
tree i, max, subtype;
i = TYPE_MIN_VALUE (TYPE_DOMAIN (type));
max = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
subtype = TREE_TYPE (type);
while (1)
{
instantiate_missing_elements_1 (elt, i, subtype);
if (tree_int_cst_equal (i, max))
break;
i = int_const_binop (PLUS_EXPR, i, integer_one_node, true);
}
break;
}
case COMPLEX_TYPE:
type = TREE_TYPE (type);
instantiate_missing_elements_1 (elt, integer_zero_node, type);
instantiate_missing_elements_1 (elt, integer_one_node, type);
break;
default:
abort ();
}
}
/* Make one pass across an element tree deciding whether to perform block
or element copies. If we decide on element copies, instantiate all
elements. Return true if there are any instantiated sub-elements. */
static bool
decide_block_copy (struct sra_elt *elt)
{
struct sra_elt *c;
bool any_inst;
/* If scalarization is disabled, respect it. */
if (elt->cannot_scalarize)
{
elt->use_block_copy = 1;
if (dump_file)
{
fputs ("Scalarization disabled for ", dump_file);
dump_sra_elt_name (dump_file, elt);
fputc ('\n', dump_file);
}
return false;
}
/* Don't decide if we've no uses. */
if (elt->n_uses == 0 && elt->n_copies == 0)
;
else if (!elt->is_scalar)
{
tree size_tree = TYPE_SIZE_UNIT (elt->type);
bool use_block_copy = true;
/* Don't bother trying to figure out the rest if the structure is
so large we can't do easy arithmetic. This also forces block
copies for variable sized structures. */
if (host_integerp (size_tree, 1))
{
unsigned HOST_WIDE_INT full_size, inst_size = 0;
unsigned int inst_count;
full_size = tree_low_cst (size_tree, 1);
/* ??? What to do here. If there are two fields, and we've only
instantiated one, then instantiating the other is clearly a win.
If there are a large number of fields then the size of the copy
is much more of a factor. */
/* If the structure is small, and we've made copies, go ahead
and instantiate, hoping that the copies will go away. */
if (full_size <= (unsigned) MOVE_RATIO * UNITS_PER_WORD
&& elt->n_copies > elt->n_uses)
use_block_copy = false;
else
{
inst_count = sum_instantiated_sizes (elt, &inst_size);
if (inst_size * 4 >= full_size * 3)
use_block_copy = false;
}
/* In order to avoid block copy, we have to be able to instantiate
all elements of the type. See if this is possible. */
if (!use_block_copy
&& (!can_completely_scalarize_p (elt)
|| !type_can_instantiate_all_elements (elt->type)))
use_block_copy = true;
}
elt->use_block_copy = use_block_copy;
if (dump_file)
{
fprintf (dump_file, "Using %s for ",
use_block_copy ? "block-copy" : "element-copy");
dump_sra_elt_name (dump_file, elt);
fputc ('\n', dump_file);
}
if (!use_block_copy)
{
instantiate_missing_elements (elt);
return true;
}
}
any_inst = elt->replacement != NULL;
for (c = elt->children; c ; c = c->sibling)
any_inst |= decide_block_copy (c);
return any_inst;
}
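Arithmetic sketch (ours): consider a 24-byte structure with three 8-byte
fields, two of them already instantiated.  If copies dominate uses and
24 <= MOVE_RATIO * UNITS_PER_WORD, element copy is chosen immediately.
Otherwise inst_size = 16 and the test 16 * 4 >= 24 * 3 (64 >= 72) fails,
so the structure keeps block copies; with all three fields instantiated,
24 * 4 >= 24 * 3 succeeds, and instantiate_missing_elements then fills in
any leaves that phase 3 skipped.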
/* Entry point to phase 3. Instantiate scalar replacement variables. */
static void
decide_instantiations (void)
{
unsigned int i;
bool cleared_any;
struct bitmap_head_def done_head;
/* We cannot clear bits from a bitmap we're iterating over,
so save up all the bits to clear until the end. */
bitmap_initialize (&done_head, 1);
cleared_any = false;
EXECUTE_IF_SET_IN_BITMAP (sra_candidates, 0, i,
{
tree var = referenced_var (i);
struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
if (elt)
{
decide_instantiation_1 (elt, 0, 0);
if (!decide_block_copy (elt))
elt = NULL;
}
if (!elt)
{
bitmap_set_bit (&done_head, i);
cleared_any = true;
}
});
if (cleared_any)
{
bitmap_operation (sra_candidates, sra_candidates, &done_head,
BITMAP_AND_COMPL);
bitmap_operation (needs_copy_in, needs_copy_in, &done_head,
BITMAP_AND_COMPL);
}
bitmap_clear (&done_head);
if (dump_file)
fputc ('\n', dump_file);
}
/* Phase Four: Update the function to match the replacements created. */
/* Mark all the variables in V_MAY_DEF or V_MUST_DEF operands for STMT for
renaming. This becomes necessary when we modify all of a non-scalar. */
static void
mark_all_v_defs (tree stmt)
{
v_may_def_optype v_may_defs;
v_must_def_optype v_must_defs;
size_t i, n;
get_stmt_operands (stmt);
v_may_defs = V_MAY_DEF_OPS (stmt_ann (stmt));
n = NUM_V_MAY_DEFS (v_may_defs);
for (i = 0; i < n; i++)
{
tree sym = V_MAY_DEF_RESULT (v_may_defs, i);
if (TREE_CODE (sym) == SSA_NAME)
sym = SSA_NAME_VAR (sym);
bitmap_set_bit (vars_to_rename, var_ann (sym)->uid);
}
v_must_defs = V_MUST_DEF_OPS (stmt_ann (stmt));
n = NUM_V_MUST_DEFS (v_must_defs);
for (i = 0; i < n; i++)
{
tree sym = V_MUST_DEF_OP (v_must_defs, i);
if (TREE_CODE (sym) == SSA_NAME)
sym = SSA_NAME_VAR (sym);
bitmap_set_bit (vars_to_rename, var_ann (sym)->uid);
}
}
/* Build a single level component reference to ELT rooted at BASE.  */

static tree
generate_one_element_ref (struct sra_elt *elt, tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case RECORD_TYPE:
      return build (COMPONENT_REF, elt->type, base, elt->element, NULL);

    case ARRAY_TYPE:
      return build (ARRAY_REF, elt->type, base, elt->element, NULL, NULL);

    case COMPLEX_TYPE:
      if (elt->element == integer_zero_node)
	return build (REALPART_EXPR, elt->type, base);
      else
	return build (IMAGPART_EXPR, elt->type, base);

    default:
      abort ();
    }
}
/* Build a full component reference to ELT rooted at its native variable.  */

static tree
generate_element_ref (struct sra_elt *elt)
{
  if (elt->parent)
    return generate_one_element_ref (elt, generate_element_ref (elt->parent));
  else
    return elt->element;
}

/* Generate a set of assignment statements in *LIST_P to copy all
   instantiated elements under ELT to or from the equivalent structure
   rooted at EXPR.  COPY_OUT controls the direction of the copy, with
   true meaning to copy out of EXPR into ELT.  */

static void
generate_copy_inout (struct sra_elt *elt, bool copy_out, tree expr,
		     tree *list_p)
{
  struct sra_elt *c;
  tree t;

  if (elt->replacement)
    {
      if (copy_out)
	t = build (MODIFY_EXPR, void_type_node, elt->replacement, expr);
      else
	t = build (MODIFY_EXPR, void_type_node, expr, elt->replacement);
      append_to_statement_list (t, list_p);
    }
  else
    {
      for (c = elt->children; c ; c = c->sibling)
	{
	  t = generate_one_element_ref (c, unshare_expr (expr));
	  generate_copy_inout (c, copy_out, t, list_p);
	}
    }
}
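For a scalarized PARM_DECL 'p' of a two-field structure (our running
example), the copy-in direction emits at function entry:

  p$x = p.x;
  p$y = p.y;

and the copy-out direction emits the mirror image (p.x = p$x; p.y = p$y;),
e.g. when the aggregate must be reassembled because it is used as a whole.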
/* Generate a set of assignment statements in *LIST_P to copy all instantiated
   elements under SRC to their counterparts under DST.  There must be a 1-1
   correspondence of instantiated elements.  */

static void
generate_element_copy (struct sra_elt *dst, struct sra_elt *src, tree *list_p)
{
  struct sra_elt *dc, *sc;

  for (dc = dst->children; dc ; dc = dc->sibling)
    {
      sc = lookup_element (src, dc->element, NULL, NO_INSERT);
      if (sc == NULL)
	abort ();
      generate_element_copy (dc, sc, list_p);
    }

  if (dst->replacement)
    {
      tree t;

      if (src->replacement == NULL)
	abort ();

      t = build (MODIFY_EXPR, void_type_node, dst->replacement,
		 src->replacement);
      append_to_statement_list (t, list_p);
    }
}
/* Generate a set of assignment statements in *LIST_P to zero all instantiated
   elements under ELT.  In addition, do not assign to elements that have been
   marked VISITED but do reset the visited flag; this allows easy coordination
   with generate_element_init.  */

static void
generate_element_zero (struct sra_elt *elt, tree *list_p)
{
  struct sra_elt *c;

  for (c = elt->children; c ; c = c->sibling)
    generate_element_zero (c, list_p);

  if (elt->visited)
    elt->visited = false;
  else if (elt->replacement)
    {
      tree t;

      if (elt->is_scalar)
	t = fold_convert (elt->type, integer_zero_node);
      else
	/* We generated a replacement for a non-scalar?  */
	abort ();

      t = build (MODIFY_EXPR, void_type_node, elt->replacement, t);
      append_to_statement_list (t, list_p);
    }
}
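The VISITED handshake matters for partial initializers (our example): for

  struct point p = { .y = 4 };

generate_element_init emits p$y = 4 and marks that element visited;
generate_element_zero is then run over the whole tree, skips (and unmarks)
p$y, and emits p$x = 0, preserving C's zero-fill semantics for the elements
the initializer did not mention.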
/* Generate a set of assignment statements in *LIST_P to set all instantiated
   elements under ELT with the contents of the initializer INIT.  In addition,
   mark all assigned elements VISITED; this allows easy coordination with
   generate_element_zero.  */

static void
generate_element_init (struct sra_elt *elt, tree init, tree *list_p)
{
  enum tree_code init_code = TREE_CODE (init);
  struct sra_elt *sub;
  tree t;

  if (elt->is_scalar)
    {
      if (elt->replacement)
	{
	  t = build (MODIFY_EXPR, void_type_node, elt->replacement, init);
	  append_to_statement_list (t, list_p);
	  elt->visited = true;
	}
      return;
    }

  switch (init_code)
    {
    case COMPLEX_CST:
    case COMPLEX_EXPR:
      for (sub = elt->children; sub ; sub = sub->sibling)
	{
	  if (sub->element == integer_zero_node)
	    t = (init_code == COMPLEX_EXPR
		 ? TREE_OPERAND (init, 0) : TREE_REALPART (init));
	  else
	    t = (init_code == COMPLEX_EXPR
		 ? TREE_OPERAND (init, 1) : TREE_IMAGPART (init));
	  generate_element_init (sub, t, list_p);
	}
      break;

    case CONSTRUCTOR:
      for (t = CONSTRUCTOR_ELTS (init); t ; t = TREE_CHAIN (t))
	{
	  sub = lookup_element (elt, TREE_PURPOSE (t), NULL, NO_INSERT);
	  if (sub == NULL)
	    continue;
	  generate_element_init (sub, TREE_VALUE (t), list_p);
	}
      break;

    default:
      abort ();
    }
}
if (mode == SCALAR_FIELD)
rhs_var = csc_build_component_ref (rhs, rf);
else
rhs_var = get_scalar_for_field (rhs, rf);
/* Insert STMT on all the outgoing edges out of BB. Note that if BB
has more than one edge, STMT will be replicated for each edge. Also,
abnormal edges will be ignored. */
csc_assign (&tsi, lhs_var, rhs_var);
}
}
void
insert_edge_copies (tree stmt, basic_block bb)
{
edge e;
bool first_copy;
/* All the scalar copies just created will either create new definitions
or remove existing definitions of LHS, so we need to mark it for
renaming. */
if (TREE_SIDE_EFFECTS (list))
first_copy = true;
for (e = bb->succ; e; e = e->succ_next)
{
if (mode == SCALAR_FIELD || mode == SCALAR_SCALAR)
{
/* If the LHS has been scalarized, mark it for renaming. */
bitmap_set_bit (vars_to_rename, var_ann (lhs)->uid);
}
else if (mode == FIELD_SCALAR)
/* We don't need to insert copies on abnormal edges. The
value of the scalar replacement is not guaranteed to
be valid through an abnormal edge. */
if (!(e->flags & EDGE_ABNORMAL))
{
/* Otherwise, mark all the symbols in the VDEFs for the last
scalarized statement just created. Since all the statements
introduce the same VDEFs, we only need to check the last one. */
mark_all_v_may_defs (tsi_stmt (tsi));
mark_all_v_must_defs (tsi_stmt (tsi));
if (first_copy)
{
bsi_insert_on_edge (e, stmt);
first_copy = false;
}
else
bsi_insert_on_edge (e, lhd_unsave_expr_now (stmt));
}
else
abort ();
}
return list;
}
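/* A hedged, source-level sketch of the transform that
   generate_element_init arranges.  Everything below is illustrative:
   the struct and the p_x/p_y names stand in for the compiler-generated
   replacement variables and are not part of this file.  */

struct pair { int x, y; };

int
sum_pair (void)
{
  /* Before SRA: struct pair p = { 3, 4 }; return p.x + p.y;  */
  int p_x = 3;   /* element <p, x> initialized from the CONSTRUCTOR */
  int p_y = 4;   /* element <p, y> likewise */
  return p_x + p_y;   /* the aggregate p never materializes */
}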
/* Helper function to insert LIST before BSI, and set up line number info.  */

static void
sra_insert_before (block_stmt_iterator *bsi, tree list)
{
  tree stmt = bsi_stmt (*bsi);

  if (EXPR_HAS_LOCATION (stmt))
    annotate_all_with_locus (&list, EXPR_LOCATION (stmt));
  bsi_insert_before (bsi, list, BSI_SAME_STMT);
}
/* Similarly, but insert after BSI.  Handles insertion onto edges as well.  */

static void
sra_insert_after (block_stmt_iterator *bsi, tree list)
{
  tree stmt = bsi_stmt (*bsi);

  if (EXPR_HAS_LOCATION (stmt))
    annotate_all_with_locus (&list, EXPR_LOCATION (stmt));

  if (stmt_ends_bb_p (stmt))
    insert_edge_copies (list, bsi->bb);
  else
    bsi_insert_after (bsi, list, BSI_CONTINUE_LINKING);
}
/* Similarly, but replace the statement at BSI.  */

static void
sra_replace (block_stmt_iterator *bsi, tree list)
{
  sra_insert_before (bsi, list);
  bsi_remove (bsi);
  if (bsi_end_p (*bsi))
    *bsi = bsi_last (bsi->bb);
  else
    bsi_prev (bsi);
}
/* Scalarize a USE.  To recap, this is either a simple reference to ELT,
   if elt is scalar, or some occurrence of ELT that requires a complete
   aggregate.  IS_OUTPUT is true if ELT is being modified.  */

static void
scalarize_use (struct sra_elt *elt, tree *expr_p, block_stmt_iterator *bsi,
	       bool is_output)
{
  tree list = NULL, stmt = bsi_stmt (*bsi);

  if (elt->replacement)
    {
      /* If we have a replacement, then updating the reference is as
	 simple as modifying the existing statement in place.  */
      if (is_output)
	mark_all_v_defs (stmt);
      *expr_p = elt->replacement;
      modify_stmt (stmt);
    }
  else
    {
      /* Otherwise we need some copies.  If ELT is being read, then we want
	 to store all (modified) sub-elements back into the structure before
	 the reference takes place.  If ELT is being written, then we want to
	 load the changed values back into our shadow variables.  */
      /* ??? We don't check modified for reads, we just always write all of
	 the values.  We should be able to record the SSA number of the VOP
	 for which the values were last read.  If that number matches the
	 SSA number of the VOP in the current statement, then we needn't
	 emit an assignment.  This would also eliminate double writes when
	 a structure is passed as more than one argument to a function call.
	 This optimization would be most effective if sra_walk_function
	 processed the blocks in dominator order.  */

      generate_copy_inout (elt, is_output, generate_element_ref (elt), &list);
      if (list == NULL)
	return;

      if (is_output)
	{
	  mark_all_v_defs (expr_first (list));
	  sra_insert_after (bsi, list);
	}
      else
	sra_insert_before (bsi, list);
    }
}
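/* A hedged, source-level sketch of the two cases scalarize_use
   distinguishes.  The struct, the helper consume(), and the q_x/q_y
   names are invented for illustration only.  */

struct pt { int x, y; };

static int
consume (struct pt p)
{
  return p.x - p.y;
}

int
use_cases (void)
{
  struct pt q;
  int q_x = 1, q_y = 2;

  /* Case 1: the reference has a replacement, so "int r = q.x;"
     is rewritten in place to read the scalar.  */
  int r = q_x;

  /* Case 2: q is needed as a complete aggregate, so the (possibly
     modified) scalars are stored back just before the use.  */
  q.x = q_x;
  q.y = q_y;
  return r + consume (q);
}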
/* Scalarize a COPY.  To recap, this is an assignment statement between
   two scalarizable references, LHS_ELT and RHS_ELT.  */

static void
scalarize_copy (struct sra_elt *lhs_elt, struct sra_elt *rhs_elt,
		block_stmt_iterator *bsi)
{
  tree list, stmt;

  if (lhs_elt->replacement && rhs_elt->replacement)
    {
      /* If we have two scalar operands, modify the existing statement.  */
      stmt = bsi_stmt (*bsi);

#ifdef ENABLE_CHECKING
      /* See the commentary in sra_walk_function concerning
	 RETURN_EXPR, and why we should never see one here.  */
      if (TREE_CODE (stmt) != MODIFY_EXPR)
	abort ();
#endif

      TREE_OPERAND (stmt, 0) = lhs_elt->replacement;
      TREE_OPERAND (stmt, 1) = rhs_elt->replacement;
      modify_stmt (stmt);
    }
  else if (lhs_elt->use_block_copy || rhs_elt->use_block_copy)
    {
      /* If either side requires a block copy, then sync the RHS back
	 to the original structure, leave the original assignment
	 statement (which will perform the block copy), then load the
	 LHS values out of its now-updated original structure.  */
      /* ??? Could perform a modified pair-wise element copy.  That
	 would at least allow those elements that are instantiated in
	 both structures to be optimized well.  */

      list = NULL;
      generate_copy_inout (rhs_elt, false,
			   generate_element_ref (rhs_elt), &list);
      if (list)
	{
	  mark_all_v_defs (expr_first (list));
	  sra_insert_before (bsi, list);
	}

      list = NULL;
      generate_copy_inout (lhs_elt, true,
			   generate_element_ref (lhs_elt), &list);
      if (list)
	sra_insert_after (bsi, list);
    }
  else
    {
      /* Otherwise both sides must be fully instantiated.  In which
	 case perform pair-wise element assignments and replace the
	 original block copy statement.  */

      stmt = bsi_stmt (*bsi);
      mark_all_v_defs (stmt);

      list = NULL;
      generate_element_copy (lhs_elt, rhs_elt, &list);
      if (list == NULL)
	abort ();
      sra_replace (bsi, list);
    }
}
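/* A hedged, source-level sketch of the fully instantiated case of
   scalarize_copy: the aggregate assignment "b = a;" is replaced by
   pair-wise element assignments.  All names below are illustrative.  */

int
copy_pair (int a_x, int a_y)
{
  int b_x, b_y;

  /* was: struct pair b = a;  (a single block-copy MODIFY_EXPR) */
  b_x = a_x;
  b_y = a_y;

  return b_x + b_y;
}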
/* Scalarize an INIT.  To recap, this is an assignment to a scalarizable
   reference from some form of constructor: CONSTRUCTOR, COMPLEX_CST or
   COMPLEX_EXPR.  If RHS is NULL, it should be treated as an empty
   CONSTRUCTOR.  */

static void
scalarize_init (struct sra_elt *lhs_elt, tree rhs, block_stmt_iterator *bsi)
{
  tree list = NULL;

  /* Generate initialization statements for all members extant in the RHS.  */
  if (rhs)
    generate_element_init (lhs_elt, rhs, &list);

  /* CONSTRUCTOR is defined such that any member not mentioned is assigned
     a zero value.  Initialize the rest of the instantiated elements.  */
  generate_element_zero (lhs_elt, &list);
  if (list == NULL)
    return;

  if (lhs_elt->use_block_copy)
    {
      /* Since LHS is not fully instantiated, we must leave the structure
	 assignment in place.  Treating this case differently from a USE
	 exposes constants to later optimizations.  */
      mark_all_v_defs (expr_first (list));
      sra_insert_after (bsi, list);
    }
  else
    {
      /* The LHS is fully instantiated.  The list of initializations
	 replaces the original structure assignment.  */
      mark_all_v_defs (bsi_stmt (*bsi));
      sra_replace (bsi, list);
    }
}
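/* A hedged, source-level sketch of scalarize_init on a partial
   CONSTRUCTOR.  Members not mentioned in the initializer are
   explicitly zeroed by generate_element_zero.  Names are invented
   for illustration.  */

int
init_partial (void)
{
  /* was: struct pair s = { .x = 7 };  */
  int s_x = 7;   /* from generate_element_init */
  int s_y = 0;   /* from generate_element_zero: unmentioned => zero */
  return s_x + s_y;
}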
/* A subroutine of scalarize_ldst called via walk_tree.  Set
   TREE_THIS_NOTRAP on all INDIRECT_REFs.  */

static tree
mark_notrap (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
{
  tree t = *tp;

  if (TREE_CODE (t) == INDIRECT_REF)
    {
      TREE_THIS_NOTRAP (t) = 1;
      *walk_subtrees = 0;
    }
  else if (DECL_P (t) || TYPE_P (t))
    *walk_subtrees = 0;

  return NULL;
}
/* Scalarize a LDST.  To recap, this is an assignment between one scalarizable
   reference ELT and one non-scalarizable reference OTHER.  IS_OUTPUT is true
   if ELT is on the left-hand side.  */

static void
scalarize_ldst (struct sra_elt *elt, tree other,
		block_stmt_iterator *bsi, bool is_output)
{
  /* Shouldn't have gotten called for a scalar.  */
  if (elt->replacement)
    abort ();

  if (elt->use_block_copy)
    {
      /* Since ELT is not fully instantiated, we have to leave the
	 block copy in place.  Treat this as a USE.  */
      scalarize_use (elt, NULL, bsi, is_output);
    }
  else
    {
      /* The interesting case is when ELT is fully instantiated.  In this
	 case we can have each element stored/loaded directly to/from the
	 corresponding slot in OTHER.  This avoids a block copy.  */

      tree list = NULL, stmt = bsi_stmt (*bsi);

      mark_all_v_defs (stmt);
      generate_copy_inout (elt, is_output, other, &list);
      if (list == NULL)
	abort ();

      /* Preserve EH semantics.  */
      if (stmt_ends_bb_p (stmt))
	{
	  tree_stmt_iterator tsi;
	  tree first;

	  /* Extract the first statement from LIST.  */
	  tsi = tsi_start (list);
	  first = tsi_stmt (tsi);
	  tsi_delink (&tsi);

	  /* Replace the old statement with this new representative.  */
	  bsi_replace (bsi, first, true);

	  if (!tsi_end_p (tsi))
	    {
	      /* If any reference would trap, then they all would.  And more
		 to the point, the first would.  Therefore none of the rest
		 will trap since the first didn't.  Indicate this by
		 iterating over the remaining statements and set
		 TREE_THIS_NOTRAP in all INDIRECT_REFs.  */
	      do
		{
		  walk_tree (tsi_stmt_ptr (tsi), mark_notrap, NULL, NULL);
		  tsi_next (&tsi);
		}
	      while (!tsi_end_p (tsi));

	      insert_edge_copies (list, bsi->bb);
	    }
	}
      else
	sra_replace (bsi, list);
    }
}
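/* A hedged, source-level sketch of the fully instantiated LDST case:
   for "*dst = s;" each element is stored directly into the
   corresponding slot of the non-scalarizable side, avoiding a block
   copy of s.  The struct and names are invented for illustration.  */

struct slot { int x, y; };

void
store_pair (struct slot *dst, int s_x, int s_y)
{
  /* was: *dst = s;  */
  dst->x = s_x;   /* if this first store traps, the original block
		     copy would have trapped too */
  dst->y = s_y;   /* so the remaining stores can be marked
		     TREE_THIS_NOTRAP by mark_notrap */
}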
/* Generate initializations for all scalarizable parameters.  */

static void
scalarize_parms (void)
{
  tree list = NULL;
  size_t i;

  EXECUTE_IF_SET_IN_BITMAP (needs_copy_in, 0, i,
    {
      tree var = referenced_var (i);
      struct sra_elt *elt = lookup_element (NULL, var, NULL, NO_INSERT);
      generate_copy_inout (elt, true, var, &list);
    });

  if (list)
    insert_edge_copies (list, ENTRY_BLOCK_PTR);
}
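/* A hedged, source-level sketch of scalarize_parms: replacements for
   an aggregate parameter are loaded once, on the edge leaving the
   entry block.  The struct and names are invented for illustration.  */

struct arg { int x, y; };

int
entry_copy_in (struct arg p)
{
  int p_x = p.x;   /* copy-in emitted at function entry */
  int p_y = p.y;
  return p_x * p_y;   /* the body then uses only the scalars */
}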
/* Entry point to phase 4.  Update the function to match replacements.  */

static void
scalarize_function (void)
{
  static const struct sra_walk_fns fns = {
    scalarize_use, scalarize_copy, scalarize_init, scalarize_ldst, false
  };

  sra_walk_function (&fns);
  scalarize_parms ();
  bsi_commit_edge_inserts (NULL);
}
/* Debug helper function.  Print ELT in a nice human-readable format.  */

static void
dump_sra_elt_name (FILE *f, struct sra_elt *elt)
{
  if (elt->parent && TREE_CODE (elt->parent->type) == COMPLEX_TYPE)
    {
      fputs (elt->element == integer_zero_node ? "__real__ " : "__imag__ ", f);
      dump_sra_elt_name (f, elt->parent);
    }
  else
    {
      if (elt->parent)
	dump_sra_elt_name (f, elt->parent);
      if (DECL_P (elt->element))
	{
	  if (TREE_CODE (elt->element) == FIELD_DECL)
	    fputc ('.', f);
	  print_generic_expr (f, elt->element, dump_flags);
	}
      else
	fprintf (f, "[" HOST_WIDE_INT_PRINT_DEC "]",
		 TREE_INT_CST_LOW (elt->element));
    }
}

/* Likewise, but callable from the debugger.  */

void
debug_sra_elt_name (struct sra_elt *elt)
{
  dump_sra_elt_name (stderr, elt);
  fputc ('\n', stderr);
}
/* Main entry point. */
static void
tree_sra (void)
{
  /* Initialize local variables.  */
  gcc_obstack_init (&sra_obstack);
  sra_candidates = BITMAP_XMALLOC ();
  needs_copy_in = BITMAP_XMALLOC ();
  sra_type_decomp_cache = BITMAP_XMALLOC ();
  sra_type_inst_cache = BITMAP_XMALLOC ();
  sra_map = htab_create (101, sra_elt_hash, sra_elt_eq, NULL);

  /* Scan.  If we find anything, instantiate and scalarize.  */
  if (find_candidates_for_sra ())
    {
      scan_function ();
      decide_instantiations ();
      scalarize_function ();
    }

  /* Free allocated memory.  */
  htab_delete (sra_map);
  sra_map = NULL;
  BITMAP_XFREE (sra_candidates);
  BITMAP_XFREE (needs_copy_in);
  BITMAP_XFREE (sra_type_decomp_cache);
  BITMAP_XFREE (sra_type_inst_cache);
  obstack_free (&sra_obstack, NULL);
}
static bool
......