Commit 38b0dcb8 by Zdenek Dvorak, committed by Zdenek Dvorak

re PR tree-optimization/18048 (mgrid loop performance regression with ivopts (register pressure))

	PR tree-optimization/18048
	* fold-const.c (try_move_mult_to_index): New function.
	(fold): Use try_move_mult_to_index.
	* tree-ssa-loop-ivopts.c (try_add_cand_for): Prefer common candidates.
	* tree-ssa-loop-niter.c (number_of_iterations_cond): Produce
	an all-ones unsigned constant without extra bits.
	* tree.c (build_low_bits_mask): New function.
	* tree.h (build_low_bits_mask): Declare.

From-SVN: r89708
parent 89e73849
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2004-10-27  Zdenek Dvorak  <dvorakz@suse.cz>
+
+	PR tree-optimization/18048
+	* fold-const.c (try_move_mult_to_index): New function.
+	(fold): Use try_move_mult_to_index.
+	* tree-ssa-loop-ivopts.c (try_add_cand_for): Prefer common candidates.
+	* tree-ssa-loop-niter.c (number_of_iterations_cond): Produce
+	an all-ones unsigned constant without extra bits.
+	* tree.c (build_low_bits_mask): New function.
+	* tree.h (build_low_bits_mask): Declare.
+
 2004-10-27  David Edelsohn  <edelsohn@gnu.org>
 
 	PR target/17956
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -5967,6 +5967,84 @@ tree_swap_operands_p (tree arg0, tree arg1, bool reorder)
   return 0;
 }
 
+/* Tries to replace &a[idx] CODE s * delta with &a[idx CODE delta], if s is
+   step of the array.  TYPE is the type of the expression.  ADDR is the
+   address.  MULT is the multiplicative expression.  If the function
+   succeeds, the new address expression is returned.  Otherwise NULL_TREE
+   is returned.  */
+
+static tree
+try_move_mult_to_index (tree type, enum tree_code code, tree addr, tree mult)
+{
+  tree s, delta, step;
+  tree arg0 = TREE_OPERAND (mult, 0), arg1 = TREE_OPERAND (mult, 1);
+  tree ref = TREE_OPERAND (addr, 0), pref;
+  tree ret, pos;
+  tree itype;
+
+  STRIP_NOPS (arg0);
+  STRIP_NOPS (arg1);
+
+  if (TREE_CODE (arg0) == INTEGER_CST)
+    {
+      s = arg0;
+      delta = arg1;
+    }
+  else if (TREE_CODE (arg1) == INTEGER_CST)
+    {
+      s = arg1;
+      delta = arg0;
+    }
+  else
+    return NULL_TREE;
+
+  for (;; ref = TREE_OPERAND (ref, 0))
+    {
+      if (TREE_CODE (ref) == ARRAY_REF)
+        {
+          step = array_ref_element_size (ref);
+
+          if (TREE_CODE (step) != INTEGER_CST)
+            continue;
+
+          itype = TREE_TYPE (step);
+
+          /* If the type sizes do not match, we might run into problems
+             when one of them would overflow.  */
+          if (TYPE_PRECISION (itype) != TYPE_PRECISION (type))
+            continue;
+
+          if (!operand_equal_p (step, fold_convert (itype, s), 0))
+            continue;
+
+          delta = fold_convert (itype, delta);
+          break;
+        }
+
+      if (!handled_component_p (ref))
+        return NULL_TREE;
+    }
+
+  /* We found the suitable array reference.  So copy everything up to it,
+     and replace the index.  */
+
+  pref = TREE_OPERAND (addr, 0);
+  ret = copy_node (pref);
+  pos = ret;
+
+  while (pref != ref)
+    {
+      pref = TREE_OPERAND (pref, 0);
+      TREE_OPERAND (pos, 0) = copy_node (pref);
+      pos = TREE_OPERAND (pos, 0);
+    }
+
+  TREE_OPERAND (pos, 1) = fold (build2 (code, itype,
+                                        TREE_OPERAND (pos, 1),
+                                        delta));
+
+  return build1 (ADDR_EXPR, type, ret);
+}
+
 /* Perform constant folding and related simplification of EXPR.
    The related simplifications include x*1 => x, x*0 => 0, etc.,
    and application of the associative law.
@@ -6602,6 +6680,24 @@ fold (tree expr)
                                                   alt0, alt1)),
                             same));
         }
 
+      /* Try replacing &a[i1] + c * i2 with &a[i1 + i2], if c is step
+         of the array.  The loop optimizer sometimes produces this kind
+         of expression.  */
+      if (TREE_CODE (arg0) == ADDR_EXPR
+          && TREE_CODE (arg1) == MULT_EXPR)
+        {
+          tem = try_move_mult_to_index (type, PLUS_EXPR, arg0, arg1);
+          if (tem)
+            return fold (tem);
+        }
+      else if (TREE_CODE (arg1) == ADDR_EXPR
+               && TREE_CODE (arg0) == MULT_EXPR)
+        {
+          tem = try_move_mult_to_index (type, PLUS_EXPR, arg1, arg0);
+          if (tem)
+            return fold (tem);
+        }
     }
   else
     {
@@ -6974,6 +7070,17 @@ fold (tree expr)
                                           &diff))
             return build_int_cst_type (type, diff);
         }
 
+      /* Try replacing &a[i1] - c * i2 with &a[i1 - i2], if c is step
+         of the array.  The loop optimizer sometimes produces this kind
+         of expression.  */
+      if (TREE_CODE (arg0) == ADDR_EXPR
+          && TREE_CODE (arg1) == MULT_EXPR)
+        {
+          tem = try_move_mult_to_index (type, MINUS_EXPR, arg0, arg1);
+          if (tem)
+            return fold (tem);
+        }
+
       if (TREE_CODE (arg0) == MULT_EXPR
           && TREE_CODE (arg1) == MULT_EXPR
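
For illustration: the fold above rewrites address arithmetic of the form `&a[i1] + s * i2`, where `s` equals the array's element size, into `&a[i1 + i2]`. A minimal standalone sketch of the pointer equivalence the transformation relies on (hypothetical example, not part of this commit):

```c
#include <assert.h>

int a[100];

/* The shape the loop optimizer tends to produce:
   &a[i1] + sizeof (int) * i2, as byte-based pointer arithmetic.  */
int *
addr_before (long i1, long i2)
{
  return (int *) ((char *) &a[i1] + sizeof (int) * i2);
}

/* The shape try_move_mult_to_index folds it into: &a[i1 + i2].  */
int *
addr_after (long i1, long i2)
{
  return &a[i1 + i2];
}

int
main (void)
{
  assert (addr_before (10, 5) == addr_after (10, 5));
  return 0;
}
```

Folding the multiplication back into the index keeps the whole address in a form that later passes, ivopts in particular, can treat as a single induction variable rather than a base plus a separately computed byte offset, which is what PR 18048 is about.
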
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -3603,20 +3603,32 @@ try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
   bitmap act_inv = BITMAP_XMALLOC ();
   unsigned i;
   struct cost_pair *cp;
+  bitmap_iterator bi;
+  struct iv_cand *cand;
+  bitmap depends_on;
 
   bitmap_copy (best_ivs, ivs);
   bitmap_copy (best_inv, inv);
 
-  for (i = 0; i < use->n_map_members; i++)
+  /* First try important candidates.  Only if it fails, try the specific ones.
+     Rationale -- in loops with many variables the best choice often is to use
+     just one generic biv.  If we added here many ivs specific to the uses,
+     the optimization algorithm later would be likely to get stuck in a local
+     minimum, thus causing us to create too many ivs.  The approach from
+     few ivs to more seems more likely to be successful -- starting from few
+     ivs, replacing an expensive use by a specific iv should always be a
+     win.  */
+  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
     {
-      cp = use->cost_map + i;
-      if (cp->cost == INFTY)
+      cand = iv_cand (data, i);
+
+      if (get_use_iv_cost (data, use, cand, &depends_on) == INFTY)
         continue;
 
       bitmap_copy (act_ivs, ivs);
-      bitmap_set_bit (act_ivs, cp->cand->id);
-      if (cp->depends_on)
-        bitmap_a_or_b (act_inv, inv, cp->depends_on);
+      bitmap_set_bit (act_ivs, cand->id);
+      if (depends_on)
+        bitmap_a_or_b (act_inv, inv, depends_on);
       else
         bitmap_copy (act_inv, inv);
       act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
@@ -3629,6 +3641,35 @@ try_add_cand_for (struct ivopts_data *data, bitmap ivs, bitmap inv,
         }
     }
 
+  if (best_cost == INFTY)
+    {
+      for (i = 0; i < use->n_map_members; i++)
+        {
+          cp = use->cost_map + i;
+          if (cp->cost == INFTY)
+            continue;
+
+          /* Already tried this.  */
+          if (cp->cand->important)
+            continue;
+
+          bitmap_copy (act_ivs, ivs);
+          bitmap_set_bit (act_ivs, cp->cand->id);
+          if (cp->depends_on)
+            bitmap_a_or_b (act_inv, inv, cp->depends_on);
+          else
+            bitmap_copy (act_inv, inv);
+          act_cost = set_cost_up_to (data, act_ivs, act_inv, use->id + 1);
+
+          if (act_cost < best_cost)
+            {
+              best_cost = act_cost;
+              bitmap_copy (best_ivs, act_ivs);
+              bitmap_copy (best_inv, act_inv);
+            }
+        }
+    }
+
   bitmap_copy (ivs, best_ivs);
   bitmap_copy (inv, best_inv);
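
The rationale comment above describes a two-phase search: consider only the "important" (generic) candidates first, and fall back to use-specific candidates when no important one is usable. A toy sketch of that control flow, detached from the real ivopts data structures (all names below are illustrative, not GCC API):

```c
#include <limits.h>

#define INFTY INT_MAX

/* Toy candidate: a cost for the use being processed plus an
   "important" flag.  Illustration only.  */
struct toy_cand
{
  int cost;
  int important;
};

/* Mirrors the two-phase preference of try_add_cand_for: scan the
   important (generic) candidates first, and look at the use-specific
   ones only if no important candidate was usable.  */
static int
pick_cand (const struct toy_cand *cands, int n)
{
  int best = -1, best_cost = INFTY, i;

  /* Phase 1: important candidates only.  */
  for (i = 0; i < n; i++)
    if (cands[i].important && cands[i].cost < best_cost)
      {
        best_cost = cands[i].cost;
        best = i;
      }

  /* Phase 2: the rest; important ones were already tried.  */
  if (best_cost == INFTY)
    for (i = 0; i < n; i++)
      if (!cands[i].important && cands[i].cost < best_cost)
        {
          best_cost = cands[i].cost;
          best = i;
        }

  return best;
}

int
main (void)
{
  struct toy_cand cands[] = { { 5, 0 }, { 7, 1 }, { INFTY, 1 } };
  /* The important candidate wins even though a specific one is cheaper.  */
  return pick_cand (cands, 3) == 1 ? 0 : 1;
}
```

The real function scores whole candidate sets via set_cost_up_to rather than single-candidate costs; the sketch keeps only the two-phase shape.
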
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -419,9 +419,10 @@ number_of_iterations_cond (tree type, tree base0, tree step0,
           d = EXEC_BINARY (LSHIFT_EXPR, niter_type,
                            build_int_cst_type (niter_type, 1), bits);
           s = EXEC_BINARY (RSHIFT_EXPR, niter_type, step0, bits);
-          bound = EXEC_BINARY (RSHIFT_EXPR, niter_type,
-                               build_int_cst (niter_type, -1),
-                               bits);
+          bound = build_low_bits_mask (niter_type,
+                                       (TYPE_PRECISION (niter_type)
+                                        - tree_low_cst (bits, 1)));
 
           assumption = fold (build2 (FLOOR_MOD_EXPR, niter_type, base1, d));
           assumption = fold (build2 (EQ_EXPR, boolean_type_node,
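
Per the ChangeLog, the point of this replacement is to "produce an all-ones unsigned constant without extra bits": right-shifting a -1 constant operates on the wide internal representation, where bits above the type's precision may also be set, whereas build_low_bits_mask constructs exactly the wanted low one-bits. A rough standalone illustration of the failure mode, assuming a hypothetical 8-bit unsigned type stored in a 64-bit word:

```c
#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  /* Hypothetical 8-bit type stored in a 64-bit word, shift count 3.  */
  unsigned prec = 8, bits = 3;
  uint64_t minus_one = ~UINT64_C (0);  /* -1 sign-extended into the word.  */

  /* Old approach: shift the over-wide all-ones constant.  The result
     keeps stray bits set above the 8-bit precision.  */
  uint64_t old_bound = minus_one >> bits;

  /* New approach: exactly PREC - BITS low one-bits, nothing above.  */
  uint64_t new_bound = minus_one >> (64 - (prec - bits));

  printf ("old: %#llx\nnew: %#llx\n",
          (unsigned long long) old_bound,   /* 0x1fffffffffffffff */
          (unsigned long long) new_bound);  /* 0x1f */
  return 0;
}
```
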
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -609,6 +609,40 @@ build_int_cst_wide (tree type, unsigned HOST_WIDE_INT low, HOST_WIDE_INT hi)
   return t;
 }
 
+/* Builds an integer constant in TYPE such that lowest BITS bits are ones
+   and the rest are zeros.  */
+
+tree
+build_low_bits_mask (tree type, unsigned bits)
+{
+  unsigned HOST_WIDE_INT low;
+  HOST_WIDE_INT high;
+  unsigned HOST_WIDE_INT all_ones = ~(unsigned HOST_WIDE_INT) 0;
+
+  gcc_assert (bits <= TYPE_PRECISION (type));
+
+  if (bits == TYPE_PRECISION (type)
+      && !TYPE_UNSIGNED (type))
+    {
+      /* Sign extended all-ones mask.  */
+      low = all_ones;
+      high = -1;
+    }
+  else if (bits <= HOST_BITS_PER_WIDE_INT)
+    {
+      low = all_ones >> (HOST_BITS_PER_WIDE_INT - bits);
+      high = 0;
+    }
+  else
+    {
+      bits -= HOST_BITS_PER_WIDE_INT;
+      low = all_ones;
+      high = all_ones >> (HOST_BITS_PER_WIDE_INT - bits);
+    }
+
+  return build_int_cst_wide (type, low, high);
+}
+
 /* Checks that X is integer constant that can be expressed in (unsigned)
    HOST_WIDE_INT without loss of precision.  */
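
As a sanity check of the two-word arithmetic in build_low_bits_mask, here is a standalone mirror of its unsigned branches, using a 64-bit stand-in for HOST_WIDE_INT (hypothetical test, not GCC code; the signed sign-extension case is omitted, and 0 < bits <= 128 is assumed):

```c
#include <assert.h>
#include <stdio.h>

typedef unsigned long long wide;   /* stand-in for HOST_WIDE_INT */
#define WIDE_BITS 64

/* Lowest BITS bits set across a (high, low) pair of words, as in the
   unsigned branches of build_low_bits_mask.  Assumes 0 < bits <= 128.  */
static void
low_bits_mask (unsigned bits, wide *low, wide *high)
{
  wide all_ones = ~(wide) 0;

  if (bits <= WIDE_BITS)
    {
      *low = all_ones >> (WIDE_BITS - bits);
      *high = 0;
    }
  else
    {
      bits -= WIDE_BITS;
      *low = all_ones;
      *high = all_ones >> (WIDE_BITS - bits);
    }
}

int
main (void)
{
  wide lo, hi;

  low_bits_mask (8, &lo, &hi);       /* fits in the low word */
  assert (lo == 0xff && hi == 0);

  low_bits_mask (72, &lo, &hi);      /* 64 low bits plus 8 high bits */
  assert (lo == ~(wide) 0 && hi == 0xff);

  puts ("ok");
  return 0;
}
```
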
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -3543,6 +3543,7 @@ tree fold_build_cleanup_point_expr (tree type, tree expr);
 extern tree build_fold_addr_expr_with_type (tree, tree);
 extern tree build_fold_indirect_ref (tree);
 extern tree constant_boolean_node (int, tree);
+extern tree build_low_bits_mask (tree, unsigned);
 extern bool tree_swap_operands_p (tree, tree, bool);
 extern enum tree_code swap_tree_comparison (enum tree_code);