Commit 18081149 by Xinliang David Li Committed by Xinliang David Li

IVOPT performance tuning patch.

IVOPT performance tuning patch. The main problem is a variant of maximal weight 
bipartite matching/assignment problem -- i.e., there is an additional global
cost function. The complexity of the algorithm to find the optimal solution is
> O(n^2). The existing algorithm in gcc tries to find the solution in 3 stages:
1) Find the initial solution set (dynamic programming style)
2) Extend the solution set
3) Prune the solution set.

The problem is that in step 1, the initial set tends to be too large so that
the final solution is very likely only locally optimal.

This patch addresses the problem and sees very large SPEC improvements.

Another area of problem is that ivopts often creates loop invariant expressions, and
such expressions increase register pressure which is not counted. This is addressed
in this patch.

The third main problem is that the profile data is not considered in cost computation.

The fourth problem is that loop invariant computation's cost is not properly adjusted.



There are more tuning opportunities, namely:

1) Do not check ivs dependency during ivs set pruning (this improves dealII 8% on core2)
2) Unconditionally consider all important candidates in partial set expansion (in addition
to the extended solution based on selected candidates)
3) revisit the two stage initial set computation.

From-SVN: r162653
parent 3c5273a9
2010-07-28 Xinliang David Li <davidxl@google.com>
* tree-ssa-loop-ivopts.c (avg_loop_niter): New function.
(dump_cand): Dump var_before/after.
(htab_inv_expr_eq): New function.
(htab_inv_expr_hash): New function.
(tree_ssa_iv_optimize_init): Support pseudo invariants.
(add_candidate_1): consider base type precision.
(set_use_iv_cost): New parameter.
(adjust_setup_cost): Use profile information.
(get_address_cost): Do not hard code width in computing address
offset limits.
(compare_aff_trees): New function.
(get_loop_invariant_expr_id): New function.
(get_computation_cost_at): New parameter and use profile information.
(get_computation_cost): New parameter.
(determine_use_iv_cost_generic): Pass new parameter.
(determine_use_iv_cost_address): Ditto.
(determine_use_iv_cost_condition): Ditto.
(autoinc_possible_for_pair): Ditto.
(determine_use_iv_costs): More dumps.
(iv_ca_get_num_inv_exprs): New function.
(iv_ca_recount_cost): Consider loop invariants in register pressure
cost.
(iv_ca_add_use): New parameter.
(iv_ca_dump): Better dumping.
(iv_ca_extend): New parameter.
(try_add_cand_for): Attempt to get better partial solution.
(try_improve_iv_set): Pass new parameter to iv_ca_extend.
(create_new_ivs): More dumps.
(rewrite_use_compare): Ditto.
(free_loop_data): More cleanup.
(tree_ssa_iv_optimize_finalize): Ditto.
2010-07-28 Kai Tietz <kai.tietz@onevision.com> 2010-07-28 Kai Tietz <kai.tietz@onevision.com>
* config/i386/i386.h (MCOUNT_NAME_BEFORE_PROLOGUE): New. * config/i386/i386.h (MCOUNT_NAME_BEFORE_PROLOGUE): New.
......
/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */
#define TYPE char*
/* Testing that only one induction variable is selected after IVOPT on
the given target instead of 3. */
/* For each x in [0, i_width), store (src1[x] + src2[x] + 1) >> 1 into
   dst[x].  All three pointers are accessed through the same index x.  */
void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
{
int x;
for( x = 0; x < i_width; x++ )
{
/* Sum of the two source elements plus one, shifted right by one.  */
dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
}
}
/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
/* { dg-final { cleanup-tree-dump "ivopts" } } */
/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */
#define TYPE char*
/* Testing on the given target, only one iv candidate instead of 3. */
/* Run i_width iterations; each one stores (*src1 + *src2 + 1) >> 1
   through dst and post-increments dst, src1 and src2.  The counter x
   only bounds the loop and is not used to index memory.  */
void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
{
int x;
for( x = 0; x < i_width; x++ )
{
/* Each pointer is advanced by one element as a side effect of the
   access.  */
*dst++ = ( *src1++ + *src2++ + 1 ) >> 1;
}
}
/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
/* { dg-final { cleanup-tree-dump "ivopts" } } */
/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */
#define TYPE char*
/* Make sure only 1 iv candidate is selected after IVOPT. */
/* Run i_width iterations; each one stores (*src1 + *src2 + 1) >> 1
   through dst, then advances dst, src1 and src2 by sizeof(TYPE).  The
   parameters are char pointers, so each advance is sizeof(TYPE) bytes.
   The counter x only bounds the loop.  */
void foo (int i_width, char* dst, char* src1, char* src2)
{
int x;
for( x = 0; x < i_width; x++ )
{
/* Accesses go through casts to TYPE before dereferencing.  */
*((TYPE)dst) = ( *((TYPE)src1) + *((TYPE)src2) + 1 ) >> 1;
/* Explicit pointer bumps, one per pointer, all with the same step.  */
dst+=sizeof(TYPE);
src1+=sizeof(TYPE);
src2+=sizeof(TYPE);
}
}
/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
/* { dg-final { cleanup-tree-dump "ivopts" } } */
/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
/* { dg-options "-O2 -m64 -fdump-tree-ivopts" } */
#ifndef TYPE
#define TYPE char*
#endif
/* Make sure only 1 iv candidate is selected. */
/* Store (*src1 + *src2 + 1) >> 1 through dst while dst is below the end
   pointer dstn = dst + i_width, post-incrementing dst, src1 and src2 on
   every iteration.  There is no integer loop counter; the exit test
   compares pointers.  */
void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
{
/* One-past-the-end bound for dst, computed once before the loop.  */
TYPE dstn= dst + i_width;
for( ; dst < dstn; )
{
*dst++ = ( *src1++ + *src2++ + 1 ) >> 1;
}
}
/* { dg-final { scan-tree-dump-times "PHI <ivtmp" 1 "ivopts"} } */
/* { dg-final { cleanup-tree-dump "ivopts" } } */
/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
#ifndef TYPE
#define TYPE char*
#endif
int a[400];
/* Testing inferred loop iteration from array -- exit test can be replaced. */
/* Loop while dst <= dstn (dstn = dst + i_width, inclusive bound).  Each
   iteration stores (src1[i] + src2[i] + 1 + a[i]) >> 1 into dst0[i],
   then advances dst by one element and i by 16.  dst itself is used only
   for the exit test; all memory accesses go through the index i.  */
void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
{
TYPE dstn= dst + i_width;
/* Remember the initial dst so the stores can be indexed by i.  */
TYPE dst0 = dst;
unsigned long long i = 0;
for( ; dst <= dstn; )
{
/* The file-scope array a is indexed by i as well.  */
dst0[i] = ( src1[i] + src2[i] + 1 +a[i]) >> 1;
dst++;
/* Index stride of 16 per iteration.  */
i += 16;
}
}
/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */
/* { dg-final { cleanup-tree-dump "ivopts" } } */
/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
#ifndef TYPE
#define TYPE char*
#endif
extern int a[];
/* Can not infer loop iteration from array -- exit test can not be replaced. */
/* Loop while dst <= dstn (dstn = dst + i_width, inclusive bound).  Each
   iteration stores (src1[i] + src2[i] + 1 + a[i]) >> 1 into dst0[i],
   then advances dst by one element and i by 16.  dst itself is used only
   for the exit test; all memory accesses go through the index i.  */
void foo (int i_width, TYPE dst, TYPE src1, TYPE src2)
{
TYPE dstn= dst + i_width;
/* Remember the initial dst so the stores can be indexed by i.  */
TYPE dst0 = dst;
unsigned long long i = 0;
for( ; dst <= dstn; )
{
/* a is indexed by i; its declaration preceding this function is an
   extern array with no size given.  */
dst0[i] = ( src1[i] + src2[i] + 1 +a[i]) >> 1;
dst++;
/* Index stride of 16 per iteration.  */
i += 16;
}
}
/* { dg-final { scan-tree-dump-times "Replacing" 0 "ivopts"} } */
/* { dg-final { cleanup-tree-dump "ivopts" } } */
/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
/* The test 'if (p2 > p_limit2)' can be replaced, so iv p2 can be
* eliminated. */
/* Advance p and p2 in lock step while p <= p_limit (p_limit = p + N1).
   After each advance, leave the loop if p2 has passed p_limit2
   (p_limit2 = p2 + N2); otherwise add *p to the running sum.  Returns
   the sum.  */
long foo(long* p, long* p2, int N1, int N2)
{
/* i is initialised but not referenced again in this function.  */
int i = 0;
long* p_limit = p + N1;
long* p_limit2 = p2 + N2;
long s = 0;
while (p <= p_limit)
{
p++;
p2++;
/* Second exit test, on p2 only.  */
if (p2 > p_limit2)
break;
/* Reads through p after it has been incremented.  */
s += (*p);
}
return s;
}
/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */
/* { dg-final { cleanup-tree-dump "ivopts" } } */
/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
/* Exit tests 'i < N1' and 'p2 > p_limit2' can be replaced, so
* two ivs i and p2 can be eliminated. */
/* Loop while i < N1, advancing p, p2 and i by one each iteration.  After
   the advance, leave the loop if p2 has passed p_limit2
   (p_limit2 = p2 + N2); otherwise add *p to the running sum.  Returns
   the sum.  */
long foo(long* p, long* p2, int N1, int N2)
{
int i = 0;
long* p_limit2 = p2 + N2;
long s = 0;
while (i < N1)
{
p++;
p2++;
i++;
/* Second exit test, on p2 only.  */
if (p2 > p_limit2)
break;
/* Reads through p after it has been incremented.  */
s += (*p);
}
return s;
}
/* { dg-final { scan-tree-dump-times "Replacing" 2 "ivopts"} } */
/* { dg-final { cleanup-tree-dump "ivopts" } } */
/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
/* iv p2 can be eliminated. */
/* Loop while i < N1, advancing p2 and i by one each iteration.  After
   the advance, leave the loop if p2 has passed p_limit2
   (p_limit2 = p2 + N2); otherwise add p[i] to the running sum.  p is
   never modified; it is only indexed by i.  Returns the sum.  */
long foo(long* p, long* p2, int N1, int N2)
{
unsigned long i = 0;
long* p_limit2 = p2 + N2;
long s = 0;
/* i is unsigned long and N1 is int, so N1 is converted to unsigned long
   for this comparison.  */
while (i < N1)
{
p2++;
i++;
/* Second exit test, on p2 only.  */
if (p2 > p_limit2)
break;
/* Indexed with the already-incremented i.  */
s += p[i];
}
return s;
}
/* { dg-final { scan-tree-dump-times "Replacing" 1 "ivopts"} } */
/* { dg-final { cleanup-tree-dump "ivopts" } } */
/* { dg-do compile { target {{ i?86-*-* x86_64-*-* } && lp64 } } } */
/* { dg-options "-O2 -m64 -fdump-tree-ivopts-details" } */
/* iv i's step 16 so its period is smaller than the max iterations
* i.e. replacing if (p2 > p_limit2) with testing of i may result in
* overflow. */
/* Loop while i < N1, advancing p2 by one and i by 16 each iteration.
   After the advance, leave the loop if p2 has passed p_limit2
   (p_limit2 = p2 + N2); otherwise add p[i] to the running sum.  p is
   never modified; it is only indexed by i.  Returns the sum.  */
long foo(long* p, long* p2, int N1, int N2)
{
unsigned long i = 0;
long* p_limit2 = p2 + N2;
long s = 0;
/* i is unsigned long and N1 is int, so N1 is converted to unsigned long
   for this comparison.  */
while (i < N1)
{
p2++;
/* Index stride of 16 per iteration, versus a stride of one for p2.  */
i += 16;
/* Second exit test, on p2 only.  */
if (p2 > p_limit2)
break;
/* Indexed with the already-advanced i.  */
s += p[i];
}
return s;
}
/* { dg-final { scan-tree-dump-times "Replacing" 0 "ivopts"} } */
/* { dg-final { cleanup-tree-dump "ivopts" } } */
...@@ -8,5 +8,5 @@ void main (void) ...@@ -8,5 +8,5 @@ void main (void)
f2 (); f2 ();
} }
/* { dg-final { scan-tree-dump-times "!= 0" 4 "ivopts" } } */ /* { dg-final { scan-tree-dump-times "!= 0" 5 "ivopts" } } */
/* { dg-final { cleanup-tree-dump "ivopts" } } */ /* { dg-final { cleanup-tree-dump "ivopts" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment