Commit 92781ff1 by Michael Matz Committed by Michael Matz

re PR middle-end/90796 (GCC: O2 vs O3 output differs on simple test)

Fix PR middle-end/90796

        PR middle-end/90796
        * gimple-loop-jam.c (any_access_function_variant_p): New function.
        (adjust_unroll_factor): Use it to constrain safety, new parameter.
        (tree_loop_unroll_and_jam): Adjust call and profitable unroll factor.

testsuite/
        * gcc.dg/unroll-and-jam.c: Add three invalid and one valid case.

From-SVN: r277287
parent 9107d652
2019-10-22 Michael Matz <matz@suse.de>
PR middle-end/90796
* gimple-loop-jam.c (any_access_function_variant_p): New function.
(adjust_unroll_factor): Use it to constrain safety, new parameter.
(tree_loop_unroll_and_jam): Adjust call and profitable unroll factor.
2019-10-22 Richard Biener <rguenther@suse.de> 2019-10-22 Richard Biener <rguenther@suse.de>
PR tree-optimization/92173 PR tree-optimization/92173
...@@ -360,16 +360,33 @@ fuse_loops (class loop *loop) ...@@ -360,16 +360,33 @@ fuse_loops (class loop *loop)
rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop); rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop);
} }
/* Return true if any of the access functions for dataref A
isn't invariant with respect to loop LOOP_NEST. */
static bool
any_access_function_variant_p (const struct data_reference *a,
const class loop *loop_nest)
{
unsigned int i;
vec<tree> fns = DR_ACCESS_FNS (a);
tree t;
FOR_EACH_VEC_ELT (fns, i, t)
if (!evolution_function_is_invariant_p (t, loop_nest->num))
return true;
return false;
}
/* Returns true if the distance in DDR can be determined and adjusts /* Returns true if the distance in DDR can be determined and adjusts
the unroll factor in *UNROLL to make unrolling valid for that distance. the unroll factor in *UNROLL to make unrolling valid for that distance.
Otherwise return false. Otherwise return false. DDR is with respect to the outer loop of INNER.
If this data dep can lead to a removed memory reference, increment If this data dep can lead to a removed memory reference, increment
*REMOVED and adjust *PROFIT_UNROLL to be the necessary unroll factor *REMOVED and adjust *PROFIT_UNROLL to be the necessary unroll factor
for this to happen. */ for this to happen. */
static bool static bool
adjust_unroll_factor (struct data_dependence_relation *ddr, adjust_unroll_factor (class loop *inner, struct data_dependence_relation *ddr,
unsigned *unroll, unsigned *profit_unroll, unsigned *unroll, unsigned *profit_unroll,
unsigned *removed) unsigned *removed)
{ {
...@@ -392,9 +409,59 @@ adjust_unroll_factor (struct data_dependence_relation *ddr, ...@@ -392,9 +409,59 @@ adjust_unroll_factor (struct data_dependence_relation *ddr,
gcc_unreachable (); gcc_unreachable ();
else if ((unsigned)dist >= *unroll) else if ((unsigned)dist >= *unroll)
; ;
else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1) else if (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
|| (lambda_vector_zerop (dist_v + 1, DDR_NB_LOOPS (ddr) - 1) {
&& dist > 0)) /* We have (a,0) with a < N, so this will be transformed into
(0,0) after unrolling by N. This might potentially be a
problem, if it's not a read-read dependency. */
if (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr)))
;
else
{
/* So, at least one is a write, and we might reduce the
distance vector to (0,0). This is still no problem
if both data-refs are affine with respect to the inner
loops. But if one of them is invariant with respect
to an inner loop our reordering implicit in loop fusion
corrupts the program, as our data dependences don't
capture this. E.g. for:
for (0 <= i < n)
for (0 <= j < m)
a[i][0] = a[i+1][0] + 2; // (1)
b[i][j] = b[i+1][j] + 2; // (2)
the distance vector for both statements is (-1,0),
but exchanging the order for (2) is okay, while
for (1) it is not. To see this, write out the original
accesses (assume m is 2):
a i j original
0 0 0 r a[1][0] b[1][0]
1 0 0 w a[0][0] b[0][0]
2 0 1 r a[1][0] b[1][1]
3 0 1 w a[0][0] b[0][1]
4 1 0 r a[2][0] b[2][0]
5 1 0 w a[1][0] b[1][0]
after unroll-by-2 and fusion the accesses are done in
this order (from column a): 0,1, 4,5, 2,3, i.e. this:
a i j transformed
0 0 0 r a[1][0] b[1][0]
1 0 0 w a[0][0] b[0][0]
4 1 0 r a[2][0] b[2][0]
5 1 0 w a[1][0] b[1][0]
2 0 1 r a[1][0] b[1][1]
3 0 1 w a[0][0] b[0][1]
Note how access 2 accesses the same element as access 5
for array 'a' but not for array 'b'. */
if (any_access_function_variant_p (DDR_A (ddr), inner)
&& any_access_function_variant_p (DDR_B (ddr), inner))
;
else
/* And if any dataref of this pair is invariant with
respect to the inner loop, we have no chance than
to reduce the unroll factor. */
*unroll = dist;
}
}
else if (lambda_vector_lexico_pos (dist_v + 1, DDR_NB_LOOPS (ddr) - 1))
; ;
else else
*unroll = dist; *unroll = dist;
...@@ -486,7 +553,7 @@ tree_loop_unroll_and_jam (void) ...@@ -486,7 +553,7 @@ tree_loop_unroll_and_jam (void)
/* Now check the distance vector, for determining a sensible /* Now check the distance vector, for determining a sensible
outer unroll factor, and for validity of merging the inner outer unroll factor, and for validity of merging the inner
loop copies. */ loop copies. */
if (!adjust_unroll_factor (ddr, &unroll_factor, &profit_unroll, if (!adjust_unroll_factor (loop, ddr, &unroll_factor, &profit_unroll,
&removed)) &removed))
{ {
/* Couldn't get the distance vector. For two reads that's /* Couldn't get the distance vector. For two reads that's
...@@ -506,7 +573,7 @@ tree_loop_unroll_and_jam (void) ...@@ -506,7 +573,7 @@ tree_loop_unroll_and_jam (void)
to ignore all profitability concerns and apply the transformation to ignore all profitability concerns and apply the transformation
always. */ always. */
if (!PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT)) if (!PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
profit_unroll = 2; profit_unroll = MAX(2, profit_unroll);
else if (removed * 100 / datarefs.length () else if (removed * 100 / datarefs.length ()
< (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT)) < (unsigned)PARAM_VALUE (PARAM_UNROLL_JAM_MIN_PERCENT))
profit_unroll = 1; profit_unroll = 1;
......
2019-10-22 Michael Matz <matz@suse.de>
PR middle-end/90796
* gcc.dg/unroll-and-jam.c: Add three invalid and one valid case.
2019-10-22 Richard Biener <rguenther@suse.de> 2019-10-22 Richard Biener <rguenther@suse.de>
PR tree-optimization/92173 PR tree-optimization/92173
......
...@@ -31,10 +31,10 @@ void checkb(void) ...@@ -31,10 +31,10 @@ void checkb(void)
//printf(" %d\n", sum); //printf(" %d\n", sum);
} }
unsigned i, j;
#define TEST(name, body, test) \ #define TEST(name, body, test) \
static void __attribute__((noinline,noclone)) name (unsigned long n, unsigned long m) \ static void __attribute__((noinline,noclone)) name (unsigned long n, unsigned long m) \
{ \ { \
unsigned long i, j; \
for (i = 1; i < m; i++) { \ for (i = 1; i < m; i++) { \
for (j = 1; j < n; j++) { \ for (j = 1; j < n; j++) { \
body; \ body; \
...@@ -58,9 +58,14 @@ TEST(foo3, aa[i+1][j-1]=aa[i][j] * aa[i][j] / 2, checkaa()) //notok, -1,1 ...@@ -58,9 +58,14 @@ TEST(foo3, aa[i+1][j-1]=aa[i][j] * aa[i][j] / 2, checkaa()) //notok, -1,1
TEST(foo4, aa[i][j] = aa[i-1][j+1] * aa[i-1][j+1] / 2, checkaa()) //notok, -1,1 TEST(foo4, aa[i][j] = aa[i-1][j+1] * aa[i-1][j+1] / 2, checkaa()) //notok, -1,1
TEST(foo5, aa[i][j] = aa[i+1][j+1] * aa[i+1][j+1] / 2, checkaa()) //ok, 1,1 TEST(foo5, aa[i][j] = aa[i+1][j+1] * aa[i+1][j+1] / 2, checkaa()) //ok, 1,1
TEST(foo6, aa[i][j] = aa[i+1][j] * aa[i+1][j] / 2, checkaa()) //ok, -1,0 TEST(foo6, aa[i][j] = aa[i+1][j] * aa[i+1][j] / 2, checkaa()) //ok, -1,0
TEST(foo61, aa[i][0] = aa[i+1][0] * aa[i+1][0] / 2, checkaa()) //notok, -1,0
TEST(foo62, aa[i][j/2] = aa[i+1][j/2] * aa[i+1][j/2] / 2, checkaa()) //notok, not affine
TEST(foo63, aa[i][j%2] = aa[i+1][j%2] * aa[i+1][j%2] / 2, checkaa()) //notok, not affine
TEST(foo7, aa[i+1][j] = aa[i][j] * aa[i][j] / 2, checkaa()) //ok, 1,0 TEST(foo7, aa[i+1][j] = aa[i][j] * aa[i][j] / 2, checkaa()) //ok, 1,0
TEST(foo9, b[j] = 3*b[j+1] + 1, checkb()) //notok, 0,-1 TEST(foo9, b[j] = 3*b[j+1] + 1, checkb()) //notok, 0,-1
TEST(foo10, b[j] = 3*b[j] + 1, checkb()) //ok, 0,0 TEST(foo10, b[j] = 3*b[j] + 1, checkb()) //ok, 0,0
extern int f;
TEST(foo11, f = b[i-1] = 1 + 3* b[i+1], checkb()) //ok, 2,0 but must reduce unroll factor to 2, (it would be incorrect with unroll-by-3, which the profitability would suggest)
/* foo8 should work as well, but currently doesn't because the distance /* foo8 should work as well, but currently doesn't because the distance
vectors we compute are too pessimistic. We compute vectors we compute are too pessimistic. We compute
...@@ -68,6 +73,7 @@ TEST(foo10, b[j] = 3*b[j] + 1, checkb()) //ok, 0,0 ...@@ -68,6 +73,7 @@ TEST(foo10, b[j] = 3*b[j] + 1, checkb()) //ok, 0,0
and the last one causes us to lose. */ and the last one causes us to lose. */
TEST(foo8, b[j+1] = 3*b[j] + 1, checkb()) //ok, 0,1 TEST(foo8, b[j+1] = 3*b[j] + 1, checkb()) //ok, 0,1
int f;
unsigned int a[1024]; unsigned int a[1024];
unsigned int b[1024]; unsigned int b[1024];
unsigned int aa[16][1024]; unsigned int aa[16][1024];
...@@ -88,10 +94,12 @@ void init(void) ...@@ -88,10 +94,12 @@ void init(void)
printf(" %s\n", #name); \ printf(" %s\n", #name); \
init();for(i=0;i<4;i++)name##noopt(32,8); checka = checksum; \ init();for(i=0;i<4;i++)name##noopt(32,8); checka = checksum; \
init();for(i=0;i<4;i++)name(32,8); \ init();for(i=0;i<4;i++)name(32,8); \
if (checka != checksum) fail = 1; \
printf("%sok %s\n", checka != checksum ? "NOT " : "", #name); printf("%sok %s\n", checka != checksum ? "NOT " : "", #name);
int main() int main()
{ {
int fail = 0;
int i; int i;
unsigned checka; unsigned checka;
RUN(foo1); RUN(foo1);
...@@ -100,12 +108,18 @@ int main() ...@@ -100,12 +108,18 @@ int main()
RUN(foo4); RUN(foo4);
RUN(foo5); RUN(foo5);
RUN(foo6); RUN(foo6);
RUN(foo61);
RUN(foo62);
RUN(foo63);
RUN(foo7); RUN(foo7);
RUN(foo8); RUN(foo8);
RUN(foo9); RUN(foo9);
RUN(foo10); RUN(foo10);
return 0; RUN(foo11);
if (fail)
__builtin_abort();
return fail;
} }
/* Five loops should be unroll-jammed (actually six, but see above). */ /* Six loops should be unroll-jammed (actually seven, but see above). */
/* { dg-final { scan-tree-dump-times "applying unroll and jam" 5 "unrolljam" } } */ /* { dg-final { scan-tree-dump-times "applying unroll and jam" 6 "unrolljam" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment