Commit 785aa2a7 by Razya Ladelsky

re PR tree-optimization/38275 (bootstrap failure when -ftree-parallelize-loops=4 is enabled)

PR tree-optimization/38275
	* tree-parloops.c (parallelize_loops): Replace profitability condition
	for expected number of iterations.
	* testsuite/gcc.dg/autopar/reduc-1char.c: Increase number
	of iterations. Adjust the logic accordingly.
	* testsuite/gcc.dg/autopar/reduc-2char.c: Ditto.
	* testsuite/gcc.dg/autopar/reduc-1.c: Ditto.
	* testsuite/gcc.dg/autopar/reduc-2.c: Ditto.
	* testsuite/gcc.dg/autopar/reduc-3.c: Ditto.
	* testsuite/gcc.dg/autopar/reduc-6.c: Ditto.
	* testsuite/gcc.dg/autopar/reduc-7.c: Ditto.
	* testsuite/gcc.dg/autopar/reduc-8.c: Ditto.
	* testsuite/gcc.dg/autopar/reduc-9.c: Ditto.
	* testsuite/gcc.dg/autopar/pr39500-1.c: Ditto.
	* testsuite/gcc.dg/autopar/reduc-1short.c: Ditto.
	* testsuite/gcc.dg/autopar/reduc-2short.c: Ditto.
	* testsuite/gcc.dg/autopar/parallelization-1.c: Ditto.

From-SVN: r151372
parent ee5111a4
2009-09-03 Razya Ladelsky <razya@il.ibm.com>
PR tree-optimization/38275
* tree-parloops.c (parallelize_loops): Replace profitability condition
for expected number of iterations.
2009-09-03 Alon Dayan <alond@il.ibm.com>
PR tree-optimization/38275
* testsuite/gcc.dg/autopar/reduc-1char.c: Increase number
of iterations. Adjust the logic accordingly.
* testsuite/gcc.dg/autopar/reduc-2char.c: Ditto.
* testsuite/gcc.dg/autopar/reduc-1.c: Ditto.
* testsuite/gcc.dg/autopar/reduc-2.c: Ditto.
* testsuite/gcc.dg/autopar/reduc-3.c: Ditto.
* testsuite/gcc.dg/autopar/reduc-6.c: Ditto.
* testsuite/gcc.dg/autopar/reduc-7.c: Ditto.
* testsuite/gcc.dg/autopar/reduc-8.c: Ditto.
* testsuite/gcc.dg/autopar/reduc-9.c: Ditto.
* testsuite/gcc.dg/autopar/pr39500-1.c: Ditto.
* testsuite/gcc.dg/autopar/reduc-1short.c: Ditto.
* testsuite/gcc.dg/autopar/reduc-2short.c: Ditto.
* testsuite/gcc.dg/autopar/parallelization-1.c: Ditto.
2009-09-03 Alexandre Oliva <aoliva@redhat.com>
* doc/invoke.texi (BUILD_CONFIG): Document --with-build-config.
......
......@@ -6,7 +6,7 @@ void abort (void);
void parloop (int N)
{
int i;
int x[10000000];
int x[10000990];
for (i = 0; i < N; i++)
x[i] = i + 3;
......@@ -20,7 +20,7 @@ void parloop (int N)
int main(void)
{
parloop(10000000);
parloop(10000);
return 0;
}
......
......@@ -8,14 +8,14 @@ void abort (void);
int main (void)
{
int i;
int x[1000];
int x[100000];
for (i = 0; i < 100; i++)
x[i] = x[i+100];
for (i = 0; i < 10000; i++)
x[i] = x[i+10000];
for (i = 0; i < 100; i++)
for (i = 0; i < 10000; i++)
{
if (x[i] != x[i+100])
if (x[i] != x[i+10000])
abort ();
}
......
......@@ -4,55 +4,70 @@
#include <stdarg.h>
#include <stdlib.h>
#define N 16
#define DIFF 242
unsigned int ub[N] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
unsigned int uc[N] = {1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
/* Reduction of unsigned-int. */
void main1 (unsigned int x, unsigned int max_result, unsigned int min_result)
{
int i;
unsigned int udiff = 2;
unsigned int umax = x;
unsigned int umin = x;
/* Summation. */
for (i = 0; i < N; i++) {
udiff += (ub[i] - uc[i]);
}
/* Maximum. */
for (i = 0; i < N; i++) {
umax = umax < uc[i] ? uc[i] : umax;
}
/* Minimum. */
for (i = 0; i < N; i++) {
umin = umin > uc[i] ? uc[i] : umin;
}
/* check results: */
if (udiff != DIFF)
abort ();
if (umax != max_result)
abort ();
if (umin != min_result)
abort ();
#define N 1600
#define DIFF 2558402
unsigned int ub[N];
unsigned int uc[N];
/* Reduction of unsigned-int. */
void main1 (unsigned int x, unsigned int max_result, unsigned int min_result)
{
int i;
unsigned int udiff = 2;
unsigned int umax = x;
unsigned int umin = x;
/* Summation. */
for (i = 0; i < N; i++) {
udiff += (ub[i] - uc[i]);
}
/* Maximum. */
for (i = 0; i < N; i++) {
umax = umax < uc[i] ? uc[i] : umax;
}
/* Minimum. */
for (i = 0; i < N; i++) {
umin = umin > uc[i] ? uc[i] : umin;
}
/* check results: */
if (udiff != DIFF)
abort ();
if (umax != max_result)
abort ();
if (umin != min_result)
abort ();
}
__attribute__((noinline))
void init_arrays ()
{
int i;
ub[0] = 1;
uc[0] = 1;
for (i=1; i<N; i++)
{
ub[i] = i * 3;
uc[i] = i;
}
}
int main (void)
{
main1 (100, 100, 1);
main1 (0, 15, 0);
init_arrays ();
main1 (2000, 2000, 1);
main1 (0, 1599, 0);
return 0;
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
......@@ -4,7 +4,7 @@
#include <stdarg.h>
#include <stdlib.h>
#define N 16
#define N 1600
#define DIFF 242
unsigned char ub[N] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
......@@ -39,15 +39,29 @@ main1 (unsigned char x, unsigned char max_result, unsigned char min_result)
abort ();
}
__attribute__((noinline))
void init_arrays ()
{
int i;
for (i=16; i<N; i++)
{
ub[i] = 1;
uc[i] = 1;
}
}
int main (void)
{
{
init_arrays();
main1 (100, 100, 1);
main1 (0, 15, 0);
return 0;
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
......@@ -4,7 +4,7 @@
#include <stdarg.h>
#include <stdlib.h>
#define N 16
#define N 1600
#define DIFF 242
unsigned short ub[N] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
......@@ -39,15 +39,28 @@ main1 (unsigned short x, unsigned short max_result, unsigned short min_result)
abort ();
}
__attribute__((noinline))
void init_arrays ()
{
int i;
for (i=16; i<N; i++)
{
ub[i] = 1;
uc[i] = 1;
}
}
int main (void)
{
init_arrays();
main1 (100, 100, 1);
main1 (0, 15, 0);
return 0;
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
......@@ -4,11 +4,11 @@
#include <stdarg.h>
#include <stdlib.h>
#define N 16
#define DIFF 240
#define N 1600
#define DIFF 2558400
int b[N] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
int c[N] = {1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
int b[N];
int c[N];
/* Reduction of signed-int. */
......@@ -31,7 +31,7 @@ void main1 (int x, int max_result, int min_result)
for (i = 0; i < N; i++) {
min = min > c[i] ? c[i] : min;
}
/* check results: */
if (diff != DIFF)
abort ();
......@@ -41,15 +41,30 @@ void main1 (int x, int max_result, int min_result)
abort ();
}
__attribute__((noinline))
void init_arrays ()
{
int i;
b[0] = 1;
c[0] = 1;
for (i=1; i<N; i++)
{
b[i] = i * 3;
c[i] = i;
}
}
int main (void)
{
main1 (100, 100, 1);
main1 (0, 15, 0);
init_arrays ();
main1 (2000, 2000, 1);
main1 (0, 1599, 0);
return 0;
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
......@@ -4,7 +4,7 @@
#include <stdarg.h>
#include <stdlib.h>
#define N 16
#define N 1600
#define DIFF 121
signed char b[N] = {1,2,3,6,8,10,12,14,16,18,20,22,24,26,28,30};
......@@ -39,15 +39,29 @@ void main1 (signed char x, signed char max_result, signed char min_result)
abort ();
}
__attribute__((noinline))
void init_arrays ()
{
int i;
for (i=16; i<N; i++)
{
b[i] = 1;
c[i] = 1;
}
}
int main (void)
{
init_arrays();
main1 (100, 100, 1);
main1 (0, 15, 0);
return 0;
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
......
......@@ -4,7 +4,7 @@
#include <stdarg.h>
#include <stdlib.h>
#define N 16
#define N 1600
#define DIFF 242
short b[N] = {1,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
......@@ -38,15 +38,29 @@ void main1 (short x, short max_result, short min_result)
abort ();
}
__attribute__((noinline))
void init_arrays ()
{
int i;
for (i=16; i<N; i++)
{
b[i] = 1;
c[i] = 1;
}
}
int main (void)
{
init_arrays();
main1 (100, 100, 1);
main1 (0, 15, 0);
return 0;
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
......@@ -4,10 +4,10 @@
#include <stdarg.h>
#include <stdlib.h>
#define N 16
#define N 1600
unsigned int ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
unsigned int uc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
unsigned int ub[N];
unsigned int uc[N];
/* Reduction of unsigned-int. */
......@@ -29,15 +29,29 @@ int main1 (int n, int res)
return 0;
}
__attribute__((noinline))
void init_arrays ()
{
int i;
for (i=0; i<N; i++)
{
ub[i] = i * 3;
uc[i] = i;
}
}
int main (void)
{
main1 (N, 240);
main1 (N-1, 210);
init_arrays ();
main1 (N, 2558400);
main1 (N-1, 2555202);
return 0;
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 1 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
......@@ -4,19 +4,26 @@
#include <stdarg.h>
#include <stdlib.h>
#define N 16
#define DIFF 242
#define N 1600
#define DIFF 2558402
__attribute__ ((noinline))
int main1 (float x, float max_result)
{
int i;
float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
float b[N];
float c[N];
float diff = 2;
float max = x;
float min = 10;
for (i=0; i<N; i++)
{
b[i] = i * 3;
c[i] = i;
}
for (i = 0; i < N; i++) {
diff += (b[i] - c[i]);
}
......@@ -42,14 +49,15 @@ int main1 (float x, float max_result)
int main (void)
{
main1 (100 ,100);
main1 (0, 15);
main1 (2000, 2000);
main1 (0, 1599);
return 0;
}
/* need -ffast-math to parallelize these loops. */
/* { dg-final { scan-tree-dump-times "Detected reduction" 0 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 0 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops" } } */
/* { dg-final { scan-tree-dump-times "FAILED: it is not a part of reduction" 3 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
......@@ -3,7 +3,7 @@
#include <stdlib.h>
#define N 32
#define N 3200
extern void abort (void);
typedef unsigned char T;
......@@ -42,28 +42,36 @@ testmin (const T *c, T init, T result)
int main (void)
{
static unsigned char const A[N] = {
static unsigned char A[N] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f
};
static unsigned char const B[N] = {
static unsigned char B[N] = {
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
};
static unsigned char const C[N] = {
static unsigned char C[N] = {
0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
};
int i;
for (i=32; i<N; i++)
{
A[i]= 0x01;
B[i]= 0x70;
C[i]= 0xff;
}
testmin (A, 10, 1);
testmin (B, 0x7f, 0x70);
testmin (C, 0x7f, 0x09);
......@@ -75,8 +83,9 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
......@@ -3,7 +3,7 @@
#include <stdlib.h>
#define N 32
#define N 3200
extern void abort (void);
typedef signed char T;
......@@ -42,27 +42,36 @@ testmin (const T *c, T init, T result)
int main (void)
{
static signed char const A[N] = {
static signed char A[N] = {
0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f
};
static signed char const B[N] = {
static signed char B[N] = {
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
};
static signed char const C[N] = {
static signed char C[N] = {
0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, 0xf8,
0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
};
int i;
for (i=32; i<N; i++)
{
A[i]= 0x01;
B[i]= 0x70;
C[i]= 0xff;
}
testmin (A, 0, 0);
testmin (B, 0, 0x80);
testmin (C, 0, 0x80);
......@@ -74,7 +83,8 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
......@@ -3,7 +3,7 @@
#include <stdlib.h>
#define N 32
#define N 3200
extern void abort (void);
typedef unsigned short T;
......@@ -42,27 +42,36 @@ testmin (const T *c, T init, T result)
int main (void)
{
static unsigned short const A[N] = {
static unsigned short A[N] = {
0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008,
0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010,
0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700,
0x7ff8, 0x7ff9, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffd, 0x7ffe, 0x7fff
};
static unsigned short const B[N] = {
static unsigned short B[N] = {
0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700,
0x7ff8, 0x7ff9, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffd, 0x7ffe, 0x7fff,
0x8000, 0x8001, 0x8002, 0x8003, 0x8004, 0x8005, 0x8006, 0x8007,
0x8008, 0x8009, 0x800a, 0x800b, 0x800c, 0x800d, 0x800e, 0x800f
};
static unsigned short const C[N] = {
static unsigned short C[N] = {
0xffff, 0xfffe, 0xfffd, 0xfffc, 0xfffb, 0xfffa, 0xfff9, 0xfff8,
0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010,
0x8000, 0x8001, 0x8002, 0x8003, 0x8004, 0x8005, 0x8006, 0x8007,
0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700,
};
int i;
for (i=32; i<N; i++)
{
A[i]= 0x0001;
B[i]= 0x7000;
C[i]= 0xffff;
}
testmin (A, 10, 1);
testmin (B, 0x7fff, 0x7000);
testmin (C, 0x7fff, 0x0009);
......@@ -74,7 +83,8 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { cleanup-tree-dump "parloops" } } */
/* { dg-final { cleanup-tree-dump "optimized" } } */
......@@ -1908,7 +1908,7 @@ parallelize_loops (void)
/* FIXME: Bypass this check as graphite doesn't update the
count and frequency correctly now. */
if (!flag_loop_parallelize_all
&& (expected_loop_iterations (loop) <= n_threads
&& (estimated_loop_iterations_int (loop, false)<= n_threads * MIN_PER_THREAD
/* Do not bother with loops in cold areas. */
|| optimize_loop_nest_for_size_p (loop)))
continue;
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment