Commit 5b37e866, authored and committed by Nathan Sidwell

omp-low.c (struct oacc_loop): Add 'inner' field.

	gcc/
	* omp-low.c (struct oacc_loop): Add 'inner' field.
	(new_oacc_loop_raw): Initialize it to zero.
	(oacc_loop_fixed_partitions): Initialize it.
	(oacc_loop_auto_partitions): Partition outermost loop to outermost
	available partitioning.

	gcc/testsuite/
	* c-c++-common/goacc/loop-auto-1.c: Adjust expected warnings.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Adjust
	expected partitioning.

From-SVN: r235756
parent 87cdf04b
2016-05-02 Nathan Sidwell <nathan@codesourcery.com>
* omp-low.c (struct oacc_loop): Add 'inner' field.
(new_oacc_loop_raw): Initialize it to zero.
(oacc_loop_fixed_partitions): Initialize it.
(oacc_loop_auto_partitions): Partition outermost loop to outermost
available partitioning.
2016-05-02 Claudiu Zissulescu <claziss@synopsys.com> 2016-05-02 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc.md (mulsidi3): Change operand 0 predicate to * config/arc/arc.md (mulsidi3): Change operand 0 predicate to
......
...@@ -241,6 +241,7 @@ struct oacc_loop ...@@ -241,6 +241,7 @@ struct oacc_loop
tree routine; /* Pseudo-loop enclosing a routine. */ tree routine; /* Pseudo-loop enclosing a routine. */
unsigned mask; /* Partitioning mask. */ unsigned mask; /* Partitioning mask. */
unsigned inner; /* Partitioning of inner loops. */
unsigned flags; /* Partitioning flags. */ unsigned flags; /* Partitioning flags. */
unsigned ifns; /* Contained loop abstraction functions. */ unsigned ifns; /* Contained loop abstraction functions. */
tree chunk_size; /* Chunk size. */ tree chunk_size; /* Chunk size. */
...@@ -18921,7 +18922,7 @@ new_oacc_loop_raw (oacc_loop *parent, location_t loc) ...@@ -18921,7 +18922,7 @@ new_oacc_loop_raw (oacc_loop *parent, location_t loc)
memset (loop->tails, 0, sizeof (loop->tails)); memset (loop->tails, 0, sizeof (loop->tails));
loop->routine = NULL_TREE; loop->routine = NULL_TREE;
loop->mask = loop->flags = 0; loop->mask = loop->flags = loop->inner = 0;
loop->ifns = 0; loop->ifns = 0;
loop->chunk_size = 0; loop->chunk_size = 0;
loop->head_end = NULL; loop->head_end = NULL;
...@@ -19449,8 +19450,11 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) ...@@ -19449,8 +19450,11 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
mask_all |= this_mask; mask_all |= this_mask;
if (loop->child) if (loop->child)
mask_all |= oacc_loop_fixed_partitions (loop->child, {
outer_mask | this_mask); loop->inner = oacc_loop_fixed_partitions (loop->child,
outer_mask | this_mask);
mask_all |= loop->inner;
}
if (loop->sibling) if (loop->sibling)
mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask); mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);
...@@ -19466,7 +19470,7 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask) ...@@ -19466,7 +19470,7 @@ oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
static unsigned static unsigned
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask) oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
{ {
unsigned inner_mask = 0; bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
bool noisy = true; bool noisy = true;
#ifdef ACCEL_COMPILER #ifdef ACCEL_COMPILER
...@@ -19475,16 +19479,33 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask) ...@@ -19475,16 +19479,33 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
noisy = false; noisy = false;
#endif #endif
if (assign && outer_mask < GOMP_DIM_MASK (GOMP_DIM_MAX - 1))
{
/* Allocate the outermost loop at the outermost available
level. */
unsigned this_mask = outer_mask + 1;
if (!(this_mask & loop->inner))
loop->mask = this_mask;
}
if (loop->child) if (loop->child)
inner_mask |= oacc_loop_auto_partitions (loop->child, {
outer_mask | loop->mask); unsigned child_mask = outer_mask | loop->mask;
if (loop->mask || assign)
child_mask |= GOMP_DIM_MASK (GOMP_DIM_MAX);
if ((loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT)) loop->inner = oacc_loop_auto_partitions (loop->child, child_mask);
}
if (assign && !loop->mask)
{ {
/* Allocate the loop at the innermost available level. */
unsigned this_mask = 0; unsigned this_mask = 0;
/* Determine the outermost partitioning used within this loop. */ /* Determine the outermost partitioning used within this loop. */
this_mask = inner_mask | GOMP_DIM_MASK (GOMP_DIM_MAX); this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
this_mask = (this_mask & -this_mask); this_mask = (this_mask & -this_mask);
/* Pick the partitioning just inside that one. */ /* Pick the partitioning just inside that one. */
...@@ -19497,17 +19518,20 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask) ...@@ -19497,17 +19518,20 @@ oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask)
warning_at (loop->loc, 0, warning_at (loop->loc, 0,
"insufficient partitioning available to parallelize loop"); "insufficient partitioning available to parallelize loop");
if (dump_file)
fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
this_mask);
loop->mask = this_mask; loop->mask = this_mask;
} }
inner_mask |= loop->mask;
if (assign && dump_file)
fprintf (dump_file, "Auto loop %s:%d assigned %d\n",
LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
loop->mask);
unsigned inner_mask = 0;
if (loop->sibling) if (loop->sibling)
inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask); inner_mask |= oacc_loop_auto_partitions (loop->sibling, outer_mask);
inner_mask |= loop->inner | loop->mask;
return inner_mask; return inner_mask;
} }
......
2016-05-02 Nathan Sidwell <nathan@codesourcery.com>
* c-c++-common/goacc/loop-auto-1.c: Adjust expected warnings.
2016-05-02 Marek Polacek <polacek@redhat.com> 2016-05-02 Marek Polacek <polacek@redhat.com>
PR c/70851 PR c/70851
......
...@@ -186,10 +186,10 @@ void Worker (void) ...@@ -186,10 +186,10 @@ void Worker (void)
for (int jx = 0; jx < 10; jx++) {} for (int jx = 0; jx < 10; jx++) {}
} }
#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ #pragma acc loop auto
for (int ix = 0; ix < 10; ix++) for (int ix = 0; ix < 10; ix++)
{ {
#pragma acc loop auto #pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
for (int jx = 0; jx < 10; jx++) for (int jx = 0; jx < 10; jx++)
{ {
#pragma acc loop auto #pragma acc loop auto
...@@ -214,10 +214,10 @@ void Vector (void) ...@@ -214,10 +214,10 @@ void Vector (void)
#pragma acc loop auto #pragma acc loop auto
for (int ix = 0; ix < 10; ix++) {} for (int ix = 0; ix < 10; ix++) {}
#pragma acc loop auto /* { dg-warning "insufficient partitioning" } */ #pragma acc loop auto
for (int ix = 0; ix < 10; ix++) for (int ix = 0; ix < 10; ix++)
{ {
#pragma acc loop auto #pragma acc loop auto /* { dg-warning "insufficient partitioning" } */
for (int jx = 0; jx < 10; jx++) {} for (int jx = 0; jx < 10; jx++) {}
} }
} }
......
2016-05-02 Nathan Sidwell <nathan@codesourcery.com>
* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Adjust
expected partitioning.
2016-04-29 Cesar Philippidis <cesar@codesourcery.com> 2016-04-29 Cesar Philippidis <cesar@codesourcery.com>
PR middle-end/70626 PR middle-end/70626
......
...@@ -103,9 +103,11 @@ int vector_1 (int *ary, int size) ...@@ -103,9 +103,11 @@ int vector_1 (int *ary, int size)
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size) #pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
{ {
#pragma acc loop gang
for (int jx = 0; jx < 1; jx++)
#pragma acc loop auto #pragma acc loop auto
for (int ix = 0; ix < size; ix++) for (int ix = 0; ix < size; ix++)
ary[ix] = place (); ary[ix] = place ();
} }
return check (ary, size, 0, 0, 1); return check (ary, size, 0, 0, 1);
...@@ -118,7 +120,7 @@ int vector_2 (int *ary, int size) ...@@ -118,7 +120,7 @@ int vector_2 (int *ary, int size)
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size) #pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
{ {
#pragma acc loop worker #pragma acc loop worker
for (int jx = 0; jx < size / 64; jx++) for (int jx = 0; jx < size / 64; jx++)
#pragma acc loop auto #pragma acc loop auto
for (int ix = 0; ix < 64; ix++) for (int ix = 0; ix < 64; ix++)
ary[ix + jx * 64] = place (); ary[ix + jx * 64] = place ();
...@@ -133,30 +135,16 @@ int worker_1 (int *ary, int size) ...@@ -133,30 +135,16 @@ int worker_1 (int *ary, int size)
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size) #pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
{ {
#pragma acc loop gang
for (int kx = 0; kx < 1; kx++)
#pragma acc loop auto #pragma acc loop auto
for (int jx = 0; jx < size / 64; jx++) for (int jx = 0; jx < size / 64; jx++)
#pragma acc loop vector #pragma acc loop vector
for (int ix = 0; ix < 64; ix++) for (int ix = 0; ix < 64; ix++)
ary[ix + jx * 64] = place (); ary[ix + jx * 64] = place ();
}
return check (ary, size, 0, 1, 1);
}
int worker_2 (int *ary, int size)
{
clear (ary, size);
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
{
#pragma acc loop auto
for (int jx = 0; jx < size / 64; jx++)
#pragma acc loop auto
for (int ix = 0; ix < 64; ix++)
ary[ix + jx * 64] = place ();
} }
return check (ary, size, 0, 1, 1); return check (ary, size, 0, 1, 1);
} }
int gang_1 (int *ary, int size) int gang_1 (int *ary, int size)
...@@ -193,6 +181,22 @@ int gang_2 (int *ary, int size) ...@@ -193,6 +181,22 @@ int gang_2 (int *ary, int size)
return check (ary, size, 1, 1, 1); return check (ary, size, 1, 1, 1);
} }
int gang_3 (int *ary, int size)
{
clear (ary, size);
#pragma acc parallel num_workers (32) vector_length(32) copy(ary[0:size]) firstprivate (size)
{
#pragma acc loop auto
for (int jx = 0; jx < size / 64; jx++)
#pragma acc loop auto
for (int ix = 0; ix < 64; ix++)
ary[ix + jx * 64] = place ();
}
return check (ary, size, 1, 0, 1);
}
#define N (32*32*32) #define N (32*32*32)
int main () int main ()
{ {
...@@ -214,13 +218,13 @@ int main () ...@@ -214,13 +218,13 @@ int main ()
if (worker_1 (ary, N)) if (worker_1 (ary, N))
return 1; return 1;
if (worker_2 (ary, N))
return 1;
if (gang_1 (ary, N)) if (gang_1 (ary, N))
return 1; return 1;
if (gang_2 (ary, N)) if (gang_2 (ary, N))
return 1; return 1;
if (gang_3 (ary, N))
return 1;
return 0; return 0;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment