Update OpenACC tests for amdgcn

2020-01-20 Andrew Stubbs <ams@codesourcery.com> libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Skip test on gcn. * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c (main): Adjust test dimensions for amdgcn. * testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c (main): Adjust gang/worker/vector expectations dynamically. * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-v-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-w-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c (acc_gang): Recognise acc_device_radeon. (acc_worker): Likewise. (acc_vector): Likewise. (main): Set expectations for amdgcn. * testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c (main): Adjust gang/worker/vector expectations dynamically. * testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Set expectations for amdgcn.

Update OpenACC tests for amdgcn
2020-01-20 Andrew Stubbs <ams@codesourcery.com> libgomp/ * testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Skip test on gcn. * testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c (main): Adjust test dimensions for amdgcn. * testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c (main): Adjust gang/worker/vector expectations dynamically. * testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-v-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-w-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/parallel-dims.c (acc_gang): Recognise acc_device_radeon. (acc_worker): Likewise. (acc_vector): Likewise. (main): Set expectations for amdgcn. * testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c (main): Adjust gang/worker/vector expectations dynamically. * testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (main): Likewise. * testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Set expectations for amdgcn.
09e0ad62 · Andrew Stubbs · 3a434597 · 09e0ad62 · 09e0ad62 · 09e0ad62
Commit 09e0ad62 authored Nov 14, 2019 by Andrew Stubbs
19 changed files
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
+2020-01-20  Andrew Stubbs  <ams@codesourcery.com>
+	* testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c: Skip test on gcn.
+	* testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c (main):
+	Adjust test dimensions for amdgcn.
+	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c (main): Adjust
+	gang/worker/vector expectations dynamically.
+	* testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
+	(main): Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c (main): Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c (main): Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c (main): Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c (main): Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c (main): Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/loop-v-1.c (main): Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c (main): Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c (main): Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+	(acc_gang): Recognise acc_device_radeon.
+	(acc_worker): Likewise.
+	(acc_vector): Likewise.
+	(main): Set expectations for amdgcn.
+	* testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
+	(main): Adjust gang/worker/vector expectations dynamically.
+	* testsuite/libgomp.oacc-c-c++-common/routine-v-1.c (main): Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/routine-w-1.c (main): Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c (main): Likewise.
+	* testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c: Set expectations
+	for amdgcn.
 2020-01-17  Andrew Stubbs  <ams@codesourcery.com>
 	* config/accel/openacc.f90 (openacc_kinds): Rename acc_device_gcn to

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-auto-1.c
+/* AMD GCN does not use 32-lane vectors.
+   { dg-skip-if "unsuitable dimensions" { openacc_amdgcn_accel_selected } { "*" } { "" } } */
 /* { dg-additional-options "-fopenacc-dim=32" } */
 #include <stdio.h>

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-dim-default.c
@@ -128,5 +128,14 @@ int test_1 (int gp, int wp, int vp)
 int main ()
 {
+#ifdef ACC_DEVICE_TYPE_gcn
+  /* AMD GCN uses the autovectorizer for the vector dimension: the use
+     of a function call in vector-partitioned code in this test is not
+     currently supported.  */
+  /* AMD GCN does not currently support multiple workers.  This should be
+     set to 16 when that changes.  */
+  return test_1 (16, 1, 1);
+#else
  return test_1 (16, 16, 32);
+#endif
 }
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
@@ -9,11 +9,13 @@ int main ()
  int ix;
  int exit = 0;
  int ondev = 0;
+  int gangsize, workersize, vectorsize;
  for (ix = 0; ix < N;ix++)
    ary[ix] = -1;
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+	    copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize)
  {
 #pragma acc loop gang worker vector
    for (unsigned ix = 0; ix < N; ix++)
@@ -32,6 +34,10 @@ int main ()
 	else
 	  ary[ix] = ix;
      }
+    gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
  }
  for (ix = 0; ix < N; ix++)
@@ -39,11 +45,12 @@ int main ()
      int expected = ix;
      if(ondev)
 	{
-	  int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+	  int chunk_size = (N + gangsize * workersize * vectorsize - 1)
+			   / (gangsize * workersize * vectorsize);
-	  int g = ix / (chunk_size * 32 * 32);
+	  int g = ix / (chunk_size * workersize * vectorsize);
-	  int w = ix / 32 % 32;
+	  int w = (ix / vectorsize) % workersize;
-	  int v = ix % 32;
+	  int v = ix % vectorsize;
 	  expected = (g << 16) | (w << 8) | v;
 	}

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c
@@ -8,8 +8,10 @@ int main ()
  int ix;
  int ondev = 0;
  int t = 0, h = 0;
+  int gangsize, workersize, vectorsize;
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ondev)
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
+	copy(ondev) copyout(gangsize, workersize, vectorsize)
  {
 #pragma acc loop gang worker vector reduction(+:t)
    for (unsigned ix = 0; ix < N; ix++)
@@ -28,18 +30,22 @@ int main ()
 	  }
 	t += val;
      }
+    gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
  }
  for (ix = 0; ix < N; ix++)
    {
      int val = ix;
-      if(ondev)
+      if (ondev)
 	{
-	  int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+	  int chunk_size = (N + gangsize * workersize * vectorsize - 1)
+			   / (gangsize * workersize * vectorsize);
-	  int g = ix / (chunk_size * 32 * 32);
+	  int g = ix / (chunk_size * vectorsize * workersize);
-	  int w = ix / 32 % 32;
+	  int w = ix / vectorsize % workersize;
-	  int v = ix % 32;
+	  int v = ix % vectorsize;
 	  val = (g << 16) | (w << 8) | v;
 	}

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c
@@ -9,8 +9,9 @@ int main ()
  int ix;
  int ondev = 0;
  int t = 0,  h = 0;
+  int vectorsize;
-#pragma acc parallel vector_length(32) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ondev) copyout(vectorsize)
  {
 #pragma acc loop vector reduction (+:t)
    for (unsigned ix = 0; ix < N; ix++)
@@ -29,6 +30,7 @@ int main ()
 	  }
 	t += val;
      }
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
  }
  for (ix = 0; ix < N; ix++)
@@ -38,7 +40,7 @@ int main ()
 	{
 	  int g = 0;
 	  int w = 0;
-	  int v = ix % 32;
+	  int v = ix % vectorsize;
 	  val = (g << 16) | (w << 8) | v;
 	}

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c
@@ -9,8 +9,9 @@ int main ()
  int ix;
  int ondev = 0;
  int q = 0,  h = 0;
+  int vectorsize;
-#pragma acc parallel vector_length(32) copy(q) copy(ondev)
+#pragma acc parallel vector_length(32) copy(q) copy(ondev) copyout(vectorsize)
  {
    int t = q;
@@ -32,6 +33,7 @@ int main ()
 	t += val;
      }
    q = t;
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
  }
  for (ix = 0; ix < N; ix++)
@@ -41,7 +43,7 @@ int main ()
 	{
 	  int g = 0;
 	  int w = 0;
-	  int v = ix % 32;
+	  int v = ix % vectorsize;
 	  val = (g << 16) | (w << 8) | v;
 	}

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c
@@ -8,8 +8,10 @@ int main ()
  int ix;
  int ondev = 0;
  int t = 0,  h = 0;
+  int workersize;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \
+	    copyout(workersize)
  {
 #pragma acc loop worker reduction(+:t)
    for (unsigned ix = 0; ix < N; ix++)
@@ -28,6 +30,7 @@ int main ()
 	  }
 	t += val;
      }
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
  }
  for (ix = 0; ix < N; ix++)
@@ -36,7 +39,7 @@ int main ()
      if(ondev)
 	{
 	  int g = 0;
-	  int w = ix % 32;
+	  int w = ix % workersize;
 	  int v = 0;
 	  val = (g << 16) | (w << 8) | v;

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c
@@ -8,8 +8,10 @@ int main ()
  int ix;
  int ondev = 0;
  int q = 0,  h = 0;
+  int workersize;
-#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(q) copy(ondev) \
+	    copyout(workersize)
  {
    int t = q;
@@ -31,6 +33,7 @@ int main ()
 	t += val;
      }
    q = t;
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
  }
  for (ix = 0; ix < N; ix++)
@@ -39,7 +42,7 @@ int main ()
      if(ondev)
 	{
 	  int g = 0;
-	  int w = ix % 32;
+	  int w = ix % workersize;
 	  int v = 0;
 	  val = (g << 16) | (w << 8) | v;

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c
@@ -8,8 +8,10 @@ int main ()
  int ix;
  int ondev = 0;
  int t = 0, h = 0;
+  int workersize, vectorsize;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ondev) \
+	    copyout(workersize, vectorsize)
  {
 #pragma acc loop worker vector reduction (+:t)
    for (unsigned ix = 0; ix < N; ix++)
@@ -28,6 +30,8 @@ int main ()
 	  }
 	t += val;
      }
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
  }
  for (ix = 0; ix < N; ix++)
@@ -36,8 +40,8 @@ int main ()
      if(ondev)
 	{
 	  int g = 0;
-	  int w = (ix / 32) % 32;
+	  int w = (ix / vectorsize) % workersize;
-	  int v = ix % 32;
+	  int v = ix % vectorsize;
 	  val = (g << 16) | (w << 8) | v;
 	}

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-v-1.c
@@ -9,11 +9,13 @@ int main ()
  int ix;
  int exit = 0;
  int ondev = 0;
+  int vectorsize;
  for (ix = 0; ix < N;ix++)
    ary[ix] = -1;
-#pragma acc parallel vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \
+	    copyout(vectorsize)
  {
 #pragma acc loop vector
    for (unsigned ix = 0; ix < N; ix++)
@@ -31,6 +33,7 @@ int main ()
 	else
 	  ary[ix] = ix;
      }
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
  }
  for (ix = 0; ix < N; ix++)
@@ -40,7 +43,7 @@ int main ()
 	{
 	  int g = 0;
 	  int w = 0;
-	  int v = ix % 32;
+	  int v = ix % vectorsize;
 	  expected = (g << 16) | (w << 8) | v;
 	}

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-w-1.c
@@ -9,11 +9,13 @@ int main ()
  int ix;
  int exit = 0;
  int ondev = 0;
+  int workersize;
  for (ix = 0; ix < N;ix++)
    ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+	    copyout(workersize)
  {
 #pragma acc loop worker
    for (unsigned ix = 0; ix < N; ix++)
@@ -31,6 +33,7 @@ int main ()
 	else
 	  ary[ix] = ix;
      }
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
  }
  for (ix = 0; ix < N; ix++)
@@ -39,7 +42,7 @@ int main ()
      if(ondev)
 	{
 	  int g = 0;
-	  int w = ix % 32;
+	  int w = ix % workersize;
 	  int v = 0;
 	  expected = (g << 16) | (w << 8) | v;

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c
@@ -9,11 +9,13 @@ int main ()
  int ix;
  int exit = 0;
  int ondev = 0;
+  int workersize, vectorsize;
  for (ix = 0; ix < N;ix++)
    ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+	    copyout(workersize, vectorsize)
  {
 #pragma acc loop worker vector
    for (unsigned ix = 0; ix < N; ix++)
@@ -31,6 +33,8 @@ int main ()
 	else
 	  ary[ix] = ix;
      }
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
  }
  for (ix = 0; ix < N; ix++)
@@ -39,8 +43,8 @@ int main ()
      if(ondev)
 	{
 	  int g = 0;
-	  int w = (ix / 32) % 32;
+	  int w = (ix / vectorsize) % workersize;
-	  int v = ix % 32;
+	  int v = ix % vectorsize;
 	  expected = (g << 16) | (w << 8) | v;
 	}

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/parallel-dims.c
@@ -14,7 +14,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_gang ()
 {
  if (acc_on_device ((int) acc_device_host))
    return 0;
-  else if (acc_on_device ((int) acc_device_nvidia))
+  else if (acc_on_device ((int) acc_device_nvidia)
+	   || acc_on_device ((int) acc_device_radeon))
    return __builtin_goacc_parlevel_id (GOMP_DIM_GANG);
  else
    __builtin_abort ();
@@ -25,7 +26,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_worker ()
 {
  if (acc_on_device ((int) acc_device_host))
    return 0;
-  else if (acc_on_device ((int) acc_device_nvidia))
+  else if (acc_on_device ((int) acc_device_nvidia)
+	   || acc_on_device ((int) acc_device_radeon))
    return __builtin_goacc_parlevel_id (GOMP_DIM_WORKER);
  else
    __builtin_abort ();
@@ -36,7 +38,8 @@ static unsigned int __attribute__ ((optimize ("O2"))) acc_vector ()
 {
  if (acc_on_device ((int) acc_device_host))
    return 0;
-  else if (acc_on_device ((int) acc_device_nvidia))
+  else if (acc_on_device ((int) acc_device_nvidia)
+	   || acc_on_device ((int) acc_device_radeon))
    return __builtin_goacc_parlevel_id (GOMP_DIM_VECTOR);
  else
    __builtin_abort ();
@@ -282,6 +285,12 @@ int main ()
 	  /* The GCC nvptx back end enforces num_workers (32).  */
 	  workers_actual = 32;
 	}
+      else if (acc_on_device (acc_device_radeon))
+	{
+	  /* The GCC GCN back end is limited to num_workers (16).
+	     Temporarily set this to 1 until multiple workers are permitted. */
+	  workers_actual = 1; // 16;
+	}
      else
 	__builtin_abort ();
 #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -328,6 +337,11 @@ int main ()
 	  /* We're actually executing with num_workers (32).  */
 	  /* workers_actual = 32; */
 	}
+      else if (acc_on_device (acc_device_radeon))
+	{
+	  /* The GCC GCN back end is limited to num_workers (16).  */
+	  workers_actual = 16;
+	}
      else
 	__builtin_abort ();
 #pragma acc loop worker reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -367,6 +381,11 @@ int main ()
 	  /* The GCC nvptx back end enforces vector_length (32).  */
 	  vectors_actual = 1024;
 	}
+      else if (acc_on_device (acc_device_radeon))
+	{
+	  /* The GCC GCN back end enforces vector_length (1): autovectorize. */
+	  vectors_actual = 1;
+	}
      else
 	__builtin_abort ();
 #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -407,6 +426,13 @@ int main ()
 	  /* The GCC nvptx back end enforces vector_length (32).  */
 	  vectors_actual = 32;
 	}
+      else if (acc_on_device (acc_device_radeon))
+	{
+	  /* Because of the way vectors are implemented for GCN, a vector loop
+	     containing a seq routine call will not vectorize calls to that
+	     routine.  Hence, we'll only get one "vector".  */
+	  vectors_actual = 1;
+	}
      else
 	__builtin_abort ();
 #pragma acc loop vector reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)
@@ -433,6 +459,9 @@ int main ()
       in the following case.  So, limit ourselves here.  */
    if (acc_get_device_type () == acc_device_nvidia)
      gangs = 3;
+    /* Similar appears to be true for GCN.  */
+    if (acc_get_device_type () == acc_device_radeon)
+      gangs = 3;
    int gangs_actual = gangs;
 #define WORKERS 3
    int workers_actual = WORKERS;
@@ -459,6 +488,13 @@ int main ()
 	  /* The GCC nvptx back end enforces vector_length (32).  */
 	  vectors_actual = 32;
 	}
+      else if (acc_on_device (acc_device_radeon))
+	{
+	  /* Temporary setting, until multiple workers are permitted.  */
+	  workers_actual = 1;
+	  /* See above comments about GCN vectors_actual.  */
+	  vectors_actual = 1;
+	}
      else
 	__builtin_abort ();
 #pragma acc loop gang reduction (min: gangs_min, workers_min, vectors_min) reduction (max: gangs_max, workers_max, vectors_max)

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c
@@ -30,14 +30,18 @@ int main ()
  int ix;
  int exit = 0;
  int ondev = 0;
+  int gangsize, workersize, vectorsize;
  for (ix = 0; ix < N;ix++)
    ary[ix] = -1;
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize)
  {
    ondev = acc_on_device (acc_device_not_host);
    gang (ary);
+    gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
  }
  for (ix = 0; ix < N; ix++)
@@ -45,11 +49,12 @@ int main ()
      int expected = ix;
      if(ondev)
 	{
-	  int chunk_size = (N + 32*32*32 - 1) / (32*32*32);
+	  int chunk_size = (N + gangsize * workersize * vectorsize - 1)
+			   / (gangsize * workersize * vectorsize);
-	  int g = ix / (chunk_size * 32 * 32);
+	  int g = ix / (chunk_size * vectorsize * workersize);
-	  int w = ix / 32 % 32;
+	  int w = (ix / vectorsize) % workersize;
-	  int v = ix % 32;
+	  int v = ix % vectorsize;
 	  expected = (g << 16) | (w << 8) | v;
 	}

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-v-1.c
@@ -30,14 +30,17 @@ int main ()
  int ix;
  int exit = 0;
  int ondev = 0;
+  int vectorsize;
  for (ix = 0; ix < N;ix++)
    ary[ix] = -1;
-#pragma acc parallel vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel vector_length(32) copy(ary) copy(ondev) \
+	    copyout(vectorsize)
  {
    ondev = acc_on_device (acc_device_not_host);
    vector (ary);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
  }
  for (ix = 0; ix < N; ix++)
@@ -47,7 +50,7 @@ int main ()
 	{
 	  int g = 0;
 	  int w = 0;
-	  int v = ix % 32;
+	  int v = ix % vectorsize;
 	  expected = (g << 16) | (w << 8) | v;
 	}

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-w-1.c
@@ -30,14 +30,17 @@ int main ()
  int ix;
  int exit = 0;
  int ondev = 0;
+  int workersize;
  for (ix = 0; ix < N;ix++)
    ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+	    copyout(workersize)
  {
    ondev = acc_on_device (acc_device_not_host);
    worker (ary);
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
  }
  for (ix = 0; ix < N; ix++)
@@ -46,7 +49,7 @@ int main ()
      if(ondev)
 	{
 	  int g = 0;
-	  int w = ix % 32;
+	  int w = ix % workersize;
 	  int v = 0;
 	  expected = (g << 16) | (w << 8) | v;

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c
@@ -30,14 +30,18 @@ int main ()
  int ix;
  int exit = 0;
  int ondev = 0;
+  int workersize, vectorsize;
  for (ix = 0; ix < N;ix++)
    ary[ix] = -1;
-#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev)
+#pragma acc parallel num_workers(32) vector_length(32) copy(ary) copy(ondev) \
+	    copyout(workersize, vectorsize)
  {
    ondev = acc_on_device (acc_device_not_host);
    worker (ary);
+    workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
+    vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
  }
  for (ix = 0; ix < N; ix++)
@@ -46,8 +50,8 @@ int main ()
      if(ondev)
 	{
 	  int g = 0;
-	  int w = (ix / 32) % 32;
+	  int w = (ix / vectorsize) % workersize;
-	  int v = ix % 32;
+	  int v = ix % vectorsize;
 	  expected = (g << 16) | (w << 8) | v;
 	}

--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/routine-wv-2.c
@@ -2,8 +2,13 @@
 #include <openacc.h>
 #include <gomp-constants.h>
+#ifdef ACC_DEVICE_TYPE_gcn
+#define NUM_WORKERS 16
+#define NUM_VECTORS 1
+#else
 #define NUM_WORKERS 16
 #define NUM_VECTORS 32
+#endif
 #define WIDTH 64
 #define HEIGHT 32