Commit 2d256816, authored and committed by Nathan Sidwell

reduction-dbl.c: New.

	* libgomp.oacc-c-c++-common/reduction-dbl.c: New.
	* libgomp.oacc-c-c++-common/reduction-flt.c: New.
	* libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: Use typedef.
	* libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Use typedef.
	* libgomp.oacc-c-c++-common/reduction-2.c: Uncomment broken tests
	and fix.
	* libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
	* libgomp.oacc-c-c++-common/reduction-4.c: Likewise.

From-SVN: r230621
parent 0398c183
2015-11-19 Nathan Sidwell <nathan@codesourcery.com>
* libgomp.oacc-c-c++-common/reduction-dbl.c: New.
* libgomp.oacc-c-c++-common/reduction-flt.c: New.
* libgomp.oacc-c-c++-common/reduction-cplx-dbl.c: Use typedef.
* libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Use typedef.
* libgomp.oacc-c-c++-common/reduction-2.c: Uncomment broken tests
and fix.
* libgomp.oacc-c-c++-common/reduction-3.c: Likewise.
* libgomp.oacc-c-c++-common/reduction-4.c: Likewise.
2015-11-18 Nathan Sidwell <nathan@codesourcery.com> 2015-11-18 Nathan Sidwell <nathan@codesourcery.com>
* testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Add * testsuite/libgomp.oacc-c-c++-common/reduction-cplx-flt.c: Add
......
...@@ -50,39 +50,37 @@ main(void) ...@@ -50,39 +50,37 @@ main(void)
if (fabs(result - vresult) > .0001) if (fabs(result - vresult) > .0001)
abort (); abort ();
// result = 0; result = 0;
// vresult = 0; vresult = 0;
//
// /* 'max' reductions. */ /* 'max' reductions. */
// #pragma acc parallel vector_length (vl) #pragma acc parallel vector_length (vl) copy(result)
// #pragma acc loop reduction (+:result) #pragma acc loop reduction (max:result)
// for (i = 0; i < n; i++) for (i = 0; i < n; i++)
// result = result > array[i] ? result : array[i]; result = result > array[i] ? result : array[i];
//
// /* Verify the reduction. */ /* Verify the reduction. */
// for (i = 0; i < n; i++) for (i = 0; i < n; i++)
// vresult = vresult > array[i] ? vresult : array[i]; vresult = vresult > array[i] ? vresult : array[i];
//
// printf("%d != %d\n", result, vresult); if (result != vresult)
// if (result != vresult) abort ();
// abort ();
// result = 0;
// result = 0; vresult = 0;
// vresult = 0;
// /* 'min' reductions. */
// /* 'min' reductions. */ #pragma acc parallel vector_length (vl) copy(result)
// #pragma acc parallel vector_length (vl) #pragma acc loop reduction (min:result)
// #pragma acc loop reduction (+:result) for (i = 0; i < n; i++)
// for (i = 0; i < n; i++) result = result < array[i] ? result : array[i];
// result = result < array[i] ? result : array[i];
// /* Verify the reduction. */
// /* Verify the reduction. */ for (i = 0; i < n; i++)
// for (i = 0; i < n; i++) vresult = vresult < array[i] ? vresult : array[i];
// vresult = vresult < array[i] ? vresult : array[i];
// if (result != vresult)
// printf("%d != %d\n", result, vresult); abort ();
// if (result != vresult)
// abort ();
result = 5; result = 5;
vresult = 5; vresult = 5;
......
...@@ -22,15 +22,15 @@ main(void) ...@@ -22,15 +22,15 @@ main(void)
result = 0; result = 0;
vresult = 0; vresult = 0;
/* '+' reductions. */ /* 'max' reductions. */
#pragma acc parallel vector_length (vl) copy(result) #pragma acc parallel vector_length (vl) copy(result)
#pragma acc loop reduction (+:result) #pragma acc loop reduction (max:result)
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
result += array[i]; result = result > array[i] ? result : array[i];
/* Verify the reduction. */ /* Verify the reduction. */
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
vresult += array[i]; vresult = vresult > array[i] ? vresult : array[i];
if (result != vresult) if (result != vresult)
abort (); abort ();
...@@ -38,51 +38,18 @@ main(void) ...@@ -38,51 +38,18 @@ main(void)
result = 0; result = 0;
vresult = 0; vresult = 0;
/* '*' reductions. */ /* 'min' reductions. */
#pragma acc parallel vector_length (vl) copy(result) #pragma acc parallel vector_length (vl) copy(result)
#pragma acc loop reduction (*:result) #pragma acc loop reduction (min:result)
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
result *= array[i]; result = result < array[i] ? result : array[i];
/* Verify the reduction. */ /* Verify the reduction. */
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
vresult *= array[i]; vresult = vresult < array[i] ? vresult : array[i];
if (fabs(result - vresult) > .0001) if (result != vresult)
abort (); abort ();
// result = 0;
// vresult = 0;
//
// /* 'max' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (+:result)
// for (i = 0; i < n; i++)
// result = result > array[i] ? result : array[i];
//
// /* Verify the reduction. */
// for (i = 0; i < n; i++)
// vresult = vresult > array[i] ? vresult : array[i];
//
// printf("%d != %d\n", result, vresult);
// if (result != vresult)
// abort ();
//
// result = 0;
// vresult = 0;
//
// /* 'min' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (+:result)
// for (i = 0; i < n; i++)
// result = result < array[i] ? result : array[i];
//
// /* Verify the reduction. */
// for (i = 0; i < n; i++)
// vresult = vresult < array[i] ? vresult : array[i];
//
// printf("%d != %d\n", result, vresult);
// if (result != vresult)
// abort ();
result = 5; result = 5;
vresult = 5; vresult = 5;
......
...@@ -23,76 +23,6 @@ main(void) ...@@ -23,76 +23,6 @@ main(void)
result = 0; result = 0;
vresult = 0; vresult = 0;
/* '+' reductions. */
#pragma acc parallel vector_length (vl) copy(result)
#pragma acc loop reduction (+:result)
for (i = 0; i < n; i++)
result += array[i];
/* Verify the reduction. */
for (i = 0; i < n; i++)
vresult += array[i];
if (result != vresult)
abort ();
result = 0;
vresult = 0;
/* Needs support for complex multiplication. */
// /* '*' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (*:result)
// for (i = 0; i < n; i++)
// result *= array[i];
//
// /* Verify the reduction. */
// for (i = 0; i < n; i++)
// vresult *= array[i];
//
// if (fabs(result - vresult) > .0001)
// abort ();
// result = 0;
// vresult = 0;
// /* 'max' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (+:result)
// for (i = 0; i < n; i++)
// result = result > array[i] ? result : array[i];
//
// /* Verify the reduction. */
// for (i = 0; i < n; i++)
// vresult = vresult > array[i] ? vresult : array[i];
//
// printf("%d != %d\n", result, vresult);
// if (result != vresult)
// abort ();
//
// result = 0;
// vresult = 0;
//
// /* 'min' reductions. */
// #pragma acc parallel vector_length (vl)
// #pragma acc loop reduction (+:result)
// for (i = 0; i < n; i++)
// result = result < array[i] ? result : array[i];
//
// /* Verify the reduction. */
// for (i = 0; i < n; i++)
// vresult = vresult < array[i] ? vresult : array[i];
//
// printf("%d != %d\n", result, vresult);
// if (result != vresult)
// abort ();
result = 5;
vresult = 5;
lresult = false;
lvresult = false;
/* '&&' reductions. */ /* '&&' reductions. */
#pragma acc parallel vector_length (vl) copy(lresult) #pragma acc parallel vector_length (vl) copy(lresult)
#pragma acc loop reduction (&&:lresult) #pragma acc loop reduction (&&:lresult)
......
...@@ -3,10 +3,11 @@ ...@@ -3,10 +3,11 @@
/* Double float has 53 bits of fraction. */ /* Double float has 53 bits of fraction. */
#define FRAC (1.0 / (1LL << 48)) #define FRAC (1.0 / (1LL << 48))
typedef double _Complex Type;
int close_enough (double _Complex a, double _Complex b) int close_enough (Type a, Type b)
{ {
double _Complex diff = a - b; Type diff = a - b;
double mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a); double mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
double mag2_diff = (__real__(diff) * __real__ (diff) double mag2_diff = (__real__(diff) * __real__ (diff)
+ __imag__ (diff) * __imag__ (diff)); + __imag__ (diff) * __imag__ (diff));
...@@ -17,9 +18,9 @@ int close_enough (double _Complex a, double _Complex b) ...@@ -17,9 +18,9 @@ int close_enough (double _Complex a, double _Complex b)
#define N 100 #define N 100
static int __attribute__ ((noinline)) static int __attribute__ ((noinline))
vector (double _Complex ary[N], double _Complex sum, double _Complex prod) vector (Type ary[N], Type sum, Type prod)
{ {
double _Complex tsum = 0, tprod = 1; Type tsum = 0, tprod = 1;
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod) #pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
{ {
...@@ -41,9 +42,9 @@ vector (double _Complex ary[N], double _Complex sum, double _Complex prod) ...@@ -41,9 +42,9 @@ vector (double _Complex ary[N], double _Complex sum, double _Complex prod)
} }
static int __attribute__ ((noinline)) static int __attribute__ ((noinline))
worker (double _Complex ary[N], double _Complex sum, double _Complex prod) worker (Type ary[N], Type sum, Type prod)
{ {
double _Complex tsum = 0, tprod = 1; Type tsum = 0, tprod = 1;
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod) #pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
{ {
...@@ -65,9 +66,9 @@ worker (double _Complex ary[N], double _Complex sum, double _Complex prod) ...@@ -65,9 +66,9 @@ worker (double _Complex ary[N], double _Complex sum, double _Complex prod)
} }
static int __attribute__ ((noinline)) static int __attribute__ ((noinline))
gang (double _Complex ary[N], double _Complex sum, double _Complex prod) gang (Type ary[N], Type sum, Type prod)
{ {
double _Complex tsum = 0, tprod = 1; Type tsum = 0, tprod = 1;
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod) #pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
{ {
...@@ -90,7 +91,7 @@ gang (double _Complex ary[N], double _Complex sum, double _Complex prod) ...@@ -90,7 +91,7 @@ gang (double _Complex ary[N], double _Complex sum, double _Complex prod)
int main (void) int main (void)
{ {
double _Complex ary[N], sum = 0, prod = 1; Type ary[N], sum = 0, prod = 1;
for (int ix = 0; ix < N; ix++) for (int ix = 0; ix < N; ix++)
{ {
......
...@@ -3,10 +3,11 @@ ...@@ -3,10 +3,11 @@
/* Single float has 23 bits of fraction. */ /* Single float has 23 bits of fraction. */
#define FRAC (1.0f / (1 << 20)) #define FRAC (1.0f / (1 << 20))
typedef float _Complex Type;
int close_enough (float _Complex a, float _Complex b) int close_enough (Type a, Type b)
{ {
float _Complex diff = a - b; Type diff = a - b;
float mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a); float mag2_a = __real__(a) * __real__ (a) + __imag__ (a) * __imag__ (a);
float mag2_diff = (__real__(diff) * __real__ (diff) float mag2_diff = (__real__(diff) * __real__ (diff)
+ __imag__ (diff) * __imag__ (diff)); + __imag__ (diff) * __imag__ (diff));
...@@ -17,9 +18,9 @@ int close_enough (float _Complex a, float _Complex b) ...@@ -17,9 +18,9 @@ int close_enough (float _Complex a, float _Complex b)
#define N 100 #define N 100
static int __attribute__ ((noinline)) static int __attribute__ ((noinline))
vector (float _Complex ary[N], float _Complex sum, float _Complex prod) vector (Type ary[N], Type sum, Type prod)
{ {
float _Complex tsum = 0, tprod = 1; Type tsum = 0, tprod = 1;
#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod) #pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (tsum, tprod)
{ {
...@@ -41,9 +42,9 @@ vector (float _Complex ary[N], float _Complex sum, float _Complex prod) ...@@ -41,9 +42,9 @@ vector (float _Complex ary[N], float _Complex sum, float _Complex prod)
} }
static int __attribute__ ((noinline)) static int __attribute__ ((noinline))
worker (float _Complex ary[N], float _Complex sum, float _Complex prod) worker (Type ary[N], Type sum, Type prod)
{ {
float _Complex tsum = 0, tprod = 1; Type tsum = 0, tprod = 1;
#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod) #pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (tsum, tprod)
{ {
...@@ -65,9 +66,9 @@ worker (float _Complex ary[N], float _Complex sum, float _Complex prod) ...@@ -65,9 +66,9 @@ worker (float _Complex ary[N], float _Complex sum, float _Complex prod)
} }
static int __attribute__ ((noinline)) static int __attribute__ ((noinline))
gang (float _Complex ary[N], float _Complex sum, float _Complex prod) gang (Type ary[N], Type sum, Type prod)
{ {
float _Complex tsum = 0, tprod = 1; Type tsum = 0, tprod = 1;
#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod) #pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (tsum, tprod)
{ {
...@@ -90,7 +91,7 @@ gang (float _Complex ary[N], float _Complex sum, float _Complex prod) ...@@ -90,7 +91,7 @@ gang (float _Complex ary[N], float _Complex sum, float _Complex prod)
int main (void) int main (void)
{ {
float _Complex ary[N], sum = 0, prod = 1; Type ary[N], sum = 0, prod = 1;
for (int ix = 0; ix < N; ix++) for (int ix = 0; ix < N; ix++)
{ {
......
/* Relative tolerance used when comparing reduction results.  Double float
   has 53 bits of fraction; 2^-48 is looser than that, presumably to absorb
   rounding differences between differently-ordered reductions.  */
#define FRAC (1.0 / (1LL << 48))

/* Element type exercised by this test.  */
typedef double Type;

/* Return nonzero if B is within a relative error of FRAC of A.

   A is the reference value and B the value being checked.  The error is
   scaled by the magnitude of A, so a negative reference is treated the
   same way as a positive one (dividing by a signed A would make every
   comparison against a negative reference succeed).  When A is zero there
   is no meaningful relative error, so B must match exactly.  */
int close_enough (Type a, Type b)
{
  Type diff = a - b;
  Type mag = a;

  if (diff < 0)
    diff = -diff;
  if (mag < 0)
    mag = -mag;

  /* Avoid 0/0 (NaN) and x/0 (inf): fall back to an exact comparison.  */
  if (mag == 0)
    return diff == 0;

  return diff / mag < FRAC;
}
/* Number of array elements fed to each reduction.  */
#define N 100

/* Accumulate the sum and product of the N elements of ARY inside an
   OpenACC parallel region, using a vector-partitioned loop with '+' and
   '*' reductions.  Return 0 if both results agree with the expected SUM
   and PROD according to close_enough, 1 otherwise.  */
static int __attribute__ ((noinline))
vector (Type ary[N], Type sum, Type prod)
{
  Type red_sum = 0;
  Type red_prod = 1;

#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (red_sum, red_prod)
  {
#pragma acc loop vector reduction(+:red_sum) reduction (*:red_prod)
    for (int i = 0; i < N; i++)
      {
        red_sum += ary[i];
        red_prod *= ary[i];
      }
  }

  /* The sum is checked first; the product only if the sum matched.  */
  return !close_enough (sum, red_sum) || !close_enough (prod, red_prod);
}
/* Accumulate the sum and product of the N elements of ARY inside an
   OpenACC parallel region, using a worker-partitioned loop with '+' and
   '*' reductions.  Return 0 if both results agree with the expected SUM
   and PROD according to close_enough, 1 otherwise.  */
static int __attribute__ ((noinline))
worker (Type ary[N], Type sum, Type prod)
{
  Type red_sum = 0;
  Type red_prod = 1;

#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (red_sum, red_prod)
  {
#pragma acc loop worker reduction(+:red_sum) reduction (*:red_prod)
    for (int i = 0; i < N; i++)
      {
        red_sum += ary[i];
        red_prod *= ary[i];
      }
  }

  /* The sum is checked first; the product only if the sum matched.  */
  return !close_enough (sum, red_sum) || !close_enough (prod, red_prod);
}
/* Accumulate the sum and product of the N elements of ARY inside an
   OpenACC parallel region, using a gang-partitioned loop with '+' and
   '*' reductions.  Return 0 if both results agree with the expected SUM
   and PROD according to close_enough, 1 otherwise.  */
static int __attribute__ ((noinline))
gang (Type ary[N], Type sum, Type prod)
{
  Type red_sum = 0;
  Type red_prod = 1;

#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (red_sum, red_prod)
  {
#pragma acc loop gang reduction(+:red_sum) reduction (*:red_prod)
    for (int i = 0; i < N; i++)
      {
        red_sum += ary[i];
        red_prod *= ary[i];
      }
  }

  /* The sum is checked first; the product only if the sum matched.  */
  return !close_enough (sum, red_sum) || !close_enough (prod, red_prod);
}
/* Fill an N-element array with the values 1 + ix/1024, compute the
   expected sum and product with a plain sequential loop, then run the
   vector, worker and gang reductions against them in that order.
   Returns 0 if all three agree, 1 as soon as one of them fails.  */
int main (void)
{
  Type ary[N];
  Type sum = 0;
  Type prod = 1;

  for (int i = 0; i < N; i++)
    {
      /* The element value is formed in single precision before being
         stored into the array.  */
      float value = i * (1.0f / 1024) + 1.0f;

      ary[i] = value;
      sum += ary[i];
      prod *= ary[i];
    }

  if (vector (ary, sum, prod)
      || worker (ary, sum, prod)
      || gang (ary, sum, prod))
    return 1;

  return 0;
}
/* Relative tolerance used when comparing reduction results.  Single float
   has 23 bits of fraction; 2^-20 is looser than that, presumably to absorb
   rounding differences between differently-ordered reductions.  */
#define FRAC (1.0f / (1 << 20))

/* Element type exercised by this test.  */
typedef float Type;

/* Return nonzero if B is within a relative error of FRAC of A.

   A is the reference value and B the value being checked.  The error is
   scaled by the magnitude of A, so a negative reference is treated the
   same way as a positive one (dividing by a signed A would make every
   comparison against a negative reference succeed).  When A is zero there
   is no meaningful relative error, so B must match exactly.  */
int close_enough (Type a, Type b)
{
  Type diff = a - b;
  Type mag = a;

  if (diff < 0)
    diff = -diff;
  if (mag < 0)
    mag = -mag;

  /* Avoid 0/0 (NaN) and x/0 (inf): fall back to an exact comparison.  */
  if (mag == 0)
    return diff == 0;

  return diff / mag < FRAC;
}
/* Number of array elements fed to each reduction.  */
#define N 100

/* Accumulate the sum and product of the N elements of ARY inside an
   OpenACC parallel region, using a vector-partitioned loop with '+' and
   '*' reductions.  Return 0 if both results agree with the expected SUM
   and PROD according to close_enough, 1 otherwise.  */
static int __attribute__ ((noinline))
vector (Type ary[N], Type sum, Type prod)
{
  Type red_sum = 0;
  Type red_prod = 1;

#pragma acc parallel vector_length(32) copyin(ary[0:N]) copy (red_sum, red_prod)
  {
#pragma acc loop vector reduction(+:red_sum) reduction (*:red_prod)
    for (int i = 0; i < N; i++)
      {
        red_sum += ary[i];
        red_prod *= ary[i];
      }
  }

  /* The sum is checked first; the product only if the sum matched.  */
  return !close_enough (sum, red_sum) || !close_enough (prod, red_prod);
}
/* Accumulate the sum and product of the N elements of ARY inside an
   OpenACC parallel region, using a worker-partitioned loop with '+' and
   '*' reductions.  Return 0 if both results agree with the expected SUM
   and PROD according to close_enough, 1 otherwise.  */
static int __attribute__ ((noinline))
worker (Type ary[N], Type sum, Type prod)
{
  Type red_sum = 0;
  Type red_prod = 1;

#pragma acc parallel num_workers(32) copyin(ary[0:N]) copy (red_sum, red_prod)
  {
#pragma acc loop worker reduction(+:red_sum) reduction (*:red_prod)
    for (int i = 0; i < N; i++)
      {
        red_sum += ary[i];
        red_prod *= ary[i];
      }
  }

  /* The sum is checked first; the product only if the sum matched.  */
  return !close_enough (sum, red_sum) || !close_enough (prod, red_prod);
}
/* Accumulate the sum and product of the N elements of ARY inside an
   OpenACC parallel region, using a gang-partitioned loop with '+' and
   '*' reductions.  Return 0 if both results agree with the expected SUM
   and PROD according to close_enough, 1 otherwise.  */
static int __attribute__ ((noinline))
gang (Type ary[N], Type sum, Type prod)
{
  Type red_sum = 0;
  Type red_prod = 1;

#pragma acc parallel num_gangs (32) copyin(ary[0:N]) copy (red_sum, red_prod)
  {
#pragma acc loop gang reduction(+:red_sum) reduction (*:red_prod)
    for (int i = 0; i < N; i++)
      {
        red_sum += ary[i];
        red_prod *= ary[i];
      }
  }

  /* The sum is checked first; the product only if the sum matched.  */
  return !close_enough (sum, red_sum) || !close_enough (prod, red_prod);
}
/* Fill an N-element array with the values 1 + ix/1024, compute the
   expected sum and product with a plain sequential loop, then run the
   vector, worker and gang reductions against them in that order.
   Returns 0 if all three agree, 1 as soon as one of them fails.  */
int main (void)
{
  Type ary[N];
  Type sum = 0;
  Type prod = 1;

  for (int i = 0; i < N; i++)
    {
      float value = i * (1.0f / 1024) + 1.0f;

      ary[i] = value;
      sum += ary[i];
      prod *= ary[i];
    }

  if (vector (ary, sum, prod)
      || worker (ary, sum, prod)
      || gang (ary, sum, prod))
    return 1;

  return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment