Commit 45e8c984 by Aaron Sawdey

rs6000: Add execution tests for mma builtins [v4]

This patch adds execution tests that use the MMA builtins and
check for the right answer, and new tests that check whether
__builtin_cpu_supports and __builtin_cpu_is return sane
answers for power10.

2020-06-30  Rajalakshmi Srinivasaraghavan  <rajis@linux.vnet.ibm.com>
	    Aaron Sawdey  <acsawdey@linux.ibm.com>

gcc/testsuite/
	* gcc.target/powerpc/p10-identify.c: New file.
	* gcc.target/powerpc/p10-arch31.c: New file.
	* gcc.target/powerpc/mma-single-test.c: New file.
	* gcc.target/powerpc/mma-double-test.c: New file.

(cherry picked from commit ed1d3639e42dccc9372f11c6231c3ffe0589fd1c)
parent a5c9fb79
/* { dg-do run } */
/* { dg-require-effective-target power10_hw } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
#include <stdio.h>
#include <stdlib.h>
#include <altivec.h>
/* Generic 16-byte vector of unsigned char; used below as the untyped
   operand type when slices of A and B are handed to the MMA builtins.  */
typedef unsigned char vec_t __attribute__ ((vector_size (16)));
/* 16-byte vector of double.  NOTE(review): despite the "v4sf" name, the
   element type in this file is double, so with an 8-byte double each
   vector holds two elements, not four.  */
typedef double v4sf_t __attribute__ ((vector_size (16)));
/* Add the contents of accumulator ACC into the result matrix.

   ACC is a pointer to a __vector_quad.  It is disassembled into
   result[0..3], and those four vectors are then added, in reverse order
   (result[3] first), to the vectors located at CO[0*ldc+J], CO[1*ldc+J],
   CO[2*ldc+J] and CO[3*ldc+J].

   The macro relies on `result', `rowC' and `CO' being in scope at the
   point of expansion.  It is wrapped in do { } while (0) so that
   `SAVE_ACC (...);' is a single statement, and its arguments are
   parenthesized so that expressions can be passed safely.  */
#define SAVE_ACC(ACC, ldc, J) \
  do \
    { \
      __builtin_mma_disassemble_acc (result, (ACC)); \
      rowC = (v4sf_t *) &CO[0 * (ldc) + (J)]; \
      rowC[0] += result[3]; \
      rowC = (v4sf_t *) &CO[1 * (ldc) + (J)]; \
      rowC[0] += result[2]; \
      rowC = (v4sf_t *) &CO[2 * (ldc) + (J)]; \
      rowC[0] += result[1]; \
      rowC = (v4sf_t *) &CO[3 * (ldc) + (J)]; \
      rowC[0] += result[0]; \
    } \
  while (0)
/* Accumulate a matrix product into C using the MMA builtins on doubles.

   m, n and k are the loop bounds: the outer loop steps through n in
   blocks of 4, the middle loop steps through m in blocks of 16, and the
   innermost loop runs k times.

   A is read 16 doubles at a time (AO[i * 16] onwards) and advances by
   k * 16 per block of m.  B is read 4 doubles at a time (BO[i * 4]
   onwards) and advances by k * 4 per block of n.  C is updated in place
   (+=) through SAVE_ACC with a leading dimension of m, i.e. the element
   for (row, col) lives at C[col * m + row].

   NOTE(review): the loops assume m is a multiple of 16 and n a multiple
   of 4; there is no remainder handling.  */
void
MMA (int m, int n, int k, double *A, double *B, double *C)
{
__vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
/* Scratch space used by SAVE_ACC to unpack one accumulator.  */
v4sf_t result[4];
v4sf_t *rowC;
/* One pass per block of 4 columns of C.  */
for (int l = 0; l < n; l += 4)
{
double *CO;
double *AO;
/* A is re-read from the start for every column block.  */
AO = A;
/* CO addresses the current column block; C is moved on to the next.  */
CO = C;
C += m * 4;
/* One pass per block of 16 rows of C.  */
for (int j = 0; j < m; j += 16)
{
double *BO = B;
/* Zero all eight accumulators before summing over k.  */
__builtin_mma_xxsetaccz (&acc0);
__builtin_mma_xxsetaccz (&acc1);
__builtin_mma_xxsetaccz (&acc2);
__builtin_mma_xxsetaccz (&acc3);
__builtin_mma_xxsetaccz (&acc4);
__builtin_mma_xxsetaccz (&acc5);
__builtin_mma_xxsetaccz (&acc6);
__builtin_mma_xxsetaccz (&acc7);
/* NOTE(review): i is unsigned long while k is int, so the comparison
   below converts k to unsigned; a negative k would not end the loop
   immediately.  */
unsigned long i;
for (i = 0; i < k; i++)
{
/* 16 doubles of A viewed as eight 16-byte vectors rowA[0..7].  */
vec_t *rowA = (vec_t *) & AO[i * 16];
__vector_pair rowB;
/* 4 doubles of B viewed as two 16-byte vectors, combined into a pair.  */
vec_t *rb = (vec_t *) & BO[i * 4];
__builtin_mma_assemble_pair (&rowB, rb[1], rb[0]);
/* Accumulator N is updated from the B pair and vector N of A.  */
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
__builtin_mma_xvf64gerpp (&acc3, rowB, rowA[3]);
__builtin_mma_xvf64gerpp (&acc4, rowB, rowA[4]);
__builtin_mma_xvf64gerpp (&acc5, rowB, rowA[5]);
__builtin_mma_xvf64gerpp (&acc6, rowB, rowA[6]);
__builtin_mma_xvf64gerpp (&acc7, rowB, rowA[7]);
}
/* Accumulator N is stored at row offset 2 * N within the block.  The
   calls are not in numeric order, but every offset 0, 2, ..., 14 is
   written exactly once.  */
SAVE_ACC (&acc0, m, 0);
SAVE_ACC (&acc2, m, 4);
SAVE_ACC (&acc1, m, 2);
SAVE_ACC (&acc3, m, 6);
SAVE_ACC (&acc4, m, 8);
SAVE_ACC (&acc6, m, 12);
SAVE_ACC (&acc5, m, 10);
SAVE_ACC (&acc7, m, 14);
AO += k * 16;
/* This increment has no lasting effect: BO is declared afresh and
   reset to B at the top of every iteration of this loop.  */
BO += k * 4;
CO += 16;
}
B += k * 4;
}
}
/* Fill a ROW x COLUMN matrix, stored column by column, with the
   deterministic value (i * 16 + 2 + j) / 0.123 for row i, column j.  */
void
init (double *matrix, int row, int column)
{
  for (int col = 0; col < column; col++)
    {
      for (int r = 0; r < row; r++)
        {
          const int idx = col * row + r;
          matrix[idx] = (r * 16 + 2 + col) / 0.123;
        }
    }
}
/* Zero the first ROW * COLUMN elements of both MATRIX and MATRIX1,
   walking them column by column.  */
void
init0 (double *matrix, double *matrix1, int row, int column)
{
  for (int col = 0; col < column; col++)
    {
      for (int r = 0; r < row; r++)
        {
          const int idx = col * row + r;
          matrix1[idx] = 0;
          matrix[idx] = 0;
        }
    }
}
/* Dump a column-major ROW x COLUMN matrix to stdout, one matrix row per
   output line, preceded by a header naming the matrix.  */
void
print (const char *name, const double *matrix, int row, int column)
{
  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
  for (int r = 0; r < row; r++)
    {
      for (int c = 0; c < column; c++)
        {
          const double value = matrix[c * row + r];
          printf ("%f ", value);
        }
      putchar ('\n');
    }
  putchar ('\n');
}
/* Driver for the double-precision MMA test.

   For every rowsA in 16, 32, ..., 128 and colsB in 4, 8, 12, 16 (with a
   common dimension of 1) it fills A and B with deterministic values,
   computes C with MMA () and a reference product D with plain loops,
   and counts every element where the two differ.

   Returns EXIT_SUCCESS when no mismatch was found and EXIT_FAILURE
   otherwise.  The raw error count is deliberately not returned: only
   the low 8 bits of an exit status reach the parent process, and if
   every element mismatched the total (23040 = 90 * 256) would be
   reported as 0, i.e. as a pass.  */
int
main (int argc, char *argv[])
{
  int rowsA, colsB, common;
  int i, j, k;
  int ret = 0;

  for (int t = 16; t <= 128; t += 16)
    {
      for (int t1 = 4; t1 <= 16; t1 += 4)
        {
          rowsA = t;
          colsB = t1;
          common = 1;
          /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
          double A[rowsA * common];
          double B[common * colsB];
          double C[rowsA * colsB];
          double D[rowsA * colsB];

          init (A, rowsA, common);
          init (B, common, colsB);
          init0 (C, D, rowsA, colsB);

          /* Result under test.  */
          MMA (rowsA, colsB, common, A, B, C);

          /* Reference result computed with scalar arithmetic.  */
          for (i = 0; i < colsB; i++)
            {
              for (j = 0; j < rowsA; j++)
                {
                  D[i * rowsA + j] = 0;
                  for (k = 0; k < common; k++)
                    {
                      D[i * rowsA + j] +=
                        A[k * rowsA + j] * B[k + common * i];
                    }
                }
            }

          /* Exact comparison; each mismatch is reported once per k.  */
          for (i = 0; i < colsB; i++)
            {
              for (j = 0; j < rowsA; j++)
                {
                  for (k = 0; k < common; k++)
                    {
                      if (D[i * rowsA + j] != C[i * rowsA + j])
                        {
                          printf ("Error %d,%d,%d\n",i,j,k);
                          ret++;
                        }
                    }
                }
            }

          /* ret accumulates across sizes, so once any size has failed
             the matrices of every later size are dumped as well.  */
          if (ret)
            {
              print ("A", A, rowsA, common);
              print ("B", B, common, colsB);
              print ("C", C, rowsA, colsB);
              print ("D", D, rowsA, colsB);
            }
        }
    }

#ifdef VERBOSE
  if (ret)
    printf ("MMA double test fail: %d errors\n",ret);
  else
    printf ("MMA double test success: 0 MMA errors\n");
#endif

  return ret ? EXIT_FAILURE : EXIT_SUCCESS;
}
/* { dg-do run } */
/* { dg-require-effective-target power10_hw } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
#include <stdio.h>
#include <stdlib.h>
#include <altivec.h>
/* Generic 16-byte vector of unsigned char; used below as the untyped
   operand type when slices of A and B are handed to the MMA builtins.  */
typedef unsigned char vec_t __attribute__ ((vector_size (16)));
/* 16-byte vector of float (four elements with a 4-byte float); the type
   through which the result matrix is updated.  */
typedef float v4sf_t __attribute__ ((vector_size (16)));
/* Add the contents of accumulator ACC into columns 0-3 of the current
   block of the result matrix.

   ACC is a pointer to a __vector_quad.  It is disassembled into
   result[0..3], and those four vectors are then added, in reverse order
   (result[3] first), to the vectors located at CO[0*ldc+J], CO[1*ldc+J],
   CO[2*ldc+J] and CO[3*ldc+J].

   The macro relies on `result', `rowC' and `CO' being in scope at the
   point of expansion.  It is wrapped in do { } while (0) so that
   `SAVE_ACC (...);' is a single statement, and its arguments are
   parenthesized so that expressions can be passed safely.  */
#define SAVE_ACC(ACC, ldc, J) \
  do \
    { \
      __builtin_mma_disassemble_acc (result, (ACC)); \
      rowC = (v4sf_t *) &CO[0 * (ldc) + (J)]; \
      rowC[0] += result[3]; \
      rowC = (v4sf_t *) &CO[1 * (ldc) + (J)]; \
      rowC[0] += result[2]; \
      rowC = (v4sf_t *) &CO[2 * (ldc) + (J)]; \
      rowC[0] += result[1]; \
      rowC = (v4sf_t *) &CO[3 * (ldc) + (J)]; \
      rowC[0] += result[0]; \
    } \
  while (0)
/* Same as SAVE_ACC, but targets columns 4-7 of the current block: the
   four vectors unpacked from ACC are added, in reverse order (result[3]
   first), to the vectors at CO[4*ldc+J], CO[5*ldc+J], CO[6*ldc+J] and
   CO[7*ldc+J].

   The macro relies on `result', `rowC' and `CO' being in scope at the
   point of expansion.  It is wrapped in do { } while (0) so that
   `SAVE_ACC1 (...);' is a single statement, and its arguments are
   parenthesized so that expressions can be passed safely.  */
#define SAVE_ACC1(ACC, ldc, J) \
  do \
    { \
      __builtin_mma_disassemble_acc (result, (ACC)); \
      rowC = (v4sf_t *) &CO[4 * (ldc) + (J)]; \
      rowC[0] += result[3]; \
      rowC = (v4sf_t *) &CO[5 * (ldc) + (J)]; \
      rowC[0] += result[2]; \
      rowC = (v4sf_t *) &CO[6 * (ldc) + (J)]; \
      rowC[0] += result[1]; \
      rowC = (v4sf_t *) &CO[7 * (ldc) + (J)]; \
      rowC[0] += result[0]; \
    } \
  while (0)
/* Accumulate a matrix product into C using the MMA builtins on floats.

   m, n and k are the loop bounds: the outer loop steps through n in
   blocks of 8, the middle loop steps through m in blocks of 16, and the
   innermost loop runs k times.

   A is read 16 floats at a time (AO[i * 16] onwards) and advances by
   k * 16 per block of m.  B is read 8 floats at a time (BO[i * 8]
   onwards) and advances by k * 8 per block of n.  C is updated in place
   (+=) through SAVE_ACC / SAVE_ACC1 with a leading dimension of m, i.e.
   the element for (row, col) lives at C[col * m + row].

   NOTE(review): the loops assume m is a multiple of 16 and n a multiple
   of 8; there is no remainder handling.  */
void
MMA (int m, int n, int k, float *A, float *B, float *C)
{
__vector_quad acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7;
/* Scratch space used by SAVE_ACC / SAVE_ACC1 to unpack one accumulator.  */
v4sf_t result[4];
v4sf_t *rowC;
/* One pass per block of 8 columns of C.  */
for (int l = 0; l < n; l += 8)
{
float *CO;
float *AO;
/* A is re-read from the start for every column block.  */
AO = A;
/* CO addresses the current column block; C is moved on to the next.  */
CO = C;
C += m * 8;
/* One pass per block of 16 rows of C.  */
for (int j = 0; j < m; j += 16)
{
float *BO = B;
/* Zero all eight accumulators before summing over k.  */
__builtin_mma_xxsetaccz (&acc0);
__builtin_mma_xxsetaccz (&acc1);
__builtin_mma_xxsetaccz (&acc2);
__builtin_mma_xxsetaccz (&acc3);
__builtin_mma_xxsetaccz (&acc4);
__builtin_mma_xxsetaccz (&acc5);
__builtin_mma_xxsetaccz (&acc6);
__builtin_mma_xxsetaccz (&acc7);
/* NOTE(review): i is unsigned long while k is int, so the comparison
   below converts k to unsigned; a negative k would not end the loop
   immediately.  */
unsigned long i;
for (i = 0; i < k; i++)
{
/* 16 floats of A viewed as four 16-byte vectors rowA[0..3].  */
vec_t *rowA = (vec_t *) & AO[i * 16];
/* 8 floats of B viewed as two 16-byte vectors rowB[0..1].  */
vec_t *rowB = (vec_t *) & BO[i * 8];
/* Even accumulators pair rowB[0] with rowA[0..3]; odd accumulators
   pair rowB[1] with the same vectors of A.  */
__builtin_mma_xvf32gerpp (&acc0, rowB[0], rowA[0]);
__builtin_mma_xvf32gerpp (&acc1, rowB[1], rowA[0]);
__builtin_mma_xvf32gerpp (&acc2, rowB[0], rowA[1]);
__builtin_mma_xvf32gerpp (&acc3, rowB[1], rowA[1]);
__builtin_mma_xvf32gerpp (&acc4, rowB[0], rowA[2]);
__builtin_mma_xvf32gerpp (&acc5, rowB[1], rowA[2]);
__builtin_mma_xvf32gerpp (&acc6, rowB[0], rowA[3]);
__builtin_mma_xvf32gerpp (&acc7, rowB[1], rowA[3]);
}
/* Even accumulators (built from rowB[0]) go to columns 0-3 via
   SAVE_ACC; odd ones (built from rowB[1]) go to columns 4-7 via
   SAVE_ACC1.  The accumulators built from rowA[p] use row offset
   4 * p.  */
SAVE_ACC (&acc0, m, 0);
SAVE_ACC (&acc2, m, 4);
SAVE_ACC1 (&acc1, m, 0);
SAVE_ACC1 (&acc3, m, 4);
SAVE_ACC (&acc4, m, 8);
SAVE_ACC (&acc6, m, 12);
SAVE_ACC1 (&acc5, m, 8);
SAVE_ACC1 (&acc7, m, 12);
AO += k * 16;
/* This increment has no lasting effect: BO is declared afresh and
   reset to B at the top of every iteration of this loop.  */
BO += k * 8;
CO += 16;
}
B += k * 8;
}
}
/* Fill a ROW x COLUMN matrix, stored column by column, with the
   deterministic value (i * 16 + 2 + j) / 0.123 for row i, column j.
   The quotient is evaluated in double and then narrowed to float.  */
void
init (float *matrix, int row, int column)
{
  for (int col = 0; col < column; col++)
    {
      for (int r = 0; r < row; r++)
        {
          const int idx = col * row + r;
          matrix[idx] = (r * 16 + 2 + col) / 0.123;
        }
    }
}
/* Zero the first ROW * COLUMN elements of both MATRIX and MATRIX1,
   walking them column by column.  */
void
init0 (float *matrix, float *matrix1, int row, int column)
{
  for (int col = 0; col < column; col++)
    {
      for (int r = 0; r < row; r++)
        {
          const int idx = col * row + r;
          matrix1[idx] = 0;
          matrix[idx] = 0;
        }
    }
}
/* Dump a column-major ROW x COLUMN matrix to stdout, one matrix row per
   output line, preceded by a header naming the matrix.  */
void
print (const char *name, const float *matrix, int row, int column)
{
  printf ("Matrix %s has %d rows and %d columns:\n", name, row, column);
  for (int r = 0; r < row; r++)
    {
      for (int c = 0; c < column; c++)
        {
          const float value = matrix[c * row + r];
          printf ("%f ", value);
        }
      putchar ('\n');
    }
  putchar ('\n');
}
/* Driver for the single-precision MMA test.

   For every rowsA in 16, 32, ..., 128 and colsB in 8, 16 (with a common
   dimension of 1) it fills A and B with deterministic values, computes
   C with MMA () and a reference product D with plain loops, and counts
   every element where the two differ.

   Returns EXIT_SUCCESS when no mismatch was found and EXIT_FAILURE
   otherwise.  The raw error count is deliberately not returned: only
   the low 8 bits of an exit status reach the parent process, and if
   every element mismatched the total (13824 = 54 * 256) would be
   reported as 0, i.e. as a pass.  */
int
main (int argc, char *argv[])
{
  int rowsA, colsB, common;
  int i, j, k;
  int ret = 0;

  for (int t = 16; t <= 128; t += 16)
    {
      for (int t1 = 8; t1 <= 16; t1 += 8)
        {
          rowsA = t;
          colsB = t1;
          common = 1;
          /* printf ("Running test for rows = %d,cols = %d\n", t, t1); */
          float A[rowsA * common];
          float B[common * colsB];
          float C[rowsA * colsB];
          float D[rowsA * colsB];

          init (A, rowsA, common);
          init (B, common, colsB);
          init0 (C, D, rowsA, colsB);

          /* Result under test.  */
          MMA (rowsA, colsB, common, A, B, C);

          /* Reference result computed with scalar arithmetic.  */
          for (i = 0; i < colsB; i++)
            {
              for (j = 0; j < rowsA; j++)
                {
                  D[i * rowsA + j] = 0;
                  for (k = 0; k < common; k++)
                    {
                      D[i * rowsA + j] +=
                        A[k * rowsA + j] * B[k + common * i];
                    }
                }
            }

          /* Exact comparison; each mismatch is reported once per k.  */
          for (i = 0; i < colsB; i++)
            {
              for (j = 0; j < rowsA; j++)
                {
                  for (k = 0; k < common; k++)
                    {
                      if (D[i * rowsA + j] != C[i * rowsA + j])
                        {
                          printf ("Error %d,%d,%d\n",i,j,k);
                          ret++;
                        }
                    }
                }
            }

          /* ret accumulates across sizes, so once any size has failed
             the matrices of every later size are dumped as well.  */
          if (ret)
            {
              print ("A", A, rowsA, common);
              print ("B", B, common, colsB);
              print ("C", C, rowsA, colsB);
              print ("D", D, rowsA, colsB);
            }
        }
    }

#ifdef VERBOSE
  if (ret)
    printf ("MMA single test fail: %d errors\n",ret);
  else
    printf ("MMA single test success: 0 MMA errors\n");
#endif

  return ret ? EXIT_FAILURE : EXIT_SUCCESS;
}
/* { dg-do run } */
/* { dg-require-effective-target power10_hw } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
/* This test will only run when the power10_hw_available test passes.
If that test passes, then we expect to see that ISA 3.1 is
supported. If this is not the case, then the test environment has
problems. */
#include <stdio.h>
#include <stdlib.h>
/* Exit with 1 and print a diagnostic if __builtin_cpu_supports does not
   report "arch_3_1"; exit with 0 otherwise.  When
   __BUILTIN_CPU_SUPPORTS__ is not defined the check is compiled out and
   the program always exits with 0.  */
int
main (int argc, char *argv[])
{
#ifdef __BUILTIN_CPU_SUPPORTS__
  if (__builtin_cpu_supports ("arch_3_1"))
    return 0;

  printf ("Error: __builtin_cpu_supports says arch_3_1 not supported, but power10_hw test passed.\n");
  return 1;
#else
  return 0;
#endif
}
/* { dg-do run } */
/* { dg-require-effective-target power10_hw } */
/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
/* This test will only run when the power10_hw_available test passes.
If that test passes, then we expect to see that the cpu is Power10.
If this is not the case, then the test environment has problems.
If in the future there are cpus that pass the power10_hw test but
are not power10, they will need to be added to this check. */
#include <stdio.h>
#include <stdlib.h>
/* Exit with 1 and print a diagnostic if __builtin_cpu_is does not
   report "power10"; exit with 0 otherwise.  When
   __BUILTIN_CPU_SUPPORTS__ is not defined the check is compiled out and
   the program always exits with 0.  */
int
main (int argc, char *argv[])
{
#ifdef __BUILTIN_CPU_SUPPORTS__
  if (__builtin_cpu_is ("power10"))
    return 0;

  printf ("Error: __builtin_cpu_is says this is not power10, but power10_hw test passed.\n");
  return 1;
#else
  return 0;
#endif
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment