Commit 37115224 by Richard Sandiford Committed by Richard Sandiford

[AArch64] Split gcc.target/aarch64/sve/reduc_strict_3.c

This patch splits gcc.target/aarch64/sve/reduc_strict_3.c into one
test per function, so that it's easier to see what each scan is
matching and also so that we no longer rely on the number of times
that each dump message is printed.

The patch also generalises the tests to work with scalable vectors.
I think the test probably predates support for variable-length
loop-aware SLP.

2019-10-31  Richard Sandiford  <richard.sandiford@arm.com>

gcc/testsuite/
	* gcc.target/aarch64/sve/reduc_strict_3.c: Split all but the
	first function out into...
	* gcc.target/aarch64/sve/reduc_strict_4.c,
	* gcc.target/aarch64/sve/reduc_strict_5.c,
	* gcc.target/aarch64/sve/reduc_strict_6.c,
	* gcc.target/aarch64/sve/reduc_strict_7.c,
	* gcc.target/aarch64/sve/reduc_strict_8.c,
	* gcc.target/aarch64/sve/reduc_strict_9.c: ...these new tests.
	Test for scalable vectors instead of 256-bit vectors.

From-SVN: r277681
parent 6ff0cdeb
2019-10-31 Richard Sandiford <richard.sandiford@arm.com>
* gcc.target/aarch64/sve/reduc_strict_3.c: Split all but the
first function out into...
* gcc.target/aarch64/sve/reduc_strict_4.c,
* gcc.target/aarch64/sve/reduc_strict_5.c,
* gcc.target/aarch64/sve/reduc_strict_6.c,
* gcc.target/aarch64/sve/reduc_strict_7.c,
* gcc.target/aarch64/sve/reduc_strict_8.c,
* gcc.target/aarch64/sve/reduc_strict_9.c: ...these new tests.
Test for scalable vectors instead of 256-bit vectors.
2019-10-31 Jakub Jelinek <jakub@redhat.com>
PR fortran/92284
......
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fno-inline -msve-vector-bits=256 -fdump-tree-vect-details" } */
/* Disabling epilogues until we find a better way to deal with scans. */
/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
/* { dg-options "-O2 -ftree-vectorize" } */
double mat[100][4];
double mat2[100][8];
double mat3[100][12];
double mat4[100][3];
double mat[100][2];
double
slp_reduc_plus (int n)
......@@ -16,116 +11,8 @@ slp_reduc_plus (int n)
{
tmp = tmp + mat[i][0];
tmp = tmp + mat[i][1];
tmp = tmp + mat[i][2];
tmp = tmp + mat[i][3];
}
return tmp;
}
/* Sum the first N rows of mat2 into one double accumulator and return it.
   Each row contributes its eight columns in ascending order, so every
   addition depends on the result of the previous one.  mat2 is declared
   with 100 rows, so N above 100 would index past the array.  */
double
slp_reduc_plus2 (int n)
{
double tmp = 0.0;
for (int i = 0; i < n; i++)
{
/* Eight chained additions per row, written out in column order.  */
tmp = tmp + mat2[i][0];
tmp = tmp + mat2[i][1];
tmp = tmp + mat2[i][2];
tmp = tmp + mat2[i][3];
tmp = tmp + mat2[i][4];
tmp = tmp + mat2[i][5];
tmp = tmp + mat2[i][6];
tmp = tmp + mat2[i][7];
}
return tmp;
}
/* Sum the first N rows of mat3 into one double accumulator and return it.
   Each row contributes its twelve columns in ascending order, forming a
   single chain of dependent additions.  mat3 is declared with 100 rows,
   so N above 100 would index past the array.  */
double
slp_reduc_plus3 (int n)
{
double tmp = 0.0;
for (int i = 0; i < n; i++)
{
/* Twelve chained additions per row, written out in column order.  */
tmp = tmp + mat3[i][0];
tmp = tmp + mat3[i][1];
tmp = tmp + mat3[i][2];
tmp = tmp + mat3[i][3];
tmp = tmp + mat3[i][4];
tmp = tmp + mat3[i][5];
tmp = tmp + mat3[i][6];
tmp = tmp + mat3[i][7];
tmp = tmp + mat3[i][8];
tmp = tmp + mat3[i][9];
tmp = tmp + mat3[i][10];
tmp = tmp + mat3[i][11];
}
return tmp;
}
/* Accumulate the three columns of the first N rows of mat4 into
   OUT[0], OUT[1] and OUT[2].  Unlike the chained reductions, the three
   sums do not feed into each other: column K only ever updates OUT[K].
   OUT must have room for at least three doubles.  */
void
slp_non_chained_reduc (int n, double * restrict out)
{
/* Clear the three accumulators before summing.  */
for (int i = 0; i < 3; i++)
out[i] = 0;
for (int i = 0; i < n; i++)
{
/* One independent addition per column.  */
out[0] = out[0] + mat4[i][0];
out[1] = out[1] + mat4[i][1];
out[2] = out[2] + mat4[i][2];
}
}
/* Strict FP reductions shouldn't be used for the outer loops, only the
inner loops. */
/* Return the float sum of the 8x8 corner of I (rows 0-7, columns 0-7 of
   an array whose rows hold 16 floats).  The outer loop selects the
   column A and the inner loop walks the rows B, so elements are added
   column by column into the single accumulator L.  */
float
double_reduc1 (float (*restrict i)[16])
{
float l = 0;
/* Per GCC's documentation, an unroll count of 0 blocks unrolling of the
   loop that follows.  */
#pragma GCC unroll 0
for (int a = 0; a < 8; a++)
for (int b = 0; b < 8; b++)
l += i[b][a];
return l;
}
/* Return the float sum obtained by adding elements I[0] .. I[63] in
   ascending order, repeated eight times into the same accumulator.
   The outer index A is not used in the body, so every outer iteration
   re-adds the same 64 elements.  */
float
double_reduc2 (float *restrict i)
{
float l = 0;
for (int a = 0; a < 8; a++)
for (int b = 0; b < 16; b++)
{
/* Four consecutive elements per inner iteration, added in order.  */
l += i[b * 4];
l += i[b * 4 + 1];
l += i[b * 4 + 2];
l += i[b * 4 + 3];
}
return l;
}
/* Run two separate float accumulations inside the same loop nest and
   return their product.  K sums I[0] .. I[7] and L sums J[0] .. J[7];
   the outer index A is not used in the body, so each set of eight
   elements is added eight times over.  */
float
double_reduc3 (float *restrict i, float *restrict j)
{
float k = 0, l = 0;
for (int a = 0; a < 8; a++)
for (int b = 0; b < 8; b++)
{
/* The two accumulators never feed into each other.  */
k += i[b];
l += j[b];
}
return l * k;
}
/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 4 } } */
/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 9 } } */
/* 1 reduction each for double_reduc{1,2} and 2 for double_reduc3. Each one
is reported three times, once for SVE, once for 128-bit AdvSIMD and once
for 64-bit AdvSIMD. */
/* { dg-final { scan-tree-dump-times "Detected double reduction" 12 "vect" } } */
/* double_reduc2 has 2 reductions and slp_non_chained_reduc has 3. */
/* { dg-final { scan-tree-dump-times "Detected reduction" 10 "vect" } } */
/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d\n} 1 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
/* Input matrix: 100 rows of 8 doubles, one column per addition in the
   loop body below.  */
double mat[100][8];
/* Sum the first N rows of mat into one double accumulator and return it.
   Each row contributes its eight columns in ascending order, so every
   addition depends on the result of the previous one.  N above 100
   would index past mat.  */
double
slp_reduc_plus (int n)
{
double tmp = 0.0;
for (int i = 0; i < n; i++)
{
/* Eight chained additions per row, written out in column order.  */
tmp = tmp + mat[i][0];
tmp = tmp + mat[i][1];
tmp = tmp + mat[i][2];
tmp = tmp + mat[i][3];
tmp = tmp + mat[i][4];
tmp = tmp + mat[i][5];
tmp = tmp + mat[i][6];
tmp = tmp + mat[i][7];
}
return tmp;
}
/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 4 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
/* Input matrix: 100 rows of 12 doubles, one column per addition in the
   loop body below.  */
double mat[100][12];
/* Sum the first N rows of mat into one double accumulator and return it.
   Each row contributes its twelve columns in ascending order, forming a
   single chain of dependent additions.  N above 100 would index past
   mat.  */
double
slp_reduc_plus (int n)
{
double tmp = 0.0;
for (int i = 0; i < n; i++)
{
/* Twelve chained additions per row, written out in column order.  */
tmp = tmp + mat[i][0];
tmp = tmp + mat[i][1];
tmp = tmp + mat[i][2];
tmp = tmp + mat[i][3];
tmp = tmp + mat[i][4];
tmp = tmp + mat[i][5];
tmp = tmp + mat[i][6];
tmp = tmp + mat[i][7];
tmp = tmp + mat[i][8];
tmp = tmp + mat[i][9];
tmp = tmp + mat[i][10];
tmp = tmp + mat[i][11];
}
return tmp;
}
/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 6 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize" } */
/* Only mat4 is read by the function below; mat, mat2 and mat3 are not
   referenced by any code shown in this file.  */
double mat[100][4];
double mat2[100][8];
double mat3[100][12];
double mat4[100][3];
/* Accumulate the three columns of the first N rows of mat4 into
   OUT[0], OUT[1] and OUT[2].  The three sums do not feed into each
   other: column K only ever updates OUT[K].  OUT must have room for at
   least three doubles, and N above 100 would index past mat4.  */
void
slp_non_chained_reduc (int n, double * restrict out)
{
/* Clear the three accumulators before summing.  */
for (int i = 0; i < 3; i++)
out[i] = 0;
for (int i = 0; i < n; i++)
{
/* One independent addition per column.  */
out[0] = out[0] + mat4[i][0];
out[1] = out[1] + mat4[i][1];
out[2] = out[2] + mat4[i][2];
}
}
/* { dg-final { scan-assembler-times {\tld3d\t} 1 } } */
/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 3 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
/* Strict FP reductions shouldn't be used for the outer loop, only the
inner loop. */
/* Return the float sum of rows 0-99, columns 0-7 of I, an array whose
   rows hold 16 floats.  The outer loop selects the column A and the
   inner loop walks the 100 rows B, so elements are added column by
   column into the single accumulator L.  */
float
double_reduc (float (*i)[16])
{
float l = 0;
/* Per GCC's documentation, an unroll count of 0 blocks unrolling of the
   loop that follows.  */
#pragma GCC unroll 0
for (int a = 0; a < 8; a++)
for (int b = 0; b < 100; b++)
l += i[b][a];
return l;
}
/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-tree-dump "Detected double reduction" "vect" } } */
/* { dg-final { scan-tree-dump-not "OUTER LOOP VECTORIZED" "vect" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
/* Return the float sum obtained by adding elements I[0] .. I[63] in
   ascending order, repeated eight times into the same accumulator.
   The outer index A is not used in the body, so every outer iteration
   re-adds the same 64 elements.  */
float
double_reduc (float *i)
{
float l = 0;
for (int a = 0; a < 8; a++)
for (int b = 0; b < 16; b++)
{
/* Four consecutive elements per inner iteration, added in order.  */
l += i[b * 4];
l += i[b * 4 + 1];
l += i[b * 4 + 2];
l += i[b * 4 + 3];
}
return l;
}
/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-tree-dump "Detected double reduction" "vect" } } */
/* { dg-final { scan-tree-dump-not "OUTER LOOP VECTORIZED" "vect" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details" } */
/* Run two separate float accumulations inside the same loop nest and
   return their product.  K sums I[0] .. I[99] and L sums J[0] .. J[99];
   the outer index A is not used in the body, so each set of 100
   elements is added eight times over.  */
float
double_reduc (float *i, float *j)
{
float k = 0, l = 0;
for (int a = 0; a < 8; a++)
for (int b = 0; b < 100; b++)
{
/* The two accumulators never feed into each other.  */
k += i[b];
l += j[b];
}
return l * k;
}
/* { dg-final { scan-assembler-times {\tld1w\t} 2 } } */
/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-tree-dump "Detected double reduction" "vect" } } */
/* { dg-final { scan-tree-dump-not "OUTER LOOP VECTORIZED" "vect" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment