Commit 6dce23a8 by James Greenhalgh Committed by James Greenhalgh

[AArch64] Rewrite v<max,min><nm><q><v>_<sfu><8, 16, 32, 64> intrinsics using builtins.

gcc/
	* config/aarch64/arm_neon.h
	(v<max,min><nm><q><v>_<sfu><8, 16, 32, 64>): Rewrite using builtins.

gcc/testsuite/
	* gcc.target/aarch64/vect-vmaxv.c: New.
	* gcc.target/aarch64/vect-vfmaxv.c: Likewise.

From-SVN: r198499
parent 1598945b
2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/arm_neon.h
(v<max,min><nm><q><v>_<sfu><8, 16, 32, 64>): Rewrite using builtins.
2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-builtins.c
(aarch64_gimple_fold_builtin): Fold reduc_<su><maxmin>_ builtins.
......
2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/vect-vmaxv.c: New.
* gcc.target/aarch64/vect-vfmaxv.c: Likewise.
2013-05-01 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/scalar-vca.c: New.
* gcc.target/aarch64/vect-vca.c: Likewise.
......
/* { dg-do run } */
/* { dg-options "-O3 --save-temps -ffast-math" } */
#include <arm_neon.h>
/* Prototypes are declared by hand; this file includes only <arm_neon.h>.  */
extern void abort (void);
extern float fabsf (float);
extern double fabs (double);
extern int isnan (double);
extern float fmaxf (float, float);
extern float fminf (float, float);
extern double fmax (double, double);
extern double fmin (double, double);

/* Number of elements in each input array below.  */
#define NUM_TESTS 16
/* Absolute tolerance applied by EQUALF and EQUALD.  */
#define DELTA 0.000001
#define NAN (0.0 / 0.0)

/* Input data.  Both arrays hold the same sequence of values, including one
   NaN at index 8.  */
float input_float32[] = {0.1f, -0.1f, 0.4f, 10.3f,
			 200.0f, -800.0f, -13.0f, -0.5f,
			 NAN, -870.0f, 10.4f, 310.11f,
			 0.0f, -865.0f, -2213.0f, -1.5f};
double input_float64[] = {0.1, -0.1, 0.4, 10.3,
			  200.0, -800.0, -13.0, -0.5,
			  NAN, -870.0, 10.4, 310.11,
			  0.0, -865.0, -2213.0, -1.5};

/* Approximate equality: true when a and b differ by less than DELTA.
   Each argument is parenthesized so that an operator expression passed as
   a or b is grouped before the subtraction.  */
#define EQUALF(a, b) (fabsf ((a) - (b)) < DELTA)
#define EQUALD(a, b) (fabs ((a) - (b)) < DELTA)
/* Floating point 'unordered' variants. */
#undef TEST
#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES, FLOAT) \
int \
test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t (void) \
{ \
int i, j; \
int moves = (NUM_TESTS - LANES) + 1; \
TYPE##_t out_l[NUM_TESTS]; \
TYPE##_t out_v[NUM_TESTS]; \
\
/* Calculate linearly. */ \
for (i = 0; i < moves; i++) \
{ \
out_l[i] = input_##TYPE[i]; \
for (j = 0; j < LANES; j++) \
{ \
if (isnan (out_l[i])) \
continue; \
if (isnan (input_##TYPE[i + j]) \
|| input_##TYPE[i + j] CMP_OP out_l[i]) \
out_l[i] = input_##TYPE[i + j]; \
} \
} \
\
/* Calculate using vector reduction intrinsics. */ \
for (i = 0; i < moves; i++) \
{ \
TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \
out_v[i] = v##MAXMIN##v##Q##_##SUFFIX (t1); \
} \
\
/* Compare. */ \
for (i = 0; i < moves; i++) \
{ \
if (!EQUAL##FLOAT (out_v[i], out_l[i]) \
&& !(isnan (out_v[i]) && isnan (out_l[i]))) \
return 0; \
} \
return 1; \
}
/* Expand TEST four times for TYPE: max and min, each once with W32 lanes
   and an empty Q argument and once with W64 lanes and Q set to "q".
   STYPE is forwarded as TEST's SUFFIX and F as its FLOAT argument.  */
#define BUILD_VARIANTS(TYPE, STYPE, W32, W64, F) \
TEST (max, >, STYPE, , TYPE, W32, F) \
TEST (max, >, STYPE, q, TYPE, W64, F) \
TEST (min, <, STYPE, , TYPE, W32, F) \
TEST (min, <, STYPE, q, TYPE, W64, F)
/* float32: both the two-lane and the four-lane ("q") forms.  */
BUILD_VARIANTS (float32, f32, 2, 4, F)
/* { dg-final { scan-assembler "fmaxp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
/* { dg-final { scan-assembler "fminp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
/* { dg-final { scan-assembler "fmaxv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
/* { dg-final { scan-assembler "fminv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
/* float64: only the two-lane "q" form is instantiated, so TEST is invoked
   directly rather than through BUILD_VARIANTS.  */
TEST (max, >, f64, q, float64, 2, D)
/* { dg-final { scan-assembler "fmaxp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */
TEST (min, <, f64, q, float64, 2, D)
/* { dg-final { scan-assembler "fminp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */
/* Floating point 'nm' variants. */
#undef TEST
#define TEST(MAXMIN, F, SUFFIX, Q, TYPE, LANES, FLOAT) \
int \
test_v##MAXMIN##nmv##SUFFIX##_##TYPE##x##LANES##_t (void) \
{ \
int i, j; \
int moves = (NUM_TESTS - LANES) + 1; \
TYPE##_t out_l[NUM_TESTS]; \
TYPE##_t out_v[NUM_TESTS]; \
\
/* Calculate linearly. */ \
for (i = 0; i < moves; i++) \
{ \
out_l[i] = input_##TYPE[i]; \
for (j = 0; j < LANES; j++) \
out_l[i] = f##MAXMIN##F (input_##TYPE[i + j], out_l[i]); \
} \
\
/* Calculate using vector reduction intrinsics. */ \
for (i = 0; i < moves; i++) \
{ \
TYPE##x##LANES##_t t1 = vld1##Q##_##SUFFIX (input_##TYPE + i); \
out_v[i] = v##MAXMIN##nmv##Q##_##SUFFIX (t1); \
} \
\
/* Compare. */ \
for (i = 0; i < moves; i++) \
{ \
if (!EQUAL##FLOAT (out_v[i], out_l[i])) \
return 0; \
} \
return 1; \
}
/* Instantiate the 'nm' tests.  The second argument picks the scalar library
   function ("f" gives fmaxf/fminf, empty gives fmax/fmin) and the last
   argument picks the comparison macro: F (EQUALF) for float32 data and
   D (EQUALD) for float64 data.  */
TEST (max, f, f32, , float32, 2, F)
/* { dg-final { scan-assembler "fmaxnmp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
TEST (min, f, f32, , float32, 2, F)
/* { dg-final { scan-assembler "fminnmp\\ts\[0-9\]+, v\[0-9\]+\.2s" } } */
TEST (max, f, f32, q, float32, 4, F)
/* { dg-final { scan-assembler "fmaxnmv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
TEST (min, f, f32, q, float32, 4, F)
/* { dg-final { scan-assembler "fminnmv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
TEST (max, , f64, q, float64, 2, D)
/* { dg-final { scan-assembler "fmaxnmp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */
TEST (min, , f64, q, float64, 2, D)
/* { dg-final { scan-assembler "fminnmp\\td\[0-9\]+, v\[0-9\]+\.2d" } } */
/* Driver.  TEST is redefined so that the instantiation lists used above
   expand into calls instead of definitions: first to the plain reduction
   tests, then (after a second redefinition inside main) to the 'nm' tests.
   Only MAXMIN, SUFFIX, TYPE and LANES take part in the pasted name.  A zero
   result from any test function aborts the program.  */
#undef TEST
#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES, FLOAT) \
  { \
    int ok = test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t (); \
    if (ok == 0) \
      abort (); \
  }

int
main (int argc, char **argv)
{
  /* Plain max/min reductions.  */
  BUILD_VARIANTS (float32, f32, 2, 4, F)
  TEST (max, >, f64, q, float64, 2, D)
  TEST (min, <, f64, q, float64, 2, D)

  /* 'nm' reductions: same lists, different pasted function name.  */
#undef TEST
#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES, FLOAT) \
  { \
    int ok = test_v##MAXMIN##nmv##SUFFIX##_##TYPE##x##LANES##_t (); \
    if (ok == 0) \
      abort (); \
  }
  BUILD_VARIANTS (float32, f32, 2, 4, F)
  TEST (max, >, f64, q, float64, 2, D)
  TEST (min, <, f64, q, float64, 2, D)

  return 0;
}
/* { dg-final { cleanup-saved-temps } } */
/* { dg-do run } */
/* { dg-options "-O3 --save-temps -ffast-math" } */
#include <arm_neon.h>
/* Declared by hand; this file includes only <arm_neon.h>.  */
extern void abort (void);

/* Number of elements in each input array below.  */
#define NUM_TESTS 16
/* Not referenced by EQUAL below, which compares exactly.  */
#define DELTA 0.000001

/* Input data.  The signed arrays share one sequence of values and the
   unsigned arrays share the same sequence with the signs dropped.  */
int8_t input_int8[] = {1, 56, 2, -9, -90, 23, 54, 76,
		       -4, 34, 110, -110, 6, 4, 75, -34};
int16_t input_int16[] = {1, 56, 2, -9, -90, 23, 54, 76,
			 -4, 34, 110, -110, 6, 4, 75, -34};
int32_t input_int32[] = {1, 56, 2, -9, -90, 23, 54, 76,
			 -4, 34, 110, -110, 6, 4, 75, -34};
uint8_t input_uint8[] = {1, 56, 2, 9, 90, 23, 54, 76,
			 4, 34, 110, 110, 6, 4, 75, 34};
uint16_t input_uint16[] = {1, 56, 2, 9, 90, 23, 54, 76,
			   4, 34, 110, 110, 6, 4, 75, 34};
uint32_t input_uint32[] = {1, 56, 2, 9, 90, 23, 54, 76,
			   4, 34, 110, 110, 6, 4, 75, 34};

/* Exact equality.  Each argument is parenthesized so that an operator
   expression passed as a or b is grouped before the comparison.  */
#define EQUAL(a, b) ((a) == (b))
/* TEST defines int test_v<MAXMIN>v<SUFFIX>_<TYPE>x<LANES>_t (void).  The
   generated function slides a LANES-wide window over input_<TYPE>, reduces
   each window once with a scalar loop driven by CMP_OP and once with the
   v<MAXMIN>v<Q>_<SUFFIX> intrinsic, and returns 1 if every pair of results
   satisfies EQUAL, 0 otherwise.  */
#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES) \
int \
test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t (void) \
{ \
  const int windows = NUM_TESTS - LANES + 1; \
  TYPE##_t expected[NUM_TESTS]; \
  TYPE##_t actual[NUM_TESTS]; \
  int w, lane; \
  \
  /* Scalar reference: keep whichever element wins under CMP_OP. */ \
  for (w = 0; w < windows; w++) \
    { \
      TYPE##_t best = input_##TYPE[w]; \
      for (lane = 0; lane < LANES; lane++) \
        { \
          if (input_##TYPE[w + lane] CMP_OP best) \
            best = input_##TYPE[w + lane]; \
        } \
      expected[w] = best; \
    } \
  \
  /* Same windows, reduced by the intrinsic under test. */ \
  for (w = 0; w < windows; w++) \
    actual[w] \
      = v##MAXMIN##v##Q##_##SUFFIX (vld1##Q##_##SUFFIX (input_##TYPE + w)); \
  \
  /* Any disagreement fails the test. */ \
  for (w = 0; w < windows; w++) \
    if (!EQUAL (actual[w], expected[w])) \
      return 0; \
  \
  return 1; \
}
/* Expand TEST four times for TYPE: max and min, each once with W32 lanes
   and an empty Q argument and once with W64 lanes and Q set to "q".
   STYPE is forwarded as TEST's SUFFIX argument.  */
#define BUILD_VARIANTS(TYPE, STYPE, W32, W64) \
TEST (max, >, STYPE, , TYPE, W32) \
TEST (max, >, STYPE, q, TYPE, W64) \
TEST (min, <, STYPE, , TYPE, W32) \
TEST (min, <, STYPE, q, TYPE, W64)
/* One instantiation per element type; the directives after each one list
   the instruction patterns expected in the saved assembly.  */
BUILD_VARIANTS (int8, s8, 8, 16)
/* { dg-final { scan-assembler "smaxv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
/* { dg-final { scan-assembler "sminv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
/* { dg-final { scan-assembler "smaxv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
/* { dg-final { scan-assembler "sminv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
BUILD_VARIANTS (uint8, u8, 8, 16)
/* { dg-final { scan-assembler "umaxv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
/* { dg-final { scan-assembler "uminv\\tb\[0-9\]+, v\[0-9\]+\.8b" } } */
/* { dg-final { scan-assembler "umaxv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
/* { dg-final { scan-assembler "uminv\\tb\[0-9\]+, v\[0-9\]+\.16b" } } */
BUILD_VARIANTS (int16, s16, 4, 8)
/* { dg-final { scan-assembler "smaxv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
/* { dg-final { scan-assembler "sminv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
/* { dg-final { scan-assembler "smaxv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
/* { dg-final { scan-assembler "sminv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
BUILD_VARIANTS (uint16, u16, 4, 8)
/* { dg-final { scan-assembler "umaxv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
/* { dg-final { scan-assembler "uminv\\th\[0-9\]+, v\[0-9\]+\.4h" } } */
/* { dg-final { scan-assembler "umaxv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
/* { dg-final { scan-assembler "uminv\\th\[0-9\]+, v\[0-9\]+\.8h" } } */
/* For the two-lane 32-bit forms the expected patterns are the pairwise
   smaxp/sminp/umaxp/uminp instructions rather than the across-vector ones.  */
BUILD_VARIANTS (int32, s32, 2, 4)
/* { dg-final { scan-assembler "smaxp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
/* { dg-final { scan-assembler "sminp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
/* { dg-final { scan-assembler "smaxv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
/* { dg-final { scan-assembler "sminv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
BUILD_VARIANTS (uint32, u32, 2, 4)
/* { dg-final { scan-assembler "umaxp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
/* { dg-final { scan-assembler "uminp\\tv\[0-9\]+\.2s, v\[0-9\]+\.2s, v\[0-9\]+\.2s" } } */
/* { dg-final { scan-assembler "umaxv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
/* { dg-final { scan-assembler "uminv\\ts\[0-9\]+, v\[0-9\]+\.4s" } } */
/* Driver.  TEST is redefined so that BUILD_VARIANTS now expands into calls
   to the test functions defined above; a zero result from any of them
   aborts the program.  */
#undef TEST
#define TEST(MAXMIN, CMP_OP, SUFFIX, Q, TYPE, LANES) \
  { \
    int ok = test_v##MAXMIN##v##SUFFIX##_##TYPE##x##LANES##_t (); \
    if (ok == 0) \
      abort (); \
  }

int
main (int argc, char **argv)
{
  /* 8-bit elements.  */
  BUILD_VARIANTS (int8, s8, 8, 16)
  BUILD_VARIANTS (uint8, u8, 8, 16)

  /* 16-bit elements.  */
  BUILD_VARIANTS (int16, s16, 4, 8)
  BUILD_VARIANTS (uint16, u16, 4, 8)

  /* 32-bit elements.  */
  BUILD_VARIANTS (int32, s32, 2, 4)
  BUILD_VARIANTS (uint32, u32, 2, 4)

  return 0;
}
/* { dg-final { cleanup-saved-temps } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment