Commit d2937a2e by Kyrylo Tkachov Committed by Kyrylo Tkachov

[AArch64] Implement some saturating math NEON intrinsics.

	* config/aarch64/aarch64-simd.md (aarch64_sqdmulh_laneq<mode>):
	Use VSDQ_HSI mode iterator.
	(aarch64_sqrdmulh_laneq<mode>): Likewise.
	(aarch64_sq<r>dmulh_laneq<mode>_internal): New define_insn.
	* config/aarch64/aarch64-simd-builtins.def (sqdmulh_laneq):
	Use BUILTIN_VDQHS macro.
	(sqrdmulh_laneq): Likewise.
	* config/aarch64/arm_neon.h (vqdmlalh_laneq_s16): New intrinsic.
	(vqdmlals_laneq_s32): Likewise.
	(vqdmlslh_laneq_s16): Likewise.
	(vqdmlsls_laneq_s32): Likewise.
	(vqdmulhh_laneq_s16): Likewise.
	(vqdmulhs_laneq_s32): Likewise.
	(vqrdmulhh_laneq_s16): Likewise.
	(vqrdmulhs_laneq_s32): Likewise.

	* gcc.target/aarch64/simd/vqdmlalh_laneq_s16_1.c: New test.
	* gcc.target/aarch64/simd/vqdmlals_laneq_s32_1.c: Likewise.
	* gcc.target/aarch64/simd/vqdmlslh_laneq_s16_1.c: Likewise.
	* gcc.target/aarch64/simd/vqdmlsls_laneq_s32_1.c: Likewise.
	* gcc.target/aarch64/simd/vqdmulhh_laneq_s16_1.c: Likewise.
	* gcc.target/aarch64/simd/vqdmulhs_laneq_s32_1.c: Likewise.
	* gcc.target/aarch64/simd/vqrdmulhh_laneq_s16_1.c: Likewise.
	* gcc.target/aarch64/simd/vqrdmulhs_laneq_s32_1.c: Likewise.

From-SVN: r213635
parent ada5287e
2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64-simd.md (aarch64_sqdmulh_laneq<mode>):
Use VSDQ_HSI mode iterator.
(aarch64_sqrdmulh_laneq<mode>): Likewise.
(aarch64_sq<r>dmulh_laneq<mode>_internal): New define_insn.
* config/aarch64/aarch64-simd-builtins.def (sqdmulh_laneq):
Use BUILTIN_VDQHS macro.
(sqrdmulh_laneq): Likewise.
* config/aarch64/arm_neon.h (vqdmlalh_laneq_s16): New intrinsic.
(vqdmlals_laneq_s32): Likewise.
(vqdmlslh_laneq_s16): Likewise.
(vqdmlsls_laneq_s32): Likewise.
(vqdmulhh_laneq_s16): Likewise.
(vqdmulhs_laneq_s32): Likewise.
(vqrdmulhh_laneq_s16): Likewise.
(vqrdmulhs_laneq_s32): Likewise.
2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/arm_neon.h (vmul_f64): New intrinsic.
(vmuld_laneq_f64): Likewise.
(vmuls_laneq_f32): Likewise.
......
......@@ -189,9 +189,9 @@
BUILTIN_VSDQ_HSI (BINOP, sqrdmulh, 0)
/* Implemented by aarch64_sq<r>dmulh_lane<q><mode>. */
BUILTIN_VDQHS (TERNOP, sqdmulh_lane, 0)
BUILTIN_VDQHS (TERNOP, sqdmulh_laneq, 0)
BUILTIN_VSDQ_HSI (TERNOP, sqdmulh_laneq, 0)
BUILTIN_VDQHS (TERNOP, sqrdmulh_lane, 0)
BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq, 0)
BUILTIN_VSDQ_HSI (TERNOP, sqrdmulh_laneq, 0)
BUILTIN_SD_HSI (TERNOP, sqdmulh_lane, 0)
BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane, 0)
......
......@@ -2793,8 +2793,8 @@
)
(define_expand "aarch64_sqdmulh_laneq<mode>"
[(match_operand:VDQHS 0 "register_operand" "")
(match_operand:VDQHS 1 "register_operand" "")
[(match_operand:VSDQ_HSI 0 "register_operand" "")
(match_operand:VSDQ_HSI 1 "register_operand" "")
(match_operand:<VCONQ> 2 "register_operand" "")
(match_operand:SI 3 "immediate_operand" "")]
"TARGET_SIMD"
......@@ -2810,8 +2810,8 @@
)
(define_expand "aarch64_sqrdmulh_laneq<mode>"
[(match_operand:VDQHS 0 "register_operand" "")
(match_operand:VDQHS 1 "register_operand" "")
[(match_operand:VSDQ_HSI 0 "register_operand" "")
(match_operand:VSDQ_HSI 1 "register_operand" "")
(match_operand:<VCONQ> 2 "register_operand" "")
(match_operand:SI 3 "immediate_operand" "")]
"TARGET_SIMD"
......@@ -2890,6 +2890,21 @@
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
(define_insn "aarch64_sq<r>dmulh_laneq<mode>_internal"
[(set (match_operand:SD_HSI 0 "register_operand" "=w")
(unspec:SD_HSI
[(match_operand:SD_HSI 1 "register_operand" "w")
(vec_select:<VEL>
(match_operand:<VCONQ> 2 "register_operand" "<vwx>")
(parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
VQDMULH))]
"TARGET_SIMD"
"*
operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
[(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)
;; vqdml[sa]l
(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
......
......@@ -19445,6 +19445,12 @@ vqdmlalh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
return __builtin_aarch64_sqdmlal_lanehi (__a, __b, __c, __d);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlalh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
{
return __builtin_aarch64_sqdmlal_laneqhi (__a, __b, __c, __d);
}
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_s32 (int64_t __a, int32x1_t __b, int32x1_t __c)
{
......@@ -19458,6 +19464,12 @@ vqdmlals_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
{__builtin_aarch64_sqdmlal_lanesi (__a[0], __b, __c, __d)};
}
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlals_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
{
return __builtin_aarch64_sqdmlal_laneqsi (__a, __b, __c, __d);
}
/* vqdmlsl */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
......@@ -19572,6 +19584,12 @@ vqdmlslh_lane_s16 (int32x1_t __a, int16x1_t __b, int16x4_t __c, const int __d)
return __builtin_aarch64_sqdmlsl_lanehi (__a, __b, __c, __d);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmlslh_laneq_s16 (int32_t __a, int16_t __b, int16x8_t __c, const int __d)
{
return __builtin_aarch64_sqdmlsl_laneqhi (__a, __b, __c, __d);
}
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_s32 (int64_t __a, int32x1_t __b, int32x1_t __c)
{
......@@ -19584,6 +19602,12 @@ vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x2_t __c, const int __d)
return (int64x1_t) {__builtin_aarch64_sqdmlsl_lanesi (__a[0], __b, __c, __d)};
}
__extension__ static __inline int64_t __attribute__ ((__always_inline__))
vqdmlsls_laneq_s32 (int64_t __a, int32_t __b, int32x4_t __c, const int __d)
{
return __builtin_aarch64_sqdmlsl_laneqsi (__a, __b, __c, __d);
}
/* vqdmulh */
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
......@@ -19622,6 +19646,12 @@ vqdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
return __builtin_aarch64_sqdmulh_lanehi (__a, __b, __c);
}
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
{
return __builtin_aarch64_sqdmulh_laneqhi (__a, __b, __c);
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqdmulhs_s32 (int32x1_t __a, int32x1_t __b)
{
......@@ -19634,6 +19664,12 @@ vqdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
return __builtin_aarch64_sqdmulh_lanesi (__a, __b, __c);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
{
return __builtin_aarch64_sqdmulh_laneqsi (__a, __b, __c);
}
/* vqdmull */
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
......@@ -19938,6 +19974,12 @@ vqrdmulhh_lane_s16 (int16x1_t __a, int16x4_t __b, const int __c)
return __builtin_aarch64_sqrdmulh_lanehi (__a, __b, __c);
}
__extension__ static __inline int16_t __attribute__ ((__always_inline__))
vqrdmulhh_laneq_s16 (int16_t __a, int16x8_t __b, const int __c)
{
return __builtin_aarch64_sqrdmulh_laneqhi (__a, __b, __c);
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vqrdmulhs_s32 (int32x1_t __a, int32x1_t __b)
{
......@@ -19950,6 +19992,12 @@ vqrdmulhs_lane_s32 (int32x1_t __a, int32x2_t __b, const int __c)
return __builtin_aarch64_sqrdmulh_lanesi (__a, __b, __c);
}
__extension__ static __inline int32_t __attribute__ ((__always_inline__))
vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c)
{
return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c);
}
/* vqrshl */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
......
2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/simd/vqdmlalh_laneq_s16_1.c: New test.
* gcc.target/aarch64/simd/vqdmlals_laneq_s32_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlslh_laneq_s16_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmlsls_laneq_s32_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhh_laneq_s16_1.c: Likewise.
* gcc.target/aarch64/simd/vqdmulhs_laneq_s32_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhh_laneq_s16_1.c: Likewise.
* gcc.target/aarch64/simd/vqrdmulhs_laneq_s32_1.c: Likewise.
2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/simd/vmul_f64_1.c: New test.
* gcc.target/aarch64/simd/vmul_n_f64_1.c: Likewise.
* gcc.target/aarch64/simd/vmuld_lane_f64_1.c: Likewise.
......
/* Test the vqdmlalh_laneq_s16 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options "-save-temps -O3 -fno-inline" } */
#include "arm_neon.h"
extern void abort (void);
int
main (void)
{
int32_t arg1;
int16_t arg2;
int16x8_t arg3;
int32_t actual;
int32_t expected;
arg1 = 0x80000000;
arg2 = -24497;
arg3 = vcombine_s16 (vcreate_s16 (0x008a80007fff7fffULL),
vcreate_s16 (0xfffffa797fff8000ULL));
actual = vqdmlalh_laneq_s16 (arg1, arg2, arg3, 7);
expected = -2147434654;
if (expected != actual)
abort ();
return 0;
}
/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[7\\\]\n" 1 } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the vqdmlals_laneq_s32 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options "-save-temps -O3 -fno-inline" } */
#include "arm_neon.h"
extern void abort (void);
int
main (void)
{
int64_t arg1;
int32_t arg2;
int32x4_t arg3;
int64_t actual;
int64_t expected;
arg1 = -9223182289494545592LL;
arg2 = 32768;
arg3 = vcombine_s32 (vcreate_s32 (0xffff7fff8000ffffULL),
vcreate_s32 (0x80000000ffff0000ULL));
actual = vqdmlals_laneq_s32 (arg1, arg2, arg3, 3);
expected = -9223323026982900920LL;
if (expected != actual)
abort ();
return 0;
}
/* { dg-final { scan-assembler-times "sqdmlal\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the vqdmlslh_laneq_s16 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options "-save-temps -O3 -fno-inline" } */
#include "arm_neon.h"
extern void abort (void);
int
main (void)
{
int32_t arg1;
int16_t arg2;
int16x8_t arg3;
int32_t actual;
int32_t expected;
arg1 = -2147450881;
arg2 = 32767;
arg3 = vcombine_s16 (vcreate_s16 (0x359d7fff00007fffULL),
vcreate_s16 (0xe678ffff00008000ULL));
actual = vqdmlslh_laneq_s16 (arg1, arg2, arg3, 4);
expected = -32769;
if (expected != actual)
abort ();
return 0;
}
/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[sS\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[4\\\]\n" 1 } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the vqdmlsls_laneq_s32 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options "-save-temps -O3 -fno-inline" } */
#include "arm_neon.h"
extern void abort (void);
int
main (void)
{
int64_t arg1;
int32_t arg2;
int32x4_t arg3;
int64_t actual;
int64_t expected;
arg1 = 140733193453567LL;
arg2 = 25544;
arg3 = vcombine_s32 (vcreate_s32 (0x417b8000ffff8397LL),
vcreate_s32 (0x7fffffff58488000LL));
actual = vqdmlsls_laneq_s32 (arg1, arg2, arg3, 3);
expected = 31022548895631LL;
if (expected != actual)
abort ();
return 0;
}
/* { dg-final { scan-assembler-times "sqdmlsl\[ \t\]+\[dD\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the vqdmulhh_laneq_s16 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options "-save-temps -O3 -fno-inline" } */
#include "arm_neon.h"
extern void abort (void);
int
main (void)
{
int16_t arg1;
int16x8_t arg2;
int16_t actual;
int16_t expected;
arg1 = 268;
arg2 = vcombine_s16 (vcreate_s16 (0xffffffff00000000ULL),
vcreate_s16 (0x0000800018410000ULL));
actual = vqdmulhh_laneq_s16 (arg1, arg2, 7);
expected = 0;
if (expected != actual)
abort ();
return 0;
}
/* { dg-final { scan-assembler-times "sqdmulh\[ \t\]+\[hH\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[7\\\]\n" 1 } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the vqdmulhs_laneq_s32 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options "-save-temps -O3 -fno-inline" } */
#include "arm_neon.h"
extern void abort (void);
int
main (void)
{
int32_t arg1;
int32x4_t arg2;
int32_t actual;
int32_t expected;
arg1 = 0x80000000;
arg2 = vcombine_s32 (vcreate_s32 (0x950dffffc4f40000ULL),
vcreate_s32 (0x7fff8000274a8000ULL));
actual = vqdmulhs_laneq_s32 (arg1, arg2, 3);
expected = -2147450880;
if (expected != actual)
abort ();
return 0;
}
/* { dg-final { scan-assembler-times "sqdmulh\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the vqrdmulhh_laneq_s16 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options "-save-temps -O3 -fno-inline" } */
#include "arm_neon.h"
extern void abort (void);
int
main (void)
{
int16_t arg1;
int16x8_t arg2;
int16_t actual;
int16_t expected;
arg1 = 0;
arg2 = vcombine_s16 (vcreate_s16 (0x7fffffffa7908000ULL),
vcreate_s16 (0x8000d2607fff0000ULL));
actual = vqrdmulhh_laneq_s16 (arg1, arg2, 7);
expected = 0;
if (expected != actual)
abort ();
return 0;
}
/* { dg-final { scan-assembler-times "sqrdmulh\[ \t\]+\[hH\]\[0-9\]+, ?\[hH\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[hH\]\\\[7\\\]\n" 1 } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the vqrdmulhs_laneq_s32 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options "-save-temps -O3 -fno-inline" } */
#include "arm_neon.h"
extern void abort (void);
int
main (void)
{
int32_t arg1;
int32x4_t arg2;
int32_t actual;
int32_t expected;
arg1 = 32768;
arg2 = vcombine_s32 (vcreate_s32 (0x8000ffffffffcd5bULL),
vcreate_s32 (0x7fffffffffffffffULL));
actual = vqrdmulhs_laneq_s32 (arg1, arg2, 3);
expected = 32768;
if (expected != actual)
abort ();
return 0;
}
/* { dg-final { scan-assembler-times "sqrdmulh\[ \t\]+\[sS\]\[0-9\]+, ?\[sS\]\[0-9\]+, ?\[vV\]\[0-9\]+\.\[sS\]\\\[3\\\]\n" 1 } } */
/* { dg-final { cleanup-saved-temps } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment