Commit ada5287e, authored and committed by Kyrylo Tkachov

[AArch64] Implement some vmul*_lane*_f* intrinsics in arm_neon.h.

	* config/aarch64/arm_neon.h (vmul_f64): New intrinsic.
	(vmuld_laneq_f64): Likewise.
	(vmuls_laneq_f32): Likewise.
	(vmul_n_f64): Likewise.
	(vmuld_lane_f64): Reimplement in C.
	(vmuls_lane_f32): Likewise.

	* gcc.target/aarch64/simd/vmul_f64_1.c: New test.
	* gcc.target/aarch64/simd/vmul_n_f64_1.c: Likewise.
	* gcc.target/aarch64/simd/vmuld_lane_f64_1.c: Likewise.
	* gcc.target/aarch64/simd/vmuld_laneq_f64_1.c: Likewise.
	* gcc.target/aarch64/simd/vmuls_lane_f32_1.c: Likewise.
	* gcc.target/aarch64/simd/vmuls_laneq_f32_1.c: Likewise.

From-SVN: r213634
parent ad784b91
2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/arm_neon.h (vmul_f64): New intrinsic.
(vmuld_laneq_f64): Likewise.
(vmuls_laneq_f32): Likewise.
(vmul_n_f64): Likewise.
(vmuld_lane_f64): Reimplement in C.
(vmuls_lane_f32): Likewise.
2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/arm/cortex-a15.md (cortex_a15_alu_shift): Add crc type
to reservation.
* config/arm/cortex-a53.md (cortex_a53_alu_shift): Likewise.
......
......@@ -1277,6 +1277,12 @@ vmul_f32 (float32x2_t __a, float32x2_t __b)
return __a * __b;
}
/* Return the product __A * __B, computed with the vector `*' operator.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmul_f64 (float64x1_t __a, float64x1_t __b)
{
  float64x1_t __product = __a * __b;
  return __product;
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vmul_u8 (uint8x8_t __a, uint8x8_t __b)
{
......@@ -8299,19 +8305,6 @@ vmul_n_u32 (uint32x2_t a, uint32_t b)
return result;
}
/* vmuld_lane_f64 (a, b, c): statement-expression macro whose value is the
   float64_t written by a single inline-asm "fmul %d0,%d1,%2.d[%3]".
   A is first copied into a float64_t and B into a float64x2_t, so each macro
   argument is evaluated once.  C is bound with the "i" constraint, so it must
   be an integer constant usable as an immediate operand.
   NOTE(review): B is typed float64x2_t here, whereas the C function of the
   same name elsewhere in this change takes a float64x1_t -- confirm which
   signature callers rely on.  */
#define vmuld_lane_f64(a, b, c) \
__extension__ \
({ \
float64x2_t b_ = (b); \
float64_t a_ = (a); \
float64_t result; \
__asm__ ("fmul %d0,%d1,%2.d[%3]" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
#define vmull_high_lane_s16(a, b, c) \
__extension__ \
({ \
......@@ -8828,19 +8821,6 @@ vmulq_n_u32 (uint32x4_t a, uint32_t b)
return result;
}
/* vmuls_lane_f32 (a, b, c): statement-expression macro whose value is the
   float32_t written by a single inline-asm "fmul %s0,%s1,%2.s[%3]".
   A is first copied into a float32_t and B into a float32x4_t, so each macro
   argument is evaluated once.  C is bound with the "i" constraint, so it must
   be an integer constant usable as an immediate operand.
   NOTE(review): B is typed float32x4_t here, whereas the C function of the
   same name elsewhere in this change takes a float32x2_t -- confirm which
   signature callers rely on.  */
#define vmuls_lane_f32(a, b, c) \
__extension__ \
({ \
float32x4_t b_ = (b); \
float32_t a_ = (a); \
float32_t result; \
__asm__ ("fmul %s0,%s1,%2.s[%3]" \
: "=w"(result) \
: "w"(a_), "w"(b_), "i"(c) \
: /* No clobbers */); \
result; \
})
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmulx_f32 (float32x2_t a, float32x2_t b)
{
......@@ -18985,6 +18965,34 @@ vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
return __a * __aarch64_vget_lane_u32 (__b, __lane);
}
/* vmuld_lane */
/* Return __A multiplied by vget_lane_f64 (__B, __LANE).  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane)
{
  const float64_t __elem = vget_lane_f64 (__b, __lane);
  return __a * __elem;
}
/* Return __A multiplied by vgetq_lane_f64 (__B, __LANE).  */
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane)
{
  const float64_t __elem = vgetq_lane_f64 (__b, __lane);
  return __a * __elem;
}
/* vmuls_lane */
/* Return __A multiplied by vget_lane_f32 (__B, __LANE).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane)
{
  const float32_t __elem = vget_lane_f32 (__b, __lane);
  return __a * __elem;
}
/* Return __A multiplied by vgetq_lane_f32 (__B, __LANE).  */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane)
{
  const float32_t __elem = vgetq_lane_f32 (__b, __lane);
  return __a * __elem;
}
/* vmul_laneq */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
......@@ -19023,6 +19031,14 @@ vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane)
return __a * __aarch64_vgetq_lane_u32 (__b, __lane);
}
/* vmul_n */
/* Multiply lane 0 of __A by the scalar __B and return the product wrapped
   in a one-element float64x1_t.  */
__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
vmul_n_f64 (float64x1_t __a, float64_t __b)
{
  const float64_t __scaled = vget_lane_f64 (__a, 0) * __b;
  return (float64x1_t) { __scaled };
}
/* vmulq_lane */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
......
2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/simd/vmul_f64_1.c: New test.
* gcc.target/aarch64/simd/vmul_n_f64_1.c: Likewise.
* gcc.target/aarch64/simd/vmuld_lane_f64_1.c: Likewise.
* gcc.target/aarch64/simd/vmuld_laneq_f64_1.c: Likewise.
* gcc.target/aarch64/simd/vmuls_lane_f32_1.c: Likewise.
* gcc.target/aarch64/simd/vmuls_laneq_f32_1.c: Likewise.
2014-08-05 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.dg/torture/ftrapv-1.c: Correct usage of dg-require-fork.
2014-08-04 Rohit <rohitarulraj@freescale.com>
......
/* Test the vmul_f64 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options "-save-temps -O3" } */
#include "arm_neon.h"
extern void abort (void);
int
main (void)
{
volatile float64_t minus_e, pi;
float64_t expected, actual;
pi = 3.14159265359;
minus_e = -2.71828;
expected = pi * minus_e;
actual = vget_lane_f64 (vmul_f64 ((float64x1_t) { pi },
(float64x1_t) { minus_e }), 0);
if (expected != actual)
abort ();
return 0;
}
/* { dg-final { scan-assembler "fmul\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the vmul_n_f64 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options "-O3" } */
#include "arm_neon.h"
extern void abort (void);
int
main (void)
{
volatile float64_t minus_e, pi;
float64_t expected, actual;
pi = 3.14159265359;
minus_e = -2.71828;
expected = pi * minus_e;
actual = vget_lane_f64 (vmul_n_f64 ((float64x1_t) { pi },
minus_e), 0);
if (expected != actual)
abort ();
return 0;
}
/* Test the vmuld_lane_f64 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options " -O3" } */
#include "arm_neon.h"
extern void abort (void);
/* Call vmuld_lane_f64 with lane index 0 of a one-element vector and abort
   unless it equals the product computed with the scalar `*' operator.  */
int
main (void)
{
  /* Declared in the order the values are stored: pi first, then minus_e.
     The operands are volatile, presumably so the products are not folded
     at compile time.  */
  volatile float64_t pi = 3.14159265359;
  volatile float64_t minus_e = -2.71828;
  float64_t want = pi * minus_e;
  float64_t got = vmuld_lane_f64 (pi, (float64x1_t) { minus_e }, 0);

  if (got != want)
    abort ();

  return 0;
}
/* Test the vmuld_laneq_f64 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options " -O3" } */
#include "arm_neon.h"
extern void abort (void);
/* Load two doubles into a float64x2_t and check vmuld_laneq_f64 against the
   scalar `*' operator for lane indices 0 and 1; abort on any mismatch.  */
int
main (void)
{
  volatile float64_t pi = 3.14159265359;
  volatile float64_t minus_e, ln2;
  float64_t lanes[2];
  float64x2_t vec;
  float64_t want, got;

  /* Each multiplier is stored both in its volatile scalar and in the array
     that feeds the vector load.  */
  lanes[0] = minus_e = -2.71828;
  lanes[1] = ln2 = 0.69314718056;
  vec = vld1q_f64 (lanes);

  /* Lane 0 is expected to carry lanes[0], i.e. minus_e.  */
  got = vmuld_laneq_f64 (pi, vec, 0);
  want = pi * minus_e;
  if (got != want)
    abort ();

  /* Lane 1 is expected to carry lanes[1], i.e. ln2.  */
  want = pi * ln2;
  got = vmuld_laneq_f64 (pi, vec, 1);
  if (got != want)
    abort ();

  return 0;
}
/* Test the vmuls_lane_f32 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options " -O3" } */
#include "arm_neon.h"
extern void abort (void);
/* Load two floats into a float32x2_t and check vmuls_lane_f32 against the
   scalar `*' operator for lane indices 0 and 1; abort on any mismatch.  */
int
main (void)
{
  volatile float32_t pi = 3.14159265359;
  volatile float32_t minus_e, ln2;
  float32_t lanes[2];
  float32x2_t vec;
  float32_t want, got;

  /* Each multiplier is stored both in its volatile scalar and in the array
     that feeds the vector load.  */
  lanes[0] = minus_e = -2.71828;
  lanes[1] = ln2 = 0.69314718056;
  vec = vld1_f32 (lanes);

  /* Lane 0 is expected to carry lanes[0], i.e. minus_e.  */
  got = vmuls_lane_f32 (pi, vec, 0);
  want = pi * minus_e;
  if (got != want)
    abort ();

  /* Lane 1 is expected to carry lanes[1], i.e. ln2.  */
  want = pi * ln2;
  got = vmuls_lane_f32 (pi, vec, 1);
  if (got != want)
    abort ();

  return 0;
}
/* Test the vmuls_laneq_f32 AArch64 SIMD intrinsic. */
/* { dg-do run } */
/* { dg-options " -O3" } */
#include "arm_neon.h"
extern void abort (void);
/* Load four floats into a float32x4_t and check vmuls_laneq_f32 against the
   scalar `*' operator for lane indices 0 through 3; abort on any mismatch.  */
int
main (void)
{
  volatile float32_t pi = 3.14159265359;
  volatile float32_t minus_e, ln2, sqrt2, phi;
  float32_t lanes[4];
  float32x4_t vec;
  float32_t want, got;

  /* Each multiplier is stored both in its volatile scalar and in the array
     that feeds the vector load.  */
  lanes[0] = minus_e = -2.71828;
  lanes[1] = ln2 = 0.69314718056;
  lanes[2] = sqrt2 = 1.41421356237;
  lanes[3] = phi = 1.61803398874;
  vec = vld1q_f32 (lanes);

  /* Lane 0 is expected to carry lanes[0], i.e. minus_e.  */
  got = vmuls_laneq_f32 (pi, vec, 0);
  want = pi * minus_e;
  if (got != want)
    abort ();

  /* Lane 1 is expected to carry lanes[1], i.e. ln2.  */
  want = pi * ln2;
  got = vmuls_laneq_f32 (pi, vec, 1);
  if (got != want)
    abort ();

  /* Lane 2 is expected to carry lanes[2], i.e. sqrt2.  */
  want = pi * sqrt2;
  got = vmuls_laneq_f32 (pi, vec, 2);
  if (got != want)
    abort ();

  /* Lane 3 is expected to carry lanes[3], i.e. phi.  */
  want = pi * phi;
  got = vmuls_laneq_f32 (pi, vec, 3);
  if (got != want)
    abort ();

  return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment