Commit daab4562 authored by Jiong Wang, committed by Jiong Wang

[AArch64, 3/4] Reimplement multiply by element to get rid of inline assembly

gcc/
	* config/aarch64/aarch64-simd.md (vmul_n_f32): Remove inline assembly.
	Use builtin.
	(vmul_n_s16): Likewise.
	(vmul_n_s32): Likewise.
	(vmul_n_u16): Likewise.
	(vmul_n_u32): Likewise.
	(vmulq_n_f32): Likewise.
	(vmulq_n_f64): Likewise.
	(vmulq_n_s16): Likewise.
	(vmulq_n_s32): Likewise.
	(vmulq_n_u16): Likewise.
	(vmulq_n_u32): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/simd/vmul_elem_1.c: Use intrinsics.

From-SVN: r236333
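The rewrite replaces each inline-assembly body in arm_neon.h with a plain vector-by-scalar multiplication (__a * __b): GCC's vector extensions broadcast the scalar operand to every lane, and the aarch64-simd.md patterns (see the "*aarch64_mul3_elt_from_dup" entry below) then match the result as a multiply-by-element instruction. The following is a minimal usage sketch, not part of the commit, exercising two of the rewritten intrinsics; built with -O2 on an AArch64 target, the multiplies are expected to become fmul/mul ...[0] forms.

/* Hypothetical driver, not from the commit: checks the (unchanged)
   semantics of two of the rewritten intrinsics.  */
#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  float32x2_t a = vdup_n_f32 (2.0f);
  float32x2_t r = vmul_n_f32 (a, 3.0f);   /* each lane: 2.0 * 3.0 = 6.0 */

  int16x8_t b = vdupq_n_s16 (4);
  int16x8_t s = vmulq_n_s16 (b, 5);       /* each lane: 4 * 5 = 20 */

  printf ("%f %d\n", (double) vget_lane_f32 (r, 0),
	  (int) vgetq_lane_s16 (s, 0));
  return 0;
}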
2016-05-17  Jiong Wang  <jiong.wang@arm.com>

	* config/aarch64/aarch64-simd.md (vmul_n_f32): Remove inline assembly.
	Use builtin.
	(vmul_n_s16): Likewise.
	(vmul_n_s32): Likewise.
	(vmul_n_u16): Likewise.
	(vmul_n_u32): Likewise.
	(vmulq_n_f32): Likewise.
	(vmulq_n_f64): Likewise.
	(vmulq_n_s16): Likewise.
	(vmulq_n_s32): Likewise.
	(vmulq_n_u16): Likewise.
	(vmulq_n_u32): Likewise.

2016-05-17  Jiong Wang  <jiong.wang@arm.com>

	* config/aarch64/aarch64-simd.md (*aarch64_mul3_elt_to_128df): Extend to
	all supported modes.  Rename to "*aarch64_mul3_elt_from_dup".
......
@@ -7938,61 +7938,6 @@ vmovn_u64 (uint64x2_t a)
return result;
}
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_n_f32 (float32x2_t a, float32_t b)
{
float32x2_t result;
__asm__ ("fmul %0.2s,%1.2s,%2.s[0]"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_n_s16 (int16x4_t a, int16_t b)
{
int16x4_t result;
__asm__ ("mul %0.4h,%1.4h,%2.h[0]"
: "=w"(result)
: "w"(a), "x"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_n_s32 (int32x2_t a, int32_t b)
{
int32x2_t result;
__asm__ ("mul %0.2s,%1.2s,%2.s[0]"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_n_u16 (uint16x4_t a, uint16_t b)
{
uint16x4_t result;
__asm__ ("mul %0.4h,%1.4h,%2.h[0]"
: "=w"(result)
: "w"(a), "x"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_n_u32 (uint32x2_t a, uint32_t b)
{
uint32x2_t result;
__asm__ ("mul %0.2s,%1.2s,%2.s[0]"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
#define vmull_high_lane_s16(a, b, c) \
__extension__ \
({ \
@@ -8443,72 +8388,6 @@ vmull_u32 (uint32x2_t a, uint32x2_t b)
return result;
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_n_f32 (float32x4_t a, float32_t b)
{
float32x4_t result;
__asm__ ("fmul %0.4s,%1.4s,%2.s[0]"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_n_f64 (float64x2_t a, float64_t b)
{
float64x2_t result;
__asm__ ("fmul %0.2d,%1.2d,%2.d[0]"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_n_s16 (int16x8_t a, int16_t b)
{
int16x8_t result;
__asm__ ("mul %0.8h,%1.8h,%2.h[0]"
: "=w"(result)
: "w"(a), "x"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_n_s32 (int32x4_t a, int32_t b)
{
int32x4_t result;
__asm__ ("mul %0.4s,%1.4s,%2.s[0]"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_n_u16 (uint16x8_t a, uint16_t b)
{
uint16x8_t result;
__asm__ ("mul %0.8h,%1.8h,%2.h[0]"
: "=w"(result)
: "w"(a), "x"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_n_u32 (uint32x4_t a, uint32_t b)
{
uint32x4_t result;
__asm__ ("mul %0.4s,%1.4s,%2.s[0]"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vmvn_p8 (poly8x8_t a)
{
@@ -18924,6 +18803,74 @@ vmulq_laneq_u32 (uint32x4_t __a, uint32x4_t __b, const int __lane)
return __a * __aarch64_vget_lane_any (__b, __lane);
}
/* vmul_n. */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vmul_n_f32 (float32x2_t __a, float32_t __b)
{
return __a * __b;
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vmulq_n_f32 (float32x4_t __a, float32_t __b)
{
return __a * __b;
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vmulq_n_f64 (float64x2_t __a, float64_t __b)
{
return __a * __b;
}
__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
vmul_n_s16 (int16x4_t __a, int16_t __b)
{
return __a * __b;
}
__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
vmulq_n_s16 (int16x8_t __a, int16_t __b)
{
return __a * __b;
}
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vmul_n_s32 (int32x2_t __a, int32_t __b)
{
return __a * __b;
}
__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
vmulq_n_s32 (int32x4_t __a, int32_t __b)
{
return __a * __b;
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vmul_n_u16 (uint16x4_t __a, uint16_t __b)
{
return __a * __b;
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vmulq_n_u16 (uint16x8_t __a, uint16_t __b)
{
return __a * __b;
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vmul_n_u32 (uint32x2_t __a, uint32_t __b)
{
return __a * __b;
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vmulq_n_u32 (uint32x4_t __a, uint32_t __b)
{
return __a * __b;
}
/* vneg */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
......
2016-05-17  Jiong Wang  <jiong.wang@arm.com>

	* gcc.target/aarch64/simd/vmul_elem_1.c: Use intrinsics.

2016-05-17  Jiong Wang  <jiong.wang@arm.com>

	* gcc.target/aarch64/simd/vmul_elem_1.c: New.

2016-05-17  Jiong Wang  <jiong.wang@arm.com>
......
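The testsuite entry above says vmul_elem_1.c was switched to intrinsics; the committed testcase is not reproduced in this diff. As a hedged sketch only, an intrinsic-based multiply-by-element test of this kind typically pairs an intrinsic call with a scan-assembler check, along these lines (function name and expected pattern are assumptions, not the actual file contents):

/* Hypothetical fragment in the style of an intrinsic-based test;
   the committed vmul_elem_1.c may differ.  */
/* { dg-do compile } */
/* { dg-options "-O2" } */

#include <arm_neon.h>

float32x4_t
test_mulq_n_f32 (float32x4_t a, float32_t b)
{
  return vmulq_n_f32 (a, b);
}

/* { dg-final { scan-assembler {fmul\tv[0-9]+\.4s, v[0-9]+\.4s, v[0-9]+\.s\[0\]} } } */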