Commit 0050faf8 by James Greenhalgh Committed by James Greenhalgh

[AArch64] Support vrecp<esx> neon intrinsics in RTL.

gcc/
	* config/aarch64/aarch64-builtins.c
	(aarch64_simd_builtin_type_mode): Handle SF types.
	(sf_UP): Define.
	(BUILTIN_GPF): Define.
	(aarch64_init_simd_builtins): Handle SF types.
	* config/aarch64/aarch64-simd-builtins.def (frecpe): Add support.
	(frecps): Likewise.
	(frecpx): Likewise.
	* config/aarch64/aarch64-simd.md
	(simd_types): Update simd_frcp<esx> to simd_frecp<esx>.
	(aarch64_frecpe<mode>): New.
	(aarch64_frecps<mode>): Likewise.
	* config/aarch64/aarch64.md (unspec): Add UNSPEC_FRECP<ESX>.
	(v8type): Add frecp<esx>.
	(aarch64_frecp<FRECP:frecp_suffix><mode>): New.
	(aarch64_frecps<mode>): Likewise.
	* config/aarch64/iterators.md (FRECP): New.
	(frecp_suffix): Likewise.
	* config/aarch64/arm_neon.h
	(vrecp<esx><qsd>_<fd><32, 64>): Convert to using builtins.

gcc/testsuite/
	* gcc.target/aarch64/vrecps.c: New.
	* gcc.target/aarch64/vrecpx.c: Likewise.

From-SVN: r198136
parent ee40cdc0
2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-builtins.c
(aarch64_simd_builtin_type_mode): Handle SF types.
(sf_UP): Define.
(BUILTIN_GPF): Define.
(aarch64_init_simd_builtins): Handle SF types.
* config/aarch64/aarch64-simd-builtins.def (frecpe): Add support.
(frecps): Likewise.
(frecpx): Likewise.
* config/aarch64/aarch64-simd.md
(simd_types): Update simd_frcp<esx> to simd_frecp<esx>.
(aarch64_frecpe<mode>): New.
(aarch64_frecps<mode>): Likewise.
* config/aarch64/aarch64.md (unspec): Add UNSPEC_FRECP<ESX>.
(v8type): Add frecp<esx>.
(aarch64_frecp<FRECP:frecp_suffix><mode>): New.
(aarch64_frecps<mode>): Likewise.
* config/aarch64/iterators.md (FRECP): New.
(frecp_suffix): Likewise.
* config/aarch64/arm_neon.h
(vrecp<esx><qsd>_<fd><32, 64>): Convert to using builtins.
2013-04-22 Christian Bruel <christian.bruel@st.com> 2013-04-22 Christian Bruel <christian.bruel@st.com>
PR target/56995 PR target/56995
......
...@@ -50,6 +50,7 @@ enum aarch64_simd_builtin_type_mode ...@@ -50,6 +50,7 @@ enum aarch64_simd_builtin_type_mode
T_OI, T_OI,
T_XI, T_XI,
T_SI, T_SI,
T_SF,
T_HI, T_HI,
T_QI, T_QI,
T_MAX T_MAX
...@@ -72,6 +73,7 @@ enum aarch64_simd_builtin_type_mode ...@@ -72,6 +73,7 @@ enum aarch64_simd_builtin_type_mode
#define oi_UP T_OI #define oi_UP T_OI
#define xi_UP T_XI #define xi_UP T_XI
#define si_UP T_SI #define si_UP T_SI
#define sf_UP T_SF
#define hi_UP T_HI #define hi_UP T_HI
#define qi_UP T_QI #define qi_UP T_QI
...@@ -172,6 +174,8 @@ typedef struct ...@@ -172,6 +174,8 @@ typedef struct
#define BUILTIN_DX(T, N) \ #define BUILTIN_DX(T, N) \
VAR2 (T, N, di, df) VAR2 (T, N, di, df)
#define BUILTIN_GPF(T, N) \
VAR2 (T, N, sf, df)
#define BUILTIN_SDQ_I(T, N) \ #define BUILTIN_SDQ_I(T, N) \
VAR4 (T, N, qi, hi, si, di) VAR4 (T, N, qi, hi, si, di)
#define BUILTIN_SD_HSI(T, N) \ #define BUILTIN_SD_HSI(T, N) \
...@@ -609,7 +613,7 @@ aarch64_init_simd_builtins (void) ...@@ -609,7 +613,7 @@ aarch64_init_simd_builtins (void)
{ {
"v8qi", "v4hi", "v2si", "v2sf", "di", "df", "v8qi", "v4hi", "v2si", "v2sf", "di", "df",
"v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df", "v16qi", "v8hi", "v4si", "v4sf", "v2di", "v2df",
"ti", "ei", "oi", "xi", "si", "hi", "qi" "ti", "ei", "oi", "xi", "si", "sf", "hi", "qi"
}; };
char namebuf[60]; char namebuf[60];
tree ftype = NULL; tree ftype = NULL;
......
...@@ -256,3 +256,12 @@ ...@@ -256,3 +256,12 @@
BUILTIN_VALL (BINOP, uzp2) BUILTIN_VALL (BINOP, uzp2)
BUILTIN_VALL (BINOP, trn1) BUILTIN_VALL (BINOP, trn1)
BUILTIN_VALL (BINOP, trn2) BUILTIN_VALL (BINOP, trn2)
/* Implemented by
aarch64_frecp<FRECP:frecp_suffix><mode>. */
BUILTIN_GPF (UNOP, frecpe)
BUILTIN_GPF (BINOP, frecps)
BUILTIN_GPF (UNOP, frecpx)
BUILTIN_VDQF (UNOP, frecpe)
BUILTIN_VDQF (BINOP, frecps)
...@@ -59,9 +59,9 @@ ...@@ -59,9 +59,9 @@
; simd_fmul floating point multiply. ; simd_fmul floating point multiply.
; simd_fmul_elt floating point multiply (by element). ; simd_fmul_elt floating point multiply (by element).
; simd_fnegabs floating point neg/abs. ; simd_fnegabs floating point neg/abs.
; simd_frcpe floating point reciprocal estimate. ; simd_frecpe floating point reciprocal estimate.
; simd_frcps floating point reciprocal step. ; simd_frecps floating point reciprocal step.
; simd_frecx floating point reciprocal exponent. ; simd_frecpx floating point reciprocal exponent.
; simd_frint floating point round to integer. ; simd_frint floating point round to integer.
; simd_fsqrt floating point square root. ; simd_fsqrt floating point square root.
; simd_icvtf integer convert to floating point. ; simd_icvtf integer convert to floating point.
...@@ -163,9 +163,9 @@ ...@@ -163,9 +163,9 @@
simd_fmul,\ simd_fmul,\
simd_fmul_elt,\ simd_fmul_elt,\
simd_fnegabs,\ simd_fnegabs,\
simd_frcpe,\ simd_frecpe,\
simd_frcps,\ simd_frecps,\
simd_frecx,\ simd_frecpx,\
simd_frint,\ simd_frint,\
simd_fsqrt,\ simd_fsqrt,\
simd_icvtf,\ simd_icvtf,\
...@@ -305,8 +305,8 @@ ...@@ -305,8 +305,8 @@
(eq_attr "simd_type" "simd_store3,simd_store4") (const_string "neon_vst1_3_4_regs") (eq_attr "simd_type" "simd_store3,simd_store4") (const_string "neon_vst1_3_4_regs")
(eq_attr "simd_type" "simd_store1s,simd_store2s") (const_string "neon_vst1_vst2_lane") (eq_attr "simd_type" "simd_store1s,simd_store2s") (const_string "neon_vst1_vst2_lane")
(eq_attr "simd_type" "simd_store3s,simd_store4s") (const_string "neon_vst3_vst4_lane") (eq_attr "simd_type" "simd_store3s,simd_store4s") (const_string "neon_vst3_vst4_lane")
(and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd") (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V2SF")) (const_string "neon_fp_vrecps_vrsqrts_ddd")
(and (eq_attr "simd_type" "simd_frcpe,simd_frcps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq") (and (eq_attr "simd_type" "simd_frecpe,simd_frecps") (eq_attr "simd_mode" "V4SF,V2DF")) (const_string "neon_fp_vrecps_vrsqrts_qqq")
(eq_attr "simd_type" "none") (const_string "none") (eq_attr "simd_type" "none") (const_string "none")
] ]
(const_string "unknown"))) (const_string "unknown")))
...@@ -3750,3 +3750,25 @@ ...@@ -3750,3 +3750,25 @@
"ld1r\\t{%0.<Vtype>}, %1" "ld1r\\t{%0.<Vtype>}, %1"
[(set_attr "simd_type" "simd_load1r") [(set_attr "simd_type" "simd_load1r")
(set_attr "simd_mode" "<MODE>")]) (set_attr "simd_mode" "<MODE>")])
(define_insn "aarch64_frecpe<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
UNSPEC_FRECPE))]
"TARGET_SIMD"
"frecpe\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "simd_type" "simd_frecpe")
(set_attr "simd_mode" "<MODE>")]
)
(define_insn "aarch64_frecps<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w")
(unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
(match_operand:VDQF 2 "register_operand" "w")]
UNSPEC_FRECPS))]
"TARGET_SIMD"
"frecps\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
[(set_attr "simd_type" "simd_frecps")
(set_attr "simd_mode" "<MODE>")]
)
...@@ -68,6 +68,9 @@ ...@@ -68,6 +68,9 @@
(define_c_enum "unspec" [ (define_c_enum "unspec" [
UNSPEC_CASESI UNSPEC_CASESI
UNSPEC_CLS UNSPEC_CLS
UNSPEC_FRECPE
UNSPEC_FRECPS
UNSPEC_FRECPX
UNSPEC_FRINTA UNSPEC_FRINTA
UNSPEC_FRINTI UNSPEC_FRINTI
UNSPEC_FRINTM UNSPEC_FRINTM
...@@ -230,6 +233,9 @@ ...@@ -230,6 +233,9 @@
fmovf2i,\ fmovf2i,\
fmovi2f,\ fmovi2f,\
fmul,\ fmul,\
frecpe,\
frecps,\
frecpx,\
frint,\ frint,\
fsqrt,\ fsqrt,\
load_acq,\ load_acq,\
...@@ -3362,6 +3368,27 @@ ...@@ -3362,6 +3368,27 @@
(set_attr "mode" "<MODE>")] (set_attr "mode" "<MODE>")]
) )
(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
[(set (match_operand:GPF 0 "register_operand" "=w")
(unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
FRECP))]
"TARGET_FLOAT"
"frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
[(set_attr "v8type" "frecp<FRECP:frecp_suffix>")
(set_attr "mode" "<MODE>")]
)
(define_insn "aarch64_frecps<mode>"
[(set (match_operand:GPF 0 "register_operand" "=w")
(unspec:GPF [(match_operand:GPF 1 "register_operand" "w")
(match_operand:GPF 2 "register_operand" "w")]
UNSPEC_FRECPS))]
"TARGET_FLOAT"
"frecps\\t%<s>0, %<s>1, %<s>2"
[(set_attr "v8type" "frecps")
(set_attr "mode" "<MODE>")]
)
;; ------------------------------------------------------------------- ;; -------------------------------------------------------------------
;; Reload support ;; Reload support
;; ------------------------------------------------------------------- ;; -------------------------------------------------------------------
......
...@@ -14556,17 +14556,6 @@ vrbitq_u8 (uint8x16_t a) ...@@ -14556,17 +14556,6 @@ vrbitq_u8 (uint8x16_t a)
return result; return result;
} }
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecpe_f32 (float32x2_t a)
{
float32x2_t result;
__asm__ ("frecpe %0.2s,%1.2s"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vrecpe_u32 (uint32x2_t a) vrecpe_u32 (uint32x2_t a)
{ {
...@@ -14578,39 +14567,6 @@ vrecpe_u32 (uint32x2_t a) ...@@ -14578,39 +14567,6 @@ vrecpe_u32 (uint32x2_t a)
return result; return result;
} }
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecped_f64 (float64_t a)
{
float64_t result;
__asm__ ("frecpe %d0,%d1"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpeq_f32 (float32x4_t a)
{
float32x4_t result;
__asm__ ("frecpe %0.4s,%1.4s"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpeq_f64 (float64x2_t a)
{
float64x2_t result;
__asm__ ("frecpe %0.2d,%1.2d"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vrecpeq_u32 (uint32x4_t a) vrecpeq_u32 (uint32x4_t a)
{ {
...@@ -14622,94 +14578,6 @@ vrecpeq_u32 (uint32x4_t a) ...@@ -14622,94 +14578,6 @@ vrecpeq_u32 (uint32x4_t a)
return result; return result;
} }
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpes_f32 (float32_t a)
{
float32_t result;
__asm__ ("frecpe %s0,%s1"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecps_f32 (float32x2_t a, float32x2_t b)
{
float32x2_t result;
__asm__ ("frecps %0.2s,%1.2s,%2.2s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpsd_f64 (float64_t a, float64_t b)
{
float64_t result;
__asm__ ("frecps %d0,%d1,%d2"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpsq_f32 (float32x4_t a, float32x4_t b)
{
float32x4_t result;
__asm__ ("frecps %0.4s,%1.4s,%2.4s"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpsq_f64 (float64x2_t a, float64x2_t b)
{
float64x2_t result;
__asm__ ("frecps %0.2d,%1.2d,%2.2d"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpss_f32 (float32_t a, float32_t b)
{
float32_t result;
__asm__ ("frecps %s0,%s1,%s2"
: "=w"(result)
: "w"(a), "w"(b)
: /* No clobbers */);
return result;
}
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpxd_f64 (float64_t a)
{
float64_t result;
__asm__ ("frecpe %d0,%d1"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpxs_f32 (float32_t a)
{
float32_t result;
__asm__ ("frecpe %s0,%s1"
: "=w"(result)
: "w"(a)
: /* No clobbers */);
return result;
}
__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
vrev16_p8 (poly8x8_t a) vrev16_p8 (poly8x8_t a)
{ {
...@@ -23115,6 +22983,84 @@ vqsubd_u64 (uint64x1_t __a, uint64x1_t __b) ...@@ -23115,6 +22983,84 @@ vqsubd_u64 (uint64x1_t __a, uint64x1_t __b)
return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b); return (uint64x1_t) __builtin_aarch64_uqsubdi (__a, __b);
} }
/* vrecpe */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpes_f32 (float32_t __a)
{
return __builtin_aarch64_frecpesf (__a);
}
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecped_f64 (float64_t __a)
{
return __builtin_aarch64_frecpedf (__a);
}
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecpe_f32 (float32x2_t __a)
{
return __builtin_aarch64_frecpev2sf (__a);
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpeq_f32 (float32x4_t __a)
{
return __builtin_aarch64_frecpev4sf (__a);
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpeq_f64 (float64x2_t __a)
{
return __builtin_aarch64_frecpev2df (__a);
}
/* vrecps */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpss_f32 (float32_t __a, float32_t __b)
{
return __builtin_aarch64_frecpssf (__a, __b);
}
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpsd_f64 (float64_t __a, float64_t __b)
{
return __builtin_aarch64_frecpsdf (__a, __b);
}
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vrecps_f32 (float32x2_t __a, float32x2_t __b)
{
return __builtin_aarch64_frecpsv2sf (__a, __b);
}
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vrecpsq_f32 (float32x4_t __a, float32x4_t __b)
{
return __builtin_aarch64_frecpsv4sf (__a, __b);
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vrecpsq_f64 (float64x2_t __a, float64x2_t __b)
{
return __builtin_aarch64_frecpsv2df (__a, __b);
}
/* vrecpx */
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vrecpxs_f32 (float32_t __a)
{
return __builtin_aarch64_frecpxsf (__a);
}
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vrecpxd_f64 (float64_t __a)
{
return __builtin_aarch64_frecpxdf (__a);
}
/* vrshl */ /* vrshl */
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
......
...@@ -698,6 +698,8 @@ ...@@ -698,6 +698,8 @@
(define_int_iterator FCVT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM (define_int_iterator FCVT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTM
UNSPEC_FRINTA]) UNSPEC_FRINTA])
(define_int_iterator FRECP [UNSPEC_FRECPE UNSPEC_FRECPX])
;; ------------------------------------------------------------------- ;; -------------------------------------------------------------------
;; Int Iterators Attributes. ;; Int Iterators Attributes.
;; ------------------------------------------------------------------- ;; -------------------------------------------------------------------
...@@ -803,3 +805,5 @@ ...@@ -803,3 +805,5 @@
(define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2") (define_int_attr perm_hilo [(UNSPEC_ZIP1 "1") (UNSPEC_ZIP2 "2")
(UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2") (UNSPEC_TRN1 "1") (UNSPEC_TRN2 "2")
(UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")]) (UNSPEC_UZP1 "1") (UNSPEC_UZP2 "2")])
(define_int_attr frecp_suffix [(UNSPEC_FRECPE "e") (UNSPEC_FRECPX "x")])
2013-04-22 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/vrecps.c: New.
* gcc.target/aarch64/vrecpx.c: Likewise.
2013-04-22 Christian Bruel <christian.bruel@st.com> 2013-04-22 Christian Bruel <christian.bruel@st.com>
PR target/56995 PR target/56995
......
/* { dg-do run } */
/* { dg-options "-O3 --save-temps" } */
#include <arm_neon.h>
#include <math.h>
#include <stdlib.h>
int
test_frecps_float32_t (void)
{
int i;
float32_t value = 0.2;
float32_t reciprocal = 5.0;
float32_t step = vrecpes_f32 (value);
/* 3 steps should give us within ~0.001 accuracy. */
for (i = 0; i < 3; i++)
step = step * vrecpss_f32 (step, value);
return fabs (step - reciprocal) < 0.001;
}
/* { dg-final { scan-assembler "frecpe\\ts\[0-9\]+, s\[0-9\]+" } } */
/* { dg-final { scan-assembler "frecps\\ts\[0-9\]+, s\[0-9\]+, s\[0-9\]+" } } */
int
test_frecps_float32x2_t (void)
{
int i;
int ret = 1;
const float32_t value_pool[] = {0.2, 0.4};
const float32_t reciprocal_pool[] = {5.0, 2.5};
float32x2_t value = vld1_f32 (value_pool);
float32x2_t reciprocal = vld1_f32 (reciprocal_pool);
float32x2_t step = vrecpe_f32 (value);
/* 3 steps should give us within ~0.001 accuracy. */
for (i = 0; i < 3; i++)
step = step * vrecps_f32 (step, value);
ret &= fabs (vget_lane_f32 (step, 0)
- vget_lane_f32 (reciprocal, 0)) < 0.001;
ret &= fabs (vget_lane_f32 (step, 1)
- vget_lane_f32 (reciprocal, 1)) < 0.001;
return ret;
}
/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2s, v\[0-9\]+.2s" } } */
/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2s, v\[0-9\]+.2s, v\[0-9\]+.2s" } } */
int
test_frecps_float32x4_t (void)
{
int i;
int ret = 1;
const float32_t value_pool[] = {0.2, 0.4, 0.5, 0.8};
const float32_t reciprocal_pool[] = {5.0, 2.5, 2.0, 1.25};
float32x4_t value = vld1q_f32 (value_pool);
float32x4_t reciprocal = vld1q_f32 (reciprocal_pool);
float32x4_t step = vrecpeq_f32 (value);
/* 3 steps should give us within ~0.001 accuracy. */
for (i = 0; i < 3; i++)
step = step * vrecpsq_f32 (step, value);
ret &= fabs (vgetq_lane_f32 (step, 0)
- vgetq_lane_f32 (reciprocal, 0)) < 0.001;
ret &= fabs (vgetq_lane_f32 (step, 1)
- vgetq_lane_f32 (reciprocal, 1)) < 0.001;
ret &= fabs (vgetq_lane_f32 (step, 2)
- vgetq_lane_f32 (reciprocal, 2)) < 0.001;
ret &= fabs (vgetq_lane_f32 (step, 3)
- vgetq_lane_f32 (reciprocal, 3)) < 0.001;
return ret;
}
/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.4s, v\[0-9\]+.4s" } } */
/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.4s, v\[0-9\]+.4s, v\[0-9\]+.4s" } } */
int
test_frecps_float64_t (void)
{
int i;
float64_t value = 0.2;
float64_t reciprocal = 5.0;
float64_t step = vrecped_f64 (value);
/* 3 steps should give us within ~0.001 accuracy. */
for (i = 0; i < 3; i++)
step = step * vrecpsd_f64 (step, value);
return fabs (step - reciprocal) < 0.001;
}
/* { dg-final { scan-assembler "frecpe\\td\[0-9\]+, d\[0-9\]+" } } */
/* { dg-final { scan-assembler "frecps\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" } } */
int
test_frecps_float64x2_t (void)
{
int i;
int ret = 1;
const float64_t value_pool[] = {0.2, 0.4};
const float64_t reciprocal_pool[] = {5.0, 2.5};
float64x2_t value = vld1q_f64 (value_pool);
float64x2_t reciprocal = vld1q_f64 (reciprocal_pool);
float64x2_t step = vrecpeq_f64 (value);
/* 3 steps should give us within ~0.001 accuracy. */
for (i = 0; i < 3; i++)
step = step * vrecpsq_f64 (step, value);
ret &= fabs (vgetq_lane_f64 (step, 0)
- vgetq_lane_f64 (reciprocal, 0)) < 0.001;
ret &= fabs (vgetq_lane_f64 (step, 1)
- vgetq_lane_f64 (reciprocal, 1)) < 0.001;
return ret;
}
/* { dg-final { scan-assembler "frecpe\\tv\[0-9\]+.2d, v\[0-9\]+.2d" } } */
/* { dg-final { scan-assembler "frecps\\tv\[0-9\]+.2d, v\[0-9\]+.2d, v\[0-9\]+.2d" } } */
int
main (int argc, char **argv)
{
if (!test_frecps_float32_t ())
abort ();
if (!test_frecps_float32x2_t ())
abort ();
if (!test_frecps_float32x4_t ())
abort ();
if (!test_frecps_float64_t ())
abort ();
if (!test_frecps_float64x2_t ())
abort ();
return 0;
}
/* { dg-final { cleanup-saved-temps } } */
/* { dg-do run } */
/* { dg-options "-O3 --save-temps" } */
#include <arm_neon.h>
#include <math.h>
#include <stdlib.h>
float32_t in_f[] =
{2.0, 4.0, 8.0, 16.0, 1.0, 0.5, 0.25, 0.125};
float32_t rec_f[] =
{1.0, 0.5, 0.25, 0.125, 2.0, 4.0, 8.0, 16.0};
float64_t in_d[] =
{2.0, 4.0, 8.0, 16.0, 1.0, 0.5, 0.25, 0.125};
float32_t rec_d[] =
{1.0, 0.5, 0.25, 0.125, 2.0, 4.0, 8.0, 16.0};
int
test_frecpx_float32_t (void)
{
int i = 0;
int ret = 1;
for (i = 0; i < 8; i++)
ret &= fabs (vrecpxs_f32 (in_f[i]) - rec_f[i]) < 0.001;
return ret;
}
/* { dg-final { scan-assembler "frecpx\\ts\[0-9\]+, s\[0-9\]+" } } */
int
test_frecpx_float64_t (void)
{
int i = 0;
int ret = 1;
for (i = 0; i < 8; i++)
ret &= fabs (vrecpxd_f64 (in_d[i]) - rec_d[i]) < 0.001;
return ret;
}
/* { dg-final { scan-assembler "frecpx\\td\[0-9\]+, d\[0-9\]+" } } */
int
main (int argc, char **argv)
{
if (!test_frecpx_float32_t ())
abort ();
if (!test_frecpx_float64_t ())
abort ();
return 0;
}
/* { dg-final { cleanup-saved-temps } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment