Commit 58a3bd25 by Felix Yang Committed by Fei Yang

arm_neon.h (vrecpe_u32, [...]): Rewrite using builtin functions.

        * config/aarch64/arm_neon.h (vrecpe_u32, vrecpeq_u32): Rewrite using
        builtin functions.
        (vfma_f32, vfmaq_f32, vfmaq_f64, vfma_n_f32, vfmaq_n_f32, vfmaq_n_f64,
        vfms_f32, vfmsq_f32, vfmsq_f64): Likewise.
        (vhsub_s8, vhsub_u8, vhsub_s16, vhsub_u16, vhsub_s32, vhsub_u32,
        vhsubq_s8, vhsubq_u8, vhsubq_s16, vhsubq_u16, vhsubq_s32, vhsubq_u32,
        vsubhn_s16, vsubhn_u16, vsubhn_s32, vsubhn_u32, vsubhn_s64, vsubhn_u66,
        vrsubhn_s16, vrsubhn_u16, vrsubhn_s32, vrsubhn_u32, vrsubhn_s64,
        vrsubhn_u64, vsubhn_high_s16, vsubhn_high_u16, vsubhn_high_s32,
        vsubhn_high_u32, vsubhn_high_s64, vsubhn_high_u64, vrsubhn_high_s16,
        vrsubhn_high_u16, vrsubhn_high_s32, vrsubhn_high_u32, vrsubhn_high_s64,
        vrsubhn_high_u64): Likewise.
        * config/aarch64/iterators.md (VDQ_SI): New mode iterator.
        * config/aarch64/aarch64.md (define_c_enum "unspec"): Add UNSPEC_URECPE.
        * config/aarch64/aarch64-simd.md (aarch64_urecpe<mode>): New pattern.
        * config/aarch64/aarch64-simd-builtins.def (shsub, uhsub, subhn, rsubhn,
        subhn2, rsubhn2, urecpe): New builtins.

Co-Authored-By: Haijian Zhang <z.zhanghaijian@huawei.com>
Co-Authored-By: Jiji Jiang <jiangjiji@huawei.com>
Co-Authored-By: Pengfei Sui <suipengfei@huawei.com>

From-SVN: r218484
parent 28adf6e7
2014-12-08 Felix Yang <felix.yang@huawei.com>
Haijian Zhang <z.zhanghaijian@huawei.com>
Jiji Jiang <jiangjiji@huawei.com>
Pengfei Sui <suipengfei@huawei.com>
* config/aarch64/arm_neon.h (vrecpe_u32, vrecpeq_u32): Rewrite using
builtin functions.
(vfma_f32, vfmaq_f32, vfmaq_f64, vfma_n_f32, vfmaq_n_f32, vfmaq_n_f64,
vfms_f32, vfmsq_f32, vfmsq_f64): Likewise.
(vhsub_s8, vhsub_u8, vhsub_s16, vhsub_u16, vhsub_s32, vhsub_u32,
vhsubq_s8, vhsubq_u8, vhsubq_s16, vhsubq_u16, vhsubq_s32, vhsubq_u32,
vsubhn_s16, vsubhn_u16, vsubhn_s32, vsubhn_u32, vsubhn_s64, vsubhn_u66,
vrsubhn_s16, vrsubhn_u16, vrsubhn_s32, vrsubhn_u32, vrsubhn_s64,
vrsubhn_u64, vsubhn_high_s16, vsubhn_high_u16, vsubhn_high_s32,
vsubhn_high_u32, vsubhn_high_s64, vsubhn_high_u64, vrsubhn_high_s16,
vrsubhn_high_u16, vrsubhn_high_s32, vrsubhn_high_u32, vrsubhn_high_s64,
vrsubhn_high_u64): Likewise.
* config/aarch64/iterators.md (VDQ_SI): New mode iterator.
* config/aarch64/aarch64.md (define_c_enum "unspec"): Add UNSPEC_URECPE.
* config/aarch64/aarch64-simd.md (aarch64_urecpe<mode>): New pattern.
* config/aarch64/aarch64-simd-builtins.def (shsub, uhsub, subhn, rsubhn,
subhn2, rsubhn2, urecpe): New builtins.
2014-12-08 Ilya Tocar <ilya.tocar@intel.com>
* config/i386/i386.c (ix86_expand_vec_perm_vpermi2): Handle v64qi.
......@@ -5997,7 +6020,6 @@
* config/aarch64/aarch64-simd.md (*aarch64_simd_ld1r<mode>): Use
VALL mode iterator instead of VALLDI.
2014-11-14 Jan Hubicka <hubicka@ucw.cz>
* optc-save-gen.awk: Output cl_target_option_eq,
......@@ -127,15 +127,21 @@
BUILTIN_VD_BHSI (BINOP, usubw, 0)
/* Implemented by aarch64_<sur>h<addsub><mode>. */
BUILTIN_VDQ_BHSI (BINOP, shadd, 0)
BUILTIN_VDQ_BHSI (BINOP, shsub, 0)
BUILTIN_VDQ_BHSI (BINOP, uhadd, 0)
BUILTIN_VDQ_BHSI (BINOP, uhsub, 0)
BUILTIN_VDQ_BHSI (BINOP, srhadd, 0)
BUILTIN_VDQ_BHSI (BINOP, urhadd, 0)
/* Implemented by aarch64_<sur><addsub>hn<mode>. */
BUILTIN_VQN (BINOP, addhn, 0)
BUILTIN_VQN (BINOP, subhn, 0)
BUILTIN_VQN (BINOP, raddhn, 0)
BUILTIN_VQN (BINOP, rsubhn, 0)
/* Implemented by aarch64_<sur><addsub>hn2<mode>. */
BUILTIN_VQN (TERNOP, addhn2, 0)
BUILTIN_VQN (TERNOP, subhn2, 0)
BUILTIN_VQN (TERNOP, raddhn2, 0)
BUILTIN_VQN (TERNOP, rsubhn2, 0)
BUILTIN_VSQN_HSDI (UNOP, sqmovun, 0)
/* Implemented by aarch64_<sur>qmovn<mode>. */
......@@ -338,6 +344,8 @@
BUILTIN_GPF (BINOP, frecps, 0)
BUILTIN_GPF (UNOP, frecpx, 0)
BUILTIN_VDQ_SI (UNOP, urecpe, 0)
BUILTIN_VDQF (UNOP, frecpe, 0)
BUILTIN_VDQF (BINOP, frecps, 0)
......
......@@ -4840,6 +4840,14 @@
[(set_attr "type" "neon_fp_recps_<Vetype><q>")]
)
(define_insn "aarch64_urecpe<mode>"
[(set (match_operand:VDQ_SI 0 "register_operand" "=w")
(unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
UNSPEC_URECPE))]
"TARGET_SIMD"
"urecpe\\t%0.<Vtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
;; Standard pattern name vec_extract<mode>.
(define_expand "vec_extract<mode>"
......
......@@ -75,6 +75,7 @@
UNSPEC_CRC32H
UNSPEC_CRC32W
UNSPEC_CRC32X
UNSPEC_URECPE
UNSPEC_FRECPE
UNSPEC_FRECPS
UNSPEC_FRECPX
......
......@@ -128,6 +128,9 @@
;; Vector modes except double int.
(define_mode_iterator VDQIF [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DF])
;; Vector modes for S type.
(define_mode_iterator VDQ_SI [V2SI V4SI])
;; Vector modes for Q and H types.
(define_mode_iterator VDQQH [V8QI V16QI V4HI V8HI])
......
2014-12-08 Felix Yang <felix.yang@huawei.com>
Haijian Zhang <z.zhanghaijian@huawei.com>
Jiji Jiang <jiangjiji@huawei.com>
Pengfei Sui <suipengfei@huawei.com>
* gcc.target/aarch64/vfma.c: New test.
* gcc.target/aarch64/vfma_n.c: New test.
* gcc.target/aarch64/vfms.c: New test.
* gcc.target/aarch64/narrow_high-intrinsics.c: Fix expected assembler
for rsubhn2 & subhn2.
2014-12-08 Ilya Enkovich <ilya.enkovich@intel.com>
* gcc.target/i386/chkp-bndret.c: New.
......
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
/* Expected results. */
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4438ca3d, 0x44390a3d };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x44869eb8, 0x4486beb8, 0x4486deb8, 0x4486feb8 };
VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0x408906e1532b8520, 0x40890ee1532b8520 };
#define TEST_MSG "VFMA/VFMAQ"
void exec_vfma (void)
{
/* Basic test: v4=vfma(v1,v2), then store the result. */
#define TEST_VFMA(Q, T1, T2, W, N) \
VECT_VAR(vector_res, T1, W, N) = \
vfma##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \
VECT_VAR(vector2, T1, W, N), \
VECT_VAR(vector3, T1, W, N)); \
vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
#define CHECK_VFMA_RESULTS(test_name,comment) \
{ \
CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment); \
CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment); \
CHECK_FP(test_name, float, 64, 2, PRIx64, expected, comment); \
}
#define DECL_VABD_VAR(VAR) \
DECL_VARIABLE(VAR, float, 32, 2); \
DECL_VARIABLE(VAR, float, 32, 4); \
DECL_VARIABLE(VAR, float, 64, 2);
DECL_VABD_VAR(vector1);
DECL_VABD_VAR(vector2);
DECL_VABD_VAR(vector3);
DECL_VABD_VAR(vector_res);
clean_results ();
/* Initialize input "vector1" from "buffer". */
VLOAD(vector1, buffer, , float, f, 32, 2);
VLOAD(vector1, buffer, q, float, f, 32, 4);
VLOAD(vector1, buffer, q, float, f, 64, 2);
/* Choose init value arbitrarily. */
VDUP(vector2, , float, f, 32, 2, 9.3f);
VDUP(vector2, q, float, f, 32, 4, 29.7f);
VDUP(vector2, q, float, f, 64, 2, 15.8f);
/* Choose init value arbitrarily. */
VDUP(vector3, , float, f, 32, 2, 81.2f);
VDUP(vector3, q, float, f, 32, 4, 36.8f);
VDUP(vector3, q, float, f, 64, 2, 51.7f);
/* Execute the tests. */
TEST_VFMA(, float, f, 32, 2);
TEST_VFMA(q, float, f, 32, 4);
TEST_VFMA(q, float, f, 64, 2);
CHECK_VFMA_RESULTS (TEST_MSG, "");
}
int main (void)
{
exec_vfma ();
return 0;
}
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
/* Expected results. */
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x4438ca3d, 0x44390a3d };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x44869eb8, 0x4486beb8, 0x4486deb8, 0x4486feb8 };
VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0x408906e1532b8520, 0x40890ee1532b8520 };
#define VECT_VAR_ASSIGN(S,Q,T1,W) S##Q##_##T1##W
#define ASSIGN(S, Q, T, W, V) T##W##_t S##Q##_##T##W = V
#define TEST_MSG "VFMA/VFMAQ"
void exec_vfma_n (void)
{
/* Basic test: v4=vfma_n(v1,v2), then store the result. */
#define TEST_VFMA(Q, T1, T2, W, N) \
VECT_VAR(vector_res, T1, W, N) = \
vfma##Q##_n_##T2##W(VECT_VAR(vector1, T1, W, N), \
VECT_VAR(vector2, T1, W, N), \
VECT_VAR_ASSIGN(Scalar, Q, T1, W)); \
vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
#define CHECK_VFMA_RESULTS(test_name,comment) \
{ \
CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment); \
CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment); \
CHECK_FP(test_name, float, 64, 2, PRIx64, expected, comment); \
}
#define DECL_VABD_VAR(VAR) \
DECL_VARIABLE(VAR, float, 32, 2); \
DECL_VARIABLE(VAR, float, 32, 4); \
DECL_VARIABLE(VAR, float, 64, 2);
DECL_VABD_VAR(vector1);
DECL_VABD_VAR(vector2);
DECL_VABD_VAR(vector3);
DECL_VABD_VAR(vector_res);
clean_results ();
/* Initialize input "vector1" from "buffer". */
VLOAD(vector1, buffer, , float, f, 32, 2);
VLOAD(vector1, buffer, q, float, f, 32, 4);
VLOAD(vector1, buffer, q, float, f, 64, 2);
/* Choose init value arbitrarily. */
VDUP(vector2, , float, f, 32, 2, 9.3f);
VDUP(vector2, q, float, f, 32, 4, 29.7f);
VDUP(vector2, q, float, f, 64, 2, 15.8f);
/* Choose init value arbitrarily. */
ASSIGN(Scalar, , float, 32, 81.2f);
ASSIGN(Scalar, q, float, 32, 36.8f);
ASSIGN(Scalar, q, float, 64, 51.7f);
/* Execute the tests. */
TEST_VFMA(, float, f, 32, 2);
TEST_VFMA(q, float, f, 32, 4);
TEST_VFMA(q, float, f, 64, 2);
CHECK_VFMA_RESULTS (TEST_MSG, "");
}
int main (void)
{
exec_vfma_n ();
return 0;
}
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
/* Expected results. */
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc440ca3d, 0xc4408a3d };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc48a9eb8, 0xc48a7eb8, 0xc48a5eb8, 0xc48a3eb8 };
VECT_VAR_DECL(expected,hfloat,64,2) [] = { 0xc08a06e1532b8520, 0xc089fee1532b8520 };
#define TEST_MSG "VFMA/VFMAQ"
void exec_vfms (void)
{
/* Basic test: v4=vfms(v1,v2), then store the result. */
#define TEST_VFMA(Q, T1, T2, W, N) \
VECT_VAR(vector_res, T1, W, N) = \
vfms##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \
VECT_VAR(vector2, T1, W, N), \
VECT_VAR(vector3, T1, W, N)); \
vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
#define CHECK_VFMA_RESULTS(test_name,comment) \
{ \
CHECK_FP(test_name, float, 32, 2, PRIx32, expected, comment); \
CHECK_FP(test_name, float, 32, 4, PRIx32, expected, comment); \
CHECK_FP(test_name, float, 64, 2, PRIx64, expected, comment); \
}
#define DECL_VABD_VAR(VAR) \
DECL_VARIABLE(VAR, float, 32, 2); \
DECL_VARIABLE(VAR, float, 32, 4); \
DECL_VARIABLE(VAR, float, 64, 2);
DECL_VABD_VAR(vector1);
DECL_VABD_VAR(vector2);
DECL_VABD_VAR(vector3);
DECL_VABD_VAR(vector_res);
clean_results ();
/* Initialize input "vector1" from "buffer". */
VLOAD(vector1, buffer, , float, f, 32, 2);
VLOAD(vector1, buffer, q, float, f, 32, 4);
VLOAD(vector1, buffer, q, float, f, 64, 2);
/* Choose init value arbitrarily. */
VDUP(vector2, , float, f, 32, 2, 9.3f);
VDUP(vector2, q, float, f, 32, 4, 29.7f);
VDUP(vector2, q, float, f, 64, 2, 15.8f);
/* Choose init value arbitrarily. */
VDUP(vector3, , float, f, 32, 2, 81.2f);
VDUP(vector3, q, float, f, 32, 4, 36.8f);
VDUP(vector3, q, float, f, 64, 2, 51.7f);
/* Execute the tests. */
TEST_VFMA(, float, f, 32, 2);
TEST_VFMA(q, float, f, 32, 4);
TEST_VFMA(q, float, f, 64, 2);
CHECK_VFMA_RESULTS (TEST_MSG, "");
}
int main (void)
{
exec_vfms ();
return 0;
}
......@@ -107,9 +107,9 @@ ONE (vmovn_high, uint16x8_t, uint16x4_t, uint32x4_t, u32)
ONE (vmovn_high, uint32x4_t, uint32x2_t, uint64x2_t, u64)
/* { dg-final { scan-assembler-times "\\tsubhn2 v" 6} } */
/* { dg-final { scan-assembler-times "\\tsubhn2\\tv" 6} } */
/* { dg-final { scan-assembler-times "\\taddhn2\\tv" 6} } */
/* { dg-final { scan-assembler-times "rsubhn2 v" 6} } */
/* { dg-final { scan-assembler-times "rsubhn2\\tv" 6} } */
/* { dg-final { scan-assembler-times "raddhn2\\tv" 6} } */
/* { dg-final { scan-assembler-times "\\trshrn2 v" 6} } */
/* { dg-final { scan-assembler-times "\\tshrn2 v" 6} } */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment