Commit c4216388 by Matthew Gretton-Dann Committed by Ramana Radhakrishnan

Add support for fma intrinsics for ARM.

Correct dates in changelog from earlier commit.


2012-10-18  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>
	    Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>

        * config/arm/arm.c (neon_builtin_data): Add vfma and vfms
      	builtins.
        * config/arm/neon-docgen.ml (intrinsic_groups): Add
        fused-multiply-* groups.
        * config/neon-gen.ml (print_feature_test_start): New function.
        (print_feature_test_end): Likewise.
        (print_variant): Print feature test macros.
        * config/arm/neon-testgen.ml (emit_prologue): Allow different
        tests to require different effective targets.
        (effective_target): New function.
        (test_intrinsic): Specify correct effective targets.
        * gcc/config/arm/neon.md (fma<VCVTF:mode>4_intrinsic): New pattern.
        (fmsub<VCVTF:mode>4_intrinsic): Likewise.
        (neon_vfma<VCVFT:mode>): New expand.
        (neon_vfms<VCVFT:mode>): Likewise.
        * config/neon.ml (opcode): Add Vfma and Vfms.
        (features): Add Requires_feature.
        (ops): Add VFMA and VFMS intrinsics.
        * config/arm/arm_neon.h: Regenerate.
        * doc/arm-neon-intrinsics.texi: Likewise.





2012-10-18  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>

        * gcc.target/arm/neon/vfmaQf32.c: New testcase.
        * gcc.target/arm/neon/vfmaf32.c: Likewise.
        * gcc.target/arm/neon/vfmsQf32.c: Likewise.
        * gcc.target/arm/neon/vfmsf32.c: Likewise.



Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>

From-SVN: r192560
parent c61f8c3b
2012-10-18 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
* config/arm/arm.c (neon_builtin_data): Add vfma and vfms
builtins.
* config/arm/neon-docgen.ml (intrinsic_groups): Add
fused-multiply-* groups.
* config/neon-gen.ml (print_feature_test_start): New function.
(print_feature_test_end): Likewise.
(print_variant): Print feature test macros.
* config/arm/neon-testgen.ml (emit_prologue): Allow different
tests to require different effective targets.
(effective_target): New function.
(test_intrinsic): Specify correct effective targets.
* gcc/config/arm/neon.md (fma<VCVTF:mode>4_intrinsic): New pattern.
(fmsub<VCVTF:mode>4_intrinsic): Likewise.
(neon_vfma<VCVFT:mode>): New expand.
(neon_vfms<VCVFT:mode>): Likewise.
* config/neon.ml (opcode): Add Vfma and Vfms.
(features): Add Requires_feature.
(ops): Add VFMA and VFMS intrinsics.
* config/arm/arm_neon.h: Regenerate.
* doc/arm-neon-intrinsics.texi: Likewise.
2012-10-18 Richard Guenther <rguenther@suse.de> 2012-10-18 Richard Guenther <rguenther@suse.de>
* lto-streamer.h (enum LTO_tags): Add LTO_integer_cst. * lto-streamer.h (enum LTO_tags): Add LTO_integer_cst.
...@@ -11,7 +35,7 @@ ...@@ -11,7 +35,7 @@
(streamer_pack_tree_bitfields): Call it. (streamer_pack_tree_bitfields): Call it.
(streamer_write_integer_cst): Adjust. (streamer_write_integer_cst): Adjust.
2012-10-17 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> 2012-10-18 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
Ramana Radhakrishnan <ramana.radhakrishnan@arm.com> Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
* config.gcc: Add support for ARMv8 for arm*-*-* targets. * config.gcc: Add support for ARMv8 for arm*-*-* targets.
...@@ -18726,6 +18726,8 @@ static neon_builtin_datum neon_builtin_data[] = ...@@ -18726,6 +18726,8 @@ static neon_builtin_datum neon_builtin_data[] =
VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si), VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
VAR2 (TERNOP, vfma, v2sf, v4sf),
VAR2 (TERNOP, vfms, v2sf, v4sf),
VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf), VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si), VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si), VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
......
...@@ -1350,6 +1350,38 @@ vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) ...@@ -1350,6 +1350,38 @@ vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c)
return (int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c, 1); return (int64x2_t)__builtin_neon_vqdmlslv2si (__a, __b, __c, 1);
} }
#ifdef __ARM_FEATURE_FMA
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
{
return (float32x2_t)__builtin_neon_vfmav2sf (__a, __b, __c, 3);
}
#endif
#ifdef __ARM_FEATURE_FMA
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
return (float32x4_t)__builtin_neon_vfmav4sf (__a, __b, __c, 3);
}
#endif
#ifdef __ARM_FEATURE_FMA
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c)
{
return (float32x2_t)__builtin_neon_vfmsv2sf (__a, __b, __c, 3);
}
#endif
#ifdef __ARM_FEATURE_FMA
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c)
{
return (float32x4_t)__builtin_neon_vfmsv4sf (__a, __b, __c, 3);
}
#endif
__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) __extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
vsub_s8 (int8x8_t __a, int8x8_t __b) vsub_s8 (int8x8_t __a, int8x8_t __b)
{ {
......
...@@ -103,6 +103,8 @@ let intrinsic_groups = ...@@ -103,6 +103,8 @@ let intrinsic_groups =
"Multiplication", single_opcode Vmul; "Multiplication", single_opcode Vmul;
"Multiply-accumulate", single_opcode Vmla; "Multiply-accumulate", single_opcode Vmla;
"Multiply-subtract", single_opcode Vmls; "Multiply-subtract", single_opcode Vmls;
"Fused-multiply-accumulate", single_opcode Vfma;
"Fused-multiply-subtract", single_opcode Vfms;
"Subtraction", single_opcode Vsub; "Subtraction", single_opcode Vsub;
"Comparison (equal-to)", single_opcode Vceq; "Comparison (equal-to)", single_opcode Vceq;
"Comparison (greater-than-or-equal-to)", single_opcode Vcge; "Comparison (greater-than-or-equal-to)", single_opcode Vcge;
......
...@@ -286,6 +286,24 @@ let get_shuffle features = ...@@ -286,6 +286,24 @@ let get_shuffle features =
| _ -> None | _ -> None
with Not_found -> None with Not_found -> None
let print_feature_test_start features =
try
match List.find (fun feature ->
match feature with Requires_feature _ -> true
| _ -> false)
features with
Requires_feature feature ->
Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature
| _ -> assert false
with Not_found -> assert true
let print_feature_test_end features =
let feature =
List.exists (function Requires_feature x -> true
| _ -> false) features in
if feature then Format.printf "#endif@\n"
let print_variant opcode features shape name (ctype, asmtype, elttype) = let print_variant opcode features shape name (ctype, asmtype, elttype) =
let bits = infoword_value elttype features in let bits = infoword_value elttype features in
let modesuf = mode_suffix elttype shape in let modesuf = mode_suffix elttype shape in
...@@ -302,7 +320,11 @@ let print_variant opcode features shape name (ctype, asmtype, elttype) = ...@@ -302,7 +320,11 @@ let print_variant opcode features shape name (ctype, asmtype, elttype) =
return ctype builtin in return ctype builtin in
let body = pdecls @ rdecls @ stmts let body = pdecls @ rdecls @ stmts
and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in and fnname = (intrinsic_name name) ^ "_" ^ (string_of_elt elttype) in
print_function ctype fnname body begin
print_feature_test_start features;
print_function ctype fnname body;
print_feature_test_end features;
end
(* When this function processes the element types in the ops table, it rewrites (* When this function processes the element types in the ops table, it rewrites
them in a list of tuples (a,b,c): them in a list of tuples (a,b,c):
......
...@@ -46,13 +46,14 @@ let open_test_file dir name = ...@@ -46,13 +46,14 @@ let open_test_file dir name =
failwith ("Could not create test source file " ^ name ^ ": " ^ str) failwith ("Could not create test source file " ^ name ^ ": " ^ str)
(* Emit prologue code to a test source file. *) (* Emit prologue code to a test source file. *)
let emit_prologue chan test_name = let emit_prologue chan test_name effective_target =
Printf.fprintf chan "/* Test the `%s' ARM Neon intrinsic. */\n" test_name; Printf.fprintf chan "/* Test the `%s' ARM Neon intrinsic. */\n" test_name;
Printf.fprintf chan "/* This file was autogenerated by neon-testgen. */\n\n"; Printf.fprintf chan "/* This file was autogenerated by neon-testgen. */\n\n";
Printf.fprintf chan "/* { dg-do assemble } */\n"; Printf.fprintf chan "/* { dg-do assemble } */\n";
Printf.fprintf chan "/* { dg-require-effective-target arm_neon_ok } */\n"; Printf.fprintf chan "/* { dg-require-effective-target %s_ok } */\n"
effective_target;
Printf.fprintf chan "/* { dg-options \"-save-temps -O0\" } */\n"; Printf.fprintf chan "/* { dg-options \"-save-temps -O0\" } */\n";
Printf.fprintf chan "/* { dg-add-options arm_neon } */\n"; Printf.fprintf chan "/* { dg-add-options %s } */\n" effective_target;
Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n"; Printf.fprintf chan "\n#include \"arm_neon.h\"\n\n";
Printf.fprintf chan "void test_%s (void)\n{\n" test_name Printf.fprintf chan "void test_%s (void)\n{\n" test_name
...@@ -156,6 +157,17 @@ let check_types tys = ...@@ -156,6 +157,17 @@ let check_types tys =
then (Const :: flags, String.sub ty 6 ((String.length ty) - 6)) then (Const :: flags, String.sub ty 6 ((String.length ty) - 6))
else (flags, ty)) tys' else (flags, ty)) tys'
(* Work out what the effective target should be. *)
let effective_target features =
try
match List.find (fun feature ->
match feature with Requires_feature _ -> true
| _ -> false)
features with
Requires_feature "FMA" -> "arm_neonv2"
| _ -> assert false
with Not_found -> "arm_neon"
(* Given an intrinsic shape, produce a regexp that will match (* Given an intrinsic shape, produce a regexp that will match
the right-hand sides of instructions generated by an intrinsic of the right-hand sides of instructions generated by an intrinsic of
that shape. *) that shape. *)
...@@ -263,8 +275,10 @@ let test_intrinsic dir opcode features shape name munge elt_ty = ...@@ -263,8 +275,10 @@ let test_intrinsic dir opcode features shape name munge elt_ty =
"!?\\(\\[ \t\\]+@\\[a-zA-Z0-9 \\]+\\)?\\n") "!?\\(\\[ \t\\]+@\\[a-zA-Z0-9 \\]+\\)?\\n")
(analyze_all_shapes features shape analyze_shape) (analyze_all_shapes features shape analyze_shape)
in in
let effective_target = effective_target features
in
(* Emit file and function prologues. *) (* Emit file and function prologues. *)
emit_prologue chan test_name; emit_prologue chan test_name effective_target;
(* Emit local variable declarations. *) (* Emit local variable declarations. *)
emit_automatics chan c_types features; emit_automatics chan c_types features;
Printf.fprintf chan "\n"; Printf.fprintf chan "\n";
......
...@@ -722,6 +722,10 @@ ...@@ -722,6 +722,10 @@
) )
;; Fused multiply-accumulate ;; Fused multiply-accumulate
;; We define each insn twice here:
;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
;; to be able to use when converting to FMA.
;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
(define_insn "fma<VCVTF:mode>4" (define_insn "fma<VCVTF:mode>4"
[(set (match_operand:VCVTF 0 "register_operand" "=w") [(set (match_operand:VCVTF 0 "register_operand" "=w")
(fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w") (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
...@@ -735,6 +739,19 @@ ...@@ -735,6 +739,19 @@
(const_string "neon_fp_vmla_qqq")))] (const_string "neon_fp_vmla_qqq")))]
) )
(define_insn "fma<VCVTF:mode>4_intrinsic"
[(set (match_operand:VCVTF 0 "register_operand" "=w")
(fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
(match_operand:VCVTF 2 "register_operand" "w")
(match_operand:VCVTF 3 "register_operand" "0")))]
"TARGET_NEON && TARGET_FMA"
"vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set (attr "neon_type")
(if_then_else (match_test "<Is_d_reg>")
(const_string "neon_fp_vmla_ddd")
(const_string "neon_fp_vmla_qqq")))]
)
(define_insn "*fmsub<VCVTF:mode>4" (define_insn "*fmsub<VCVTF:mode>4"
[(set (match_operand:VCVTF 0 "register_operand" "=w") [(set (match_operand:VCVTF 0 "register_operand" "=w")
(fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w")) (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
...@@ -748,6 +765,19 @@ ...@@ -748,6 +765,19 @@
(const_string "neon_fp_vmla_qqq")))] (const_string "neon_fp_vmla_qqq")))]
) )
(define_insn "fmsub<VCVTF:mode>4_intrinsic"
[(set (match_operand:VCVTF 0 "register_operand" "=w")
(fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
(match_operand:VCVTF 2 "register_operand" "w")
(match_operand:VCVTF 3 "register_operand" "0")))]
"TARGET_NEON && TARGET_FMA"
"vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set (attr "neon_type")
(if_then_else (match_test "<Is_d_reg>")
(const_string "neon_fp_vmla_ddd")
(const_string "neon_fp_vmla_qqq")))]
)
(define_insn "ior<mode>3" (define_insn "ior<mode>3"
[(set (match_operand:VDQ 0 "s_register_operand" "=w,w") [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0") (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
...@@ -1925,6 +1955,32 @@ ...@@ -1925,6 +1955,32 @@
DONE; DONE;
}) })
(define_expand "neon_vfma<VCVTF:mode>"
[(match_operand:VCVTF 0 "s_register_operand")
(match_operand:VCVTF 1 "s_register_operand")
(match_operand:VCVTF 2 "s_register_operand")
(match_operand:VCVTF 3 "s_register_operand")
(match_operand:SI 4 "immediate_operand")]
"TARGET_NEON && TARGET_FMA"
{
emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
operands[1]));
DONE;
})
(define_expand "neon_vfms<VCVTF:mode>"
[(match_operand:VCVTF 0 "s_register_operand")
(match_operand:VCVTF 1 "s_register_operand")
(match_operand:VCVTF 2 "s_register_operand")
(match_operand:VCVTF 3 "s_register_operand")
(match_operand:SI 4 "immediate_operand")]
"TARGET_NEON && TARGET_FMA"
{
emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
operands[1]));
DONE;
})
; Used for intrinsics when flag_unsafe_math_optimizations is false. ; Used for intrinsics when flag_unsafe_math_optimizations is false.
(define_insn "neon_vmla<mode>_unspec" (define_insn "neon_vmla<mode>_unspec"
......
...@@ -102,6 +102,8 @@ type opcode = ...@@ -102,6 +102,8 @@ type opcode =
| Vmul | Vmul
| Vmla | Vmla
| Vmls | Vmls
| Vfma
| Vfms
| Vsub | Vsub
| Vceq | Vceq
| Vcge | Vcge
...@@ -275,6 +277,8 @@ type features = ...@@ -275,6 +277,8 @@ type features =
| Const_valuator of (int -> int) | Const_valuator of (int -> int)
| Fixed_vector_reg | Fixed_vector_reg
| Fixed_core_reg | Fixed_core_reg
(* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *)
| Requires_feature of string
exception MixedMode of elts * elts exception MixedMode of elts * elts
...@@ -802,6 +806,12 @@ let ops = ...@@ -802,6 +806,12 @@ let ops =
Vmls, [], Long, "vmlsl", elts_same_io, su_8_32; Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32]; Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
(* Fused-multiply-accumulate. *)
Vfma, [Requires_feature "FMA"], All (3, Dreg), "vfma", elts_same_io, [F32];
Vfma, [Requires_feature "FMA"], All (3, Qreg), "vfmaQ", elts_same_io, [F32];
Vfms, [Requires_feature "FMA"], All (3, Dreg), "vfms", elts_same_io, [F32];
Vfms, [Requires_feature "FMA"], All (3, Qreg), "vfmsQ", elts_same_io, [F32];
(* Subtraction. *) (* Subtraction. *)
Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32; Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_32;
Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64]; Vsub, [No_op], All (3, Dreg), "vsub", sign_invar_2, [S64; U64];
......
2012-10-17 Matthew Gretton-Dann <matthew.gretton-dann@arm.com> 2012-10-18 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
* gcc.target/arm/neon/vfmaQf32.c: New testcase.
* gcc.target/arm/neon/vfmaf32.c: Likewise.
* gcc.target/arm/neon/vfmsQf32.c: Likewise.
* gcc.target/arm/neon/vfmsf32.c: Likewise.
2012-10-18 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
* gcc.target/arm/ftest-armv8a-arm.c: New testcase. * gcc.target/arm/ftest-armv8a-arm.c: New testcase.
* gcc.target/arm/ftest-armv8a-thumb.c: Likewise. * gcc.target/arm/ftest-armv8a-thumb.c: Likewise.
......
/* Test the `vfmaQf32' ARM Neon intrinsic. */
/* This file was autogenerated by neon-testgen. */
/* { dg-do assemble } */
/* { dg-require-effective-target arm_neonv2_ok } */
/* { dg-options "-save-temps -O0" } */
/* { dg-add-options arm_neonv2 } */
#include "arm_neon.h"
void test_vfmaQf32 (void)
{
float32x4_t out_float32x4_t;
float32x4_t arg0_float32x4_t;
float32x4_t arg1_float32x4_t;
float32x4_t arg2_float32x4_t;
out_float32x4_t = vfmaq_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32x4_t);
}
/* { dg-final { scan-assembler "vfma\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the `vfmaf32' ARM Neon intrinsic. */
/* This file was autogenerated by neon-testgen. */
/* { dg-do assemble } */
/* { dg-require-effective-target arm_neonv2_ok } */
/* { dg-options "-save-temps -O0" } */
/* { dg-add-options arm_neonv2 } */
#include "arm_neon.h"
void test_vfmaf32 (void)
{
float32x2_t out_float32x2_t;
float32x2_t arg0_float32x2_t;
float32x2_t arg1_float32x2_t;
float32x2_t arg2_float32x2_t;
out_float32x2_t = vfma_f32 (arg0_float32x2_t, arg1_float32x2_t, arg2_float32x2_t);
}
/* { dg-final { scan-assembler "vfma\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the `vfmsQf32' ARM Neon intrinsic. */
/* This file was autogenerated by neon-testgen. */
/* { dg-do assemble } */
/* { dg-require-effective-target arm_neonv2_ok } */
/* { dg-options "-save-temps -O0" } */
/* { dg-add-options arm_neonv2 } */
#include "arm_neon.h"
void test_vfmsQf32 (void)
{
float32x4_t out_float32x4_t;
float32x4_t arg0_float32x4_t;
float32x4_t arg1_float32x4_t;
float32x4_t arg2_float32x4_t;
out_float32x4_t = vfmsq_f32 (arg0_float32x4_t, arg1_float32x4_t, arg2_float32x4_t);
}
/* { dg-final { scan-assembler "vfms\.f32\[ \]+\[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the `vfmsf32' ARM Neon intrinsic. */
/* This file was autogenerated by neon-testgen. */
/* { dg-do assemble } */
/* { dg-require-effective-target arm_neonv2_ok } */
/* { dg-options "-save-temps -O0" } */
/* { dg-add-options arm_neonv2 } */
#include "arm_neon.h"
void test_vfmsf32 (void)
{
float32x2_t out_float32x2_t;
float32x2_t arg0_float32x2_t;
float32x2_t arg1_float32x2_t;
float32x2_t arg2_float32x2_t;
out_float32x2_t = vfms_f32 (arg0_float32x2_t, arg1_float32x2_t, arg2_float32x2_t);
}
/* { dg-final { scan-assembler "vfms\.f32\[ \]+\[dD\]\[0-9\]+, \[dD\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment