Commit 5819f96f by Kyrylo Tkachov Committed by Kyrylo Tkachov

arm.c (neon_builtin_type_mode): Add T_V4HF.

2013-04-25  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
            Julian Brown  <julian@codesourcery.com>

	* config/arm/arm.c (neon_builtin_type_mode): Add T_V4HF.
	(TB_DREG): Add T_V4HF.
	(v4hf_UP): New macro.
	(neon_itype): Add NEON_FLOAT_WIDEN, NEON_FLOAT_NARROW.
	(arm_init_neon_builtins): Handle NEON_FLOAT_WIDEN,
	NEON_FLOAT_NARROW.
	Handle initialisation of V4HF. Adjust initialisation of reinterpret
	built-ins.
	(arm_expand_neon_builtin): Handle NEON_FLOAT_WIDEN,
	NEON_FLOAT_NARROW.
	(arm_vector_mode_supported_p): Handle V4HF.
	(arm_mangle_map): Handle V4HFmode.
	* config/arm/arm.h (VALID_NEON_DREG_MODE): Add V4HF.
	* config/arm/arm_neon_builtins.def: Add entries for
	vcvtv4hfv4sf, vcvtv4sfv4hf.
	* config/arm/neon.md (neon_vcvtv4sfv4hf): New pattern.
	(neon_vcvtv4hfv4sf): Likewise.
	* config/arm/neon-gen.ml: Handle half-precision floating point
	features.
	* config/arm/neon-testgen.ml: Handle Requires_FP_bit feature.
	* config/arm/arm_neon.h: Regenerate.
	* config/arm/neon.ml (type elts): Add F16.
	(type vectype): Add T_float16x4, T_floatHF.
	(type vecmode): Add V4HF.
	(type features): Add Requires_FP_bit feature.
	(elt_width): Handle F16.
	(elt_class): Likewise.
	(elt_of_class_width): Likewise.
	(mode_of_elt): Refactor.
	(type_for_elt): Handle F16, fix error messages.
	(vectype_size): Handle T_float16x4.
	(vcvt_sh): New function.
	(ops): Add entries for vcvt_f16_f32, vcvt_f32_f16.
	(string_of_vectype): Handle T_floatHF, T_float16, T_float16x4.
	(string_of_mode): Handle V4HF.
	* doc/arm-neon-intrinsics.texi: Regenerate.

2013-04-25  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

	* lib/target-supports.exp
	(check_effective_target_arm_neon_fp16_ok_nocache): New procedure.
	(check_effective_target_arm_neon_fp16_ok): Likewise.
	(add_options_for_arm_neon_fp16): Likewise.
	* gcc.target/arm/neon/vcvtf16_f32.c: New test. Generated.
	* gcc.target/arm/neon/vcvtf32_f16.c: Likewise.

Co-Authored-By: Julian Brown <julian@codesourcery.com>

From-SVN: r198298
parent fd8b8c01
2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Julian Brown <julian@codesourcery.com>
* config/arm/arm.c (neon_builtin_type_mode): Add T_V4HF.
(TB_DREG): Add T_V4HF.
(v4hf_UP): New macro.
(neon_itype): Add NEON_FLOAT_WIDEN, NEON_FLOAT_NARROW.
(arm_init_neon_builtins): Handle NEON_FLOAT_WIDEN,
NEON_FLOAT_NARROW.
Handle initialisation of V4HF. Adjust initialisation of reinterpret
built-ins.
(arm_expand_neon_builtin): Handle NEON_FLOAT_WIDEN,
NEON_FLOAT_NARROW.
(arm_vector_mode_supported_p): Handle V4HF.
(arm_mangle_map): Handle V4HFmode.
* config/arm/arm.h (VALID_NEON_DREG_MODE): Add V4HF.
* config/arm/arm_neon_builtins.def: Add entries for
vcvtv4hfv4sf, vcvtv4sfv4hf.
* config/arm/neon.md (neon_vcvtv4sfv4hf): New pattern.
(neon_vcvtv4hfv4sf): Likewise.
* config/arm/neon-gen.ml: Handle half-precision floating point
features.
* config/arm/neon-testgen.ml: Handle Requires_FP_bit feature.
* config/arm/arm_neon.h: Regenerate.
* config/arm/neon.ml (type elts): Add F16.
(type vectype): Add T_float16x4, T_floatHF.
(type vecmode): Add V4HF.
(type features): Add Requires_FP_bit feature.
(elt_width): Handle F16.
(elt_class): Likewise.
(elt_of_class_width): Likewise.
(mode_of_elt): Refactor.
(type_for_elt): Handle F16, fix error messages.
(vectype_size): Handle T_float16x4.
(vcvt_sh): New function.
(ops): Add entries for vcvt_f16_f32, vcvt_f32_f16.
(string_of_vectype): Handle T_floatHF, T_float16, T_float16x4.
(string_of_mode): Handle V4HF.
* doc/arm-neon-intrinsics.texi: Regenerate.
2013-04-25 James Greenhalgh <james.greenhalgh@arm.com> 2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64.c (aarch64_print_operand): Fix asm_fprintf * config/aarch64/aarch64.c (aarch64_print_operand): Fix asm_fprintf
......
...@@ -19940,6 +19940,7 @@ arm_debugger_arg_offset (int value, rtx addr) ...@@ -19940,6 +19940,7 @@ arm_debugger_arg_offset (int value, rtx addr)
typedef enum { typedef enum {
T_V8QI, T_V8QI,
T_V4HI, T_V4HI,
T_V4HF,
T_V2SI, T_V2SI,
T_V2SF, T_V2SF,
T_DI, T_DI,
...@@ -19957,14 +19958,15 @@ typedef enum { ...@@ -19957,14 +19958,15 @@ typedef enum {
#define TYPE_MODE_BIT(X) (1 << (X)) #define TYPE_MODE_BIT(X) (1 << (X))
#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \ #define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
| TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \ | TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
| TYPE_MODE_BIT (T_DI)) | TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \ #define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
| TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \ | TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
| TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI)) | TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
#define v8qi_UP T_V8QI #define v8qi_UP T_V8QI
#define v4hi_UP T_V4HI #define v4hi_UP T_V4HI
#define v4hf_UP T_V4HF
#define v2si_UP T_V2SI #define v2si_UP T_V2SI
#define v2sf_UP T_V2SF #define v2sf_UP T_V2SF
#define di_UP T_DI #define di_UP T_DI
...@@ -20000,6 +20002,8 @@ typedef enum { ...@@ -20000,6 +20002,8 @@ typedef enum {
NEON_SCALARMULH, NEON_SCALARMULH,
NEON_SCALARMAC, NEON_SCALARMAC,
NEON_CONVERT, NEON_CONVERT,
NEON_FLOAT_WIDEN,
NEON_FLOAT_NARROW,
NEON_FIXCONV, NEON_FIXCONV,
NEON_SELECT, NEON_SELECT,
NEON_RESULTPAIR, NEON_RESULTPAIR,
...@@ -20393,6 +20397,7 @@ arm_init_neon_builtins (void) ...@@ -20393,6 +20397,7 @@ arm_init_neon_builtins (void)
tree neon_intQI_type_node; tree neon_intQI_type_node;
tree neon_intHI_type_node; tree neon_intHI_type_node;
tree neon_floatHF_type_node;
tree neon_polyQI_type_node; tree neon_polyQI_type_node;
tree neon_polyHI_type_node; tree neon_polyHI_type_node;
tree neon_intSI_type_node; tree neon_intSI_type_node;
...@@ -20419,6 +20424,7 @@ arm_init_neon_builtins (void) ...@@ -20419,6 +20424,7 @@ arm_init_neon_builtins (void)
tree V8QI_type_node; tree V8QI_type_node;
tree V4HI_type_node; tree V4HI_type_node;
tree V4HF_type_node;
tree V2SI_type_node; tree V2SI_type_node;
tree V2SF_type_node; tree V2SF_type_node;
tree V16QI_type_node; tree V16QI_type_node;
...@@ -20473,6 +20479,9 @@ arm_init_neon_builtins (void) ...@@ -20473,6 +20479,9 @@ arm_init_neon_builtins (void)
neon_float_type_node = make_node (REAL_TYPE); neon_float_type_node = make_node (REAL_TYPE);
TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE; TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
layout_type (neon_float_type_node); layout_type (neon_float_type_node);
neon_floatHF_type_node = make_node (REAL_TYPE);
TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
layout_type (neon_floatHF_type_node);
/* Define typedefs which exactly correspond to the modes we are basing vector /* Define typedefs which exactly correspond to the modes we are basing vector
types on. If you change these names you'll need to change types on. If you change these names you'll need to change
...@@ -20481,6 +20490,8 @@ arm_init_neon_builtins (void) ...@@ -20481,6 +20490,8 @@ arm_init_neon_builtins (void)
"__builtin_neon_qi"); "__builtin_neon_qi");
(*lang_hooks.types.register_builtin_type) (neon_intHI_type_node, (*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
"__builtin_neon_hi"); "__builtin_neon_hi");
(*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
"__builtin_neon_hf");
(*lang_hooks.types.register_builtin_type) (neon_intSI_type_node, (*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
"__builtin_neon_si"); "__builtin_neon_si");
(*lang_hooks.types.register_builtin_type) (neon_float_type_node, (*lang_hooks.types.register_builtin_type) (neon_float_type_node,
...@@ -20522,6 +20533,8 @@ arm_init_neon_builtins (void) ...@@ -20522,6 +20533,8 @@ arm_init_neon_builtins (void)
build_vector_type_for_mode (neon_intQI_type_node, V8QImode); build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
V4HI_type_node = V4HI_type_node =
build_vector_type_for_mode (neon_intHI_type_node, V4HImode); build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
V4HF_type_node =
build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
V2SI_type_node = V2SI_type_node =
build_vector_type_for_mode (neon_intSI_type_node, V2SImode); build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
V2SF_type_node = V2SF_type_node =
...@@ -20644,7 +20657,7 @@ arm_init_neon_builtins (void) ...@@ -20644,7 +20657,7 @@ arm_init_neon_builtins (void)
neon_builtin_datum *d = &neon_builtin_data[i]; neon_builtin_datum *d = &neon_builtin_data[i];
const char* const modenames[] = { const char* const modenames[] = {
"v8qi", "v4hi", "v2si", "v2sf", "di", "v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
"v16qi", "v8hi", "v4si", "v4sf", "v2di", "v16qi", "v8hi", "v4si", "v4sf", "v2di",
"ti", "ei", "oi" "ti", "ei", "oi"
}; };
...@@ -20847,8 +20860,9 @@ arm_init_neon_builtins (void) ...@@ -20847,8 +20860,9 @@ arm_init_neon_builtins (void)
case NEON_REINTERP: case NEON_REINTERP:
{ {
/* We iterate over 5 doubleword types, then 5 quadword /* We iterate over 5 doubleword types, then 5 quadword
types. */ types. V4HF is not a type used in reinterpret, so we translate
int rhs = d->mode % 5; d->mode to the correct index in reinterp_ftype_dreg. */
int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5;
switch (insn_data[d->code].operand[0].mode) switch (insn_data[d->code].operand[0].mode)
{ {
case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break; case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
...@@ -20865,7 +20879,38 @@ arm_init_neon_builtins (void) ...@@ -20865,7 +20879,38 @@ arm_init_neon_builtins (void)
} }
} }
break; break;
case NEON_FLOAT_WIDEN:
{
tree eltype = NULL_TREE;
tree return_type = NULL_TREE;
switch (insn_data[d->code].operand[1].mode)
{
case V4HFmode:
eltype = V4HF_type_node;
return_type = V4SF_type_node;
break;
default: gcc_unreachable ();
}
ftype = build_function_type_list (return_type, eltype, NULL);
break;
}
case NEON_FLOAT_NARROW:
{
tree eltype = NULL_TREE;
tree return_type = NULL_TREE;
switch (insn_data[d->code].operand[1].mode)
{
case V4SFmode:
eltype = V4SF_type_node;
return_type = V4HF_type_node;
break;
default: gcc_unreachable ();
}
ftype = build_function_type_list (return_type, eltype, NULL);
break;
}
default: default:
gcc_unreachable (); gcc_unreachable ();
} }
...@@ -21862,6 +21907,8 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target) ...@@ -21862,6 +21907,8 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
case NEON_DUP: case NEON_DUP:
case NEON_RINT: case NEON_RINT:
case NEON_SPLIT: case NEON_SPLIT:
case NEON_FLOAT_WIDEN:
case NEON_FLOAT_NARROW:
case NEON_REINTERP: case NEON_REINTERP:
return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode, return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
NEON_ARG_COPY_TO_REG, NEON_ARG_STOP); NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
...@@ -25517,7 +25564,7 @@ arm_vector_mode_supported_p (enum machine_mode mode) ...@@ -25517,7 +25564,7 @@ arm_vector_mode_supported_p (enum machine_mode mode)
{ {
/* Neon also supports V2SImode, etc. listed in the clause below. */ /* Neon also supports V2SImode, etc. listed in the clause below. */
if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
|| mode == V16QImode || mode == V4SFmode || mode == V2DImode)) || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
return true; return true;
if ((TARGET_NEON || TARGET_IWMMXT) if ((TARGET_NEON || TARGET_IWMMXT)
...@@ -26361,6 +26408,7 @@ static arm_mangle_map_entry arm_mangle_map[] = { ...@@ -26361,6 +26408,7 @@ static arm_mangle_map_entry arm_mangle_map[] = {
{ V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" }, { V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
{ V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" }, { V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
{ V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" }, { V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
{ V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
{ V2SImode, "__builtin_neon_si", "16__simd64_int32_t" }, { V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
{ V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" }, { V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
{ V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" }, { V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
......
...@@ -1052,7 +1052,7 @@ extern int prefer_neon_for_64bits; ...@@ -1052,7 +1052,7 @@ extern int prefer_neon_for_64bits;
/* Modes valid for Neon D registers. */ /* Modes valid for Neon D registers. */
#define VALID_NEON_DREG_MODE(MODE) \ #define VALID_NEON_DREG_MODE(MODE) \
((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \ ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
|| (MODE) == V2SFmode || (MODE) == DImode) || (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode)
/* Modes valid for Neon Q registers. */ /* Modes valid for Neon Q registers. */
#define VALID_NEON_QREG_MODE(MODE) \ #define VALID_NEON_QREG_MODE(MODE) \
......
...@@ -43,6 +43,7 @@ typedef __builtin_neon_hi int16x4_t __attribute__ ((__vector_size__ (8))); ...@@ -43,6 +43,7 @@ typedef __builtin_neon_hi int16x4_t __attribute__ ((__vector_size__ (8)));
typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8)));
typedef __builtin_neon_di int64x1_t; typedef __builtin_neon_di int64x1_t;
typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8)));
typedef __builtin_neon_hf float16x4_t __attribute__ ((__vector_size__ (8)));
typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8)));
typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8)));
typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8))); typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8)));
...@@ -6016,6 +6017,22 @@ vcvtq_u32_f32 (float32x4_t __a) ...@@ -6016,6 +6017,22 @@ vcvtq_u32_f32 (float32x4_t __a)
return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0); return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0);
} }
#if ((__ARM_FP & 0x2) != 0)
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vcvt_f16_f32 (float32x4_t __a)
{
return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
}
#endif
#if ((__ARM_FP & 0x2) != 0)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_f32_f16 (float16x4_t __a)
{
return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
}
#endif
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_n_s32_f32 (float32x2_t __a, const int __b) vcvt_n_s32_f32 (float32x2_t __a, const int __b)
{ {
......
...@@ -132,6 +132,8 @@ VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi), ...@@ -132,6 +132,8 @@ VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
VAR2 (UNOP, vrev16, v8qi, v16qi), VAR2 (UNOP, vrev16, v8qi, v16qi),
VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf), VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf), VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf),
VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf),
VAR10 (SELECT, vbsl, VAR10 (SELECT, vbsl,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di), v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
VAR2 (RINT, vrintn, v2sf, v4sf), VAR2 (RINT, vrintn, v2sf, v4sf),
......
...@@ -121,6 +121,7 @@ let rec signed_ctype = function ...@@ -121,6 +121,7 @@ let rec signed_ctype = function
| T_uint16 | T_int16 -> T_intHI | T_uint16 | T_int16 -> T_intHI
| T_uint32 | T_int32 -> T_intSI | T_uint32 | T_int32 -> T_intSI
| T_uint64 | T_int64 -> T_intDI | T_uint64 | T_int64 -> T_intDI
| T_float16 -> T_floatHF
| T_float32 -> T_floatSF | T_float32 -> T_floatSF
| T_poly8 -> T_intQI | T_poly8 -> T_intQI
| T_poly16 -> T_intHI | T_poly16 -> T_intHI
...@@ -275,8 +276,8 @@ let rec mode_suffix elttype shape = ...@@ -275,8 +276,8 @@ let rec mode_suffix elttype shape =
let mode = mode_of_elt elttype shape in let mode = mode_of_elt elttype shape in
string_of_mode mode string_of_mode mode
with MixedMode (dst, src) -> with MixedMode (dst, src) ->
let dstmode = mode_of_elt dst shape let dstmode = mode_of_elt ~argpos:0 dst shape
and srcmode = mode_of_elt src shape in and srcmode = mode_of_elt ~argpos:1 src shape in
string_of_mode dstmode ^ string_of_mode srcmode string_of_mode dstmode ^ string_of_mode srcmode
let get_shuffle features = let get_shuffle features =
...@@ -291,19 +292,24 @@ let print_feature_test_start features = ...@@ -291,19 +292,24 @@ let print_feature_test_start features =
match List.find (fun feature -> match List.find (fun feature ->
match feature with Requires_feature _ -> true match feature with Requires_feature _ -> true
| Requires_arch _ -> true | Requires_arch _ -> true
| Requires_FP_bit _ -> true
| _ -> false) | _ -> false)
features with features with
Requires_feature feature -> Requires_feature feature ->
Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature
| Requires_arch arch -> | Requires_arch arch ->
Format.printf "#if __ARM_ARCH >= %d@\n" arch Format.printf "#if __ARM_ARCH >= %d@\n" arch
| Requires_FP_bit bit ->
Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n"
(1 lsl bit)
| _ -> assert false | _ -> assert false
with Not_found -> assert true with Not_found -> assert true
let print_feature_test_end features = let print_feature_test_end features =
let feature = let feature =
List.exists (function Requires_feature x -> true List.exists (function Requires_feature _ -> true
| Requires_arch x -> true | Requires_arch _ -> true
| Requires_FP_bit _ -> true
| _ -> false) features in | _ -> false) features in
if feature then Format.printf "#endif@\n" if feature then Format.printf "#endif@\n"
...@@ -365,6 +371,7 @@ let deftypes () = ...@@ -365,6 +371,7 @@ let deftypes () =
"__builtin_neon_hi", "int", 16, 4; "__builtin_neon_hi", "int", 16, 4;
"__builtin_neon_si", "int", 32, 2; "__builtin_neon_si", "int", 32, 2;
"__builtin_neon_di", "int", 64, 1; "__builtin_neon_di", "int", 64, 1;
"__builtin_neon_hf", "float", 16, 4;
"__builtin_neon_sf", "float", 32, 2; "__builtin_neon_sf", "float", 32, 2;
"__builtin_neon_poly8", "poly", 8, 8; "__builtin_neon_poly8", "poly", 8, 8;
"__builtin_neon_poly16", "poly", 16, 4; "__builtin_neon_poly16", "poly", 16, 4;
......
...@@ -163,10 +163,12 @@ let effective_target features = ...@@ -163,10 +163,12 @@ let effective_target features =
match List.find (fun feature -> match List.find (fun feature ->
match feature with Requires_feature _ -> true match feature with Requires_feature _ -> true
| Requires_arch _ -> true | Requires_arch _ -> true
| Requires_FP_bit 1 -> true
| _ -> false) | _ -> false)
features with features with
Requires_feature "FMA" -> "arm_neonv2" Requires_feature "FMA" -> "arm_neonv2"
| Requires_arch 8 -> "arm_v8_neon" | Requires_arch 8 -> "arm_v8_neon"
| Requires_FP_bit 1 -> "arm_neon_fp16"
| _ -> assert false | _ -> assert false
with Not_found -> "arm_neon" with Not_found -> "arm_neon"
......
...@@ -3258,6 +3258,24 @@ ...@@ -3258,6 +3258,24 @@
(const_string "neon_fp_vadd_qqq_vabs_qq")))] (const_string "neon_fp_vadd_qqq_vabs_qq")))]
) )
(define_insn "neon_vcvtv4sfv4hf"
[(set (match_operand:V4SF 0 "s_register_operand" "=w")
(unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
UNSPEC_VCVT))]
"TARGET_NEON && TARGET_FP16"
"vcvt.f32.f16\t%q0, %P1"
[(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
)
(define_insn "neon_vcvtv4hfv4sf"
[(set (match_operand:V4HF 0 "s_register_operand" "=w")
(unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
UNSPEC_VCVT))]
"TARGET_NEON && TARGET_FP16"
"vcvt.f16.f32\t%P0, %q1"
[(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
)
(define_insn "neon_vcvt_n<mode>" (define_insn "neon_vcvt_n<mode>"
[(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w") [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w") (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
<http://www.gnu.org/licenses/>. *) <http://www.gnu.org/licenses/>. *)
(* Shorthand types for vector elements. *) (* Shorthand types for vector elements. *)
type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16 type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16
| I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
| Cast of elts * elts | NoElts | Cast of elts * elts | NoElts
...@@ -37,6 +37,7 @@ type vectype = T_int8x8 | T_int8x16 ...@@ -37,6 +37,7 @@ type vectype = T_int8x8 | T_int8x16
| T_uint16x4 | T_uint16x8 | T_uint16x4 | T_uint16x8
| T_uint32x2 | T_uint32x4 | T_uint32x2 | T_uint32x4
| T_uint64x1 | T_uint64x2 | T_uint64x1 | T_uint64x2
| T_float16x4
| T_float32x2 | T_float32x4 | T_float32x2 | T_float32x4
| T_poly8x8 | T_poly8x16 | T_poly8x8 | T_poly8x16
| T_poly16x4 | T_poly16x8 | T_poly16x4 | T_poly16x8
...@@ -46,11 +47,13 @@ type vectype = T_int8x8 | T_int8x16 ...@@ -46,11 +47,13 @@ type vectype = T_int8x8 | T_int8x16
| T_uint8 | T_uint16 | T_uint8 | T_uint16
| T_uint32 | T_uint64 | T_uint32 | T_uint64
| T_poly8 | T_poly16 | T_poly8 | T_poly16
| T_float32 | T_arrayof of int * vectype | T_float16 | T_float32
| T_arrayof of int * vectype
| T_ptrto of vectype | T_const of vectype | T_ptrto of vectype | T_const of vectype
| T_void | T_intQI | T_void | T_intQI
| T_intHI | T_intSI | T_intHI | T_intSI
| T_intDI | T_floatSF | T_intDI | T_floatHF
| T_floatSF
(* The meanings of the following are: (* The meanings of the following are:
TImode : "Tetra", two registers (four words). TImode : "Tetra", two registers (four words).
...@@ -92,7 +95,7 @@ type arity = Arity0 of vectype ...@@ -92,7 +95,7 @@ type arity = Arity0 of vectype
| Arity3 of vectype * vectype * vectype * vectype | Arity3 of vectype * vectype * vectype * vectype
| Arity4 of vectype * vectype * vectype * vectype * vectype | Arity4 of vectype * vectype * vectype * vectype * vectype
type vecmode = V8QI | V4HI | V2SI | V2SF | DI type vecmode = V8QI | V4HI | V4HF |V2SI | V2SF | DI
| V16QI | V8HI | V4SI | V4SF | V2DI | V16QI | V8HI | V4SI | V4SF | V2DI
| QI | HI | SI | SF | QI | HI | SI | SF
...@@ -284,18 +287,22 @@ type features = ...@@ -284,18 +287,22 @@ type features =
| Fixed_core_reg | Fixed_core_reg
(* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *) (* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *)
| Requires_feature of string | Requires_feature of string
(* Mark that the intrinsic requires a particular architecture version. *)
| Requires_arch of int | Requires_arch of int
(* Mark that the intrinsic requires a particular bit in __ARM_FP to
be set. *)
| Requires_FP_bit of int
exception MixedMode of elts * elts exception MixedMode of elts * elts
let rec elt_width = function let rec elt_width = function
S8 | U8 | P8 | I8 | B8 -> 8 S8 | U8 | P8 | I8 | B8 -> 8
| S16 | U16 | P16 | I16 | B16 -> 16 | S16 | U16 | P16 | I16 | B16 | F16 -> 16
| S32 | F32 | U32 | I32 | B32 -> 32 | S32 | F32 | U32 | I32 | B32 -> 32
| S64 | U64 | I64 | B64 -> 64 | S64 | U64 | I64 | B64 -> 64
| Conv (a, b) -> | Conv (a, b) ->
let wa = elt_width a and wb = elt_width b in let wa = elt_width a and wb = elt_width b in
if wa = wb then wa else failwith "element width?" if wa = wb then wa else raise (MixedMode (a, b))
| Cast (a, b) -> raise (MixedMode (a, b)) | Cast (a, b) -> raise (MixedMode (a, b))
| NoElts -> failwith "No elts" | NoElts -> failwith "No elts"
...@@ -303,7 +310,7 @@ let rec elt_class = function ...@@ -303,7 +310,7 @@ let rec elt_class = function
S8 | S16 | S32 | S64 -> Signed S8 | S16 | S32 | S64 -> Signed
| U8 | U16 | U32 | U64 -> Unsigned | U8 | U16 | U32 | U64 -> Unsigned
| P8 | P16 -> Poly | P8 | P16 -> Poly
| F32 -> Float | F16 | F32 -> Float
| I8 | I16 | I32 | I64 -> Int | I8 | I16 | I32 | I64 -> Int
| B8 | B16 | B32 | B64 -> Bits | B8 | B16 | B32 | B64 -> Bits
| Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b) | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
...@@ -315,6 +322,7 @@ let elt_of_class_width c w = ...@@ -315,6 +322,7 @@ let elt_of_class_width c w =
| Signed, 16 -> S16 | Signed, 16 -> S16
| Signed, 32 -> S32 | Signed, 32 -> S32
| Signed, 64 -> S64 | Signed, 64 -> S64
| Float, 16 -> F16
| Float, 32 -> F32 | Float, 32 -> F32
| Unsigned, 8 -> U8 | Unsigned, 8 -> U8
| Unsigned, 16 -> U16 | Unsigned, 16 -> U16
...@@ -384,7 +392,12 @@ let find_key_operand operands = ...@@ -384,7 +392,12 @@ let find_key_operand operands =
in in
scan ((Array.length operands) - 1) scan ((Array.length operands) - 1)
let rec mode_of_elt elt shape = (* Find a vecmode from a shape_elt ELT for an instruction with shape_form
SHAPE. For a Use_operands shape, if ARGPOS is passed then return the mode
for the given argument position, else determine which argument to return a
mode for automatically. *)
let rec mode_of_elt ?argpos elt shape =
let flt = match elt_class elt with let flt = match elt_class elt with
Float | ConvClass(_, Float) -> true | _ -> false in Float | ConvClass(_, Float) -> true | _ -> false in
let idx = let idx =
...@@ -394,7 +407,10 @@ let rec mode_of_elt elt shape = ...@@ -394,7 +407,10 @@ let rec mode_of_elt elt shape =
in match shape with in match shape with
All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
| Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg -> | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
[| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx) if flt then
[| V8QI; V4HF; V2SF; DI |].(idx)
else
[| V8QI; V4HI; V2SI; DI |].(idx)
| All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
| Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg -> | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
[| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx) [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
...@@ -404,7 +420,11 @@ let rec mode_of_elt elt shape = ...@@ -404,7 +420,11 @@ let rec mode_of_elt elt shape =
| Long_imm -> | Long_imm ->
[| V8QI; V4HI; V2SI; DI |].(idx) [| V8QI; V4HI; V2SI; DI |].(idx)
| Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx) | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
| Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops))) | Use_operands ops ->
begin match argpos with
None -> mode_of_elt ?argpos elt (All (0, (find_key_operand ops)))
| Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos)))
end
| _ -> failwith "invalid shape" | _ -> failwith "invalid shape"
(* Modify an element type dependent on the shape of the instruction and the (* Modify an element type dependent on the shape of the instruction and the
...@@ -454,10 +474,11 @@ let type_for_elt shape elt no = ...@@ -454,10 +474,11 @@ let type_for_elt shape elt no =
| U16 -> T_uint16x4 | U16 -> T_uint16x4
| U32 -> T_uint32x2 | U32 -> T_uint32x2
| U64 -> T_uint64x1 | U64 -> T_uint64x1
| F16 -> T_float16x4
| F32 -> T_float32x2 | F32 -> T_float32x2
| P8 -> T_poly8x8 | P8 -> T_poly8x8
| P16 -> T_poly16x4 | P16 -> T_poly16x4
| _ -> failwith "Bad elt type" | _ -> failwith "Bad elt type for Dreg"
end end
| Qreg -> | Qreg ->
begin match elt with begin match elt with
...@@ -472,7 +493,7 @@ let type_for_elt shape elt no = ...@@ -472,7 +493,7 @@ let type_for_elt shape elt no =
| F32 -> T_float32x4 | F32 -> T_float32x4
| P8 -> T_poly8x16 | P8 -> T_poly8x16
| P16 -> T_poly16x8 | P16 -> T_poly16x8
| _ -> failwith "Bad elt type" | _ -> failwith "Bad elt type for Qreg"
end end
| Corereg -> | Corereg ->
begin match elt with begin match elt with
...@@ -487,7 +508,7 @@ let type_for_elt shape elt no = ...@@ -487,7 +508,7 @@ let type_for_elt shape elt no =
| P8 -> T_poly8 | P8 -> T_poly8
| P16 -> T_poly16 | P16 -> T_poly16
| F32 -> T_float32 | F32 -> T_float32
| _ -> failwith "Bad elt type" | _ -> failwith "Bad elt type for Corereg"
end end
| Immed -> | Immed ->
T_immediate (0, 0) T_immediate (0, 0)
...@@ -506,7 +527,7 @@ let type_for_elt shape elt no = ...@@ -506,7 +527,7 @@ let type_for_elt shape elt no =
let vectype_size = function let vectype_size = function
T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1 T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
| T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1 | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
| T_float32x2 | T_poly8x8 | T_poly16x4 -> 64 | T_float32x2 | T_poly8x8 | T_poly16x4 | T_float16x4 -> 64
| T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2 | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
| T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2 | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
| T_float32x4 | T_poly8x16 | T_poly16x8 -> 128 | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
...@@ -1217,6 +1238,10 @@ let ops = ...@@ -1217,6 +1238,10 @@ let ops =
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1, Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)];
Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)];
Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2, Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2, Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
...@@ -1782,7 +1807,7 @@ let rec string_of_elt = function ...@@ -1782,7 +1807,7 @@ let rec string_of_elt = function
| U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64" | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
| I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64" | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
| B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64" | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
| F32 -> "f32" | P8 -> "p8" | P16 -> "p16" | F16 -> "f16" | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
| Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
| NoElts -> failwith "No elts" | NoElts -> failwith "No elts"
...@@ -1809,6 +1834,7 @@ let string_of_vectype vt = ...@@ -1809,6 +1834,7 @@ let string_of_vectype vt =
| T_uint32x4 -> affix "uint32x4" | T_uint32x4 -> affix "uint32x4"
| T_uint64x1 -> affix "uint64x1" | T_uint64x1 -> affix "uint64x1"
| T_uint64x2 -> affix "uint64x2" | T_uint64x2 -> affix "uint64x2"
| T_float16x4 -> affix "float16x4"
| T_float32x2 -> affix "float32x2" | T_float32x2 -> affix "float32x2"
| T_float32x4 -> affix "float32x4" | T_float32x4 -> affix "float32x4"
| T_poly8x8 -> affix "poly8x8" | T_poly8x8 -> affix "poly8x8"
...@@ -1825,6 +1851,7 @@ let string_of_vectype vt = ...@@ -1825,6 +1851,7 @@ let string_of_vectype vt =
| T_uint64 -> affix "uint64" | T_uint64 -> affix "uint64"
| T_poly8 -> affix "poly8" | T_poly8 -> affix "poly8"
| T_poly16 -> affix "poly16" | T_poly16 -> affix "poly16"
| T_float16 -> affix "float16"
| T_float32 -> affix "float32" | T_float32 -> affix "float32"
| T_immediate _ -> "const int" | T_immediate _ -> "const int"
| T_void -> "void" | T_void -> "void"
...@@ -1832,6 +1859,7 @@ let string_of_vectype vt = ...@@ -1832,6 +1859,7 @@ let string_of_vectype vt =
| T_intHI -> "__builtin_neon_hi" | T_intHI -> "__builtin_neon_hi"
| T_intSI -> "__builtin_neon_si" | T_intSI -> "__builtin_neon_si"
| T_intDI -> "__builtin_neon_di" | T_intDI -> "__builtin_neon_di"
| T_floatHF -> "__builtin_neon_hf"
| T_floatSF -> "__builtin_neon_sf" | T_floatSF -> "__builtin_neon_sf"
| T_arrayof (num, base) -> | T_arrayof (num, base) ->
let basename = name (fun x -> x) base in let basename = name (fun x -> x) base in
...@@ -1853,10 +1881,10 @@ let string_of_inttype = function ...@@ -1853,10 +1881,10 @@ let string_of_inttype = function
| B_XImode -> "__builtin_neon_xi" | B_XImode -> "__builtin_neon_xi"
let string_of_mode = function let string_of_mode = function
V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf" V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF -> "v4hf" | V2SI -> "v2si"
| DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si" | V2SF -> "v2sf" | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi"
| V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si" | V4SI -> "v4si" | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi"
| SF -> "sf" | HI -> "hi" | SI -> "si" | SF -> "sf"
(* Use uppercase chars for letters which form part of the intrinsic name, but (* Use uppercase chars for letters which form part of the intrinsic name, but
should be omitted from the builtin name (the info is passed in an extra should be omitted from the builtin name (the info is passed in an extra
......
...@@ -5748,6 +5748,18 @@ ...@@ -5748,6 +5748,18 @@
@itemize @bullet @itemize @bullet
@item float16x4_t vcvt_f16_f32 (float32x4_t)
@*@emph{Form of expected instruction(s):} @code{vcvt.f16.f32 @var{d0}, @var{q0}}
@end itemize
@itemize @bullet
@item float32x4_t vcvt_f32_f16 (float16x4_t)
@*@emph{Form of expected instruction(s):} @code{vcvt.f32.f16 @var{q0}, @var{d0}}
@end itemize
@itemize @bullet
@item float32x2_t vcvt_n_f32_u32 (uint32x2_t, const int) @item float32x2_t vcvt_n_f32_u32 (uint32x2_t, const int)
@*@emph{Form of expected instruction(s):} @code{vcvt.f32.u32 @var{d0}, @var{d0}, #@var{0}} @*@emph{Form of expected instruction(s):} @code{vcvt.f32.u32 @var{d0}, @var{d0}, #@var{0}}
@end itemize @end itemize
......
2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* lib/target-supports.exp
(check_effective_target_arm_neon_fp16_ok_nocache): New procedure.
(check_effective_target_arm_neon_fp16_ok): Likewise.
(add_options_for_arm_neon_fp16): Likewise.
* gcc.target/arm/neon/vcvtf16_f32.c: New test. Generated.
* gcc.target/arm/neon/vcvtf32_f16.c: Likewise.
2013-04-24 Vladimir Makarov <vmakarov@redhat.com> 2013-04-24 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimizations/57046 PR rtl-optimizations/57046
......
/* Test the `vcvtf16_f32' ARM Neon intrinsic. */
/* This file was autogenerated by neon-testgen. */
/* { dg-do assemble } */
/* { dg-require-effective-target arm_neon_fp16_ok } */
/* { dg-options "-save-temps -O0" } */
/* { dg-add-options arm_neon_fp16 } */
#include "arm_neon.h"
void test_vcvtf16_f32 (void)
{
float16x4_t out_float16x4_t;
float32x4_t arg0_float32x4_t;
out_float16x4_t = vcvt_f16_f32 (arg0_float32x4_t);
}
/* { dg-final { scan-assembler "vcvt\.f16.f32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the `vcvtf32_f16' ARM Neon intrinsic. */
/* This file was autogenerated by neon-testgen. */
/* { dg-do assemble } */
/* { dg-require-effective-target arm_neon_fp16_ok } */
/* { dg-options "-save-temps -O0" } */
/* { dg-add-options arm_neon_fp16 } */
#include "arm_neon.h"
void test_vcvtf32_f16 (void)
{
float32x4_t out_float32x4_t;
float16x4_t arg0_float16x4_t;
out_float32x4_t = vcvt_f32_f16 (arg0_float16x4_t);
}
/* { dg-final { scan-assembler "vcvt\.f32.f16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
...@@ -2255,6 +2255,47 @@ proc check_effective_target_arm_neon_ok { } { ...@@ -2255,6 +2255,47 @@ proc check_effective_target_arm_neon_ok { } {
check_effective_target_arm_neon_ok_nocache] check_effective_target_arm_neon_ok_nocache]
} }
# Return 1 if this is an ARM target supporting -mfpu=neon-fp16
# -mfloat-abi=softfp or equivalent options. Some multilibs may be
# incompatible with these options. Also set et_arm_neon_flags to the
# best options to add.
proc check_effective_target_arm_neon_fp16_ok_nocache { } {
global et_arm_neon_fp16_flags
set et_arm_neon_fp16_flags ""
if { [check_effective_target_arm32] } {
foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16"
"-mfpu=neon-fp16 -mfloat-abi=softfp"} {
if { [check_no_compiler_messages_nocache arm_neon_fp_16_ok object {
#include "arm_neon.h"
float16x4_t
foo (float32x4_t arg)
{
return vcvt_f16_f32 (arg);
}
} "$flags"] } {
set et_arm_neon_fp16_flags $flags
return 1
}
}
}
return 0
}
proc check_effective_target_arm_neon_fp16_ok { } {
return [check_cached_effective_target arm_neon_fp16_ok \
check_effective_target_arm_neon_fp16_ok_nocache]
}
proc add_options_for_arm_neon_fp16 { flags } {
if { ! [check_effective_target_arm_neon_fp16_ok] } {
return "$flags"
}
global et_arm_neon_fp16_flags
return "$flags $et_arm_neon_fp16_flags"
}
# Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8 # Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8
# -mfloat-abi=softfp or equivalent options. Some multilibs may be # -mfloat-abi=softfp or equivalent options. Some multilibs may be
# incompatible with these options. Also set et_arm_v8_neon_flags to the # incompatible with these options. Also set et_arm_v8_neon_flags to the
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment