Commit 5819f96f by Kyrylo Tkachov Committed by Kyrylo Tkachov

arm.c (neon_builtin_type_mode): Add T_V4HF.

2013-04-25  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
            Julian Brown  <julian@codesourcery.com>

	* config/arm/arm.c (neon_builtin_type_mode): Add T_V4HF.
	(TB_DREG): Add T_V4HF.
	(v4hf_UP): New macro.
	(neon_itype): Add NEON_FLOAT_WIDEN, NEON_FLOAT_NARROW.
	(arm_init_neon_builtins): Handle NEON_FLOAT_WIDEN,
	NEON_FLOAT_NARROW.
	Handle initialisation of V4HF. Adjust initialisation of reinterpret
	built-ins.
	(arm_expand_neon_builtin): Handle NEON_FLOAT_WIDEN,
	NEON_FLOAT_NARROW.
	(arm_vector_mode_supported_p): Handle V4HF.
	(arm_mangle_map): Handle V4HFmode.
	* config/arm/arm.h (VALID_NEON_DREG_MODE): Add V4HF.
	* config/arm/arm_neon_builtins.def: Add entries for
	vcvtv4hfv4sf, vcvtv4sfv4hf.
	* config/arm/neon.md (neon_vcvtv4sfv4hf): New pattern.
	(neon_vcvtv4hfv4sf): Likewise.
	* config/arm/neon-gen.ml: Handle half-precision floating point
	features.
	* config/arm/neon-testgen.ml: Handle Requires_FP_bit feature.
	* config/arm/arm_neon.h: Regenerate.
	* config/arm/neon.ml (type elts): Add F16.
	(type vectype): Add T_float16x4, T_floatHF.
	(type vecmode): Add V4HF.
	(type features): Add Requires_FP_bit feature.
	(elt_width): Handle F16.
	(elt_class): Likewise.
	(elt_of_class_width): Likewise.
	(mode_of_elt): Refactor.
	(type_for_elt): Handle F16, fix error messages.
	(vectype_size): Handle T_float16x4.
	(vcvt_sh): New function.
	(ops): Add entries for vcvt_f16_f32, vcvt_f32_f16.
	(string_of_vectype): Handle T_floatHF, T_float16, T_float16x4.
	(string_of_mode): Handle V4HF.
	* doc/arm-neon-intrinsics.texi: Regenerate.

2013-04-25  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

	* lib/target-supports.exp
	(check_effective_target_arm_neon_fp16_ok_nocache): New procedure.
	(check_effective_target_arm_neon_fp16_ok): Likewise.
	(add_options_for_arm_neon_fp16): Likewise.
	* gcc.target/arm/neon/vcvtf16_f32.c: New test. Generated.
	* gcc.target/arm/neon/vcvtf32_f16.c: Likewise.

Co-Authored-By: Julian Brown <julian@codesourcery.com>

From-SVN: r198298
parent fd8b8c01
2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Julian Brown <julian@codesourcery.com>
* config/arm/arm.c (neon_builtin_type_mode): Add T_V4HF.
(TB_DREG): Add T_V4HF.
(v4hf_UP): New macro.
(neon_itype): Add NEON_FLOAT_WIDEN, NEON_FLOAT_NARROW.
(arm_init_neon_builtins): Handle NEON_FLOAT_WIDEN,
NEON_FLOAT_NARROW.
Handle initialisation of V4HF. Adjust initialisation of reinterpret
built-ins.
(arm_expand_neon_builtin): Handle NEON_FLOAT_WIDEN,
NEON_FLOAT_NARROW.
(arm_vector_mode_supported_p): Handle V4HF.
(arm_mangle_map): Handle V4HFmode.
* config/arm/arm.h (VALID_NEON_DREG_MODE): Add V4HF.
* config/arm/arm_neon_builtins.def: Add entries for
vcvtv4hfv4sf, vcvtv4sfv4hf.
* config/arm/neon.md (neon_vcvtv4sfv4hf): New pattern.
(neon_vcvtv4hfv4sf): Likewise.
* config/arm/neon-gen.ml: Handle half-precision floating point
features.
* config/arm/neon-testgen.ml: Handle Requires_FP_bit feature.
* config/arm/arm_neon.h: Regenerate.
* config/arm/neon.ml (type elts): Add F16.
(type vectype): Add T_float16x4, T_floatHF.
(type vecmode): Add V4HF.
(type features): Add Requires_FP_bit feature.
(elt_width): Handle F16.
(elt_class): Likewise.
(elt_of_class_width): Likewise.
(mode_of_elt): Refactor.
(type_for_elt): Handle F16, fix error messages.
(vectype_size): Handle T_float16x4.
(vcvt_sh): New function.
(ops): Add entries for vcvt_f16_f32, vcvt_f32_f16.
(string_of_vectype): Handle T_floatHF, T_float16, T_float16x4.
(string_of_mode): Handle V4HF.
* doc/arm-neon-intrinsics.texi: Regenerate.
2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64.c (aarch64_print_operand): Fix asm_fprintf
......
......@@ -19940,6 +19940,7 @@ arm_debugger_arg_offset (int value, rtx addr)
typedef enum {
T_V8QI,
T_V4HI,
T_V4HF,
T_V2SI,
T_V2SF,
T_DI,
......@@ -19957,14 +19958,15 @@ typedef enum {
#define TYPE_MODE_BIT(X) (1 << (X))
#define TB_DREG (TYPE_MODE_BIT (T_V8QI) | TYPE_MODE_BIT (T_V4HI) \
| TYPE_MODE_BIT (T_V2SI) | TYPE_MODE_BIT (T_V2SF) \
| TYPE_MODE_BIT (T_DI))
| TYPE_MODE_BIT (T_V4HF) | TYPE_MODE_BIT (T_V2SI) \
| TYPE_MODE_BIT (T_V2SF) | TYPE_MODE_BIT (T_DI))
#define TB_QREG (TYPE_MODE_BIT (T_V16QI) | TYPE_MODE_BIT (T_V8HI) \
| TYPE_MODE_BIT (T_V4SI) | TYPE_MODE_BIT (T_V4SF) \
| TYPE_MODE_BIT (T_V2DI) | TYPE_MODE_BIT (T_TI))
#define v8qi_UP T_V8QI
#define v4hi_UP T_V4HI
#define v4hf_UP T_V4HF
#define v2si_UP T_V2SI
#define v2sf_UP T_V2SF
#define di_UP T_DI
......@@ -20000,6 +20002,8 @@ typedef enum {
NEON_SCALARMULH,
NEON_SCALARMAC,
NEON_CONVERT,
NEON_FLOAT_WIDEN,
NEON_FLOAT_NARROW,
NEON_FIXCONV,
NEON_SELECT,
NEON_RESULTPAIR,
......@@ -20393,6 +20397,7 @@ arm_init_neon_builtins (void)
tree neon_intQI_type_node;
tree neon_intHI_type_node;
tree neon_floatHF_type_node;
tree neon_polyQI_type_node;
tree neon_polyHI_type_node;
tree neon_intSI_type_node;
......@@ -20419,6 +20424,7 @@ arm_init_neon_builtins (void)
tree V8QI_type_node;
tree V4HI_type_node;
tree V4HF_type_node;
tree V2SI_type_node;
tree V2SF_type_node;
tree V16QI_type_node;
......@@ -20473,6 +20479,9 @@ arm_init_neon_builtins (void)
neon_float_type_node = make_node (REAL_TYPE);
TYPE_PRECISION (neon_float_type_node) = FLOAT_TYPE_SIZE;
layout_type (neon_float_type_node);
neon_floatHF_type_node = make_node (REAL_TYPE);
TYPE_PRECISION (neon_floatHF_type_node) = GET_MODE_PRECISION (HFmode);
layout_type (neon_floatHF_type_node);
/* Define typedefs which exactly correspond to the modes we are basing vector
types on. If you change these names you'll need to change
......@@ -20481,6 +20490,8 @@ arm_init_neon_builtins (void)
"__builtin_neon_qi");
(*lang_hooks.types.register_builtin_type) (neon_intHI_type_node,
"__builtin_neon_hi");
(*lang_hooks.types.register_builtin_type) (neon_floatHF_type_node,
"__builtin_neon_hf");
(*lang_hooks.types.register_builtin_type) (neon_intSI_type_node,
"__builtin_neon_si");
(*lang_hooks.types.register_builtin_type) (neon_float_type_node,
......@@ -20522,6 +20533,8 @@ arm_init_neon_builtins (void)
build_vector_type_for_mode (neon_intQI_type_node, V8QImode);
V4HI_type_node =
build_vector_type_for_mode (neon_intHI_type_node, V4HImode);
V4HF_type_node =
build_vector_type_for_mode (neon_floatHF_type_node, V4HFmode);
V2SI_type_node =
build_vector_type_for_mode (neon_intSI_type_node, V2SImode);
V2SF_type_node =
......@@ -20644,7 +20657,7 @@ arm_init_neon_builtins (void)
neon_builtin_datum *d = &neon_builtin_data[i];
const char* const modenames[] = {
"v8qi", "v4hi", "v2si", "v2sf", "di",
"v8qi", "v4hi", "v4hf", "v2si", "v2sf", "di",
"v16qi", "v8hi", "v4si", "v4sf", "v2di",
"ti", "ei", "oi"
};
......@@ -20847,8 +20860,9 @@ arm_init_neon_builtins (void)
case NEON_REINTERP:
{
/* We iterate over 5 doubleword types, then 5 quadword
types. */
int rhs = d->mode % 5;
types. V4HF is not a type used in reinterpret, so we translate
d->mode to the correct index in reinterp_ftype_dreg. */
int rhs = (d->mode - ((d->mode > T_V4HF) ? 1 : 0)) % 5;
switch (insn_data[d->code].operand[0].mode)
{
case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
......@@ -20865,7 +20879,38 @@ arm_init_neon_builtins (void)
}
}
break;
case NEON_FLOAT_WIDEN:
{
tree eltype = NULL_TREE;
tree return_type = NULL_TREE;
switch (insn_data[d->code].operand[1].mode)
{
case V4HFmode:
eltype = V4HF_type_node;
return_type = V4SF_type_node;
break;
default: gcc_unreachable ();
}
ftype = build_function_type_list (return_type, eltype, NULL);
break;
}
case NEON_FLOAT_NARROW:
{
tree eltype = NULL_TREE;
tree return_type = NULL_TREE;
switch (insn_data[d->code].operand[1].mode)
{
case V4SFmode:
eltype = V4SF_type_node;
return_type = V4HF_type_node;
break;
default: gcc_unreachable ();
}
ftype = build_function_type_list (return_type, eltype, NULL);
break;
}
default:
gcc_unreachable ();
}
......@@ -21862,6 +21907,8 @@ arm_expand_neon_builtin (int fcode, tree exp, rtx target)
case NEON_DUP:
case NEON_RINT:
case NEON_SPLIT:
case NEON_FLOAT_WIDEN:
case NEON_FLOAT_NARROW:
case NEON_REINTERP:
return arm_expand_neon_args (target, icode, 1, type_mode, exp, fcode,
NEON_ARG_COPY_TO_REG, NEON_ARG_STOP);
......@@ -25517,7 +25564,7 @@ arm_vector_mode_supported_p (enum machine_mode mode)
{
/* Neon also supports V2SImode, etc. listed in the clause below. */
if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
|| mode == V16QImode || mode == V4SFmode || mode == V2DImode))
|| mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
return true;
if ((TARGET_NEON || TARGET_IWMMXT)
......@@ -26361,6 +26408,7 @@ static arm_mangle_map_entry arm_mangle_map[] = {
{ V8QImode, "__builtin_neon_uqi", "16__simd64_uint8_t" },
{ V4HImode, "__builtin_neon_hi", "16__simd64_int16_t" },
{ V4HImode, "__builtin_neon_uhi", "17__simd64_uint16_t" },
{ V4HFmode, "__builtin_neon_hf", "18__simd64_float16_t" },
{ V2SImode, "__builtin_neon_si", "16__simd64_int32_t" },
{ V2SImode, "__builtin_neon_usi", "17__simd64_uint32_t" },
{ V2SFmode, "__builtin_neon_sf", "18__simd64_float32_t" },
......
......@@ -1052,7 +1052,7 @@ extern int prefer_neon_for_64bits;
/* Modes valid for Neon D registers. */
#define VALID_NEON_DREG_MODE(MODE) \
((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
|| (MODE) == V2SFmode || (MODE) == DImode)
|| (MODE) == V4HFmode || (MODE) == V2SFmode || (MODE) == DImode)
/* Modes valid for Neon Q registers. */
#define VALID_NEON_QREG_MODE(MODE) \
......
......@@ -43,6 +43,7 @@ typedef __builtin_neon_hi int16x4_t __attribute__ ((__vector_size__ (8)));
typedef __builtin_neon_si int32x2_t __attribute__ ((__vector_size__ (8)));
typedef __builtin_neon_di int64x1_t;
typedef __builtin_neon_sf float32x2_t __attribute__ ((__vector_size__ (8)));
typedef __builtin_neon_hf float16x4_t __attribute__ ((__vector_size__ (8)));
typedef __builtin_neon_poly8 poly8x8_t __attribute__ ((__vector_size__ (8)));
typedef __builtin_neon_poly16 poly16x4_t __attribute__ ((__vector_size__ (8)));
typedef __builtin_neon_uqi uint8x8_t __attribute__ ((__vector_size__ (8)));
......@@ -6016,6 +6017,22 @@ vcvtq_u32_f32 (float32x4_t __a)
return (uint32x4_t)__builtin_neon_vcvtv4sf (__a, 0);
}
#if ((__ARM_FP & 0x2) != 0)
__extension__ static __inline float16x4_t __attribute__ ((__always_inline__))
vcvt_f16_f32 (float32x4_t __a)
{
return (float16x4_t)__builtin_neon_vcvtv4hfv4sf (__a);
}
#endif
#if ((__ARM_FP & 0x2) != 0)
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_f32_f16 (float16x4_t __a)
{
return (float32x4_t)__builtin_neon_vcvtv4sfv4hf (__a);
}
#endif
__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
vcvt_n_s32_f32 (float32x2_t __a, const int __b)
{
......
......@@ -132,6 +132,8 @@ VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
VAR2 (UNOP, vrev16, v8qi, v16qi),
VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
VAR1 (FLOAT_WIDEN, vcvtv4sf, v4hf),
VAR1 (FLOAT_NARROW, vcvtv4hf, v4sf),
VAR10 (SELECT, vbsl,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
VAR2 (RINT, vrintn, v2sf, v4sf),
......
......@@ -121,6 +121,7 @@ let rec signed_ctype = function
| T_uint16 | T_int16 -> T_intHI
| T_uint32 | T_int32 -> T_intSI
| T_uint64 | T_int64 -> T_intDI
| T_float16 -> T_floatHF
| T_float32 -> T_floatSF
| T_poly8 -> T_intQI
| T_poly16 -> T_intHI
......@@ -275,8 +276,8 @@ let rec mode_suffix elttype shape =
let mode = mode_of_elt elttype shape in
string_of_mode mode
with MixedMode (dst, src) ->
let dstmode = mode_of_elt dst shape
and srcmode = mode_of_elt src shape in
let dstmode = mode_of_elt ~argpos:0 dst shape
and srcmode = mode_of_elt ~argpos:1 src shape in
string_of_mode dstmode ^ string_of_mode srcmode
let get_shuffle features =
......@@ -291,19 +292,24 @@ let print_feature_test_start features =
match List.find (fun feature ->
match feature with Requires_feature _ -> true
| Requires_arch _ -> true
| Requires_FP_bit _ -> true
| _ -> false)
features with
Requires_feature feature ->
Requires_feature feature ->
Format.printf "#ifdef __ARM_FEATURE_%s@\n" feature
| Requires_arch arch ->
Format.printf "#if __ARM_ARCH >= %d@\n" arch
| Requires_FP_bit bit ->
Format.printf "#if ((__ARM_FP & 0x%X) != 0)@\n"
(1 lsl bit)
| _ -> assert false
with Not_found -> assert true
let print_feature_test_end features =
let feature =
List.exists (function Requires_feature x -> true
| Requires_arch x -> true
List.exists (function Requires_feature _ -> true
| Requires_arch _ -> true
| Requires_FP_bit _ -> true
| _ -> false) features in
if feature then Format.printf "#endif@\n"
......@@ -365,6 +371,7 @@ let deftypes () =
"__builtin_neon_hi", "int", 16, 4;
"__builtin_neon_si", "int", 32, 2;
"__builtin_neon_di", "int", 64, 1;
"__builtin_neon_hf", "float", 16, 4;
"__builtin_neon_sf", "float", 32, 2;
"__builtin_neon_poly8", "poly", 8, 8;
"__builtin_neon_poly16", "poly", 16, 4;
......
......@@ -163,10 +163,12 @@ let effective_target features =
match List.find (fun feature ->
match feature with Requires_feature _ -> true
| Requires_arch _ -> true
| Requires_FP_bit 1 -> true
| _ -> false)
features with
Requires_feature "FMA" -> "arm_neonv2"
| Requires_arch 8 -> "arm_v8_neon"
| Requires_FP_bit 1 -> "arm_neon_fp16"
| _ -> assert false
with Not_found -> "arm_neon"
......
......@@ -3258,6 +3258,24 @@
(const_string "neon_fp_vadd_qqq_vabs_qq")))]
)
(define_insn "neon_vcvtv4sfv4hf"
[(set (match_operand:V4SF 0 "s_register_operand" "=w")
(unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
UNSPEC_VCVT))]
"TARGET_NEON && TARGET_FP16"
"vcvt.f32.f16\t%q0, %P1"
[(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
)
(define_insn "neon_vcvtv4hfv4sf"
[(set (match_operand:V4HF 0 "s_register_operand" "=w")
(unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
UNSPEC_VCVT))]
"TARGET_NEON && TARGET_FP16"
"vcvt.f16.f32\t%P0, %q1"
[(set_attr "neon_type" "neon_fp_vadd_ddd_vabs_dd")]
)
(define_insn "neon_vcvt_n<mode>"
[(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
(unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
......
......@@ -21,7 +21,7 @@
<http://www.gnu.org/licenses/>. *)
(* Shorthand types for vector elements. *)
type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
type elts = S8 | S16 | S32 | S64 | F16 | F32 | U8 | U16 | U32 | U64 | P8 | P16
| I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
| Cast of elts * elts | NoElts
......@@ -37,6 +37,7 @@ type vectype = T_int8x8 | T_int8x16
| T_uint16x4 | T_uint16x8
| T_uint32x2 | T_uint32x4
| T_uint64x1 | T_uint64x2
| T_float16x4
| T_float32x2 | T_float32x4
| T_poly8x8 | T_poly8x16
| T_poly16x4 | T_poly16x8
......@@ -46,11 +47,13 @@ type vectype = T_int8x8 | T_int8x16
| T_uint8 | T_uint16
| T_uint32 | T_uint64
| T_poly8 | T_poly16
| T_float32 | T_arrayof of int * vectype
| T_float16 | T_float32
| T_arrayof of int * vectype
| T_ptrto of vectype | T_const of vectype
| T_void | T_intQI
| T_intHI | T_intSI
| T_intDI | T_floatSF
| T_intDI | T_floatHF
| T_floatSF
(* The meanings of the following are:
TImode : "Tetra", two registers (four words).
......@@ -92,7 +95,7 @@ type arity = Arity0 of vectype
| Arity3 of vectype * vectype * vectype * vectype
| Arity4 of vectype * vectype * vectype * vectype * vectype
type vecmode = V8QI | V4HI | V2SI | V2SF | DI
type vecmode = V8QI | V4HI | V4HF |V2SI | V2SF | DI
| V16QI | V8HI | V4SI | V4SF | V2DI
| QI | HI | SI | SF
......@@ -284,18 +287,22 @@ type features =
| Fixed_core_reg
(* Mark that the intrinsic requires __ARM_FEATURE_string to be defined. *)
| Requires_feature of string
(* Mark that the intrinsic requires a particular architecture version. *)
| Requires_arch of int
(* Mark that the intrinsic requires a particular bit in __ARM_FP to
be set. *)
| Requires_FP_bit of int
exception MixedMode of elts * elts
let rec elt_width = function
S8 | U8 | P8 | I8 | B8 -> 8
| S16 | U16 | P16 | I16 | B16 -> 16
| S16 | U16 | P16 | I16 | B16 | F16 -> 16
| S32 | F32 | U32 | I32 | B32 -> 32
| S64 | U64 | I64 | B64 -> 64
| Conv (a, b) ->
let wa = elt_width a and wb = elt_width b in
if wa = wb then wa else failwith "element width?"
if wa = wb then wa else raise (MixedMode (a, b))
| Cast (a, b) -> raise (MixedMode (a, b))
| NoElts -> failwith "No elts"
......@@ -303,7 +310,7 @@ let rec elt_class = function
S8 | S16 | S32 | S64 -> Signed
| U8 | U16 | U32 | U64 -> Unsigned
| P8 | P16 -> Poly
| F32 -> Float
| F16 | F32 -> Float
| I8 | I16 | I32 | I64 -> Int
| B8 | B16 | B32 | B64 -> Bits
| Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
......@@ -315,6 +322,7 @@ let elt_of_class_width c w =
| Signed, 16 -> S16
| Signed, 32 -> S32
| Signed, 64 -> S64
| Float, 16 -> F16
| Float, 32 -> F32
| Unsigned, 8 -> U8
| Unsigned, 16 -> U16
......@@ -384,7 +392,12 @@ let find_key_operand operands =
in
scan ((Array.length operands) - 1)
let rec mode_of_elt elt shape =
(* Find a vecmode from a shape_elt ELT for an instruction with shape_form
SHAPE. For a Use_operands shape, if ARGPOS is passed then return the mode
for the given argument position, else determine which argument to return a
mode for automatically. *)
let rec mode_of_elt ?argpos elt shape =
let flt = match elt_class elt with
Float | ConvClass(_, Float) -> true | _ -> false in
let idx =
......@@ -394,7 +407,10 @@ let rec mode_of_elt elt shape =
in match shape with
All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
| Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
[| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
if flt then
[| V8QI; V4HF; V2SF; DI |].(idx)
else
[| V8QI; V4HI; V2SI; DI |].(idx)
| All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
| Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
[| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
......@@ -404,7 +420,11 @@ let rec mode_of_elt elt shape =
| Long_imm ->
[| V8QI; V4HI; V2SI; DI |].(idx)
| Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
| Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
| Use_operands ops ->
begin match argpos with
None -> mode_of_elt ?argpos elt (All (0, (find_key_operand ops)))
| Some pos -> mode_of_elt ?argpos elt (All (0, ops.(pos)))
end
| _ -> failwith "invalid shape"
(* Modify an element type dependent on the shape of the instruction and the
......@@ -454,10 +474,11 @@ let type_for_elt shape elt no =
| U16 -> T_uint16x4
| U32 -> T_uint32x2
| U64 -> T_uint64x1
| F16 -> T_float16x4
| F32 -> T_float32x2
| P8 -> T_poly8x8
| P16 -> T_poly16x4
| _ -> failwith "Bad elt type"
| _ -> failwith "Bad elt type for Dreg"
end
| Qreg ->
begin match elt with
......@@ -472,7 +493,7 @@ let type_for_elt shape elt no =
| F32 -> T_float32x4
| P8 -> T_poly8x16
| P16 -> T_poly16x8
| _ -> failwith "Bad elt type"
| _ -> failwith "Bad elt type for Qreg"
end
| Corereg ->
begin match elt with
......@@ -487,7 +508,7 @@ let type_for_elt shape elt no =
| P8 -> T_poly8
| P16 -> T_poly16
| F32 -> T_float32
| _ -> failwith "Bad elt type"
| _ -> failwith "Bad elt type for Corereg"
end
| Immed ->
T_immediate (0, 0)
......@@ -506,7 +527,7 @@ let type_for_elt shape elt no =
let vectype_size = function
T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
| T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
| T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
| T_float32x2 | T_poly8x8 | T_poly16x4 | T_float16x4 -> 64
| T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
| T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
| T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
......@@ -1217,6 +1238,10 @@ let ops =
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
Use_operands [| Dreg; Qreg; |], "vcvt", conv_1, [Conv (F16, F32)];
Vcvt, [Builtin_name "vcvt" ; Requires_FP_bit 1],
Use_operands [| Qreg; Dreg; |], "vcvt", conv_1, [Conv (F32, F16)];
Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
[Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
......@@ -1782,7 +1807,7 @@ let rec string_of_elt = function
| U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
| I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
| B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
| F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
| F16 -> "f16" | F32 -> "f32" | P8 -> "p8" | P16 -> "p16"
| Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
| NoElts -> failwith "No elts"
......@@ -1809,6 +1834,7 @@ let string_of_vectype vt =
| T_uint32x4 -> affix "uint32x4"
| T_uint64x1 -> affix "uint64x1"
| T_uint64x2 -> affix "uint64x2"
| T_float16x4 -> affix "float16x4"
| T_float32x2 -> affix "float32x2"
| T_float32x4 -> affix "float32x4"
| T_poly8x8 -> affix "poly8x8"
......@@ -1825,6 +1851,7 @@ let string_of_vectype vt =
| T_uint64 -> affix "uint64"
| T_poly8 -> affix "poly8"
| T_poly16 -> affix "poly16"
| T_float16 -> affix "float16"
| T_float32 -> affix "float32"
| T_immediate _ -> "const int"
| T_void -> "void"
......@@ -1832,6 +1859,7 @@ let string_of_vectype vt =
| T_intHI -> "__builtin_neon_hi"
| T_intSI -> "__builtin_neon_si"
| T_intDI -> "__builtin_neon_di"
| T_floatHF -> "__builtin_neon_hf"
| T_floatSF -> "__builtin_neon_sf"
| T_arrayof (num, base) ->
let basename = name (fun x -> x) base in
......@@ -1853,10 +1881,10 @@ let string_of_inttype = function
| B_XImode -> "__builtin_neon_xi"
let string_of_mode = function
V8QI -> "v8qi" | V4HI -> "v4hi" | V2SI -> "v2si" | V2SF -> "v2sf"
| DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi" | V4SI -> "v4si"
| V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi" | HI -> "hi" | SI -> "si"
| SF -> "sf"
V8QI -> "v8qi" | V4HI -> "v4hi" | V4HF -> "v4hf" | V2SI -> "v2si"
| V2SF -> "v2sf" | DI -> "di" | V16QI -> "v16qi" | V8HI -> "v8hi"
| V4SI -> "v4si" | V4SF -> "v4sf" | V2DI -> "v2di" | QI -> "qi"
| HI -> "hi" | SI -> "si" | SF -> "sf"
(* Use uppercase chars for letters which form part of the intrinsic name, but
should be omitted from the builtin name (the info is passed in an extra
......
......@@ -5748,6 +5748,18 @@
@itemize @bullet
@item float16x4_t vcvt_f16_f32 (float32x4_t)
@*@emph{Form of expected instruction(s):} @code{vcvt.f16.f32 @var{d0}, @var{q0}}
@end itemize
@itemize @bullet
@item float32x4_t vcvt_f32_f16 (float16x4_t)
@*@emph{Form of expected instruction(s):} @code{vcvt.f32.f16 @var{q0}, @var{d0}}
@end itemize
@itemize @bullet
@item float32x2_t vcvt_n_f32_u32 (uint32x2_t, const int)
@*@emph{Form of expected instruction(s):} @code{vcvt.f32.u32 @var{d0}, @var{d0}, #@var{0}}
@end itemize
......
2013-04-25 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* lib/target-supports.exp
(check_effective_target_arm_neon_fp16_ok_nocache): New procedure.
(check_effective_target_arm_neon_fp16_ok): Likewise.
(add_options_for_arm_neon_fp16): Likewise.
* gcc.target/arm/neon/vcvtf16_f32.c: New test. Generated.
* gcc.target/arm/neon/vcvtf32_f16.c: Likewise.
2013-04-24 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimizations/57046
......
/* Test the `vcvtf16_f32' ARM Neon intrinsic. */
/* This file was autogenerated by neon-testgen. */
/* { dg-do assemble } */
/* { dg-require-effective-target arm_neon_fp16_ok } */
/* { dg-options "-save-temps -O0" } */
/* { dg-add-options arm_neon_fp16 } */
#include "arm_neon.h"
void test_vcvtf16_f32 (void)
{
float16x4_t out_float16x4_t;
float32x4_t arg0_float32x4_t;
out_float16x4_t = vcvt_f16_f32 (arg0_float32x4_t);
}
/* { dg-final { scan-assembler "vcvt\.f16.f32\[ \]+\[dD\]\[0-9\]+, \[qQ\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
/* Test the `vcvtf32_f16' ARM Neon intrinsic. */
/* This file was autogenerated by neon-testgen. */
/* { dg-do assemble } */
/* { dg-require-effective-target arm_neon_fp16_ok } */
/* { dg-options "-save-temps -O0" } */
/* { dg-add-options arm_neon_fp16 } */
#include "arm_neon.h"
void test_vcvtf32_f16 (void)
{
float32x4_t out_float32x4_t;
float16x4_t arg0_float16x4_t;
out_float32x4_t = vcvt_f32_f16 (arg0_float16x4_t);
}
/* { dg-final { scan-assembler "vcvt\.f32.f16\[ \]+\[qQ\]\[0-9\]+, \[dD\]\[0-9\]+!?\(\[ \]+@\[a-zA-Z0-9 \]+\)?\n" } } */
/* { dg-final { cleanup-saved-temps } } */
......@@ -2255,6 +2255,47 @@ proc check_effective_target_arm_neon_ok { } {
check_effective_target_arm_neon_ok_nocache]
}
# Return 1 if this is an ARM target supporting -mfpu=neon-fp16
# -mfloat-abi=softfp or equivalent options. Some multilibs may be
# incompatible with these options. Also set et_arm_neon_flags to the
# best options to add.
proc check_effective_target_arm_neon_fp16_ok_nocache { } {
global et_arm_neon_fp16_flags
set et_arm_neon_fp16_flags ""
if { [check_effective_target_arm32] } {
foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-fp16"
"-mfpu=neon-fp16 -mfloat-abi=softfp"} {
if { [check_no_compiler_messages_nocache arm_neon_fp_16_ok object {
#include "arm_neon.h"
float16x4_t
foo (float32x4_t arg)
{
return vcvt_f16_f32 (arg);
}
} "$flags"] } {
set et_arm_neon_fp16_flags $flags
return 1
}
}
}
return 0
}
proc check_effective_target_arm_neon_fp16_ok { } {
return [check_cached_effective_target arm_neon_fp16_ok \
check_effective_target_arm_neon_fp16_ok_nocache]
}
proc add_options_for_arm_neon_fp16 { flags } {
if { ! [check_effective_target_arm_neon_fp16_ok] } {
return "$flags"
}
global et_arm_neon_fp16_flags
return "$flags $et_arm_neon_fp16_flags"
}
# Return 1 if this is an ARM target supporting -mfpu=neon-fp-armv8
# -mfloat-abi=softfp or equivalent options. Some multilibs may be
# incompatible with these options. Also set et_arm_v8_neon_flags to the
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment