Commit 03873eb9 by Alan Lawrence Committed by Alan Lawrence

[AArch64] Add vcvt(_high)?_f32_f16 intrinsics, with BE RTL fix

gcc/:

	* config/aarch64/aarch64-simd.md (aarch64_simd_vec_unpacks_lo_<mode>,
	aarch64_simd_vec_unpacks_hi_<mode>): New insn.
	(vec_unpacks_lo_v4sf, vec_unpacks_hi_v4sf): Delete insn.
	(vec_unpacks_lo_<mode>, vec_unpacks_hi_<mode>): New expand.
	(aarch64_float_extend_lo_v2df): Rename to...
	(aarch64_float_extend_lo_<Vwide>): this, using VDF and so adding V4SF.

	* config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi): Add v8hf.
	(float_extend_lo): Add v4sf.

	* config/aarch64/arm_neon.h (vcvt_f32_f16, vcvt_high_f32_f16): New.
	* config/aarch64/iterators.md (VQ_HSF): New iterator.
	(VWIDE, Vwtype, Vhalftype): Add V8HF, V4SF.
	(Vwide): New mode_attr.

From-SVN: r227551
parent 862abc04
2015-09-08 Alan Lawrence <alan.lawrence@arm.com> 2015-09-08 Alan Lawrence <alan.lawrence@arm.com>
* config/aarch64/aarch64-simd.md (aarch64_simd_vec_unpacks_lo_<mode>,
aarch64_simd_vec_unpacks_hi_<mode>): New insn.
(vec_unpacks_lo_v4sf, vec_unpacks_hi_v4sf): Delete insn.
(vec_unpacks_lo_<mode>, vec_unpacks_hi_<mode>): New expand.
(aarch64_float_extend_lo_v2df): Rename to...
(aarch64_float_extend_lo_<Vwide>): this, using VDF and so adding V4SF.
* config/aarch64/aarch64-simd-builtins.def (vec_unpacks_hi): Add v8hf.
(float_extend_lo): Add v4sf.
* config/aarch64/arm_neon.h (vcvt_f32_f16, vcvt_high_f32_f16): New.
* config/aarch64/iterators.md (VQ_HSF): New iterator.
(VWIDE, Vwtype, Vhalftype): Add V8HF, V4SF.
(Vwide): New mode_attr.
2015-09-08 Alan Lawrence <alan.lawrence@arm.com>
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>, * config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>,
aarch64_dup_lane<mode>, aarch64_dup_lane_<vswap_width_name><mode>, aarch64_dup_lane<mode>, aarch64_dup_lane_<vswap_width_name><mode>,
aarch64_simd_vec_set<mode>, vec_set<mode>, vec_perm_const<mode>, aarch64_simd_vec_set<mode>, vec_set<mode>, vec_perm_const<mode>,
......
...@@ -361,11 +361,12 @@ ...@@ -361,11 +361,12 @@
BUILTIN_VSDQ_I_DI (UNOP, abs, 0) BUILTIN_VSDQ_I_DI (UNOP, abs, 0)
BUILTIN_VDQF (UNOP, abs, 2) BUILTIN_VDQF (UNOP, abs, 2)
VAR1 (UNOP, vec_unpacks_hi_, 10, v4sf) BUILTIN_VQ_HSF (UNOP, vec_unpacks_hi_, 10)
VAR1 (BINOP, float_truncate_hi_, 0, v4sf) VAR1 (BINOP, float_truncate_hi_, 0, v4sf)
VAR1 (BINOP, float_truncate_hi_, 0, v8hf) VAR1 (BINOP, float_truncate_hi_, 0, v8hf)
VAR1 (UNOP, float_extend_lo_, 0, v2df) VAR1 (UNOP, float_extend_lo_, 0, v2df)
VAR1 (UNOP, float_extend_lo_, 0, v4sf)
BUILTIN_VDF (UNOP, float_truncate_lo_, 0) BUILTIN_VDF (UNOP, float_truncate_lo_, 0)
/* Implemented by aarch64_ld1<VALL_F16:mode>. */ /* Implemented by aarch64_ld1<VALL_F16:mode>. */
......
...@@ -1692,36 +1692,57 @@ ...@@ -1692,36 +1692,57 @@
;; Float widening operations. ;; Float widening operations.
(define_insn "vec_unpacks_lo_v4sf" (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
[(set (match_operand:V2DF 0 "register_operand" "=w") [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(float_extend:V2DF (float_extend:<VWIDE> (vec_select:<VHALF>
(vec_select:V2SF (match_operand:VQ_HSF 1 "register_operand" "w")
(match_operand:V4SF 1 "register_operand" "w") (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
(parallel [(const_int 0) (const_int 1)]) )))]
)))]
"TARGET_SIMD" "TARGET_SIMD"
"fcvtl\\t%0.2d, %1.2s" "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
[(set_attr "type" "neon_fp_cvt_widen_s")] [(set_attr "type" "neon_fp_cvt_widen_s")]
) )
(define_insn "aarch64_float_extend_lo_v2df" (define_expand "vec_unpacks_lo_<mode>"
[(set (match_operand:V2DF 0 "register_operand" "=w") [(match_operand:<VWIDE> 0 "register_operand" "")
(float_extend:V2DF (match_operand:VQ_HSF 1 "register_operand" "")]
(match_operand:V2SF 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
"fcvtl\\t%0.2d, %1.2s" {
rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
operands[1], p));
DONE;
}
)
(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(float_extend:<VWIDE> (vec_select:<VHALF>
(match_operand:VQ_HSF 1 "register_operand" "w")
(match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
)))]
"TARGET_SIMD"
"fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
[(set_attr "type" "neon_fp_cvt_widen_s")] [(set_attr "type" "neon_fp_cvt_widen_s")]
) )
(define_insn "vec_unpacks_hi_v4sf" (define_expand "vec_unpacks_hi_<mode>"
[(set (match_operand:V2DF 0 "register_operand" "=w") [(match_operand:<VWIDE> 0 "register_operand" "")
(float_extend:V2DF (match_operand:VQ_HSF 1 "register_operand" "")]
(vec_select:V2SF "TARGET_SIMD"
(match_operand:V4SF 1 "register_operand" "w") {
(parallel [(const_int 2) (const_int 3)]) rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
)))] emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
operands[1], p));
DONE;
}
)
(define_insn "aarch64_float_extend_lo_<Vwide>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(float_extend:<VWIDE>
(match_operand:VDF 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
"fcvtl2\\t%0.2d, %1.4s" "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
[(set_attr "type" "neon_fp_cvt_widen_s")] [(set_attr "type" "neon_fp_cvt_widen_s")]
) )
......
...@@ -6025,10 +6025,6 @@ vaddlvq_u32 (uint32x4_t a) ...@@ -6025,10 +6025,6 @@ vaddlvq_u32 (uint32x4_t a)
result; \ result; \
}) })
/* vcvt_f32_f16 not supported */
/* vcvt_high_f32_f16 not supported */
#define vcvt_n_f32_s32(a, b) \ #define vcvt_n_f32_s32(a, b) \
__extension__ \ __extension__ \
({ \ ({ \
...@@ -13436,6 +13432,12 @@ vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b) ...@@ -13436,6 +13432,12 @@ vcvt_high_f32_f64 (float32x2_t __a, float64x2_t __b)
/* vcvt (float -> double). */ /* vcvt (float -> double). */
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_f32_f16 (float16x4_t __a)
{
return __builtin_aarch64_float_extend_lo_v4sf (__a);
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_f64_f32 (float32x2_t __a) vcvt_f64_f32 (float32x2_t __a)
{ {
...@@ -13443,6 +13445,12 @@ vcvt_f64_f32 (float32x2_t __a) ...@@ -13443,6 +13445,12 @@ vcvt_f64_f32 (float32x2_t __a)
return __builtin_aarch64_float_extend_lo_v2df (__a); return __builtin_aarch64_float_extend_lo_v2df (__a);
} }
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
vcvt_high_f32_f16 (float16x8_t __a)
{
return __builtin_aarch64_vec_unpacks_hi_v8hf (__a);
}
__extension__ static __inline float64x2_t __attribute__ ((__always_inline__)) __extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
vcvt_high_f64_f32 (float32x4_t __a) vcvt_high_f64_f32 (float32x4_t __a)
{ {
......
...@@ -94,6 +94,9 @@ ...@@ -94,6 +94,9 @@
;; Vector single Float modes. ;; Vector single Float modes.
(define_mode_iterator VDQSF [V2SF V4SF]) (define_mode_iterator VDQSF [V2SF V4SF])
;; Quad vector Float modes with half/single elements.
(define_mode_iterator VQ_HSF [V8HF V4SF])
;; Modes suitable to use as the return type of a vcond expression. ;; Modes suitable to use as the return type of a vcond expression.
(define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI]) (define_mode_iterator VDQF_COND [V2SF V2SI V4SF V4SI V2DF V2DI])
...@@ -492,14 +495,18 @@ ...@@ -492,14 +495,18 @@
(V2SI "V2DI") (V16QI "V8HI") (V2SI "V2DI") (V16QI "V8HI")
(V8HI "V4SI") (V4SI "V2DI") (V8HI "V4SI") (V4SI "V2DI")
(HI "SI") (SI "DI") (HI "SI") (SI "DI")
(V8HF "V4SF") (V4SF "V2DF")
(V4HF "V4SF") (V2SF "V2DF")] (V4HF "V4SF") (V2SF "V2DF")]
) )
;; Widened mode register suffixes for VD_BHSI/VQW. ;; Widened modes of vector modes, lowercase
(define_mode_attr Vwide [(V2SF "v2df") (V4HF "v4sf")])
;; Widened mode register suffixes for VD_BHSI/VQW/VQ_HSF.
(define_mode_attr Vwtype [(V8QI "8h") (V4HI "4s") (define_mode_attr Vwtype [(V8QI "8h") (V4HI "4s")
(V2SI "2d") (V16QI "8h") (V2SI "2d") (V16QI "8h")
(V8HI "4s") (V4SI "2d")]) (V8HI "4s") (V4SI "2d")
(V8HF "4s") (V4SF "2d")])
;; Widened mode register suffixes for VDW/VQW. ;; Widened mode register suffixes for VDW/VQW.
(define_mode_attr Vmwtype [(V8QI ".8h") (V4HI ".4s") (define_mode_attr Vmwtype [(V8QI ".8h") (V4HI ".4s")
...@@ -508,9 +515,10 @@ ...@@ -508,9 +515,10 @@
(V4HF ".4s") (V2SF ".2d") (V4HF ".4s") (V2SF ".2d")
(SI "") (HI "")]) (SI "") (HI "")])
;; Lower part register suffixes for VQW. ;; Lower part register suffixes for VQW/VQ_HSF.
(define_mode_attr Vhalftype [(V16QI "8b") (V8HI "4h") (define_mode_attr Vhalftype [(V16QI "8b") (V8HI "4h")
(V4SI "2s")]) (V4SI "2s") (V8HF "4h")
(V4SF "2s")])
;; Define corresponding core/FP element mode for each vector mode. ;; Define corresponding core/FP element mode for each vector mode.
(define_mode_attr vw [(V8QI "w") (V16QI "w") (define_mode_attr vw [(V8QI "w") (V16QI "w")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment