Commit 92835317 by Tejas Belagod

aarch64-simd.md (vec_pack_trunc_<mode>, [...]): Swap for big-endian.

2013-11-22  Tejas Belagod  <tejas.belagod@arm.com>

gcc/
	* config/aarch64/aarch64-simd.md (vec_pack_trunc_<mode>,
	vec_pack_trunc_v2df, vec_pack_trunc_df): Swap for big-endian.
	(reduc_<sur>plus_<mode>): Factorize V2DI into this.
	(reduc_<sur>plus_<mode>): Change this to reduc_splus_<mode> for floats
	and also change to float UNSPEC.
	(reduc_<maxmin_uns>_<mode>): Remove V2DI.
	* config/aarch64/arm_neon.h (vaddv<q>_<suf><8,16,32,64>,
	vmaxv<q>_<suf><8,16,32,64>, vminv<q>_<suf><8,16,32,64>): Fix up scalar
	result access for big-endian.
	(__LANE0): New macro used to fix up lane access of 'across-lanes'
	intrinsics for big-endian.
	* config/aarch64/iterators.md (VDQV): Add V2DI.
	(VDQV_S): New.
	(vp): New mode attribute.

From-SVN: r205269
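For context on the big-endian fix: on big-endian AArch64 targets, GCC numbers vector lanes from the most-significant end of the register, so the scalar that an across-lanes instruction (ADDV, SMAXV, UMINV, ...) leaves in architectural lane 0 is GCC's lane N-1; the __LANE0 macro added below selects exactly that lane. A minimal standalone demonstration (my example, not part of the patch; any AArch64 target with this fix applied):

#include <arm_neon.h>
#include <stdio.h>

int
main (void)
{
  int8_t data[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  int8x8_t v = vld1_s8 (data);
  /* vaddv_s8 sums all eight lanes; internally it now reads the result
     from vget_lane_s8 (..., __LANE0 (8)), i.e. lane 0 on little-endian
     and lane 7 on big-endian, so this prints 36 either way.  */
  printf ("%d\n", (int) vaddv_s8 (v));
  return 0;
}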
gcc/config/aarch64/aarch64-simd.md
@@ -1557,19 +1557,10 @@
 	(unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
 		    SUADDV))]
   "TARGET_SIMD"
-  "addv\\t%<Vetype>0, %1.<Vtype>"
+  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
   [(set_attr "type" "neon_reduc_add<q>")]
 )
 
-(define_insn "reduc_<sur>plus_v2di"
-  [(set (match_operand:V2DI 0 "register_operand" "=w")
-	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
-		     SUADDV))]
-  "TARGET_SIMD"
-  "addp\\t%d0, %1.2d"
-  [(set_attr "type" "neon_reduc_add_q")]
-)
-
 (define_insn "reduc_<sur>plus_v2si"
   [(set (match_operand:V2SI 0 "register_operand" "=w")
 	(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
@@ -1579,10 +1570,10 @@
   [(set_attr "type" "neon_reduc_add")]
 )
 
-(define_insn "reduc_<sur>plus_<mode>"
+(define_insn "reduc_splus_<mode>"
   [(set (match_operand:V2F 0 "register_operand" "=w")
 	(unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
-		   SUADDV))]
+		   UNSPEC_FADDV))]
   "TARGET_SIMD"
   "faddp\\t%<Vetype>0, %1.<Vtype>"
   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
@@ -1597,15 +1588,14 @@
   [(set_attr "type" "neon_fp_reduc_add_s_q")]
 )
 
-(define_expand "reduc_<sur>plus_v4sf"
+(define_expand "reduc_splus_v4sf"
   [(set (match_operand:V4SF 0 "register_operand")
 	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
-		    SUADDV))]
+		    UNSPEC_FADDV))]
   "TARGET_SIMD"
 {
-  rtx tmp = gen_reg_rtx (V4SFmode);
-  emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
-  emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
+  emit_insn (gen_aarch64_addpv4sf (operands[0], operands[1]));
+  emit_insn (gen_aarch64_addpv4sf (operands[0], operands[0]));
   DONE;
 })
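A note on the reduc_splus_v4sf hunk above: a V4SF sum reduction is two pairwise FADDP passes, and since the second pass may read and write the same register, the temporary was unnecessary. A rough C model of the arithmetic (hypothetical helper names, illustration only):

/* Models FADDP Vd.4S, Vn.4S, Vn.4S with both sources equal:
   lanes become {s0+s1, s2+s3, s0+s1, s2+s3}.  */
static void
faddp_v4sf (float *dst, const float *src)
{
  float t0 = src[0] + src[1];
  float t1 = src[2] + src[3];
  dst[0] = t0;
  dst[1] = t1;
  dst[2] = t0;
  dst[3] = t1;
}

float
reduc_splus_v4sf_model (const float v[4])
{
  float acc[4];
  faddp_v4sf (acc, v);    /* gen_aarch64_addpv4sf (operands[0], operands[1]) */
  faddp_v4sf (acc, acc);  /* gen_aarch64_addpv4sf (operands[0], operands[0]) */
  return acc[0];          /* acc[0] now holds v[0]+v[1]+v[2]+v[3] */
}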
@@ -1620,23 +1610,14 @@
 ;; 'across lanes' max and min ops.
 
 (define_insn "reduc_<maxmin_uns>_<mode>"
-  [(set (match_operand:VDQV 0 "register_operand" "=w")
-	(unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
+  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
+	(unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
 		    MAXMINV))]
   "TARGET_SIMD"
   "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
   [(set_attr "type" "neon_reduc_minmax<q>")]
 )
 
-(define_insn "reduc_<maxmin_uns>_v2di"
-  [(set (match_operand:V2DI 0 "register_operand" "=w")
-	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
-		     MAXMINV))]
-  "TARGET_SIMD"
-  "<maxmin_uns_op>p\\t%d0, %1.2d"
-  [(set_attr "type" "neon_reduc_minmax_q")]
-)
-
 (define_insn "reduc_<maxmin_uns>_v2si"
   [(set (match_operand:V2SI 0 "register_operand" "=w")
 	(unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
gcc/config/aarch64/arm_neon.h
@@ -15913,118 +15913,132 @@ vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
   return __a + __b;
 }
 
+#if __AARCH64EB__
+#define __LANE0(__t) ((__t) - 1)
+#else
+#define __LANE0(__t) 0
+#endif
+
 /* vaddv */
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vaddv_s8 (int8x8_t __a)
 {
-  return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), 0);
+  return vget_lane_s8 (__builtin_aarch64_reduc_splus_v8qi (__a), __LANE0 (8));
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vaddv_s16 (int16x4_t __a)
 {
-  return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), 0);
+  return vget_lane_s16 (__builtin_aarch64_reduc_splus_v4hi (__a), __LANE0 (4));
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vaddv_s32 (int32x2_t __a)
 {
-  return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), 0);
+  return vget_lane_s32 (__builtin_aarch64_reduc_splus_v2si (__a), __LANE0 (2));
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vaddv_u8 (uint8x8_t __a)
 {
   return vget_lane_u8 ((uint8x8_t)
-		__builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a), 0);
+		__builtin_aarch64_reduc_uplus_v8qi ((int8x8_t) __a),
+		__LANE0 (8));
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vaddv_u16 (uint16x4_t __a)
 {
   return vget_lane_u16 ((uint16x4_t)
-		__builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a), 0);
+		__builtin_aarch64_reduc_uplus_v4hi ((int16x4_t) __a),
+		__LANE0 (4));
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vaddv_u32 (uint32x2_t __a)
 {
   return vget_lane_u32 ((uint32x2_t)
-		__builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a), 0);
+		__builtin_aarch64_reduc_uplus_v2si ((int32x2_t) __a),
+		__LANE0 (2));
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vaddvq_s8 (int8x16_t __a)
 {
-  return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a), 0);
+  return vgetq_lane_s8 (__builtin_aarch64_reduc_splus_v16qi (__a),
+			__LANE0 (16));
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vaddvq_s16 (int16x8_t __a)
 {
-  return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), 0);
+  return vgetq_lane_s16 (__builtin_aarch64_reduc_splus_v8hi (__a), __LANE0 (8));
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vaddvq_s32 (int32x4_t __a)
 {
-  return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), 0);
+  return vgetq_lane_s32 (__builtin_aarch64_reduc_splus_v4si (__a), __LANE0 (4));
 }
 
 __extension__ static __inline int64_t __attribute__ ((__always_inline__))
 vaddvq_s64 (int64x2_t __a)
 {
-  return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), 0);
+  return vgetq_lane_s64 (__builtin_aarch64_reduc_splus_v2di (__a), __LANE0 (2));
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vaddvq_u8 (uint8x16_t __a)
 {
   return vgetq_lane_u8 ((uint8x16_t)
-		__builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a), 0);
+		__builtin_aarch64_reduc_uplus_v16qi ((int8x16_t) __a),
+		__LANE0 (16));
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vaddvq_u16 (uint16x8_t __a)
 {
   return vgetq_lane_u16 ((uint16x8_t)
-		__builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a), 0);
+		__builtin_aarch64_reduc_uplus_v8hi ((int16x8_t) __a),
+		__LANE0 (8));
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vaddvq_u32 (uint32x4_t __a)
 {
   return vgetq_lane_u32 ((uint32x4_t)
-		__builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a), 0);
+		__builtin_aarch64_reduc_uplus_v4si ((int32x4_t) __a),
+		__LANE0 (4));
 }
 
 __extension__ static __inline uint64_t __attribute__ ((__always_inline__))
 vaddvq_u64 (uint64x2_t __a)
 {
   return vgetq_lane_u64 ((uint64x2_t)
-		__builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a), 0);
+		__builtin_aarch64_reduc_uplus_v2di ((int64x2_t) __a),
+		__LANE0 (2));
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vaddv_f32 (float32x2_t __a)
 {
-  float32x2_t t = __builtin_aarch64_reduc_splus_v2sf (__a);
-  return vget_lane_f32 (t, 0);
+  float32x2_t __t = __builtin_aarch64_reduc_splus_v2sf (__a);
+  return vget_lane_f32 (__t, __LANE0 (2));
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vaddvq_f32 (float32x4_t __a)
 {
-  float32x4_t t = __builtin_aarch64_reduc_splus_v4sf (__a);
-  return vgetq_lane_f32 (t, 0);
+  float32x4_t __t = __builtin_aarch64_reduc_splus_v4sf (__a);
+  return vgetq_lane_f32 (__t, __LANE0 (4));
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vaddvq_f64 (float64x2_t __a)
 {
-  float64x2_t t = __builtin_aarch64_reduc_splus_v2df (__a);
-  return vgetq_lane_f64 (t, 0);
+  float64x2_t __t = __builtin_aarch64_reduc_splus_v2df (__a);
+  return vgetq_lane_f64 (__t, __LANE0 (2));
 }
 
 /* vcage */
@@ -20265,97 +20279,106 @@ vmaxnmq_f64 (float64x2_t __a, float64x2_t __b)
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vmaxv_f32 (float32x2_t __a)
 {
-  return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a), 0);
+  return vget_lane_f32 (__builtin_aarch64_reduc_smax_nan_v2sf (__a),
+			__LANE0 (2));
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vmaxv_s8 (int8x8_t __a)
 {
-  return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), 0);
+  return vget_lane_s8 (__builtin_aarch64_reduc_smax_v8qi (__a), __LANE0 (8));
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vmaxv_s16 (int16x4_t __a)
 {
-  return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), 0);
+  return vget_lane_s16 (__builtin_aarch64_reduc_smax_v4hi (__a), __LANE0 (4));
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vmaxv_s32 (int32x2_t __a)
 {
-  return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), 0);
+  return vget_lane_s32 (__builtin_aarch64_reduc_smax_v2si (__a), __LANE0 (2));
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vmaxv_u8 (uint8x8_t __a)
 {
   return vget_lane_u8 ((uint8x8_t)
-		__builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a), 0);
+		__builtin_aarch64_reduc_umax_v8qi ((int8x8_t) __a),
+		__LANE0 (8));
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vmaxv_u16 (uint16x4_t __a)
 {
   return vget_lane_u16 ((uint16x4_t)
-		__builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a), 0);
+		__builtin_aarch64_reduc_umax_v4hi ((int16x4_t) __a),
+		__LANE0 (4));
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vmaxv_u32 (uint32x2_t __a)
 {
   return vget_lane_u32 ((uint32x2_t)
-		__builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a), 0);
+		__builtin_aarch64_reduc_umax_v2si ((int32x2_t) __a),
+		__LANE0 (2));
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vmaxvq_f32 (float32x4_t __a)
 {
-  return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a), 0);
+  return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_nan_v4sf (__a),
+			 __LANE0 (4));
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vmaxvq_f64 (float64x2_t __a)
 {
-  return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a), 0);
+  return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_nan_v2df (__a),
+			 __LANE0 (2));
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vmaxvq_s8 (int8x16_t __a)
 {
-  return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), 0);
+  return vgetq_lane_s8 (__builtin_aarch64_reduc_smax_v16qi (__a), __LANE0 (16));
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vmaxvq_s16 (int16x8_t __a)
 {
-  return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), 0);
+  return vgetq_lane_s16 (__builtin_aarch64_reduc_smax_v8hi (__a), __LANE0 (8));
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vmaxvq_s32 (int32x4_t __a)
 {
-  return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), 0);
+  return vgetq_lane_s32 (__builtin_aarch64_reduc_smax_v4si (__a), __LANE0 (4));
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vmaxvq_u8 (uint8x16_t __a)
 {
   return vgetq_lane_u8 ((uint8x16_t)
-		__builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a), 0);
+		__builtin_aarch64_reduc_umax_v16qi ((int8x16_t) __a),
+		__LANE0 (16));
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vmaxvq_u16 (uint16x8_t __a)
 {
   return vgetq_lane_u16 ((uint16x8_t)
-		__builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a), 0);
+		__builtin_aarch64_reduc_umax_v8hi ((int16x8_t) __a),
+		__LANE0 (8));
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vmaxvq_u32 (uint32x4_t __a)
 {
   return vgetq_lane_u32 ((uint32x4_t)
-		__builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a), 0);
+		__builtin_aarch64_reduc_umax_v4si ((int32x4_t) __a),
+		__LANE0 (4));
 }
 
 /* vmaxnmv */
@@ -20363,19 +20386,20 @@ vmaxvq_u32 (uint32x4_t __a)
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vmaxnmv_f32 (float32x2_t __a)
 {
-  return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a), 0);
+  return vget_lane_f32 (__builtin_aarch64_reduc_smax_v2sf (__a),
+			__LANE0 (2));
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vmaxnmvq_f32 (float32x4_t __a)
 {
-  return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), 0);
+  return vgetq_lane_f32 (__builtin_aarch64_reduc_smax_v4sf (__a), __LANE0 (4));
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vmaxnmvq_f64 (float64x2_t __a)
 {
-  return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), 0);
+  return vgetq_lane_f64 (__builtin_aarch64_reduc_smax_v2df (__a), __LANE0 (2));
 }
 
 /* vmin */
@@ -20501,97 +20525,107 @@ vminnmq_f64 (float64x2_t __a, float64x2_t __b)
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vminv_f32 (float32x2_t __a)
 {
-  return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a), 0);
+  return vget_lane_f32 (__builtin_aarch64_reduc_smin_nan_v2sf (__a),
+			__LANE0 (2));
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vminv_s8 (int8x8_t __a)
 {
-  return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a), 0);
+  return vget_lane_s8 (__builtin_aarch64_reduc_smin_v8qi (__a),
+		       __LANE0 (8));
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vminv_s16 (int16x4_t __a)
 {
-  return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), 0);
+  return vget_lane_s16 (__builtin_aarch64_reduc_smin_v4hi (__a), __LANE0 (4));
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vminv_s32 (int32x2_t __a)
 {
-  return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), 0);
+  return vget_lane_s32 (__builtin_aarch64_reduc_smin_v2si (__a), __LANE0 (2));
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vminv_u8 (uint8x8_t __a)
 {
   return vget_lane_u8 ((uint8x8_t)
-		__builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a), 0);
+		__builtin_aarch64_reduc_umin_v8qi ((int8x8_t) __a),
+		__LANE0 (8));
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vminv_u16 (uint16x4_t __a)
 {
   return vget_lane_u16 ((uint16x4_t)
-		__builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a), 0);
+		__builtin_aarch64_reduc_umin_v4hi ((int16x4_t) __a),
+		__LANE0 (4));
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vminv_u32 (uint32x2_t __a)
 {
   return vget_lane_u32 ((uint32x2_t)
-		__builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a), 0);
+		__builtin_aarch64_reduc_umin_v2si ((int32x2_t) __a),
+		__LANE0 (2));
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vminvq_f32 (float32x4_t __a)
 {
-  return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a), 0);
+  return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_nan_v4sf (__a),
+			 __LANE0 (4));
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vminvq_f64 (float64x2_t __a)
 {
-  return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a), 0);
+  return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_nan_v2df (__a),
+			 __LANE0 (2));
 }
 
 __extension__ static __inline int8_t __attribute__ ((__always_inline__))
 vminvq_s8 (int8x16_t __a)
 {
-  return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), 0);
+  return vgetq_lane_s8 (__builtin_aarch64_reduc_smin_v16qi (__a), __LANE0 (16));
 }
 
 __extension__ static __inline int16_t __attribute__ ((__always_inline__))
 vminvq_s16 (int16x8_t __a)
 {
-  return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), 0);
+  return vgetq_lane_s16 (__builtin_aarch64_reduc_smin_v8hi (__a), __LANE0 (8));
 }
 
 __extension__ static __inline int32_t __attribute__ ((__always_inline__))
 vminvq_s32 (int32x4_t __a)
 {
-  return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), 0);
+  return vgetq_lane_s32 (__builtin_aarch64_reduc_smin_v4si (__a), __LANE0 (4));
 }
 
 __extension__ static __inline uint8_t __attribute__ ((__always_inline__))
 vminvq_u8 (uint8x16_t __a)
 {
   return vgetq_lane_u8 ((uint8x16_t)
-		__builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a), 0);
+		__builtin_aarch64_reduc_umin_v16qi ((int8x16_t) __a),
+		__LANE0 (16));
 }
 
 __extension__ static __inline uint16_t __attribute__ ((__always_inline__))
 vminvq_u16 (uint16x8_t __a)
 {
   return vgetq_lane_u16 ((uint16x8_t)
-		__builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a), 0);
+		__builtin_aarch64_reduc_umin_v8hi ((int16x8_t) __a),
+		__LANE0 (8));
 }
 
 __extension__ static __inline uint32_t __attribute__ ((__always_inline__))
 vminvq_u32 (uint32x4_t __a)
 {
   return vgetq_lane_u32 ((uint32x4_t)
-		__builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a), 0);
+		__builtin_aarch64_reduc_umin_v4si ((int32x4_t) __a),
+		__LANE0 (4));
 }
 
 /* vminnmv */
@@ -20599,19 +20633,19 @@ vminvq_u32 (uint32x4_t __a)
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vminnmv_f32 (float32x2_t __a)
 {
-  return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), 0);
+  return vget_lane_f32 (__builtin_aarch64_reduc_smin_v2sf (__a), __LANE0 (2));
 }
 
 __extension__ static __inline float32_t __attribute__ ((__always_inline__))
 vminnmvq_f32 (float32x4_t __a)
 {
-  return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), 0);
+  return vgetq_lane_f32 (__builtin_aarch64_reduc_smin_v4sf (__a), __LANE0 (4));
 }
 
 __extension__ static __inline float64_t __attribute__ ((__always_inline__))
 vminnmvq_f64 (float64x2_t __a)
 {
-  return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), 0);
+  return vgetq_lane_f64 (__builtin_aarch64_reduc_smin_v2df (__a), __LANE0 (2));
 }
 
 /* vmla */
@@ -25444,6 +25478,8 @@ __INTERLEAVE_LIST (zip)
 
 /* End of optimal implementations in approved order. */
 
+#undef __LANE0
+
 #undef __aarch64_vget_lane_any
 #undef __aarch64_vget_lane_f32
 #undef __aarch64_vget_lane_f64
gcc/config/aarch64/iterators.md
@@ -108,7 +108,10 @@
 (define_mode_iterator VALLDI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF DI])
 
 ;; Vector modes for Integer reduction across lanes.
-(define_mode_iterator VDQV [V8QI V16QI V4HI V8HI V4SI])
+(define_mode_iterator VDQV [V8QI V16QI V4HI V8HI V4SI V2DI])
+
+;; Vector modes (except V2DI) for Integer reduction across lanes.
+(define_mode_iterator VDQV_S [V8QI V16QI V4HI V8HI V4SI])
 
 ;; All double integer narrow-able modes.
 (define_mode_iterator VDN [V4HI V2SI DI])
@@ -585,6 +588,12 @@
 		      (V2DF "_q")
 		      (QI "") (HI "") (SI "") (DI "") (SF "") (DF "")])
 
+(define_mode_attr vp [(V8QI "v") (V16QI "v")
+		      (V4HI "v") (V8HI "v")
+		      (V2SI "p") (V4SI "v")
+		      (V2DI "p") (V2DF "p")
+		      (V2SF "p") (V4SF "v")])
+
 ;; -------------------------------------------------------------------
 ;; Code Iterators
 ;; -------------------------------------------------------------------
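Reading the new vp attribute together with the add<VDQV:vp> change in the first hunk: vp picks the mnemonic suffix, "v" for modes that have a true across-lanes instruction and "p" for the two-element modes that only have a pairwise form, which is what let the V2DI pattern be folded into the generic one. Under the pattern's operand conventions it would emit, for instance:

	addv	b0, v1.8b	// V8QI: <vp> is "v", ADDV across eight lanes
	addp	d0, v1.2d	// V2DI: <vp> is "p", pairwise add of the two lanes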