Commit fd92bb80 by Matthew Gretton-Dann Committed by Matthew Gretton-Dann

re PR target/51534 (Bad code gen for vcgtq_u32 NEON intrinsic)

	PR target/51534
	* gcc/config/arm/arm.c (neon_builtin_data): Add entries for vcgeu
	and vcgtu.
	* gcc/config/arm/arm_neon.h: Regenerate.
	* gcc/config/arm/neon.md (unspec): Add UNSPEC_VCGEU, and UNSPEC_VCGTU.
	(neon_vcgeu): New insn.
	(neon_vcgtu): Likewise.
	* gcc/config/arm/neon.ml (s_8_32, u_8_32): New lists.
	(ops): Unsigned comparison intrinsics call a different
	builtin.
	* gcc/testsuite/gcc.target/arm/neon/pr51534.c: New testcase.

From-SVN: r184629
parent 1d548381
2012-02-28 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
PR target/51534
* config/arm/arm.c (neon_builtin_data): Add entries for vcgeu
and vcgtu.
* config/arm/arm_neon.h: Regenerate.
* config/arm/neon.md (unspec): Add UNSPEC_VCGEU, and UNSPEC_VCGTU.
(neon_vcgeu): New insn.
(neon_vcgtu): Likewise.
* config/arm/neon.ml (s_8_32, u_8_32): New lists.
(ops): Unsigned comparison intrinsics call a different
builtin.
2012-02-28 Richard Guenther <rguenther@suse.de>
PR target/52407
......
......@@ -19104,7 +19104,9 @@ static neon_builtin_datum neon_builtin_data[] =
VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
VAR6 (BINOP, vcgeu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
VAR6 (BINOP, vcgtu, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
VAR2 (BINOP, vcage, v2sf, v4sf),
VAR2 (BINOP, vcagt, v2sf, v4sf),
VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
......
......@@ -1893,19 +1893,19 @@ vcge_f32 (float32x2_t __a, float32x2_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcge_u8 (uint8x8_t __a, uint8x8_t __b)
{
return (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcge_u16 (uint16x4_t __a, uint16x4_t __b)
{
return (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcge_u32 (uint32x2_t __a, uint32x2_t __b)
{
return (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __a, (int32x2_t) __b, 0);
return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
......@@ -1935,19 +1935,19 @@ vcgeq_f32 (float32x4_t __a, float32x4_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgeq_u8 (uint8x16_t __a, uint8x16_t __b)
{
return (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgeq_u16 (uint16x8_t __a, uint16x8_t __b)
{
return (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgeq_u32 (uint32x4_t __a, uint32x4_t __b)
{
return (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __a, (int32x4_t) __b, 0);
return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
......@@ -1977,19 +1977,19 @@ vcle_f32 (float32x2_t __a, float32x2_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcle_u8 (uint8x8_t __a, uint8x8_t __b)
{
return (uint8x8_t)__builtin_neon_vcgev8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
return (uint8x8_t)__builtin_neon_vcgeuv8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcle_u16 (uint16x4_t __a, uint16x4_t __b)
{
return (uint16x4_t)__builtin_neon_vcgev4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
return (uint16x4_t)__builtin_neon_vcgeuv4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcle_u32 (uint32x2_t __a, uint32x2_t __b)
{
return (uint32x2_t)__builtin_neon_vcgev2si ((int32x2_t) __b, (int32x2_t) __a, 0);
return (uint32x2_t)__builtin_neon_vcgeuv2si ((int32x2_t) __b, (int32x2_t) __a, 0);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
......@@ -2019,19 +2019,19 @@ vcleq_f32 (float32x4_t __a, float32x4_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcleq_u8 (uint8x16_t __a, uint8x16_t __b)
{
return (uint8x16_t)__builtin_neon_vcgev16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
return (uint8x16_t)__builtin_neon_vcgeuv16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcleq_u16 (uint16x8_t __a, uint16x8_t __b)
{
return (uint16x8_t)__builtin_neon_vcgev8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
return (uint16x8_t)__builtin_neon_vcgeuv8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcleq_u32 (uint32x4_t __a, uint32x4_t __b)
{
return (uint32x4_t)__builtin_neon_vcgev4si ((int32x4_t) __b, (int32x4_t) __a, 0);
return (uint32x4_t)__builtin_neon_vcgeuv4si ((int32x4_t) __b, (int32x4_t) __a, 0);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
......@@ -2061,19 +2061,19 @@ vcgt_f32 (float32x2_t __a, float32x2_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vcgt_u8 (uint8x8_t __a, uint8x8_t __b)
{
return (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __a, (int8x8_t) __b, 0);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vcgt_u16 (uint16x4_t __a, uint16x4_t __b)
{
return (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __a, (int16x4_t) __b, 0);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vcgt_u32 (uint32x2_t __a, uint32x2_t __b)
{
return (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
return (uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __a, (int32x2_t) __b, 0);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
......@@ -2103,19 +2103,19 @@ vcgtq_f32 (float32x4_t __a, float32x4_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcgtq_u8 (uint8x16_t __a, uint8x16_t __b)
{
return (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __a, (int8x16_t) __b, 0);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcgtq_u16 (uint16x8_t __a, uint16x8_t __b)
{
return (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __a, (int16x8_t) __b, 0);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcgtq_u32 (uint32x4_t __a, uint32x4_t __b)
{
return (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __a, (int32x4_t) __b, 0);
}
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
......@@ -2145,19 +2145,19 @@ vclt_f32 (float32x2_t __a, float32x2_t __b)
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
vclt_u8 (uint8x8_t __a, uint8x8_t __b)
{
return (uint8x8_t)__builtin_neon_vcgtv8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
return (uint8x8_t)__builtin_neon_vcgtuv8qi ((int8x8_t) __b, (int8x8_t) __a, 0);
}
__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
vclt_u16 (uint16x4_t __a, uint16x4_t __b)
{
return (uint16x4_t)__builtin_neon_vcgtv4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
return (uint16x4_t)__builtin_neon_vcgtuv4hi ((int16x4_t) __b, (int16x4_t) __a, 0);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
vclt_u32 (uint32x2_t __a, uint32x2_t __b)
{
return (uint32x2_t)__builtin_neon_vcgtv2si ((int32x2_t) __b, (int32x2_t) __a, 0);
return (uint32x2_t)__builtin_neon_vcgtuv2si ((int32x2_t) __b, (int32x2_t) __a, 0);
}
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
......@@ -2187,19 +2187,19 @@ vcltq_f32 (float32x4_t __a, float32x4_t __b)
__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
vcltq_u8 (uint8x16_t __a, uint8x16_t __b)
{
return (uint8x16_t)__builtin_neon_vcgtv16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
return (uint8x16_t)__builtin_neon_vcgtuv16qi ((int8x16_t) __b, (int8x16_t) __a, 0);
}
__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
vcltq_u16 (uint16x8_t __a, uint16x8_t __b)
{
return (uint16x8_t)__builtin_neon_vcgtv8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
return (uint16x8_t)__builtin_neon_vcgtuv8hi ((int16x8_t) __b, (int16x8_t) __a, 0);
}
__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
vcltq_u32 (uint32x4_t __a, uint32x4_t __b)
{
return (uint32x4_t)__builtin_neon_vcgtv4si ((int32x4_t) __b, (int32x4_t) __a, 0);
return (uint32x4_t)__builtin_neon_vcgtuv4si ((int32x4_t) __b, (int32x4_t) __a, 0);
}
__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
......
......@@ -34,7 +34,9 @@
UNSPEC_VCAGT
UNSPEC_VCEQ
UNSPEC_VCGE
UNSPEC_VCGEU
UNSPEC_VCGT
UNSPEC_VCGTU
UNSPEC_VCLS
UNSPEC_VCONCAT
UNSPEC_VCVT
......@@ -2146,6 +2148,18 @@
(const_string "neon_int_5")))]
)
(define_insn "neon_vcgeu<mode>"
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
(unspec:<V_cmp_result>
[(match_operand:VDQIW 1 "s_register_operand" "w")
(match_operand:VDQIW 2 "s_register_operand" "w")
(match_operand:SI 3 "immediate_operand" "i")]
UNSPEC_VCGEU))]
"TARGET_NEON"
"vcge.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "neon_type" "neon_int_5")]
)
(define_insn "neon_vcgt<mode>"
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
(unspec:<V_cmp_result>
......@@ -2165,6 +2179,18 @@
(const_string "neon_int_5")))]
)
(define_insn "neon_vcgtu<mode>"
[(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
(unspec:<V_cmp_result>
[(match_operand:VDQIW 1 "s_register_operand" "w")
(match_operand:VDQIW 2 "s_register_operand" "w")
(match_operand:SI 3 "immediate_operand" "i")]
UNSPEC_VCGTU))]
"TARGET_NEON"
"vcgt.%T3%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "neon_type" "neon_int_5")]
)
;; VCLE and VCLT only support comparisons with immediate zero (register
;; variants are VCGE and VCGT with operands reversed).
......
......@@ -700,6 +700,8 @@ let bit_select shape elt =
(* Common lists of supported element types. *)
let s_8_32 = [S8; S16; S32]
let u_8_32 = [U8; U16; U32]
let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = S64 :: U64 :: su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
......@@ -777,26 +779,40 @@ let ops =
Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
(* Comparison, greater-than or equal. *)
Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32;
Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32;
Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: s_8_32;
Vcge, [Builtin_name "vcgeu"], All (3, Dreg), "vcge", cmp_sign_matters, u_8_32;
Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: s_8_32;
Vcge, [Builtin_name "vcgeu"], All (3, Qreg), "vcgeQ", cmp_sign_matters, u_8_32;
(* Comparison, less-than or equal. *)
Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
F32 :: su_8_32;
F32 :: s_8_32;
Vcle, [Flipped "vcgeu"], All (3, Dreg), "vcle", cmp_sign_matters,
u_8_32;
Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
All (3, Qreg), "vcleQ", cmp_sign_matters,
F32 :: su_8_32;
F32 :: s_8_32;
Vcle, [Instruction_name ["vcge"]; Flipped "vcgeuQ"],
All (3, Qreg), "vcleQ", cmp_sign_matters,
u_8_32;
(* Comparison, greater-than. *)
Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32;
Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32;
Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: s_8_32;
Vcgt, [Builtin_name "vcgtu"], All (3, Dreg), "vcgt", cmp_sign_matters, u_8_32;
Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: s_8_32;
Vcgt, [Builtin_name "vcgtu"], All (3, Qreg), "vcgtQ", cmp_sign_matters, u_8_32;
(* Comparison, less-than. *)
Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
F32 :: su_8_32;
F32 :: s_8_32;
Vclt, [Flipped "vcgtu"], All (3, Dreg), "vclt", cmp_sign_matters,
u_8_32;
Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
All (3, Qreg), "vcltQ", cmp_sign_matters,
F32 :: su_8_32;
F32 :: s_8_32;
Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtuQ"],
All (3, Qreg), "vcltQ", cmp_sign_matters,
u_8_32;
(* Compare absolute greater-than or equal. *)
Vcage, [Instruction_name ["vacge"]],
......
2012-02-28 Matthew Gretton-Dann <matthew.gretton-dann@arm.com>
PR target/51534
* gcc.target/arm/neon/pr51534.c: New testcase.
2012-02-28 Richard Guenther <rguenther@suse.de>
PR target/52407
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment