Commit 92422235 by Christophe Lyon Committed by Christophe Lyon

re PR target/68620 (ICE on gcc.target/arm/attr-neon-fp16.c)

2016-01-26  Christophe Lyon  <christophe.lyon@linaro.org>

	gcc/
	PR target/68620
	* config/arm/arm.c (neon_valid_immediate): Handle FP16 vectors.
	* config/arm/arm_neon.h (__ARM_NUM_LANES, __arm_lane, __arm_laneq):
	New helper macros.
	(vget_lane_f16): Handle big-endian.
	(vgetq_lane_f16): Likewise.
	(vset_lane_f16): Likewise.
	(vsetq_lane_f16): Likewise.
	* config/arm/iterators.md (VQXMOV): Add V8HF.
	(VDQ): Add V4HF and V8HF.
	(V_reg): Handle V4HF and V8HF.
	(Is_float_mode): Likewise.
	* config/arm/neon.md (movv4hf, movv8hf, neon_vdup_nv4hf,
	neon_vdup_nv8hf): New patterns.
	(vec_set<mode>_internal, vec_extract<mode>, neon_vld1_dup<mode>):
	Use VD_LANE iterator.
	(neon_vld1_dup<mode>): Use VQ2 iterator.

	testsuite/
	PR target/68620
	* gcc.target/arm/pr68620.c: New test.

From-SVN: r232832
parent 0bccf11b
2016-01-26 Christophe Lyon <christophe.lyon@linaro.org>
PR target/68620
* config/arm/arm.c (neon_valid_immediate): Handle FP16 vectors.
* config/arm/arm_neon.h (__ARM_NUM_LANES, __arm_lane, __arm_laneq):
New helper macros.
(vget_lane_f16): Handle big-endian.
(vgetq_lane_f16): Likewise.
(vset_lane_f16): Likewise.
(vsetq_lane_f16): Likewise.
* config/arm/iterators.md (VQXMOV): Add V8HF.
(VDQ): Add V4HF and V8HF.
(V_reg): Handle V4HF and V8HF.
(Is_float_mode): Likewise.
* config/arm/neon.md (movv4hf, movv8hf, neon_vdup_nv4hf,
neon_vdup_nv8hf): New patterns.
(vec_set<mode>_internal, vec_extract<mode>, neon_vld1_dup<mode>):
Use VD_LANE iterator.
(neon_vld1_dup<mode>): Use VQ2 iterator.
2016-01-26 Nathan Sidwell <nathan@acm.org>
* omp-low.h (oacc_fn_attrib_kernels_p): Declare.
......
...@@ -12381,6 +12381,10 @@ neon_valid_immediate (rtx op, machine_mode mode, int inverse, ...@@ -12381,6 +12381,10 @@ neon_valid_immediate (rtx op, machine_mode mode, int inverse,
if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0))) if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
return -1; return -1;
/* FP16 vectors cannot be represented. */
if (GET_MODE_INNER (mode) == HFmode)
return -1;
r0 = CONST_DOUBLE_REAL_VALUE (el0); r0 = CONST_DOUBLE_REAL_VALUE (el0);
for (i = 1; i < n_elts; i++) for (i = 1; i < n_elts; i++)
......
...@@ -5302,14 +5302,26 @@ vget_lane_s32 (int32x2_t __a, const int __b) ...@@ -5302,14 +5302,26 @@ vget_lane_s32 (int32x2_t __a, const int __b)
were marked always-inline so there were no call sites, the declaration were marked always-inline so there were no call sites, the declaration
would nonetheless raise an error. Hence, we must use a macro instead. */ would nonetheless raise an error. Hence, we must use a macro instead. */
#define vget_lane_f16(__v, __idx) \ /* For big-endian, GCC's vector indices are reversed within each 64
__extension__ \ bits compared to the architectural lane indices used by Neon
({ \ intrinsics. */
float16x4_t __vec = (__v); \ #ifdef __ARM_BIG_ENDIAN
__builtin_arm_lane_check (4, __idx); \ #define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof (__v[0]))
float16_t __res = __vec[__idx]; \ #define __arm_lane(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec) - 1))
__res; \ #define __arm_laneq(__vec, __idx) (__idx ^ (__ARM_NUM_LANES(__vec)/2 - 1))
}) #else
#define __arm_lane(__vec, __idx) __idx
#define __arm_laneq(__vec, __idx) __idx
#endif
/* Extract lane __idx of a float16x4_t vector.  Implemented as a
   macro rather than an always_inline function (see the note above:
   the bare declaration would otherwise raise an error when FP16 is
   unavailable).  __arm_lane remaps the architectural lane index to
   GCC's vector index, which differs on big-endian targets.  */
#define vget_lane_f16(__v, __idx) \
__extension__ \
({ \
float16x4_t __vec = (__v); \
__builtin_arm_lane_check (4, __idx); \
float16_t __res = __vec[__arm_lane(__vec, __idx)]; \
__res; \
})
#endif #endif
__extension__ static __inline float32_t __attribute__ ((__always_inline__)) __extension__ static __inline float32_t __attribute__ ((__always_inline__))
...@@ -5379,14 +5391,14 @@ vgetq_lane_s32 (int32x4_t __a, const int __b) ...@@ -5379,14 +5391,14 @@ vgetq_lane_s32 (int32x4_t __a, const int __b)
} }
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
#define vgetq_lane_f16(__v, __idx) \ #define vgetq_lane_f16(__v, __idx) \
__extension__ \ __extension__ \
({ \ ({ \
float16x8_t __vec = (__v); \ float16x8_t __vec = (__v); \
__builtin_arm_lane_check (8, __idx); \ __builtin_arm_lane_check (8, __idx); \
float16_t __res = __vec[__idx]; \ float16_t __res = __vec[__arm_laneq(__vec, __idx)]; \
__res; \ __res; \
}) })
#endif #endif
__extension__ static __inline float32_t __attribute__ ((__always_inline__)) __extension__ static __inline float32_t __attribute__ ((__always_inline__))
...@@ -5458,13 +5470,13 @@ vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c) ...@@ -5458,13 +5470,13 @@ vset_lane_s32 (int32_t __a, int32x2_t __b, const int __c)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
#define vset_lane_f16(__e, __v, __idx) \ #define vset_lane_f16(__e, __v, __idx) \
__extension__ \ __extension__ \
({ \ ({ \
float16_t __elem = (__e); \ float16_t __elem = (__e); \
float16x4_t __vec = (__v); \ float16x4_t __vec = (__v); \
__builtin_arm_lane_check (4, __idx); \ __builtin_arm_lane_check (4, __idx); \
__vec[__idx] = __elem; \ __vec[__arm_lane (__vec, __idx)] = __elem; \
__vec; \ __vec; \
}) })
#endif #endif
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) __extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
...@@ -5536,13 +5548,13 @@ vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c) ...@@ -5536,13 +5548,13 @@ vsetq_lane_s32 (int32_t __a, int32x4_t __b, const int __c)
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
#define vsetq_lane_f16(__e, __v, __idx) \ #define vsetq_lane_f16(__e, __v, __idx) \
__extension__ \ __extension__ \
({ \ ({ \
float16_t __elem = (__e); \ float16_t __elem = (__e); \
float16x8_t __vec = (__v); \ float16x8_t __vec = (__v); \
__builtin_arm_lane_check (8, __idx); \ __builtin_arm_lane_check (8, __idx); \
__vec[__idx] = __elem; \ __vec[__arm_laneq (__vec, __idx)] = __elem; \
__vec; \ __vec; \
}) })
#endif #endif
__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) __extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
......
...@@ -99,7 +99,7 @@ ...@@ -99,7 +99,7 @@
(define_mode_iterator VQI [V16QI V8HI V4SI]) (define_mode_iterator VQI [V16QI V8HI V4SI])
;; Quad-width vector modes, with TImode added, for moves. ;; Quad-width vector modes, with TImode added, for moves.
(define_mode_iterator VQXMOV [V16QI V8HI V4SI V4SF V2DI TI]) (define_mode_iterator VQXMOV [V16QI V8HI V8HF V4SI V4SF V2DI TI])
;; Opaque structure types wider than TImode. ;; Opaque structure types wider than TImode.
(define_mode_iterator VSTRUCT [EI OI CI XI]) (define_mode_iterator VSTRUCT [EI OI CI XI])
...@@ -114,7 +114,7 @@ ...@@ -114,7 +114,7 @@
(define_mode_iterator VN [V8HI V4SI V2DI]) (define_mode_iterator VN [V8HI V4SI V2DI])
;; All supported vector modes (except singleton DImode). ;; All supported vector modes (except singleton DImode).
(define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF V2DI]) (define_mode_iterator VDQ [V8QI V16QI V4HI V8HI V2SI V4SI V4HF V8HF V2SF V4SF V2DI])
;; All supported vector modes (except those with 64-bit integer elements). ;; All supported vector modes (except those with 64-bit integer elements).
(define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF]) (define_mode_iterator VDQW [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF])
...@@ -428,6 +428,7 @@ ...@@ -428,6 +428,7 @@
;; Register width from element mode ;; Register width from element mode
(define_mode_attr V_reg [(V8QI "P") (V16QI "q") (define_mode_attr V_reg [(V8QI "P") (V16QI "q")
(V4HI "P") (V8HI "q") (V4HI "P") (V8HI "q")
(V4HF "P") (V8HF "q")
(V2SI "P") (V4SI "q") (V2SI "P") (V4SI "q")
(V2SF "P") (V4SF "q") (V2SF "P") (V4SF "q")
(DI "P") (V2DI "q") (DI "P") (V2DI "q")
...@@ -576,6 +577,7 @@ ...@@ -576,6 +577,7 @@
(define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false") (define_mode_attr Is_float_mode [(V8QI "false") (V16QI "false")
(V4HI "false") (V8HI "false") (V4HI "false") (V8HI "false")
(V2SI "false") (V4SI "false") (V2SI "false") (V4SI "false")
(V4HF "true") (V8HF "true")
(V2SF "true") (V4SF "true") (V2SF "true") (V4SF "true")
(DI "false") (V2DI "false")]) (DI "false") (V2DI "false")])
......
...@@ -137,6 +137,36 @@ ...@@ -137,6 +137,36 @@
} }
}) })
;; Move expander for 64-bit V4HF (4 x half-precision float) vectors,
;; enabled for TARGET_NEON && TARGET_FP16.
(define_expand "movv4hf"
[(set (match_operand:V4HF 0 "s_register_operand")
(match_operand:V4HF 1 "s_register_operand"))]
"TARGET_NEON && TARGET_FP16"
{
/* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
causing an ICE on big-endian because it cannot extract subregs in
this case. */
if (can_create_pseudo_p ())
{
if (!REG_P (operands[0]))
operands[1] = force_reg (V4HFmode, operands[1]);
}
})
;; Move expander for 128-bit V8HF (8 x half-precision float) vectors,
;; enabled for TARGET_NEON && TARGET_FP16.
;; Use s_register_operand predicates to match the V4HF expander above;
;; the empty predicates were inconsistent with its sibling.
(define_expand "movv8hf"
[(set (match_operand:V8HF 0 "s_register_operand")
(match_operand:V8HF 1 "s_register_operand"))]
"TARGET_NEON && TARGET_FP16"
{
/* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
causing an ICE on big-endian because it cannot extract subregs in
this case. */
if (can_create_pseudo_p ())
{
if (!REG_P (operands[0]))
operands[1] = force_reg (V8HFmode, operands[1]);
}
})
(define_insn "*neon_mov<mode>" (define_insn "*neon_mov<mode>"
[(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w") [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
(match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))] (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
...@@ -299,11 +329,11 @@ ...@@ -299,11 +329,11 @@
[(set_attr "type" "neon_load1_1reg<q>")]) [(set_attr "type" "neon_load1_1reg<q>")])
(define_insn "vec_set<mode>_internal" (define_insn "vec_set<mode>_internal"
[(set (match_operand:VD 0 "s_register_operand" "=w,w") [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
(vec_merge:VD (vec_merge:VD_LANE
(vec_duplicate:VD (vec_duplicate:VD_LANE
(match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r")) (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
(match_operand:VD 3 "s_register_operand" "0,0") (match_operand:VD_LANE 3 "s_register_operand" "0,0")
(match_operand:SI 2 "immediate_operand" "i,i")))] (match_operand:SI 2 "immediate_operand" "i,i")))]
"TARGET_NEON" "TARGET_NEON"
{ {
...@@ -385,7 +415,7 @@ ...@@ -385,7 +415,7 @@
(define_insn "vec_extract<mode>" (define_insn "vec_extract<mode>"
[(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r") [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
(vec_select:<V_elem> (vec_select:<V_elem>
(match_operand:VD 1 "s_register_operand" "w,w") (match_operand:VD_LANE 1 "s_register_operand" "w,w")
(parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))] (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
"TARGET_NEON" "TARGET_NEON"
{ {
...@@ -2829,6 +2859,22 @@ if (BYTES_BIG_ENDIAN) ...@@ -2829,6 +2859,22 @@ if (BYTES_BIG_ENDIAN)
[(set_attr "type" "neon_from_gp<q>")] [(set_attr "type" "neon_from_gp<q>")]
) )
;; Duplicate an HF value from a core register ("r" constraint) into
;; all four 16-bit lanes of a D register (vdup.16).
(define_insn "neon_vdup_nv4hf"
[(set (match_operand:V4HF 0 "s_register_operand" "=w")
(vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
"TARGET_NEON"
"vdup.16\t%P0, %1"
[(set_attr "type" "neon_from_gp")]
)
;; Duplicate an HF value from a core register ("r" constraint) into
;; all eight 16-bit lanes of a Q register (vdup.16).
(define_insn "neon_vdup_nv8hf"
[(set (match_operand:V8HF 0 "s_register_operand" "=w")
(vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
"TARGET_NEON"
"vdup.16\t%q0, %1"
[(set_attr "type" "neon_from_gp_q")]
)
(define_insn "neon_vdup_n<mode>" (define_insn "neon_vdup_n<mode>"
[(set (match_operand:V32 0 "s_register_operand" "=w,w") [(set (match_operand:V32 0 "s_register_operand" "=w,w")
(vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))] (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
...@@ -4361,8 +4407,8 @@ if (BYTES_BIG_ENDIAN) ...@@ -4361,8 +4407,8 @@ if (BYTES_BIG_ENDIAN)
) )
(define_insn "neon_vld1_dup<mode>" (define_insn "neon_vld1_dup<mode>"
[(set (match_operand:VD 0 "s_register_operand" "=w") [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
(vec_duplicate:VD (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))] (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
"TARGET_NEON" "TARGET_NEON"
"vld1.<V_sz_elem>\t{%P0[]}, %A1" "vld1.<V_sz_elem>\t{%P0[]}, %A1"
[(set_attr "type" "neon_load1_all_lanes<q>")] [(set_attr "type" "neon_load1_all_lanes<q>")]
...@@ -4378,8 +4424,8 @@ if (BYTES_BIG_ENDIAN) ...@@ -4378,8 +4424,8 @@ if (BYTES_BIG_ENDIAN)
) )
(define_insn "neon_vld1_dup<mode>" (define_insn "neon_vld1_dup<mode>"
[(set (match_operand:VQ 0 "s_register_operand" "=w") [(set (match_operand:VQ2 0 "s_register_operand" "=w")
(vec_duplicate:VQ (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))] (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
"TARGET_NEON" "TARGET_NEON"
{ {
return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1"; return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
......
2016-01-26 Christophe Lyon <christophe.lyon@linaro.org>
PR target/68620
* gcc.target/arm/pr68620.c: New test.
2016-01-26 H.J. Lu <hongjiu.lu@intel.com>
* gcc.target/i386/pr68986-2.c: Remove -m32.
......
/* { dg-do compile } */
/* { dg-require-effective-target arm_fp_ok } */
/* { dg-options "-mfp16-format=ieee" } */
/* { dg-add-options arm_fp } */
#include "arm_neon.h"
/* Regression test for PR target/68620: compiling an FP16 conversion
   with a per-function fpu=neon-fp16 target attribute used to ICE
   (see the commit subject above).  Compile-only; no runtime check.  */
float16x4_t __attribute__((target("fpu=neon-fp16")))
foo (float32x4_t arg)
{
return vcvt_f16_f32 (arg);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.