Commit 862abc04 by Alan Lawrence Committed by Alan Lawrence

[AArch64] Improve code generation for float16 vector code

gcc/:

	* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>,
	aarch64_dup_lane<mode>, aarch64_dup_lane_<vswap_width_name><mode>,
	aarch64_simd_vec_set<mode>, vec_set<mode>, vec_perm_const<mode>,
	vec_init<mode>, *aarch64_simd_ld1r<mode>, vec_extract<mode>): Add
	V4HF and V8HF variants to iterator.

	* config/aarch64/aarch64.c (aarch64_evpc_dup): Add V4HF and V8HF cases.

	* config/aarch64/iterators.md (VDQF_F16): New.
	(VSWAP_WIDTH, vswap_width_name): Add V4HF and V8HF cases.

From-SVN: r227550
parent 7171dc86
2015-09-08 Alan Lawrence <alan.lawrence@arm.com> 2015-09-08 Alan Lawrence <alan.lawrence@arm.com>
* config/aarch64/aarch64-simd.md (aarch64_simd_dup<mode>,
aarch64_dup_lane<mode>, aarch64_dup_lane_<vswap_width_name><mode>,
aarch64_simd_vec_set<mode>, vec_set<mode>, vec_perm_const<mode>,
vec_init<mode>, *aarch64_simd_ld1r<mode>, vec_extract<mode>): Add
V4HF and V8HF variants to iterator.
* config/aarch64/aarch64.c (aarch64_evpc_dup): Add V4HF and V8HF cases.
* config/aarch64/iterators.md (VDQF_F16): New.
(VSWAP_WIDTH, vswap_width_name): Add V4HF and V8HF cases.
2015-09-08 Alan Lawrence <alan.lawrence@arm.com>
* config/aarch64/arm_neon.h (vreinterpret_p8_f16, vreinterpret_p16_f16, * config/aarch64/arm_neon.h (vreinterpret_p8_f16, vreinterpret_p16_f16,
vreinterpret_f16_f64, vreinterpret_f16_s8, vreinterpret_f16_s16, vreinterpret_f16_f64, vreinterpret_f16_s8, vreinterpret_f16_s16,
vreinterpret_f16_s32, vreinterpret_f16_s64, vreinterpret_f16_f32, vreinterpret_f16_s32, vreinterpret_f16_s64, vreinterpret_f16_f32,
......
...@@ -53,18 +53,19 @@ ...@@ -53,18 +53,19 @@
) )
(define_insn "aarch64_simd_dup<mode>" (define_insn "aarch64_simd_dup<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
(vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))] (vec_duplicate:VDQF_F16
(match_operand:<VEL> 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
"dup\\t%0.<Vtype>, %1.<Vetype>[0]" "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
[(set_attr "type" "neon_dup<q>")] [(set_attr "type" "neon_dup<q>")]
) )
(define_insn "aarch64_dup_lane<mode>" (define_insn "aarch64_dup_lane<mode>"
[(set (match_operand:VALL 0 "register_operand" "=w") [(set (match_operand:VALL_F16 0 "register_operand" "=w")
(vec_duplicate:VALL (vec_duplicate:VALL_F16
(vec_select:<VEL> (vec_select:<VEL>
(match_operand:VALL 1 "register_operand" "w") (match_operand:VALL_F16 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]) (parallel [(match_operand:SI 2 "immediate_operand" "i")])
)))] )))]
"TARGET_SIMD" "TARGET_SIMD"
...@@ -76,8 +77,8 @@ ...@@ -76,8 +77,8 @@
) )
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>" (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
[(set (match_operand:VALL 0 "register_operand" "=w") [(set (match_operand:VALL_F16 0 "register_operand" "=w")
(vec_duplicate:VALL (vec_duplicate:VALL_F16
(vec_select:<VEL> (vec_select:<VEL>
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "w") (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]) (parallel [(match_operand:SI 2 "immediate_operand" "i")])
...@@ -834,11 +835,11 @@ ...@@ -834,11 +835,11 @@
) )
(define_insn "aarch64_simd_vec_set<mode>" (define_insn "aarch64_simd_vec_set<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
(vec_merge:VDQF (vec_merge:VDQF_F16
(vec_duplicate:VDQF (vec_duplicate:VDQF_F16
(match_operand:<VEL> 1 "register_operand" "w")) (match_operand:<VEL> 1 "register_operand" "w"))
(match_operand:VDQF 3 "register_operand" "0") (match_operand:VDQF_F16 3 "register_operand" "0")
(match_operand:SI 2 "immediate_operand" "i")))] (match_operand:SI 2 "immediate_operand" "i")))]
"TARGET_SIMD" "TARGET_SIMD"
{ {
...@@ -851,7 +852,7 @@ ...@@ -851,7 +852,7 @@
) )
(define_expand "vec_set<mode>" (define_expand "vec_set<mode>"
[(match_operand:VDQF 0 "register_operand" "+w") [(match_operand:VDQF_F16 0 "register_operand" "+w")
(match_operand:<VEL> 1 "register_operand" "w") (match_operand:<VEL> 1 "register_operand" "w")
(match_operand:SI 2 "immediate_operand" "")] (match_operand:SI 2 "immediate_operand" "")]
"TARGET_SIMD" "TARGET_SIMD"
...@@ -4670,9 +4671,9 @@ ...@@ -4670,9 +4671,9 @@
;; vec_perm support ;; vec_perm support
(define_expand "vec_perm_const<mode>" (define_expand "vec_perm_const<mode>"
[(match_operand:VALL 0 "register_operand") [(match_operand:VALL_F16 0 "register_operand")
(match_operand:VALL 1 "register_operand") (match_operand:VALL_F16 1 "register_operand")
(match_operand:VALL 2 "register_operand") (match_operand:VALL_F16 2 "register_operand")
(match_operand:<V_cmp_result> 3)] (match_operand:<V_cmp_result> 3)]
"TARGET_SIMD" "TARGET_SIMD"
{ {
...@@ -4936,7 +4937,7 @@ ...@@ -4936,7 +4937,7 @@
;; Standard pattern name vec_init<mode>. ;; Standard pattern name vec_init<mode>.
(define_expand "vec_init<mode>" (define_expand "vec_init<mode>"
[(match_operand:VALL 0 "register_operand" "") [(match_operand:VALL_F16 0 "register_operand" "")
(match_operand 1 "" "")] (match_operand 1 "" "")]
"TARGET_SIMD" "TARGET_SIMD"
{ {
...@@ -4945,8 +4946,8 @@ ...@@ -4945,8 +4946,8 @@
}) })
(define_insn "*aarch64_simd_ld1r<mode>" (define_insn "*aarch64_simd_ld1r<mode>"
[(set (match_operand:VALL 0 "register_operand" "=w") [(set (match_operand:VALL_F16 0 "register_operand" "=w")
(vec_duplicate:VALL (vec_duplicate:VALL_F16
(match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))] (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
"TARGET_SIMD" "TARGET_SIMD"
"ld1r\\t{%0.<Vtype>}, %1" "ld1r\\t{%0.<Vtype>}, %1"
...@@ -4993,7 +4994,7 @@ ...@@ -4993,7 +4994,7 @@
(define_expand "vec_extract<mode>" (define_expand "vec_extract<mode>"
[(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "") [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
(match_operand:VALL 1 "register_operand" "") (match_operand:VALL_F16 1 "register_operand" "")
(match_operand:SI 2 "immediate_operand" "")] (match_operand:SI 2 "immediate_operand" "")]
"TARGET_SIMD" "TARGET_SIMD"
{ {
......
...@@ -11906,6 +11906,8 @@ aarch64_evpc_dup (struct expand_vec_perm_d *d) ...@@ -11906,6 +11906,8 @@ aarch64_evpc_dup (struct expand_vec_perm_d *d)
case V4SImode: gen = gen_aarch64_dup_lanev4si; break; case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
case V2SImode: gen = gen_aarch64_dup_lanev2si; break; case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
case V2DImode: gen = gen_aarch64_dup_lanev2di; break; case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
case V8HFmode: gen = gen_aarch64_dup_lanev8hf; break;
case V4HFmode: gen = gen_aarch64_dup_lanev4hf; break;
case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break; case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break; case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
case V2DFmode: gen = gen_aarch64_dup_lanev2df; break; case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
......
...@@ -82,7 +82,10 @@ ...@@ -82,7 +82,10 @@
;; pointer-sized quantities. Exactly one of the two alternatives will match. ;; pointer-sized quantities. Exactly one of the two alternatives will match.
(define_mode_iterator PTR [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")]) (define_mode_iterator PTR [(SI "ptr_mode == SImode") (DI "ptr_mode == DImode")])
;; Vector Float modes. ;; Vector Float modes suitable for moving, loading and storing.
(define_mode_iterator VDQF_F16 [V4HF V8HF V2SF V4SF V2DF])
;; Vector Float modes, barring HF modes.
(define_mode_iterator VDQF [V2SF V4SF V2DF]) (define_mode_iterator VDQF [V2SF V4SF V2DF])
;; Vector Float modes, and DF. ;; Vector Float modes, and DF.
...@@ -627,12 +630,14 @@ ...@@ -627,12 +630,14 @@
(V2SI "V4SI") (V4SI "V2SI") (V2SI "V4SI") (V4SI "V2SI")
(DI "V2DI") (V2DI "DI") (DI "V2DI") (V2DI "DI")
(V2SF "V4SF") (V4SF "V2SF") (V2SF "V4SF") (V4SF "V2SF")
(V4HF "V8HF") (V8HF "V4HF")
(DF "V2DF") (V2DF "DF")]) (DF "V2DF") (V2DF "DF")])
(define_mode_attr vswap_width_name [(V8QI "to_128") (V16QI "to_64") (define_mode_attr vswap_width_name [(V8QI "to_128") (V16QI "to_64")
(V4HI "to_128") (V8HI "to_64") (V4HI "to_128") (V8HI "to_64")
(V2SI "to_128") (V4SI "to_64") (V2SI "to_128") (V4SI "to_64")
(DI "to_128") (V2DI "to_64") (DI "to_128") (V2DI "to_64")
(V4HF "to_128") (V8HF "to_64")
(V2SF "to_128") (V4SF "to_64") (V2SF "to_128") (V4SF "to_64")
(DF "to_128") (V2DF "to_64")]) (DF "to_128") (V2DF "to_64")])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment