Commit c15893df by Richard Sandiford

aarch64: Add vector/vector vec_extract patterns [PR92822]

Part of the problem in this PR is that we don't provide patterns
to extract a 64-bit vector from one half of a 128-bit vector.
Adding them fixes:

FAIL: gcc.target/aarch64/fmul_intrinsic_1.c scan-assembler-times fmul\\td[0-9]+, d[0-9]+, d[0-9]+ 1
FAIL: gcc.target/aarch64/fmul_intrinsic_1.c scan-assembler-times fmul\\tv[0-9]+.2d, v[0-9]+.2d, v[0-9]+.d\\[[0-9]+\\] 3

The 2s failures need target-independent changes, after which they rely
on these patterns too.

2020-01-27  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR target/92822
	* config/aarch64/aarch64-simd.md (aarch64_get_half<mode>): New
	expander.
	(@aarch64_split_simd_mov<mode>): Use it.
	(aarch64_simd_mov_from_<mode>low): Add a GPR alternative.
	Leave the vec_extract patterns to handle 2-element vectors.
	(aarch64_simd_mov_from_<mode>high): Likewise.
	(vec_extract<VQMOV_NO2E:mode><Vhalf>): New expander.
	(vec_extractv2dfv1df): Likewise.
parent e2a14bec
2020-01-27 Richard Sandiford <richard.sandiford@arm.com> 2020-01-27 Richard Sandiford <richard.sandiford@arm.com>
PR target/92822
* config/aarch64/aarch64-simd.md (aarch64_get_half<mode>): New
expander.
(@aarch64_split_simd_mov<mode>): Use it.
(aarch64_simd_mov_from_<mode>low): Add a GPR alternative.
Leave the vec_extract patterns to handle 2-element vectors.
(aarch64_simd_mov_from_<mode>high): Likewise.
(vec_extract<VQMOV_NO2E:mode><Vhalf>): New expander.
(vec_extractv2dfv1df): Likewise.
2020-01-27 Richard Sandiford <richard.sandiford@arm.com>
* config/aarch64/aarch64.c (aarch64_if_then_else_costs): Match * config/aarch64/aarch64.c (aarch64_if_then_else_costs): Match
jump conditions for *compare_condjump<GPI:mode>. jump conditions for *compare_condjump<GPI:mode>.
......
...@@ -282,37 +282,51 @@ ...@@ -282,37 +282,51 @@
rtx dst_high_part = gen_highpart (<VHALF>mode, dst); rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
emit_insn emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
(gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
emit_insn
(gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
} }
DONE; DONE;
} }
) )
(define_insn "aarch64_simd_mov_from_<mode>low" (define_expand "aarch64_get_half<mode>"
[(set (match_operand:<VHALF> 0 "register_operand" "=r") [(set (match_operand:<VHALF> 0 "register_operand")
(vec_select:<VHALF> (vec_select:<VHALF>
(match_operand:VQMOV 1 "register_operand" "w") (match_operand:VQMOV 1 "register_operand")
(match_operand:VQMOV 2 "vect_par_cnst_lo_half" "")))] (match_operand 2 "ascending_int_parallel")))]
"TARGET_SIMD && reload_completed" "TARGET_SIMD"
"umov\t%0, %1.d[0]" )
[(set_attr "type" "neon_to_gp<q>")
(set_attr "length" "4") (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
]) [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
(vec_select:<VHALF>
(match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
(match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
"TARGET_SIMD"
"@
#
umov\t%0, %1.d[0]"
"&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
[(set (match_dup 0) (match_dup 1))]
{
operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
}
[(set_attr "type" "mov_reg,neon_to_gp<q>")
(set_attr "length" "4")]
)
(define_insn "aarch64_simd_mov_from_<mode>high" (define_insn "aarch64_simd_mov_from_<mode>high"
[(set (match_operand:<VHALF> 0 "register_operand" "=r") [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
(vec_select:<VHALF> (vec_select:<VHALF>
(match_operand:VQMOV 1 "register_operand" "w") (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
(match_operand:VQMOV 2 "vect_par_cnst_hi_half" "")))] (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
"TARGET_SIMD && reload_completed" "TARGET_SIMD"
"umov\t%0, %1.d[1]" "@
[(set_attr "type" "neon_to_gp<q>") dup\\t%d0, %1.d[1]
(set_attr "length" "4") umov\t%0, %1.d[1]"
]) [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
(set_attr "length" "4")]
)
(define_insn "orn<mode>3" (define_insn "orn<mode>3"
[(set (match_operand:VDQ_I 0 "register_operand" "=w") [(set (match_operand:VDQ_I 0 "register_operand" "=w")
...@@ -6140,6 +6154,35 @@ ...@@ -6140,6 +6154,35 @@
DONE; DONE;
}) })
;; Standard vec_extract expander for pulling a 64-bit vector out of a
;; 128-bit vector.  Operand 0 is the <VHALF> destination, operand 1 the
;; 128-bit source and operand 2 the index of the first element to extract.
(define_expand "vec_extract<mode><Vhalf>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV_NO2E 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  const int half_nunits = <nunits> / 2;
  const int first_elt = INTVAL (operands[2]);

  /* Only an extraction that begins at element 0 or exactly halfway
     through the source is handled here; reject anything else.  */
  if (!(first_elt == 0 || first_elt == half_nunits))
    FAIL;

  /* Selector for aarch64_get_half: half_nunits entries beginning at
     first_elt with a step of 1 (presumably consecutive element indices;
     see aarch64_gen_stepped_int_parallel for the exact form).  */
  rtx half_sel
    = aarch64_gen_stepped_int_parallel (half_nunits, first_elt, 1);
  emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1],
                                         half_sel));
  DONE;
})
;; vec_extract expander for taking a one-element V1DF vector from a V2DF
;; vector.  Operand 0 is the V1DF destination, operand 1 the V2DF source
;; and operand 2 the immediate element index.
(define_expand "vec_extractv2dfv1df"
  [(match_operand:V1DF 0 "register_operand")
   (match_operand:V2DF 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  /* Few other patterns work on V1DF, so it should be better not to
     expose it: view the destination as a DFmode value instead and let
     the ordinary V2DF -> DF element extraction do the work.  */
  emit_insn (gen_vec_extractv2dfdf (gen_lowpart (DFmode, operands[0]),
                                    operands[1], operands[2]));
  DONE;
})
;; aes ;; aes
(define_insn "aarch64_crypto_aes<aes_op>v16qi" (define_insn "aarch64_crypto_aes<aes_op>v16qi"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment