Commit e9619a8a by Richard Henderson

arm: Implement vec_perm and vec_perm_const for NEON.

From-SVN: r182299
parent 8c75d41b
...@@ -244,4 +244,7 @@ extern const struct tune_params *current_tune; ...@@ -244,4 +244,7 @@ extern const struct tune_params *current_tune;
extern int vfp3_const_double_for_fract_bits (rtx); extern int vfp3_const_double_for_fract_bits (rtx);
#endif /* RTX_CODE */ #endif /* RTX_CODE */
/* Expand a vector permutation into TARGET from inputs OP0 and OP1 under
   selector SEL.  The vec_perm<mode> expander calls this with its four
   operands in order and treats the expansion as always succeeding.  */
extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
/* Counterpart called by the vec_perm_const<mode> expander.  Returns true
   when the permutation was expanded; on false the expander FAILs.  */
extern bool arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
#endif /* ! GCC_ARM_PROTOS_H */ #endif /* ! GCC_ARM_PROTOS_H */
...@@ -3876,6 +3876,65 @@ ...@@ -3876,6 +3876,65 @@
[(set_attr "neon_type" "neon_bp_3cycle")] [(set_attr "neon_type" "neon_bp_3cycle")]
) )
;; These two are used by the vec_perm infrastructure for V16QImode.
;; UNSPEC_VTBL on V16QI with a single V16QI table (operand 1) and a V16QI
;; selector (operand 2).  There is no single-insn form ("#"); after reload
;; the pattern is split into two neon_vtbl2v8qi insns, one for each 64-bit
;; half of the destination and selector, both using the whole table viewed
;; as TImode.
;;
;; Operand 0 must be earlyclobber: the first emitted insn writes the low
;; half of the destination while the second insn still reads the entire
;; table and the high half of the selector.  Without '&' the register
;; allocator may tie the output to an input and the second lookup would
;; read clobbered data.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
	(unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
		       (match_operand:V16QI 2 "s_register_operand" "w")]
		      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1, op2, part0, part2;
  unsigned ofs;

  op0 = operands[0];
  /* The table is handed to the V8QI lookup as one TImode value.  */
  op1 = gen_lowpart (TImode, operands[1]);
  op2 = operands[2];

  /* Low 64 bits of the result from the low 64 bits of the selector.  */
  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));

  /* High 64 bits likewise; op1 must still be intact at this point.  */
  ofs = subreg_highpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
  DONE;
})
;; UNSPEC_VTBL on V16QI with an OImode table (operand 1) and a V16QI
;; selector (operand 2).  There is no single-insn form ("#"); after reload
;; the pattern is split into two V8QI lookups, one for each 64-bit half of
;; the destination and selector, both reading the whole table.
;;
;; Operand 0 must be earlyclobber: the first emitted insn writes the low
;; half of the destination while the second insn still reads the entire
;; table and the high half of the selector.  Without '&' the register
;; allocator may overlap the output with an input and the second lookup
;; would read clobbered data.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
	(unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
		       (match_operand:V16QI 2 "s_register_operand" "w")]
		      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx op0, op1, op2, part0, part2;
  unsigned ofs;

  op0 = operands[0];
  /* NOTE(review): op1 is passed through in OImode here, whereas
     neon_vtbl1v16qi hands the same generator a TImode table -- confirm
     the table operand mode gen_neon_vtbl2v8qi expects.  */
  op1 = operands[1];
  op2 = operands[2];

  /* Low 64 bits of the result from the low 64 bits of the selector.  */
  ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));

  /* High 64 bits likewise; op1 must still be intact at this point.  */
  ofs = subreg_highpart_offset (V8QImode, V16QImode);
  part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
  part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
  emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
  DONE;
})
(define_insn "neon_vtbx1v8qi" (define_insn "neon_vtbx1v8qi"
[(set (match_operand:V8QI 0 "s_register_operand" "=w") [(set (match_operand:V8QI 0 "s_register_operand" "=w")
(unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0") (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
......
...@@ -108,3 +108,29 @@ ...@@ -108,3 +108,29 @@
|| (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))" || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
{ {
}) })
;; Vector permutation over the VALL modes where operand 3, the selector,
;; is accepted without a predicate.  All of the work is delegated to
;; arm_expand_vec_perm_const; if it reports that it could not handle the
;; request, the expansion FAILs.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VALL 0 "s_register_operand" "")
   (match_operand:VALL 1 "s_register_operand" "")
   (match_operand:VALL 2 "s_register_operand" "")
   (match_operand:<V_cmp_result> 3 "" "")]
  "TARGET_NEON
|| (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
{
  /* Bail out first when the helper declines; otherwise we are finished.  */
  if (!arm_expand_vec_perm_const (operands[0], operands[1],
				  operands[2], operands[3]))
    FAIL;
  DONE;
})
;; Vector permutation over the VE modes with the selector (operand 3) held
;; in a register.  Enabled only for NEON when not BYTES_BIG_ENDIAN.  The
;; expansion is handed to arm_expand_vec_perm and always completes.
(define_expand "vec_perm<mode>"
  [(match_operand:VE 0 "s_register_operand" "")
   (match_operand:VE 1 "s_register_operand" "")
   (match_operand:VE 2 "s_register_operand" "")
   (match_operand:VE 3 "s_register_operand" "")]
  "TARGET_NEON && !BYTES_BIG_ENDIAN"
{
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx sel = operands[3];

  arm_expand_vec_perm (target, op0, op1, sel);
  DONE;
})
...@@ -2725,7 +2725,8 @@ proc check_effective_target_vect_perm { } { ...@@ -2725,7 +2725,8 @@ proc check_effective_target_vect_perm { } {
verbose "check_effective_target_vect_perm: using cached result" 2 verbose "check_effective_target_vect_perm: using cached result" 2
} else { } else {
set et_vect_perm_saved 0 set et_vect_perm_saved 0
if { [istarget powerpc*-*-*] if { [is-effective-target arm_neon_ok]
|| [istarget powerpc*-*-*]
|| [istarget spu-*-*] || [istarget spu-*-*]
|| [istarget i?86-*-*] || [istarget i?86-*-*]
|| [istarget x86_64-*-*] } { || [istarget x86_64-*-*] } {
...@@ -2748,7 +2749,8 @@ proc check_effective_target_vect_perm_byte { } { ...@@ -2748,7 +2749,8 @@ proc check_effective_target_vect_perm_byte { } {
verbose "check_effective_target_vect_perm_byte: using cached result" 2 verbose "check_effective_target_vect_perm_byte: using cached result" 2
} else { } else {
set et_vect_perm_byte_saved 0 set et_vect_perm_byte_saved 0
if { [istarget powerpc*-*-*] if { [is-effective-target arm_neon_ok]
|| [istarget powerpc*-*-*]
|| [istarget spu-*-*] } { || [istarget spu-*-*] } {
set et_vect_perm_byte_saved 1 set et_vect_perm_byte_saved 1
} }
...@@ -2769,7 +2771,8 @@ proc check_effective_target_vect_perm_short { } { ...@@ -2769,7 +2771,8 @@ proc check_effective_target_vect_perm_short { } {
verbose "check_effective_target_vect_perm_short: using cached result" 2 verbose "check_effective_target_vect_perm_short: using cached result" 2
} else { } else {
set et_vect_perm_short_saved 0 set et_vect_perm_short_saved 0
if { [istarget powerpc*-*-*] if { [is-effective-target arm_neon_ok]
|| [istarget powerpc*-*-*]
|| [istarget spu-*-*] } { || [istarget spu-*-*] } {
set et_vect_perm_short_saved 1 set et_vect_perm_short_saved 1
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment