Commit b440f324 by Richard Henderson Committed by Richard Henderson

arm: Implement vec_perm and vec_perm_const for NEON.

	* config/arm/arm.c (arm_vectorize_vec_perm_const_ok,
	TARGET_VECTORIZE_VEC_PERM_CONST_OK, neon_split_vcombine, MAX_VECT_LEN,
	struct expand_vec_perm_d, arm_expand_vec_perm_1, arm_expand_vec_perm,
	arm_evpc_neon_vuzp, arm_evpc_neon_vzip, arm_evpc_neon_vrev,
	arm_evpc_neon_vtrn, arm_evpc_neon_vtbl, arm_expand_vec_perm_const_1,
	arm_expand_vec_perm_const): New.
	* config/arm/arm-protos.h: Update.
	* config/arm/neon.md (UNSPEC_VCONCAT): New.
	(*neon_vswp<VDQX>): New.
	(neon_vcombine<VDX>): Use neon_split_vcombine.
	(neon_vtbl1v16qi, neon_vtbl2v16qi, neon_vcombinev16qi): New.
	* config/arm/vec-common.md (vec_perm_const<VALL>): New.
	(vec_perm<VE>): New.

testsuite/
	* lib/target-supports.exp (check_effective_target_vect_perm,
	check_effective_target_vect_perm_byte,
	check_effective_target_vect_perm_short): Enable for arm neon.

From-SVN: r183051
parent 18f0fe6b
2012-01-10 Richard Henderson <rth@redhat.com>
* config/arm/arm.c (arm_vectorize_vec_perm_const_ok,
TARGET_VECTORIZE_VEC_PERM_CONST_OK, neon_split_vcombine, MAX_VECT_LEN,
struct expand_vec_perm_d, arm_expand_vec_perm_1, arm_expand_vec_perm,
arm_evpc_neon_vuzp, arm_evpc_neon_vzip, arm_evpc_neon_vrev,
arm_evpc_neon_vtrn, arm_evpc_neon_vtbl, arm_expand_vec_perm_const_1,
arm_expand_vec_perm_const): New.
* config/arm/arm-protos.h: Update.
* config/arm/neon.md (UNSPEC_VCONCAT): New.
(*neon_vswp<VDQX>): New.
(neon_vcombine<VDX>): Use neon_split_vcombine.
(neon_vtbl1v16qi, neon_vtbl2v16qi, neon_vcombinev16qi): New.
* config/arm/vec-common.md (vec_perm_const<VALL>): New.
(vec_perm<VE>): New.
2012-01-10 Richard Henderson <rth@redhat.com>
* config/arm/arm.c (arm_gen_compare_reg): Add scratch argument;
use it if reload_completed.
(arm_legitimize_sync_memory, arm_emit, arm_insn_count, arm_count,
......
......@@ -86,6 +86,7 @@ extern void neon_emit_pair_result_insn (enum machine_mode,
rtx (*) (rtx, rtx, rtx, rtx),
rtx, rtx, rtx);
extern void neon_disambiguate_copy (rtx *, rtx *, rtx *, unsigned int);
extern void neon_split_vcombine (rtx op[3]);
extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx,
bool);
extern bool arm_tls_referenced_p (rtx);
......@@ -243,4 +244,7 @@ extern const struct tune_params *current_tune;
extern int vfp3_const_double_for_fract_bits (rtx);
#endif /* RTX_CODE */
extern void arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
extern bool arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
#endif /* ! GCC_ARM_PROTOS_H */
;; ARM NEON coprocessor Machine Description
;; Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
;; Copyright (C) 2006, 2007, 2008, 2009, 2010, 2012
;; Free Software Foundation, Inc.
;; Written by CodeSourcery.
;;
;; This file is part of GCC.
......@@ -35,6 +36,7 @@
UNSPEC_VCGE
UNSPEC_VCGT
UNSPEC_VCLS
UNSPEC_VCONCAT
UNSPEC_VCVT
UNSPEC_VCVT_N
UNSPEC_VEXT
......@@ -2860,6 +2862,20 @@
DONE;
})
;; Exchange the contents of two NEON registers of the same mode with a
;; single vswp.  Both operands are read and written ("+w"), and the RTL
;; is a parallel of the two cross-assignments.
;; Disabled before reload because we don't want combine doing something silly,
;; but used by the post-reload expansion of neon_vcombine.
;; The pattern has exactly two operands (0 and 1), so those are the ones
;; the output template must print.
(define_insn "*neon_vswp<mode>"
  [(set (match_operand:VDQX 0 "s_register_operand" "+w")
	(match_operand:VDQX 1 "s_register_operand" "+w"))
   (set (match_dup 1) (match_dup 0))]
  "TARGET_NEON && reload_completed"
  "vswp\t%<V_reg>0, %<V_reg>1"
  [(set (attr "neon_type")
	(if_then_else (match_test "<Is_d_reg>")
		      (const_string "neon_bp_simple")
		      (const_string "neon_bp_2cycle")))]
)
;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
;; FIXME: A different implementation of this builtin could make it much
......@@ -2867,48 +2883,19 @@
;; it so that the reg allocator puts things in the right places magically
;; instead). Lack of subregs for vectors makes that tricky though, I think.
(define_insn "neon_vcombine<mode>"
(define_insn_and_split "neon_vcombine<mode>"
[(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
(vec_concat:<V_DOUBLE> (match_operand:VDX 1 "s_register_operand" "w")
(vec_concat:<V_DOUBLE>
(match_operand:VDX 1 "s_register_operand" "w")
(match_operand:VDX 2 "s_register_operand" "w")))]
"TARGET_NEON"
"#"
"&& reload_completed"
[(const_int 0)]
{
int dest = REGNO (operands[0]);
int src1 = REGNO (operands[1]);
int src2 = REGNO (operands[2]);
rtx destlo;
if (src1 == dest && src2 == dest + 2)
return "";
else if (src2 == dest && src1 == dest + 2)
/* Special case of reversed high/low parts. */
return "vswp\t%P1, %P2";
destlo = gen_rtx_REG (<MODE>mode, dest);
if (!reg_overlap_mentioned_p (operands[2], destlo))
{
/* Try to avoid unnecessary moves if part of the result is in the right
place already. */
if (src1 != dest)
output_asm_insn ("vmov\t%e0, %P1", operands);
if (src2 != dest + 2)
output_asm_insn ("vmov\t%f0, %P2", operands);
}
else
{
if (src2 != dest + 2)
output_asm_insn ("vmov\t%f0, %P2", operands);
if (src1 != dest)
output_asm_insn ("vmov\t%e0, %P1", operands);
}
return "";
}
;; We set the neon_type attribute based on the vmov instructions above.
[(set_attr "length" "8")
(set_attr "neon_type" "neon_bp_simple")]
)
neon_split_vcombine (operands);
DONE;
})
(define_expand "neon_vget_high<mode>"
[(match_operand:<V_HALF> 0 "s_register_operand")
......@@ -3920,6 +3907,83 @@
[(set_attr "neon_type" "neon_bp_3cycle")]
)
;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a single V16QI first operand.  Emitted as "#"
;; and split after reload into two neon_vtbl2v8qi insns, one for each
;; V8QI half of the destination.  The destination is early-clobber.
(define_insn_and_split "neon_vtbl1v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
	(unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
		       (match_operand:V16QI 2 "s_register_operand" "w")]
		      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* Operand 1 is viewed as TImode and handed whole to both half-width
     insns; the destination and operand 2 are taken one V8QI half at a
     time.  */
  rtx whole1 = gen_lowpart (TImode, operands[1]);
  unsigned lo_ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  unsigned hi_ofs = subreg_highpart_offset (V8QImode, V16QImode);
  rtx dest_half, op2_half;

  /* Low halves.  */
  dest_half = simplify_subreg (V8QImode, operands[0], V16QImode, lo_ofs);
  op2_half = simplify_subreg (V8QImode, operands[2], V16QImode, lo_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, whole1, op2_half));

  /* High halves.  */
  dest_half = simplify_subreg (V8QImode, operands[0], V16QImode, hi_ofs);
  op2_half = simplify_subreg (V8QImode, operands[2], V16QImode, hi_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, whole1, op2_half));

  DONE;
})
;; V16QI table lookup whose first operand is an OImode register group.
;; Emitted as "#" and split after reload into two neon_vtbl2v8qi insns,
;; one for each V8QI half of the destination.  The destination is
;; early-clobber.
(define_insn_and_split "neon_vtbl2v16qi"
  [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
	(unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
		       (match_operand:V16QI 2 "s_register_operand" "w")]
		      UNSPEC_VTBL))]
  "TARGET_NEON"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  /* Operand 1 is passed unchanged to both half-width insns; the
     destination and operand 2 are taken one V8QI half at a time.
     NOTE(review): operand 1 is OImode here, whereas neon_vtbl1v16qi
     passes a TImode value to the same gen_neon_vtbl2v8qi generator --
     confirm that generator is the intended one for an OImode input.  */
  rtx whole1 = operands[1];
  unsigned lo_ofs = subreg_lowpart_offset (V8QImode, V16QImode);
  unsigned hi_ofs = subreg_highpart_offset (V8QImode, V16QImode);
  rtx dest_half, op2_half;

  /* Low halves.  */
  dest_half = simplify_subreg (V8QImode, operands[0], V16QImode, lo_ofs);
  op2_half = simplify_subreg (V8QImode, operands[2], V16QImode, lo_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, whole1, op2_half));

  /* High halves.  */
  dest_half = simplify_subreg (V8QImode, operands[0], V16QImode, hi_ofs);
  op2_half = simplify_subreg (V8QImode, operands[2], V16QImode, hi_ofs);
  emit_insn (gen_neon_vtbl2v8qi (dest_half, whole1, op2_half));

  DONE;
})
;; ??? Logically we should extend the regular neon_vcombine pattern to
;; handle quad-word input modes, producing octa-word output modes. But
;; that requires us to add support for octa-word vector modes in moves.
;; That seems overkill for this one use in vec_perm.
;; Combine two V16QI registers (operands 1 and 2) into one OImode
;; register (operand 0).  The combination is expressed as an
;; UNSPEC_VCONCAT rather than a vec_concat.  The insn is emitted as "#"
;; and is always split once reload has completed; the split emits no RTL
;; template of its own and hands the operand array to neon_split_vcombine.
(define_insn_and_split "neon_vcombinev16qi"
[(set (match_operand:OI 0 "s_register_operand" "=w")
(unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
(match_operand:V16QI 2 "s_register_operand" "w")]
UNSPEC_VCONCAT))]
"TARGET_NEON"
"#"
"&& reload_completed"
[(const_int 0)]
{
/* All of the work is done by the shared helper, which is also used by
   the neon_vcombine<mode> split.  */
neon_split_vcombine (operands);
DONE;
})
(define_insn "neon_vtbx1v8qi"
[(set (match_operand:V8QI 0 "s_register_operand" "=w")
(unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
......
;; Machine Description for shared bits common to IWMMXT and Neon.
;; Copyright (C) 2006, 2007, 2010 Free Software Foundation, Inc.
;; Copyright (C) 2006, 2007, 2010, 2012 Free Software Foundation, Inc.
;; Written by CodeSourcery.
;;
;; This file is part of GCC.
......@@ -108,3 +108,29 @@
|| (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
{
})
;; Constant-selector vector permute.  Operand 0 is the result, operands 1
;; and 2 are the input vectors, and operand 3 is the selector, which has
;; no predicate and uses the <V_cmp_result> mode.  The expansion is
;; delegated to arm_expand_vec_perm_const; when that returns false the
;; expander FAILs.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VALL 0 "s_register_operand" "")
   (match_operand:VALL 1 "s_register_operand" "")
   (match_operand:VALL 2 "s_register_operand" "")
   (match_operand:<V_cmp_result> 3 "" "")]
  "TARGET_NEON
   || (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (<MODE>mode))"
{
  bool expanded = arm_expand_vec_perm_const (operands[0], operands[1],
					     operands[2], operands[3]);

  /* Give up on this expander if the helper could not handle the
     selector.  */
  if (!expanded)
    FAIL;
  DONE;
})
;; Variable-selector vector permute.  Operand 0 is the result, operands 1
;; and 2 are the input vectors and operand 3 is the selector; all four are
;; registers of the same VE mode.  Only enabled for NEON when the target is
;; not big-endian.  The expansion is delegated unconditionally to
;; arm_expand_vec_perm, so this expander never FAILs.
(define_expand "vec_perm<mode>"
[(match_operand:VE 0 "s_register_operand" "")
(match_operand:VE 1 "s_register_operand" "")
(match_operand:VE 2 "s_register_operand" "")
(match_operand:VE 3 "s_register_operand" "")]
"TARGET_NEON && !BYTES_BIG_ENDIAN"
{
arm_expand_vec_perm (operands[0], operands[1], operands[2], operands[3]);
DONE;
})
2012-01-10 Richard Henderson <rth@redhat.com>
* lib/target-supports.exp (check_effective_target_vect_perm,
check_effective_target_vect_perm_byte,
check_effective_target_vect_perm_short): Enable for arm neon.
2012-01-09 Tobias Burnus <burnus@net-b.de>
PR fortran/46328
......
# Copyright (C) 1999, 2001, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
# 2011 Free Software Foundation, Inc.
# 2011, 2012 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
......@@ -2733,7 +2733,8 @@ proc check_effective_target_vect_perm { } {
verbose "check_effective_target_vect_perm: using cached result" 2
} else {
set et_vect_perm_saved 0
if { [istarget powerpc*-*-*]
if { [is-effective-target arm_neon_ok]
|| [istarget powerpc*-*-*]
|| [istarget spu-*-*]
|| [istarget i?86-*-*]
|| [istarget x86_64-*-*] } {
......@@ -2756,7 +2757,8 @@ proc check_effective_target_vect_perm_byte { } {
verbose "check_effective_target_vect_perm_byte: using cached result" 2
} else {
set et_vect_perm_byte_saved 0
if { [istarget powerpc*-*-*]
if { [is-effective-target arm_neon_ok]
|| [istarget powerpc*-*-*]
|| [istarget spu-*-*] } {
set et_vect_perm_byte_saved 1
}
......@@ -2777,7 +2779,8 @@ proc check_effective_target_vect_perm_short { } {
verbose "check_effective_target_vect_perm_short: using cached result" 2
} else {
set et_vect_perm_short_saved 0
if { [istarget powerpc*-*-*]
if { [is-effective-target arm_neon_ok]
|| [istarget powerpc*-*-*]
|| [istarget spu-*-*] } {
set et_vect_perm_short_saved 1
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment