Commit 315fdae8 by Richard Earnshaw Committed by Richard Earnshaw

[aarch64] PR target/87369 Prefer bsl/bit/bif for copysign

The copysign operations will almost always be performed on values in
floating-point registers.  As such, we do not want the compiler to
simplify the operations into code sequences that can only be done
using the general-purpose register set.  Unfortunately, this is what
is currently happening.

Fortunately, it seems quite unlikely that copysign() will be
subsequently followed by other logical operations on the values
involved, so I think it is acceptable to use an unspec here.  This
allows us to preserve the operation in a form that allows the register
allocator to make the right choice later on, without limitation on the
final form of the operation (well, if we do end up using the gp
register bank, we get a dead constant load that we cannot easily
eliminate at a late stage).

	PR target/87369
	* config/aarch64/iterators.md (sizem1): Add sizes for SFmode and DFmode.
	(Vbtype): Add SFmode mapping.
	* config/aarch64/aarch64.md (copysigndf3, copysignsf3): Delete.
	(copysign<GPF:mode>3): New expand pattern.
	(copysign<GPF:mode>3_insn): New insn pattern.

From-SVN: r267019
parent c47fb5d9
2018-12-11 Richard Earnshaw <rearnsha@arm.com>
PR target/87369
* config/aarch64/iterators.md (sizem1): Add sizes for SFmode and DFmode.
(Vbtype): Add SFmode mapping.
* config/aarch64/aarch64.md (copysigndf3, copysignsf3): Delete.
(copysign<GPF:mode>3): New expand pattern.
(copysign<GPF:mode>3_insn): New insn pattern.
2018-12-11 Richard Biener <rguenther@suse.de> 2018-12-11 Richard Biener <rguenther@suse.de>
* ccmp.c (ccmp_candidate_p): Use GIMPLE API properly. * ccmp.c (ccmp_candidate_p): Use GIMPLE API properly.
...@@ -222,6 +222,7 @@ ...@@ -222,6 +222,7 @@
UNSPEC_FADDA UNSPEC_FADDA
UNSPEC_REV_SUBREG UNSPEC_REV_SUBREG
UNSPEC_SPECULATION_TRACKER UNSPEC_SPECULATION_TRACKER
UNSPEC_COPYSIGN
]) ])
(define_c_enum "unspecv" [ (define_c_enum "unspecv" [
...@@ -5987,49 +5988,47 @@ ...@@ -5987,49 +5988,47 @@
;; LDR d2, #(1 << 63) ;; LDR d2, #(1 << 63)
;; BSL v2.8b, [y], [x] ;; BSL v2.8b, [y], [x]
;; ;;
;; or another, equivalent, sequence using one of BSL/BIT/BIF. ;; or another, equivalent, sequence using one of BSL/BIT/BIF. Because
;; aarch64_simd_bsldf will select the best suited of these instructions ;; we expect these operations to nearly always operate on
;; to generate based on register allocation, and knows how to partially ;; floating-point values, we do not want the operation to be
;; constant fold based on the values of X and Y, so expand through that. ;; simplified into a bit-field insert operation that operates on the
;; integer side, since typically that would involve three inter-bank
(define_expand "copysigndf3" ;; register copies. As we do not expect copysign to be followed by
[(match_operand:DF 0 "register_operand") ;; other logical operations on the result, it seems preferable to keep
(match_operand:DF 1 "register_operand") ;; this as an unspec operation, rather than exposing the underlying
(match_operand:DF 2 "register_operand")] ;; logic to the compiler.
(define_expand "copysign<GPF:mode>3"
[(match_operand:GPF 0 "register_operand")
(match_operand:GPF 1 "register_operand")
(match_operand:GPF 2 "register_operand")]
"TARGET_FLOAT && TARGET_SIMD" "TARGET_FLOAT && TARGET_SIMD"
{ {
rtx mask = gen_reg_rtx (DImode); rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
emit_move_insn (mask, GEN_INT (HOST_WIDE_INT_1U << 63)); emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U
emit_insn (gen_aarch64_simd_bsldf (operands[0], mask, << (GET_MODE_BITSIZE (<MODE>mode) - 1)));
operands[2], operands[1])); emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
bitmask));
DONE; DONE;
} }
) )
;; As above, but we must first get to a 64-bit value if we wish to use (define_insn "copysign<GPF:mode>3_insn"
;; aarch64_simd_bslv2sf. [(set (match_operand:GPF 0 "register_operand" "=w,w,w,r")
(unspec:GPF [(match_operand:GPF 1 "register_operand" "w,0,w,r")
(define_expand "copysignsf3" (match_operand:GPF 2 "register_operand" "w,w,0,0")
[(match_operand:SF 0 "register_operand") (match_operand:<V_INT_EQUIV> 3 "register_operand" "0,w,w,X")]
(match_operand:SF 1 "register_operand") UNSPEC_COPYSIGN))]
(match_operand:SF 2 "register_operand")]
"TARGET_FLOAT && TARGET_SIMD" "TARGET_FLOAT && TARGET_SIMD"
{ "@
rtx v_bitmask = gen_reg_rtx (V2SImode); bsl\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
bit\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
/* Juggle modes to get us in to a vector mode for BSL. */ bif\\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype>
rtx op1 = lowpart_subreg (DImode, operands[1], SFmode); bfxil\\t%<w1>0, %<w1>1, #0, <sizem1>"
rtx op2 = lowpart_subreg (V2SFmode, operands[2], SFmode); [(set_attr "type" "neon_bsl<q>,neon_bsl<q>,neon_bsl<q>,bfm")]
rtx tmp = gen_reg_rtx (V2SFmode);
emit_move_insn (v_bitmask,
aarch64_simd_gen_const_vector_dup (V2SImode,
HOST_WIDE_INT_M1U << 31));
emit_insn (gen_aarch64_simd_bslv2sf (tmp, v_bitmask, op2, op1));
emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode));
DONE;
}
) )
;; For xorsign (x, y), we want to generate: ;; For xorsign (x, y), we want to generate:
;; ;;
;; LDR d2, #1<<63 ;; LDR d2, #1<<63
......
...@@ -601,7 +601,8 @@ ...@@ -601,7 +601,8 @@
(define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")]) (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
;; Give the ordinal of the MSB in the mode ;; Give the ordinal of the MSB in the mode
(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")]) (define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")
(HF "#15") (SF "#31") (DF "#63")])
;; Attribute to describe constants acceptable in logical operations ;; Attribute to describe constants acceptable in logical operations
(define_mode_attr lconst [(SI "K") (DI "L")]) (define_mode_attr lconst [(SI "K") (DI "L")])
...@@ -687,7 +688,7 @@ ...@@ -687,7 +688,7 @@
(V8HF "16b") (V2SF "8b") (V8HF "16b") (V2SF "8b")
(V4SF "16b") (V2DF "16b") (V4SF "16b") (V2DF "16b")
(DI "8b") (DF "8b") (DI "8b") (DF "8b")
(SI "8b")]) (SI "8b") (SF "8b")])
;; Define element mode for each vector mode. ;; Define element mode for each vector mode.
(define_mode_attr VEL [(V8QI "QI") (V16QI "QI") (VNx16QI "QI") (define_mode_attr VEL [(V8QI "QI") (V16QI "QI") (VNx16QI "QI")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment