Commit 740c1ed7 by Richard Sandiford Committed by Richard Sandiford

[AArch64][SVE] Remove unnecessary PTRUEs from FP arithmetic

When using the unpredicated all-register forms of FADD, FSUB and FMUL,
the rtl patterns would still have the predicate operand we created for
the other forms.  This patch splits the patterns after reload in order
to get rid of the predicate, like we already do for WHILE.

2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/iterators.md (SVE_UNPRED_FP_BINARY): New code
	iterator.
	(sve_fp_op): Handle minus and mult.
	* config/aarch64/aarch64-sve.md (*add<mode>3, *sub<mode>3)
	(*mul<mode>3): Split the patterns after reload if we don't
	need the predicate operand.
	(*post_ra_<sve_fp_op><mode>3): New pattern.

gcc/testsuite/
	* gcc.target/aarch64/sve/pred_elim_1.c: New test.

From-SVN: r266891
parent 5a58929b
2018-12-07 Richard Sandiford <richard.sandiford@arm.com>
* config/aarch64/iterators.md (SVE_UNPRED_FP_BINARY): New code
iterator.
(sve_fp_op): Handle minus and mult.
* config/aarch64/aarch64-sve.md (*add<mode>3, *sub<mode>3)
(*mul<mode>3): Split the patterns after reload if we don't
need the predicate operand.
(*post_ra_<sve_fp_op><mode>3): New pattern.
2018-12-07 Bin Cheng <bin.cheng@linux.alibaba.com> 2018-12-07 Bin Cheng <bin.cheng@linux.alibaba.com>
* profile-count.h (profile_count::operator>=): Fix typo by inverting * profile-count.h (profile_count::operator>=): Fix typo by inverting
...@@ -2194,7 +2194,7 @@ ...@@ -2194,7 +2194,7 @@
) )
;; Floating-point addition predicated with a PTRUE. ;; Floating-point addition predicated with a PTRUE.
(define_insn "*add<mode>3" (define_insn_and_split "*add<mode>3"
[(set (match_operand:SVE_F 0 "register_operand" "=w, w, w") [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
(unspec:SVE_F (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
...@@ -2206,7 +2206,12 @@ ...@@ -2206,7 +2206,12 @@
"@ "@
fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
fadd\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>" #"
; Split the unpredicated form after reload, so that we don't have
; the unnecessary PTRUE.
"&& reload_completed
&& register_operand (operands[3], <MODE>mode)"
[(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
) )
;; Unpredicated floating-point subtraction. ;; Unpredicated floating-point subtraction.
...@@ -2225,7 +2230,7 @@ ...@@ -2225,7 +2230,7 @@
) )
;; Floating-point subtraction predicated with a PTRUE. ;; Floating-point subtraction predicated with a PTRUE.
(define_insn "*sub<mode>3" (define_insn_and_split "*sub<mode>3"
[(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w") [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
(unspec:SVE_F (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
...@@ -2240,7 +2245,13 @@ ...@@ -2240,7 +2245,13 @@
fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
fsub\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>" #"
; Split the unpredicated form after reload, so that we don't have
; the unnecessary PTRUE.
"&& reload_completed
&& register_operand (operands[2], <MODE>mode)
&& register_operand (operands[3], <MODE>mode)"
[(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
) )
;; Unpredicated floating-point multiplication. ;; Unpredicated floating-point multiplication.
...@@ -2259,7 +2270,7 @@ ...@@ -2259,7 +2270,7 @@
) )
;; Floating-point multiplication predicated with a PTRUE. ;; Floating-point multiplication predicated with a PTRUE.
(define_insn "*mul<mode>3" (define_insn_and_split "*mul<mode>3"
[(set (match_operand:SVE_F 0 "register_operand" "=w, w") [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
(unspec:SVE_F (unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
...@@ -2270,9 +2281,25 @@ ...@@ -2270,9 +2281,25 @@
"TARGET_SVE" "TARGET_SVE"
"@ "@
fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
fmul\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>" #"
; Split the unpredicated form after reload, so that we don't have
; the unnecessary PTRUE.
"&& reload_completed
&& register_operand (operands[3], <MODE>mode)"
[(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
) )
;; Unpredicated floating-point binary operations (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
;;
;; The pattern is instantiated for each rtx code in SVE_UNPRED_FP_BINARY
;; and each mode in SVE_F.  Operand 0 is the destination and operands 1
;; and 2 are the two inputs; all three must be registers that satisfy
;; the "w" constraint.  There is no predicate operand.
(define_insn "*post_ra_<sve_fp_op><mode>3"
[(set (match_operand:SVE_F 0 "register_operand" "=w")
(SVE_UNPRED_FP_BINARY:SVE_F
(match_operand:SVE_F 1 "register_operand" "w")
(match_operand:SVE_F 2 "register_operand" "w")))]
;; The reload_completed test keeps this pattern from matching before
;; register allocation has finished.
"TARGET_SVE && reload_completed"
;; The mnemonic is taken from the sve_fp_op code attribute and all three
;; operands are printed as vector registers with the <Vetype> suffix.
"<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
;; Unpredicated fma (%0 = (%1 * %2) + %3). ;; Unpredicated fma (%0 = (%1 * %2) + %3).
(define_expand "fma<mode>4" (define_expand "fma<mode>4"
[(set (match_operand:SVE_F 0 "register_operand") [(set (match_operand:SVE_F 0 "register_operand")
......
...@@ -1220,6 +1220,9 @@ ...@@ -1220,6 +1220,9 @@
;; SVE integer binary division operations. ;; SVE integer binary division operations.
(define_code_iterator SVE_INT_BINARY_SD [div udiv]) (define_code_iterator SVE_INT_BINARY_SD [div udiv])
;; SVE floating-point operations with an unpredicated all-register form.
;; The "*post_ra_<sve_fp_op><mode>3" pattern iterates over these codes and
;; uses the sve_fp_op attribute to choose the mnemonic, so each code
;; listed here needs a matching sve_fp_op entry.
(define_code_iterator SVE_UNPRED_FP_BINARY [plus minus mult])
;; SVE integer comparisons. ;; SVE integer comparisons.
(define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu]) (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
...@@ -1423,6 +1426,8 @@ ...@@ -1423,6 +1426,8 @@
;; The floating-point SVE instruction that implements an rtx code. ;; The floating-point SVE instruction that implements an rtx code.
(define_code_attr sve_fp_op [(plus "fadd") (define_code_attr sve_fp_op [(plus "fadd")
(minus "fsub")
(mult "fmul")
(neg "fneg") (neg "fneg")
(abs "fabs") (abs "fabs")
(sqrt "fsqrt")]) (sqrt "fsqrt")])
......
2018-12-07 Richard Sandiford <richard.sandiford@arm.com>
* gcc.target/aarch64/sve/pred_elim_1.c: New test.
2018-12-07 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> 2018-12-07 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* gcc.target/i386/ipa-stack-alignment-2.c: Add * gcc.target/i386/ipa-stack-alignment-2.c: Add
......
/* { dg-options "-O2 -ftree-vectorize" } */
/* Define a function NAME_TYPE (a, b, c, n) that stores b[i] OP c[i]
   in a[i] for every i in [0, n).  The pointers are restrict-qualified,
   so callers must pass arrays that do not overlap.  */
#define TEST_OP(NAME, TYPE, OP)                         \
  void                                                  \
  NAME##_##TYPE (TYPE *restrict a, TYPE *restrict b,    \
                 TYPE *restrict c, int n)               \
  {                                                     \
    for (int i = 0; i < n; ++i)                         \
      a[i] = b[i] OP c[i];                              \
  }

/* Instantiate the addition, subtraction and multiplication loops for
   TYPE.  The last line of the macro must not end in a backslash:
   otherwise the first instantiation below would be spliced into the
   macro body instead of being expanded.  */
#define TEST_TYPE(TYPE)         \
  TEST_OP (add, TYPE, +)        \
  TEST_OP (sub, TYPE, -)        \
  TEST_OP (mult, TYPE, *)

/* One add/sub/mult function per type, i.e. two of each operation.  */
TEST_TYPE (float)
TEST_TYPE (double)
/* { dg-final { scan-assembler-times {\tfadd\t} 2 } } */
/* { dg-final { scan-assembler-times {\tfsub\t} 2 } } */
/* { dg-final { scan-assembler-times {\tfmul\t} 2 } } */
/* { dg-final { scan-assembler-not {\tptrue\t} } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment