Commit 26004f51 by Richard Sandiford Committed by Richard Sandiford

[AArch64][SVE] Remove unnecessary PTRUEs from integer arithmetic

When using the unpredicated immediate forms of MUL, LSL, LSR and ASR,
the rtl patterns would still have the predicate operand we created for
the other forms.  This patch splits the patterns after reload in order
to get rid of the predicate, like we already do for WHILE.

2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/aarch64-sve.md (*mul<mode>3, *v<optab><mode>3):
	Split the patterns after reload if we don't need the predicate
	operand.
	(*post_ra_mul<mode>3, *post_ra_v<optab><mode>3): New patterns.

gcc/testsuite/
	* gcc.target/aarch64/sve/pred_elim_2.c: New test.

From-SVN: r266892
parent 740c1ed7
2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>
* config/aarch64/aarch64-sve.md (*mul<mode>3, *v<optab><mode>3):
Split the patterns after reload if we don't need the predicate
operand.
(*post_ra_mul<mode>3, *post_ra_v<optab><mode>3): New patterns.
2018-12-07 Richard Sandiford <richard.sandiford@arm.com>
	* config/aarch64/iterators.md (SVE_UNPRED_FP_BINARY): New code
	iterator.
	(sve_fp_op): Handle minus and mult.
@@ -936,7 +936,7 @@
 ;; predicate for the first alternative, but using Upa or X isn't likely
 ;; to gain much and would make the instruction seem less uniform to the
 ;; register allocator.
-(define_insn "*mul<mode>3"
+(define_insn_and_split "*mul<mode>3"
   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_I
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
@@ -946,12 +946,30 @@
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
-   mul\t%0.<Vetype>, %0.<Vetype>, #%3
+   #
    mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  ; Split the unpredicated form after reload, so that we don't have
+  ; the unnecessary PTRUE.
+  "&& reload_completed
+   && !register_operand (operands[3], <MODE>mode)"
+  [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))]
+  ""
   [(set_attr "movprfx" "*,*,yes")]
 )
;; Unpredicated multiplications by a constant (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_mul<mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w")
(mult:SVE_I
;; The "0" constraint ties operand 1 to operand 0: the immediate form
;; of MUL is destructive, so the source register must be the same as
;; the destination.
(match_operand:SVE_I 1 "register_operand" "0")
(match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))]
;; reload_completed keeps this pattern out of pre-RA matching; it only
;; exists as the target of the post-reload split of "*mul<mode>3".
"TARGET_SVE && reload_completed"
"mul\t%0.<Vetype>, %0.<Vetype>, #%2"
)
 (define_insn "*madd<mode>"
   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
 	(plus:SVE_I
@@ -1232,7 +1250,7 @@
 ;; actually need the predicate for the first alternative, but using Upa
 ;; or X isn't likely to gain much and would make the instruction seem
 ;; less uniform to the register allocator.
-(define_insn "*v<optab><mode>3"
+(define_insn_and_split "*v<optab><mode>3"
   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_I
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
@@ -1242,12 +1260,28 @@
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
-   <shift>\t%0.<Vetype>, %2.<Vetype>, #%3
+   #
    <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  "&& reload_completed
+   && !register_operand (operands[3], <MODE>mode)"
+  [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
+  ""
   [(set_attr "movprfx" "*,*,yes")]
 )
;; Unpredicated shift operations by a constant (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_v<optab><mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w")
(ASHIFT:SVE_I
;; Unlike the MUL immediate form above, the immediate shifts take a
;; separate source register ("w"), so no tie to operand 0 is needed.
(match_operand:SVE_I 1 "register_operand" "w")
(match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
;; Post-RA only: matched via the post-reload split of "*v<optab><mode>3".
"TARGET_SVE && reload_completed"
"<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)
;; LSL, LSR and ASR by a scalar, which expands into one of the vector
;; shifts above.
(define_expand "<ASHIFT:optab><mode>3"
...

2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>

	* gcc.target/aarch64/sve/pred_elim_2.c: New test.

2018-12-07  Richard Sandiford  <richard.sandiford@arm.com>

	* gcc.target/aarch64/sve/pred_elim_1.c: New test.

2018-12-07  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
...
/* { dg-options "-O2 -ftree-vectorize" } */
#include <stdint.h>
#define TEST_OP(NAME, TYPE, OP) \
void \
NAME##_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
{ \
for (int i = 0; i < n; ++i) \
a[i] = b[i] OP; \
}
#define TEST_TYPE(TYPE) \
TEST_OP (shl, TYPE, << 6) \
TEST_OP (shr, TYPE, >> 6) \
TEST_OP (mult, TYPE, * 0x2b)
TEST_TYPE (int8_t)
TEST_TYPE (int16_t)
TEST_TYPE (int32_t)
TEST_TYPE (int64_t)
TEST_TYPE (uint8_t)
TEST_TYPE (uint16_t)
TEST_TYPE (uint32_t)
TEST_TYPE (uint64_t)
/* { dg-final { scan-assembler-times {\tlsl\t} 8 } } */
/* { dg-final { scan-assembler-times {\tlsr\t} 4 } } */
/* { dg-final { scan-assembler-times {\tasr\t} 4 } } */
/* { dg-final { scan-assembler-times {\tmul\t} 8 } } */
/* { dg-final { scan-assembler-not {\tptrue\t} } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment