Commit 2d57b12e by Yuliang Wang Committed by Richard Sandiford

[AArch64][SVE2] Support for EOR3 and variants of BSL

2019-10-17  Yuliang Wang  <yuliang.wang@arm.com>

gcc/
	* config/aarch64/aarch64-sve2.md (aarch64_sve2_eor3<mode>)
	(aarch64_sve2_nor<mode>, aarch64_sve2_nand<mode>)
	(aarch64_sve2_bsl<mode>, aarch64_sve2_nbsl<mode>)
	(aarch64_sve2_bsl1n<mode>, aarch64_sve2_bsl2n<mode>):
	New combine patterns.
	* config/aarch64/iterators.md (BSL_DUP): New int iterator for the
	above.
	(bsl_1st, bsl_2nd, bsl_dup, bsl_mov): Attributes for the above.

gcc/testsuite/
	* gcc.target/aarch64/sve2/eor3_1.c: New test.
	* gcc.target/aarch64/sve2/nlogic_1.c: As above.
	* gcc.target/aarch64/sve2/nlogic_2.c: As above.
	* gcc.target/aarch64/sve2/bitsel_1.c: As above.
	* gcc.target/aarch64/sve2/bitsel_2.c: As above.
	* gcc.target/aarch64/sve2/bitsel_3.c: As above.
	* gcc.target/aarch64/sve2/bitsel_4.c: As above.

From-SVN: r277110
parent 9309a547
2019-10-17 Yuliang Wang <yuliang.wang@arm.com>
* config/aarch64/aarch64-sve2.md (aarch64_sve2_eor3<mode>)
(aarch64_sve2_nor<mode>, aarch64_sve2_nand<mode>)
(aarch64_sve2_bsl<mode>, aarch64_sve2_nbsl<mode>)
(aarch64_sve2_bsl1n<mode>, aarch64_sve2_bsl2n<mode>):
New combine patterns.
* config/aarch64/iterators.md (BSL_DUP): New int iterator for the
above.
(bsl_1st, bsl_2nd, bsl_dup, bsl_mov): Attributes for the above.
2019-10-17  Aldy Hernandez  <aldyh@redhat.com>
	* tree-vrp.c (value_range_base::dump): Display +INF for both
......
@@ -142,3 +142,187 @@
}
)
;; Unpredicated 3-way exclusive OR.
;; Combines (op1 ^ op2) ^ op3 into a single EOR3 instruction.
;; EOR3 is destructive (the destination is also a source), so the first
;; three alternatives tie operand 1, 2 or 3 respectively to operand 0;
;; the last alternative ("?&w": earlyclobber, discouraged) copies
;; operand 1 into place with MOVPRFX when no input can be reused.
;; EOR3 always operates on the full .d-sized elements, which is fine
;; for a pure bitwise operation on any SVE_I mode.
(define_insn "*aarch64_sve2_eor3<mode>"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
(xor:SVE_I
(xor:SVE_I
(match_operand:SVE_I 1 "register_operand" "0, w, w, w")
(match_operand:SVE_I 2 "register_operand" "w, 0, w, w"))
(match_operand:SVE_I 3 "register_operand" "w, w, 0, w")))]
"TARGET_SVE2"
"@
eor3\t%0.d, %0.d, %2.d, %3.d
eor3\t%0.d, %0.d, %1.d, %3.d
eor3\t%0.d, %0.d, %1.d, %2.d
movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d"
[(set_attr "movprfx" "*,*,*,yes")]
)
;; Use NBSL for vector NOR: ~op1 & ~op2 == ~(op1 | op2).
;; The NOTs are predicated at the RTL level, hence the UNSPEC_PRED_X
;; wrapper; operand 3 is the (unmatched) governing predicate.  NBSL
;; itself is unpredicated, so once combine has run, the rewrite clause
;; replaces the predicate with an all-ones constant.  The "%0" in
;; operand 1's constraint marks operands 1 and 2 as commutative.
;; NBSL zd, zd, zm, zk computes ~((zd & zk) | (zm & ~zk)); with
;; zk == zd this reduces to ~(zd | zm), i.e. NOR.
(define_insn_and_rewrite "*aarch64_sve2_nor<mode>"
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_I
[(match_operand 3)
(and:SVE_I
(not:SVE_I
(match_operand:SVE_I 1 "register_operand" "%0, w"))
(not:SVE_I
(match_operand:SVE_I 2 "register_operand" "w, w")))]
UNSPEC_PRED_X))]
"TARGET_SVE2"
"@
nbsl\t%0.d, %0.d, %2.d, %0.d
movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d"
"&& !CONSTANT_P (operands[3])"
{
operands[3] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Use NBSL for vector NAND: ~op1 | ~op2 == ~(op1 & op2).
;; Same structure as the NOR pattern above: predicated NOTs under
;; UNSPEC_PRED_X, commutative operands 1 and 2, and a rewrite that
;; drops the predicate (operand 3) once combine is done.
;; NBSL zd, zd, zm, zk computes ~((zd & zk) | (zm & ~zk)); with
;; zk == zm this reduces to ~(zd & zm), i.e. NAND.
(define_insn_and_rewrite "*aarch64_sve2_nand<mode>"
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_I
[(match_operand 3)
(ior:SVE_I
(not:SVE_I
(match_operand:SVE_I 1 "register_operand" "%0, w"))
(not:SVE_I
(match_operand:SVE_I 2 "register_operand" "w, w")))]
UNSPEC_PRED_X))]
"TARGET_SVE2"
"@
nbsl\t%0.d, %0.d, %2.d, %2.d
movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d"
"&& !CONSTANT_P (operands[3])"
{
operands[3] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Unpredicated bitwise select.
;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
;; BSL_DUP iterates over {1, 2}: it selects which of the two XOR inputs
;; also appears as the outer XOR operand (the duplicated operand).  The
;; <bsl_1st>/<bsl_2nd> constraint attributes then tie the *other*
;; operand (bsl_mov) to the destination, since BSL overwrites it; the
;; second alternative uses MOVPRFX to copy bsl_mov in first.
(define_insn "*aarch64_sve2_bsl<mode>"
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
(xor:SVE_I
(and:SVE_I
(xor:SVE_I
(match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
(match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
(match_operand:SVE_I 3 "register_operand" "w, w"))
(match_dup BSL_DUP)))]
"TARGET_SVE2"
"@
bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
[(set_attr "movprfx" "*,yes")]
)
;; Unpredicated bitwise inverted select.
;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
;; Same BSL_DUP scheme as the BSL pattern above, but the whole select is
;; inverted, which maps onto NBSL.  The inversion is a predicated NOT at
;; the RTL level, hence UNSPEC_PRED_X with predicate operand 4; NBSL is
;; unpredicated, so the rewrite clause replaces the predicate with an
;; all-ones constant once combine has finished.
(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
(unspec:SVE_I
[(match_operand 4)
(not:SVE_I
(xor:SVE_I
(and:SVE_I
(xor:SVE_I
(match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
(match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
(match_operand:SVE_I 3 "register_operand" "w, w"))
(match_dup BSL_DUP)))]
UNSPEC_PRED_X))]
"TARGET_SVE2"
"@
nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Unpredicated bitwise select with inverted first operand.
;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
;; Here only the inner XOR is inverted (a predicated NOT, hence the
;; UNSPEC_PRED_X around it with predicate operand 4), which maps onto
;; BSL1N.  As in the other patterns, BSL_DUP picks which XOR input is
;; the duplicated operand and <bsl_1st>/<bsl_2nd> tie the other one
;; (bsl_mov) to the destination; the rewrite drops the predicate.
(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
(xor:SVE_I
(and:SVE_I
(unspec:SVE_I
[(match_operand 4)
(not:SVE_I
(xor:SVE_I
(match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
(match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w")))]
UNSPEC_PRED_X)
(match_operand:SVE_I 3 "register_operand" "w, w"))
(match_dup BSL_DUP)))]
"TARGET_SVE2"
"@
bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Unpredicated bitwise select with inverted second operand.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
;; Maps onto BSL2N.  The AND-of-NOTs half is predicated (UNSPEC_PRED_X,
;; predicate operand 4) and the predicate is rewritten to all-ones after
;; combine.  BSL_DUP picks which of operands 1/2 also appears (inverted)
;; inside the second AND; the other operand (bsl_mov) is tied to the
;; destination via <bsl_1st>/<bsl_2nd>, or copied with MOVPRFX.
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
(ior:SVE_I
(and:SVE_I
(match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
(match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
(unspec:SVE_I
[(match_operand 4)
(and:SVE_I
(not:SVE_I
(match_operand:SVE_I 3 "register_operand" "w, w"))
(not:SVE_I
(match_dup BSL_DUP)))]
UNSPEC_PRED_X)))]
"TARGET_SVE2"
"@
bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
;; Unpredicated bitwise select with inverted second operand, alternative form.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
;; Identical to the previous BSL2N pattern except that the two NOTs
;; inside the predicated AND appear in the opposite order
;; (~bsl_dup & ~op3 instead of ~op3 & ~bsl_dup), so both operand
;; orderings produced upstream are matched.  Emits the same BSL2N.
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
[(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
(ior:SVE_I
(and:SVE_I
(match_operand:SVE_I 1 "register_operand" "<bsl_1st>, w")
(match_operand:SVE_I 2 "register_operand" "<bsl_2nd>, w"))
(unspec:SVE_I
[(match_operand 4)
(and:SVE_I
(not:SVE_I
(match_dup BSL_DUP))
(not:SVE_I
(match_operand:SVE_I 3 "register_operand" "w, w")))]
UNSPEC_PRED_X)))]
"TARGET_SVE2"
"@
bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
"&& !CONSTANT_P (operands[4])"
{
operands[4] = CONSTM1_RTX (<VPRED>mode);
}
[(set_attr "movprfx" "*,yes")]
)
@@ -1611,6 +1611,8 @@
(define_int_iterator SHRNT [UNSPEC_SHRNT UNSPEC_RSHRNT])
;; The operand number (1 or 2) of the duplicated input in the
;; BSL/NBSL/BSL1N/BSL2N combine patterns.
(define_int_iterator BSL_DUP [1 2])
(define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
(define_int_iterator ADDSUBHN [UNSPEC_ADDHN UNSPEC_RADDHN
@@ -1976,6 +1978,18 @@
(UNSPEC_RADDHN2 "add")
(UNSPEC_RSUBHN2 "sub")])
;; Attributes keyed on BSL_DUP (the duplicated operand number, 1 or 2).
;; bsl_1st/bsl_2nd give the constraints for operands 1 and 2: the
;; non-duplicated operand gets "0" (tied to the destination) and the
;; duplicated one gets "w".
;; BSL variants: first commutative operand.
(define_int_attr bsl_1st [(1 "w") (2 "0")])
;; BSL variants: second commutative operand.
(define_int_attr bsl_2nd [(1 "0") (2 "w")])
;; BSL variants: duplicated input operand.
(define_int_attr bsl_dup [(1 "1") (2 "2")])
;; BSL variants: operand which requires preserving via movprfx.
(define_int_attr bsl_mov [(1 "2") (2 "1")])
(define_int_attr offsetlr [(UNSPEC_SSLI "") (UNSPEC_USLI "")
(UNSPEC_SSRI "offset_")
(UNSPEC_USRI "offset_")])
......
2019-10-17 Yuliang Wang <yuliang.wang@arm.com>
* gcc.target/aarch64/sve2/eor3_1.c: New test.
* gcc.target/aarch64/sve2/nlogic_1.c: As above.
* gcc.target/aarch64/sve2/nlogic_2.c: As above.
* gcc.target/aarch64/sve2/bitsel_1.c: As above.
* gcc.target/aarch64/sve2/bitsel_2.c: As above.
* gcc.target/aarch64/sve2/bitsel_3.c: As above.
* gcc.target/aarch64/sve2/bitsel_4.c: As above.
2019-10-17  Aldy Hernandez  <aldyh@redhat.com>
	* gcc.dg/tree-ssa/evrp4.c: Check for +INF instead of -1.
......
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#include <stdint.h>
#ifndef OP
#define OP(x,y,z) (((x) & (z)) | ((y) & ~(z)))
#endif
#define TYPE(N) int##N##_t
#define TEMPLATE(SIZE) \
void __attribute__ ((noinline, noclone)) \
f_##SIZE##_##OP \
(TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \
TYPE(SIZE) *restrict c, TYPE(SIZE) *restrict d, int n) \
{ \
for (int i = 0; i < n; i++) \
a[i] = OP (b[i], c[i], d[i]); \
}
TEMPLATE (8);
TEMPLATE (16);
TEMPLATE (32);
TEMPLATE (64);
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-times {\tbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
/* Inverted bitwise select: ~((x & z) | (y & ~z)) should become a single
   NBSL, with no residual EOR/AND/NOT instructions.  */
#define OP(x,y,z) (~(((x) & (z)) | ((y) & ~(z))))
#include "bitsel_1.c"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
/* Select with inverted first operand: (~x & z) | (y & ~z) should become
   a single BSL1N, with no residual EOR/BIC instructions.  */
#define OP(x,y,z) ((~(x) & (z)) | ((y) & ~(z)))
#include "bitsel_1.c"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-not {\tbic\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-times {\tbsl1n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
/* Select with inverted second operand: (x & z) | (~y & ~z) should become
   a single BSL2N, with no residual ORR/AND/NOT instructions.  */
#define OP(x,y,z) (((x) & (z)) | (~(y) & ~(z)))
#include "bitsel_1.c"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-times {\tbsl2n\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
/* Three-way XOR: x ^ y ^ z should become a single EOR3, with no
   remaining two-input EOR instructions.  */
#define OP(x,y,z) ((x) ^ (y) ^ (z))
#include "bitsel_1.c"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
/* { dg-final { scan-assembler-not {\teor\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-times {\teor3\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
#include <stdint.h>
#ifndef OP
#define OP(x,y) (~((x) | (y)))
#endif
#define TYPE(N) int##N##_t
#define TEMPLATE(SIZE) \
void __attribute__ ((noinline, noclone)) \
f_##SIZE##_##OP \
(TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \
TYPE(SIZE) *restrict c, int n) \
{ \
for (int i = 0; i < n; i++) \
a[i] = OP (b[i], c[i]); \
}
TEMPLATE (8);
TEMPLATE (16);
TEMPLATE (32);
TEMPLATE (64);
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details --save-temps" } */
/* Vector NAND: ~(x & y) should become a single NBSL, with no residual
   AND/ORR/NOT instructions.  */
#define OP(x,y) (~((x) & (y)))
#include "nlogic_1.c"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 4 "vect" } } */
/* { dg-final { scan-assembler-not {\tand\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-not {\torr\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-not {\tnot\tz[0-9]+\.[bhsd]} } } */
/* { dg-final { scan-assembler-times {\tnbsl\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 4 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment