Commit b21f7d53 by Richard Sandiford Committed by Richard Sandiford

[AArch64] Add SVE conditional floating-point unary patterns

This patch adds patterns to match conditional unary operations
on floating-point modes.  At the moment we rely on combine to merge
separate arithmetic and vcond_mask operations, and since the latter
doesn't accept zero operands, we miss out on the opportunity to use
the movprfx /z alternative.  (This alternative is tested by the ACLE
patches though.)

2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
	    Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

gcc/
	* config/aarch64/aarch64-sve.md
	(*cond_<SVE_COND_FP_UNARY:optab><SVE_F:mode>_2): New pattern.
	(*cond_<SVE_COND_FP_UNARY:optab><SVE_F:mode>_any): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/sve/cond_unary_1.c: Add tests for
	floating-point types.
	* gcc.target/aarch64/sve/cond_unary_2.c: Likewise.
	* gcc.target/aarch64/sve/cond_unary_3.c: Likewise.
	* gcc.target/aarch64/sve/cond_unary_4.c: Likewise.

Co-Authored-By: Kugan Vivekanandarajah <kuganv@linaro.org>

From-SVN: r274477
parent 3c9f4963
...@@ -2,6 +2,13 @@ ...@@ -2,6 +2,13 @@
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
* config/aarch64/aarch64-sve.md * config/aarch64/aarch64-sve.md
(*cond_<SVE_COND_FP_UNARY:optab><SVE_F:mode>_2): New pattern.
(*cond_<SVE_COND_FP_UNARY:optab><SVE_F:mode>_any): Likewise.
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
* config/aarch64/aarch64-sve.md
(*cond_<SVE_INT_UNARY:optab><SVE_I:mode>_2): New pattern. (*cond_<SVE_INT_UNARY:optab><SVE_I:mode>_2): New pattern.
(*cond_<SVE_INT_UNARY:optab><SVE_I:mode>_any): Likewise. (*cond_<SVE_INT_UNARY:optab><SVE_I:mode>_any): Likewise.
......
...@@ -1624,6 +1624,62 @@ ...@@ -1624,6 +1624,62 @@
"<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
) )
;; Predicated floating-point unary arithmetic, merging with the first input.
;;
;; Operands:
;;   0 - destination SVE_F register.
;;   1 - <VPRED> register; the first element of the outer UNSPEC_SEL.
;;   2 - SVE_F input register.  It appears a second time, through
;;       (match_dup 2), as the last element of the UNSPEC_SEL, which is
;;       what makes this the "merge with the first input" form.
;;   3 - first element of the inner SVE_COND_FP_UNARY unspec.  It has no
;;       mode or predicate string, so it is validated only by the insn
;;       condition.
;;   4 - SImode operand accepted by aarch64_sve_gp_strictness.
;;
;; Alternatives:
;;   1 - operand 2 is tied to operand 0 (constraint "0"), so a single
;;       <sve_fp_op> that reads and writes %0 is emitted.
;;   2 - operand 2 is in a separate "w" register.  The output is a MOVPRFX
;;       of %2 into %0 followed by <sve_fp_op> reading %2.  The destination
;;       is earlyclobber ("&") and the alternative is disparaged ("?").
;;
;; The insn condition requires TARGET_SVE and
;; aarch64_sve_pred_dominates_p (&operands[3], operands[1]).
;; NOTE(review): presumably that helper checks that the inner predicate
;; (operand 3) is active wherever operand 1 is -- confirm against its
;; definition, which is not visible here.
;;
;; Rewrite step: when operands 1 and 3 are not rtx_equal_p, operand 3 is
;; replaced with a copy of operand 1.
;;
;; The "movprfx" attribute is "yes" only for the second alternative.
(define_insn_and_rewrite "*cond_<optab><mode>_2"
[(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
(unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(unspec:SVE_F
[(match_operand 3)
(match_operand:SI 4 "aarch64_sve_gp_strictness")
(match_operand:SVE_F 2 "register_operand" "0, w")]
SVE_COND_FP_UNARY)
(match_dup 2)]
UNSPEC_SEL))]
"TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
"&& !rtx_equal_p (operands[1], operands[3])"
{
operands[3] = copy_rtx (operands[1]);
}
[(set_attr "movprfx" "*,yes")]
)
;; Predicated floating-point unary arithmetic, merging with an independent
;; value.
;;
;; The earlyclobber isn't needed for the first alternative, but omitting
;; it would only help the case in which operands 2 and 3 are the same,
;; which is handled above rather than here. Marking all the alternatives
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
;;
;; Operands:
;;   0 - destination SVE_F register, earlyclobber ("&") in every
;;       alternative.
;;   1 - <VPRED> register; the first element of the outer UNSPEC_SEL.
;;   2 - SVE_F input register to the unary operation ("w" in every
;;       alternative).
;;   3 - last element of the UNSPEC_SEL, accepted by
;;       aarch64_simd_reg_or_zero.  Its constraint picks the alternative:
;;       "0" (tied to operand 0), "Dz", or "w".
;;   4 - first element of the inner SVE_COND_FP_UNARY unspec.  It has no
;;       mode or predicate string, so it is validated only by the insn
;;       condition.
;;   5 - SImode operand accepted by aarch64_sve_gp_strictness.
;;
;; Alternatives:
;;   1 - operand 3 is tied to operand 0, so a single <sve_fp_op> from %2
;;       into %0 is emitted.
;;   2 - operand 3 matches "Dz" (presumably the zero constant allowed by
;;       aarch64_simd_reg_or_zero -- confirm).  The output is a predicated
;;       "%1/z" MOVPRFX of %2 into %0 followed by <sve_fp_op>.
;;   3 - operand 3 is in a separate "w" register.  The output is a MOVPRFX
;;       of %3 into %0 followed by <sve_fp_op>.
;;
;; The insn condition requires TARGET_SVE, rejects the case in which
;; operands 2 and 3 are rtx_equal_p, and requires
;; aarch64_sve_pred_dominates_p (&operands[4], operands[1]).
;; NOTE(review): presumably that helper checks that the inner predicate
;; (operand 4) is active wherever operand 1 is -- confirm against its
;; definition, which is not visible here.
;;
;; Rewrite step: when operands 1 and 4 are not rtx_equal_p, operand 4 is
;; replaced with a copy of operand 1.
;;
;; The "movprfx" attribute is "yes" for the second and third alternatives.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
[(set (match_operand:SVE_F 0 "register_operand" "=&w, ?&w, ?&w")
(unspec:SVE_F
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
(unspec:SVE_F
[(match_operand 4)
(match_operand:SI 5 "aarch64_sve_gp_strictness")
(match_operand:SVE_F 2 "register_operand" "w, w, w")]
SVE_COND_FP_UNARY)
(match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
UNSPEC_SEL))]
"TARGET_SVE
&& !rtx_equal_p (operands[2], operands[3])
&& aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
"@
<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
"&& !rtx_equal_p (operands[1], operands[4])"
{
operands[4] = copy_rtx (operands[1]);
}
[(set_attr "movprfx" "*,yes,yes")]
)
;; ------------------------------------------------------------------------- ;; -------------------------------------------------------------------------
;; ---- [PRED] Inverse ;; ---- [PRED] Inverse
;; ------------------------------------------------------------------------- ;; -------------------------------------------------------------------------
......
2019-08-14 Richard Sandiford <richard.sandiford@arm.com> 2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
* gcc.target/aarch64/sve/cond_unary_1.c: Add tests for
floating-point types.
* gcc.target/aarch64/sve/cond_unary_2.c: Likewise.
* gcc.target/aarch64/sve/cond_unary_3.c: Likewise.
* gcc.target/aarch64/sve/cond_unary_4.c: Likewise.
2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
* gcc.target/aarch64/sve/cond_unary_1.c: New test. * gcc.target/aarch64/sve/cond_unary_1.c: New test.
* gcc.target/aarch64/sve/cond_unary_1_run.c: Likewise. * gcc.target/aarch64/sve/cond_unary_1_run.c: Likewise.
* gcc.target/aarch64/sve/cond_unary_2.c: Likewise. * gcc.target/aarch64/sve/cond_unary_2.c: Likewise.
......
...@@ -15,15 +15,22 @@ ...@@ -15,15 +15,22 @@
r[i] = pred[i] ? OP (a[i]) : a[i]; \ r[i] = pred[i] ? OP (a[i]) : a[i]; \
} }
#define TEST_TYPE(T, TYPE) \ #define TEST_INT_TYPE(T, TYPE) \
T (TYPE, abs) \ T (TYPE, abs) \
T (TYPE, neg) T (TYPE, neg)
#define TEST_FLOAT_TYPE(T, TYPE, SUFFIX) \
T (TYPE, __builtin_fabs##SUFFIX) \
T (TYPE, neg)
#define TEST_ALL(T) \ #define TEST_ALL(T) \
TEST_TYPE (T, int8_t) \ TEST_INT_TYPE (T, int8_t) \
TEST_TYPE (T, int16_t) \ TEST_INT_TYPE (T, int16_t) \
TEST_TYPE (T, int32_t) \ TEST_INT_TYPE (T, int32_t) \
TEST_TYPE (T, int64_t) TEST_INT_TYPE (T, int64_t) \
TEST_FLOAT_TYPE (T, _Float16, f16) \
TEST_FLOAT_TYPE (T, float, f) \
TEST_FLOAT_TYPE (T, double, )
TEST_ALL (DEF_LOOP) TEST_ALL (DEF_LOOP)
...@@ -37,6 +44,14 @@ TEST_ALL (DEF_LOOP) ...@@ -37,6 +44,14 @@ TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ /* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ /* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-not {\tmov\tz} } } */ /* { dg-final { scan-assembler-not {\tmov\tz} } } */
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ /* { dg-final { scan-assembler-not {\tmovprfx\t} } } */
/* XFAILed because the ?: gets canonicalized so that the operation is in /* XFAILed because the ?: gets canonicalized so that the operation is in
......
...@@ -16,15 +16,22 @@ ...@@ -16,15 +16,22 @@
r[i] = pred[i] ? OP (a[i]) : b[i]; \ r[i] = pred[i] ? OP (a[i]) : b[i]; \
} }
#define TEST_TYPE(T, TYPE) \ #define TEST_INT_TYPE(T, TYPE) \
T (TYPE, abs) \ T (TYPE, abs) \
T (TYPE, neg) T (TYPE, neg)
#define TEST_FLOAT_TYPE(T, TYPE, SUFFIX) \
T (TYPE, __builtin_fabs##SUFFIX) \
T (TYPE, neg)
#define TEST_ALL(T) \ #define TEST_ALL(T) \
TEST_TYPE (T, int8_t) \ TEST_INT_TYPE (T, int8_t) \
TEST_TYPE (T, int16_t) \ TEST_INT_TYPE (T, int16_t) \
TEST_TYPE (T, int32_t) \ TEST_INT_TYPE (T, int32_t) \
TEST_TYPE (T, int64_t) TEST_INT_TYPE (T, int64_t) \
TEST_FLOAT_TYPE (T, _Float16, f16) \
TEST_FLOAT_TYPE (T, float, f) \
TEST_FLOAT_TYPE (T, double, )
TEST_ALL (DEF_LOOP) TEST_ALL (DEF_LOOP)
...@@ -38,6 +45,17 @@ TEST_ALL (DEF_LOOP) ...@@ -38,6 +45,17 @@ TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ /* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ /* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-not {\tmov\tz} } } */ /* { dg-final { scan-assembler-not {\tmov\tz} } } */
/* { dg-final { scan-assembler-not {\tmovprfx\t} } } */ /* At the moment we don't manage to avoid using MOVPRFX for the
floating-point functions. */
/* { dg-final { scan-assembler-not {\tmovprfx\t} { xfail *-*-* } } } */
/* { dg-final { scan-assembler-times {\tmovprfx\t} 6 } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */ /* { dg-final { scan-assembler-not {\tsel\t} } } */
...@@ -15,15 +15,22 @@ ...@@ -15,15 +15,22 @@
r[i] = pred[i] ? OP (a[i]) : 5; \ r[i] = pred[i] ? OP (a[i]) : 5; \
} }
#define TEST_TYPE(T, TYPE) \ #define TEST_INT_TYPE(T, TYPE) \
T (TYPE, abs) \ T (TYPE, abs) \
T (TYPE, neg) T (TYPE, neg)
#define TEST_FLOAT_TYPE(T, TYPE, SUFFIX) \
T (TYPE, __builtin_fabs##SUFFIX) \
T (TYPE, neg)
#define TEST_ALL(T) \ #define TEST_ALL(T) \
TEST_TYPE (T, int8_t) \ TEST_INT_TYPE (T, int8_t) \
TEST_TYPE (T, int16_t) \ TEST_INT_TYPE (T, int16_t) \
TEST_TYPE (T, int32_t) \ TEST_INT_TYPE (T, int32_t) \
TEST_TYPE (T, int64_t) TEST_INT_TYPE (T, int64_t) \
TEST_FLOAT_TYPE (T, _Float16, f16) \
TEST_FLOAT_TYPE (T, float, f) \
TEST_FLOAT_TYPE (T, double, )
TEST_ALL (DEF_LOOP) TEST_ALL (DEF_LOOP)
...@@ -37,7 +44,15 @@ TEST_ALL (DEF_LOOP) ...@@ -37,7 +44,15 @@ TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ /* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ /* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 8 } } */ /* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 14 } } */
/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ /* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */ /* { dg-final { scan-assembler-not {\tsel\t} } } */
...@@ -15,15 +15,22 @@ ...@@ -15,15 +15,22 @@
r[i] = pred[i] ? OP (a[i]) : 0; \ r[i] = pred[i] ? OP (a[i]) : 0; \
} }
#define TEST_TYPE(T, TYPE) \ #define TEST_INT_TYPE(T, TYPE) \
T (TYPE, abs) \ T (TYPE, abs) \
T (TYPE, neg) T (TYPE, neg)
#define TEST_FLOAT_TYPE(T, TYPE, SUFFIX) \
T (TYPE, __builtin_fabs##SUFFIX) \
T (TYPE, neg)
#define TEST_ALL(T) \ #define TEST_ALL(T) \
TEST_TYPE (T, int8_t) \ TEST_INT_TYPE (T, int8_t) \
TEST_TYPE (T, int16_t) \ TEST_INT_TYPE (T, int16_t) \
TEST_TYPE (T, int32_t) \ TEST_INT_TYPE (T, int32_t) \
TEST_TYPE (T, int64_t) TEST_INT_TYPE (T, int64_t) \
TEST_FLOAT_TYPE (T, _Float16, f16) \
TEST_FLOAT_TYPE (T, float, f) \
TEST_FLOAT_TYPE (T, double, )
TEST_ALL (DEF_LOOP) TEST_ALL (DEF_LOOP)
...@@ -37,11 +44,19 @@ TEST_ALL (DEF_LOOP) ...@@ -37,11 +44,19 @@ TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */ /* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */ /* { dg-final { scan-assembler-times {\tneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfabs\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.s, p[0-7]/m,} 1 } } */
/* { dg-final { scan-assembler-times {\tfneg\tz[0-9]+\.d, p[0-7]/m,} 1 } } */
/* Really we should be able to use MOVPRFX /z here, but at the moment /* Really we should be able to use MOVPRFX /z here, but at the moment
we're relying on combine to merge a SEL and an arithmetic operation, we're relying on combine to merge a SEL and an arithmetic operation,
and the SEL doesn't allow the "false" value to be zero when the "true" and the SEL doesn't allow the "false" value to be zero when the "true"
value is a register. */ value is a register. */
/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 8 } } */ /* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 14 } } */
/* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */ /* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */
/* { dg-final { scan-assembler-not {\tsel\t} } } */ /* { dg-final { scan-assembler-not {\tsel\t} } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment