Commit c38f7319 by Richard Sandiford Committed by Richard Sandiford

[AArch64] Add SVE support for integer division

After the previous patch to prevent pessimisation of divisions
by constants, this patch adds support for the SVE integer division
instructions.

2018-05-25  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* config/aarch64/iterators.md (SVE_INT_BINARY_SD): New code iterator.
	(optab, sve_int_op): Handle div and udiv.
	* config/aarch64/aarch64-sve.md (<optab><mode>3): New expander
	for SVE_INT_BINARY_SD.
	(*<optab><mode>3): New insn for the same.

gcc/testsuite/
	* gcc.target/aarch64/sve/div_1.c: New test.
	* gcc.target/aarch64/sve/div_1_run.c: Likewise.
	* gcc.target/aarch64/sve/mul_highpart_2.c: Likewise.
	* gcc.target/aarch64/sve/mul_highpart_2_run.c: Likewise.

From-SVN: r260712
parent 8f76f377
2018-05-25 Richard Sandiford <richard.sandiford@linaro.org> 2018-05-25 Richard Sandiford <richard.sandiford@linaro.org>
* config/aarch64/iterators.md (SVE_INT_BINARY_SD): New code iterator.
(optab, sve_int_op): Handle div and udiv.
* config/aarch64/aarch64-sve.md (<optab><mode>3): New expander
for SVE_INT_BINARY_SD.
(*<optab><mode>3): New insn for the same.
2018-05-25 Richard Sandiford <richard.sandiford@linaro.org>
* tree-vect-patterns.c: Include predict.h. * tree-vect-patterns.c: Include predict.h.
(vect_recog_divmod_pattern): Restrict check for division support (vect_recog_divmod_pattern): Restrict check for division support
to when optimizing for size. to when optimizing for size.
......
...@@ -1008,6 +1008,36 @@ ...@@ -1008,6 +1008,36 @@
"<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
) )
;; Unpredicated division.
;; Expander for the named pattern <optab><mode>3, instantiated for every
;; code in SVE_INT_BINARY_SD and every mode in SVE_SDI.  Operand 0 is the
;; result, operand 1 the dividend and operand 2 the divisor; all three must
;; be registers.  The operation is wrapped in an UNSPEC_MERGE_PTRUE whose
;; first element is operand 3, a predicate register that is not supplied by
;; the caller but created by the preparation statement below.
(define_expand "<optab><mode>3"
[(set (match_operand:SVE_SDI 0 "register_operand")
(unspec:SVE_SDI
[(match_dup 3)
(SVE_INT_BINARY_SD:SVE_SDI
(match_operand:SVE_SDI 1 "register_operand")
(match_operand:SVE_SDI 2 "register_operand"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
{
/* Put the predicate mode's -1 constant in a register so that the
   match_dup above has a governing predicate to refer to (presumably the
   all-true PTRUE that the predicated insn expects).  */
operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
}
)
;; Division predicated with a PTRUE.
;; Operand 1 is the governing predicate, operand 2 the dividend and
;; operand 3 the divisor.  The output templates always use %0 as the first
;; source, so the result has to be tied to one of the inputs:
;;   - alternative 1 ties the dividend to the result and emits the plain
;;     form of the instruction with the divisor as the last operand;
;;   - alternative 2 ties the divisor to the result and emits the reversed
;;     ("r"-suffixed) form with the dividend as the last operand.
;; Operand 3 uses register_operand, matching the expander: every
;; alternative offers only a register constraint for it, so a predicate
;; that also admitted immediates would allow operands that no alternative
;; can satisfy without a reload.
(define_insn "*<optab><mode>3"
[(set (match_operand:SVE_SDI 0 "register_operand" "=w, w")
(unspec:SVE_SDI
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(SVE_INT_BINARY_SD:SVE_SDI
(match_operand:SVE_SDI 2 "register_operand" "0, w")
(match_operand:SVE_SDI 3 "register_operand" "w, 0"))]
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
"@
<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
)
;; Unpredicated NEG, NOT and POPCOUNT. ;; Unpredicated NEG, NOT and POPCOUNT.
(define_expand "<optab><mode>2" (define_expand "<optab><mode>2"
[(set (match_operand:SVE_I 0 "register_operand") [(set (match_operand:SVE_I 0 "register_operand")
......
...@@ -1207,6 +1207,8 @@ ...@@ -1207,6 +1207,8 @@
(define_code_iterator SVE_INT_BINARY_REV [minus]) (define_code_iterator SVE_INT_BINARY_REV [minus])
;; SVE integer binary division operations (signed div and unsigned udiv).
(define_code_iterator SVE_INT_BINARY_SD [div udiv])
;; SVE integer comparisons. ;; SVE integer comparisons.
(define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu]) (define_code_iterator SVE_INT_CMP [lt le eq ne ge gt ltu leu geu gtu])
...@@ -1237,6 +1239,8 @@ ...@@ -1237,6 +1239,8 @@
(neg "neg") (neg "neg")
(plus "add") (plus "add")
(minus "sub") (minus "sub")
(div "div")
(udiv "udiv")
(ss_plus "qadd") (ss_plus "qadd")
(us_plus "qadd") (us_plus "qadd")
(ss_minus "qsub") (ss_minus "qsub")
...@@ -1378,6 +1382,8 @@ ...@@ -1378,6 +1382,8 @@
;; The integer SVE instruction that implements an rtx code. ;; The integer SVE instruction that implements an rtx code.
(define_code_attr sve_int_op [(plus "add") (define_code_attr sve_int_op [(plus "add")
(minus "sub") (minus "sub")
(div "sdiv")
(udiv "udiv")
(neg "neg") (neg "neg")
(smin "smin") (smin "smin")
(smax "smax") (smax "smax")
......
2018-05-25 Richard Sandiford <richard.sandiford@linaro.org> 2018-05-25 Richard Sandiford <richard.sandiford@linaro.org>
* gcc.target/aarch64/sve/div_1.c: New test.
* gcc.target/aarch64/sve/div_1_run.c: Likewise.
* gcc.target/aarch64/sve/mul_highpart_2.c: Likewise.
* gcc.target/aarch64/sve/mul_highpart_2_run.c: Likewise.
2018-05-25 Richard Sandiford <richard.sandiford@linaro.org>
* gcc.dg/vect/bb-slp-div-1.c: New XFAILed test. * gcc.dg/vect/bb-slp-div-1.c: New XFAILed test.
2018-05-25 Richard Sandiford <richard.sandiford@linaro.org> 2018-05-25 Richard Sandiford <richard.sandiford@linaro.org>
......
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
#include <stdint.h>
/* Define a function mod_<TYPE> that stores src1[i] / src2[i] in dst[i]
   for every i in [0, count).  All three pointers are restrict-qualified
   and the function is marked noipa.  Note that, despite the "mod_" prefix,
   the loop performs a division, not a modulus; the dg-final directives in
   this test expect it to be emitted as sdiv/udiv.  */
#define DEF_LOOP(TYPE) \
void __attribute__ ((noipa)) \
mod_##TYPE (TYPE *restrict dst, TYPE *restrict src1, \
TYPE *restrict src2, int count) \
{ \
for (int i = 0; i < count; ++i) \
dst[i] = src1[i] / src2[i]; \
}
/* Apply the macro T once to each element type under test: signed and
   unsigned 32-bit and 64-bit integers.  */
#define TEST_ALL(T) \
T (int32_t) \
T (uint32_t) \
T (int64_t) \
T (uint64_t)
/* Instantiate one division loop per element type.  */
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tsdiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
/* { dg-final { scan-assembler-times {\tudiv\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
#include "div_1.c"
#define N 79
/* Check mod_<TYPE> against scalar division for element type TYPE.
   Builds N dividends from the pattern i * 7 + i % 3 (negated whenever
   i % 11 > 7) and N divisors in the range 5..9, calls mod_<TYPE> on them
   and aborts if any output element differs from dividend / divisor.
   The empty asm with a "memory" clobber is kept in every iteration of the
   set-up loop, exactly as before.  */
#define TEST_LOOP(TYPE)                                         \
  {                                                             \
    TYPE dividend[N], divisor[N], quotient[N];                  \
    for (int idx = 0; idx < N; ++idx)                           \
      {                                                         \
        TYPE value = idx * 7 + idx % 3;                         \
        if (idx % 11 > 7)                                       \
          value = -value;                                       \
        dividend[idx] = value;                                  \
        divisor[idx] = 5 + (idx % 5);                           \
        asm volatile ("" ::: "memory");                         \
      }                                                         \
    mod_##TYPE (quotient, dividend, divisor, N);                \
    for (int idx = 0; idx < N; ++idx)                           \
      if (quotient[idx] != dividend[idx] / divisor[idx])        \
        __builtin_abort ();                                     \
  }
/* Entry point: expand TEST_LOOP once for each element type listed by
   TEST_ALL.  Each expansion calls __builtin_abort on a mismatch, so
   reaching the return statement means every check passed.  */
int
main (void)
{
TEST_ALL (TEST_LOOP);
return 0;
}
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
#include <stdint.h>
/* Define a function mod_<TYPE> that stores src[i] / 17 in dst[i] for
   every i in [0, count).  The function is marked noipa.  Note that,
   despite the "mod_" prefix, the loop performs a division by a constant;
   the dg-final directives in this test expect it to be emitted as a
   multiply-high (smulh/umulh).  */
#define DEF_LOOP(TYPE) \
void __attribute__ ((noipa)) \
mod_##TYPE (TYPE *dst, TYPE *src, int count) \
{ \
for (int i = 0; i < count; ++i) \
dst[i] = src[i] / 17; \
}
/* Apply the macro T once to each element type under test: signed and
   unsigned 32-bit and 64-bit integers.  */
#define TEST_ALL(T) \
T (int32_t) \
T (uint32_t) \
T (int64_t) \
T (uint64_t)
/* Instantiate one divide-by-17 loop per element type.  */
TEST_ALL (DEF_LOOP)
/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tsmulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
/* { dg-final { scan-assembler-times {\tumulh\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model --save-temps" } */
#include "mul_highpart_2.c"
#define N 79
/* Check mod_<TYPE> against scalar division by 17 for element type TYPE.
   Builds N inputs from the pattern i * 7 + i % 3 (negated whenever
   i % 11 > 7), calls mod_<TYPE> on them and aborts if any output element
   differs from input / 17.  The empty asm with a "memory" clobber is kept
   in every iteration of the set-up loop, exactly as before.  */
#define TEST_LOOP(TYPE)                                 \
  {                                                     \
    TYPE input[N], output[N];                           \
    for (int idx = 0; idx < N; ++idx)                   \
      {                                                 \
        TYPE value = idx * 7 + idx % 3;                 \
        if (idx % 11 > 7)                               \
          value = -value;                               \
        input[idx] = value;                             \
        asm volatile ("" ::: "memory");                 \
      }                                                 \
    mod_##TYPE (output, input, N);                      \
    for (int idx = 0; idx < N; ++idx)                   \
      if (output[idx] != input[idx] / 17)               \
        __builtin_abort ();                             \
  }
/* Entry point: expand TEST_LOOP once for each element type listed by
   TEST_ALL.  Each expansion calls __builtin_abort on a mismatch, so
   reaching the return statement means every check passed.  */
int
main (void)
{
TEST_ALL (TEST_LOOP);
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment