Commit 37a3662f by Richard Sandiford

[AArch64] Add scatter stores for partial SVE modes

This patch adds support for scatter stores of partial vectors,
where the vector base or offset elements can be wider than the
elements being stored.
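
For example, a loop like the following (modelled on the updated
scatter_store_1.c test; a sketch only, since the code actually generated
depends on the vectorization cost model) stores 8-bit elements through
32-bit offsets and can now be vectorized with an extending scatter store
such as "st1b z0.s, p0, [x0, z1.s, sxtw]":

#include <stdint.h>

/* 8-bit data, 32-bit offset elements: the stored elements are narrower
   than the offset elements, a combination that previously had no
   scatter-store pattern.  */
void
scatter_u8 (uint8_t *restrict dest, uint8_t *restrict src,
	    int32_t *indices, int n)
{
  for (int i = 0; i < n; ++i)
    dest[indices[i]] = src[i] + 1;
}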

2019-11-16  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/aarch64-sve.md
	(scatter_store<SVE_FULL_SD:mode><v_int_equiv>): Extend to...
	(scatter_store<SVE_24:mode><v_int_container>): ...this.
	(mask_scatter_store<SVE_FULL_S:mode><v_int_equiv>): Extend to...
	(mask_scatter_store<SVE_4:mode><v_int_container>): ...this.
	(mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>): Extend to...
	(mask_scatter_store<SVE_2:mode><v_int_container>): ...this.
	(*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked): New
	pattern.
	(*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_sxtw): Extend to...
	(*mask_scatter_store<SVE_2:mode><v_int_container>_sxtw): ...this.
	(*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_uxtw): Extend to...
	(*mask_scatter_store<SVE_2:mode><v_int_container>_uxtw): ...this.

gcc/testsuite/
	* gcc.target/aarch64/sve/scatter_store_1.c (TEST_LOOP): Start at 0.
	(TEST_ALL): Add tests for 8-bit and 16-bit elements.
	* gcc.target/aarch64/sve/scatter_store_2.c: Update accordingly.
	* gcc.target/aarch64/sve/scatter_store_3.c (TEST_LOOP): Start at 0.
	(TEST_ALL): Add tests for 8-bit and 16-bit elements.
	* gcc.target/aarch64/sve/scatter_store_4.c: Update accordingly.
	* gcc.target/aarch64/sve/scatter_store_5.c (TEST_LOOP): Start at 0.
	(TEST_ALL): Add tests for 8-bit, 16-bit and 32-bit elements.
	* gcc.target/aarch64/sve/scatter_store_8.c: New test.
	* gcc.target/aarch64/sve/scatter_store_9.c: Likewise.

From-SVN: r278347
parent 87a80d27
2019-11-16 Richard Sandiford <richard.sandiford@arm.com> 2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
* config/aarch64/aarch64-sve.md
(scatter_store<SVE_FULL_SD:mode><v_int_equiv>): Extend to...
(scatter_store<SVE_24:mode><v_int_container>): ...this.
(mask_scatter_store<SVE_FULL_S:mode><v_int_equiv>): Extend to...
(mask_scatter_store<SVE_4:mode><v_int_equiv>): ...this.
(mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>): Extend to...
(mask_scatter_store<SVE_2:mode><v_int_equiv>): ...this.
(*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked): New
pattern.
(*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_sxtw): Extend to...
(*mask_scatter_store<SVE_2:mode><v_int_equiv>_sxtw): ...this.
(*mask_scatter_store<SVE_FULL_D:mode><v_int_equiv>_uxtw): Extend to...
(*mask_scatter_store<SVE_2:mode><v_int_equiv>_uxtw): ...this.
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
* config/aarch64/iterators.md (SVE_2BHSI, SVE_2HSDI, SVE_4BHI) * config/aarch64/iterators.md (SVE_2BHSI, SVE_2HSDI, SVE_4BHI)
(SVE_4HSI): New mode iterators. (SVE_4HSI): New mode iterators.
(ANY_EXTEND2): New code iterator. (ANY_EXTEND2): New code iterator.

gcc/config/aarch64/aarch64-sve.md

@@ -2135,15 +2135,15 @@
 ;; -------------------------------------------------------------------------

 ;; Unpredicated scatter stores.
-(define_expand "scatter_store<mode><v_int_equiv>"
+(define_expand "scatter_store<mode><v_int_container>"
   [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_dup 5)
	   (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
-	   (match_operand:<V_INT_EQUIV> 1 "register_operand")
+	   (match_operand:<V_INT_CONTAINER> 1 "register_operand")
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
-	   (match_operand:SVE_FULL_SD 4 "register_operand")]
+	   (match_operand:SVE_24 4 "register_operand")]
	  UNSPEC_ST1_SCATTER))]
   "TARGET_SVE"
   {
@@ -2153,48 +2153,74 @@
 ;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
 ;; unsigned extension and false for signed extension.
-(define_insn "mask_scatter_store<mode><v_int_equiv>"
+(define_insn "mask_scatter_store<mode><v_int_container>"
   [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
-	   (match_operand:DI 0 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
+	   (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
	   (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
	   (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
-	   (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
-	   (match_operand:SVE_FULL_S 4 "register_operand" "w, w, w, w, w, w")]
+	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
+	   (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
	  UNSPEC_ST1_SCATTER))]
   "TARGET_SVE"
   "@
-   st1w\t%4.s, %5, [%1.s]
-   st1w\t%4.s, %5, [%1.s, #%0]
-   st1w\t%4.s, %5, [%0, %1.s, sxtw]
-   st1w\t%4.s, %5, [%0, %1.s, uxtw]
-   st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
-   st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
+   st1<Vesize>\t%4.s, %5, [%1.s]
+   st1<Vesize>\t%4.s, %5, [%1.s, #%0]
+   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
+   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
+   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
+   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
 )

 ;; Predicated scatter stores for 64-bit elements.  The value of operand 2
 ;; doesn't matter in this case.
-(define_insn "mask_scatter_store<mode><v_int_equiv>"
+(define_insn "mask_scatter_store<mode><v_int_container>"
   [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
-	   (match_operand:DI 0 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
+	   (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
	   (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
	   (match_operand:DI 2 "const_int_operand")
-	   (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
-	   (match_operand:SVE_FULL_D 4 "register_operand" "w, w, w, w")]
+	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
+	   (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
	  UNSPEC_ST1_SCATTER))]
   "TARGET_SVE"
   "@
-   st1d\t%4.d, %5, [%1.d]
-   st1d\t%4.d, %5, [%1.d, #%0]
-   st1d\t%4.d, %5, [%0, %1.d]
-   st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
+   st1<Vesize>\t%4.d, %5, [%1.d]
+   st1<Vesize>\t%4.d, %5, [%1.d, #%0]
+   st1<Vesize>\t%4.d, %5, [%0, %1.d]
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
 )

-;; Likewise, but with the offset being sign-extended from 32 bits.
-(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_equiv>_sxtw"
+;; Likewise, but with the offset being extended from 32 bits.
+(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
+  [(set (mem:BLK (scratch))
+	(unspec:BLK
+	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
+	   (match_operand:DI 0 "register_operand" "rk, rk")
+	   (unspec:VNx2DI
+	     [(match_operand 6)
+	      (ANY_EXTEND:VNx2DI
+		(match_operand:VNx2SI 1 "register_operand" "w, w"))]
+	     UNSPEC_PRED_X)
+	   (match_operand:DI 2 "const_int_operand")
+	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+	   (match_operand:SVE_2 4 "register_operand" "w, w")]
+	  UNSPEC_ST1_SCATTER))]
+  "TARGET_SVE"
+  "@
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
+  "&& !CONSTANT_P (operands[6])"
+  {
+    operands[6] = CONSTM1_RTX (<VPRED>mode);
+  }
+)
+
+;; Likewise, but with the offset being truncated to 32 bits and then
+;; sign-extended.
+(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
   [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
@@ -2206,21 +2232,22 @@
	       (match_operand:VNx2DI 1 "register_operand" "w, w")))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 2 "const_int_operand")
-	   (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i")
-	   (match_operand:SVE_FULL_D 4 "register_operand" "w, w")]
+	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+	   (match_operand:SVE_2 4 "register_operand" "w, w")]
	  UNSPEC_ST1_SCATTER))]
   "TARGET_SVE"
   "@
-   st1d\t%4.d, %5, [%0, %1.d, sxtw]
-   st1d\t%4.d, %5, [%0, %1.d, sxtw %p3]"
-  "&& !rtx_equal_p (operands[5], operands[6])"
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
+  "&& !CONSTANT_P (operands[6])"
   {
-    operands[6] = copy_rtx (operands[5]);
+    operands[6] = CONSTM1_RTX (<VPRED>mode);
   }
 )

-;; Likewise, but with the offset being zero-extended from 32 bits.
-(define_insn "*mask_scatter_store<mode><v_int_equiv>_uxtw"
+;; Likewise, but with the offset being truncated to 32 bits and then
+;; zero-extended.
+(define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
   [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
@@ -2229,13 +2256,13 @@
	     (match_operand:VNx2DI 1 "register_operand" "w, w")
	     (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
	   (match_operand:DI 2 "const_int_operand")
-	   (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, i")
-	   (match_operand:SVE_FULL_D 4 "register_operand" "w, w")]
+	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
+	   (match_operand:SVE_2 4 "register_operand" "w, w")]
	  UNSPEC_ST1_SCATTER))]
   "TARGET_SVE"
   "@
-   st1d\t%4.d, %5, [%0, %1.d, uxtw]
-   st1d\t%4.d, %5, [%0, %1.d, uxtw %p3]"
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
+   st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
 )

 ;; -------------------------------------------------------------------------
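
As a rough illustration of what the new
*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked pattern
catches (a sketch based on the new scatter_store_8.c test, not code from
the commit itself): 64-bit data addressed through 32-bit offsets, where
the sign or zero extension of the offsets can be folded into the
addressing mode rather than done by a separate SXTW/UXTW instruction:

#include <stdint.h>

/* 64-bit data, 32-bit offset elements: with this patch the extension
   can be absorbed into the scatter store itself, e.g.
   "st1d z0.d, p0, [x0, z1.d, sxtw 3]".  */
void
scatter_s64 (int64_t *restrict dest, int64_t *restrict src,
	     int32_t *indices, int n)
{
  for (int i = 0; i < n; ++i)
    dest[indices[i]] = src[i];
}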

gcc/testsuite/gcc.target/aarch64/sve/scatter_store_1.c

@@ -13,11 +13,15 @@
   f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,	\
		  INDEX##BITS *indices, int n)				\
   {									\
-    for (int i = 9; i < n; ++i)						\
+    for (int i = 0; i < n; ++i)						\
       dest[indices[i]] = src[i] + 1;					\
   }

 #define TEST_ALL(T)				\
+  T (int8_t, 32)				\
+  T (uint8_t, 32)				\
+  T (int16_t, 32)				\
+  T (uint16_t, 32)				\
   T (int32_t, 32)				\
   T (uint32_t, 32)				\
   T (float, 32)					\
@@ -27,5 +31,7 @@
 TEST_ALL (TEST_LOOP)

+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 1\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */

gcc/testsuite/gcc.target/aarch64/sve/scatter_store_2.c

@@ -6,5 +6,7 @@
 #include "scatter_store_1.c"

+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 1\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */

gcc/testsuite/gcc.target/aarch64/sve/scatter_store_3.c

@@ -8,17 +8,20 @@
 #define INDEX64 int64_t
 #endif

+/* Invoked 18 times for each data size.  */
 #define TEST_LOOP(DATA_TYPE, BITS)					\
   void __attribute__ ((noinline, noclone))				\
   f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,	\
		  INDEX##BITS *indices, int n)				\
   {									\
-    for (int i = 9; i < n; ++i)						\
+    for (int i = 0; i < n; ++i)						\
       *(DATA_TYPE *) ((char *) dest + indices[i]) = src[i] + 1;	\
   }

 #define TEST_ALL(T)				\
+  T (int8_t, 32)				\
+  T (uint8_t, 32)				\
+  T (int16_t, 32)				\
+  T (uint16_t, 32)				\
   T (int32_t, 32)				\
   T (uint32_t, 32)				\
   T (float, 32)					\
@@ -28,5 +31,7 @@
 TEST_ALL (TEST_LOOP)

+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */

gcc/testsuite/gcc.target/aarch64/sve/scatter_store_4.c

@@ -6,5 +6,7 @@
 #include "scatter_store_3.c"

+/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */

gcc/testsuite/gcc.target/aarch64/sve/scatter_store_5.c

@@ -3,21 +3,29 @@
 #include <stdint.h>

+/* Invoked 18 times for each data size.  */
 #define TEST_LOOP(DATA_TYPE)					\
   void __attribute__ ((noinline, noclone))			\
   f_##DATA_TYPE (DATA_TYPE *restrict *dest, DATA_TYPE *restrict src, \
		  int n)					\
   {								\
-    for (int i = 9; i < n; ++i)					\
+    for (int i = 0; i < n; ++i)					\
       *dest[i] = src[i] + 1;					\
   }

 #define TEST_ALL(T)				\
+  T (int8_t)					\
+  T (uint8_t)					\
+  T (int16_t)					\
+  T (uint16_t)					\
+  T (int32_t)					\
+  T (uint32_t)					\
   T (int64_t)					\
   T (uint64_t)					\
   T (double)

 TEST_ALL (TEST_LOOP)

+/* We assume this isn't profitable for bytes.  */
+/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 2 } } */
 /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[z[0-9]+.d\]\n} 3 } } */

gcc/testsuite/gcc.target/aarch64/sve/scatter_store_8.c (new file)

/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */

#include <stdint.h>

#ifndef INDEX32
#define INDEX16 int16_t
#define INDEX32 int32_t
#endif

#define TEST_LOOP(DATA_TYPE, BITS)					\
  void __attribute__ ((noinline, noclone))				\
  f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,	\
		 INDEX##BITS *indices, INDEX##BITS mask, int n)		\
  {									\
    for (int i = 0; i < n; ++i)						\
      dest[(INDEX##BITS) (indices[i] + mask)] = src[i];			\
  }

#define TEST_ALL(T)				\
  T (int8_t, 16)				\
  T (uint8_t, 16)				\
  T (int16_t, 16)				\
  T (uint16_t, 16)				\
  T (_Float16, 16)				\
  T (int32_t, 16)				\
  T (uint32_t, 16)				\
  T (float, 16)					\
  T (int64_t, 32)				\
  T (uint64_t, 32)				\
  T (double, 32)

TEST_ALL (TEST_LOOP)

/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 1\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, sxtw 3\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tsxt.\tz} 8 } } */
/* { dg-final { scan-assembler-times {\tsxth\tz[0-9]+\.s,} 8 } } */
/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 3 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */
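
For reference, TEST_LOOP (int8_t, 16) above expands (hand-expanded here,
not part of the commit) to:

void __attribute__ ((noinline, noclone))
f_int8_t (int8_t *restrict dest, int8_t *restrict src,
	  int16_t *indices, int16_t mask, int n)
{
  for (int i = 0; i < n; ++i)
    /* The cast keeps the index computation in the narrow type, so the
       vectorized address calculation contains an explicit sign
       extension; the sxth/sxt. scans above appear to check that those
       extensions are generated as expected.  */
    dest[(int16_t) (indices[i] + mask)] = src[i];
}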

gcc/testsuite/gcc.target/aarch64/sve/scatter_store_9.c (new file)

/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */

#define INDEX16 uint16_t
#define INDEX32 uint32_t

#include "scatter_store_8.c"

/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 1\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, z[0-9]+.d, uxtw 3\]\n} 3 } } */
/* { dg-final { scan-assembler-times {\tuxt.\tz} 8 } } */
/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 8 } } */
/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 2 } } */
/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 3 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s,} 3 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d,} 3 } } */