Commit 74166aab by Richard Sandiford Committed by Richard Sandiford

[AArch64] Support vectorising with multiple vector sizes

This patch makes the vectoriser try mixtures of 64-bit and 128-bit
vector modes on AArch64.  It fixes some existing XFAILs and allows
kernel 24 from the Livermore Loops test to be vectorised (by using
a mixture of V2DF and V2SI).

2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* config/aarch64/aarch64.c (aarch64_vectorize_related_mode): New
	function.
	(aarch64_autovectorize_vector_modes): Also add V4HImode and V2SImode.
	(TARGET_VECTORIZE_RELATED_MODE): Define.

gcc/testsuite/
	* gcc.dg/vect/vect-outer-4f.c: Expect the test to pass on aarch64
	targets.
	* gcc.dg/vect/vect-outer-4g.c: Likewise.
	* gcc.dg/vect/vect-outer-4k.c: Likewise.
	* gcc.dg/vect/vect-outer-4l.c: Likewise.
	* gfortran.dg/vect/vect-8.f90: Expect kernel 24 to be vectorized
	for aarch64.
	* gcc.target/aarch64/vect_mixed_sizes_1.c: New test.
	* gcc.target/aarch64/vect_mixed_sizes_2.c: Likewise.
	* gcc.target/aarch64/vect_mixed_sizes_3.c: Likewise.
	* gcc.target/aarch64/vect_mixed_sizes_4.c: Likewise.

From-SVN: r278243
parent a55d8232
2019-11-14 Richard Sandiford <richard.sandiford@arm.com>
* config/aarch64/aarch64.c (aarch64_vectorize_related_mode): New
function.
(aarch64_autovectorize_vector_modes): Also add V4HImode and V2SImode.
(TARGET_VECTORIZE_RELATED_MODE): Define.
2019-11-14 Richard Sandiford <richard.sandiford@arm.com>
* tree-vectorizer.h (vec_info::mode_set): New typedef.
(vec_info::used_vector_mode): New member variable.
(vect_chooses_same_modes_p): Declare.
...@@ -1822,6 +1822,30 @@ aarch64_sve_int_mode (machine_mode mode) ...@@ -1822,6 +1822,30 @@ aarch64_sve_int_mode (machine_mode mode)
return aarch64_sve_data_mode (int_mode, GET_MODE_NUNITS (mode)).require (); return aarch64_sve_data_mode (int_mode, GET_MODE_NUNITS (mode)).require ();
} }
/* Implement TARGET_VECTORIZE_RELATED_MODE.

   VECTOR_MODE is the mode the vectoriser is currently using;
   ELEMENT_MODE is the scalar element type wanted and NUNITS the
   requested number of elements (0 meaning "no preference").

   When no specific element count is requested and VECTOR_MODE is a
   64-bit Advanced SIMD mode whose element count, at ELEMENT_MODE
   width, might not fit in 64 bits, try the 128-bit container for
   ELEMENT_MODE first; otherwise fall back to the generic hook.  */
static opt_machine_mode
aarch64_vectorize_related_mode (machine_mode vector_mode,
				scalar_mode element_mode,
				poly_uint64 nunits)
{
  unsigned int flags = aarch64_classify_vector_mode (vector_mode);

  /* Prefer to use 1 128-bit vector instead of 2 64-bit vectors.  */
  if ((flags & VEC_ADVSIMD) != 0
      && known_eq (nunits, 0U)
      && known_eq (GET_MODE_BITSIZE (vector_mode), 64U))
    {
      /* Total size needed to hold VECTOR_MODE's element count at
	 ELEMENT_MODE width.  */
      poly_uint64 required_bits
	= GET_MODE_BITSIZE (element_mode) * GET_MODE_NUNITS (vector_mode);
      if (maybe_ge (required_bits, 128U))
	{
	  machine_mode wide_mode
	    = aarch64_simd_container_mode (element_mode, 128);
	  /* aarch64_simd_container_mode can return a scalar mode when
	     there is no suitable vector; only accept real vectors.  */
	  if (VECTOR_MODE_P (wide_mode))
	    return wide_mode;
	}
    }

  return default_vectorize_related_mode (vector_mode, element_mode, nunits);
}
/* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations, /* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations,
prefer to use the first arithmetic operand as the else value if prefer to use the first arithmetic operand as the else value if
the else value doesn't matter, since that exactly matches the SVE the else value doesn't matter, since that exactly matches the SVE
...@@ -15916,8 +15940,27 @@ aarch64_autovectorize_vector_modes (vector_modes *modes, bool) ...@@ -15916,8 +15940,27 @@ aarch64_autovectorize_vector_modes (vector_modes *modes, bool)
{ {
if (TARGET_SVE) if (TARGET_SVE)
modes->safe_push (VNx16QImode); modes->safe_push (VNx16QImode);
/* Try using 128-bit vectors for all element types. */
modes->safe_push (V16QImode); modes->safe_push (V16QImode);
/* Try using 64-bit vectors for 8-bit elements and 128-bit vectors
for wider elements. */
modes->safe_push (V8QImode); modes->safe_push (V8QImode);
/* Try using 64-bit vectors for 16-bit elements and 128-bit vectors
for wider elements.
TODO: We could support a limited form of V4QImode too, so that
we use 32-bit vectors for 8-bit elements. */
modes->safe_push (V4HImode);
/* Try using 64-bit vectors for 32-bit elements and 128-bit vectors
for 64-bit elements.
TODO: We could similarly support limited forms of V2QImode and V2HImode
for this case. */
modes->safe_push (V2SImode);
} }
/* Implement TARGET_MANGLE_TYPE. */ /* Implement TARGET_MANGLE_TYPE. */
...@@ -21786,6 +21829,8 @@ aarch64_libgcc_floating_mode_supported_p ...@@ -21786,6 +21829,8 @@ aarch64_libgcc_floating_mode_supported_p
#define TARGET_VECTORIZE_VEC_PERM_CONST \ #define TARGET_VECTORIZE_VEC_PERM_CONST \
aarch64_vectorize_vec_perm_const aarch64_vectorize_vec_perm_const
#undef TARGET_VECTORIZE_RELATED_MODE
#define TARGET_VECTORIZE_RELATED_MODE aarch64_vectorize_related_mode
#undef TARGET_VECTORIZE_GET_MASK_MODE #undef TARGET_VECTORIZE_GET_MASK_MODE
#define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode #define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode
#undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE #undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE
......
2019-11-14 Richard Sandiford <richard.sandiford@arm.com> 2019-11-14 Richard Sandiford <richard.sandiford@arm.com>
* gcc.dg/vect/vect-outer-4f.c: Expect the test to pass on aarch64
targets.
* gcc.dg/vect/vect-outer-4g.c: Likewise.
* gcc.dg/vect/vect-outer-4k.c: Likewise.
* gcc.dg/vect/vect-outer-4l.c: Likewise.
* gfortran.dg/vect/vect-8.f90: Expect kernel 24 to be vectorized
for aarch64.
* gcc.target/aarch64/vect_mixed_sizes_1.c: New test.
* gcc.target/aarch64/vect_mixed_sizes_2.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_3.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_4.c: Likewise.
2019-11-14 Richard Sandiford <richard.sandiford@arm.com>
* gcc.dg/vect/vect-tail-nomask-1.c: Update expected epilogue * gcc.dg/vect/vect-tail-nomask-1.c: Update expected epilogue
vectorization message. vectorization message.
......
...@@ -65,4 +65,4 @@ int main (void) ...@@ -65,4 +65,4 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
...@@ -65,4 +65,4 @@ int main (void) ...@@ -65,4 +65,4 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
...@@ -65,4 +65,4 @@ int main (void) ...@@ -65,4 +65,4 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
...@@ -65,4 +65,4 @@ int main (void) ...@@ -65,4 +65,4 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"
#include <stdint.h>
/* Loop with two statements that want different vectorisation factors:
   the int64_t addition can use a full 128-bit vector (v*.2d), while
   z[i] += z[i - 2] carries a dependence at distance 2, so z is expected
   to use a 64-bit vector (v*.2s) — see the scan-assembler directives.
   NOTE(review): assumes the caller guarantees z[-2] and z[-1] are
   readable, since i starts at 0.  */
void
f (int64_t *x, int64_t *y, int32_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      x[i] += y[i];
      z[i] += z[i - 2];
    }
}
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2s,} 1 } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"
#include <stdint.h>
/* Mixed vector sizes: the int32_t addition should use a 128-bit vector
   (v*.4s), while z[i] += z[i - 4] carries a dependence at distance 4,
   so z is expected to use a 64-bit V4HI vector (v*.4h) rather than a
   32-bit one (no v*.2s, per the scan-assembler-not directive).
   NOTE(review): assumes z[-4..-1] are readable, since i starts at 0.  */
void
f (int32_t *x, int32_t *y, int16_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      x[i] += y[i];
      z[i] += z[i - 4];
    }
}
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4h,} 1 } } */
/* { dg-final { scan-assembler-not {\tadd\tv[0-9]+\.2s,} } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"
#include <stdint.h>
/* Mixed vector sizes: the int16_t addition should use a 128-bit vector
   (v*.8h), while z[i] += z[i - 8] carries a dependence at distance 8,
   so z is expected to use a 64-bit V8QI vector (v*.8b) rather than a
   half-width v*.4h (per the scan-assembler-not directive).
   NOTE(review): assumes z[-8..-1] are readable, since i starts at 0.  */
void
f (int16_t *x, int16_t *y, int8_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      x[i] += y[i];
      z[i] += z[i - 8];
    }
}
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8b,} 1 } } */
/* { dg-final { scan-assembler-not {\tadd\tv[0-9]+\.4h,} } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"
#include <stdint.h>
/* Widest element-size spread: int64_t vs int8_t.  The dependence at
   distance 8 in z gives a vectorisation factor of 8, so z uses a
   64-bit V8QI vector (one v*.8b add) while the int64_t statement needs
   four V2DI adds per iteration (v*.2d counted 4 times in the
   scan-assembler directives).
   NOTE(review): assumes z[-8..-1] are readable, since i starts at 0.  */
void
f (int64_t *x, int64_t *y, int8_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      x[i] += y[i];
      z[i] += z[i - 8];
    }
}
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 4 } } */
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8b,} 1 } } */
...@@ -704,5 +704,6 @@ CALL track('KERNEL ') ...@@ -704,5 +704,6 @@ CALL track('KERNEL ')
RETURN RETURN
END SUBROUTINE kernel END SUBROUTINE kernel
! { dg-final { scan-tree-dump-times "vectorized 22 loops" 1 "vect" { target vect_intdouble_cvt } } } ! { dg-final { scan-tree-dump-times "vectorized 23 loops" 1 "vect" { target aarch64*-*-* } } }
! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { ! vect_intdouble_cvt } } } } ! { dg-final { scan-tree-dump-times "vectorized 22 loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } }
! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { { ! vect_intdouble_cvt } && { ! aarch64*-*-* } } } } }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment