Commit 9c437a10 by Richard Sandiford Committed by Richard Sandiford

Vectorise conversions between differently-sized integer vectors

This patch adds AArch64 patterns for converting between 64-bit and
128-bit integer vectors, and makes the vectoriser and expand pass
use them.

2019-11-14  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	* tree-cfg.c (verify_gimple_assign_unary): Handle conversions
	between vector types.
	* tree-vect-stmts.c (vectorizable_conversion): Extend the
	non-widening and non-narrowing path to handle standard
	conversion codes, if the target supports them.
	* expr.c (convert_move): Try using the extend and truncate optabs
	for vectors.
	* optabs-tree.c (supportable_convert_operation): Likewise.
	* config/aarch64/iterators.md (Vnarrowq): New mode attribute.
	* config/aarch64/aarch64-simd.md (<optab><Vnarrowq><mode>2)
	(trunc<mode><Vnarrowq>2): New patterns.

gcc/testsuite/
	* gcc.dg/vect/bb-slp-pr69907.c: Do not expect BB vectorization
	to fail for aarch64 targets.
	* gcc.dg/vect/no-scevccp-outer-12.c: Expect the test to pass
	on aarch64 targets.
	* gcc.dg/vect/vect-double-reduc-5.c: Likewise.
	* gcc.dg/vect/vect-outer-4e.c: Likewise.
	* gcc.target/aarch64/vect_mixed_sizes_5.c: New test.
	* gcc.target/aarch64/vect_mixed_sizes_6.c: Likewise.
	* gcc.target/aarch64/vect_mixed_sizes_7.c: Likewise.
	* gcc.target/aarch64/vect_mixed_sizes_8.c: Likewise.
	* gcc.target/aarch64/vect_mixed_sizes_9.c: Likewise.
	* gcc.target/aarch64/vect_mixed_sizes_10.c: Likewise.
	* gcc.target/aarch64/vect_mixed_sizes_11.c: Likewise.
	* gcc.target/aarch64/vect_mixed_sizes_12.c: Likewise.
	* gcc.target/aarch64/vect_mixed_sizes_13.c: Likewise.

From-SVN: r278245
parent 05101d1b
2019-11-14 Richard Sandiford <richard.sandiford@arm.com>
* tree-cfg.c (verify_gimple_assign_unary): Handle conversions
between vector types.
* tree-vect-stmts.c (vectorizable_conversion): Extend the
non-widening and non-narrowing path to handle standard
conversion codes, if the target supports them.
* expr.c (convert_move): Try using the extend and truncate optabs
for vectors.
* optabs-tree.c (supportable_convert_operation): Likewise.
* config/aarch64/iterators.md (Vnarrowq): New mode attribute.
* config/aarch64/aarch64-simd.md (<optab><Vnarrowq><mode>2)
(trunc<mode><Vnarrowq>2): New patterns.
2019-11-14 Richard Sandiford <richard.sandiford@arm.com>
* tree-vect-stmts.c (vect_get_vector_types_for_stmt): Don't
require vectype and nunits_vectype to have the same size;
instead assert that nunits_vectype has at least as many
...@@ -7007,3 +7007,21 @@ ...@@ -7007,3 +7007,21 @@
"pmull2\\t%0.1q, %1.2d, %2.2d" "pmull2\\t%0.1q, %1.2d, %2.2d"
[(set_attr "type" "crypto_pmull")] [(set_attr "type" "crypto_pmull")]
) )
;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
;; ANY_EXTEND iterates over sign_extend and zero_extend, and <su>
;; selects the matching "s"/"u" prefix, so this emits SXTL or UXTL.
;; Operand 1 is the narrow 64-bit source; operand 0 is the full-width
;; 128-bit destination.
(define_insn "<optab><Vnarrowq><mode>2"
  [(set (match_operand:VQN 0 "register_operand" "=w")
	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
  [(set_attr "type" "neon_shift_imm_long")]
)
;; Truncate a 128-bit integer vector to a 64-bit vector.
;; Emits XTN (extract narrow): operand 1 is the 128-bit source,
;; operand 0 the narrowed 64-bit destination.  The trunc<m><n>2
;; pattern name is what convert_move/optabs look up via trunc_optab.
(define_insn "trunc<mode><Vnarrowq>2"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
...@@ -927,6 +927,8 @@ ...@@ -927,6 +927,8 @@
(V2DI "V2SI") (V2DI "V2SI")
(DI "SI") (SI "HI") (DI "SI") (SI "HI")
(HI "QI")]) (HI "QI")])
;; Lower-case versions of the narrowed VQN modes, for building pattern
;; names such as trunc<mode><Vnarrowq>2 (cf. the upper-case VNARROWQ
;; attribute used for the operand modes themselves).
(define_mode_attr Vnarrowq [(V8HI "v8qi") (V4SI "v4hi")
			    (V2DI "v2si")])
;; Narrowed quad-modes for VQN (Used for XTN2). ;; Narrowed quad-modes for VQN (Used for XTN2).
(define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI") (define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI")
......
...@@ -250,6 +250,31 @@ convert_move (rtx to, rtx from, int unsignedp) ...@@ -250,6 +250,31 @@ convert_move (rtx to, rtx from, int unsignedp)
if (VECTOR_MODE_P (to_mode) || VECTOR_MODE_P (from_mode)) if (VECTOR_MODE_P (to_mode) || VECTOR_MODE_P (from_mode))
{ {
if (GET_MODE_UNIT_PRECISION (to_mode)
> GET_MODE_UNIT_PRECISION (from_mode))
{
optab op = unsignedp ? zext_optab : sext_optab;
insn_code icode = convert_optab_handler (op, to_mode, from_mode);
if (icode != CODE_FOR_nothing)
{
emit_unop_insn (icode, to, from,
unsignedp ? ZERO_EXTEND : SIGN_EXTEND);
return;
}
}
if (GET_MODE_UNIT_PRECISION (to_mode)
< GET_MODE_UNIT_PRECISION (from_mode))
{
insn_code icode = convert_optab_handler (trunc_optab,
to_mode, from_mode);
if (icode != CODE_FOR_nothing)
{
emit_unop_insn (icode, to, from, TRUNCATE);
return;
}
}
gcc_assert (known_eq (GET_MODE_BITSIZE (from_mode), gcc_assert (known_eq (GET_MODE_BITSIZE (from_mode),
GET_MODE_BITSIZE (to_mode))); GET_MODE_BITSIZE (to_mode)));
......
...@@ -303,6 +303,20 @@ supportable_convert_operation (enum tree_code code, ...@@ -303,6 +303,20 @@ supportable_convert_operation (enum tree_code code,
return true; return true;
} }
if (GET_MODE_UNIT_PRECISION (m1) > GET_MODE_UNIT_PRECISION (m2)
&& can_extend_p (m1, m2, TYPE_UNSIGNED (vectype_in)))
{
*code1 = code;
return true;
}
if (GET_MODE_UNIT_PRECISION (m1) < GET_MODE_UNIT_PRECISION (m2)
&& convert_optab_handler (trunc_optab, m1, m2) != CODE_FOR_nothing)
{
*code1 = code;
return true;
}
/* Now check for builtin. */ /* Now check for builtin. */
if (targetm.vectorize.builtin_conversion if (targetm.vectorize.builtin_conversion
&& targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in)) && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in))
......
2019-11-14 Richard Sandiford <richard.sandiford@arm.com>
* gcc.dg/vect/bb-slp-pr69907.c: Do not expect BB vectorization
to fail for aarch64 targets.
* gcc.dg/vect/no-scevccp-outer-12.c: Expect the test to pass
on aarch64 targets.
* gcc.dg/vect/vect-double-reduc-5.c: Likewise.
* gcc.dg/vect/vect-outer-4e.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_5.c: New test.
* gcc.target/aarch64/vect_mixed_sizes_6.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_7.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_8.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_9.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_10.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_11.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_12.c: Likewise.
* gcc.target/aarch64/vect_mixed_sizes_13.c: Likewise.
2019-11-14 Richard Sandiford <richard.sandiford@arm.com>
* gcc.dg/vect/vect-outer-4f.c: Expect the test to pass on aarch64
targets.
* gcc.dg/vect/vect-outer-4g.c: Likewise.
......
...@@ -18,5 +18,6 @@ void foo(unsigned *p1, unsigned short *p2) ...@@ -18,5 +18,6 @@ void foo(unsigned *p1, unsigned short *p2)
} }
/* Disable for SVE because for long or variable-length vectors we don't
   get an unrolled epilogue loop.  Also disable for AArch64 Advanced SIMD,
   because there we can vectorize the epilogue using mixed vector sizes.  */
/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64*-*-* } } } } */
...@@ -46,4 +46,4 @@ int main (void) ...@@ -46,4 +46,4 @@ int main (void)
} }
/* Until we support multiple types in the inner loop */
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail { ! aarch64*-*-* } } } } */
...@@ -52,5 +52,5 @@ int main () ...@@ -52,5 +52,5 @@ int main ()
/* Vectorization of loops with multiple types and double reduction is not
   supported yet.  */
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
...@@ -23,4 +23,4 @@ foo (){ ...@@ -23,4 +23,4 @@ foo (){
return; return;
} }
/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"

#include <stdint.h>

/* Mixed-size vectorization: the uint8_t -> int16_t copy should be
   vectorized with exactly one UXTL (zero-extend v8.8b -> v8.8h), as
   checked by the scan-assembler directives below.  */
void
f (int16_t *x, int16_t *y, uint8_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      x[i] = z[i];
      /* NOTE(review): the distance-8 recurrence presumably constrains
	 the vectorization factor so both statements share a lane
	 count -- confirm against the vectorizer dump.  */
      y[i] += y[i - 8];
    }
}

/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"

#include <stdint.h>

/* Mixed-size vectorization: the int64_t -> int32_t store should be
   vectorized with exactly one XTN (narrow v.2d -> v.2s), as checked
   by the scan-assembler directives below.  */
void
f (int32_t *x, int64_t *y, int64_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      x[i] = z[i];
      /* NOTE(review): the distance-2 recurrence presumably constrains
	 the vectorization factor -- confirm against the vectorizer
	 dump.  */
      y[i] += y[i - 2];
    }
}

/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.2s, v[0-9]+\.2d\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"

#include <stdint.h>

/* Mixed-size vectorization: the int32_t -> int16_t store should be
   vectorized with exactly one XTN (narrow v.4s -> v.4h), as checked
   by the scan-assembler directives below.  */
void
f (int16_t *x, int32_t *y, int32_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      x[i] = z[i];
      /* NOTE(review): the distance-4 recurrence presumably constrains
	 the vectorization factor -- confirm against the vectorizer
	 dump.  */
      y[i] += y[i - 4];
    }
}

/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.4h, v[0-9]+\.4s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"

#include <stdint.h>

/* Mixed-size vectorization: the int16_t -> int8_t store should be
   vectorized with exactly one XTN (narrow v.8h -> v.8b), as checked
   by the scan-assembler directives below.  */
void
f (int8_t *x, int16_t *y, int16_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      x[i] = z[i];
      /* NOTE(review): the distance-8 recurrence presumably constrains
	 the vectorization factor -- confirm against the vectorizer
	 dump.  */
      y[i] += y[i - 8];
    }
}

/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.8b, v[0-9]+\.8h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"

#include <stdint.h>

/* Mixed-size vectorization: the int32_t -> int64_t copy should be
   vectorized with exactly one SXTL (sign-extend v.2s -> v.2d), as
   checked by the scan-assembler directives below.  */
void
f (int64_t *x, int64_t *y, int32_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      x[i] = z[i];
      /* NOTE(review): the distance-2 recurrence presumably constrains
	 the vectorization factor -- confirm against the vectorizer
	 dump.  */
      y[i] += y[i - 2];
    }
}

/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"

#include <stdint.h>

/* Mixed-size vectorization: the int16_t -> int32_t copy should be
   vectorized with exactly one SXTL (sign-extend v.4h -> v.4s), as
   checked by the scan-assembler directives below.  */
void
f (int32_t *x, int32_t *y, int16_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      x[i] = z[i];
      /* NOTE(review): the distance-4 recurrence presumably constrains
	 the vectorization factor -- confirm against the vectorizer
	 dump.  */
      y[i] += y[i - 4];
    }
}

/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"

#include <stdint.h>

/* Mixed-size vectorization: the int8_t -> int16_t copy should be
   vectorized with exactly one SXTL (sign-extend v.8b -> v.8h), as
   checked by the scan-assembler directives below.  */
void
f (int16_t *x, int16_t *y, int8_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      x[i] = z[i];
      /* NOTE(review): the distance-8 recurrence presumably constrains
	 the vectorization factor -- confirm against the vectorizer
	 dump.  */
      y[i] += y[i - 8];
    }
}

/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"

#include <stdint.h>

/* Mixed-size vectorization: the uint32_t -> int64_t copy should be
   vectorized with exactly one UXTL (zero-extend v.2s -> v.2d), as
   checked by the scan-assembler directives below.  */
void
f (int64_t *x, int64_t *y, uint32_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      x[i] = z[i];
      /* NOTE(review): the distance-2 recurrence presumably constrains
	 the vectorization factor -- confirm against the vectorizer
	 dump.  */
      y[i] += y[i - 2];
    }
}

/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */
/* { dg-options "-O2 -ftree-vectorize" } */
#pragma GCC target "+nosve"

#include <stdint.h>

/* Mixed-size vectorization: the uint16_t -> int32_t copy should be
   vectorized with exactly one UXTL (zero-extend v.4h -> v.4s), as
   checked by the scan-assembler directives below.  */
void
f (int32_t *x, int32_t *y, uint16_t *z, int n)
{
  for (int i = 0; i < n; ++i)
    {
      x[i] = z[i];
      /* NOTE(review): the distance-4 recurrence presumably constrains
	 the vectorization factor -- confirm against the vectorizer
	 dump.  */
      y[i] += y[i - 4];
    }
}

/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */
/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */
...@@ -3553,6 +3553,24 @@ verify_gimple_assign_unary (gassign *stmt) ...@@ -3553,6 +3553,24 @@ verify_gimple_assign_unary (gassign *stmt)
{ {
CASE_CONVERT: CASE_CONVERT:
{ {
/* Allow conversions between vectors with the same number of elements,
provided that the conversion is OK for the element types too. */
if (VECTOR_TYPE_P (lhs_type)
&& VECTOR_TYPE_P (rhs1_type)
&& known_eq (TYPE_VECTOR_SUBPARTS (lhs_type),
TYPE_VECTOR_SUBPARTS (rhs1_type)))
{
lhs_type = TREE_TYPE (lhs_type);
rhs1_type = TREE_TYPE (rhs1_type);
}
else if (VECTOR_TYPE_P (lhs_type) || VECTOR_TYPE_P (rhs1_type))
{
error ("invalid vector types in nop conversion");
debug_generic_expr (lhs_type);
debug_generic_expr (rhs1_type);
return true;
}
/* Allow conversions from pointer type to integral type only if /* Allow conversions from pointer type to integral type only if
there is no sign or zero extension involved. there is no sign or zero extension involved.
For targets were the precision of ptrofftype doesn't match that For targets were the precision of ptrofftype doesn't match that
......
...@@ -4861,7 +4861,9 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -4861,7 +4861,9 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
switch (modifier) switch (modifier)
{ {
case NONE: case NONE:
if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) if (code != FIX_TRUNC_EXPR
&& code != FLOAT_EXPR
&& !CONVERT_EXPR_CODE_P (code))
return false; return false;
if (supportable_convert_operation (code, vectype_out, vectype_in, if (supportable_convert_operation (code, vectype_out, vectype_in,
&decl1, &code1)) &decl1, &code1))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment