Commit 0e350f13 by Jakub Jelinek Committed by Jakub Jelinek

re PR tree-optimization/88464 (AVX-512 vectorization of masked scatter failing…

re PR tree-optimization/88464 (AVX-512 vectorization of masked scatter failing with "not suitable for scatter store")

	PR tree-optimization/88464
	* tree-vect-stmts.c (vect_build_gather_load_calls): Handle INTEGER_TYPE
	masktype if mask is non-NULL.
	(vectorizable_load): Don't reject masked gather loads if masktype
	in the decl is INTEGER_TYPE.

	* gcc.target/i386/avx512f-pr88462-1.c: New test.
	* gcc.target/i386/avx512f-pr88462-2.c: New test.

From-SVN: r267097
parent 2139fd74
2018-12-13 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/88464
* tree-vect-stmts.c (vect_build_gather_load_calls): Handle INTEGER_TYPE
masktype if mask is non-NULL.
(vectorizable_load): Don't reject masked gather loads if masktype
in the decl is INTEGER_TYPE.
2018-12-13 Andreas Krebbel <krebbel@linux.ibm.com> 2018-12-13 Andreas Krebbel <krebbel@linux.ibm.com>
* config/s390/s390-builtins.def (s390_vec_double_s64): Map to * config/s390/s390-builtins.def (s390_vec_double_s64): Map to
2018-12-13 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/88464
* gcc.target/i386/avx512f-pr88462-1.c: New test.
* gcc.target/i386/avx512f-pr88462-2.c: New test.
2018-12-13 Nathan Sidwell <nathan@acm.org> 2018-12-13 Nathan Sidwell <nathan@acm.org>
PR c++/87531 PR c++/87531
......
/* PR tree-optimization/88462 */
/* { dg-do compile } */
/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512 -fdump-tree-vect-details" } */
/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 3 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops in function" 3 "vect" } } */
__attribute__((noipa)) void
f1 (double * __restrict__ a, const double * __restrict__ b, const int * __restrict__ c, int n)
{
int i;
#pragma GCC ivdep
for (i = 0; i < n; ++i)
if (a[i] > 10.0)
a[i] = b[c[i]];
}
__attribute__((noipa)) void
f2 (double * __restrict__ a, const double * __restrict__ b, const long * __restrict__ c, int n)
{
int i;
#pragma GCC ivdep
for (i = 0; i < n; ++i)
if (a[i] > 10.0)
a[i] = b[c[i]];
}
__attribute__((noipa)) void
f3 (float * __restrict__ a, const float * __restrict__ b, const int * __restrict__ c, int n)
{
int i;
#pragma GCC ivdep
for (i = 0; i < n; ++i)
if (a[i] > 10.0f)
a[i] = b[c[i]];
}
/* PR tree-optimization/88462 */
/* { dg-do run { target { avx512f } } } */
/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -mtune=skylake-avx512" } */
#include "avx512f-check.h"
#include "avx512f-pr88462-1.c"
static void
avx512f_test (void)
{
double a[1024], b[1024];
float c[1024], f[1024];
int d[1024];
long e[1024];
int i;
for (i = 0; i < 1024; i++)
{
asm volatile ("" : "+g" (i));
a[i] = (i % 3) != 0 ? 15.0 : -5.0;
b[i] = 2 * i;
d[i] = (i % 3) ? 1023 - i : __INT_MAX__;
}
f1 (a, b, d, 1024);
for (i = 0; i < 1024; i++)
{
asm volatile ("" : "+g" (i));
if (a[i] != ((i % 3) != 0 ? (1023 - i) * 2.0 : -5.0))
abort ();
a[i] = (i % 3) != 1 ? 15.0 : -5.0;
b[i] = 3 * i;
e[i] = (i % 3) != 1 ? 1023 - i : __LONG_MAX__;
}
f2 (a, b, e, 1024);
for (i = 0; i < 1024; i++)
{
asm volatile ("" : "+g" (i));
if (a[i] != ((i % 3) != 1 ? (1023 - i) * 3.0 : -5.0))
abort ();
c[i] = (i % 3) != 2 ? 15.0f : -5.0f;
d[i] = (i % 3) != 2 ? 1023 - i : __INT_MAX__;
f[i] = 4 * i;
}
f3 (c, f, d, 1024);
for (i = 0; i < 1024; i++)
{
asm volatile ("" : "+g" (i));
if (c[i] != ((i % 3) != 2 ? (1023 - i) * 4.0f : -5.0f))
abort ();
}
}
...@@ -2647,8 +2647,13 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info, ...@@ -2647,8 +2647,13 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist); tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
tree scaletype = TREE_VALUE (arglist); tree scaletype = TREE_VALUE (arglist);
tree real_masktype = masktype;
gcc_checking_assert (types_compatible_p (srctype, rettype) gcc_checking_assert (types_compatible_p (srctype, rettype)
&& (!mask || types_compatible_p (srctype, masktype))); && (!mask
|| TREE_CODE (masktype) == INTEGER_TYPE
|| types_compatible_p (srctype, masktype)));
if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
masktype = build_same_sized_truth_vector_type (srctype);
tree perm_mask = NULL_TREE; tree perm_mask = NULL_TREE;
tree mask_perm_mask = NULL_TREE; tree mask_perm_mask = NULL_TREE;
...@@ -2763,9 +2768,9 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info, ...@@ -2763,9 +2768,9 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
mask_op = vec_mask; mask_op = vec_mask;
if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask))) if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
{ {
gcc_assert poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
(known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)), poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
TYPE_VECTOR_SUBPARTS (masktype))); gcc_assert (known_eq (sub1, sub2));
var = vect_get_new_ssa_name (masktype, vect_simple_var); var = vect_get_new_ssa_name (masktype, vect_simple_var);
mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op); mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
gassign *new_stmt gassign *new_stmt
...@@ -2777,8 +2782,33 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info, ...@@ -2777,8 +2782,33 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info,
src_op = mask_op; src_op = mask_op;
} }
tree mask_arg = mask_op;
if (masktype != real_masktype)
{
tree utype;
if (TYPE_MODE (real_masktype) == TYPE_MODE (masktype))
utype = real_masktype;
else
utype = lang_hooks.types.type_for_mode (TYPE_MODE (masktype), 1);
var = vect_get_new_ssa_name (utype, vect_scalar_var);
mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
gassign *new_stmt
= gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
mask_arg = var;
if (!useless_type_conversion_p (real_masktype, utype))
{
gcc_assert (TYPE_PRECISION (utype)
<= TYPE_PRECISION (real_masktype));
var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
new_stmt = gimple_build_assign (var, NOP_EXPR, utype);
vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
mask_arg = var;
}
src_op = build_zero_cst (srctype);
}
gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op, gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
mask_op, scale); mask_arg, scale);
stmt_vec_info new_stmt_info; stmt_vec_info new_stmt_info;
if (!useless_type_conversion_p (vectype, rettype)) if (!useless_type_conversion_p (vectype, rettype))
...@@ -7555,20 +7585,6 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -7555,20 +7585,6 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
TYPE_MODE (mask_vectype), true)) TYPE_MODE (mask_vectype), true))
return false; return false;
} }
else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
{
tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
tree masktype
= TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
if (TREE_CODE (masktype) == INTEGER_TYPE)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"masked gather with integer mask not"
" supported.");
return false;
}
}
else if (memory_access_type != VMAT_LOAD_STORE_LANES else if (memory_access_type != VMAT_LOAD_STORE_LANES
&& memory_access_type != VMAT_GATHER_SCATTER) && memory_access_type != VMAT_GATHER_SCATTER)
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment