Commit 8d689cf4 by Kewen Lin

Fix PR90332 by extending half-size vector mode

As PR90332 shows, eliminating the scalar epilogue peeling for gaps
currently requires the target to provide a vec_init optab that
constructs a vector from two half-size vector pieces.  On Power, we
don't support vector modes like V8QI, so we can't provide an optab
like vec_initv16qiv8qi.  But we can leverage an existing scalar mode
like DI to initialize the desired vector mode.  This patch extends
the existing support accordingly; as evaluated on Power9, it gives
the expected 1.9% speedup on SPEC2017 525.x264_r.
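
For illustration, here is a minimal C sketch (a hypothetical example,
not the testcase from the PR) of a grouped load with a gap, which is
the situation this elimination targets:

  /* The loads of a[2*i] form a group of size 2 with a gap of 1, since
     a[2*i+1] is never used.  Loading a full vector for the last group
     may read past the end of A, so the vectorizer must either peel a
     scalar epilogue or load only the lower part of the last vector.  */
  void
  foo (unsigned char *restrict dst, unsigned char *restrict a, int n)
  {
    for (int i = 0; i < n; i++)
      dst[i] = a[2 * i];
  }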

As Richi suggested, this adds one function vector_vector_composition_type
to refactor the existing related code and to make further use of it.
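
To sketch the underlying composition in plain C with GNU vector
extensions (an illustration only, assuming a target that supports
V2DI construction from DI pieces but has no V8QI; none of these names
come from the patch itself):

  typedef unsigned char v16qi __attribute__ ((vector_size (16)));
  typedef unsigned long long v2di __attribute__ ((vector_size (16)));

  /* Compose a V16QI from two DImode pieces: build a V2DI first (what
     a vec_initv2didi handler provides), then reinterpret the bits as
     V16QI, mirroring the CONSTRUCTOR plus VIEW_CONVERT_EXPR sequence
     the patch emits in vectorizable_load.  */
  static inline v16qi
  compose_v16qi (unsigned long long lo, unsigned long long hi)
  {
    v2di tmp = { lo, hi };
    return (v16qi) tmp;
  }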

Bootstrapped/regtested on powerpc64le-linux-gnu (LE) P8 and P9,
as well as x86_64-redhat-linux.

gcc/ChangeLog

2020-03-27  Kewen Lin  <linkw@gcc.gnu.org>

    PR tree-optimization/90332
    * tree-vect-stmts.c (vector_vector_composition_type): New function.
    (get_group_load_store_type): Adjust to call vector_vector_composition_type,
    extend it to construct with scalar types.
    (vectorizable_load): Likewise.
@@ -2220,6 +2220,62 @@ vect_get_store_rhs (stmt_vec_info stmt_info)
   gcc_unreachable ();
 }
 
+/* Function VECTOR_VECTOR_COMPOSITION_TYPE
+
+   This function returns a vector type which can be composed with NELTS
+   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type,
+   and have the same vector size as the returned vector.  It first checks
+   whether the target supports a vector mode of the piece size for the
+   construction; if not, it checks a scalar mode of the piece size instead.
+   It returns NULL_TREE if no usable composition is found.
+
+   For example, for (vtype=V16QI, nelts=4), we can probably get:
+     - V16QI with PTYPE V4QI.
+     - V4SI with PTYPE SI.
+     - NULL_TREE.  */
+
+static tree
+vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
+{
+  gcc_assert (VECTOR_TYPE_P (vtype));
+  gcc_assert (known_gt (nelts, 0U));
+
+  machine_mode vmode = TYPE_MODE (vtype);
+  if (!VECTOR_MODE_P (vmode))
+    return NULL_TREE;
+
+  poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
+  unsigned int pbsize;
+  if (constant_multiple_p (vbsize, nelts, &pbsize))
+    {
+      /* First check if vec_init optab supports construction from
+	 vector pieces directly.  */
+      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
+      poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
+      machine_mode rmode;
+      if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
+	  && (convert_optab_handler (vec_init_optab, vmode, rmode)
+	      != CODE_FOR_nothing))
+	{
+	  *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
+	  return vtype;
+	}
+
+      /* Otherwise check if there exists an integer mode of the same piece
+	 size and if vec_init optab supports construction from it.  */
+      if (int_mode_for_size (pbsize, 0).exists (&elmode)
+	  && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
+	  && (convert_optab_handler (vec_init_optab, rmode, elmode)
+	      != CODE_FOR_nothing))
+	{
+	  *ptype = build_nonstandard_integer_type (pbsize, 1);
+	  return build_vector_type (*ptype, nelts);
+	}
+    }
+
+  return NULL_TREE;
+}
+
 /* A subroutine of get_load_store_type, with a subset of the same
    arguments.  Handle the case where STMT_INFO is part of a grouped load
    or store.
@@ -2300,8 +2356,7 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
 	 by simply loading half of the vector only.  Usually
 	 the construction with an upper zero half will be elided.  */
       dr_alignment_support alignment_support_scheme;
-      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
-      machine_mode vmode;
+      tree half_vtype;
       if (overrun_p
 	  && !masked_p
 	  && (((alignment_support_scheme
@@ -2310,12 +2365,8 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
 	       || alignment_support_scheme == dr_unaligned_supported)
 	  && known_eq (nunits, (group_size - gap) * 2)
 	  && known_eq (nunits, group_size)
-	  && VECTOR_MODE_P (TYPE_MODE (vectype))
-	  && related_vector_mode (TYPE_MODE (vectype), elmode,
-				  group_size - gap).exists (&vmode)
-	  && (convert_optab_handler (vec_init_optab,
-				     TYPE_MODE (vectype), vmode)
-	      != CODE_FOR_nothing))
+	  && (vector_vector_composition_type (vectype, 2, &half_vtype)
+	      != NULL_TREE))
 	overrun_p = false;
 
       if (overrun_p && !can_overrun_p)
@@ -8915,47 +8966,24 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 	{
 	  if (group_size < const_nunits)
 	    {
-	      /* First check if vec_init optab supports construction from
-		 vector elts directly.  */
-	      scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
-	      machine_mode vmode;
-	      if (VECTOR_MODE_P (TYPE_MODE (vectype))
-		  && related_vector_mode (TYPE_MODE (vectype), elmode,
-					  group_size).exists (&vmode)
-		  && (convert_optab_handler (vec_init_optab,
-					     TYPE_MODE (vectype), vmode)
-		      != CODE_FOR_nothing))
+	      /* First check if vec_init optab supports construction from vector
+		 elts directly.  Otherwise avoid emitting a constructor of
+		 vector elements by performing the loads using an integer type
+		 of the same size, constructing a vector of those and then
+		 re-interpreting it as the original vector type.  This avoids a
+		 huge runtime penalty due to the general inability to perform
+		 store forwarding from smaller stores to a larger load.  */
+	      tree ptype;
+	      tree vtype
+		= vector_vector_composition_type (vectype,
+						  const_nunits / group_size,
+						  &ptype);
+	      if (vtype != NULL_TREE)
 		{
 		  nloads = const_nunits / group_size;
 		  lnel = group_size;
-		  ltype = build_vector_type (TREE_TYPE (vectype), group_size);
-		}
-	      else
-		{
-		  /* Otherwise avoid emitting a constructor of vector elements
-		     by performing the loads using an integer type of the same
-		     size, constructing a vector of those and then
-		     re-interpreting it as the original vector type.
-		     This avoids a huge runtime penalty due to the general
-		     inability to perform store forwarding from smaller stores
-		     to a larger load.  */
-		  unsigned lsize
-		    = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
-		  unsigned int lnunits = const_nunits / group_size;
-		  /* If we can't construct such a vector fall back to
-		     element loads of the original vector type.  */
-		  if (int_mode_for_size (lsize, 0).exists (&elmode)
-		      && VECTOR_MODE_P (TYPE_MODE (vectype))
-		      && related_vector_mode (TYPE_MODE (vectype), elmode,
-					      lnunits).exists (&vmode)
-		      && (convert_optab_handler (vec_init_optab, vmode, elmode)
-			  != CODE_FOR_nothing))
-		    {
-		      nloads = lnunits;
-		      lnel = group_size;
-		      ltype = build_nonstandard_integer_type (lsize, 1);
-		      lvectype = build_vector_type (ltype, nloads);
-		    }
-		}
+		  lvectype = vtype;
+		  ltype = ptype;
 		}
 	    }
 	  else
@@ -9541,6 +9569,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 	    else
 	      {
 		tree ltype = vectype;
+		tree new_vtype = NULL_TREE;
 		/* If there's no peeling for gaps but we have a gap
 		   with slp loads then load the lower half of the
 		   vector only.  See get_group_load_store_type for
@@ -9553,10 +9582,14 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 				 (group_size
 				  - DR_GROUP_GAP (first_stmt_info)) * 2)
 		    && known_eq (nunits, group_size))
-		  ltype = build_vector_type (TREE_TYPE (vectype),
-					     (group_size
-					      - DR_GROUP_GAP
-						  (first_stmt_info)));
+		  {
+		    tree half_vtype;
+		    new_vtype
+		      = vector_vector_composition_type (vectype, 2,
+							&half_vtype);
+		    if (new_vtype != NULL_TREE)
+		      ltype = half_vtype;
+		  }
 		data_ref
 		  = fold_build2 (MEM_REF, ltype, dataref_ptr,
 				 dataref_offset
@@ -9584,10 +9617,21 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 		    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
 		    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
 					    build_zero_cst (ltype));
-		    new_stmt
-		      = gimple_build_assign (vec_dest,
-					     build_constructor
-					       (vectype, v));
+		    gcc_assert (new_vtype != NULL_TREE);
+		    if (new_vtype == vectype)
+		      new_stmt = gimple_build_assign (
+			vec_dest, build_constructor (vectype, v));
+		    else
+		      {
+			tree new_vname = make_ssa_name (new_vtype);
+			new_stmt = gimple_build_assign (
+			  new_vname, build_constructor (new_vtype, v));
+			vect_finish_stmt_generation (stmt_info,
+						     new_stmt, gsi);
+			new_stmt = gimple_build_assign (
+			  vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
+					    new_vname));
+		      }
 		  }
 	      }
 	    break;