Commit 7d75abc8 by Michael Matz

re PR tree-optimization/18437 (vectorizer failed for matrix multiplication)

	PR tree-optimization/18437

	* tree-vectorizer.h (_stmt_vec_info.stride_load_p): New member.
	(STMT_VINFO_STRIDE_LOAD_P): New accessor.
	(vect_check_strided_load): Declare.
	* tree-vect-data-refs.c (vect_check_strided_load): New function.
	(vect_analyze_data_refs): Use it to accept strided loads.
	* tree-vect-stmts.c (vectorizable_load): Ditto and handle them.

testsuite/
	* gfortran.dg/vect/rnflow-trs2a2.f90: New test.

From-SVN: r186530
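For context, the motivating pattern from the PR, sketched as a hypothetical C analogue (the function and parameter names below are invented for illustration and are not part of the commit): with Fortran's column-major storage, the inner product of a matrix multiplication reads one operand with a loop-invariant stride equal to the leading dimension, which is exactly the kind of load this patch teaches the vectorizer to accept.

/* Hypothetical analogue of the rnflow-trs2a2.f90 inner loop below: in
   column-major storage u(i,w) becomes u[i + w*m], a load whose index
   steps by the loop-invariant stride m on every iteration.  */
double
dot_strided (const float *u, const float *d, int i, int c,
             int j, int k, int m)
{
  double dtmp = 0.0;
  for (int w = j; w < k; w++)
    dtmp += (double) u[i + w * m]   /* strided load, step m */
            * d[w + c * m];         /* contiguous load, step 1 */
  return dtmp;
}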
gcc/ChangeLog:
+2012-04-17  Michael Matz  <matz@suse.de>
+
+	PR tree-optimization/18437
+	* tree-vectorizer.h (_stmt_vec_info.stride_load_p): New member.
+	(STMT_VINFO_STRIDE_LOAD_P): New accessor.
+	(vect_check_strided_load): Declare.
+	* tree-vect-data-refs.c (vect_check_strided_load): New function.
+	(vect_analyze_data_refs): Use it to accept strided loads.
+	* tree-vect-stmts.c (vectorizable_load): Ditto and handle them.
+
 2012-04-17  Richard Guenther  <rguenther@suse.de>
 
 	PR middle-end/53011

gcc/testsuite/ChangeLog:
+2012-04-17  Michael Matz  <matz@suse.de>
+
+	PR tree-optimization/18437
+	* gfortran.dg/vect/rnflow-trs2a2.f90: New test.
+
 2012-04-17  Richard Guenther  <rguenther@suse.de>
 
 	PR middle-end/53011

gcc/testsuite/gfortran.dg/vect/rnflow-trs2a2.f90 (new file):
! { dg-do compile }
! { dg-require-effective-target vect_double }
function trs2a2 (j, k, u, d, m)
  ! intermediate transition matrix, starting from k without going
  ! below j.  R = IjU(I-Ik)DIj, with Ii = deltajj, j >= i.
  ! alternative: trs2a2 = 0
  !              trs2a2 (j:k-1, j:k-1) = matmul (utrsft (j:k-1,j:k-1),
  !                                              dtrsft (j:k-1,j:k-1))
  !
  real, dimension (1:m,1:m) :: trs2a2    ! result
  real, dimension (1:m,1:m) :: u, d      ! utrsft and dtrsft matrices
  integer, intent (in)      :: j, k, m   ! valley and peak levels
  !
  !##### following line replaced by Prentice to make less system dependent
  ! real (kind = kind (1.0d0)) :: dtmp
  real (kind = selected_real_kind (10,50)) :: dtmp
  !
  trs2a2 = 0.0
  do iclw1 = j, k - 1
    do iclw2 = j, k - 1
      dtmp = 0.0d0
      do iclww = j, k - 1
        dtmp = dtmp + u (iclw1, iclww) * d (iclww, iclw2)
      enddo
      trs2a2 (iclw1, iclw2) = dtmp
    enddo
  enddo
  return
end function trs2a2
! { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } }
! { dg-final { cleanup-tree-dump "vect" } }
gcc/tree-vect-data-refs.c:
@@ -2690,6 +2690,53 @@ vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
   return decl;
 }
 
+/* Check whether a non-affine load in STMT (being in the loop referred to
+   in LOOP_VINFO) is suitable for handling as strided load.  That is the case
+   if its address is a simple induction variable.  If so return the base
+   of that induction variable in *BASEP and the (loop-invariant) step
+   in *STEPP, both only when that pointer is non-zero.
+
+   This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
+   base pointer) only.  */
+
+bool
+vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
+                         tree *stepp)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+  tree base, off;
+  affine_iv iv;
+
+  base = DR_REF (dr);
+
+  if (TREE_CODE (base) == ARRAY_REF)
+    {
+      off = TREE_OPERAND (base, 1);
+      base = TREE_OPERAND (base, 0);
+    }
+  else if (TREE_CODE (base) == MEM_REF)
+    {
+      off = TREE_OPERAND (base, 0);
+      base = TREE_OPERAND (base, 1);
+    }
+  else
+    return false;
+
+  if (TREE_CODE (off) != SSA_NAME)
+    return false;
+
+  if (!expr_invariant_in_loop_p (loop, base)
+      || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
+    return false;
+
+  if (basep)
+    *basep = iv.base;
+  if (stepp)
+    *stepp = iv.step;
+  return true;
+}
+
 /* Function vect_analyze_data_refs.
@@ -3090,16 +3137,21 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
           VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
           struct data_dependence_relation *ddr, *newddr;
           bool bad = false;
+          bool strided_load = false;
           tree off;
           VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
 
-          if (!vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL)
-              || get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
+          strided_load = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
+          gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
+          if (gather
+              && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
+            gather = false;
+          if (!gather && !strided_load)
             {
               if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
                 {
                   fprintf (vect_dump,
-                           "not vectorized: not suitable for gather ");
+                           "not vectorized: not suitable for gather/strided load ");
                   print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                 }
               return false;
@@ -3152,13 +3204,16 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo,
             {
               fprintf (vect_dump,
                        "not vectorized: data dependence conflict"
-                       " prevents gather");
+                       " prevents gather/strided load");
               print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
             }
           return false;
         }
 
-      STMT_VINFO_GATHER_P (stmt_info) = true;
+      if (gather)
+        STMT_VINFO_GATHER_P (stmt_info) = true;
+      else if (strided_load)
+        STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
     }
 }
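As a rough illustration of what the new check accepts (hypothetical C, not part of the commit): the comment above names two shapes, an ARRAY_REF whose index varies and a MEM_REF whose base pointer varies, and in both the varying part must be a simple induction variable with loop-invariant step.

/* Hypothetical examples only: "stride" is loop-invariant, so the array
   index resp. the pointer is a simple induction variable.  */
float a[1024];

float
load_array_ref (int n, int stride)
{
  float sum = 0.0f;
  for (int i = 0; i < n; i += stride)   /* ARRAY_REF a[i], variant index.  */
    sum += a[i];
  return sum;
}

float
load_mem_ref (const float *p, int n, int stride)
{
  float sum = 0.0f;
  for (int i = 0; i < n; i++, p += stride)   /* MEM_REF *p, variant base.  */
    sum += *p;
  return sum;
}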
gcc/tree-vect-stmts.c:
@@ -4224,6 +4224,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   tree aggr_type;
   tree gather_base = NULL_TREE, gather_off = NULL_TREE;
   tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
+  tree stride_base, stride_step;
   int gather_scale = 1;
   enum vect_def_type gather_dt = vect_unknown_def_type;
@@ -4357,6 +4358,10 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
           return false;
         }
     }
+  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+    {
+      vect_check_strided_load (stmt, loop_vinfo, &stride_base, &stride_step);
+    }
 
   if (!vec_stmt) /* transformation not required.  */
     {
@@ -4524,6 +4529,104 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
         }
       return true;
     }
+  else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
+    {
+      gimple_stmt_iterator incr_gsi;
+      bool insert_after;
+      gimple incr;
+      tree offvar;
+      tree ref = DR_REF (dr);
+      tree ivstep;
+      tree running_off;
+      VEC(constructor_elt, gc) *v = NULL;
+      gimple_seq stmts = NULL;
+
+      gcc_assert (stride_base && stride_step);
+
+      /* For a load with loop-invariant (but other than power-of-2)
+         stride (i.e. not a grouped access) like so:
+
+           for (i = 0; i < n; i += stride)
+             ... = array[i];
+
+         we generate a new induction variable and new accesses to
+         form a new vector (or vectors, depending on ncopies):
+
+           for (j = 0; ; j += VF*stride)
+             tmp1 = array[j];
+             tmp2 = array[j + stride];
+             ...
+             vectemp = {tmp1, tmp2, ...}  */
+
+      ivstep = stride_step;
+      ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
+                            build_int_cst (TREE_TYPE (ivstep), vf));
+
+      standard_iv_increment_position (loop, &incr_gsi, &insert_after);
+
+      create_iv (stride_base, ivstep, NULL,
+                 loop, &incr_gsi, insert_after,
+                 &offvar, NULL);
+      incr = gsi_stmt (incr_gsi);
+      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
+
+      stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
+      if (stmts)
+        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
+
+      prev_stmt_info = NULL;
+      running_off = offvar;
+      for (j = 0; j < ncopies; j++)
+        {
+          tree vec_inv;
+
+          v = VEC_alloc (constructor_elt, gc, nunits);
+          for (i = 0; i < nunits; i++)
+            {
+              tree newref, newoff;
+              gimple incr;
+              if (TREE_CODE (ref) == ARRAY_REF)
+                newref = build4 (ARRAY_REF, TREE_TYPE (ref),
+                                 unshare_expr (TREE_OPERAND (ref, 0)),
+                                 running_off,
+                                 NULL_TREE, NULL_TREE);
+              else
+                newref = build2 (MEM_REF, TREE_TYPE (ref),
+                                 running_off,
+                                 TREE_OPERAND (ref, 1));
+
+              newref = force_gimple_operand_gsi (gsi, newref, true,
+                                                 NULL_TREE, true,
+                                                 GSI_SAME_STMT);
+              CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
+              newoff = SSA_NAME_VAR (running_off);
+              if (POINTER_TYPE_P (TREE_TYPE (newoff)))
+                incr = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, newoff,
+                                                     running_off, stride_step);
+              else
+                incr = gimple_build_assign_with_ops (PLUS_EXPR, newoff,
+                                                     running_off, stride_step);
+              newoff = make_ssa_name (newoff, incr);
+              gimple_assign_set_lhs (incr, newoff);
+              vect_finish_stmt_generation (stmt, incr, gsi);
+
+              running_off = newoff;
+            }
+
+          vec_inv = build_constructor (vectype, v);
+          new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
+          new_stmt = SSA_NAME_DEF_STMT (new_temp);
+          mark_symbols_for_renaming (new_stmt);
+
+          if (j == 0)
+            STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+          else
+            STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+          prev_stmt_info = vinfo_for_stmt (new_stmt);
+        }
+      return true;
+    }
+
   if (grouped_load)
     {
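To make the transformation in the comment concrete, here is a hedged C-level sketch (an invented illustration; the patch emits the equivalent GIMPLE, not this source): for a four-lane vector and one copy, each vector iteration performs VF scalar loads a stride apart and assembles them with a constructor, while the new induction variable advances by VF*stride.

/* Illustration only, assuming a 4-lane float vector (VF = 4).  */
typedef float v4sf __attribute__ ((vector_size (16)));

v4sf
one_vector_iteration (const float *array, long j, long stride)
{
  /* The VF scalar loads generated per copy...  */
  float tmp1 = array[j];
  float tmp2 = array[j + stride];
  float tmp3 = array[j + 2 * stride];
  float tmp4 = array[j + 3 * stride];
  /* ...are combined as by build_constructor/vect_init_vector; the IV
     made by create_iv then steps j by VF*stride for the next copy.  */
  return (v4sf) { tmp1, tmp2, tmp3, tmp4 };
}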
gcc/tree-vectorizer.h:
@@ -545,6 +545,7 @@ typedef struct _stmt_vec_info {
   /* For loads only, true if this is a gather load.  */
   bool gather_p;
+  bool stride_load_p;
 } *stmt_vec_info;
 
 /* Access Functions.  */
@@ -559,6 +560,7 @@ typedef struct _stmt_vec_info {
 #define STMT_VINFO_VECTORIZABLE(S)         (S)->vectorizable
 #define STMT_VINFO_DATA_REF(S)             (S)->data_ref_info
 #define STMT_VINFO_GATHER_P(S)             (S)->gather_p
+#define STMT_VINFO_STRIDE_LOAD_P(S)        (S)->stride_load_p
 
 #define STMT_VINFO_DR_BASE_ADDRESS(S)      (S)->dr_base_address
 #define STMT_VINFO_DR_INIT(S)              (S)->dr_init
@@ -875,6 +877,7 @@ extern bool vect_analyze_data_ref_accesses (loop_vec_info, bb_vec_info);
 extern bool vect_prune_runtime_alias_test_list (loop_vec_info);
 extern tree vect_check_gather (gimple, loop_vec_info, tree *, tree *,
                                int *);
+extern bool vect_check_strided_load (gimple, loop_vec_info, tree *, tree *);
 extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *);
 extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
                                       tree *, gimple_stmt_iterator *,