Commit 18ba3ce7 by Michael Matz

re PR tree-optimization/43432 (Missed vectorization: "complicated access pattern" for increasing and decreasing data indexing)

	PR tree-optimization/43432
	* tree-vect-data-refs.c (vect_analyze_data_ref_access):
	Accept backwards consecutive accesses.
	(vect_create_data_ref_ptr): If step is negative generate
	decreasing IVs.
	* tree-vect-stmts.c (vectorizable_store): Reject negative steps.
	(perm_mask_for_reverse, reverse_vec_elements): New functions.
	(vectorizable_load): Handle loads with negative steps when easily
	possible.

testsuite/
	PR tree-optimization/43432
	* lib/target-supports.exp (check_effective_target_vect_perm_byte,
	check_effective_target_vect_perm_short): New predicates.
	(check_effective_target_vect_perm): Include x86_64.
	* gcc.dg/vect/pr43432.c: New test.
	* gcc.dg/vect/vect-114.c: Adjust.
	* gcc.dg/vect/vect-15.c: Ditto.
	* gcc.dg/vect/slp-perm-8.c: Use new predicate.
	* gcc.dg/vect/slp-perm-9.c: Ditto.

From-SVN: r164367
parent 4c588abf
2010-09-17 Michael Matz <matz@suse.de>
PR tree-optimization/43432
* tree-vect-data-refs.c (vect_analyze_data_ref_access):
Accept backwards consecutive accesses.
(vect_create_data_ref_ptr): If step is negative generate
decreasing IVs.
* tree-vect-stmts.c (vectorizable_store): Reject negative steps.
(perm_mask_for_reverse, reverse_vec_elements): New functions.
(vectorizable_load): Handle loads with negative steps when easily
possible.
2010-09-03 Jan Hubicka <jh@suse.cz> 2010-09-03 Jan Hubicka <jh@suse.cz>
* lto-cgraph.c (compute_ltrans_boundary): Use const_value_known. * lto-cgraph.c (compute_ltrans_boundary): Use const_value_known.
......
2010-09-17 Michael Matz <matz@suse.de>
PR tree-optimization/43432
* lib/target-supports.exp (check_effective_target_vect_perm_byte,
check_effective_target_vect_perm_short): New predicates.
(check_effective_target_vect_perm): Include x86_64.
* gcc.dg/vect/pr43432.c: New test.
* gcc.dg/vect/vect-114.c: Adjust.
* gcc.dg/vect/vect-15.c: Ditto.
* gcc.dg/vect/slp-perm-8.c: Use new predicate.
* gcc.dg/vect/slp-perm-9.c: Ditto.
2010-09-17 Nicola Pero <nicola.pero@meta-innovation.com> 2010-09-17 Nicola Pero <nicola.pero@meta-innovation.com>
PR testsuite/45692 PR testsuite/45692
......
/* { dg-do compile } */
/* { dg-require-effective-target vect_float } */
/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */
/* Element-wise multiply: dst[i] = src0[i] * src1 read back-to-front.
   With LEN elements, dst[i] receives src0[i] * src1[len - 1 - i].  */
void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1,
                           int len){
    int i;
    const float *rev = src1 + (len - 1);  /* last element of src1 */
    for (i = 0; i < len; ++i)
        dst[i] = src0[i] * rev[-i];
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -53,7 +53,7 @@ int main (int argc, const char* argv[]) ...@@ -53,7 +53,7 @@ int main (int argc, const char* argv[])
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_perm_byte } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm } } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_perm_byte } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -54,7 +54,7 @@ int main (int argc, const char* argv[]) ...@@ -54,7 +54,7 @@ int main (int argc, const char* argv[])
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target vect_perm } } } */ /* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target vect_perm_short } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -34,6 +34,7 @@ int main (void) ...@@ -34,6 +34,7 @@ int main (void)
return main1 (); return main1 ();
} }
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { target { ! vect_perm } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -35,5 +35,5 @@ int main (void) ...@@ -35,5 +35,5 @@ int main (void)
return main1 (); return main1 ();
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -2425,7 +2425,8 @@ proc check_effective_target_vect_perm { } { ...@@ -2425,7 +2425,8 @@ proc check_effective_target_vect_perm { } {
} else { } else {
set et_vect_perm_saved 0 set et_vect_perm_saved 0
if { [istarget powerpc*-*-*] if { [istarget powerpc*-*-*]
|| [istarget spu-*-*] } { || [istarget spu-*-*]
|| [istarget x86_64-*-*] } {
set et_vect_perm_saved 1 set et_vect_perm_saved 1
} }
} }
...@@ -2433,6 +2434,48 @@ proc check_effective_target_vect_perm { } { ...@@ -2433,6 +2434,48 @@ proc check_effective_target_vect_perm { } {
return $et_vect_perm_saved return $et_vect_perm_saved
} }
# Return 1 if the target plus current options supports vector permutation
# on byte-sized elements, 0 otherwise.
#
# This won't change for different subtargets so cache the result.

proc check_effective_target_vect_perm_byte { } {
    # BUG FIX: the global declaration must name the cache variable that is
    # actually read and written (et_vect_perm_byte_saved); declaring
    # et_vect_perm_byte left the _saved variable proc-local, so the cache
    # never persisted between calls and the check was recomputed each time.
    global et_vect_perm_byte_saved

    if [info exists et_vect_perm_byte_saved] {
        verbose "check_effective_target_vect_perm_byte: using cached result" 2
    } else {
        set et_vect_perm_byte_saved 0
        if { [istarget powerpc*-*-*]
             || [istarget spu-*-*] } {
            set et_vect_perm_byte_saved 1
        }
    }
    verbose "check_effective_target_vect_perm_byte: returning $et_vect_perm_byte_saved" 2
    return $et_vect_perm_byte_saved
}
# Return 1 if the target plus current options supports vector permutation
# on short-sized elements, 0 otherwise.
#
# This won't change for different subtargets so cache the result.

proc check_effective_target_vect_perm_short { } {
    # BUG FIX: declare the variable the proc actually uses for caching
    # (et_vect_perm_short_saved).  The previous "global et_vect_perm_short"
    # made the _saved variable proc-local, defeating the cache: the
    # [info exists] test could never see a prior result.
    global et_vect_perm_short_saved

    if [info exists et_vect_perm_short_saved] {
        verbose "check_effective_target_vect_perm_short: using cached result" 2
    } else {
        set et_vect_perm_short_saved 0
        if { [istarget powerpc*-*-*]
             || [istarget spu-*-*] } {
            set et_vect_perm_short_saved 1
        }
    }
    verbose "check_effective_target_vect_perm_short: returning $et_vect_perm_short_saved" 2
    return $et_vect_perm_short_saved
}
# Return 1 if the target plus current options supports a vector # Return 1 if the target plus current options supports a vector
# widening summation of *short* args into *int* result, 0 otherwise. # widening summation of *short* args into *int* result, 0 otherwise.
# #
......
...@@ -2287,7 +2287,9 @@ vect_analyze_data_ref_access (struct data_reference *dr) ...@@ -2287,7 +2287,9 @@ vect_analyze_data_ref_access (struct data_reference *dr)
} }
/* Consecutive? */ /* Consecutive? */
if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))) if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
|| (dr_step < 0
&& !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
{ {
/* Mark that it is not interleaving. */ /* Mark that it is not interleaving. */
DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL; DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
...@@ -2970,6 +2972,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop, ...@@ -2970,6 +2972,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
tree vptr; tree vptr;
gimple_stmt_iterator incr_gsi; gimple_stmt_iterator incr_gsi;
bool insert_after; bool insert_after;
bool negative;
tree indx_before_incr, indx_after_incr; tree indx_before_incr, indx_after_incr;
gimple incr; gimple incr;
tree step; tree step;
...@@ -3002,6 +3005,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop, ...@@ -3002,6 +3005,7 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
*inv_p = true; *inv_p = true;
else else
*inv_p = false; *inv_p = false;
negative = tree_int_cst_compare (step, size_zero_node) < 0;
/* Create an expression for the first address accessed by this load /* Create an expression for the first address accessed by this load
in LOOP. */ in LOOP. */
...@@ -3160,6 +3164,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop, ...@@ -3160,6 +3164,8 @@ vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
LOOP is zero. In this case the step here is also zero. */ LOOP is zero. In this case the step here is also zero. */
if (*inv_p) if (*inv_p)
step = size_zero_node; step = size_zero_node;
else if (negative)
step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
standard_iv_increment_position (loop, &incr_gsi, &insert_after); standard_iv_increment_position (loop, &incr_gsi, &insert_after);
......
...@@ -3145,6 +3145,13 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -3145,6 +3145,13 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
if (!STMT_VINFO_DATA_REF (stmt_info)) if (!STMT_VINFO_DATA_REF (stmt_info))
return false; return false;
if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "negative step for store.");
return false;
}
if (STMT_VINFO_STRIDED_ACCESS (stmt_info)) if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
{ {
strided_store = true; strided_store = true;
...@@ -3425,6 +3432,68 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -3425,6 +3432,68 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
return true; return true;
} }
/* Given a vector type VECTYPE, return the target builtin DECL to be used
   for a vector permutation, and store into *MASK a permutation selector
   that reverses the order of the vector elements.  Return NULL if the
   target cannot perform such a permutation (*MASK is then unchanged).
   MASK may be NULL when the caller only wants a feasibility check.  */

static tree
perm_mask_for_reverse (tree vectype, tree *mask)
{
  tree builtin_decl;
  tree mask_element_type, mask_type;
  tree mask_vec = NULL;
  int i;
  int nunits;

  /* No permute builtin hook at all: target cannot permute.  */
  if (!targetm.vectorize.builtin_vec_perm)
    return NULL;

  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
                                                     &mask_element_type);
  if (!builtin_decl || !mask_element_type)
    return NULL;

  /* The selector vector must have the same number of elements as the
     data vector, otherwise we cannot express the reversal.  */
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
    return NULL;

  /* Build the selector {nunits-1, ..., 1, 0}: tree_cons prepends, so
     appending indices 0..nunits-1 one by one at the front leaves the
     final TREE_LIST in descending order -- the element reversal.  */
  for (i = 0; i < nunits; i++)
    mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
  mask_vec = build_vector (mask_type, mask_vec);

  /* Ask the target whether this specific selector is supported.  */
  if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
    return NULL;

  if (mask)
    *mask = mask_vec;
  return builtin_decl;
}
/* Given a vector variable X, generated for the scalar LHS of STMT, emit
   statements at *GSI that reverse the element order of X and return the
   new SSA name holding the permuted vector.  The caller must already
   have checked feasibility via perm_mask_for_reverse.  */

static tree
reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
{
  tree vectype = TREE_TYPE (x);
  tree sel_mask, perm_fn;
  tree dest_var, result;
  gimple call;

  /* Fetch the target permute builtin together with the reversing
     selector for this vector type.  */
  perm_fn = perm_mask_for_reverse (vectype, &sel_mask);
  dest_var = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);

  /* Emit RESULT = PERM_FN (X, X, SEL_MASK) at *GSI.  */
  call = gimple_build_call (perm_fn, 3, x, x, sel_mask);
  result = make_ssa_name (dest_var, call);
  gimple_call_set_lhs (call, result);
  vect_finish_stmt_generation (stmt, call, gsi);

  return result;
}
/* vectorizable_load. /* vectorizable_load.
Check if STMT reads a non scalar data-ref (array/pointer/structure) that Check if STMT reads a non scalar data-ref (array/pointer/structure) that
...@@ -3467,6 +3536,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -3467,6 +3536,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
gimple first_stmt; gimple first_stmt;
tree scalar_type; tree scalar_type;
bool inv_p; bool inv_p;
bool negative;
bool compute_in_loop = false; bool compute_in_loop = false;
struct loop *at_loop; struct loop *at_loop;
int vec_num; int vec_num;
...@@ -3529,6 +3599,14 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -3529,6 +3599,14 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
if (!STMT_VINFO_DATA_REF (stmt_info)) if (!STMT_VINFO_DATA_REF (stmt_info))
return false; return false;
negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
if (negative && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "multiple types with negative step.");
return false;
}
scalar_type = TREE_TYPE (DR_REF (dr)); scalar_type = TREE_TYPE (DR_REF (dr));
mode = TYPE_MODE (vectype); mode = TYPE_MODE (vectype);
...@@ -3563,6 +3641,25 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -3563,6 +3641,25 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
return false; return false;
} }
if (negative)
{
gcc_assert (!strided_load);
alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
if (alignment_support_scheme != dr_aligned
&& alignment_support_scheme != dr_unaligned_supported)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "negative step but alignment required.");
return false;
}
if (!perm_mask_for_reverse (vectype, NULL))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "negative step and reversing not supported.");
return false;
}
}
if (!vec_stmt) /* transformation not required. */ if (!vec_stmt) /* transformation not required. */
{ {
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
...@@ -3737,6 +3834,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -3737,6 +3834,9 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
else else
at_loop = loop; at_loop = loop;
if (negative)
offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
prev_stmt_info = NULL; prev_stmt_info = NULL;
for (j = 0; j < ncopies; j++) for (j = 0; j < ncopies; j++)
{ {
...@@ -3925,6 +4025,12 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt, ...@@ -3925,6 +4025,12 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
gcc_unreachable (); /* FORNOW. */ gcc_unreachable (); /* FORNOW. */
} }
if (negative)
{
new_temp = reverse_vec_elements (new_temp, stmt, gsi);
new_stmt = SSA_NAME_DEF_STMT (new_temp);
}
/* Collect vector loads and later create their permutation in /* Collect vector loads and later create their permutation in
vect_transform_strided_load (). */ vect_transform_strided_load (). */
if (strided_load || slp_perm) if (strided_load || slp_perm)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment