Commit cb9ed5d7 by Dorit Nuzman

re PR middle-end/31699 (-march=opteron -ftree-vectorize generates wrong code)

        PR tree-optimization/31699
        * tree-vect-analyze.c (vect_update_misalignment_for_peel): Remove wrong
        code.
        (vect_enhance_data_refs_alignment): Compute peel amount using
        TYPE_VECTOR_SUBPARTS instead of vf.
        * tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Likewise.

From-SVN: r124375
parent 7b50cdef
2007-05-03 Dorit Nuzman <dorit@il.ibm.com>
PR tree-optimization/31699
* tree-vect-analyze.c (vect_update_misalignment_for_peel): Remove wrong
code.
(vect_enhance_data_refs_alignment): Compute peel amount using
TYPE_VECTOR_SUBPARTS instead of vf.
* tree-vect-transform.c (vect_gen_niters_for_prolog_loop): Likewise.
2007-05-02 Brooks Moses <brooks.moses@codesourcery.com> 2007-05-02 Brooks Moses <brooks.moses@codesourcery.com>
PR bootstrap/31776 PR bootstrap/31776
......
2007-05-03 Dorit Nuzman <dorit@il.ibm.com>
PR tree-optimization/31699
* lib/target-supports.exp (check_effective_target_vect_intfloat_cvt):
New.
(check_effective_target_vect_floatint_cvt): New.
* gcc.dg/vect/vect-floatint-conversion-1.c: Use new keyword instead
of specific targets.
* gcc.dg/vect/vect-intfloat-conversion-1.c: Likewise.
* gcc.dg/vect/vect-multitypes-1.c: One less loop gets vectorized.
* gcc.dg/vect/vect-multitypes-4.c: Likewise.
* gcc.dg/vect/vect-iv-4.c: Likewise.
* gcc.dg/vect/vect-multitypes-11.c: New.
* gcc.dg/vect/pr31699.c: New.
2007-05-02 Geoffrey Keating <geoffk@apple.com> 2007-05-02 Geoffrey Keating <geoffk@apple.com>
* gcc.c-torture/compile-limits-stringlit.c: Reduce size of string. * gcc.c-torture/compile-limits-stringlit.c: Reduce size of string.
...@@ -2399,7 +2414,7 @@ ...@@ -2399,7 +2414,7 @@
Dorit Nuzman <dorit@il.ibm.com> Dorit Nuzman <dorit@il.ibm.com>
* gcc.dg/vect/vect-intfloat-conversion-1.c: New test. * gcc.dg/vect/vect-intfloat-conversion-1.c: New test.
* gcc.dg/vect/vect-intfloat-conversion-1.c: New test. * gcc.dg/vect/vect-floatint-conversion-1.c: New test.
* gcc.dg/vect/vect-93.c: Another loop gets vectorized on powerpc. * gcc.dg/vect/vect-93.c: Another loop gets vectorized on powerpc.
* gcc.dg/vect/vect-113.c: Likewise. * gcc.dg/vect/vect-113.c: Likewise.
/* { dg-require-effective-target vect_double } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
float x[256];
void foo(void)
{
double *z = malloc (sizeof(double) * 256);
int i;
for (i=0; i<256; ++i)
z[i] = x[i] + 1.0f;
}
int main()
{
int i;
check_vect ();
for (i = 0; i < 256; i++)
x[i] = (float) i;
foo();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_intfloat_cvt } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -36,5 +36,5 @@ main (void) ...@@ -36,5 +36,5 @@ main (void)
return main1 (); return main1 ();
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target i?86-*-* x86_64-*-* } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_floatint_cvt } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -34,5 +34,5 @@ int main (void) ...@@ -34,5 +34,5 @@ int main (void)
return main1 (); return main1 ();
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target powerpc*-*-* i?86-*-* x86_64-*-* } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_intfloat_cvt } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -40,5 +40,5 @@ int main (void) ...@@ -40,5 +40,5 @@ int main (void)
return main1 (); return main1 ();
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -14,10 +14,9 @@ int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45, ...@@ -14,10 +14,9 @@ int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
/* Current peeling-for-alignment scheme will consider the 'sa[i+7]' /* Current peeling-for-alignment scheme will consider the 'sa[i+7]'
access for peeling, and therefore will examine the option of access for peeling, and therefore will examine the option of
using a peeling factor = VF-7%VF. This will result in a peeling factor 1, using a peeling factor = V-7%V = 1,3 for V=8,4 respectively,
which will also align the access to 'ia[i+3]', and the loop could be which will also align the access to 'ia[i+3]', and the loop could be
vectorized on all targets that support unaligned loads. vectorized on all targets that support unaligned loads. */
*/
int main1 (int n) int main1 (int n)
{ {
...@@ -43,17 +42,16 @@ int main1 (int n) ...@@ -43,17 +42,16 @@ int main1 (int n)
/* Current peeling-for-alignment scheme will consider the 'ia[i+3]' /* Current peeling-for-alignment scheme will consider the 'ia[i+3]'
access for peeling, and therefore will examine the option of access for peeling, and therefore will examine the option of
using a peeling factor = VF-3%VF. This will result in a peeling factor using a peeling factor = (V-3)%V = 1 for V=2,4.
5 if VF=8, or 1 if VF=4,2. In either case, this will also align the access This will not align the access 'sa[i+3]' (for which we need to
to 'sa[i+3]', and the loop could be vectorized on targets that support peel 5 iterations), so the loop can not be vectorized. */
unaligned loads. */
int main2 (int n) int main2 (int n)
{ {
int i; int i;
/* Multiple types with different sizes, used in independent /* Multiple types with different sizes, used in independent
copmutations. Vectorizable. */ copmutations. */
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
{ {
ia[i+3] = ib[i]; ia[i+3] = ib[i];
...@@ -80,8 +78,11 @@ int main (void) ...@@ -80,8 +78,11 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 2 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
short x[N] __attribute__ ((__aligned__(16)));
int
foo (int len, int *z) {
int i;
for (i=0; i<len; i++) {
z[i] = x[i];
}
}
int main (void)
{
short i;
int z[N+4];
check_vect ();
for (i=0; i<N; i++) {
x[i] = i;
}
foo (N,z+2);
for (i=0; i<N; i++) {
if (z[i+2] != x[i])
abort ();
}
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_unpack } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect_unpack } } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { target { vect_no_align && vect_unpack } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -20,8 +20,7 @@ unsigned int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45, ...@@ -20,8 +20,7 @@ unsigned int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
access for peeling, and therefore will examine the option of access for peeling, and therefore will examine the option of
using a peeling factor = VF-7%VF. This will result in a peeling factor 1, using a peeling factor = VF-7%VF. This will result in a peeling factor 1,
which will also align the access to 'ia[i+3]', and the loop could be which will also align the access to 'ia[i+3]', and the loop could be
vectorized on all targets that support unaligned loads. vectorized on all targets that support unaligned loads. */
*/
int main1 (int n) int main1 (int n)
{ {
...@@ -48,9 +47,9 @@ int main1 (int n) ...@@ -48,9 +47,9 @@ int main1 (int n)
/* Current peeling-for-alignment scheme will consider the 'ia[i+3]' /* Current peeling-for-alignment scheme will consider the 'ia[i+3]'
access for peeling, and therefore will examine the option of access for peeling, and therefore will examine the option of
using a peeling factor = VF-3%VF. This will result in a peeling factor using a peeling factor = VF-3%VF. This will result in a peeling factor
5 if VF=8, or 1 if VF=4,2. In either case, this will also align the access 1 if VF=4,2. This will not align the access to 'sa[i+3]', for which we
to 'sa[i+3]', and the loop could be vectorized on targets that support need to peel 5,1 iterations for VF=4,2 respectively, so the loop can not
unaligned loads. */ be vectorized. */
int main2 (int n) int main2 (int n)
{ {
...@@ -84,8 +83,11 @@ int main (void) ...@@ -84,8 +83,11 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail vect_no_align } } } */ /* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 1 "vect" { xfail vect_no_align } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail vect_no_align } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -1368,6 +1368,49 @@ proc check_effective_target_vect_int { } { ...@@ -1368,6 +1368,49 @@ proc check_effective_target_vect_int { } {
return $et_vect_int_saved return $et_vect_int_saved
} }
# Return 1 if the target supports int->float conversion
#
proc check_effective_target_vect_intfloat_cvt { } {
global et_vect_intfloat_cvt_saved
if [info exists et_vect_intfloat_cvt_saved] {
verbose "check_effective_target_vect_intfloat_cvt: using cached result" 2
} else {
set et_vect_intfloat_cvt_saved 0
if { [istarget i?86-*-*]
|| [istarget powerpc*-*-*]
|| [istarget x86_64-*-*] } {
set et_vect_intfloat_cvt_saved 1
}
}
verbose "check_effective_target_vect_intfloat_cvt: returning $et_vect_intfloat_cvt_saved" 2
return $et_vect_intfloat_cvt_saved
}
# Return 1 if the target supports float->int conversion
#
proc check_effective_target_vect_floatint_cvt { } {
global et_vect_floatint_cvt_saved
if [info exists et_vect_floatint_cvt_saved] {
verbose "check_effective_target_vect_floatint_cvt: using cached result" 2
} else {
set et_vect_floatint_cvt_saved 0
if { [istarget i?86-*-*]
|| [istarget x86_64-*-*] } {
set et_vect_floatint_cvt_saved 1
}
}
verbose "check_effective_target_vect_floatint_cvt: returning $et_vect_floatint_cvt_saved" 2
return $et_vect_floatint_cvt_saved
}
# Return 1 is this is an arm target using 32-bit instructions # Return 1 is this is an arm target using 32-bit instructions
proc check_effective_target_arm32 { } { proc check_effective_target_arm32 { } {
global et_arm32_saved global et_arm32_saved
......
...@@ -1258,15 +1258,6 @@ vect_update_misalignment_for_peel (struct data_reference *dr, ...@@ -1258,15 +1258,6 @@ vect_update_misalignment_for_peel (struct data_reference *dr,
if (DR_GROUP_FIRST_DR (peel_stmt_info)) if (DR_GROUP_FIRST_DR (peel_stmt_info))
dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info); dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
if (known_alignment_for_access_p (dr)
&& known_alignment_for_access_p (dr_peel)
&& (DR_MISALIGNMENT (dr) / dr_size ==
DR_MISALIGNMENT (dr_peel) / dr_peel_size))
{
DR_MISALIGNMENT (dr) = 0;
return;
}
/* It can be assumed that the data refs with the same alignment as dr_peel /* It can be assumed that the data refs with the same alignment as dr_peel
are aligned in the vector loop. */ are aligned in the vector loop. */
same_align_drs same_align_drs
...@@ -1507,7 +1498,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1507,7 +1498,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
the prolog loop ({VF - misalignment}), is a multiple of the the prolog loop ({VF - misalignment}), is a multiple of the
number of the interleaved accesses. */ number of the interleaved accesses. */
int elem_size, mis_in_elements; int elem_size, mis_in_elements;
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
/* FORNOW: handle only known alignment. */ /* FORNOW: handle only known alignment. */
if (!known_alignment_for_access_p (dr)) if (!known_alignment_for_access_p (dr))
...@@ -1516,10 +1508,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1516,10 +1508,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
break; break;
} }
elem_size = UNITS_PER_SIMD_WORD / vf; elem_size = UNITS_PER_SIMD_WORD / nelements;
mis_in_elements = DR_MISALIGNMENT (dr) / elem_size; mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info)) if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
{ {
do_peeling = false; do_peeling = false;
break; break;
...@@ -1541,6 +1533,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1541,6 +1533,10 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{ {
int mis; int mis;
int npeel = 0; int npeel = 0;
tree stmt = DR_STMT (dr0);
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
if (known_alignment_for_access_p (dr0)) if (known_alignment_for_access_p (dr0))
{ {
...@@ -1550,7 +1546,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1550,7 +1546,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
factor minus the misalignment as an element count. */ factor minus the misalignment as an element count. */
mis = DR_MISALIGNMENT (dr0); mis = DR_MISALIGNMENT (dr0);
mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0)))); mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis; npeel = nelements - mis;
/* For interleaved data access every iteration accesses all the /* For interleaved data access every iteration accesses all the
members of the group, therefore we divide the number of iterations members of the group, therefore we divide the number of iterations
......
...@@ -4786,13 +4786,17 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio) ...@@ -4786,13 +4786,17 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
prolog_niters = min ( LOOP_NITERS , prolog_niters = min ( LOOP_NITERS ,
(VF/group_size - addr_mis/elem_size)&(VF/group_size-1) ) (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
where group_size is the size of the interleaved group. where group_size is the size of the interleaved group.
*/
The above formulas assume that VF == number of elements in the vector. This
may not hold when there are multiple-types in the loop.
In this case, for some data-references in the loop the VF does not represent
the number of elements that fit in the vector. Therefore, instead of VF we
use TYPE_VECTOR_SUBPARTS. */
static tree static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{ {
struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
tree var, stmt; tree var, stmt;
tree iters, iters_name; tree iters, iters_name;
...@@ -4805,6 +4809,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) ...@@ -4805,6 +4809,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
tree niters_type = TREE_TYPE (loop_niters); tree niters_type = TREE_TYPE (loop_niters);
int group_size = 1; int group_size = 1;
int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr)))); int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
int nelements = TYPE_VECTOR_SUBPARTS (vectype);
if (DR_GROUP_FIRST_DR (stmt_info)) if (DR_GROUP_FIRST_DR (stmt_info))
{ {
...@@ -4825,7 +4830,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) ...@@ -4825,7 +4830,7 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "known alignment = %d.", byte_misalign); fprintf (vect_dump, "known alignment = %d.", byte_misalign);
iters = build_int_cst (niters_type, iters = build_int_cst (niters_type,
(vf - elem_misalign)&(vf/group_size-1)); (nelements - elem_misalign)&(nelements/group_size-1));
} }
else else
{ {
...@@ -4837,9 +4842,9 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) ...@@ -4837,9 +4842,9 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1); tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1); tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
tree elem_size_log = tree elem_size_log =
build_int_cst (type, exact_log2 (vectype_align/vf)); build_int_cst (type, exact_log2 (vectype_align/nelements));
tree vf_minus_1 = build_int_cst (type, vf - 1); tree nelements_minus_1 = build_int_cst (type, nelements - 1);
tree vf_tree = build_int_cst (type, vf); tree nelements_tree = build_int_cst (type, nelements);
tree byte_misalign; tree byte_misalign;
tree elem_misalign; tree elem_misalign;
...@@ -4854,9 +4859,9 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters) ...@@ -4854,9 +4859,9 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
elem_misalign = elem_misalign =
fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log); fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
/* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */ /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
iters = fold_build2 (MINUS_EXPR, type, vf_tree, elem_misalign); iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
iters = fold_build2 (BIT_AND_EXPR, type, iters, vf_minus_1); iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
iters = fold_convert (niters_type, iters); iters = fold_convert (niters_type, iters);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment