Commit afb119be by Richard Biener Committed by Richard Biener

tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not disable peeling when we version for aliasing.

tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not disable peeling when we version for aliasing.

2013-05-10  Richard Biener  <rguenther@suse.de>

	* tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not
	disable peeling when we version for aliasing.
	(vector_alignment_reachable_p): Honor explicit user alignment.
	(vect_supportable_dr_alignment): Likewise.
	* tree-vect-loop-manip.c (vect_can_advance_ivs_p): Use
	STMT_VINFO_LOOP_PHI_EVOLUTION_PART instead of recomputing it.
	* tree-vect-loop.c (vect_transform_loop): First apply versioning,
	then peeling to arrange for the cost-model check to come first.

	* gcc.target/i386/avx256-unaligned-load-2.c: Make well-defined.
	* gcc.target/i386/l_fma_double_1.c: Adjust.
	* gcc.target/i386/l_fma_double_2.c: Likewise.
	* gcc.target/i386/l_fma_double_3.c: Likewise.
	* gcc.target/i386/l_fma_double_4.c: Likewise.
	* gcc.target/i386/l_fma_double_5.c: Likewise.
	* gcc.target/i386/l_fma_double_6.c: Likewise.
	* gcc.target/i386/l_fma_float_1.c: Likewise.
	* gcc.target/i386/l_fma_float_2.c: Likewise.
	* gcc.target/i386/l_fma_float_3.c: Likewise.
	* gcc.target/i386/l_fma_float_4.c: Likewise.
	* gcc.target/i386/l_fma_float_5.c: Likewise.
	* gcc.target/i386/l_fma_float_6.c: Likewise.

From-SVN: r198767
parent 01ae4861
2013-05-10 Richard Biener <rguenther@suse.de>
* tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not
disable peeling when we version for aliasing.
(vector_alignment_reachable_p): Honor explicit user alignment.
(vect_supportable_dr_alignment): Likewise.
* tree-vect-loop-manip.c (vect_can_advance_ivs_p): Use
STMT_VINFO_LOOP_PHI_EVOLUTION_PART instead of recomputing it.
* tree-vect-loop.c (vect_transform_loop): First apply versioning,
then peeling to arrange for the cost-model check to come first.
2013-05-10 Alan Modra <amodra@gmail.com> 2013-05-10 Alan Modra <amodra@gmail.com>
* configure.ac (HAVE_AS_TLS): Swap powerpc64 and powerpc cases. * configure.ac (HAVE_AS_TLS): Swap powerpc64 and powerpc cases.
......
2013-05-10 Richard Biener <rguenther@suse.de>
* gcc.target/i386/avx256-unaligned-load-2.c: Make well-defined.
* gcc.target/i386/l_fma_double_1.c: Adjust.
* gcc.target/i386/l_fma_double_2.c: Likewise.
* gcc.target/i386/l_fma_double_3.c: Likewise.
* gcc.target/i386/l_fma_double_4.c: Likewise.
* gcc.target/i386/l_fma_double_5.c: Likewise.
* gcc.target/i386/l_fma_double_6.c: Likewise.
* gcc.target/i386/l_fma_float_1.c: Likewise.
* gcc.target/i386/l_fma_float_2.c: Likewise.
* gcc.target/i386/l_fma_float_3.c: Likewise.
* gcc.target/i386/l_fma_float_4.c: Likewise.
* gcc.target/i386/l_fma_float_5.c: Likewise.
* gcc.target/i386/l_fma_float_6.c: Likewise.
2013-05-08 Paolo Carlini <paolo.carlini@oracle.com> 2013-05-08 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/51226 PR c++/51226
......
/* { dg-do compile { target { ! ia32 } } } */ /* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */ /* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
#define N 1024
char **ep;
char **fp;
void void
avx_test (void) avx_test (char **cp, char **ep)
{ {
int i; int i;
char **ap; char **ap = __builtin_assume_aligned (ep, 32);
char **bp; for (i = 128; i > 0; i--)
char **cp;
ap = ep;
bp = fp;
for (i = 128; i >= 0; i--)
{
*ap++ = *cp++; *ap++ = *cp++;
*bp++ = 0;
}
} }
/* { dg-final { scan-assembler-not "avx_loaddqu256" } } */ /* { dg-final { scan-assembler-not "avx_loaddqu256" } } */
......
...@@ -4,23 +4,24 @@ ...@@ -4,23 +4,24 @@
/* Test that the compiler properly optimizes floating point multiply /* Test that the compiler properly optimizes floating point multiply
and add instructions into FMA3 instructions. */ and add instructions into FMA3 instructions. */
#define TYPE double typedef double adouble __attribute__((aligned(sizeof (double))));
#define TYPE adouble
#include "l_fma_1.h" #include "l_fma_1.h"
/* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */ /* { dg-final { scan-assembler-times "vfmadd132sd" 28 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */ /* { dg-final { scan-assembler-times "vfmadd213sd" 28 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */ /* { dg-final { scan-assembler-times "vfmsub132sd" 28 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */ /* { dg-final { scan-assembler-times "vfmsub213sd" 28 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */ /* { dg-final { scan-assembler-times "vfnmadd132sd" 28 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */ /* { dg-final { scan-assembler-times "vfnmadd213sd" 28 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */ /* { dg-final { scan-assembler-times "vfnmsub132sd" 28 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */ /* { dg-final { scan-assembler-times "vfnmsub213sd" 28 } } */
...@@ -4,7 +4,8 @@ ...@@ -4,7 +4,8 @@
/* Test that the compiler properly optimizes floating point multiply /* Test that the compiler properly optimizes floating point multiply
and add instructions into FMA3 instructions. */ and add instructions into FMA3 instructions. */
#define TYPE double typedef double adouble __attribute__((aligned(sizeof (double))));
#define TYPE adouble
#include "l_fma_2.h" #include "l_fma_2.h"
...@@ -12,7 +13,7 @@ ...@@ -12,7 +13,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */
...@@ -4,23 +4,24 @@ ...@@ -4,23 +4,24 @@
/* Test that the compiler properly optimizes floating point multiply /* Test that the compiler properly optimizes floating point multiply
and add instructions into FMA3 instructions. */ and add instructions into FMA3 instructions. */
#define TYPE double typedef double adouble __attribute__((aligned(sizeof (double))));
#define TYPE adouble
#include "l_fma_3.h" #include "l_fma_3.h"
/* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */ /* { dg-final { scan-assembler-times "vfmadd132sd" 28 } } */
/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */ /* { dg-final { scan-assembler-times "vfmadd213sd" 28 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */ /* { dg-final { scan-assembler-times "vfmsub132sd" 28 } } */
/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */ /* { dg-final { scan-assembler-times "vfmsub213sd" 28 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */ /* { dg-final { scan-assembler-times "vfnmadd132sd" 28 } } */
/* { dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */ /* { dg-final { scan-assembler-times "vfnmadd213sd" 28 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */ /* { dg-final { scan-assembler-times "vfnmsub132sd" 28 } } */
/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */ /* { dg-final { scan-assembler-times "vfnmsub213sd" 28 } } */
...@@ -4,7 +4,8 @@ ...@@ -4,7 +4,8 @@
/* Test that the compiler properly optimizes floating point multiply /* Test that the compiler properly optimizes floating point multiply
and add instructions into FMA3 instructions. */ and add instructions into FMA3 instructions. */
#define TYPE double typedef double adouble __attribute__((aligned(sizeof (double))));
#define TYPE adouble
#include "l_fma_4.h" #include "l_fma_4.h"
...@@ -12,7 +13,7 @@ ...@@ -12,7 +13,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */
...@@ -4,7 +4,8 @@ ...@@ -4,7 +4,8 @@
/* Test that the compiler properly optimizes floating point multiply /* Test that the compiler properly optimizes floating point multiply
and add instructions into FMA3 instructions. */ and add instructions into FMA3 instructions. */
#define TYPE double typedef double adouble __attribute__((aligned(sizeof (double))));
#define TYPE adouble
#include "l_fma_5.h" #include "l_fma_5.h"
...@@ -12,7 +13,7 @@ ...@@ -12,7 +13,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */
...@@ -4,7 +4,8 @@ ...@@ -4,7 +4,8 @@
/* Test that the compiler properly optimizes floating point multiply /* Test that the compiler properly optimizes floating point multiply
and add instructions into FMA3 instructions. */ and add instructions into FMA3 instructions. */
#define TYPE double typedef double adouble __attribute__((aligned(sizeof (double))));
#define TYPE adouble
#include "l_fma_6.h" #include "l_fma_6.h"
...@@ -12,7 +13,7 @@ ...@@ -12,7 +13,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */
/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */
/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */ /* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */
...@@ -9,18 +9,18 @@ ...@@ -9,18 +9,18 @@
#include "l_fma_1.h" #include "l_fma_1.h"
/* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */ /* { dg-final { scan-assembler-times "vfmadd132ss" 60 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */ /* { dg-final { scan-assembler-times "vfmadd213ss" 60 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */ /* { dg-final { scan-assembler-times "vfmsub132ss" 60 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */ /* { dg-final { scan-assembler-times "vfmsub213ss" 60 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ss" 60 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */ /* { dg-final { scan-assembler-times "vfnmadd213ss" 60 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ss" 60 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */ /* { dg-final { scan-assembler-times "vfnmsub213ss" 60 } } */
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfmadd132ss" 120 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfmsub132ss" 120 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ss" 120 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ss" 120 } } */
...@@ -9,18 +9,18 @@ ...@@ -9,18 +9,18 @@
#include "l_fma_3.h" #include "l_fma_3.h"
/* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */ /* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */ /* { dg-final { scan-assembler-times "vfmadd132ss" 60 } } */
/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */ /* { dg-final { scan-assembler-times "vfmadd213ss" 60 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */ /* { dg-final { scan-assembler-times "vfmsub132ss" 60 } } */
/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */ /* { dg-final { scan-assembler-times "vfmsub213ss" 60 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ss" 60 } } */
/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */ /* { dg-final { scan-assembler-times "vfnmadd213ss" 60 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ss" 60 } } */
/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */ /* { dg-final { scan-assembler-times "vfnmsub213ss" 60 } } */
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfmadd132ss" 120 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfmsub132ss" 120 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ss" 120 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ss" 120 } } */
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfmadd132ss" 120 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfmsub132ss" 120 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ss" 120 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ss" 120 } } */
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfmadd132ss" 120 } } */
/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfmsub132ss" 120 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfnmadd132ss" 120 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */ /* { dg-final { scan-assembler-times "vfnmsub132ss" 120 } } */
...@@ -1024,7 +1024,8 @@ vector_alignment_reachable_p (struct data_reference *dr) ...@@ -1024,7 +1024,8 @@ vector_alignment_reachable_p (struct data_reference *dr)
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Unknown misalignment, is_packed = %d",is_packed); "Unknown misalignment, is_packed = %d",is_packed);
if (targetm.vectorize.vector_alignment_reachable (type, is_packed)) if ((TYPE_USER_ALIGN (type) && !is_packed)
|| targetm.vectorize.vector_alignment_reachable (type, is_packed))
return true; return true;
else else
return false; return false;
...@@ -1323,7 +1324,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1323,7 +1324,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
bool stat; bool stat;
gimple stmt; gimple stmt;
stmt_vec_info stmt_info; stmt_vec_info stmt_info;
int vect_versioning_for_alias_required;
unsigned int npeel = 0; unsigned int npeel = 0;
bool all_misalignments_unknown = true; bool all_misalignments_unknown = true;
unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
...@@ -1510,15 +1510,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo) ...@@ -1510,15 +1510,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
} }
} }
vect_versioning_for_alias_required /* Check if we can possibly peel the loop. */
= LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo); if (!vect_can_advance_ivs_p (loop_vinfo)
/* Temporarily, if versioning for alias is required, we disable peeling
until we support peeling and versioning. Often peeling for alignment
will require peeling for loop-bound, which in turn requires that we
know how to adjust the loop ivs after the loop. */
if (vect_versioning_for_alias_required
|| !vect_can_advance_ivs_p (loop_vinfo)
|| !slpeel_can_duplicate_loop_p (loop, single_exit (loop))) || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
do_peeling = false; do_peeling = false;
...@@ -4722,7 +4715,8 @@ vect_supportable_dr_alignment (struct data_reference *dr, ...@@ -4722,7 +4715,8 @@ vect_supportable_dr_alignment (struct data_reference *dr,
if (!known_alignment_for_access_p (dr)) if (!known_alignment_for_access_p (dr))
is_packed = not_size_aligned (DR_REF (dr)); is_packed = not_size_aligned (DR_REF (dr));
if (targetm.vectorize. if ((TYPE_USER_ALIGN (type) && !is_packed)
|| targetm.vectorize.
support_vector_misalignment (mode, type, support_vector_misalignment (mode, type,
DR_MISALIGNMENT (dr), is_packed)) DR_MISALIGNMENT (dr), is_packed))
/* Can't software pipeline the loads, but can at least do them. */ /* Can't software pipeline the loads, but can at least do them. */
...@@ -4736,7 +4730,8 @@ vect_supportable_dr_alignment (struct data_reference *dr, ...@@ -4736,7 +4730,8 @@ vect_supportable_dr_alignment (struct data_reference *dr,
if (!known_alignment_for_access_p (dr)) if (!known_alignment_for_access_p (dr))
is_packed = not_size_aligned (DR_REF (dr)); is_packed = not_size_aligned (DR_REF (dr));
if (targetm.vectorize. if ((TYPE_USER_ALIGN (type) && !is_packed)
|| targetm.vectorize.
support_vector_misalignment (mode, type, support_vector_misalignment (mode, type,
DR_MISALIGNMENT (dr), is_packed)) DR_MISALIGNMENT (dr), is_packed))
return dr_unaligned_supported; return dr_unaligned_supported;
......
...@@ -1555,7 +1555,6 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo) ...@@ -1555,7 +1555,6 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:"); dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:");
for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{ {
tree access_fn = NULL;
tree evolution_part; tree evolution_part;
phi = gsi_stmt (gsi); phi = gsi_stmt (gsi);
...@@ -1588,31 +1587,13 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo) ...@@ -1588,31 +1587,13 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
/* Analyze the evolution function. */ /* Analyze the evolution function. */
access_fn = instantiate_parameters evolution_part
(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
if (!access_fn)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"No Access function.");
return false;
}
STRIP_NOPS (access_fn);
if (dump_enabled_p ())
{
dump_printf_loc (MSG_NOTE, vect_location,
"Access function of PHI: ");
dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
}
evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
if (evolution_part == NULL_TREE) if (evolution_part == NULL_TREE)
{ {
if (dump_enabled_p ()) if (dump_enabled_p ())
dump_printf (MSG_MISSED_OPTIMIZATION, "No evolution."); dump_printf (MSG_MISSED_OPTIMIZATION,
"No access function or evolution.");
return false; return false;
} }
......
...@@ -5499,19 +5499,22 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -5499,19 +5499,22 @@ vect_transform_loop (loop_vec_info loop_vinfo)
check_profitability = true; check_profitability = true;
} }
/* Peel the loop if there are data refs with unknown alignment. /* Version the loop first, if required, so the profitability check
Only one data ref with unknown store is allowed. */ comes first. */
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)) if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
|| LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
{ {
vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability); vect_loop_versioning (loop_vinfo, th, check_profitability);
check_profitability = false; check_profitability = false;
} }
if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo) /* Peel the loop if there are data refs with unknown alignment.
|| LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)) Only one data ref with unknown store is allowed. */
if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
{ {
vect_loop_versioning (loop_vinfo, th, check_profitability); vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
check_profitability = false; check_profitability = false;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment