Commit cbb22e61 by Bin Cheng Committed by Bin Cheng

re PR tree-optimization/78005 (172.mgrid and 450.soplex miscompare)

	PR tree-optimization/78005
	* tree-vect-loop-manip.c (vect_gen_prolog_loop_niters): Compute
	upper (included) bound for niters of prolog loop.
	(vect_gen_scalar_loop_niters): Change parameter VF to VFM1.
	Compute niters of scalar loop above which vectorized loop is
	preferred, as well as the upper (included) bound for the niters.
	(vect_do_peeling): Record niter bound for loops accordingly.

	gcc/testsuite
	PR tree-optimization/78005
	* gcc.dg/vect/pr78005.c: New.
	* gcc.target/i386/l_fma_float_1.c: Revise test.
	* gcc.target/i386/l_fma_float_2.c: Ditto.
	* gcc.target/i386/l_fma_float_3.c: Ditto.
	* gcc.target/i386/l_fma_float_4.c: Ditto.
	* gcc.target/i386/l_fma_float_5.c: Ditto.
	* gcc.target/i386/l_fma_float_6.c: Ditto.
	* gcc.target/i386/l_fma_double_1.c: Ditto.
	* gcc.target/i386/l_fma_double_2.c: Ditto.
	* gcc.target/i386/l_fma_double_3.c: Ditto.
	* gcc.target/i386/l_fma_double_4.c: Ditto.
	* gcc.target/i386/l_fma_double_5.c: Ditto.
	* gcc.target/i386/l_fma_double_6.c: Ditto.

From-SVN: r241339
parent 3b834a2e
2016-10-19 Bin Cheng <bin.cheng@arm.com>
PR tree-optimization/78005
* tree-vect-loop-manip.c (vect_gen_prolog_loop_niters): Compute
upper (included) bound for niters of prolog loop.
(vect_gen_scalar_loop_niters): Change parameter VF to VFM1.
Compute niters of scalar loop above which vectorized loop is
preferred, as well as the upper (included) bound for the niters.
(vect_do_peeling): Record niter bound for loops accordingly.
2016-10-19 Thomas Schwinge <thomas@codesourcery.com> 2016-10-19 Thomas Schwinge <thomas@codesourcery.com>
PR lto/77458 PR lto/77458
...@@ -1071,8 +1081,6 @@ ...@@ -1071,8 +1081,6 @@
(vect_can_advance_ivs_p): Call iv_phi_p. (vect_can_advance_ivs_p): Call iv_phi_p.
(vect_update_ivs_after_vectorizer): Call iv_phi_p. Directly insert (vect_update_ivs_after_vectorizer): Call iv_phi_p. Directly insert
new gimple stmts in basic block. new gimple stmts in basic block.
(vect_do_peeling_for_loop_bound):
(vect_do_peeling_for_alignment):
(vect_gen_niters_for_prolog_loop): Rename to... (vect_gen_niters_for_prolog_loop): Rename to...
(vect_gen_prolog_loop_niters): ...Rename from. Change parameters and (vect_gen_prolog_loop_niters): ...Rename from. Change parameters and
adjust implementation. adjust implementation.
......
2016-10-19 Bin Cheng <bin.cheng@arm.com>
PR tree-optimization/78005
* gcc.dg/vect/pr78005.c: New.
* gcc.target/i386/l_fma_float_1.c: Revise test.
* gcc.target/i386/l_fma_float_2.c: Ditto.
* gcc.target/i386/l_fma_float_3.c: Ditto.
* gcc.target/i386/l_fma_float_4.c: Ditto.
* gcc.target/i386/l_fma_float_5.c: Ditto.
* gcc.target/i386/l_fma_float_6.c: Ditto.
* gcc.target/i386/l_fma_double_1.c: Ditto.
* gcc.target/i386/l_fma_double_2.c: Ditto.
* gcc.target/i386/l_fma_double_3.c: Ditto.
* gcc.target/i386/l_fma_double_4.c: Ditto.
* gcc.target/i386/l_fma_double_5.c: Ditto.
* gcc.target/i386/l_fma_double_6.c: Ditto.
2016-10-19 Thomas Schwinge <thomas@codesourcery.com> 2016-10-19 Thomas Schwinge <thomas@codesourcery.com>
PR tree-optimization/78024 PR tree-optimization/78024
......
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
/* Number of elements in each of the arrays below.  */
#define N 20
/* Array updated in place by foo.  It starts out as u[i] == i, and check_u
   restores that state after every comparison.  */
int u[N] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
/* Read-only input of foo; z[i] == i - 1.  */
int z[N] = {-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18};
/* resK is the expected content of u after foo (K, 2) has run on the initial
   u.  Only the even-indexed elements u[2] .. u[2*K-4] change (with d == 2,
   u[2*i-2] grows by 12*i - 18); every other element keeps its initial
   value.  */
int res4[N] = {0, 1, 8, 3, 22, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
int res5[N] = {0, 1, 8, 3, 22, 5, 36, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
int res6[N] = {0, 1, 8, 3, 22, 5, 36, 7, 50, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
int res7[N] = {0, 1, 8, 3, 22, 5, 36, 7, 50, 9, 64, 11, 12, 13, 14, 15, 16, 17, 18, 19};
int res8[N] = {0, 1, 8, 3, 22, 5, 36, 7, 50, 9, 64, 11, 78, 13, 14, 15, 16, 17, 18, 19};
int res9[N] = {0, 1, 8, 3, 22, 5, 36, 7, 50, 9, 64, 11, 78, 13, 92, 15, 16, 17, 18, 19};
int res10[N] = {0, 1, 8, 3, 22, 5, 36, 7, 50, 9, 64, 11, 78, 13, 92, 15, 106, 17, 18, 19};
/* Kernel under test.  For every i in [2, N_ITER) where N_ITER is the
   argument n, add d * 3 * (z[i-1] + z[i]) to u[2*i-2], i.e. update the
   even-indexed elements u[2], u[4], ..., u[2*n-4] in place.  The caller
   must keep 2*n-4 below N; the largest n used in this file is 10.

   The function is marked noinline so it is never inlined into main.
   NOTE(review): the strided store and the repeated z operands look
   deliberate for reproducing PR 78005 -- do not simplify the expression.  */
__attribute__ ((noinline)) void
foo (int n, int d)
{
int i;
for (i = 2; i < n; i++)
u[2*i-2] = u[2*i-2] + d * (z[i-1] + z[i] + z[i-1] + z[i] + z[i-1] + z[i]);
}
/* Run foo (x, 2), compare every element of u against the expected array
   res<x> (selected by token pasting, so X must be a literal for which a
   res<x> array exists), abort on the first mismatch, and reset each
   checked element to its initial value u[i] == i for the next check.

   The expansion uses a variable named i from the enclosing scope and
   consists of two statements that are not wrapped in a block, so the
   macro must only be used as a complete statement, never as the
   unbraced body of an if/else or loop.  */
#define check_u(x) \
foo (x, 2); \
for (i = 0; i < N; i++) \
{ \
if (u[i] != res##x[i]) \
abort (); \
u[i] = i; \
}
/* Test driver: run the kernel for every loop bound from 4 to 10 and
   verify u against the matching expected array after each run.  Returns
   0 when all checks pass; a mismatch aborts inside check_u.  */
int main(void)
{
/* Loop index used by the expansion of check_u.  */
int i;
/* Declared in tree-vect.h, which is not visible here; presumably it
   verifies at run time that the target can execute vector code --
   TODO confirm.  */
check_vect ();
/* Need to check for all possible vector factors. */
/* The bounds 4 .. 10 give foo between 2 and 8 loop iterations.  */
check_u(4);
check_u(5);
check_u(6);
check_u(7);
check_u(8);
check_u(9);
check_u(10);
return 0;
}
...@@ -13,7 +13,7 @@ typedef double adouble __attribute__((aligned(sizeof (double)))); ...@@ -13,7 +13,7 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 80 } } */
...@@ -13,7 +13,7 @@ typedef double adouble __attribute__((aligned(sizeof (double)))); ...@@ -13,7 +13,7 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 80 } } */
...@@ -13,7 +13,7 @@ typedef double adouble __attribute__((aligned(sizeof (double)))); ...@@ -13,7 +13,7 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 80 } } */
...@@ -13,7 +13,7 @@ typedef double adouble __attribute__((aligned(sizeof (double)))); ...@@ -13,7 +13,7 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 80 } } */
...@@ -13,7 +13,7 @@ typedef double adouble __attribute__((aligned(sizeof (double)))); ...@@ -13,7 +13,7 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 80 } } */
...@@ -13,7 +13,7 @@ typedef double adouble __attribute__((aligned(sizeof (double)))); ...@@ -13,7 +13,7 @@ typedef double adouble __attribute__((aligned(sizeof (double))));
/* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+pd" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfmadd\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+sd" 80 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 88 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+sd" 80 } } */
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 176 } } */
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 176 } } */
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 176 } } */
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 176 } } */
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 176 } } */
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+ps" 8 } } */
/* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfmadd\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfmsub\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfnmadd\[123\]+ss" 176 } } */
/* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 184 } } */ /* { dg-final { scan-assembler-times "vfnmsub\[123\]+ss" 176 } } */
...@@ -904,7 +904,7 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, ...@@ -904,7 +904,7 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
is the inner type of the vectype) is the inner type of the vectype)
The computations will be emitted at the end of BB. We also compute and The computations will be emitted at the end of BB. We also compute and
store upper bound of the result in BOUND. store upper bound (included) of the result in BOUND.
When the step of the data-ref in the loop is not 1 (as in interleaved data When the step of the data-ref in the loop is not 1 (as in interleaved data
and SLP), the number of iterations of the prolog must be divided by the step and SLP), the number of iterations of the prolog must be divided by the step
...@@ -941,7 +941,7 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo, ...@@ -941,7 +941,7 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
"known peeling = %d.\n", npeel); "known peeling = %d.\n", npeel);
iters = build_int_cst (niters_type, npeel); iters = build_int_cst (niters_type, npeel);
*bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) + 1; *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
} }
else else
{ {
...@@ -976,7 +976,7 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo, ...@@ -976,7 +976,7 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign); iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1); iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
iters = fold_convert (niters_type, iters); iters = fold_convert (niters_type, iters);
*bound = nelements; *bound = nelements - 1;
} }
if (dump_enabled_p ()) if (dump_enabled_p ())
...@@ -1090,43 +1090,47 @@ vect_build_loop_niters (loop_vec_info loop_vinfo) ...@@ -1090,43 +1090,47 @@ vect_build_loop_niters (loop_vec_info loop_vinfo)
} }
} }
/* Calculate the number of iterations under which scalar loop will be /* Calculate the number of iterations above which vectorized loop will be
preferred than vectorized loop. NITERS_PROLOG is the number of preferred than scalar loop. NITERS_PROLOG is the number of iterations
iterations of prolog loop. If it's integer const, the integer of prolog loop. If it's integer const, the integer number is also passed
number is also passed by INT_NITERS_PROLOG. VF is vector factor; in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (included) of
TH is the threshold for vectorized loop if CHECK_PROFITABILITY is number of iterations of prolog loop. VFM1 is vector factor minus one.
true. This function also store upper bound of the result in BOUND. */ If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
(rather than vectorized) loop will be executed. This function stores
upper bound (included) of the result in BOUND_SCALAR. */
static tree static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog, vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
int bound_prolog, int vf, int th, int *bound, int bound_prolog, int vfm1, int th,
bool check_profitability) int *bound_scalar, bool check_profitability)
{ {
tree type = TREE_TYPE (niters_prolog); tree type = TREE_TYPE (niters_prolog);
tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog, tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
build_int_cst (type, vf)); build_int_cst (type, vfm1));
*bound = vf + bound_prolog; *bound_scalar = vfm1 + bound_prolog;
if (check_profitability) if (check_profitability)
{ {
th++; /* TH indicates the minimum niters of vectorized loop, while we
compute the maximum niters of scalar loop. */
th--;
/* Peeling for constant times. */ /* Peeling for constant times. */
if (int_niters_prolog >= 0) if (int_niters_prolog >= 0)
{ {
*bound = (int_niters_prolog + vf < th *bound_scalar = (int_niters_prolog + vfm1 < th
? th ? th
: vf + int_niters_prolog); : vfm1 + int_niters_prolog);
return build_int_cst (type, *bound); return build_int_cst (type, *bound_scalar);
} }
/* Peeling for unknown times, in this case, prolog loop must /* Peeling for unknown times. Note BOUND_PROLOG is the upper
execute less than bound_prolog times. */ bound (included) of niters of prolog loop. */
if (th >= vf + bound_prolog - 1) if (th >= vfm1 + bound_prolog)
{ {
*bound = th; *bound_scalar = th;
return build_int_cst (type, th); return build_int_cst (type, th);
} }
/* Need to do runtime comparison, but bound remains the same. */ /* Need to do runtime comparison, but BOUND_SCALAR remains the same. */
else if (th > vf) else if (th > vfm1)
return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters); return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
} }
return niters; return niters;
...@@ -1620,7 +1624,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, ...@@ -1620,7 +1624,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
tree type = TREE_TYPE (niters), guard_cond; tree type = TREE_TYPE (niters), guard_cond;
basic_block guard_bb, guard_to; basic_block guard_bb, guard_to;
int prob_prolog, prob_vector, prob_epilog; int prob_prolog, prob_vector, prob_epilog;
int bound_prolog = 0, bound_epilog = 0, bound = 0; int bound_prolog = 0, bound_scalar = 0, bound = 0;
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
...@@ -1721,9 +1725,9 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, ...@@ -1721,9 +1725,9 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog); LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
niters = vect_build_loop_niters (loop_vinfo); niters = vect_build_loop_niters (loop_vinfo);
/* Prolog iterates at most bound_prolog - 1 times, latch iterates /* Prolog iterates at most bound_prolog times, latch iterates at
at most bound_prolog - 2 times. */ most bound_prolog - 1 times. */
record_niter_bound (prolog, bound_prolog - 2, false, true); record_niter_bound (prolog, bound_prolog - 1, false, true);
delete_update_ssa (); delete_update_ssa ();
adjust_vec_debug_stmts (); adjust_vec_debug_stmts ();
scev_reset (); scev_reset ();
...@@ -1754,16 +1758,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, ...@@ -1754,16 +1758,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
won't be vectorized. */ won't be vectorized. */
if (skip_vector) if (skip_vector)
{ {
/* Guard_cond needs is based on NITERSM1 because NITERS might /* Additional epilogue iteration is peeled if gap exists. */
overflow, so here it is niters_scalar - 1 generated. In bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
other words, both niters_scalar and bound_epilog are for
scalar loop's latch. */
tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling, tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
bound_prolog, vf - 1, th - 1, bound_prolog,
&bound_epilog, peel_for_gaps ? vf : vf - 1,
th, &bound_scalar,
check_profitability); check_profitability);
guard_cond = fold_build2 (LT_EXPR, boolean_type_node, /* Build guard against NITERSM1 since NITERS may overflow. */
nitersm1, t); guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
guard_bb = anchor; guard_bb = anchor;
guard_to = split_edge (loop_preheader_edge (epilog)); guard_to = split_edge (loop_preheader_edge (epilog));
guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
...@@ -1772,7 +1775,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, ...@@ -1772,7 +1775,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
e = EDGE_PRED (guard_to, 0); e = EDGE_PRED (guard_to, 0);
e = (e != guard_e ? e : EDGE_PRED (guard_to, 1)); e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e); slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
scale_loop_profile (epilog, prob_vector, bound_epilog); scale_loop_profile (epilog, prob_vector, bound_scalar);
} }
tree niters_vector_mult_vf; tree niters_vector_mult_vf;
...@@ -1807,10 +1810,10 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, ...@@ -1807,10 +1810,10 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
else else
slpeel_update_phi_nodes_for_lcssa (epilog); slpeel_update_phi_nodes_for_lcssa (epilog);
bound = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf * 2 : vf) - 2; bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
/* We share epilog loop with scalar version loop. */ /* We share epilog loop with scalar version loop. */
bound_epilog = MAX (bound, bound_epilog - 1); bound = MAX (bound, bound_scalar - 1);
record_niter_bound (epilog, bound_epilog, false, true); record_niter_bound (epilog, bound, false, true);
delete_update_ssa (); delete_update_ssa ();
adjust_vec_debug_stmts (); adjust_vec_debug_stmts ();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment