Commit fb607032 by Richard Biener Committed by Richard Biener

re PR tree-optimization/83232 (fma3d spec2000 regression on zen with -Ofast…

re PR tree-optimization/83232 (fma3d spec2000 regression on zen with -Ofast (generic tuning) after r255268 by missed SLP opportunity)

2017-12-01  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/83232
	* tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Fix
	detection of same access. Instead of breaking the group here
	do not consider the duplicate.  Add comment explaining real fix.

	* gfortran.dg/vect/pr83232.f90: New testcase.

From-SVN: r255307
parent d5f9df6a
2017-12-01 Richard Biener <rguenther@suse.de>
PR tree-optimization/83232
* tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Fix
detection of same access. Instead of breaking the group here
do not consider the duplicate. Add comment explaining real fix.
2017-12-01 Jonathan Wakely <jwakely@redhat.com>
* doc/md.texi (Insn Splitting): Fix "central flowgraph" typo.
2017-12-01 Richard Biener <rguenther@suse.de>
PR tree-optimization/83232
* gfortran.dg/vect/pr83232.f90: New testcase.
2017-12-01 Sudakshina Das <sudi.das@arm.com>
* gcc.target/arm/armv8_2-fp16-move-2.c: New test.
......
! { dg-do compile }
! { dg-require-effective-target vect_double }
! { dg-additional-options "-funroll-loops --param vect-max-peeling-for-alignment=0 -fdump-tree-slp-details" }
! Test routine for PR tree-optimization/83232 (SLP vectorization).
!
! The statements tagged (1), (2) and (3) below are the groups referred to
! by the scan directives that follow this subroutine.  Keep the statement
! order and shape unchanged: the test counts how many of these groups the
! SLP vectorizer handles.
!
! Arguments used by the body:
!   STRESS  - 6-element double-precision array, updated in place.
!   DTnext  - scalar factor applied to each D* input.
!   Dxx..Dyz - six scalar inputs, multiplied by DTnext to fill Einc.
!   P1, P3  - scalar coefficients (declared without INTENT; only read here).
! YLDC, EFPS and MatID appear in the dummy argument list but are neither
! declared nor referenced in the body, so they are implicitly typed.
SUBROUTINE MATERIAL_41_INTEGRATION ( STRESS,YLDC,EFPS, &
& DTnext,Dxx,Dyy,Dzz,Dxy,Dxz,Dyz,MatID,P1,P3 )
REAL(KIND(0D0)), INTENT(INOUT) :: STRESS(6)
REAL(KIND(0D0)), INTENT(IN) :: DTnext
REAL(KIND(0D0)), INTENT(IN) :: Dxx,Dyy,Dzz,Dxy,Dxz,Dyz
REAL(KIND(0D0)) :: Einc(6)
REAL(KIND(0D0)) :: P1,P3
! Group (1): six stores to consecutive elements of the local array Einc,
! each the product of DTnext and one of the D* inputs.
Einc(1) = DTnext * Dxx ! (1)
Einc(2) = DTnext * Dyy
Einc(3) = DTnext * Dzz
Einc(4) = DTnext * Dxy
Einc(5) = DTnext * Dxz
Einc(6) = DTnext * Dyz
! Add P3*Einc(i) to every element of STRESS.  The loop index i is not
! declared; the routine has no IMPLICIT NONE, so it is implicitly typed.
DO i = 1,6
STRESS(i) = STRESS(i) + P3*Einc(i)
ENDDO
! Group (2): the same increment is added to STRESS(1) and STRESS(2).
STRESS(1) = STRESS(1) + (DTnext * P1 * (Dxx+Dyy+Dzz)) ! (2)
STRESS(2) = STRESS(2) + (DTnext * P1 * (Dxx+Dyy+Dzz))
! STRESS(3) is overwritten after having been updated by the loop above.
STRESS(3) = 0.0
! Group (3): a second pair of stores to Einc(5) and Einc(6), which were
! already written in group (1).
Einc(5) = 0.0 ! (3)
Einc(6) = 0.0
! foo has no interface visible in this file.  NOTE(review): passing Einc
! to it presumably keeps the stores to Einc from being optimized away -
! confirm against the PR discussion.
call foo (Einc)
END SUBROUTINE
! We should vectorize (1) and (2)
! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "slp1" } }
! We currently fail to vectorize (3); this can be fixed in the future.
! { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "slp1" { xfail *-*-* } } }
...@@ -2841,10 +2841,6 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) ...@@ -2841,10 +2841,6 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0) if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
break; break;
/* Do not place the same access in the interleaving chain twice. */
if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0)
break;
/* Check the types are compatible. /* Check the types are compatible.
??? We don't distinguish this during sorting. */ ??? We don't distinguish this during sorting. */
if (!types_compatible_p (TREE_TYPE (DR_REF (dra)), if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
...@@ -2854,7 +2850,25 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) ...@@ -2854,7 +2850,25 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
/* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */ /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra)); HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb)); HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
gcc_assert (init_a <= init_b); HOST_WIDE_INT init_prev
= TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));
gcc_assert (init_a <= init_b
&& init_a <= init_prev
&& init_prev <= init_b);
/* Do not place the same access in the interleaving chain twice. */
if (init_b == init_prev)
{
gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
< gimple_uid (DR_STMT (drb)));
/* ??? For now we simply "drop" the later reference which is
otherwise the same rather than finishing off this group.
In the end we'd want to re-process duplicates forming
multiple groups from the refs, likely by just collecting
all candidates (including duplicates and split points
below) in a vector and then process them together. */
continue;
}
/* If init_b == init_a + the size of the type * k, we have an /* If init_b == init_a + the size of the type * k, we have an
interleaving, and DRA is accessed before DRB. */ interleaving, and DRA is accessed before DRB. */
...@@ -2866,10 +2880,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) ...@@ -2866,10 +2880,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
/* If we have a store, the accesses are adjacent. This splits /* If we have a store, the accesses are adjacent. This splits
groups into chunks we support (we don't support vectorization groups into chunks we support (we don't support vectorization
of stores with gaps). */ of stores with gaps). */
if (!DR_IS_READ (dra) if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
&& (init_b - (HOST_WIDE_INT) TREE_INT_CST_LOW
(DR_INIT (datarefs_copy[i-1]))
!= type_size_a))
break; break;
/* If the step (if not zero or non-constant) is greater than the /* If the step (if not zero or non-constant) is greater than the
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment