Commit a1f072e2 by Richard Biener Committed by Richard Biener

re PR tree-optimization/87105 (Autovectorization [X86, SSE2, AVX2, DoublePrecision])

2018-10-26  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/87105
	* tree-vectorizer.h (_slp_tree::refcnt): New member.
	* tree-vect-slp.c (vect_free_slp_tree): Decrement and honor
	refcnt.
	(vect_create_new_slp_node): Initialize refcnt to one.
	(bst_traits): Move.
	(scalar_stmts_set_t, bst_fail): Remove.
	(vect_build_slp_tree_2): Add bst_map argument and adjust calls.
	(vect_build_slp_tree): Add bst_map argument and lookup
	already created SLP nodes.
	(vect_print_slp_tree): Handle a SLP graph, print SLP node
	addresses.
	(vect_slp_rearrange_stmts): Handle a SLP graph.
	(vect_analyze_slp_instance): Adjust and free SLP nodes from
	the CSE map.  Fix indenting.
	(vect_schedule_slp_instance): Add short-cut.

	* g++.dg/vect/slp-pr87105.cc: Adjust.
	* gcc.dg/torture/20181024-1.c: New testcase.
	* g++.dg/opt/20181025-1.C: Likewise.

From-SVN: r265522
parent 74ca1c01
2018-10-26 Richard Biener <rguenther@suse.de>
PR tree-optimization/87105
* tree-vectorizer.h (_slp_tree::refcnt): New member.
* tree-vect-slp.c (vect_free_slp_tree): Decrement and honor
refcnt.
(vect_create_new_slp_node): Initialize refcnt to one.
(bst_traits): Move.
(scalar_stmts_set_t, bst_fail): Remove.
(vect_build_slp_tree_2): Add bst_map argument and adjust calls.
(vect_build_slp_tree): Add bst_map argument and lookup
already created SLP nodes.
(vect_print_slp_tree): Handle a SLP graph, print SLP node
addresses.
(vect_slp_rearrange_stmts): Handle a SLP graph.
(vect_analyze_slp_instance): Adjust and free SLP nodes from
the CSE map. Fix indenting.
(vect_schedule_slp_instance): Add short-cut.
2018-10-26 Martin Liska <mliska@suse.cz>
PR testsuite/86158
2018-10-26 Richard Biener <rguenther@suse.de>
PR tree-optimization/87105
* g++.dg/vect/slp-pr87105.cc: Adjust.
* gcc.dg/torture/20181024-1.c: New testcase.
* g++.dg/opt/20181025-1.C: Likewise.
2018-10-26 Richard Biener <rguenther@suse.de>
PR testsuite/87754
* g++.dg/lto/odr-1_0.C: Fix pattern.
......
// { dg-do compile }
// { dg-options "-Ofast" }
// Minimal vector-like class template.  Only the members that
// norm_sqr () needs are declared here.
template <typename Number>
class Vector {
// Element type and a read-only, pointer-based iterator over elements.
typedef Number value_type;
typedef const value_type *const_iterator;
// Sum-of-squares reduction over the elements.
Number norm_sqr () const;
// Pointer to the first element; only a declaration is visible here.
const_iterator begin () const;
// Element count; norm_sqr () rounds it down to a multiple of 4.
unsigned int dim;
};
// Return X multiplied by itself.
template <typename Number>
static inline Number
local_sqr (const Number x)
{
return x*x;
}
// Accumulate the squares of the elements in
// [begin (), begin () + (dim/4)*4).  Each loop iteration consumes two
// consecutive elements, adding the square of the first to sum0 and the
// square of the second to sum1.  sum2 and sum3 are never updated and
// remain 0.  Returns the sum of all four accumulators.
// NOTE(review): the unused accumulators look like an artifact of
// testcase reduction -- confirm before simplifying.
template <typename Number>
Number
Vector<Number>::norm_sqr () const
{
Number sum0 = 0, sum1 = 0, sum2 = 0, sum3 = 0;
// The end pointer is derived from dim rounded down to a multiple of 4.
const_iterator ptr = begin(), eptr = ptr + (dim/4)*4;
while (ptr!=eptr)
{
sum0 += ::local_sqr(*ptr++);
sum1 += ::local_sqr(*ptr++);
}
return sum0+sum1+sum2+sum3;
}
// Explicit instantiation of Vector for double.
template class Vector<double>;
......@@ -2,7 +2,7 @@
// { dg-require-effective-target c++11 }
// { dg-require-effective-target vect_double }
// For MIN/MAX recognition
// { dg-additional-options "-ffast-math -fvect-cost-model" }
// { dg-additional-options "-ffast-math" }
#include <algorithm>
#include <cmath>
......@@ -99,6 +99,7 @@ void quadBoundingBoxA(const Point bez[3], Box& bBox) noexcept {
// We should have if-converted everything down to straight-line code
// { dg-final { scan-tree-dump-times "<bb \[0-9\]+>" 1 "slp2" } }
// We fail to elide an earlier store which makes us not handle a later
// duplicate one for vectorization.
// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" { xfail *-*-* } } }
// { dg-final { scan-tree-dump-times "basic block part vectorized" 1 "slp2" } }
// It's a bit awkward to detect that all stores were vectorized but the
// following more or less does the trick
// { dg-final { scan-tree-dump "vect_iftmp\[^\r\m\]* = MIN" "slp2" } }
/* { dg-do compile } */
/* { dg-additional-options "-march=core-avx2" { target { x86_64-*-* i?86-*-* } } } */
/* Index names for the values addressed relative to a base offset in
   foo.  The enumerators count up from C = 0, so the final one,
   N_CELL_ENTRIES, evaluates to 20; foo uses it as its loop stride.  */
typedef enum {
C = 0, N, S, E, W, T, B, NE, NW, SE, SW, NT, NB, ST, SB, ET, EB, WT, WB, FLAGS, N_CELL_ENTRIES} CELL_ENTRIES;
/* Flat array of (130)*100*100*N_CELL_ENTRIES doubles.  */
typedef double LBM_Grid[(130)*100*100*N_CELL_ENTRIES];
/* For each i in [0, N_CELL_ENTRIES*100*100) stepping by N_CELL_ENTRIES,
   sum the 19 values indexed C through WB that are read at offset
   N_CELL_ENTRIES*100*100 + i, then store two values derived from that
   sum into srcGrid[C + i] and srcGrid[N + i].

   NOTE(review): rho2, py, ux and uy are read without ever being
   assigned in this function, and ux1, uy1, uz1, ux2, uy2, uz2 are
   never used.  Presumably this is an artifact of testcase reduction --
   confirm before changing it.  */
void foo( LBM_Grid srcGrid )
{
double ux , uy , uz , rho , ux1, uy1, uz1, rho1, ux2, uy2, uz2, rho2, u2, px, py;
int i;
for( i = 0;
i < (N_CELL_ENTRIES*( 100*100));
i += N_CELL_ENTRIES )
{
/* Sum of the 19 loads, each read N_CELL_ENTRIES*100*100 elements
   past the position that is written below.  */
rho1 = + ((srcGrid)[((C)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((N)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((S)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((E)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((W)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((T)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((B)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((NE)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((NW)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((SE)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((SW)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((NT)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((NB)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((ST)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((SB)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((ET)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((EB)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((WT)+N_CELL_ENTRIES*( 100*100))+(i)])
+ ((srcGrid)[((WB)+N_CELL_ENTRIES*( 100*100))+(i)]);
rho = 2.0*rho1 - rho2;
/* (i / N_CELL_ENTRIES) % 100 lies in 0..99; dividing by 49.5 and
   subtracting 1.0 maps it onto [-1.0, 1.0].  */
px = (((i / N_CELL_ENTRIES) % 100) / (0.5*(100-1))) - 1.0;
uz = 0.01 * (1.0-px*px) * (1.0-py*py);
u2 = 1.5 * (ux*ux + uy*uy + uz*uz);
/* The two stores into the position selected by i.  */
(((srcGrid)[((C))+(i)])) = (1.0/ 3.0)*rho*(1.0 - u2);
(((srcGrid)[((N))+(i)])) = (1.0/18.0)*rho*(1.0 + uy*(4.5*uy + 3.0) - u2);
}
}
......@@ -130,6 +130,8 @@ struct _slp_tree {
scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
divided by vector size. */
unsigned int vec_stmts_size;
/* Reference count in the SLP graph. */
unsigned int refcnt;
/* Whether the scalar computations use two different operators. */
bool two_operators;
/* The DEF type of this node. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment