Commit 1612b1fe by Jakub Jelinek Committed by Jakub Jelinek

omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument...

	* omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument,
	create another "omp scan inscan exclusive" array if
	!ctx->scan_inclusive.
	(lower_rec_input_clauses): Handle exclusive scan inscan reductions.
	(lower_omp_scan): Likewise.
	* tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of
	2-bit bitfield for simd_lane_access_p member.
	* tree-vect-data-refs.c (vect_analyze_data_refs): Also handle
	aux == (void *)-4 as simd lane access.
	* tree-vect-stmts.c (check_scan_store): Handle exclusive scan.  Update
	comment with permutations to show the canonical permutation order.
	(vectorizable_scan_store): Handle exclusive scan.
	(vectorizable_store): Call vectorizable_scan_store even for
	STMT_VINFO_SIMD_LANE_ACCESS_P > 3.

	* gcc.dg/vect/vect-simd-12.c: New test.
	* gcc.dg/vect/vect-simd-13.c: New test.
	* gcc.dg/vect/vect-simd-14.c: New test.
	* gcc.dg/vect/vect-simd-15.c: New test.
	* gcc.target/i386/sse2-vect-simd-12.c: New test.
	* gcc.target/i386/sse2-vect-simd-13.c: New test.
	* gcc.target/i386/sse2-vect-simd-14.c: New test.
	* gcc.target/i386/sse2-vect-simd-15.c: New test.
	* gcc.target/i386/avx2-vect-simd-12.c: New test.
	* gcc.target/i386/avx2-vect-simd-13.c: New test.
	* gcc.target/i386/avx2-vect-simd-14.c: New test.
	* gcc.target/i386/avx2-vect-simd-15.c: New test.
	* gcc.target/i386/avx512f-vect-simd-12.c: New test.
	* gcc.target/i386/avx512f-vect-simd-13.c: New test.
	* gcc.target/i386/avx512f-vect-simd-14.c: New test.
	* gcc.target/i386/avx512bw-vect-simd-15.c: New test.
	* g++.dg/vect/simd-6.cc: New test.
	* g++.dg/vect/simd-7.cc: New test.
	* g++.dg/vect/simd-8.cc: New test.
	* g++.dg/vect/simd-9.cc: New test.
	* c-c++-common/gomp/scan-2.c: Don't expect any diagnostics.

From-SVN: r272544
parent e73fb06d
2019-06-21 Jakub Jelinek <jakub@redhat.com> 2019-06-21 Jakub Jelinek <jakub@redhat.com>
* omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument,
create another "omp scan inscan exclusive" array if
!ctx->scan_inclusive.
(lower_rec_input_clauses): Handle exclusive scan inscan reductions.
(lower_omp_scan): Likewise.
* tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of
2-bit bitfield for simd_lane_access_p member.
* tree-vect-data-refs.c (vect_analyze_data_refs): Also handle
aux == (void *)-4 as simd lane access.
* tree-vect-stmts.c (check_scan_store): Handle exclusive scan. Update
comment with permutations to show the canonical permutation order.
(vectorizable_scan_store): Handle exclusive scan.
(vectorizable_store): Call vectorizable_scan_store even for
STMT_VINFO_SIMD_LANE_ACCESS_P > 3.
* tree-vect-data-refs.c (vect_find_stmt_data_reference): Handle * tree-vect-data-refs.c (vect_find_stmt_data_reference): Handle
"omp simd array" arrays with one byte elements. "omp simd array" arrays with one byte elements.
......
2019-06-21 Jakub Jelinek <jakub@redhat.com> 2019-06-21 Jakub Jelinek <jakub@redhat.com>
* gcc.dg/vect/vect-simd-12.c: New test.
* gcc.dg/vect/vect-simd-13.c: New test.
* gcc.dg/vect/vect-simd-14.c: New test.
* gcc.dg/vect/vect-simd-15.c: New test.
* gcc.target/i386/sse2-vect-simd-12.c: New test.
* gcc.target/i386/sse2-vect-simd-13.c: New test.
* gcc.target/i386/sse2-vect-simd-14.c: New test.
* gcc.target/i386/sse2-vect-simd-15.c: New test.
* gcc.target/i386/avx2-vect-simd-12.c: New test.
* gcc.target/i386/avx2-vect-simd-13.c: New test.
* gcc.target/i386/avx2-vect-simd-14.c: New test.
* gcc.target/i386/avx2-vect-simd-15.c: New test.
* gcc.target/i386/avx512f-vect-simd-12.c: New test.
* gcc.target/i386/avx512f-vect-simd-13.c: New test.
* gcc.target/i386/avx512f-vect-simd-14.c: New test.
* gcc.target/i386/avx512bw-vect-simd-15.c: New test.
* g++.dg/vect/simd-6.cc: New test.
* g++.dg/vect/simd-7.cc: New test.
* g++.dg/vect/simd-8.cc: New test.
* g++.dg/vect/simd-9.cc: New test.
* c-c++-common/gomp/scan-2.c: Don't expect any diagnostics.
PR c++/90950 PR c++/90950
* g++.dg/gomp/lastprivate-1.C: New test. * g++.dg/gomp/lastprivate-1.C: New test.
......
...@@ -8,7 +8,7 @@ f1 (int *c, int *d) ...@@ -8,7 +8,7 @@ f1 (int *c, int *d)
for (i = 0; i < 64; i++) for (i = 0; i < 64; i++)
{ {
d[i] = a; d[i] = a;
#pragma omp scan exclusive (a) /* { dg-message "sorry, unimplemented: '#pragma omp scan' not supported yet" } */ #pragma omp scan exclusive (a)
a += c[i]; a += c[i];
} }
} }
// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
#include "../../gcc.dg/vect/tree-vect.h"
// Wrapper around one value of type T whose default constructor, destructor,
// copy constructor and copy assignment are all user-provided and defined out
// of line below.
template <typename T>
struct S {
inline S ();
inline ~S ();
inline S (const S &);
inline S & operator= (const S &);
T s;
};
// Default constructor: starts the payload at zero.
template <typename T>
S<T>::S () : s (0)
{
}
// Destructor: empty body.
template <typename T>
S<T>::~S ()
{
}
// Copy constructor: copies the payload by assignment in the body.
template <typename T>
S<T>::S (const S &x)
{
s = x.s;
}
// Copy assignment: copies the payload and returns *this.
template <typename T>
S<T> &
S<T>::operator= (const S &x)
{
s = x.s;
return *this;
}
// Resets x's payload to zero.  Used as the initializer expression of the
// "plus" user-defined reduction declared further down in this test.
template <typename T>
static inline void
ini (S<T> &x)
{
x.s = 0;
}
// Global reduction variable r, the array a that the scans read and the
// array b that receives the per-iteration scan results.
S<int> r, a[1024], b[1024];
// Two user-defined reductions on S<int> that both add the payloads: "+" has
// no initializer clause, "plus" initializes the private copy through ini.
#pragma omp declare reduction (+: S<int>: omp_out.s += omp_in.s)
#pragma omp declare reduction (plus: S<int>: omp_out.s += omp_in.s) initializer (ini (omp_priv))
// Exclusive scan over a[0..1023] using the "+" reduction on the global r:
// b[i] receives r before a[i].s has been added to it.
template <typename T>
__attribute__((noipa)) void
foo (S<T> *a, S<T> *b)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
// Exclusive scan of 2 * a[i].s into a local S<T> using the "plus" reduction;
// results are stored in the global b and a copy of the final value is
// returned.
template <typename T>
__attribute__((noipa)) S<T>
bar (void)
{
S<T> s;
#pragma omp simd reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return S<T> (s);
}
// Same exclusive "+" scan on the global r as foo, but non-template and with
// a simdlen(1) clause on the simd construct.
__attribute__((noipa)) void
baz (S<int> *a, S<int> *b)
{
#pragma omp simd reduction (inscan, +:r) simdlen(1)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
// Same exclusive "plus" scan of 2 * a[i].s as bar, but non-template and with
// an if (0) clause on the simd construct.
__attribute__((noipa)) S<int>
qux (void)
{
S<int> s;
#pragma omp simd if (0) reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return S<int> (s);
}
// Test driver: fills a with 0..1023, runs foo, bar, baz and qux in turn and
// aborts if a final reduction value or any exclusive prefix stored in b
// differs from the value recomputed serially here.
int
main ()
{
S<int> s;
check_vect ();
// a[i] = i; b starts at -1, a value no correct scan result can equal.
for (int i = 0; i < 1024; ++i)
{
a[i].s = i;
b[i].s = -1;
// NOTE(review): empty asm with i as an in/out operand; presumably keeps this
// setup loop from being vectorized -- confirm.
asm ("" : "+g" (i));
}
foo (a, b);
// 1024 * 1023 / 2 is the sum of 0..1023.
if (r.s != 1024 * 1023 / 2)
abort ();
// s is the serial exclusive prefix sum; b is overwritten with 25 once checked.
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
// bar scans 2 * a[i], so the total is twice the sum above.
if (bar<int> ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
// Reset the global accumulator before repeating the first scan through baz.
r.s = 0;
baz (a, b);
if (r.s != 1024 * 1023 / 2)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (qux ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
return 0;
}
// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
#include "../../gcc.dg/vect/tree-vect.h"
// Global accumulators r and q, the array a that the scans read and the
// array b that receives the per-iteration scan results.
int r, a[1024], b[1024], q;
// Exclusive "+" scan over a[0..1023] accumulating into the parameter r;
// b[i] receives r before a[i] is added.  main instantiates this with
// T = int * and U = int &, so r refers to the caller's variable.
template <typename T, typename U>
__attribute__((noipa)) void
foo (T a, T b, U r)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
// Exclusive "+" scan of 2 * a[i] through s, a reference bound to the global
// q (zeroed first); stores the prefixes in the global b and returns the
// final sum.
template <typename T>
__attribute__((noipa)) T
bar (void)
{
T &s = q;
q = 0;
#pragma omp simd reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
// Exclusive "+" scan of a[i] into the reference parameter r, with an
// if (simd: 0) clause on the simd construct and a loop counter of type T.
template <typename T>
__attribute__((noipa)) void
baz (T *a, T *b, T &r)
{
#pragma omp simd reduction (inscan, +:r) if (simd: 0)
for (T i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
// Exclusive "+" scan of 2 * a[i] into s with a simdlen (1) clause.  main
// instantiates this with T = int &, which makes s a reference to the global
// q; q is zeroed before the loop.
template <typename T>
__attribute__((noipa)) int
qux (void)
{
T s = q;
q = 0;
#pragma omp simd reduction (inscan, +:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
// Test driver: fills a with 0..1023, runs foo, bar, baz and qux in turn and
// aborts if a final reduction value or any exclusive prefix stored in b
// differs from the value recomputed serially here.
int
main ()
{
int s = 0;
check_vect ();
// a[i] = i; b starts at -1, a value no correct scan result can equal.
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
// NOTE(review): empty asm with i as an in/out operand; presumably keeps this
// setup loop from being vectorized -- confirm.
asm ("" : "+g" (i));
}
foo<int *, int &> (a, b, r);
// 1024 * 1023 / 2 is the sum of 0..1023.
if (r != 1024 * 1023 / 2)
abort ();
// s is the serial exclusive prefix; each checked b[i] is overwritten so the
// next scan cannot pass on stale data.
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
// bar scans 2 * a[i], so the total is twice the sum above.
if (bar<int> () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
// Reset the global accumulator before repeating the first scan through baz.
r = 0;
baz<int> (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux<int &> () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
return 0;
}
// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
#include "../../gcc.dg/vect/tree-vect.h"
// Global accumulators r and q, the array a that the scans read and the
// array b that receives the per-iteration scan results.
int r, a[1024], b[1024], q;
// User-defined reduction "foo" on int: adds omp_in into omp_out and starts
// each private copy at 0.
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
// Exclusive scan over a[0..1023] using the user-defined "foo" reduction on
// the reference parameter r; b[i] receives r before a[i] is added.
__attribute__((noipa)) void
foo (int *a, int *b, int &r)
{
#pragma omp simd reduction (inscan, foo:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
// Exclusive "foo" scan of 2 * a[i] through s, a reference bound to the
// global q (zeroed first); stores the prefixes in the global b and returns
// the final sum.
__attribute__((noipa)) int
bar (void)
{
int &s = q;
q = 0;
#pragma omp simd reduction (inscan, foo:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
// Same exclusive "foo" scan as foo above, with an if (simd: 0) clause on the
// simd construct.
__attribute__((noipa)) void
baz (int *a, int *b, int &r)
{
#pragma omp simd reduction (inscan, foo:r) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
// Same exclusive "foo" scan of 2 * a[i] as bar above, with a simdlen (1)
// clause on the simd construct.
__attribute__((noipa)) int
qux (void)
{
int &s = q;
q = 0;
#pragma omp simd reduction (inscan, foo:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
// Test driver: fills a with 0..1023, runs foo, bar, baz and qux in turn and
// aborts if a final reduction value or any exclusive prefix stored in b
// differs from the value recomputed serially here.
int
main ()
{
int s = 0;
check_vect ();
// a[i] = i; b starts at -1, a value no correct scan result can equal.
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
// NOTE(review): empty asm with i as an in/out operand; presumably keeps this
// setup loop from being vectorized -- confirm.
asm ("" : "+g" (i));
}
foo (a, b, r);
// 1024 * 1023 / 2 is the sum of 0..1023.
if (r != 1024 * 1023 / 2)
abort ();
// s is the serial exclusive prefix; each checked b[i] is overwritten so the
// next scan cannot pass on stale data.
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
// bar scans 2 * a[i], so the total is twice the sum above.
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
// Reset the global accumulator before repeating the first scan through baz.
r = 0;
baz (a, b, r);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
return 0;
}
// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { xfail *-*-* } } }
#include "../../gcc.dg/vect/tree-vect.h"
// Wrapper around one int whose default constructor, destructor, copy
// constructor and copy assignment are all user-provided and defined out of
// line below.
struct S {
inline S ();
inline ~S ();
inline S (const S &);
inline S & operator= (const S &);
int s;
};
// Default constructor: starts the payload at zero.
S::S () : s (0)
{
}
// Destructor: empty body.
S::~S ()
{
}
// Copy constructor: copies the payload by assignment in the body.
S::S (const S &x)
{
s = x.s;
}
// Copy assignment: copies the payload and returns *this.
S &
S::operator= (const S &x)
{
s = x.s;
return *this;
}
// Resets x's payload to zero.  Used as the initializer expression of the
// "plus" user-defined reduction declared further down in this test.
static inline void
ini (S &x)
{
x.s = 0;
}
// Global reduction variable r, the array a that the scans read and the
// array b that receives the per-iteration scan results.
S r, a[1024], b[1024];
// Two user-defined reductions on S that both add the payloads: "+" has no
// initializer clause, "plus" initializes the private copy through ini.
#pragma omp declare reduction (+: S: omp_out.s += omp_in.s)
#pragma omp declare reduction (plus: S: omp_out.s += omp_in.s) initializer (ini (omp_priv))
// Exclusive scan over a[0..1023] using the "+" reduction on the reference
// parameter r: b[i] receives r before a[i].s has been added to it.
__attribute__((noipa)) void
foo (S *a, S *b, S &r)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
// Exclusive scan of 2 * a[i].s into a local S using the "plus" reduction;
// results are stored in the global b and the final value is returned.
__attribute__((noipa)) S
bar (void)
{
S s;
#pragma omp simd reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return s;
}
// Same exclusive "+" scan as foo above, with a simdlen(1) clause on the simd
// construct.
__attribute__((noipa)) void
baz (S *a, S *b, S &r)
{
#pragma omp simd reduction (inscan, +:r) simdlen(1)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r.s += a[i].s;
}
}
// Same exclusive "plus" scan of 2 * a[i].s as bar above, with an if (0)
// clause on the simd construct.
__attribute__((noipa)) S
qux (void)
{
S s;
#pragma omp simd if (0) reduction (inscan, plus:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s.s += 2 * a[i].s;
}
return s;
}
// Test driver: fills a with 0..1023, runs foo, bar, baz and qux in turn and
// aborts if a final reduction value or any exclusive prefix stored in b
// differs from the value recomputed serially here.
int
main ()
{
S s;
check_vect ();
// a[i] = i; b starts at -1, a value no correct scan result can equal.
for (int i = 0; i < 1024; ++i)
{
a[i].s = i;
b[i].s = -1;
// NOTE(review): empty asm with i as an in/out operand; presumably keeps this
// setup loop from being vectorized -- confirm.
asm ("" : "+g" (i));
}
foo (a, b, r);
// 1024 * 1023 / 2 is the sum of 0..1023.
if (r.s != 1024 * 1023 / 2)
abort ();
// s is the serial exclusive prefix sum; b is overwritten with 25 once checked.
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
// bar scans 2 * a[i], so the total is twice the sum above.
if (bar ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
// Reset the global accumulator before repeating the first scan through baz.
r.s = 0;
baz (a, b, r);
if (r.s != 1024 * 1023 / 2)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
else
b[i].s = 25;
s.s += i;
}
if (qux ().s != 1024 * 1023)
abort ();
s.s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i].s != s.s)
abort ();
s.s += 2 * i;
}
return 0;
}
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h"
#endif
/* Global accumulator r, the array a that the scans read and the array b
   that receives the per-iteration scan results.  */
int r, a[1024], b[1024];
/* Exclusive "+" scan over a[0..1023] accumulating into the global r:
   b[i] receives r before a[i] is added to it.  */
__attribute__((noipa)) void
foo (int *a, int *b)
{
#pragma omp simd reduction (inscan, +:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
/* Exclusive "+" scan of 2 * a[i] into a local s starting at 0; stores the
   prefixes in the global b and returns the final sum.  */
__attribute__((noipa)) int
bar (void)
{
int s = 0;
#pragma omp simd reduction (inscan, +:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
/* Same exclusive "+" scan on the global r as foo, with an if (simd: 0)
   clause on the simd construct.  */
__attribute__((noipa)) void
baz (int *a, int *b)
{
#pragma omp simd reduction (inscan, +:r) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
/* Same exclusive "+" scan of 2 * a[i] as bar, with a simdlen (1) clause on
   the simd construct.  */
__attribute__((noipa)) int
qux (void)
{
int s = 0;
#pragma omp simd reduction (inscan, +:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
/* Test driver: fills a with 0..1023, runs foo, bar, baz and qux in turn and
   aborts if a final reduction value or any exclusive prefix stored in b
   differs from the value recomputed serially here.  */
int
main ()
{
int s = 0;
/* check_vect is only called when main is not defined as a macro; the i386
   wrapper tests define main() before including this file.  */
#ifndef main
check_vect ();
#endif
/* a[i] = i; b starts at -1, a value no correct scan result can equal.  */
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
/* NOTE(review): empty asm with i as an in/out operand; presumably keeps
   this setup loop from being vectorized -- confirm.  */
asm ("" : "+g" (i));
}
foo (a, b);
/* 1024 * 1023 / 2 is the sum of 0..1023.  */
if (r != 1024 * 1023 / 2)
abort ();
/* s is the serial exclusive prefix; each checked b[i] is overwritten so the
   next scan cannot pass on stale data.  */
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
/* bar scans 2 * a[i], so the total is twice the sum above.  */
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
/* Reset the global accumulator before repeating the first scan via baz.  */
r = 0;
baz (a, b);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
return 0;
}
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h"
#endif
/* Global accumulator r, the array a that the scans read and the array b
   that receives the per-iteration scan results.  */
int r, a[1024], b[1024];
/* User-defined reduction "foo" on int: adds omp_in into omp_out and starts
   each private copy at 0.  */
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
/* Exclusive scan over a[0..1023] using the user-defined "foo" reduction on
   the global r: b[i] receives r before a[i] is added to it.  */
__attribute__((noipa)) void
foo (int *a, int *b)
{
#pragma omp simd reduction (inscan, foo:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
/* Exclusive "foo" scan of 2 * a[i] into a local s starting at 0; stores the
   prefixes in the global b and returns the final sum.  */
__attribute__((noipa)) int
bar (void)
{
int s = 0;
#pragma omp simd reduction (inscan, foo:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
/* Same exclusive "foo" scan on the global r as foo, with an if (simd: 0)
   clause on the simd construct.  */
__attribute__((noipa)) void
baz (int *a, int *b)
{
#pragma omp simd reduction (inscan, foo:r) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r += a[i];
}
}
/* Same exclusive "foo" scan of 2 * a[i] as bar, with a simdlen (1) clause
   on the simd construct.  */
__attribute__((noipa)) int
qux (void)
{
int s = 0;
#pragma omp simd reduction (inscan, foo:s) simdlen (1)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s += 2 * a[i];
}
return s;
}
/* Test driver: fills a with 0..1023, runs foo, bar, baz and qux in turn and
   aborts if a final reduction value or any exclusive prefix stored in b
   differs from the value recomputed serially here.  */
int
main ()
{
int s = 0;
/* check_vect is only called when main is not defined as a macro; the i386
   wrapper tests define main() before including this file.  */
#ifndef main
check_vect ();
#endif
/* a[i] = i; b starts at -1, a value no correct scan result can equal.  */
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
/* NOTE(review): empty asm with i as an in/out operand; presumably keeps
   this setup loop from being vectorized -- confirm.  */
asm ("" : "+g" (i));
}
foo (a, b);
/* 1024 * 1023 / 2 is the sum of 0..1023.  */
if (r != 1024 * 1023 / 2)
abort ();
/* s is the serial exclusive prefix; each checked b[i] is overwritten so the
   next scan cannot pass on stale data.  */
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = 25;
s += i;
}
/* bar scans 2 * a[i], so the total is twice the sum above.  */
if (bar () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -1;
s += 2 * i;
}
/* Reset the global accumulator before repeating the first scan via baz.  */
r = 0;
baz (a, b);
if (r != 1024 * 1023 / 2)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -25;
s += i;
}
if (qux () != 1024 * 1023)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
s += 2 * i;
}
return 0;
}
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h"
#endif
/* Global product accumulator r (starts at the multiplicative identity), the
   array a that the scans read and the array b that receives the scan
   results.  */
float r = 1.0f, a[1024], b[1024];
/* Exclusive "*" scan over a[0..1023] accumulating into the global r:
   b[i] receives r before it is multiplied by a[i].  */
__attribute__((noipa)) void
foo (float *a, float *b)
{
#pragma omp simd reduction (inscan, *:r)
for (int i = 0; i < 1024; i++)
{
b[i] = r;
#pragma omp scan exclusive(r)
r *= a[i];
}
}
/* Exclusive "max" scan over the global a into a local s that starts at
   negative infinity; stores the running maxima in the global b and returns
   the overall maximum.  */
__attribute__((noipa)) float
bar (void)
{
float s = -__builtin_inff ();
#pragma omp simd reduction (inscan, max:s)
for (int i = 0; i < 1024; i++)
{
b[i] = s;
#pragma omp scan exclusive(s)
s = s > a[i] ? s : a[i];
}
return s;
}
/* Test driver: checks the exclusive product scan (foo) and the exclusive
   max scan (bar) against values recomputed serially here, aborting on any
   mismatch.  */
int
main ()
{
float s = 1.0f;
/* check_vect is only called when main is not defined as a macro; the i386
   wrapper tests define main() before including this file.  */
#ifndef main
check_vect ();
#endif
/* Fill a with values whose magnitudes are all powers of two (negative ones
   appear only for i >= 380); b starts at -19.0f.  */
for (int i = 0; i < 1024; ++i)
{
if (i < 80)
a[i] = (i & 1) ? 0.25f : 0.5f;
else if (i < 200)
a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
else if (i < 280)
a[i] = (i & 1) ? 0.25f : 0.5f;
else if (i < 380)
a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
else
switch (i % 6)
{
case 0: a[i] = 0.25f; break;
case 1: a[i] = 2.0f; break;
case 2: a[i] = -1.0f; break;
case 3: a[i] = -4.0f; break;
case 4: a[i] = 0.5f; break;
case 5: a[i] = 1.0f; break;
/* Not reachable: i is non-negative here, so i % 6 is always 0..5.  */
default: a[i] = 0.0f; break;
}
b[i] = -19.0f;
/* NOTE(review): empty asm with i as an in/out operand; presumably keeps
   this setup loop from being vectorized -- confirm.  */
asm ("" : "+g" (i));
}
foo (a, b);
/* The final product is expected to equal 0.125 / 16384.  */
if (r * 16384.0f != 0.125f)
abort ();
/* Verify each exclusive product, then overwrite b with a new sentinel and
   refill a for the max test with m minus a 0, 2 or 4 offset, where m rises
   by 0.75 per element starting at -175.25.  */
float m = -175.25f;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
else
b[i] = -231.75f;
s *= a[i];
a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
m += 0.75f;
}
/* 592.0 is a[1023]: -175.25 + 1023 * 0.75 with a zero offset.  */
if (bar () != 592.0f)
abort ();
/* Verify each exclusive running maximum.  */
s = -__builtin_inff ();
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s)
abort ();
if (s < a[i])
s = a[i];
}
return 0;
}
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h"
#endif
/* Three accumulators of different widths (int, unsigned short, unsigned
   char), each with its own result array; a is the shared input.  */
int r, a[1024], b[1024];
unsigned short r2, b2[1024];
unsigned char r3, b3[1024];
/* Exclusive "+" scan of a[i] into the three globals r, r2 and r3 at once:
   b, b2 and b3 receive the accumulators before a[i] is added.  */
__attribute__((noipa)) void
foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
{
#pragma omp simd reduction (inscan, +:r, r2, r3)
for (int i = 0; i < 1024; i++)
{
{
b[i] = r;
b2[i] = r2;
b3[i] = r3;
}
#pragma omp scan exclusive(r, r2, r3)
{ r += a[i]; r2 += a[i]; r3 += a[i]; }
}
}
/* Exclusive "+" scan of 2 * a[i] into three locals of different widths;
   prefixes go to the globals b, b2 and b3.  Returns the int total and writes
   the unsigned short and unsigned char totals through s2p and s3p.  */
__attribute__((noipa)) int
bar (unsigned short *s2p, unsigned char *s3p)
{
int s = 0;
unsigned short s2 = 0;
unsigned char s3 = 0;
#pragma omp simd reduction (inscan, +:s, s2, s3)
for (int i = 0; i < 1024; i++)
{
{ b[i] = s; b2[i] = s2; b3[i] = s3; }
#pragma omp scan exclusive(s, s2, s3)
{
s += 2 * a[i];
s2 += 2 * a[i];
s3 += 2 * a[i];
}
}
*s2p = s2;
*s3p = s3;
return s;
}
/* Same three-accumulator exclusive scan on the globals as foo, with an
   if (simd: 0) clause on the simd construct.  */
__attribute__((noipa)) void
baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
{
#pragma omp simd reduction (inscan, +:r, r2, r3) if (simd: 0)
for (int i = 0; i < 1024; i++)
{
{
b[i] = r;
b2[i] = r2;
b3[i] = r3;
}
#pragma omp scan exclusive(r, r2, r3)
{
r += a[i];
r2 += a[i];
r3 += a[i];
}
}
}
/* Same three-accumulator exclusive scan of 2 * a[i] as bar, with a
   simdlen (1) clause on the simd construct.  */
__attribute__((noipa)) int
qux (unsigned short *s2p, unsigned char *s3p)
{
int s = 0;
unsigned short s2 = 0;
unsigned char s3 = 0;
#pragma omp simd reduction (inscan, +:s, s2, s3) simdlen (1)
for (int i = 0; i < 1024; i++)
{
{ b[i] = s; b2[i] = s2; b3[i] = s3; }
#pragma omp scan exclusive(s, s2, s3)
{ s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
}
*s2p = s2;
*s3p = s3;
return s;
}
/* Test driver: fills a with 0..1023, runs foo, bar, baz and qux in turn and
   aborts on any mismatch.  The int results are recomputed serially here and
   the narrower results are checked against the int value converted to
   unsigned short / unsigned char.  */
int
main ()
{
int s = 0;
unsigned short s2;
unsigned char s3;
/* check_vect is only called when main is not defined as a macro; the i386
   wrapper tests define main() before including this file.  */
#ifndef main
check_vect ();
#endif
/* a[i] = i; all three result arrays start at -1 (converted to their element
   type).  */
for (int i = 0; i < 1024; ++i)
{
a[i] = i;
b[i] = -1;
b2[i] = -1;
b3[i] = -1;
/* NOTE(review): empty asm with i as an in/out operand; presumably keeps
   this setup loop from being vectorized -- confirm.  */
asm ("" : "+g" (i));
}
foo (a, b, b2, b3);
/* 1024 * 1023 / 2 is the sum of 0..1023.  */
if (r != 1024 * 1023 / 2
|| r2 != (unsigned short) r
|| r3 != (unsigned char) r)
abort ();
/* s is the serial exclusive prefix; checked elements are overwritten so the
   next scan cannot pass on stale data.  */
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = 25;
b2[i] = 24;
b3[i] = 26;
}
s += i;
}
/* bar scans 2 * a[i], so the total is twice the sum above; it also fills in
   s2 and s3.  */
if (bar (&s2, &s3) != 1024 * 1023)
abort ();
if (s2 != (unsigned short) (1024 * 1023)
|| s3 != (unsigned char) (1024 * 1023))
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = -1;
b2[i] = -1;
b3[i] = -1;
}
s += 2 * i;
}
/* Reset the global accumulators before repeating the first scan via baz.  */
r = 0;
r2 = 0;
r3 = 0;
baz (a, b, b2, b3);
if (r != 1024 * 1023 / 2
|| r2 != (unsigned short) r
|| r3 != (unsigned char) r)
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
else
{
b[i] = 25;
b2[i] = 24;
b3[i] = 26;
}
s += i;
}
/* Clear the previous totals so qux has to write them itself.  */
s2 = 0;
s3 = 0;
if (qux (&s2, &s3) != 1024 * 1023)
abort ();
if (s2 != (unsigned short) (1024 * 1023)
|| s3 != (unsigned char) (1024 * 1023))
abort ();
s = 0;
for (int i = 0; i < 1024; ++i)
{
if (b[i] != s
|| b2[i] != (unsigned short) s
|| b3[i] != (unsigned char) s)
abort ();
s += 2 * i;
}
return 0;
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-12.c"
/* Runs the included vect-simd-12.c test body, whose main the main() macro
   above renames to do_main.  NOTE(review): presumably called by the
   avx2-check.h harness once AVX2 support is confirmed -- verify against
   that header.  */
static void
avx2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-13.c"
/* Runs the included vect-simd-13.c test body, whose main the main() macro
   above renames to do_main.  NOTE(review): presumably called by the
   avx2-check.h harness once AVX2 support is confirmed -- verify against
   that header.  */
static void
avx2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-14.c"
/* Runs the included vect-simd-14.c test body, whose main the main() macro
   above renames to do_main.  NOTE(review): presumably called by the
   avx2-check.h harness once AVX2 support is confirmed -- verify against
   that header.  */
static void
avx2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-15.c"
/* Runs the included vect-simd-15.c test body, whose main the main() macro
   above renames to do_main.  NOTE(review): presumably called by the
   avx2-check.h harness once AVX2 support is confirmed -- verify against
   that header.  */
static void
avx2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512bw -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512bw } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512bw-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-15.c"
/* Runs the included vect-simd-15.c test body, whose main the main() macro
   above renames to do_main.  NOTE(review): presumably called by the
   avx512bw-check.h harness once AVX512BW support is confirmed -- verify
   against that header.  */
static void
avx512bw_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512f } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512f-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-12.c"
/* Runs the included vect-simd-12.c test body, whose main the main() macro
   above renames to do_main.  NOTE(review): presumably called by the
   avx512f-check.h harness once AVX512F support is confirmed -- verify
   against that header.  */
static void
avx512f_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512f } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512f-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-13.c"
/* Runs the included vect-simd-13.c test body, whose main the main() macro
   above renames to do_main.  NOTE(review): presumably called by the
   avx512f-check.h harness once AVX512F support is confirmed -- verify
   against that header.  */
static void
avx512f_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512f } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512f-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-14.c"
/* Runs the included vect-simd-14.c test body, whose main the main() macro
   above renames to do_main.  NOTE(review): presumably called by the
   avx512f-check.h harness once AVX512F support is confirmed -- verify
   against that header.  */
static void
avx512f_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-12.c"
/* Runs the included vect-simd-12.c test body, whose main the main() macro
   above renames to do_main.  NOTE(review): presumably called by the
   sse2-check.h harness once SSE2 support is confirmed -- verify against
   that header.  */
static void
sse2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-13.c"
/* Runs the included vect-simd-13.c test body, whose main the main() macro
   above renames to do_main.  NOTE(review): presumably called by the
   sse2-check.h harness once SSE2 support is confirmed -- verify against
   that header.  */
static void
sse2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-14.c"
/* Runs the included vect-simd-14.c test body, whose main the main() macro
   above renames to do_main.  This variant is a run-only check: unlike its
   sibling wrappers it has no vectorization dump scan.  NOTE(review):
   presumably called by the sse2-check.h harness once SSE2 support is
   confirmed -- verify against that header.  */
static void
sse2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-15.c"
/* Runs the included vect-simd-15.c test body, whose main the main() macro
   above renames to do_main.  NOTE(review): presumably called by the
   sse2-check.h harness once SSE2 support is confirmed -- verify against
   that header.  */
static void
sse2_test (void)
{
do_main ();
}
...@@ -4223,7 +4223,8 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf) ...@@ -4223,7 +4223,8 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
/* See if this was detected as SIMD lane access. */ /* See if this was detected as SIMD lane access. */
if (dr->aux == (void *)-1 if (dr->aux == (void *)-1
|| dr->aux == (void *)-2 || dr->aux == (void *)-2
|| dr->aux == (void *)-3) || dr->aux == (void *)-3
|| dr->aux == (void *)-4)
{ {
if (nested_in_vect_loop_p (loop, stmt_info)) if (nested_in_vect_loop_p (loop, stmt_info))
return opt_result::failure_at (stmt_info->stmt, return opt_result::failure_at (stmt_info->stmt,
......
...@@ -917,7 +917,7 @@ struct _stmt_vec_info { ...@@ -917,7 +917,7 @@ struct _stmt_vec_info {
bool strided_p; bool strided_p;
/* For both loads and stores. */ /* For both loads and stores. */
unsigned simd_lane_access_p : 2; unsigned simd_lane_access_p : 3;
/* Classifies how the load or store is going to be implemented /* Classifies how the load or store is going to be implemented
for loop vectorization. */ for loop vectorization. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment