Commit 2e83f583 by Jakub Jelinek Committed by Jakub Jelinek

md.texi: Document vec_shl_<mode> pattern.

	* doc/md.texi: Document vec_shl_<mode> pattern.
	* optabs.def (vec_shl_optab): New optab.
	* optabs.c (shift_amt_for_vec_perm_mask): Add shift_optab
	argument, if == vec_shl_optab, check for left whole vector shift
	pattern rather than right shift.
	(expand_vec_perm_const): Add vec_shl_optab support.
	* optabs-query.c (can_vec_perm_var_p): Mention also vec_shl optab
	in the comment.
	* tree-vect-generic.c (lower_vec_perm): Support permutations which
	can be handled by vec_shl_optab.
	* tree-vect-stmts.c (scan_store_can_perm_p): New function.
	(check_scan_store): Use it.
	(vectorizable_scan_store): If target can't do normal permutations,
	try to use whole vector left shifts and if needed a VEC_COND_EXPR
	after it.
	* config/i386/sse.md (vec_shl_<mode>): New expander.

	* gcc.dg/vect/vect-simd-8.c: If main is defined, don't include
	tree-vect.h nor call check_vect.
	* gcc.dg/vect/vect-simd-9.c: Likewise.
	* gcc.dg/vect/vect-simd-10.c: New test.
	* gcc.target/i386/sse2-vect-simd-8.c: New test.
	* gcc.target/i386/sse2-vect-simd-9.c: New test.
	* gcc.target/i386/sse2-vect-simd-10.c: New test.
	* gcc.target/i386/avx2-vect-simd-8.c: New test.
	* gcc.target/i386/avx2-vect-simd-9.c: New test.
	* gcc.target/i386/avx2-vect-simd-10.c: New test.
	* gcc.target/i386/avx512f-vect-simd-8.c: New test.
	* gcc.target/i386/avx512f-vect-simd-9.c: New test.
	* gcc.target/i386/avx512f-vect-simd-10.c: New test.

From-SVN: r272472
parent 6a2892a6
2019-06-19 Jakub Jelinek <jakub@redhat.com> 2019-06-19 Jakub Jelinek <jakub@redhat.com>
* doc/md.texi: Document vec_shl_<mode> pattern.
* optabs.def (vec_shl_optab): New optab.
* optabs.c (shift_amt_for_vec_perm_mask): Add shift_optab
argument, if == vec_shl_optab, check for left whole vector shift
pattern rather than right shift.
(expand_vec_perm_const): Add vec_shl_optab support.
* optabs-query.c (can_vec_perm_var_p): Mention also vec_shl optab
in the comment.
* tree-vect-generic.c (lower_vec_perm): Support permutations which
can be handled by vec_shl_optab.
* tree-vect-stmts.c (scan_store_can_perm_p): New function.
(check_scan_store): Use it.
(vectorizable_scan_store): If target can't do normal permutations,
try to use whole vector left shifts and if needed a VEC_COND_EXPR
after it.
* config/i386/sse.md (vec_shl_<mode>): New expander.
* omp-low.c (lower_rec_input_clauses): Handle references properly * omp-low.c (lower_rec_input_clauses): Handle references properly
in inscan clauses. in inscan clauses.
(lower_omp_scan): Likewise. (lower_omp_scan): Likewise.
......
...@@ -11758,6 +11758,19 @@ ...@@ -11758,6 +11758,19 @@
(set_attr "mode" "<sseinsnmode>")]) (set_attr "mode" "<sseinsnmode>")])
(define_expand "vec_shl_<mode>"
[(set (match_dup 3)
(ashift:V1TI
(match_operand:VI_128 1 "register_operand")
(match_operand:SI 2 "const_0_to_255_mul_8_operand")))
(set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
"TARGET_SSE2"
{
operands[1] = gen_lowpart (V1TImode, operands[1]);
operands[3] = gen_reg_rtx (V1TImode);
operands[4] = gen_lowpart (<MODE>mode, operands[3]);
})
(define_expand "vec_shr_<mode>" (define_expand "vec_shr_<mode>"
[(set (match_dup 3) [(set (match_dup 3)
(lshiftrt:V1TI (lshiftrt:V1TI
......
...@@ -5459,6 +5459,14 @@ in operand 2. Store the result in vector output operand 0. Operands ...@@ -5459,6 +5459,14 @@ in operand 2. Store the result in vector output operand 0. Operands
0 and 1 have mode @var{m} and operand 2 has the mode appropriate for 0 and 1 have mode @var{m} and operand 2 has the mode appropriate for
one element of @var{m}. one element of @var{m}.
@cindex @code{vec_shl_@var{m}} instruction pattern
@item @samp{vec_shl_@var{m}}
Whole vector left shift in bits, i.e.@: away from element 0.
Operand 1 is a vector to be shifted.
Operand 2 is an integer shift amount in bits.
Operand 0 is where the resulting shifted vector is stored.
The output and input vectors should have the same modes.
@cindex @code{vec_shr_@var{m}} instruction pattern @cindex @code{vec_shr_@var{m}} instruction pattern
@item @samp{vec_shr_@var{m}} @item @samp{vec_shr_@var{m}}
Whole vector right shift in bits, i.e.@: towards element 0. Whole vector right shift in bits, i.e.@: towards element 0.
......
...@@ -415,8 +415,9 @@ can_vec_perm_var_p (machine_mode mode) ...@@ -415,8 +415,9 @@ can_vec_perm_var_p (machine_mode mode)
permute (if the target supports that). permute (if the target supports that).
Note that additional permutations representing whole-vector shifts may Note that additional permutations representing whole-vector shifts may
also be handled via the vec_shr optab, but only where the second input also be handled via the vec_shr or vec_shl optab, but only where the
vector is entirely constant zeroes; this case is not dealt with here. */ second input vector is entirely constant zeroes; this case is not dealt
with here. */
bool bool
can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel, can_vec_perm_const_p (machine_mode mode, const vec_perm_indices &sel,
......
...@@ -5444,19 +5444,45 @@ vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode, ...@@ -5444,19 +5444,45 @@ vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
} }
/* Check if vec_perm mask SEL is a constant equivalent to a shift of /* Check if vec_perm mask SEL is a constant equivalent to a shift of
the first vec_perm operand, assuming the second operand is a constant the first vec_perm operand, assuming the second operand (for left shift
vector of zeros. Return the shift distance in bits if so, or NULL_RTX first operand) is a constant vector of zeros. Return the shift distance
if the vec_perm is not a shift. MODE is the mode of the value being in bits if so, or NULL_RTX if the vec_perm is not a shift. MODE is the
shifted. */ mode of the value being shifted. SHIFT_OPTAB is vec_shr_optab for right
shift or vec_shl_optab for left shift. */
static rtx static rtx
shift_amt_for_vec_perm_mask (machine_mode mode, const vec_perm_indices &sel) shift_amt_for_vec_perm_mask (machine_mode mode, const vec_perm_indices &sel,
optab shift_optab)
{ {
unsigned int bitsize = GET_MODE_UNIT_BITSIZE (mode); unsigned int bitsize = GET_MODE_UNIT_BITSIZE (mode);
poly_int64 first = sel[0]; poly_int64 first = sel[0];
if (maybe_ge (sel[0], GET_MODE_NUNITS (mode))) if (maybe_ge (sel[0], GET_MODE_NUNITS (mode)))
return NULL_RTX; return NULL_RTX;
if (!sel.series_p (0, 1, first, 1)) if (shift_optab == vec_shl_optab)
{
unsigned int nelt;
if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
return NULL_RTX;
unsigned firstidx = 0;
for (unsigned int i = 0; i < nelt; i++)
{
if (known_eq (sel[i], nelt))
{
if (i == 0 || firstidx)
return NULL_RTX;
firstidx = i;
}
else if (firstidx
? maybe_ne (sel[i], nelt + i - firstidx)
: maybe_ge (sel[i], nelt))
return NULL_RTX;
}
if (firstidx == 0)
return NULL_RTX;
first = firstidx;
}
else if (!sel.series_p (0, 1, first, 1))
{ {
unsigned int nelt; unsigned int nelt;
if (!GET_MODE_NUNITS (mode).is_constant (&nelt)) if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
...@@ -5544,25 +5570,37 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1, ...@@ -5544,25 +5570,37 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
target instruction. */ target instruction. */
vec_perm_indices indices (sel, 2, GET_MODE_NUNITS (mode)); vec_perm_indices indices (sel, 2, GET_MODE_NUNITS (mode));
/* See if this can be handled with a vec_shr. We only do this if the /* See if this can be handled with a vec_shr or vec_shl. We only do this
second vector is all zeroes. */ if the second (for vec_shr) or first (for vec_shl) vector is all
insn_code shift_code = optab_handler (vec_shr_optab, mode); zeroes. */
insn_code shift_code_qi = ((qimode != VOIDmode && qimode != mode) insn_code shift_code = CODE_FOR_nothing;
? optab_handler (vec_shr_optab, qimode) insn_code shift_code_qi = CODE_FOR_nothing;
optab shift_optab = unknown_optab;
rtx v2 = v0;
if (v1 == CONST0_RTX (GET_MODE (v1)))
shift_optab = vec_shr_optab;
else if (v0 == CONST0_RTX (GET_MODE (v0)))
{
shift_optab = vec_shl_optab;
v2 = v1;
}
if (shift_optab != unknown_optab)
{
shift_code = optab_handler (shift_optab, mode);
shift_code_qi = ((qimode != VOIDmode && qimode != mode)
? optab_handler (shift_optab, qimode)
: CODE_FOR_nothing); : CODE_FOR_nothing);
}
if (v1 == CONST0_RTX (GET_MODE (v1)) if (shift_code != CODE_FOR_nothing || shift_code_qi != CODE_FOR_nothing)
&& (shift_code != CODE_FOR_nothing
|| shift_code_qi != CODE_FOR_nothing))
{ {
rtx shift_amt = shift_amt_for_vec_perm_mask (mode, indices); rtx shift_amt = shift_amt_for_vec_perm_mask (mode, indices, shift_optab);
if (shift_amt) if (shift_amt)
{ {
struct expand_operand ops[3]; struct expand_operand ops[3];
if (shift_code != CODE_FOR_nothing) if (shift_code != CODE_FOR_nothing)
{ {
create_output_operand (&ops[0], target, mode); create_output_operand (&ops[0], target, mode);
create_input_operand (&ops[1], v0, mode); create_input_operand (&ops[1], v2, mode);
create_convert_operand_from_type (&ops[2], shift_amt, sizetype); create_convert_operand_from_type (&ops[2], shift_amt, sizetype);
if (maybe_expand_insn (shift_code, 3, ops)) if (maybe_expand_insn (shift_code, 3, ops))
return ops[0].value; return ops[0].value;
...@@ -5571,7 +5609,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1, ...@@ -5571,7 +5609,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1,
{ {
rtx tmp = gen_reg_rtx (qimode); rtx tmp = gen_reg_rtx (qimode);
create_output_operand (&ops[0], tmp, qimode); create_output_operand (&ops[0], tmp, qimode);
create_input_operand (&ops[1], gen_lowpart (qimode, v0), qimode); create_input_operand (&ops[1], gen_lowpart (qimode, v2), qimode);
create_convert_operand_from_type (&ops[2], shift_amt, sizetype); create_convert_operand_from_type (&ops[2], shift_amt, sizetype);
if (maybe_expand_insn (shift_code_qi, 3, ops)) if (maybe_expand_insn (shift_code_qi, 3, ops))
return gen_lowpart (mode, ops[0].value); return gen_lowpart (mode, ops[0].value);
......
...@@ -349,6 +349,7 @@ OPTAB_D (vec_packu_float_optab, "vec_packu_float_$a") ...@@ -349,6 +349,7 @@ OPTAB_D (vec_packu_float_optab, "vec_packu_float_$a")
OPTAB_D (vec_perm_optab, "vec_perm$a") OPTAB_D (vec_perm_optab, "vec_perm$a")
OPTAB_D (vec_realign_load_optab, "vec_realign_load_$a") OPTAB_D (vec_realign_load_optab, "vec_realign_load_$a")
OPTAB_D (vec_set_optab, "vec_set$a") OPTAB_D (vec_set_optab, "vec_set$a")
OPTAB_D (vec_shl_optab, "vec_shl_$a")
OPTAB_D (vec_shr_optab, "vec_shr_$a") OPTAB_D (vec_shr_optab, "vec_shr_$a")
OPTAB_D (vec_unpack_sfix_trunc_hi_optab, "vec_unpack_sfix_trunc_hi_$a") OPTAB_D (vec_unpack_sfix_trunc_hi_optab, "vec_unpack_sfix_trunc_hi_$a")
OPTAB_D (vec_unpack_sfix_trunc_lo_optab, "vec_unpack_sfix_trunc_lo_$a") OPTAB_D (vec_unpack_sfix_trunc_lo_optab, "vec_unpack_sfix_trunc_lo_$a")
......
2019-06-19 Jakub Jelinek <jakub@redhat.com> 2019-06-19 Jakub Jelinek <jakub@redhat.com>
* gcc.dg/vect/vect-simd-8.c: If main is defined, don't include
tree-vect.h nor call check_vect.
* gcc.dg/vect/vect-simd-9.c: Likewise.
* gcc.dg/vect/vect-simd-10.c: New test.
* gcc.target/i386/sse2-vect-simd-8.c: New test.
* gcc.target/i386/sse2-vect-simd-9.c: New test.
* gcc.target/i386/sse2-vect-simd-10.c: New test.
* gcc.target/i386/avx2-vect-simd-8.c: New test.
* gcc.target/i386/avx2-vect-simd-9.c: New test.
* gcc.target/i386/avx2-vect-simd-10.c: New test.
* gcc.target/i386/avx512f-vect-simd-8.c: New test.
* gcc.target/i386/avx512f-vect-simd-9.c: New test.
* gcc.target/i386/avx512f-vect-simd-10.c: New test.
* g++.dg/vect/simd-3.cc: New test. * g++.dg/vect/simd-3.cc: New test.
* g++.dg/vect/simd-4.cc: New test. * g++.dg/vect/simd-4.cc: New test.
* g++.dg/vect/simd-5.cc: New test. * g++.dg/vect/simd-5.cc: New test.
......
/* { dg-require-effective-target size32plus } */
/* { dg-additional-options "-fopenmp-simd" } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h"
#endif
float r = 1.0f, a[1024], b[1024];
__attribute__((noipa)) void
foo (float *a, float *b)
{
#pragma omp simd reduction (inscan, *:r)
for (int i = 0; i < 1024; i++)
{
r *= a[i];
#pragma omp scan inclusive(r)
b[i] = r;
}
}
__attribute__((noipa)) float
bar (void)
{
float s = -__builtin_inff ();
#pragma omp simd reduction (inscan, max:s)
for (int i = 0; i < 1024; i++)
{
s = s > a[i] ? s : a[i];
#pragma omp scan inclusive(s)
b[i] = s;
}
return s;
}
int
main ()
{
float s = 1.0f;
#ifndef main
check_vect ();
#endif
for (int i = 0; i < 1024; ++i)
{
if (i < 80)
a[i] = (i & 1) ? 0.25f : 0.5f;
else if (i < 200)
a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
else if (i < 280)
a[i] = (i & 1) ? 0.25f : 0.5f;
else if (i < 380)
a[i] = (i % 3) == 0 ? 2.0f : (i % 3) == 1 ? 4.0f : 1.0f;
else
switch (i % 6)
{
case 0: a[i] = 0.25f; break;
case 1: a[i] = 2.0f; break;
case 2: a[i] = -1.0f; break;
case 3: a[i] = -4.0f; break;
case 4: a[i] = 0.5f; break;
case 5: a[i] = 1.0f; break;
default: a[i] = 0.0f; break;
}
b[i] = -19.0f;
asm ("" : "+g" (i));
}
foo (a, b);
if (r * 16384.0f != 0.125f)
abort ();
float m = -175.25f;
for (int i = 0; i < 1024; ++i)
{
s *= a[i];
if (b[i] != s)
abort ();
else
{
a[i] = m - ((i % 3) == 1 ? 2.0f : (i % 3) == 2 ? 4.0f : 0.0f);
b[i] = -231.75f;
m += 0.75f;
}
}
if (bar () != 592.0f)
abort ();
s = -__builtin_inff ();
for (int i = 0; i < 1024; ++i)
{
if (s < a[i])
s = a[i];
if (b[i] != s)
abort ();
}
return 0;
}
...@@ -3,7 +3,9 @@ ...@@ -3,7 +3,9 @@
/* { dg-additional-options "-mavx" { target avx_runtime } } */ /* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ /* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h" #include "tree-vect.h"
#endif
int r, a[1024], b[1024]; int r, a[1024], b[1024];
...@@ -63,7 +65,9 @@ int ...@@ -63,7 +65,9 @@ int
main () main ()
{ {
int s = 0; int s = 0;
#ifndef main
check_vect (); check_vect ();
#endif
for (int i = 0; i < 1024; ++i) for (int i = 0; i < 1024; ++i)
{ {
a[i] = i; a[i] = i;
......
...@@ -3,7 +3,9 @@ ...@@ -3,7 +3,9 @@
/* { dg-additional-options "-mavx" { target avx_runtime } } */ /* { dg-additional-options "-mavx" { target avx_runtime } } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */ /* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } } */
#ifndef main
#include "tree-vect.h" #include "tree-vect.h"
#endif
int r, a[1024], b[1024]; int r, a[1024], b[1024];
...@@ -65,7 +67,9 @@ int ...@@ -65,7 +67,9 @@ int
main () main ()
{ {
int s = 0; int s = 0;
#ifndef main
check_vect (); check_vect ();
#endif
for (int i = 0; i < 1024; ++i) for (int i = 0; i < 1024; ++i)
{ {
a[i] = i; a[i] = i;
......
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-10.c"
static void
avx2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-8.c"
static void
avx2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx2 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-9.c"
static void
avx2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512f } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512f-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-10.c"
static void
avx512f_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512f } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512f-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-8.c"
static void
avx512f_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -mavx512f -mprefer-vector-width=512 -fdump-tree-vect-details" } */
/* { dg-require-effective-target avx512f } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "avx512f-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-9.c"
static void
avx512f_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-10.c"
static void
sse2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-8.c"
static void
sse2_test (void)
{
do_main ();
}
/* { dg-do run } */
/* { dg-options "-O2 -fopenmp-simd -msse2 -mno-sse3 -fdump-tree-vect-details" } */
/* { dg-require-effective-target sse2 } */
/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" } } */
#include "sse2-check.h"
#define main() do_main ()
#include "../../gcc.dg/vect/vect-simd-9.c"
static void
sse2_test (void)
{
do_main ();
}
...@@ -1367,6 +1367,32 @@ lower_vec_perm (gimple_stmt_iterator *gsi) ...@@ -1367,6 +1367,32 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
return; return;
} }
} }
/* And similarly vec_shl pattern. */
if (optab_handler (vec_shl_optab, TYPE_MODE (vect_type))
!= CODE_FOR_nothing
&& TREE_CODE (vec0) == VECTOR_CST
&& initializer_zerop (vec0))
{
unsigned int first = 0;
for (i = 0; i < elements; ++i)
if (known_eq (poly_uint64 (indices[i]), elements))
{
if (i == 0 || first)
break;
first = i;
}
else if (first
? maybe_ne (poly_uint64 (indices[i]),
elements + i - first)
: maybe_ge (poly_uint64 (indices[i]), elements))
break;
if (i == elements)
{
gimple_assign_set_rhs3 (stmt, mask);
update_stmt (stmt);
return;
}
}
} }
else if (can_vec_perm_var_p (TYPE_MODE (vect_type))) else if (can_vec_perm_var_p (TYPE_MODE (vect_type)))
return; return;
......
...@@ -6356,6 +6356,71 @@ scan_operand_equal_p (tree ref1, tree ref2) ...@@ -6356,6 +6356,71 @@ scan_operand_equal_p (tree ref1, tree ref2)
/* Function check_scan_store. /* Function check_scan_store.
Verify if we can perform the needed permutations or whole vector shifts.
Return -1 on failure, otherwise exact log2 of vectype's nunits. */
static int
scan_store_can_perm_p (tree vectype, tree init, int *use_whole_vector_p = NULL)
{
enum machine_mode vec_mode = TYPE_MODE (vectype);
unsigned HOST_WIDE_INT nunits;
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
return -1;
int units_log2 = exact_log2 (nunits);
if (units_log2 <= 0)
return -1;
int i;
for (i = 0; i <= units_log2; ++i)
{
unsigned HOST_WIDE_INT j, k;
vec_perm_builder sel (nunits, nunits, 1);
sel.quick_grow (nunits);
if (i == 0)
{
for (j = 0; j < nunits; ++j)
sel[j] = nunits - 1;
}
else
{
for (j = 0; j < (HOST_WIDE_INT_1U << (i - 1)); ++j)
sel[j] = j;
for (k = 0; j < nunits; ++j, ++k)
sel[j] = nunits + k;
}
vec_perm_indices indices (sel, i == 0 ? 1 : 2, nunits);
if (!can_vec_perm_const_p (vec_mode, indices))
break;
}
if (i == 0)
return -1;
if (i <= units_log2)
{
if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
return -1;
int kind = 1;
/* Whole vector shifts shift in zeros, so if init is all zero constant,
there is no need to do anything further. */
if ((TREE_CODE (init) != INTEGER_CST
&& TREE_CODE (init) != REAL_CST)
|| !initializer_zerop (init))
{
tree masktype = build_same_sized_truth_vector_type (vectype);
if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
return -1;
kind = 2;
}
if (use_whole_vector_p)
*use_whole_vector_p = kind;
}
return units_log2;
}
/* Function check_scan_store.
Check magic stores for #pragma omp scan {in,ex}clusive reductions. */ Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
static bool static bool
...@@ -6596,34 +6661,9 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype, ...@@ -6596,34 +6661,9 @@ check_scan_store (stmt_vec_info stmt_info, tree vectype,
if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing) if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
goto fail; goto fail;
unsigned HOST_WIDE_INT nunits; int units_log2 = scan_store_can_perm_p (vectype, *init);
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) if (units_log2 == -1)
goto fail;
int units_log2 = exact_log2 (nunits);
if (units_log2 <= 0)
goto fail;
for (int i = 0; i <= units_log2; ++i)
{
unsigned HOST_WIDE_INT j, k;
vec_perm_builder sel (nunits, nunits, 1);
sel.quick_grow (nunits);
if (i == units_log2)
{
for (j = 0; j < nunits; ++j)
sel[j] = nunits - 1;
}
else
{
for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
sel[j] = nunits + j;
for (k = 0; j < nunits; ++j, ++k)
sel[j] = k;
}
vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
if (!can_vec_perm_const_p (vec_mode, indices))
goto fail; goto fail;
}
return true; return true;
} }
...@@ -6686,7 +6726,8 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6686,7 +6726,8 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
unsigned HOST_WIDE_INT nunits; unsigned HOST_WIDE_INT nunits;
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)) if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
gcc_unreachable (); gcc_unreachable ();
int units_log2 = exact_log2 (nunits); int use_whole_vector_p = 0;
int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector_p);
gcc_assert (units_log2 > 0); gcc_assert (units_log2 > 0);
auto_vec<tree, 16> perms; auto_vec<tree, 16> perms;
perms.quick_grow (units_log2 + 1); perms.quick_grow (units_log2 + 1);
...@@ -6696,21 +6737,25 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6696,21 +6737,25 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
vec_perm_builder sel (nunits, nunits, 1); vec_perm_builder sel (nunits, nunits, 1);
sel.quick_grow (nunits); sel.quick_grow (nunits);
if (i == units_log2) if (i == units_log2)
{
for (j = 0; j < nunits; ++j) for (j = 0; j < nunits; ++j)
sel[j] = nunits - 1; sel[j] = nunits - 1;
}
else else
{ {
for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j) for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
sel[j] = nunits + j; sel[j] = j;
for (k = 0; j < nunits; ++j, ++k) for (k = 0; j < nunits; ++j, ++k)
sel[j] = k; sel[j] = nunits + k;
} }
vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits); vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
if (use_whole_vector_p && i < units_log2)
perms[i] = vect_gen_perm_mask_any (vectype, indices);
else
perms[i] = vect_gen_perm_mask_checked (vectype, indices); perms[i] = vect_gen_perm_mask_checked (vectype, indices);
} }
tree zero_vec = use_whole_vector_p ? build_zero_cst (vectype) : NULL_TREE;
tree masktype = (use_whole_vector_p == 2
? build_same_sized_truth_vector_type (vectype) : NULL_TREE);
stmt_vec_info prev_stmt_info = NULL; stmt_vec_info prev_stmt_info = NULL;
tree vec_oprnd1 = NULL_TREE; tree vec_oprnd1 = NULL_TREE;
tree vec_oprnd2 = NULL_TREE; tree vec_oprnd2 = NULL_TREE;
...@@ -6742,8 +6787,9 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6742,8 +6787,9 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
for (int i = 0; i < units_log2; ++i) for (int i = 0; i < units_log2; ++i)
{ {
tree new_temp = make_ssa_name (vectype); tree new_temp = make_ssa_name (vectype);
gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR, v, gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
vec_oprnd1, perms[i]); zero_vec ? zero_vec : vec_oprnd1, v,
perms[i]);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
if (prev_stmt_info == NULL) if (prev_stmt_info == NULL)
STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
...@@ -6751,6 +6797,25 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ...@@ -6751,6 +6797,25 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info; prev_stmt_info = new_stmt_info;
if (use_whole_vector_p == 2)
{
/* Whole vector shift shifted in zero bits, but if *init
is not initializer_zerop, we need to replace those elements
with elements from vec_oprnd1. */
tree_vector_builder vb (masktype, nunits, 1);
for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
vb.quick_push (k < (HOST_WIDE_INT_1U << i)
? boolean_false_node : boolean_true_node);
tree new_temp2 = make_ssa_name (vectype);
g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
new_temp, vec_oprnd1);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
prev_stmt_info = new_stmt_info;
new_temp = new_temp2;
}
tree new_temp2 = make_ssa_name (vectype); tree new_temp2 = make_ssa_name (vectype);
g = gimple_build_assign (new_temp2, code, v, new_temp); g = gimple_build_assign (new_temp2, code, v, new_temp);
new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi); new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment