Commit 6a03477e by Jakub Jelinek Committed by Jakub Jelinek

re PR target/85323 (SSE/AVX/AVX512 shift by 0 not optimized away)

	PR target/85323
	* config/i386/i386.c (ix86_fold_builtin): Fold shift builtins by
	vector.
	(ix86_gimple_fold_builtin): Likewise.

	* gcc.target/i386/pr85323-4.c: New test.
	* gcc.target/i386/pr85323-5.c: New test.
	* gcc.target/i386/pr85323-6.c: New test.

From-SVN: r260312
parent 28a8a768
2018-05-17 Jakub Jelinek <jakub@redhat.com> 2018-05-17 Jakub Jelinek <jakub@redhat.com>
PR target/85323 PR target/85323
* config/i386/i386.c (ix86_fold_builtin): Fold shift builtins by
vector.
(ix86_gimple_fold_builtin): Likewise.
PR target/85323
* config/i386/i386.c: Include tree-vector-builder.h. * config/i386/i386.c: Include tree-vector-builder.h.
(ix86_vector_shift_count): New function. (ix86_vector_shift_count): New function.
(ix86_fold_builtin): Fold shift builtins by scalar count. (ix86_fold_builtin): Fold shift builtins by scalar count.
......
...@@ -33397,6 +33397,7 @@ ix86_fold_builtin (tree fndecl, int n_args, ...@@ -33397,6 +33397,7 @@ ix86_fold_builtin (tree fndecl, int n_args,
enum ix86_builtins fn_code = (enum ix86_builtins) enum ix86_builtins fn_code = (enum ix86_builtins)
DECL_FUNCTION_CODE (fndecl); DECL_FUNCTION_CODE (fndecl);
enum rtx_code rcode; enum rtx_code rcode;
bool is_vshift;
switch (fn_code) switch (fn_code)
{ {
...@@ -33615,6 +33616,7 @@ ix86_fold_builtin (tree fndecl, int n_args, ...@@ -33615,6 +33616,7 @@ ix86_fold_builtin (tree fndecl, int n_args,
case IX86_BUILTIN_PSLLWI256_MASK: case IX86_BUILTIN_PSLLWI256_MASK:
case IX86_BUILTIN_PSLLWI512_MASK: case IX86_BUILTIN_PSLLWI512_MASK:
rcode = ASHIFT; rcode = ASHIFT;
is_vshift = false;
goto do_shift; goto do_shift;
case IX86_BUILTIN_PSRAD: case IX86_BUILTIN_PSRAD:
case IX86_BUILTIN_PSRAD128: case IX86_BUILTIN_PSRAD128:
...@@ -33647,6 +33649,7 @@ ix86_fold_builtin (tree fndecl, int n_args, ...@@ -33647,6 +33649,7 @@ ix86_fold_builtin (tree fndecl, int n_args,
case IX86_BUILTIN_PSRAWI256_MASK: case IX86_BUILTIN_PSRAWI256_MASK:
case IX86_BUILTIN_PSRAWI512: case IX86_BUILTIN_PSRAWI512:
rcode = ASHIFTRT; rcode = ASHIFTRT;
is_vshift = false;
goto do_shift; goto do_shift;
case IX86_BUILTIN_PSRLD: case IX86_BUILTIN_PSRLD:
case IX86_BUILTIN_PSRLD128: case IX86_BUILTIN_PSRLD128:
...@@ -33685,6 +33688,53 @@ ix86_fold_builtin (tree fndecl, int n_args, ...@@ -33685,6 +33688,53 @@ ix86_fold_builtin (tree fndecl, int n_args,
case IX86_BUILTIN_PSRLWI256_MASK: case IX86_BUILTIN_PSRLWI256_MASK:
case IX86_BUILTIN_PSRLWI512: case IX86_BUILTIN_PSRLWI512:
rcode = LSHIFTRT; rcode = LSHIFTRT;
is_vshift = false;
goto do_shift;
case IX86_BUILTIN_PSLLVV16HI:
case IX86_BUILTIN_PSLLVV16SI:
case IX86_BUILTIN_PSLLVV2DI:
case IX86_BUILTIN_PSLLVV2DI_MASK:
case IX86_BUILTIN_PSLLVV32HI:
case IX86_BUILTIN_PSLLVV4DI:
case IX86_BUILTIN_PSLLVV4DI_MASK:
case IX86_BUILTIN_PSLLVV4SI:
case IX86_BUILTIN_PSLLVV4SI_MASK:
case IX86_BUILTIN_PSLLVV8DI:
case IX86_BUILTIN_PSLLVV8HI:
case IX86_BUILTIN_PSLLVV8SI:
case IX86_BUILTIN_PSLLVV8SI_MASK:
rcode = ASHIFT;
is_vshift = true;
goto do_shift;
case IX86_BUILTIN_PSRAVQ128:
case IX86_BUILTIN_PSRAVQ256:
case IX86_BUILTIN_PSRAVV16HI:
case IX86_BUILTIN_PSRAVV16SI:
case IX86_BUILTIN_PSRAVV32HI:
case IX86_BUILTIN_PSRAVV4SI:
case IX86_BUILTIN_PSRAVV4SI_MASK:
case IX86_BUILTIN_PSRAVV8DI:
case IX86_BUILTIN_PSRAVV8HI:
case IX86_BUILTIN_PSRAVV8SI:
case IX86_BUILTIN_PSRAVV8SI_MASK:
rcode = ASHIFTRT;
is_vshift = true;
goto do_shift;
case IX86_BUILTIN_PSRLVV16HI:
case IX86_BUILTIN_PSRLVV16SI:
case IX86_BUILTIN_PSRLVV2DI:
case IX86_BUILTIN_PSRLVV2DI_MASK:
case IX86_BUILTIN_PSRLVV32HI:
case IX86_BUILTIN_PSRLVV4DI:
case IX86_BUILTIN_PSRLVV4DI_MASK:
case IX86_BUILTIN_PSRLVV4SI:
case IX86_BUILTIN_PSRLVV4SI_MASK:
case IX86_BUILTIN_PSRLVV8DI:
case IX86_BUILTIN_PSRLVV8HI:
case IX86_BUILTIN_PSRLVV8SI:
case IX86_BUILTIN_PSRLVV8SI_MASK:
rcode = LSHIFTRT;
is_vshift = true;
goto do_shift; goto do_shift;
do_shift: do_shift:
...@@ -33703,7 +33753,10 @@ ix86_fold_builtin (tree fndecl, int n_args, ...@@ -33703,7 +33753,10 @@ ix86_fold_builtin (tree fndecl, int n_args,
if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U) if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
break; break;
} }
if (tree tem = ix86_vector_shift_count (args[1])) if (is_vshift && TREE_CODE (args[1]) != VECTOR_CST)
break;
if (tree tem = (is_vshift ? integer_one_node
: ix86_vector_shift_count (args[1])))
{ {
unsigned HOST_WIDE_INT count = tree_to_uhwi (tem); unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
if (count == 0) if (count == 0)
...@@ -33714,7 +33767,9 @@ ix86_fold_builtin (tree fndecl, int n_args, ...@@ -33714,7 +33767,9 @@ ix86_fold_builtin (tree fndecl, int n_args,
return build_zero_cst (TREE_TYPE (args[0])); return build_zero_cst (TREE_TYPE (args[0]));
count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))) - 1; count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))) - 1;
} }
tree countt = build_int_cst (integer_type_node, count); tree countt = NULL_TREE;
if (!is_vshift)
countt = build_int_cst (integer_type_node, count);
tree_vector_builder builder; tree_vector_builder builder;
builder.new_unary_operation (TREE_TYPE (args[0]), args[0], builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
false); false);
...@@ -33727,9 +33782,30 @@ ix86_fold_builtin (tree fndecl, int n_args, ...@@ -33727,9 +33782,30 @@ ix86_fold_builtin (tree fndecl, int n_args,
tree type = TREE_TYPE (elt); tree type = TREE_TYPE (elt);
if (rcode == LSHIFTRT) if (rcode == LSHIFTRT)
elt = fold_convert (unsigned_type_for (type), elt); elt = fold_convert (unsigned_type_for (type), elt);
if (is_vshift)
{
countt = VECTOR_CST_ELT (args[1], i);
if (TREE_CODE (countt) != INTEGER_CST
|| TREE_OVERFLOW (countt))
return NULL_TREE;
int prec
= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0])));
if (wi::neg_p (wi::to_wide (countt))
|| wi::to_widest (countt) >= prec)
{
if (rcode == ASHIFTRT)
countt = build_int_cst (TREE_TYPE (countt),
prec - 1);
else
{
elt = build_zero_cst (TREE_TYPE (elt));
countt = build_zero_cst (TREE_TYPE (countt));
}
}
}
elt = const_binop (rcode == ASHIFT elt = const_binop (rcode == ASHIFT
? LSHIFT_EXPR : RSHIFT_EXPR, TREE_TYPE (elt), ? LSHIFT_EXPR : RSHIFT_EXPR,
elt, countt); TREE_TYPE (elt), elt, countt);
if (!elt || TREE_CODE (elt) != INTEGER_CST) if (!elt || TREE_CODE (elt) != INTEGER_CST)
return NULL_TREE; return NULL_TREE;
if (rcode == LSHIFTRT) if (rcode == LSHIFTRT)
...@@ -33767,6 +33843,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) ...@@ -33767,6 +33843,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
tree arg0, arg1; tree arg0, arg1;
enum rtx_code rcode; enum rtx_code rcode;
unsigned HOST_WIDE_INT count; unsigned HOST_WIDE_INT count;
bool is_vshift;
switch (fn_code) switch (fn_code)
{ {
...@@ -33883,6 +33960,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) ...@@ -33883,6 +33960,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
case IX86_BUILTIN_PSLLWI256_MASK: case IX86_BUILTIN_PSLLWI256_MASK:
case IX86_BUILTIN_PSLLWI512_MASK: case IX86_BUILTIN_PSLLWI512_MASK:
rcode = ASHIFT; rcode = ASHIFT;
is_vshift = false;
goto do_shift; goto do_shift;
case IX86_BUILTIN_PSRAD: case IX86_BUILTIN_PSRAD:
case IX86_BUILTIN_PSRAD128: case IX86_BUILTIN_PSRAD128:
...@@ -33915,6 +33993,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) ...@@ -33915,6 +33993,7 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
case IX86_BUILTIN_PSRAWI256_MASK: case IX86_BUILTIN_PSRAWI256_MASK:
case IX86_BUILTIN_PSRAWI512: case IX86_BUILTIN_PSRAWI512:
rcode = ASHIFTRT; rcode = ASHIFTRT;
is_vshift = false;
goto do_shift; goto do_shift;
case IX86_BUILTIN_PSRLD: case IX86_BUILTIN_PSRLD:
case IX86_BUILTIN_PSRLD128: case IX86_BUILTIN_PSRLD128:
...@@ -33953,6 +34032,53 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) ...@@ -33953,6 +34032,53 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
case IX86_BUILTIN_PSRLWI256_MASK: case IX86_BUILTIN_PSRLWI256_MASK:
case IX86_BUILTIN_PSRLWI512: case IX86_BUILTIN_PSRLWI512:
rcode = LSHIFTRT; rcode = LSHIFTRT;
is_vshift = false;
goto do_shift;
case IX86_BUILTIN_PSLLVV16HI:
case IX86_BUILTIN_PSLLVV16SI:
case IX86_BUILTIN_PSLLVV2DI:
case IX86_BUILTIN_PSLLVV2DI_MASK:
case IX86_BUILTIN_PSLLVV32HI:
case IX86_BUILTIN_PSLLVV4DI:
case IX86_BUILTIN_PSLLVV4DI_MASK:
case IX86_BUILTIN_PSLLVV4SI:
case IX86_BUILTIN_PSLLVV4SI_MASK:
case IX86_BUILTIN_PSLLVV8DI:
case IX86_BUILTIN_PSLLVV8HI:
case IX86_BUILTIN_PSLLVV8SI:
case IX86_BUILTIN_PSLLVV8SI_MASK:
rcode = ASHIFT;
is_vshift = true;
goto do_shift;
case IX86_BUILTIN_PSRAVQ128:
case IX86_BUILTIN_PSRAVQ256:
case IX86_BUILTIN_PSRAVV16HI:
case IX86_BUILTIN_PSRAVV16SI:
case IX86_BUILTIN_PSRAVV32HI:
case IX86_BUILTIN_PSRAVV4SI:
case IX86_BUILTIN_PSRAVV4SI_MASK:
case IX86_BUILTIN_PSRAVV8DI:
case IX86_BUILTIN_PSRAVV8HI:
case IX86_BUILTIN_PSRAVV8SI:
case IX86_BUILTIN_PSRAVV8SI_MASK:
rcode = ASHIFTRT;
is_vshift = true;
goto do_shift;
case IX86_BUILTIN_PSRLVV16HI:
case IX86_BUILTIN_PSRLVV16SI:
case IX86_BUILTIN_PSRLVV2DI:
case IX86_BUILTIN_PSRLVV2DI_MASK:
case IX86_BUILTIN_PSRLVV32HI:
case IX86_BUILTIN_PSRLVV4DI:
case IX86_BUILTIN_PSRLVV4DI_MASK:
case IX86_BUILTIN_PSRLVV4SI:
case IX86_BUILTIN_PSRLVV4SI_MASK:
case IX86_BUILTIN_PSRLVV8DI:
case IX86_BUILTIN_PSRLVV8HI:
case IX86_BUILTIN_PSRLVV8SI:
case IX86_BUILTIN_PSRLVV8SI_MASK:
rcode = LSHIFTRT;
is_vshift = true;
goto do_shift; goto do_shift;
do_shift: do_shift:
...@@ -33970,10 +34096,31 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) ...@@ -33970,10 +34096,31 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U) if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
break; break;
} }
arg1 = ix86_vector_shift_count (arg1); if (is_vshift)
if (!arg1) {
break; if (TREE_CODE (arg1) != VECTOR_CST)
count = tree_to_uhwi (arg1); break;
count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0)));
if (integer_zerop (arg1))
count = 0;
else if (rcode == ASHIFTRT)
break;
else
for (unsigned int i = 0; i < VECTOR_CST_NELTS (arg1); ++i)
{
tree elt = VECTOR_CST_ELT (arg1, i);
if (!wi::neg_p (wi::to_wide (elt))
&& wi::to_widest (elt) < count)
return false;
}
}
else
{
arg1 = ix86_vector_shift_count (arg1);
if (!arg1)
break;
count = tree_to_uhwi (arg1);
}
if (count == 0) if (count == 0)
{ {
/* Just return the first argument for shift by 0. */ /* Just return the first argument for shift by 0. */
2018-05-17 Jakub Jelinek <jakub@redhat.com> 2018-05-17 Jakub Jelinek <jakub@redhat.com>
PR target/85323 PR target/85323
* gcc.target/i386/pr85323-4.c: New test.
* gcc.target/i386/pr85323-5.c: New test.
* gcc.target/i386/pr85323-6.c: New test.
PR target/85323
* gcc.target/i386/pr85323-1.c: New test. * gcc.target/i386/pr85323-1.c: New test.
* gcc.target/i386/pr85323-2.c: New test. * gcc.target/i386/pr85323-2.c: New test.
* gcc.target/i386/pr85323-3.c: New test. * gcc.target/i386/pr85323-3.c: New test.
......
/* PR target/85323 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512vl -mavx512bw -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-times "return x_\[0-9]*.D.;" 3 "optimized" } } */
#include <x86intrin.h>
/* 128-bit case.  Every per-element variable shift below (left, logical
   right and arithmetic right, on 64-, 32- and 16-bit elements) is given
   an all-zero count vector, so the whole chain is expected to fold away
   and the function to simply return X; the dg-final scan at the top of
   this test looks for exactly that return statement.  */
__m128i
foo (__m128i x)
{
x = _mm_sllv_epi64 (x, _mm_set1_epi32 (0));
x = _mm_sllv_epi32 (x, _mm_set1_epi32 (0));
x = _mm_sllv_epi16 (x, _mm_set1_epi32 (0));
x = _mm_srlv_epi64 (x, _mm_set1_epi32 (0));
x = _mm_srlv_epi32 (x, _mm_set1_epi32 (0));
x = _mm_srlv_epi16 (x, _mm_set1_epi32 (0));
x = _mm_srav_epi64 (x, _mm_set1_epi32 (0));
x = _mm_srav_epi32 (x, _mm_set1_epi32 (0));
x = _mm_srav_epi16 (x, _mm_set1_epi32 (0));
return x;
}
/* 256-bit case.  Same sequence as the 128-bit function: nine variable
   shifts (sllv/srlv/srav on 64-, 32- and 16-bit elements), each with an
   all-zero count vector, expected to reduce to returning X unchanged.  */
__m256i
bar (__m256i x)
{
x = _mm256_sllv_epi64 (x, _mm256_set1_epi32 (0));
x = _mm256_sllv_epi32 (x, _mm256_set1_epi32 (0));
x = _mm256_sllv_epi16 (x, _mm256_set1_epi32 (0));
x = _mm256_srlv_epi64 (x, _mm256_set1_epi32 (0));
x = _mm256_srlv_epi32 (x, _mm256_set1_epi32 (0));
x = _mm256_srlv_epi16 (x, _mm256_set1_epi32 (0));
x = _mm256_srav_epi64 (x, _mm256_set1_epi32 (0));
x = _mm256_srav_epi32 (x, _mm256_set1_epi32 (0));
x = _mm256_srav_epi16 (x, _mm256_set1_epi32 (0));
return x;
}
/* 512-bit case.  Nine variable shifts (sllv/srlv/srav on 64-, 32- and
   16-bit elements) whose count operand is the zero vector produced by
   _mm512_setzero_epi32; expected to reduce to returning X unchanged.  */
__m512i
baz (__m512i x)
{
x = _mm512_sllv_epi64 (x, _mm512_setzero_epi32 ());
x = _mm512_sllv_epi32 (x, _mm512_setzero_epi32 ());
x = _mm512_sllv_epi16 (x, _mm512_setzero_epi32 ());
x = _mm512_srlv_epi64 (x, _mm512_setzero_epi32 ());
x = _mm512_srlv_epi32 (x, _mm512_setzero_epi32 ());
x = _mm512_srlv_epi16 (x, _mm512_setzero_epi32 ());
x = _mm512_srav_epi64 (x, _mm512_setzero_epi32 ());
x = _mm512_srav_epi32 (x, _mm512_setzero_epi32 ());
x = _mm512_srav_epi16 (x, _mm512_setzero_epi32 ());
return x;
}
/* PR target/85323 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512vl -mavx512bw -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-times "= \{ 0, 0 \};" 6 "optimized" } } */
/* { dg-final { scan-tree-dump-times "= \{ 0, 0, 0, 0 \};" 6 "optimized" } } */
/* { dg-final { scan-tree-dump-times "= \{ 0, 0, 0, 0, 0, 0, 0, 0 \};" 6 "optimized" } } */
#include <x86intrin.h>
/* 128-bit case.  Each of the six logical shifts (three left, three
   right) uses a constant count vector in which every element is either
   negative or at least the element width, so each store into X is
   expected to become an all-zero vector.  Arithmetic right shifts are
   deliberately not exercised here.  */
void
foo (__m128i x[6])
{
/* Left shifts: counts >= 64, >= 32 and >= 16 (or negative).  */
x[0] = _mm_sllv_epi64 (x[0], _mm_set_epi64x (64, 65));
x[1] = _mm_sllv_epi32 (x[1], _mm_set_epi32 (32, 33, 34, 32));
x[2] = _mm_sllv_epi16 (x[2], _mm_set_epi16 (16, 18, -16, -1, 19, 16, 18, 20));
/* Logical right shifts with the same kind of out-of-range counts.  */
x[3] = _mm_srlv_epi64 (x[3], _mm_set_epi64x (65, -1));
x[4] = _mm_srlv_epi32 (x[4], _mm_set_epi32 (33, 32, 39, -5));
x[5] = _mm_srlv_epi16 (x[5], _mm_set1_epi16 (17));
}
/* 256-bit case.  Six logical shifts (three left, three right) whose
   constant count vectors contain only negative values or values at
   least as large as the element width; every store into X is expected
   to become an all-zero vector.  */
void
bar (__m256i x[6])
{
/* Left shifts on 64-, 32- and 16-bit elements.  */
x[0] = _mm256_sllv_epi64 (x[0], _mm256_set_epi64x (64, 65, -2, 66));
x[1] = _mm256_sllv_epi32 (x[1], _mm256_set_epi32 (32, 32, 39, -4, -32, 98, 2048, 32));
x[2] = _mm256_sllv_epi16 (x[2], _mm256_set_epi16 (16, 32, 64, 128, 16, 16, 32, -5,
-1, -2, -3, 16, 17, 18, 19, 200));
/* Logical right shifts on 64-, 32- and 16-bit elements.  */
x[3] = _mm256_srlv_epi64 (x[3], _mm256_set_epi64x (65, 64, -5, 64));
x[4] = _mm256_srlv_epi32 (x[4], _mm256_set_epi32 (33, 49, 2048, 32, -1, 32, 37, 16384));
x[5] = _mm256_srlv_epi16 (x[5], _mm256_set1_epi16 (17));
}
/* 512-bit case.  Six logical shifts (three left, three right) whose
   constant count vectors contain only negative values or values at
   least as large as the element width; every store into X is expected
   to become an all-zero vector.  */
void
baz (__m512i x[6])
{
/* Left shifts on 64-, 32- and 16-bit elements.  */
x[0] = _mm512_sllv_epi64 (x[0], _mm512_set_epi64 (64, 64, 69, -1, 2048, 64, 16348, -64));
x[1] = _mm512_sllv_epi32 (x[1], _mm512_set_epi32 (32, 33, 34, 35, 36, -32, -33, -34,
-1, -2, -3, -4, -5, -6, 32, 2048));
x[2] = _mm512_sllv_epi16 (x[2], _mm512_set_epi16 (16, 32, 64, 128, 16, 16, 32, -5,
-1, -2, -3, 16, 17, 18, 19, 200,
16, 19, 2048, 16, -2, -8, -19, 16,
-1, -2, -3, -4, -5, -6, -7, -8));
/* Logical right shifts on 64-, 32- and 16-bit elements.  */
x[3] = _mm512_srlv_epi64 (x[3], _mm512_set_epi64 (65, 64, 69, 68, 64, 79, 2048, -1));
x[4] = _mm512_srlv_epi32 (x[4], _mm512_set_epi32 (32, 33, 34, 35, 36, -32, -33, -34,
-1, -2, -3, -4, -5, -6, 32, 2048));
x[5] = _mm512_srlv_epi16 (x[5], _mm512_set1_epi16 (17));
}
/* PR target/85323 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512vl -mavx512bw -fdump-tree-optimized" } */
#include <x86intrin.h>
/* Global result holders, one per vector width (128, 256 and 512 bits).
   Each function below stores nine shift results into fields a through i
   of the matching object, and the dg-final scans match those stores.  */
struct S1 { __m128i a, b, c, d, e, f, g, h, i; } s1;
struct S2 { __m256i a, b, c, d, e, f, g, h, i; } s2;
struct S3 { __m512i a, b, c, d, e, f, g, h, i; } s3;
/* { dg-final { scan-tree-dump-times "s1.a = \{ -4342213319840130048, -761680639942076944 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s1.b = \{ 16777216, 149499221639168 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s1.c = \{ 2623346860254860648, -763360136839241728 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s1.d = \{ 35871495301330685, 2005711373062887255 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s1.e = \{ 128, 1729384589077512192 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s1.f = \{ 655836773112359254, 2005509209063424011 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s1.g = \{ -157301717633283, -300131636150806697 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s1.h = \{ -128, -576458420136181760 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s1.i = \{ 655836777273157974, -300052325173559301 \};" 1 "optimized" } } */
/* 128-bit constant-folding case.  All operands are compile-time
   constants: A/C/E are the values being shifted (as 64-, 32- and 16-bit
   elements) and B/D/F are the matching per-element counts.  The 64-bit
   counts are all in range, while the 32- and 16-bit count vectors also
   contain negative and too-large counts.  The nine results are stored
   into s1 and compared against the constants in the scans above.  */
void
foo (void)
{
__m128i a = _mm_set_epi64x (0xdeadbeefcafebabeULL, 0xfee1deadfeedfaceULL);
__m128i b = _mm_set_epi64x (3, 9);
__m128i c = _mm_set_epi32 (0xc00010ffU, 0x12345678U, 0xfedcba98U, 0x80000001U);
__m128i d = _mm_set_epi32 (3, 32, -6, 24);
__m128i e = _mm_set_epi16 (0xdead, 0xbeef, 0xcafe, 0xbabe,
0x1234, 0x0012, 0x8001, 0xa55a);
__m128i f = _mm_set_epi16 (3, 16, -1, 12, 1, 0, 5, 2);
/* Left shifts.  */
s1.a = _mm_sllv_epi64 (a, b);
s1.b = _mm_sllv_epi32 (c, d);
s1.c = _mm_sllv_epi16 (e, f);
/* Logical right shifts.  */
s1.d = _mm_srlv_epi64 (a, b);
s1.e = _mm_srlv_epi32 (c, d);
s1.f = _mm_srlv_epi16 (e, f);
/* Arithmetic right shifts.  */
s1.g = _mm_srav_epi64 (a, b);
s1.h = _mm_srav_epi32 (c, d);
s1.i = _mm_srav_epi16 (e, f);
}
/* { dg-final { scan-tree-dump-times "s2.a = \{ 6722813395751927808, 0, 0, -1523361279884153888 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s2.b = \{ 9177596069264525312, 1851607040, -81985531201716224, 76543602090093808 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s2.c = \{ 1008895103428722688, -5985166321598332416, 2623346860254860648, -763360136839241728 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s2.d = \{ 2189249818860, 0, 0, 1002855686531443627 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s2.e = \{ 114276044520956448, 130489, -81985531201716224, 3377704168205116 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s2.f = \{ 289076540546023424, 3115407575762206978, 655836773112359254, 2005509209063424011 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s2.g = \{ 2189249818860, 0, -1, -150065818075403349 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s2.h = \{ -29839143554899424, -4294836807, -81985526906748929, -1125895459165380 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s2.i = \{ -287384211757400064, 3115618685752836354, 655836777273157974, -300052325173559301 \};" 1 "optimized" } } */
/* 256-bit constant-folding case.  A/C/E are the constant values being
   shifted (as 64-, 32- and 16-bit elements) and B/D/F the matching
   per-element counts; each count vector mixes in-range counts with
   negative or too-large ones.  The nine results are stored into s2 and
   compared against the constants in the scans above.  */
void
bar (void)
{
__m256i a = _mm256_set_epi64x (0xdeadbeefcafebabeULL, 0xfee1deadfeedfaceULL,
0x123456789abcdef0ULL, 0x0fedcba987654321ULL);
__m256i b = _mm256_set_epi64x (4, 65, -2, 19);
__m256i c = _mm256_set_epi32 (0xc00010ffU, 0x12345678U, 0xfedcba98U, 0x80000001U,
0xdeadbeefU, 0x0fedcba9U, 0xcafebabeU, 0x00111100U);
__m256i d = _mm256_set_epi32 (12, 1, 0, -2, 32, 11, 7, 3);
__m256i e = _mm256_set_epi16 (0xdead, 0xbeef, 0xcafe, 0xbabe,
0x1234, 0x0012, 0x8001, 0xa55a,
0x5678, 0x9abc, 0xdef0, 0x1020,
0x8070, 0x6543, 0x129f, 0);
__m256i f = _mm256_set_epi16 (3, 16, -1, 12, 1, 0, 5, 2, 1, 2, 3, 4, 5, 6, 7, 8);
/* Left shifts.  */
s2.a = _mm256_sllv_epi64 (a, b);
s2.b = _mm256_sllv_epi32 (c, d);
s2.c = _mm256_sllv_epi16 (e, f);
/* Logical right shifts.  */
s2.d = _mm256_srlv_epi64 (a, b);
s2.e = _mm256_srlv_epi32 (c, d);
s2.f = _mm256_srlv_epi16 (e, f);
/* Arithmetic right shifts.  */
s2.g = _mm256_srav_epi64 (a, b);
s2.h = _mm256_srav_epi32 (c, d);
s2.i = _mm256_srav_epi16 (e, f);
}
/* { dg-final { scan-tree-dump-times "s3.a = \{ 6592671264835730432, 5247073869855161280, 1147797409030816545, -161076958856481380, 6722813395751927808, 0, 0, -1523361279884153888 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s3.b = \{ -4611667331015735296, 6592669523180452796, 2541551364173987968, 1068969636, 9177596069264525312, 1851607040, -81985531201716224, 76543602090093808 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s3.c = \{ 6233191819462621886, 8070591269736295416, 8610979175836155904, 40534596407293308, 1008895103428722688, -5985166321598332416, 2623346860254860648, -763360136839241728 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s3.d = \{ 31339240204107613, 327942116865947580, 1147797409030816545, 9183102797140655463, 2189249818860, 0, 0, 1002855686531443627 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s3.e = \{ -4611667331024543200, 31339239126560699, 81985526923526144, 66810602, 114276044520956448, 130489, -81985531201716224, 3377704168205116 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s3.f = \{ 125466298768407230, 36028797018976959, 107269861939347456, 563225682730335, 289076540546023424, 3115407575762206978, 655836773112359254, 2005509209063424011 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s3.g = \{ -4689556814856355, 327942116865947580, 1147797409030816545, -40269239714120345, 2189249818860, 0, -1, -150065818075403349 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s3.h = \{ -4611667331024543200, -4689554671177797, 81985531184939008, 66810602, -29839143554899424, -4294836807, -81985526906748929, -1125895459165380 \};" 1 "optimized" } } */
/* { dg-final { scan-tree-dump-times "s3.i = \{ -18648885549352258, -36028797018901825, -36599031236919297, 844154124885343, -287384211757400064, 3115618685752836354, 655836777273157974, -300052325173559301 \};" 1 "optimized" } } */
/* 512-bit constant-folding case.  A/C/E are the constant values being
   shifted (as 64-, 32- and 16-bit elements) and B/D/F the matching
   per-element counts; each count vector mixes in-range counts with
   negative or too-large ones.  The nine results are stored into s3 and
   compared against the constants in the scans above.  */
void
baz (void)
{
__m512i a = _mm512_set_epi64 (0xdeadbeefcafebabeULL, 0xfee1deadfeedfaceULL,
0x123456789abcdef0ULL, 0x0fedcba987654321ULL,
0xfee1deadfeedfaceULL, 0x0fedcba987654321ULL,
0x123456789abcdef0ULL, 0xdeadbeefcafebabeULL);
__m512i b = _mm512_set_epi64 (4, 65, -2, 19, 1, 0, 2, 9);
__m512i c = _mm512_set_epi32 (0xc00010ffU, 0x12345678U, 0xfedcba98U, 0x80000001U,
0xdeadbeefU, 0x0fedcba9U, 0xcafebabeU, 0x00111100U,
0, 0x0fedcba9U, 0x12345678U, 0x80000001U,
0xdeadbeefU, 0xdeadbeefU, 0xc00010ffU, 0x00111100U);
__m512i d = _mm512_set_epi32 (12, 1, 0, -2, 32, 11, 7, 3, 1, 2, 4, 7, 9, 2, 0, 3);
__m512i e = _mm512_set_epi16 (0xdead, 0xbeef, 0xcafe, 0xbabe,
0x1234, 0x0012, 0x8001, 0xa55a,
0x5678, 0x9abc, 0xdef0, 0x1020,
0x8070, 0x6543, 0x129f, 0,
0x0012, 0x8001, 0xcafe, 0xbabe,
0xbeef, 0xcafe, 0x9abc, 0xdef0,
0x8070, 0x6543, 0x129f, 0xcafe,
0xdead, 0xbeef, 0xcafe, 0xbabe);
__m512i f = _mm512_set_epi16 (3, 16, -1, 12, 1, 0, 5, 2, 1, 2, 3, 4, 5, 6, 7, 8,
3, 9, 2, 1, 7, 3, -12, 26, 8, 15, 17, 2, 7, 0, 3, 0);
/* Left shifts.  */
s3.a = _mm512_sllv_epi64 (a, b);
s3.b = _mm512_sllv_epi32 (c, d);
s3.c = _mm512_sllv_epi16 (e, f);
/* Logical right shifts.  */
s3.d = _mm512_srlv_epi64 (a, b);
s3.e = _mm512_srlv_epi32 (c, d);
s3.f = _mm512_srlv_epi16 (e, f);
/* Arithmetic right shifts.  */
s3.g = _mm512_srav_epi64 (a, b);
s3.h = _mm512_srav_epi32 (c, d);
s3.i = _mm512_srav_epi16 (e, f);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment