Commit 28a8a768 by Jakub Jelinek Committed by Jakub Jelinek

re PR target/85323 (SSE/AVX/AVX512 shift by 0 not optimized away)

	PR target/85323
	* config/i386/i386.c: Include tree-vector-builder.h.
	(ix86_vector_shift_count): New function.
	(ix86_fold_builtin): Fold shift builtins by scalar count.
	(ix86_gimple_fold_builtin): Likewise.

	* gcc.target/i386/pr85323-1.c: New test.
	* gcc.target/i386/pr85323-2.c: New test.
	* gcc.target/i386/pr85323-3.c: New test.

From-SVN: r260311
parent 4e6a811f
2018-05-17 Jakub Jelinek <jakub@redhat.com>
PR target/85323
* config/i386/i386.c: Include tree-vector-builder.h.
(ix86_vector_shift_count): New function.
(ix86_fold_builtin): Fold shift builtins by scalar count.
(ix86_gimple_fold_builtin): Likewise.
* config/i386/avx512fintrin.h (_mm512_set_epi16, _mm512_set_epi8,
_mm512_setzero): New intrinsics.
......
2018-05-17 Jakub Jelinek <jakub@redhat.com>
PR target/85323
* gcc.target/i386/pr85323-1.c: New test.
* gcc.target/i386/pr85323-2.c: New test.
* gcc.target/i386/pr85323-3.c: New test.
* gcc.target/i386/avx512f-set-v32hi-1.c: New test.
* gcc.target/i386/avx512f-set-v32hi-2.c: New test.
* gcc.target/i386/avx512f-set-v32hi-3.c: New test.
......
/* PR target/85323 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512vl -mavx512bw -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-times "return x_\[0-9]*.D.;" 3 "optimized" } } */
#include <x86intrin.h>
/* Chain every 128-bit shift intrinsic (left, logical right, arithmetic
   right; 64-, 32- and 16-bit element variants) with a shift count of
   zero.  This test's scan-tree-dump-times check expects a bare
   "return x_N(D);" in the optimized dump for each of its three
   functions, i.e. the whole chain should fold back to the incoming
   argument X.  */
__m128i
foo (__m128i x)
{
/* Count passed as a vector operand built from _mm_set1_epi32 (0).  */
x = _mm_sll_epi64 (x, _mm_set1_epi32 (0));
x = _mm_sll_epi32 (x, _mm_set1_epi32 (0));
x = _mm_sll_epi16 (x, _mm_set1_epi32 (0));
x = _mm_srl_epi64 (x, _mm_set1_epi32 (0));
x = _mm_srl_epi32 (x, _mm_set1_epi32 (0));
x = _mm_srl_epi16 (x, _mm_set1_epi32 (0));
x = _mm_sra_epi64 (x, _mm_set1_epi32 (0));
x = _mm_sra_epi32 (x, _mm_set1_epi32 (0));
x = _mm_sra_epi16 (x, _mm_set1_epi32 (0));
/* Count passed as the integer constant 0.  */
x = _mm_slli_epi64 (x, 0);
x = _mm_slli_epi32 (x, 0);
x = _mm_slli_epi16 (x, 0);
x = _mm_srli_epi64 (x, 0);
x = _mm_srli_epi32 (x, 0);
x = _mm_srli_epi16 (x, 0);
x = _mm_srai_epi64 (x, 0);
x = _mm_srai_epi32 (x, 0);
x = _mm_srai_epi16 (x, 0);
return x;
}
/* 256-bit counterpart of the zero-count shift chain: every _mm256_
   shift intrinsic (left, logical right, arithmetic right; 64-, 32- and
   16-bit element variants) is applied to X with a count of zero.  This
   test's scan-tree-dump-times check expects the optimized dump to
   contain a bare "return x_N(D);" for each function, so the chain
   should reduce to returning X unchanged.  */
__m256i
bar (__m256i x)
{
/* Count passed as a 128-bit vector operand built from
   _mm_set1_epi32 (0), even though the shifted value is 256 bits.  */
x = _mm256_sll_epi64 (x, _mm_set1_epi32 (0));
x = _mm256_sll_epi32 (x, _mm_set1_epi32 (0));
x = _mm256_sll_epi16 (x, _mm_set1_epi32 (0));
x = _mm256_srl_epi64 (x, _mm_set1_epi32 (0));
x = _mm256_srl_epi32 (x, _mm_set1_epi32 (0));
x = _mm256_srl_epi16 (x, _mm_set1_epi32 (0));
x = _mm256_sra_epi64 (x, _mm_set1_epi32 (0));
x = _mm256_sra_epi32 (x, _mm_set1_epi32 (0));
x = _mm256_sra_epi16 (x, _mm_set1_epi32 (0));
/* Count passed as the integer constant 0.  */
x = _mm256_slli_epi64 (x, 0);
x = _mm256_slli_epi32 (x, 0);
x = _mm256_slli_epi16 (x, 0);
x = _mm256_srli_epi64 (x, 0);
x = _mm256_srli_epi32 (x, 0);
x = _mm256_srli_epi16 (x, 0);
x = _mm256_srai_epi64 (x, 0);
x = _mm256_srai_epi32 (x, 0);
x = _mm256_srai_epi16 (x, 0);
return x;
}
/* 512-bit counterpart of the zero-count shift chain: every _mm512_
   shift intrinsic (left, logical right, arithmetic right; 64-, 32- and
   16-bit element variants) is applied to X with a count of zero.  This
   test's scan-tree-dump-times check expects the optimized dump to
   contain a bare "return x_N(D);" for each function, so the chain
   should reduce to returning X unchanged.  */
__m512i
baz (__m512i x)
{
/* Count passed as a 128-bit vector operand built from
   _mm_set1_epi32 (0), even though the shifted value is 512 bits.  */
x = _mm512_sll_epi64 (x, _mm_set1_epi32 (0));
x = _mm512_sll_epi32 (x, _mm_set1_epi32 (0));
x = _mm512_sll_epi16 (x, _mm_set1_epi32 (0));
x = _mm512_srl_epi64 (x, _mm_set1_epi32 (0));
x = _mm512_srl_epi32 (x, _mm_set1_epi32 (0));
x = _mm512_srl_epi16 (x, _mm_set1_epi32 (0));
x = _mm512_sra_epi64 (x, _mm_set1_epi32 (0));
x = _mm512_sra_epi32 (x, _mm_set1_epi32 (0));
x = _mm512_sra_epi16 (x, _mm_set1_epi32 (0));
/* Count passed as the integer constant 0.  */
x = _mm512_slli_epi64 (x, 0);
x = _mm512_slli_epi32 (x, 0);
x = _mm512_slli_epi16 (x, 0);
x = _mm512_srli_epi64 (x, 0);
x = _mm512_srli_epi32 (x, 0);
x = _mm512_srli_epi16 (x, 0);
x = _mm512_srai_epi64 (x, 0);
x = _mm512_srai_epi32 (x, 0);
x = _mm512_srai_epi16 (x, 0);
return x;
}
/* PR target/85323 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512vl -mavx512bw -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-times "= \{ 0, 0 \};" 12 "optimized" } } */
/* { dg-final { scan-tree-dump-times "= \{ 0, 0, 0, 0 \};" 12 "optimized" } } */
/* { dg-final { scan-tree-dump-times "= \{ 0, 0, 0, 0, 0, 0, 0, 0 \};" 12 "optimized" } } */
#include <x86intrin.h>
/* Apply each 128-bit logical shift (left and right; 64-, 32- and 16-bit
   element variants) to its own slot of X, using a count that is equal
   to or larger than the element width named by the intrinsic.  Only
   logical shifts appear here; no arithmetic right shifts are exercised.
   This test's scan-tree-dump-times checks look for all-zero vector
   constants in the optimized dump (12 matches of the two-element
   pattern -- presumably the 12 stores below; confirm against the
   dump).  */
void
foo (__m128i x[12])
{
/* Count passed as a vector operand: 64/32/16 equal the element width,
   65/33/17 exceed it by one.  */
x[0] = _mm_sll_epi64 (x[0], _mm_set1_epi64x (64));
x[1] = _mm_sll_epi32 (x[1], _mm_set1_epi64x (32));
x[2] = _mm_sll_epi16 (x[2], _mm_set1_epi64x (16));
x[3] = _mm_srl_epi64 (x[3], _mm_set1_epi64x (65));
x[4] = _mm_srl_epi32 (x[4], _mm_set1_epi64x (33));
x[5] = _mm_srl_epi16 (x[5], _mm_set1_epi64x (17));
/* Count passed as an integer constant, two and three past the element
   width.  */
x[6] = _mm_slli_epi64 (x[6], 66);
x[7] = _mm_slli_epi32 (x[7], 34);
x[8] = _mm_slli_epi16 (x[8], 18);
x[9] = _mm_srli_epi64 (x[9], 67);
x[10] = _mm_srli_epi32 (x[10], 35);
x[11] = _mm_srli_epi16 (x[11], 19);
}
/* 256-bit counterpart of the over-wide shift test: each _mm256_ logical
   shift (left and right; 64-, 32- and 16-bit element variants) is
   applied to its own slot of X with a count equal to or larger than the
   element width named by the intrinsic.  Only logical shifts appear
   here.  This test's scan-tree-dump-times checks look for all-zero
   vector constants in the optimized dump (12 matches of the
   four-element pattern -- presumably the 12 stores below; confirm
   against the dump).  */
void
bar (__m256i x[12])
{
/* Count passed as a 128-bit vector operand: 64/32/16 equal the element
   width, 65/33/17 exceed it by one.  */
x[0] = _mm256_sll_epi64 (x[0], _mm_set1_epi64x (64));
x[1] = _mm256_sll_epi32 (x[1], _mm_set1_epi64x (32));
x[2] = _mm256_sll_epi16 (x[2], _mm_set1_epi64x (16));
x[3] = _mm256_srl_epi64 (x[3], _mm_set1_epi64x (65));
x[4] = _mm256_srl_epi32 (x[4], _mm_set1_epi64x (33));
x[5] = _mm256_srl_epi16 (x[5], _mm_set1_epi64x (17));
/* Count passed as an integer constant, two and three past the element
   width.  */
x[6] = _mm256_slli_epi64 (x[6], 66);
x[7] = _mm256_slli_epi32 (x[7], 34);
x[8] = _mm256_slli_epi16 (x[8], 18);
x[9] = _mm256_srli_epi64 (x[9], 67);
x[10] = _mm256_srli_epi32 (x[10], 35);
x[11] = _mm256_srli_epi16 (x[11], 19);
}
/* 512-bit counterpart of the over-wide shift test: each _mm512_ logical
   shift (left and right; 64-, 32- and 16-bit element variants) is
   applied to its own slot of X with a count equal to or larger than the
   element width named by the intrinsic.  Only logical shifts appear
   here.  This test's scan-tree-dump-times checks look for all-zero
   vector constants in the optimized dump (12 matches of the
   eight-element pattern -- presumably the 12 stores below; confirm
   against the dump).  */
void
baz (__m512i x[12])
{
/* Count passed as a 128-bit vector operand: 64/32/16 equal the element
   width, 65/33/17 exceed it by one.  */
x[0] = _mm512_sll_epi64 (x[0], _mm_set1_epi64x (64));
x[1] = _mm512_sll_epi32 (x[1], _mm_set1_epi64x (32));
x[2] = _mm512_sll_epi16 (x[2], _mm_set1_epi64x (16));
x[3] = _mm512_srl_epi64 (x[3], _mm_set1_epi64x (65));
x[4] = _mm512_srl_epi32 (x[4], _mm_set1_epi64x (33));
x[5] = _mm512_srl_epi16 (x[5], _mm_set1_epi64x (17));
/* Count passed as an integer constant, two and three past the element
   width.  */
x[6] = _mm512_slli_epi64 (x[6], 66);
x[7] = _mm512_slli_epi32 (x[7], 34);
x[8] = _mm512_slli_epi16 (x[8], 18);
x[9] = _mm512_srli_epi64 (x[9], 67);
x[10] = _mm512_srli_epi32 (x[10], 35);
x[11] = _mm512_srli_epi16 (x[11], 19);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment