Commit 0b1c4b83 by Jakub Jelinek Committed by Jakub Jelinek

re PR target/88152 (optimize SSE & AVX char compares with subsequent movmskb)

	PR target/88152
	* config/i386/sse.md (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt,
	*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt,
	*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift,
	*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift,
	*<sse2_avx2>_pmovmskb_lt, *<sse2_avx2>_pmovmskb_zext_lt): New
	define_insn_and_split patterns.

	* g++.target/i386/pr88152.C: New test.

From-SVN: r266649
parent fb9e6a4b
2018-11-29 Jakub Jelinek <jakub@redhat.com>
PR target/88152
* config/i386/sse.md (*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt,
*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt,
*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift,
*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift,
*<sse2_avx2>_pmovmskb_lt, *<sse2_avx2>_pmovmskb_zext_lt): New
define_insn_and_split patterns.
PR target/54700
* config/i386/sse.md
(*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt,
......@@ -14653,6 +14653,78 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
[(lt:VF_128_256
(match_operand:<sseintvecmode> 1 "register_operand" "x")
(match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
UNSPEC_MOVMSK))]
"TARGET_SSE"
"#"
"&& reload_completed"
[(set (match_dup 0)
(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
"operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
[(set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
(unspec:SI
[(lt:VF_128_256
(match_operand:<sseintvecmode> 1 "register_operand" "x")
(match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
UNSPEC_MOVMSK)))]
"TARGET_64BIT && TARGET_SSE"
"#"
"&& reload_completed"
[(set (match_dup 0)
(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
"operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
[(set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
[(subreg:VF_128_256
(ashiftrt:<sseintvecmode>
(match_operand:<sseintvecmode> 1 "register_operand" "x")
(match_operand:QI 2 "const_int_operand" "n")) 0)]
UNSPEC_MOVMSK))]
"TARGET_SSE"
"#"
"&& reload_completed"
[(set (match_dup 0)
(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
"operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
[(set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
(unspec:SI
[(subreg:VF_128_256
(ashiftrt:<sseintvecmode>
(match_operand:<sseintvecmode> 1 "register_operand" "x")
(match_operand:QI 2 "const_int_operand" "n")) 0)]
UNSPEC_MOVMSK)))]
"TARGET_64BIT && TARGET_SSE"
"#"
"&& reload_completed"
[(set (match_dup 0)
(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
"operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
[(set_attr "type" "ssemov")
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
(define_insn "<sse2_avx2>_pmovmskb"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
......@@ -14686,6 +14758,49 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
(define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
[(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
(match_operand:VI1_AVX2 2 "const0_operand" "C"))]
UNSPEC_MOVMSK))]
"TARGET_SSE2"
"#"
""
[(set (match_dup 0)
(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
""
[(set_attr "type" "ssemov")
(set (attr "prefix_data16")
(if_then_else
(match_test "TARGET_AVX")
(const_string "*")
(const_string "1")))
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
(define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
(unspec:SI
[(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
(match_operand:VI1_AVX2 2 "const0_operand" "C"))]
UNSPEC_MOVMSK)))]
"TARGET_64BIT && TARGET_SSE2"
"#"
""
[(set (match_dup 0)
(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
""
[(set_attr "type" "ssemov")
(set (attr "prefix_data16")
(if_then_else
(match_test "TARGET_AVX")
(const_string "*")
(const_string "1")))
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
(define_expand "sse2_maskmovdqu"
[(set (match_operand:V16QI 0 "memory_operand")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
......
2018-11-29 Jakub Jelinek <jakub@redhat.com>
PR target/88152
* g++.target/i386/pr88152.C: New test.
2018-11-29 Vladimir Makarov <vmakarov@redhat.com>
* gcc.target/i386/pr34256.c: Adjust the number of expected moves.
......
// PR target/88152
// { dg-do compile }
// { dg-options "-O2 -mavx2 -std=c++11" }
// { dg-final { scan-assembler-times "vpmovmskb\[^\n\r]*xmm" 6 } }
// { dg-final { scan-assembler-times "vpmovmskb\[^\n\r]*ymm" 6 } }
// { dg-final { scan-assembler-times "vmovmskps\[^\n\r]*xmm" 4 } }
// { dg-final { scan-assembler-times "vmovmskps\[^\n\r]*ymm" 4 } }
// { dg-final { scan-assembler-times "vmovmskpd\[^\n\r]*xmm" 4 } }
// { dg-final { scan-assembler-times "vmovmskpd\[^\n\r]*ymm" 4 } }
// { dg-final { scan-assembler-not "vpcmpgt|vpcmpeq|vpsra" } }
#include <x86intrin.h>
template <typename T, size_t N>
using V [[gnu::vector_size(N)]] = T;
int f0 (V<unsigned char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i> (a > 0x7f)); }
long int f1 (V<unsigned char, 16> a) { return (unsigned) _mm_movemask_epi8 (reinterpret_cast<__m128i> (a >= 0x80)); }
long int f2 (V<signed char, 16> a) { return (unsigned) _mm_movemask_epi8 (reinterpret_cast<__m128i> (a < 0)); }
int f3 (V<signed char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i> (a <= -1)); }
int f4 (V<char, 16> a) { return _mm_movemask_epi8 (reinterpret_cast<__m128i> (a < 0)); }
long int f5 (V<char, 16> a) { return (unsigned) _mm_movemask_epi8 (reinterpret_cast<__m128i> (a <= -1)); }
int f6 (V<unsigned int, 16> a) { return _mm_movemask_ps (reinterpret_cast<__m128> (a > __INT_MAX__)); }
int f7 (V<unsigned int, 16> a) { return _mm_movemask_ps (reinterpret_cast<__m128> (a >= 1U + __INT_MAX__)); }
int f8 (V<int, 16> a) { return _mm_movemask_ps (reinterpret_cast<__m128> (a < 0)); }
int f9 (V<int, 16> a) { return _mm_movemask_ps (reinterpret_cast<__m128> (a <= -1)); }
int f10 (V<unsigned long long, 16> a) { return _mm_movemask_pd (reinterpret_cast<__m128d> (a > __LONG_LONG_MAX__)); }
int f11 (V<unsigned long long, 16> a) { return _mm_movemask_pd (reinterpret_cast<__m128d> (a >= 1ULL + __LONG_LONG_MAX__)); }
long int f12 (V<long long, 16> a) { return (unsigned) _mm_movemask_pd (reinterpret_cast<__m128d> (a < 0)); }
int f13 (V<long long, 16> a) { return _mm_movemask_pd (reinterpret_cast<__m128d> (a <= -1)); }
int f14 (V<unsigned char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a > 0x7f)); }
int f15 (V<unsigned char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a >= 0x80)); }
long int f16 (V<signed char, 32> a) { return (unsigned) _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a < 0)); }
int f17 (V<signed char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a <= -1)); }
int f18 (V<char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a < 0)); }
int f19 (V<char, 32> a) { return _mm256_movemask_epi8 (reinterpret_cast<__m256i> (a <= -1)); }
long int f20 (V<unsigned int, 32> a) { return (unsigned) _mm256_movemask_ps (reinterpret_cast<__m256> (a > __INT_MAX__)); }
int f21 (V<unsigned int, 32> a) { return _mm256_movemask_ps (reinterpret_cast<__m256> (a >= 1U + __INT_MAX__)); }
int f22 (V<int, 32> a) { return _mm256_movemask_ps (reinterpret_cast<__m256> (a < 0)); }
int f23 (V<int, 32> a) { return _mm256_movemask_ps (reinterpret_cast<__m256> (a <= -1)); }
int f24 (V<unsigned long long, 32> a) { return _mm256_movemask_pd (reinterpret_cast<__m256d> (a > __LONG_LONG_MAX__)); }
int f25 (V<unsigned long long, 32> a) { return _mm256_movemask_pd (reinterpret_cast<__m256d> (a >= 1ULL + __LONG_LONG_MAX__)); }
int f26 (V<long long, 32> a) { return _mm256_movemask_pd (reinterpret_cast<__m256d> (a < 0)); }
long int f27 (V<long long, 32> a) { return (unsigned) _mm256_movemask_pd (reinterpret_cast<__m256d> (a <= -1)); }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment