Commit d37c81f4 by Jakub Jelinek

i386: Optimize {,v}{,p}movmsk{b,ps,pd} followed by sign extension [PR91824]

Some time ago, patterns were added to optimize move mask followed by zero
extension from 32 bits to 64 bit.  As the testcase shows, the intrinsics
actually return int, not unsigned int, so it will happen quite often that
one actually needs sign extension instead of zero extension.  Except for
vpmovmskb with 256-bit operand, sign vs. zero extension doesn't make a
difference, as we know the bit 31 will not be set (the source will have 2 or
4 doubles, 4 or 8 floats or 16 or 32 chars).
So, for the floating point patterns, this patch just uses a code iterator
so that we handle both zero extend and sign extend, and for the byte one
adds a separate pattern for the 128-bit operand.

2020-01-30  Jakub Jelinek  <jakub@redhat.com>

	PR target/91824
	* config/i386/sse.md
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext): Renamed to ...
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext): ... this.  Use
	any_extend code iterator instead of always zero_extend.
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt): Renamed to ...
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt): ... this.
	Use any_extend code iterator instead of always zero_extend.
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift): Renamed to ...
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift): ... this.
	Use any_extend code iterator instead of always zero_extend.
	(*sse2_pmovmskb_ext): New define_insn.
	(*sse2_pmovmskb_ext_lt): New define_insn_and_split.

	* gcc.target/i386/pr91824-2.c: New test.
parent b285bebe
2020-01-30  Jakub Jelinek  <jakub@redhat.com>

	PR target/91824
	* config/i386/sse.md
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext): Renamed to ...
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext): ... this.  Use
	any_extend code iterator instead of always zero_extend.
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt): Renamed to ...
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt): ... this.
	Use any_extend code iterator instead of always zero_extend.
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift): Renamed to ...
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift): ... this.
	Use any_extend code iterator instead of always zero_extend.
	(*sse2_pmovmskb_ext): New define_insn.
	(*sse2_pmovmskb_ext_lt): New define_insn_and_split.

	PR target/91824
	* config/i386/i386.md (*popcountsi2_zext): New define_insn_and_split.
	(*popcountsi2_zext_falsedep): New define_insn.
......
@@ -15815,9 +15815,9 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])

-(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext"
+(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
   [(set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI
+	(any_extend:DI
 	  (unspec:SI
 	    [(match_operand:VF_128_256 1 "register_operand" "x")]
 	    UNSPEC_MOVMSK)))]
@@ -15844,9 +15844,9 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])

-(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_lt"
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
   [(set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI
+	(any_extend:DI
 	  (unspec:SI
 	    [(lt:VF_128_256
 	       (match_operand:<sseintvecmode> 1 "register_operand" "x")
@@ -15856,7 +15856,7 @@
   "#"
   "&& reload_completed"
   [(set (match_dup 0)
-	(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+	(any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
   "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "maybe_vex")
@@ -15880,9 +15880,9 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])

-(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_zext_shift"
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
   [(set (match_operand:DI 0 "register_operand" "=r")
-	(zero_extend:DI
+	(any_extend:DI
 	  (unspec:SI
 	    [(subreg:VF_128_256
 	       (ashiftrt:<sseintvecmode>
@@ -15893,7 +15893,7 @@
   "#"
   "&& reload_completed"
   [(set (match_dup 0)
-	(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
+	(any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
   "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "maybe_vex")
@@ -15932,6 +15932,23 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SI")])
;; pmovmskb on a 128-bit operand followed by sign extension of the int
;; result to DImode.  Only the low 16 bits of the SImode result can be
;; set, so the sign extension is a no-op and a plain pmovmskb writing
;; the 32-bit register (which zeroes the upper half) suffices (PR91824).
;; The 256-bit vpmovmskb case may set bit 31, so it gets no such pattern.
(define_insn "*sse2_pmovmskb_ext"
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI
(unspec:SI
[(match_operand:V16QI 1 "register_operand" "x")]
UNSPEC_MOVMSK)))]
"TARGET_64BIT && TARGET_SSE2"
"%vpmovmskb\t{%1, %k0|%k0, %1}"
[(set_attr "type" "ssemov")
;; Legacy SSE2 encoding needs the 0x66 data16 prefix; VEX does not.
(set (attr "prefix_data16")
(if_then_else
(match_test "TARGET_AVX")
(const_string "*")
(const_string "1")))
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
 (define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
   [(set (match_operand:SI 0 "register_operand" "=r")
     (unspec:SI
@@ -15975,6 +15992,28 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SI")])
;; Sign-extended pmovmskb whose input is a (v < 0) byte comparison.
;; The comparison only replicates each byte's sign bit, which is exactly
;; what pmovmskb extracts, so split away the lt and keep the bare
;; sign-extended pmovmskb pattern above (PR91824).
(define_insn_and_split "*sse2_pmovmskb_ext_lt"
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI
(unspec:SI
[(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
(match_operand:V16QI 2 "const0_operand" "C"))]
UNSPEC_MOVMSK)))]
"TARGET_64BIT && TARGET_SSE2"
"#"
""
[(set (match_dup 0)
(sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
""
[(set_attr "type" "ssemov")
;; Legacy SSE2 encoding needs the 0x66 data16 prefix; VEX does not.
(set (attr "prefix_data16")
(if_then_else
(match_test "TARGET_AVX")
(const_string "*")
(const_string "1")))
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
 (define_expand "sse2_maskmovdqu"
   [(set (match_operand:V16QI 0 "memory_operand")
 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
......
2020-01-30  Jakub Jelinek  <jakub@redhat.com>

	PR target/91824
	* gcc.target/i386/pr91824-2.c: New test.

	PR target/91824
	* gcc.target/i386/pr91824-1.c: New test.

2020-01-30  Bin Cheng  <bin.cheng@linux.alibaba.com>
......
/* PR target/91824 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx2" } */
/* { dg-final { scan-assembler-not "cltq" } } */
/* { dg-final { scan-assembler-not "movl\t%eax, %eax" } } */
#include <x86intrin.h>
/* 128-bit pmovmskb result (int) implicitly sign-extended to 64 bits;
   the extension must be folded into pmovmskb (no cltq).  */
unsigned long long
f1 (__m128i x)
{
return _mm_movemask_epi8 (x);
}
/* Same as f1 but via an explicit (unsigned) cast, i.e. zero extension;
   must not emit a separate movl %eax, %eax.  */
unsigned long long
f2 (__m128i x)
{
return (unsigned) _mm_movemask_epi8 (x);
}
/* 128-bit movmskps, implicit sign extension to 64 bits.  */
unsigned long long
f3 (__m128 x)
{
return _mm_movemask_ps (x);
}
/* 128-bit movmskps, explicit zero extension via the (unsigned) cast.  */
unsigned long long
f4 (__m128 x)
{
return (unsigned) _mm_movemask_ps (x);
}
/* 128-bit movmskpd, implicit sign extension to 64 bits.  */
unsigned long long
f5 (__m128d x)
{
return _mm_movemask_pd (x);
}
/* 128-bit movmskpd, explicit zero extension via the (unsigned) cast.  */
unsigned long long
f6 (__m128d x)
{
return (unsigned) _mm_movemask_pd (x);
}
/* 256-bit vmovmskps, implicit sign extension to 64 bits.  */
unsigned long long
f7 (__m256 x)
{
return _mm256_movemask_ps (x);
}
/* 256-bit vmovmskps, explicit zero extension via the (unsigned) cast.  */
unsigned long long
f8 (__m256 x)
{
return (unsigned) _mm256_movemask_ps (x);
}
/* 256-bit vmovmskpd, implicit sign extension to 64 bits.  */
unsigned long long
f9 (__m256d x)
{
return _mm256_movemask_pd (x);
}
/* 256-bit vmovmskpd, explicit zero extension via the (unsigned) cast.  */
unsigned long long
f10 (__m256d x)
{
return (unsigned) _mm256_movemask_pd (x);
}
/* 256-bit vpmovmskb only with zero extension: its result may have bit 31
   set, so sign extension is not equivalent and has no such test here.  */
unsigned long long
f11 (__m256i x)
{
return (unsigned) _mm256_movemask_epi8 (x);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment