Commit ff8e1022 by H.J. Lu Committed by H.J. Lu

x86: Add pmovzx/pmovsx patterns with memory operands

Many x86 pmovzx/pmovsx instructions with memory operands are modeled
incorrectly.  For example:

(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
    (any_extend:V8HI
      (vec_select:V8QI
        (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*xm,vm")
        (parallel [(const_int 0) (const_int 1)
               (const_int 2) (const_int 3)
               (const_int 4) (const_int 5)
               (const_int 6) (const_int 7)]))))]

should be defined for memory operands as:

(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
    (any_extend:V8HI
      (match_operand:V8QI 1 "memory_operand" "m,m,m")))]

This patch updates them to

(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
	(any_extend:V8HI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]

(define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
	(any_extend:V8HI
	  (match_operand:V8QI 1 "subreg_memory_operand" "m,m,m")))]

with a splitter:

(define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
  [(set (match_operand:V8HI 0 "register_operand")
        (any_extend:V8HI
          (vec_select:V8QI
            (subreg:V16QI
              (vec_concat:V2DI
                (match_operand:DI 1 "memory_operand")
                (const_int 0)) 0)
            (parallel [(const_int 0) (const_int 1)
                       (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5)
                       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1
   && <mask_avx512bw_condition>
   && <mask_avx512vl_condition>
   && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(set (match_dup 0)
        (any_extend:V8HI (match_dup 1)))]
  "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")

This patch requires updating apply_subst_iterator to handle
define_insn_and_split.

gcc/

	PR target/87317
	* config/i386/sse.md (sse4_1_<code>v8qiv8hi2<mask_name>): Replace
	nonimmediate_operand with register_operand.
	(avx2_<code>v8qiv8si2<mask_name>): Likewise.
	(sse4_1_<code>v4qiv4si2<mask_name>): Likewise.
	(sse4_1_<code>v4hiv4si2<mask_name>): Likewise.
	(sse4_1_<code>v2qiv2di2<mask_name>): Likewise.
	(avx512f_<code>v8qiv8di2<mask_name>): Likewise.
	(avx2_<code>v4qiv4di2<mask_name>): Likewise.
	(avx2_<code>v4hiv4di2<mask_name>): Likewise.
	(sse4_1_<code>v2hiv2di2<mask_name>): Likewise.
	(sse4_1_<code>v2siv2di2<mask_name>): Likewise.
	(*sse4_1_<code>v8qiv8hi2<mask_name>_1): New pattern.
	(*sse4_1_<code>v8qiv8hi2<mask_name>_2): Likewise.
	(*avx2_<code>v8qiv8si2<mask_name>_1): Likewise.
	(*avx2_<code>v8qiv8si2<mask_name>_2): Likewise.
	(*sse4_1_<code>v4qiv4si2<mask_name>_1): Likewise.
	(*sse4_1_<code>v4qiv4si2<mask_name>_2): Likewise.
	(*sse4_1_<code>v4hiv4si2<mask_name>_1): Likewise.
	(*sse4_1_<code>v4hiv4si2<mask_name>_2): Likewise.
	(*avx512f_<code>v8qiv8di2<mask_name>_1): Likewise.
	(*avx512f_<code>v8qiv8di2<mask_name>_2): Likewise.
	(*avx2_<code>v4qiv4di2<mask_name>_1): Likewise.
	(*avx2_<code>v4qiv4di2<mask_name>_2): Likewise.
	(*avx2_<code>v4hiv4di2<mask_name>_1): Likewise.
	(*avx2_<code>v4hiv4di2<mask_name>_2): Likewise.
	(*sse4_1_<code>v2hiv2di2<mask_name>_1): Likewise.
	(*sse4_1_<code>v2hiv2di2<mask_name>_2): Likewise.
	(*sse4_1_<code>v2siv2di2<mask_name>_1): Likewise.
	(*sse4_1_<code>v2siv2di2<mask_name>_2): Likewise.

gcc/testsuite/

	PR target/87317
	* gcc.target/i386/pr87317-1.c: New file.
	* gcc.target/i386/pr87317-2.c: Likewise.
	* gcc.target/i386/pr87317-3.c: Likewise.
	* gcc.target/i386/pr87317-4.c: Likewise.
	* gcc.target/i386/pr87317-5.c: Likewise.
	* gcc.target/i386/pr87317-6.c: Likewise.
	* gcc.target/i386/pr87317-7.c: Likewise.
	* gcc.target/i386/pr87317-8.c: Likewise.
	* gcc.target/i386/pr87317-9.c: Likewise.
	* gcc.target/i386/pr87317-10.c: Likewise.
	* gcc.target/i386/pr87317-11.c: Likewise.
	* gcc.target/i386/pr87317-12.c: Likewise.
	* gcc.target/i386/pr87317-13.c: Likewise.

From-SVN: r266342
parent 12404d15
2018-11-21 H.J. Lu <hongjiu.lu@intel.com>
PR target/87317
* config/i386/sse.md (sse4_1_<code>v8qiv8hi2<mask_name>): Replace
nonimmediate_operand with register_operand.
(avx2_<code>v8qiv8si2<mask_name>): Likewise.
(sse4_1_<code>v4qiv4si2<mask_name>): Likewise.
(sse4_1_<code>v4hiv4si2<mask_name>): Likewise.
(sse4_1_<code>v2qiv2di2<mask_name>): Likewise.
(avx512f_<code>v8qiv8di2<mask_name>): Likewise.
(avx2_<code>v4qiv4di2<mask_name>): Likewise.
(avx2_<code>v4hiv4di2<mask_name>): Likewise.
(sse4_1_<code>v2hiv2di2<mask_name>): Likewise.
(sse4_1_<code>v2siv2di2<mask_name>): Likewise.
(*sse4_1_<code>v8qiv8hi2<mask_name>_1): New pattern.
(*sse4_1_<code>v8qiv8hi2<mask_name>_2): Likewise.
(*avx2_<code>v8qiv8si2<mask_name>_1): Likewise.
(*avx2_<code>v8qiv8si2<mask_name>_2): Likewise.
(*sse4_1_<code>v4qiv4si2<mask_name>_1): Likewise.
(*sse4_1_<code>v4qiv4si2<mask_name>_2): Likewise.
(*sse4_1_<code>v4hiv4si2<mask_name>_1): Likewise.
(*sse4_1_<code>v4hiv4si2<mask_name>_2): Likewise.
(*avx512f_<code>v8qiv8di2<mask_name>_1): Likewise.
(*avx512f_<code>v8qiv8di2<mask_name>_2): Likewise.
(*avx2_<code>v4qiv4di2<mask_name>_1): Likewise.
(*avx2_<code>v4qiv4di2<mask_name>_2): Likewise.
(*avx2_<code>v4hiv4di2<mask_name>_1): Likewise.
(*avx2_<code>v4hiv4di2<mask_name>_2): Likewise.
(*sse4_1_<code>v2hiv2di2<mask_name>_1): Likewise.
(*sse4_1_<code>v2hiv2di2<mask_name>_2): Likewise.
(*sse4_1_<code>v2siv2di2<mask_name>_1): Likewise.
(*sse4_1_<code>v2siv2di2<mask_name>_2): Likewise.
2018-11-21 H.J. Lu <hongjiu.lu@intel.com>
* read-rtl.c (apply_subst_iterator): Handle define_split and
define_insn_and_split.
2018-11-21 H.J. Lu <hongjiu.lu@intel.com>
PR target/87317
* gcc.target/i386/pr87317-1.c: New file.
* gcc.target/i386/pr87317-2.c: Likewise.
* gcc.target/i386/pr87317-3.c: Likewise.
* gcc.target/i386/pr87317-4.c: Likewise.
* gcc.target/i386/pr87317-5.c: Likewise.
* gcc.target/i386/pr87317-6.c: Likewise.
* gcc.target/i386/pr87317-7.c: Likewise.
* gcc.target/i386/pr87317-8.c: Likewise.
* gcc.target/i386/pr87317-9.c: Likewise.
* gcc.target/i386/pr87317-10.c: Likewise.
* gcc.target/i386/pr87317-11.c: Likewise.
* gcc.target/i386/pr87317-12.c: Likewise.
* gcc.target/i386/pr87317-13.c: Likewise.
2018-11-21 Tom de Vries <tdevries@suse.de>
PR driver/79855
......
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovzxbw" 1 } } */
/* { dg-final { scan-assembler-not "vmovq" } } */
#include <immintrin.h>
/* PR target/87317 testcase: read 64 bits from PTR, place them in a vector
   via _mm_cvtsi64_si128, widen with _mm_cvtepu8_epi16 and store the
   128-bit result to DST with an unaligned store.  The dg-final directives
   of this test expect exactly one vpmovzxbw and no vmovq.  */
void
f (void *dst, void *ptr)
{
/* 64-bit memory load that feeds the extension below.  */
__m128i data = _mm_cvtsi64_si128(*(long long int*)ptr);
data = _mm_cvtepu8_epi16(data);
_mm_storeu_si128((__m128i*)dst, data);
}
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovzxbd" 1 } } */
/* { dg-final { scan-assembler-not "vmovq" } } */
#include <immintrin.h>
/* PR target/87317 testcase: read 64 bits from PTR into a 128-bit vector,
   widen it to a 256-bit vector with _mm256_cvtepu8_epi32 and store the
   result to DST with an unaligned store.  The dg-final directives of this
   test expect exactly one vpmovzxbd and no vmovq.  */
void
f (void *dst, void *ptr)
{
/* 64-bit memory load that feeds the extension below.  */
__m128i y = _mm_cvtsi64_si128(*(long long int*)ptr);
__m256i z = _mm256_cvtepu8_epi32 (y);
_mm256_storeu_si256((__m256i*)dst, z);
}
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovzxwq" 1 } } */
/* { dg-final { scan-assembler-not "vmovq" } } */
#include <immintrin.h>
/* PR target/87317 testcase: read 64 bits from PTR into a 128-bit vector,
   widen it to a 256-bit vector with _mm256_cvtepu16_epi64 and store the
   result to DST with an unaligned store.  The dg-final directives of this
   test expect exactly one vpmovzxwq and no vmovq.  */
void
f (void *dst, void *ptr)
{
/* 64-bit memory load that feeds the extension below.  */
__m128i y = _mm_cvtsi64_si128(*(long long int*)ptr);
__m256i z = _mm256_cvtepu16_epi64 (y);
_mm256_storeu_si256((__m256i*)dst, z);
}
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O3 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovsxwq" 1 } } */
#include <immintrin.h>
/* Element count of both global arrays used by this testcase.  */
#define MAX 4
/* Destination: MAX 64-bit elements.  */
long long int dst[MAX];
/* Source: MAX 16-bit elements.  */
short src[MAX];
/* PR target/87317 testcase: for each group of four shorts in SRC, read
   them as one 64-bit value, widen with _mm256_cvtepi16_epi64 and store
   the 256-bit result to the matching position in DST.  The dg-final
   directive of this test expects exactly one vpmovsxwq.  */
void
foo (void)
{
int i;
/* The step is 4 and MAX is 4, so the body executes once.  */
for (i = 0; i < MAX; i += 4)
{
/* 64-bit memory load that feeds the extension below.  */
__m128i data = _mm_cvtsi64_si128(*(long long int*)(src + i));
__m256i x = _mm256_cvtepi16_epi64(data);
_mm256_storeu_si256((__m256i*)(dst + i), x);
}
}
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-final { scan-assembler-times "vpmovzxbq" 1 } } */
/* { dg-final { scan-assembler-not "vmovq" } } */
#include <immintrin.h>
/* PR target/87317 testcase (built with -mavx512f): read 64 bits from PTR
   into a 128-bit vector, widen it to a 512-bit vector with
   _mm512_cvtepu8_epi64 and store the result to DST with an unaligned
   store.  The dg-final directives of this test expect exactly one
   vpmovzxbq and no vmovq.  */
void
f (void *dst, void *ptr)
{
/* 64-bit memory load that feeds the extension below.  */
__m128i y = _mm_cvtsi64_si128(*(long long int*)ptr);
__m512i z = _mm512_cvtepu8_epi64 (y);
_mm512_storeu_si512((__m512i*)dst, z);
}
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovsxwd" 1 } } */
/* { dg-final { scan-assembler-not "vmovq" } } */
#include <immintrin.h>
/* PR target/87317 testcase: read 64 bits from PTR into a 128-bit vector,
   widen with _mm_cvtepi16_epi32 and store the 128-bit result to DST with
   an unaligned store.  The dg-final directives of this test expect
   exactly one vpmovsxwd and no vmovq.  */
void
f (void *dst, void *ptr)
{
/* 64-bit memory load that feeds the extension below.  */
__m128i data = _mm_cvtsi64_si128(*(long long int*)ptr);
data = _mm_cvtepi16_epi32(data);
_mm_storeu_si128((__m128i*)dst, data);
}
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovsxdq" 1 } } */
/* { dg-final { scan-assembler-not "vmovq" } } */
#include <immintrin.h>
/* PR target/87317 testcase: read 64 bits from PTR into a 128-bit vector,
   widen with _mm_cvtepi32_epi64 and store the 128-bit result to DST with
   an unaligned store.  The dg-final directives of this test expect
   exactly one vpmovsxdq and no vmovq.  */
void
f (void *dst, void *ptr)
{
/* 64-bit memory load that feeds the extension below.  */
__m128i data = _mm_cvtsi64_si128(*(long long int*)ptr);
data = _mm_cvtepi32_epi64(data);
_mm_storeu_si128((__m128i*)dst, data);
}
/* { dg-do compile } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovzxbd" 1 } } */
/* { dg-final { scan-assembler-not "vmovd" } } */
#include <immintrin.h>
/* PR target/87317 testcase: read an int from PTR into a 128-bit vector
   via _mm_cvtsi32_si128, widen with _mm_cvtepu8_epi32 and store the
   128-bit result to DST with an unaligned store.  The dg-final directives
   of this test expect exactly one vpmovzxbd and no vmovd.  */
void
f (void *dst, void *ptr)
{
/* 32-bit memory load that feeds the extension below.  */
__m128i data = _mm_cvtsi32_si128(*(int*)ptr);
data = _mm_cvtepu8_epi32(data);
_mm_storeu_si128((__m128i*)dst, data);
}
/* { dg-do compile } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovzxwq" 1 } } */
/* { dg-final { scan-assembler-not "vmovd" } } */
#include <immintrin.h>
/* PR target/87317 testcase: read an int from PTR into a 128-bit vector
   via _mm_cvtsi32_si128, widen with _mm_cvtepu16_epi64 and store the
   128-bit result to DST with an unaligned store.  The dg-final directives
   of this test expect exactly one vpmovzxwq and no vmovd.  */
void
f (void *dst, void *ptr)
{
/* 32-bit memory load that feeds the extension below.  */
__m128i data = _mm_cvtsi32_si128(*(int*)ptr);
data = _mm_cvtepu16_epi64(data);
_mm_storeu_si128((__m128i*)dst, data);
}
/* { dg-do compile } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovzxbq" 1 } } */
/* { dg-final { scan-assembler-not "vmovq" } } */
#include <immintrin.h>
/* PR target/87317 testcase: read an int from PTR into a 128-bit vector
   via _mm_cvtsi32_si128, widen it to a 256-bit vector with
   _mm256_cvtepu8_epi64 and store the result to DST with an unaligned
   store.  The dg-final directives of this test expect exactly one
   vpmovzxbq.
   NOTE(review): the load here is 32-bit, yet this test's
   scan-assembler-not pattern is "vmovq", while the other 32-bit-load
   tests for this PR forbid "vmovd" -- confirm whether "vmovd" was
   intended.  */
void
f (void *dst, void *ptr)
{
/* 32-bit memory load that feeds the extension below.  */
__m128i y = _mm_cvtsi32_si128(*(int*)ptr);
__m256i z = _mm256_cvtepu8_epi64 (y);
_mm256_storeu_si256((__m256i*)dst, z);
}
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovzxbd" 1 } } */
/* { dg-final { scan-assembler-not "vmovd" } } */
#include <immintrin.h>
/* PR target/87317 testcase (restricted to non-ia32 targets by its dg-do
   line): read an int from PTR into a 128-bit vector via
   _mm_cvtsi32_si128, widen with _mm_cvtepu8_epi32 and store the 128-bit
   result to DST with an unaligned store.  The dg-final directives of this
   test expect exactly one vpmovzxbd and no vmovd.  */
void
f (void *dst, void *ptr)
{
/* 32-bit memory load that feeds the extension below.  */
__m128i data = _mm_cvtsi32_si128(*(int*)ptr);
data = _mm_cvtepu8_epi32(data);
_mm_storeu_si128((__m128i*)dst, data);
}
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovzxwq" 1 } } */
/* { dg-final { scan-assembler-not "vmovq" } } */
#include <immintrin.h>
/* PR target/87317 testcase: read 64 bits from PTR into a 128-bit vector,
   widen it to a 256-bit vector with _mm256_cvtepu16_epi64 and store the
   result to DST with an unaligned store.  The dg-final directives of this
   test expect exactly one vpmovzxwq and no vmovq.  */
void
f (void *dst, void *ptr)
{
/* 64-bit memory load that feeds the extension below.  */
__m128i data = _mm_cvtsi64_si128(*(long long int*)ptr);
__m256i x = _mm256_cvtepu16_epi64(data);
_mm256_storeu_si256((__m256i*)dst, x);
}
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovzxbd" 1 } } */
/* { dg-final { scan-assembler-not "vmovq" } } */
#include <immintrin.h>
/* PR target/87317 testcase: read 64 bits from PTR into a 128-bit vector,
   widen it to a 256-bit vector with _mm256_cvtepu8_epi32 and store the
   result to DST with an unaligned store.  The dg-final directives of this
   test expect exactly one vpmovzxbd and no vmovq.  */
void
f (void *dst, void *ptr)
{
/* 64-bit memory load that feeds the extension below.  */
__m128i data = _mm_cvtsi64_si128(*(long long int*)ptr);
__m256i x = _mm256_cvtepu8_epi32(data);
_mm256_storeu_si256((__m256i*)dst, x);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment