Commit 24bfafbc, authored and committed by Richard Henderson

re PR target/31361 (SSE2 generation bug with shifts)

        PR target/31361
        * config/i386/i386.c (IX86_BUILTIN_PSLLDQ128, IX86_BUILTIN_PSLLW128,
        IX86_BUILTIN_PSLLD128, IX86_BUILTIN_PSLLQ128, IX86_BUILTIN_PSRAW128,
        IX86_BUILTIN_PSRAD128, IX86_BUILTIN_PSRLW128, IX86_BUILTIN_PSRLD128,
        IX86_BUILTIN_PSRLQ128): New.
        (ix86_init_mmx_sse_builtins): Add them.
        (ix86_expand_builtin): Expand them.
        * config/i386/sse.md (ashr<mode>3, lshr<mode>3, ashl<mode>3): Make
        operand 2 be TImode.
        * config/i386/emmintrin.h (_mm_slli_epi64, _mm_srai_epi16,
        _mm_srai_epi32, _mm_srli_epi16, _mm_srli_epi32,
        _mm_srli_epi64): Mark __B const.
        (_mm_srli_si128, _mm_srli_si128): Fix disabled inline versions.
        (_mm_sll_epi16, _mm_sll_epi32, _mm_sll_epi64, _mm_sra_epi16,
        _mm_sra_epi32, _mm_srl_epi16, _mm_srl_epi32, _mm_srl_epi64): Use 
        new two-vector shift builtins.

From-SVN: r123250
parent df12b78f
2007-03-26 Richard Henderson <rth@redhat.com>
PR target/31361
* config/i386/i386.c (IX86_BUILTIN_PSLLDQ128, IX86_BUILTIN_PSLLW128,
IX86_BUILTIN_PSLLD128, IX86_BUILTIN_PSLLQ128, IX86_BUILTIN_PSRAW128,
IX86_BUILTIN_PSRAD128, IX86_BUILTIN_PSRLW128, IX86_BUILTIN_PSRLD128,
IX86_BUILTIN_PSRLQ128): New.
(ix86_init_mmx_sse_builtins): Add them.
(ix86_expand_builtin): Expand them.
* config/i386/sse.md (ashr<mode>3, lshr<mode>3, ashl<mode>3): Make
operand 2 be TImode.
* config/i386/emmintrin.h (_mm_slli_epi64, _mm_srai_epi16,
_mm_srai_epi32, _mm_srli_epi16, _mm_srli_epi32,
_mm_srli_epi64): Mark __B const.
(_mm_srli_si128, _mm_srli_si128): Fix disabled inline versions.
(_mm_sll_epi16, _mm_sll_epi32, _mm_sll_epi64, _mm_sra_epi16,
_mm_sra_epi32, _mm_srl_epi16, _mm_srl_epi32, _mm_srl_epi64): Use
new two-vector shift builtins.
2007-03-26 Ian Lance Taylor <iant@google.com>
PR tree-optimization/31345
......
...@@ -1117,19 +1117,19 @@ _mm_slli_epi32 (__m128i __A, int __B) ...@@ -1117,19 +1117,19 @@ _mm_slli_epi32 (__m128i __A, int __B)
} }
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_slli_epi64 (__m128i __A, int __B) _mm_slli_epi64 (__m128i __A, const int __B)
{ {
return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B); return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
} }
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_srai_epi16 (__m128i __A, int __B) _mm_srai_epi16 (__m128i __A, const int __B)
{ {
return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B); return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
} }
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_srai_epi32 (__m128i __A, int __B) _mm_srai_epi32 (__m128i __A, const int __B)
{ {
return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B); return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
} }
...@@ -1138,13 +1138,13 @@ _mm_srai_epi32 (__m128i __A, int __B) ...@@ -1138,13 +1138,13 @@ _mm_srai_epi32 (__m128i __A, int __B)
static __m128i __attribute__((__always_inline__)) static __m128i __attribute__((__always_inline__))
_mm_srli_si128 (__m128i __A, const int __B) _mm_srli_si128 (__m128i __A, const int __B)
{ {
return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B)) return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B * 8));
} }
static __m128i __attribute__((__always_inline__)) static __m128i __attribute__((__always_inline__))
_mm_srli_si128 (__m128i __A, const int __B) _mm_srli_si128 (__m128i __A, const int __B)
{ {
return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B)) return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B * 8));
} }
#else #else
#define _mm_srli_si128(__A, __B) \ #define _mm_srli_si128(__A, __B) \
...@@ -1154,19 +1154,19 @@ _mm_srli_si128 (__m128i __A, const int __B) ...@@ -1154,19 +1154,19 @@ _mm_srli_si128 (__m128i __A, const int __B)
#endif #endif
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_srli_epi16 (__m128i __A, int __B) _mm_srli_epi16 (__m128i __A, const int __B)
{ {
return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B); return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
} }
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_srli_epi32 (__m128i __A, int __B) _mm_srli_epi32 (__m128i __A, const int __B)
{ {
return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B); return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
} }
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_srli_epi64 (__m128i __A, int __B) _mm_srli_epi64 (__m128i __A, const int __B)
{ {
return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B); return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
} }
...@@ -1174,49 +1174,49 @@ _mm_srli_epi64 (__m128i __A, int __B) ...@@ -1174,49 +1174,49 @@ _mm_srli_epi64 (__m128i __A, int __B)
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_sll_epi16 (__m128i __A, __m128i __B) _mm_sll_epi16 (__m128i __A, __m128i __B)
{ {
return _mm_slli_epi16 (__A, _mm_cvtsi128_si32 (__B)); return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
} }
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_sll_epi32 (__m128i __A, __m128i __B) _mm_sll_epi32 (__m128i __A, __m128i __B)
{ {
return _mm_slli_epi32 (__A, _mm_cvtsi128_si32 (__B)); return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
} }
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_sll_epi64 (__m128i __A, __m128i __B) _mm_sll_epi64 (__m128i __A, __m128i __B)
{ {
return _mm_slli_epi64 (__A, _mm_cvtsi128_si32 (__B)); return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
} }
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_sra_epi16 (__m128i __A, __m128i __B) _mm_sra_epi16 (__m128i __A, __m128i __B)
{ {
return _mm_srai_epi16 (__A, _mm_cvtsi128_si32 (__B)); return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
} }
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_sra_epi32 (__m128i __A, __m128i __B) _mm_sra_epi32 (__m128i __A, __m128i __B)
{ {
return _mm_srai_epi32 (__A, _mm_cvtsi128_si32 (__B)); return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
} }
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_srl_epi16 (__m128i __A, __m128i __B) _mm_srl_epi16 (__m128i __A, __m128i __B)
{ {
return _mm_srli_epi16 (__A, _mm_cvtsi128_si32 (__B)); return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
} }
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_srl_epi32 (__m128i __A, __m128i __B) _mm_srl_epi32 (__m128i __A, __m128i __B)
{ {
return _mm_srli_epi32 (__A, _mm_cvtsi128_si32 (__B)); return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
} }
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
_mm_srl_epi64 (__m128i __A, __m128i __B) _mm_srl_epi64 (__m128i __A, __m128i __B)
{ {
return _mm_srli_epi64 (__A, _mm_cvtsi128_si32 (__B)); return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
} }
static __inline __m128i __attribute__((__always_inline__)) static __inline __m128i __attribute__((__always_inline__))
......
...@@ -15937,6 +15937,16 @@ enum ix86_builtins ...@@ -15937,6 +15937,16 @@ enum ix86_builtins
IX86_BUILTIN_PSRLDI128, IX86_BUILTIN_PSRLDI128,
IX86_BUILTIN_PSRLQI128, IX86_BUILTIN_PSRLQI128,
IX86_BUILTIN_PSLLDQ128,
IX86_BUILTIN_PSLLW128,
IX86_BUILTIN_PSLLD128,
IX86_BUILTIN_PSLLQ128,
IX86_BUILTIN_PSRAW128,
IX86_BUILTIN_PSRAD128,
IX86_BUILTIN_PSRLW128,
IX86_BUILTIN_PSRLD128,
IX86_BUILTIN_PSRLQ128,
IX86_BUILTIN_PUNPCKHBW128, IX86_BUILTIN_PUNPCKHBW128,
IX86_BUILTIN_PUNPCKHWD128, IX86_BUILTIN_PUNPCKHWD128,
IX86_BUILTIN_PUNPCKHDQ128, IX86_BUILTIN_PUNPCKHDQ128,
...@@ -17055,14 +17065,22 @@ ix86_init_mmx_sse_builtins (void) ...@@ -17055,14 +17065,22 @@ ix86_init_mmx_sse_builtins (void)
def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128); def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128); def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128); def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128); def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128); def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128); def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128); def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128); def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128); def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128); def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
...@@ -17784,9 +17802,106 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, ...@@ -17784,9 +17802,106 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
emit_insn (pat); emit_insn (pat);
return target; return target;
case IX86_BUILTIN_PSLLWI128:
icode = CODE_FOR_ashlv8hi3;
goto do_pshifti;
case IX86_BUILTIN_PSLLDI128:
icode = CODE_FOR_ashlv4si3;
goto do_pshifti;
case IX86_BUILTIN_PSLLQI128:
icode = CODE_FOR_ashlv2di3;
goto do_pshifti;
case IX86_BUILTIN_PSRAWI128:
icode = CODE_FOR_ashrv8hi3;
goto do_pshifti;
case IX86_BUILTIN_PSRADI128:
icode = CODE_FOR_ashrv4si3;
goto do_pshifti;
case IX86_BUILTIN_PSRLWI128:
icode = CODE_FOR_lshrv8hi3;
goto do_pshifti;
case IX86_BUILTIN_PSRLDI128:
icode = CODE_FOR_lshrv4si3;
goto do_pshifti;
case IX86_BUILTIN_PSRLQI128:
icode = CODE_FOR_lshrv2di3;
goto do_pshifti;
do_pshifti:
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
if (!CONST_INT_P (op1))
{
error ("shift must be an immediate");
return const0_rtx;
}
if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
op1 = GEN_INT (255);
tmode = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
op0 = copy_to_reg (op0);
target = gen_reg_rtx (tmode);
pat = GEN_FCN (icode) (target, op0, op1);
if (!pat)
return 0;
emit_insn (pat);
return target;
case IX86_BUILTIN_PSLLW128:
icode = CODE_FOR_ashlv8hi3;
goto do_pshift;
case IX86_BUILTIN_PSLLD128:
icode = CODE_FOR_ashlv4si3;
goto do_pshift;
case IX86_BUILTIN_PSLLQ128:
icode = CODE_FOR_ashlv2di3;
goto do_pshift;
case IX86_BUILTIN_PSRAW128:
icode = CODE_FOR_ashrv8hi3;
goto do_pshift;
case IX86_BUILTIN_PSRAD128:
icode = CODE_FOR_ashrv4si3;
goto do_pshift;
case IX86_BUILTIN_PSRLW128:
icode = CODE_FOR_lshrv8hi3;
goto do_pshift;
case IX86_BUILTIN_PSRLD128:
icode = CODE_FOR_lshrv4si3;
goto do_pshift;
case IX86_BUILTIN_PSRLQ128:
icode = CODE_FOR_lshrv2di3;
goto do_pshift;
do_pshift:
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
tmode = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
op0 = copy_to_reg (op0);
op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
op1 = copy_to_reg (op1);
target = gen_reg_rtx (tmode);
pat = GEN_FCN (icode) (target, op0, op1);
if (!pat)
return 0;
emit_insn (pat);
return target;
case IX86_BUILTIN_PSLLDQI128: case IX86_BUILTIN_PSLLDQI128:
case IX86_BUILTIN_PSRLDQI128: case IX86_BUILTIN_PSRLDQI128:
icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
: CODE_FOR_sse2_lshrti3); : CODE_FOR_sse2_lshrti3);
arg0 = CALL_EXPR_ARG (exp, 0); arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1); arg1 = CALL_EXPR_ARG (exp, 1);
...@@ -17807,7 +17922,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, ...@@ -17807,7 +17922,8 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return const0_rtx; return const0_rtx;
} }
target = gen_reg_rtx (V2DImode); target = gen_reg_rtx (V2DImode);
pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1); pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
op0, op1);
if (! pat) if (! pat)
return 0; return 0;
emit_insn (pat); emit_insn (pat);
......
...@@ -3085,7 +3085,7 @@ ...@@ -3085,7 +3085,7 @@
[(set (match_operand:SSEMODE24 0 "register_operand" "=x") [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
(ashiftrt:SSEMODE24 (ashiftrt:SSEMODE24
(match_operand:SSEMODE24 1 "register_operand" "0") (match_operand:SSEMODE24 1 "register_operand" "0")
(match_operand:SI 2 "nonmemory_operand" "xi")))] (match_operand:TI 2 "nonmemory_operand" "xn")))]
"TARGET_SSE2" "TARGET_SSE2"
"psra<ssevecsize>\t{%2, %0|%0, %2}" "psra<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft") [(set_attr "type" "sseishft")
...@@ -3095,7 +3095,7 @@ ...@@ -3095,7 +3095,7 @@
[(set (match_operand:SSEMODE248 0 "register_operand" "=x") [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
(lshiftrt:SSEMODE248 (lshiftrt:SSEMODE248
(match_operand:SSEMODE248 1 "register_operand" "0") (match_operand:SSEMODE248 1 "register_operand" "0")
(match_operand:SI 2 "nonmemory_operand" "xi")))] (match_operand:TI 2 "nonmemory_operand" "xn")))]
"TARGET_SSE2" "TARGET_SSE2"
"psrl<ssevecsize>\t{%2, %0|%0, %2}" "psrl<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft") [(set_attr "type" "sseishft")
...@@ -3105,7 +3105,7 @@ ...@@ -3105,7 +3105,7 @@
[(set (match_operand:SSEMODE248 0 "register_operand" "=x") [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
(ashift:SSEMODE248 (ashift:SSEMODE248
(match_operand:SSEMODE248 1 "register_operand" "0") (match_operand:SSEMODE248 1 "register_operand" "0")
(match_operand:SI 2 "nonmemory_operand" "xi")))] (match_operand:TI 2 "nonmemory_operand" "xn")))]
"TARGET_SSE2" "TARGET_SSE2"
"psll<ssevecsize>\t{%2, %0|%0, %2}" "psll<ssevecsize>\t{%2, %0|%0, %2}"
[(set_attr "type" "sseishft") [(set_attr "type" "sseishft")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment