Commit 93703e79 by Richard Henderson Committed by Richard Henderson

i386: Add widen_multiply_even/odd builtins and hooks

From-SVN: r188959
parent 379eaa6f
2012-06-25 Richard Henderson <rth@redhat.com> 2012-06-25 Richard Henderson <rth@redhat.com>
* config/i386/i386-builtin-types.def (V4UDI, V8USI): New.
(V2UDI_FTYPE_V4USI_V4USI): New.
(V4UDI_FTYPE_V8USI_V8USI): New.
* config/i386/i386.c (ix86_expand_args_builtin): Handle them.
(IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI): New.
(IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI): New.
(IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI): New.
(IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI): New.
(IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI): New.
(IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI): New.
(bdesc_args): Add them.
(ix86_builtin_mul_widen_even, ix86_builtin_mul_widen_odd): New.
(TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN): New.
(TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD): New.
(ix86_expand_mul_widen_evenodd): Use xop_pmacsdqh.
* config/i386/sse.md (vec_widen_<s>mult_odd_<VI124_AVX2>): New.
2012-06-25 Richard Henderson <rth@redhat.com>
* config/i386/sse.md (mul<VI4_AVX2>3): Use xop_pmacsdd. * config/i386/sse.md (mul<VI4_AVX2>3): Use xop_pmacsdd.
2012-06-25 Richard Henderson <rth@redhat.com> 2012-06-25 Richard Henderson <rth@redhat.com>
......
...@@ -97,7 +97,8 @@ DEF_VECTOR_TYPE (V4DI, DI) ...@@ -97,7 +97,8 @@ DEF_VECTOR_TYPE (V4DI, DI)
DEF_VECTOR_TYPE (V8SI, SI) DEF_VECTOR_TYPE (V8SI, SI)
DEF_VECTOR_TYPE (V16HI, HI) DEF_VECTOR_TYPE (V16HI, HI)
DEF_VECTOR_TYPE (V32QI, QI) DEF_VECTOR_TYPE (V32QI, QI)
DEF_VECTOR_TYPE (V4UDI, UDI, V4DI)
DEF_VECTOR_TYPE (V8USI, USI, V8SI)
DEF_POINTER_TYPE (PCCHAR, CHAR, CONST) DEF_POINTER_TYPE (PCCHAR, CHAR, CONST)
DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST) DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST)
...@@ -283,6 +284,7 @@ DEF_FUNCTION_TYPE (V2DI, V2DI, SI) ...@@ -283,6 +284,7 @@ DEF_FUNCTION_TYPE (V2DI, V2DI, SI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V16QI) DEF_FUNCTION_TYPE (V2DI, V2DI, V16QI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI) DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI)
DEF_FUNCTION_TYPE (V2DI, V4SI, V4SI) DEF_FUNCTION_TYPE (V2DI, V4SI, V4SI)
DEF_FUNCTION_TYPE (V2UDI, V4USI, V4USI)
DEF_FUNCTION_TYPE (V2DI, PCV2DI, V2DI) DEF_FUNCTION_TYPE (V2DI, PCV2DI, V2DI)
DEF_FUNCTION_TYPE (V2SF, V2SF, V2SF) DEF_FUNCTION_TYPE (V2SF, V2SF, V2SF)
DEF_FUNCTION_TYPE (V2SI, INT, INT) DEF_FUNCTION_TYPE (V2SI, INT, INT)
...@@ -349,6 +351,7 @@ DEF_FUNCTION_TYPE (V8SI, V8SI, SI) ...@@ -349,6 +351,7 @@ DEF_FUNCTION_TYPE (V8SI, V8SI, SI)
DEF_FUNCTION_TYPE (V8SI, PCV8SI, V8SI) DEF_FUNCTION_TYPE (V8SI, PCV8SI, V8SI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI) DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI)
DEF_FUNCTION_TYPE (V4DI, V8SI, V8SI) DEF_FUNCTION_TYPE (V4DI, V8SI, V8SI)
DEF_FUNCTION_TYPE (V4UDI, V8USI, V8USI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI) DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI)
DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI) DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI)
DEF_FUNCTION_TYPE (V4DI, V4DI, INT) DEF_FUNCTION_TYPE (V4DI, V4DI, INT)
......
...@@ -25754,6 +25754,13 @@ enum ix86_builtins ...@@ -25754,6 +25754,13 @@ enum ix86_builtins
IX86_BUILTIN_CPYSGNPS256, IX86_BUILTIN_CPYSGNPS256,
IX86_BUILTIN_CPYSGNPD256, IX86_BUILTIN_CPYSGNPD256,
IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI,
IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI,
IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI,
IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI,
IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI,
IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI,
/* FMA4 instructions. */ /* FMA4 instructions. */
IX86_BUILTIN_VFMADDSS, IX86_BUILTIN_VFMADDSS,
IX86_BUILTIN_VFMADDSD, IX86_BUILTIN_VFMADDSD,
...@@ -26612,6 +26619,8 @@ static const struct builtin_description bdesc_args[] = ...@@ -26612,6 +26619,8 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_vw_umul_even_v4si", IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI, UNKNOWN, (int) V2UDI_FTYPE_V4USI_V4USI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_odd_v4si, "__builtin_ia32_vw_umul_odd_v4si", IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI, UNKNOWN, (int) V2UDI_FTYPE_V4USI_V4USI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI }, { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
...@@ -26738,6 +26747,7 @@ static const struct builtin_description bdesc_args[] = ...@@ -26738,6 +26747,7 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI }, { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_vec_widen_smult_odd_v4si, "__builtin_ia32_vw_smul_odd_v4si", IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
/* SSE4.1 */ /* SSE4.1 */
...@@ -27004,12 +27014,15 @@ static const struct builtin_description bdesc_args[] = ...@@ -27004,12 +27014,15 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2 , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2 , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3 , "__builtin_ia32_pmuldq256" , IX86_BUILTIN_PMULDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3 , "__builtin_ia32_pmuldq256" , IX86_BUILTIN_PMULDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_odd_v8si, "__builtin_ia32_vw_smul_odd_v8si", IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256" , IX86_BUILTIN_PMULHW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256" , IX86_BUILTIN_PMULLW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256" , IX86_BUILTIN_PMULLD256 , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3 , "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3 , "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3 , "__builtin_i386_vw_umul_even_v8si" , IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI, UNKNOWN, (int) V4UDI_FTYPE_V8USI_V8USI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_odd_v8si, "__builtin_ia32_vw_umul_odd_v8si", IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI, UNKNOWN, (int) V4UDI_FTYPE_V8USI_V8USI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
...@@ -29142,6 +29155,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, ...@@ -29142,6 +29155,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V2DI_FTYPE_V2DI_V2DI: case V2DI_FTYPE_V2DI_V2DI:
case V2DI_FTYPE_V16QI_V16QI: case V2DI_FTYPE_V16QI_V16QI:
case V2DI_FTYPE_V4SI_V4SI: case V2DI_FTYPE_V4SI_V4SI:
case V2UDI_FTYPE_V4USI_V4USI:
case V2DI_FTYPE_V2DI_V16QI: case V2DI_FTYPE_V2DI_V16QI:
case V2DI_FTYPE_V2DF_V2DF: case V2DI_FTYPE_V2DF_V2DF:
case V2SI_FTYPE_V2SI_V2SI: case V2SI_FTYPE_V2SI_V2SI:
...@@ -29166,6 +29180,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, ...@@ -29166,6 +29180,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V8SI_FTYPE_V16HI_V16HI: case V8SI_FTYPE_V16HI_V16HI:
case V4DI_FTYPE_V4DI_V4DI: case V4DI_FTYPE_V4DI_V4DI:
case V4DI_FTYPE_V8SI_V8SI: case V4DI_FTYPE_V8SI_V8SI:
case V4UDI_FTYPE_V8USI_V8USI:
if (comparison == UNKNOWN) if (comparison == UNKNOWN)
return ix86_expand_binop_builtin (icode, exp, target); return ix86_expand_binop_builtin (icode, exp, target);
nargs = 2; nargs = 2;
...@@ -31042,6 +31057,78 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn, ...@@ -31042,6 +31057,78 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
return NULL_TREE; return NULL_TREE;
} }
} }
/* Installed as TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN.

   Look up the builtin that performs a widening multiply of the even
   elements for the vector type TYPE and return its entry from the
   ix86_builtins table.  The choice depends on the machine mode and
   signedness of TYPE:

     V4SImode, unsigned:  needs SSE2   -> VEC_WIDEN_UMUL_EVEN_V4SI
     V4SImode, signed:    needs SSE4.1 -> PMULDQ128 (existing builtin)
     V8SImode, unsigned:  needs AVX2   -> VEC_WIDEN_UMUL_EVEN_V8SI
     V8SImode, signed:    needs AVX2   -> PMULDQ256 (existing builtin)

   Return NULL for any other mode, or when the ISA required for the
   selected builtin is not enabled.  */
static tree
ix86_builtin_mul_widen_even (tree type)
{
  bool is_unsigned = TYPE_UNSIGNED (type);

  switch (TYPE_MODE (type))
    {
    case V4SImode:
      /* The unsigned and signed variants have different ISA floors,
         so each one is gated separately.  */
      if (is_unsigned && TARGET_SSE2)
        return ix86_builtins[IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI];
      if (!is_unsigned && TARGET_SSE4_1)
        return ix86_builtins[IX86_BUILTIN_PMULDQ128];
      break;

    case V8SImode:
      /* Both 256-bit variants share the AVX2 requirement.  */
      if (TARGET_AVX2)
        return ix86_builtins[is_unsigned
                             ? IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI
                             : IX86_BUILTIN_PMULDQ256];
      break;

    default:
      break;
    }

  /* Unsupported mode, or the needed ISA is unavailable.  */
  return NULL;
}
/* Installed as TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD.

   Look up the builtin that performs a widening multiply of the odd
   elements for the vector type TYPE and return its entry from the
   ix86_builtins table.  The choice depends on the machine mode and
   signedness of TYPE:

     V4SImode, unsigned:  needs SSE2   -> VEC_WIDEN_UMUL_ODD_V4SI
     V4SImode, signed:    needs SSE4.1 -> VEC_WIDEN_SMUL_ODD_V4SI
     V8SImode, unsigned:  needs AVX2   -> VEC_WIDEN_UMUL_ODD_V8SI
     V8SImode, signed:    needs AVX2   -> VEC_WIDEN_SMUL_ODD_V8SI

   Return NULL for any other mode, or when the ISA required for the
   selected builtin is not enabled.  */
static tree
ix86_builtin_mul_widen_odd (tree type)
{
  bool is_unsigned = TYPE_UNSIGNED (type);

  switch (TYPE_MODE (type))
    {
    case V4SImode:
      /* The unsigned and signed variants have different ISA floors,
         so each one is gated separately.  */
      if (is_unsigned && TARGET_SSE2)
        return ix86_builtins[IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI];
      if (!is_unsigned && TARGET_SSE4_1)
        return ix86_builtins[IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI];
      break;

    case V8SImode:
      /* Both 256-bit variants share the AVX2 requirement.  */
      if (TARGET_AVX2)
        return ix86_builtins[is_unsigned
                             ? IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI
                             : IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI];
      break;

    default:
      break;
    }

  /* Unsupported mode, or the needed ISA is unavailable.  */
  return NULL;
}
/* Helper for avx_vpermilps256_operand et al. This is also used by /* Helper for avx_vpermilps256_operand et al. This is also used by
the expansion functions to turn the parallel back into a mask. the expansion functions to turn the parallel back into a mask.
...@@ -38663,6 +38750,7 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2, ...@@ -38663,6 +38750,7 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
bool uns_p, bool odd_p) bool uns_p, bool odd_p)
{ {
enum machine_mode mode = GET_MODE (op1); enum machine_mode mode = GET_MODE (op1);
enum machine_mode wmode = GET_MODE (dest);
rtx x; rtx x;
/* We only play even/odd games with vectors of SImode. */ /* We only play even/odd games with vectors of SImode. */
...@@ -38672,8 +38760,12 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2, ...@@ -38672,8 +38760,12 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
the even slots. For some cpus this is faster than a PSHUFD. */ the even slots. For some cpus this is faster than a PSHUFD. */
if (odd_p) if (odd_p)
{ {
enum machine_mode wmode = GET_MODE (dest); if (TARGET_XOP && mode == V4SImode)
{
x = force_reg (wmode, CONST0_RTX (wmode));
emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
return;
}
op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1), op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
GEN_INT (GET_MODE_UNIT_BITSIZE (mode)), NULL, GEN_INT (GET_MODE_UNIT_BITSIZE (mode)), NULL,
1, OPTAB_DIRECT); 1, OPTAB_DIRECT);
...@@ -38697,7 +38789,7 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2, ...@@ -38697,7 +38789,7 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2); x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
else if (TARGET_XOP) else if (TARGET_XOP)
{ {
x = force_reg (V2DImode, CONST0_RTX (V2DImode)); x = force_reg (wmode, CONST0_RTX (wmode));
x = gen_xop_pmacsdql (dest, op1, op2, x); x = gen_xop_pmacsdql (dest, op1, op2, x);
} }
else else
...@@ -39980,6 +40072,11 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) ...@@ -39980,6 +40072,11 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
#undef TARGET_VECTORIZE_BUILTIN_GATHER #undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
/* Point the MUL_WIDEN_EVEN / MUL_WIDEN_ODD target macros at the i386
   lookup functions, which map a vector type to the matching widening
   multiply builtin (or NULL when none is available).  */
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN ix86_builtin_mul_widen_even
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD ix86_builtin_mul_widen_odd
#undef TARGET_BUILTIN_RECIPROCAL #undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
...@@ -5708,6 +5708,20 @@ ...@@ -5708,6 +5708,20 @@
DONE; DONE;
}) })
;; Widening multiply of the odd elements of operands 1 and 2, with the
;; result placed in operand 0, whose mode is <sseunpackmode>.  The
;; signed/unsigned variant is selected by the any_extend iterator and
;; handed on as <u_bool>.  All code generation is delegated to
;; ix86_expand_mul_widen_evenodd with odd_p = true; because the
;; preparation statements end in DONE, the RTL template below only
;; declares the operands.
;; NOTE(review): the pattern is instantiated for every mode in
;; VI124_AVX2, but a comment in ix86_expand_mul_widen_evenodd says only
;; vectors of SImode are handled -- confirm whether the QI/HI
;; instantiations are intended or whether an SImode-only iterator
;; should be used here.
(define_expand "vec_widen_<s>mult_odd_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
(any_extend:<sseunpackmode>
(match_operand:VI124_AVX2 1 "register_operand"))
(match_operand:VI124_AVX2 2 "register_operand")]
; Note that SSE2 does not have signed SI multiply
"TARGET_AVX || TARGET_XOP || TARGET_SSE4_1
|| (TARGET_SSE2 && (<u_bool> || <MODE>mode != V4SImode))"
{
ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
<u_bool>, true);
DONE;
})
(define_expand "sdot_prod<mode>" (define_expand "sdot_prod<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand") [(match_operand:<sseunpackmode> 0 "register_operand")
(match_operand:VI2_AVX2 1 "register_operand") (match_operand:VI2_AVX2 1 "register_operand")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment