Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
R
riscv-gcc-1
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lvzhengyang
riscv-gcc-1
Commits
93703e79
Commit
93703e79
authored
Jun 25, 2012
by
Richard Henderson
Committed by
Richard Henderson
Jun 25, 2012
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
i386: Add widen_multiply_even/odd builtins and hooks
From-SVN: r188959
parent
379eaa6f
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
137 additions
and
4 deletions
+137
-4
gcc/ChangeLog
+19
-0
gcc/config/i386/i386-builtin-types.def
+4
-1
gcc/config/i386/i386.c
+100
-3
gcc/config/i386/sse.md
+14
-0
No files found.
gcc/ChangeLog
View file @
93703e79
2012-06-25 Richard Henderson <rth@redhat.com>
2012-06-25 Richard Henderson <rth@redhat.com>
* config/i386/i386-builtin-types.def (V4UDI, V8USI): New.
(V2UDI_FUNC_V4USI_V4USI): New.
(V4UDI_FUNC_V8USI_V8USI): New.
* config/i386/i386.c (ix86_expand_args_builtin): Handle them.
(IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI): New.
(IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI): New.
(IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI): New.
(IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI): New.
(IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI): New.
(IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI): New.
(bdesc_args): Add them.
(ix86_builtin_mul_widen_even, ix86_builtin_mul_widen_odd): New.
(TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN): New.
(TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD): New.
(ix86_expand_mul_widen_evenodd): Use xop_pmacsdqh.
* config/i386/sse.md (vec_widen_<s>mult_odd_<VI124_AVX2>): New.
2012-06-25 Richard Henderson <rth@redhat.com>
* config/i386/sse.md (mul<VI4_AVX2>3): Use xop_pmacsdd.
* config/i386/sse.md (mul<VI4_AVX2>3): Use xop_pmacsdd.
2012-06-25 Richard Henderson <rth@redhat.com>
2012-06-25 Richard Henderson <rth@redhat.com>
...
...
gcc/config/i386/i386-builtin-types.def
View file @
93703e79
...
@@ -97,7 +97,8 @@ DEF_VECTOR_TYPE (V4DI, DI)
...
@@ -97,7 +97,8 @@ DEF_VECTOR_TYPE (V4DI, DI)
DEF_VECTOR_TYPE (V8SI, SI)
DEF_VECTOR_TYPE (V8SI, SI)
DEF_VECTOR_TYPE (V16HI, HI)
DEF_VECTOR_TYPE (V16HI, HI)
DEF_VECTOR_TYPE (V32QI, QI)
DEF_VECTOR_TYPE (V32QI, QI)
DEF_VECTOR_TYPE (V4UDI, UDI, V4DI)
DEF_VECTOR_TYPE (V8USI, USI, V8SI)
DEF_POINTER_TYPE (PCCHAR, CHAR, CONST)
DEF_POINTER_TYPE (PCCHAR, CHAR, CONST)
DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST)
DEF_POINTER_TYPE (PCDOUBLE, DOUBLE, CONST)
...
@@ -283,6 +284,7 @@ DEF_FUNCTION_TYPE (V2DI, V2DI, SI)
...
@@ -283,6 +284,7 @@ DEF_FUNCTION_TYPE (V2DI, V2DI, SI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V16QI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V16QI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI)
DEF_FUNCTION_TYPE (V2DI, V2DI, V2DI)
DEF_FUNCTION_TYPE (V2DI, V4SI, V4SI)
DEF_FUNCTION_TYPE (V2DI, V4SI, V4SI)
DEF_FUNCTION_TYPE (V2UDI, V4USI, V4USI)
DEF_FUNCTION_TYPE (V2DI, PCV2DI, V2DI)
DEF_FUNCTION_TYPE (V2DI, PCV2DI, V2DI)
DEF_FUNCTION_TYPE (V2SF, V2SF, V2SF)
DEF_FUNCTION_TYPE (V2SF, V2SF, V2SF)
DEF_FUNCTION_TYPE (V2SI, INT, INT)
DEF_FUNCTION_TYPE (V2SI, INT, INT)
...
@@ -349,6 +351,7 @@ DEF_FUNCTION_TYPE (V8SI, V8SI, SI)
...
@@ -349,6 +351,7 @@ DEF_FUNCTION_TYPE (V8SI, V8SI, SI)
DEF_FUNCTION_TYPE (V8SI, PCV8SI, V8SI)
DEF_FUNCTION_TYPE (V8SI, PCV8SI, V8SI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V4DI)
DEF_FUNCTION_TYPE (V4DI, V8SI, V8SI)
DEF_FUNCTION_TYPE (V4DI, V8SI, V8SI)
DEF_FUNCTION_TYPE (V4UDI, V8USI, V8USI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI)
DEF_FUNCTION_TYPE (V4DI, V4DI, V2DI)
DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI)
DEF_FUNCTION_TYPE (V4DI, PCV4DI, V4DI)
DEF_FUNCTION_TYPE (V4DI, V4DI, INT)
DEF_FUNCTION_TYPE (V4DI, V4DI, INT)
...
...
gcc/config/i386/i386.c
View file @
93703e79
...
@@ -25754,6 +25754,13 @@ enum ix86_builtins
...
@@ -25754,6 +25754,13 @@ enum ix86_builtins
IX86_BUILTIN_CPYSGNPS256
,
IX86_BUILTIN_CPYSGNPS256
,
IX86_BUILTIN_CPYSGNPD256
,
IX86_BUILTIN_CPYSGNPD256
,
IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI
,
IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI
,
IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI
,
IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI
,
IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI
,
IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI
,
/* FMA4 instructions. */
/* FMA4 instructions. */
IX86_BUILTIN_VFMADDSS
,
IX86_BUILTIN_VFMADDSS
,
IX86_BUILTIN_VFMADDSD
,
IX86_BUILTIN_VFMADDSD
,
...
@@ -26612,6 +26619,8 @@ static const struct builtin_description bdesc_args[] =
...
@@ -26612,6 +26619,8 @@ static const struct builtin_description bdesc_args[] =
{
OPTION_MASK_ISA_SSE2
,
CODE_FOR_sse2_umulv1siv1di3
,
"__builtin_ia32_pmuludq"
,
IX86_BUILTIN_PMULUDQ
,
UNKNOWN
,
(
int
)
V1DI_FTYPE_V2SI_V2SI
},
{
OPTION_MASK_ISA_SSE2
,
CODE_FOR_sse2_umulv1siv1di3
,
"__builtin_ia32_pmuludq"
,
IX86_BUILTIN_PMULUDQ
,
UNKNOWN
,
(
int
)
V1DI_FTYPE_V2SI_V2SI
},
{
OPTION_MASK_ISA_SSE2
,
CODE_FOR_sse2_umulv2siv2di3
,
"__builtin_ia32_pmuludq128"
,
IX86_BUILTIN_PMULUDQ128
,
UNKNOWN
,
(
int
)
V2DI_FTYPE_V4SI_V4SI
},
{
OPTION_MASK_ISA_SSE2
,
CODE_FOR_sse2_umulv2siv2di3
,
"__builtin_ia32_pmuludq128"
,
IX86_BUILTIN_PMULUDQ128
,
UNKNOWN
,
(
int
)
V2DI_FTYPE_V4SI_V4SI
},
{
OPTION_MASK_ISA_SSE2
,
CODE_FOR_sse2_umulv2siv2di3
,
"__builtin_vw_umul_even_v4si"
,
IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI
,
UNKNOWN
,
(
int
)
V2UDI_FTYPE_V4USI_V4USI
},
{
OPTION_MASK_ISA_SSE2
,
CODE_FOR_vec_widen_umult_odd_v4si
,
"__builtin_ia32_vw_umul_odd_v4si"
,
IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI
,
UNKNOWN
,
(
int
)
V2UDI_FTYPE_V4USI_V4USI
},
{
OPTION_MASK_ISA_SSE2
,
CODE_FOR_sse2_pmaddwd
,
"__builtin_ia32_pmaddwd128"
,
IX86_BUILTIN_PMADDWD128
,
UNKNOWN
,
(
int
)
V4SI_FTYPE_V8HI_V8HI
},
{
OPTION_MASK_ISA_SSE2
,
CODE_FOR_sse2_pmaddwd
,
"__builtin_ia32_pmaddwd128"
,
IX86_BUILTIN_PMADDWD128
,
UNKNOWN
,
(
int
)
V4SI_FTYPE_V8HI_V8HI
},
...
@@ -26738,6 +26747,7 @@ static const struct builtin_description bdesc_args[] =
...
@@ -26738,6 +26747,7 @@ static const struct builtin_description bdesc_args[] =
{
OPTION_MASK_ISA_SSE4_1
,
CODE_FOR_uminv4si3
,
"__builtin_ia32_pminud128"
,
IX86_BUILTIN_PMINUD128
,
UNKNOWN
,
(
int
)
V4SI_FTYPE_V4SI_V4SI
},
{
OPTION_MASK_ISA_SSE4_1
,
CODE_FOR_uminv4si3
,
"__builtin_ia32_pminud128"
,
IX86_BUILTIN_PMINUD128
,
UNKNOWN
,
(
int
)
V4SI_FTYPE_V4SI_V4SI
},
{
OPTION_MASK_ISA_SSE4_1
,
CODE_FOR_uminv8hi3
,
"__builtin_ia32_pminuw128"
,
IX86_BUILTIN_PMINUW128
,
UNKNOWN
,
(
int
)
V8HI_FTYPE_V8HI_V8HI
},
{
OPTION_MASK_ISA_SSE4_1
,
CODE_FOR_uminv8hi3
,
"__builtin_ia32_pminuw128"
,
IX86_BUILTIN_PMINUW128
,
UNKNOWN
,
(
int
)
V8HI_FTYPE_V8HI_V8HI
},
{
OPTION_MASK_ISA_SSE4_1
,
CODE_FOR_sse4_1_mulv2siv2di3
,
"__builtin_ia32_pmuldq128"
,
IX86_BUILTIN_PMULDQ128
,
UNKNOWN
,
(
int
)
V2DI_FTYPE_V4SI_V4SI
},
{
OPTION_MASK_ISA_SSE4_1
,
CODE_FOR_sse4_1_mulv2siv2di3
,
"__builtin_ia32_pmuldq128"
,
IX86_BUILTIN_PMULDQ128
,
UNKNOWN
,
(
int
)
V2DI_FTYPE_V4SI_V4SI
},
{
OPTION_MASK_ISA_SSE4_1
,
CODE_FOR_vec_widen_smult_odd_v4si
,
"__builtin_ia32_vw_smul_odd_v4si"
,
IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI
,
UNKNOWN
,
(
int
)
V2DI_FTYPE_V4SI_V4SI
},
{
OPTION_MASK_ISA_SSE4_1
,
CODE_FOR_mulv4si3
,
"__builtin_ia32_pmulld128"
,
IX86_BUILTIN_PMULLD128
,
UNKNOWN
,
(
int
)
V4SI_FTYPE_V4SI_V4SI
},
{
OPTION_MASK_ISA_SSE4_1
,
CODE_FOR_mulv4si3
,
"__builtin_ia32_pmulld128"
,
IX86_BUILTIN_PMULLD128
,
UNKNOWN
,
(
int
)
V4SI_FTYPE_V4SI_V4SI
},
/* SSE4.1 */
/* SSE4.1 */
...
@@ -27004,12 +27014,15 @@ static const struct builtin_description bdesc_args[] =
...
@@ -27004,12 +27014,15 @@ static const struct builtin_description bdesc_args[] =
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_zero_extendv4hiv4di2
,
"__builtin_ia32_pmovzxwq256"
,
IX86_BUILTIN_PMOVZXWQ256
,
UNKNOWN
,
(
int
)
V4DI_FTYPE_V8HI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_zero_extendv4hiv4di2
,
"__builtin_ia32_pmovzxwq256"
,
IX86_BUILTIN_PMOVZXWQ256
,
UNKNOWN
,
(
int
)
V4DI_FTYPE_V8HI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_zero_extendv4siv4di2
,
"__builtin_ia32_pmovzxdq256"
,
IX86_BUILTIN_PMOVZXDQ256
,
UNKNOWN
,
(
int
)
V4DI_FTYPE_V4SI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_zero_extendv4siv4di2
,
"__builtin_ia32_pmovzxdq256"
,
IX86_BUILTIN_PMOVZXDQ256
,
UNKNOWN
,
(
int
)
V4DI_FTYPE_V4SI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_mulv4siv4di3
,
"__builtin_ia32_pmuldq256"
,
IX86_BUILTIN_PMULDQ256
,
UNKNOWN
,
(
int
)
V4DI_FTYPE_V8SI_V8SI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_mulv4siv4di3
,
"__builtin_ia32_pmuldq256"
,
IX86_BUILTIN_PMULDQ256
,
UNKNOWN
,
(
int
)
V4DI_FTYPE_V8SI_V8SI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_vec_widen_smult_odd_v8si
,
"__builtin_ia32_vw_smul_odd_v8si"
,
IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI
,
UNKNOWN
,
(
int
)
V4DI_FTYPE_V8SI_V8SI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_umulhrswv16hi3
,
"__builtin_ia32_pmulhrsw256"
,
IX86_BUILTIN_PMULHRSW256
,
UNKNOWN
,
(
int
)
V16HI_FTYPE_V16HI_V16HI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_umulhrswv16hi3
,
"__builtin_ia32_pmulhrsw256"
,
IX86_BUILTIN_PMULHRSW256
,
UNKNOWN
,
(
int
)
V16HI_FTYPE_V16HI_V16HI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_umulv16hi3_highpart
,
"__builtin_ia32_pmulhuw256"
,
IX86_BUILTIN_PMULHUW256
,
UNKNOWN
,
(
int
)
V16HI_FTYPE_V16HI_V16HI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_umulv16hi3_highpart
,
"__builtin_ia32_pmulhuw256"
,
IX86_BUILTIN_PMULHUW256
,
UNKNOWN
,
(
int
)
V16HI_FTYPE_V16HI_V16HI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_smulv16hi3_highpart
,
"__builtin_ia32_pmulhw256"
,
IX86_BUILTIN_PMULHW256
,
UNKNOWN
,
(
int
)
V16HI_FTYPE_V16HI_V16HI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_smulv16hi3_highpart
,
"__builtin_ia32_pmulhw256"
,
IX86_BUILTIN_PMULHW256
,
UNKNOWN
,
(
int
)
V16HI_FTYPE_V16HI_V16HI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_mulv16hi3
,
"__builtin_ia32_pmullw256"
,
IX86_BUILTIN_PMULLW256
,
UNKNOWN
,
(
int
)
V16HI_FTYPE_V16HI_V16HI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_mulv16hi3
,
"__builtin_ia32_pmullw256"
,
IX86_BUILTIN_PMULLW256
,
UNKNOWN
,
(
int
)
V16HI_FTYPE_V16HI_V16HI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_mulv8si3
,
"__builtin_ia32_pmulld256"
,
IX86_BUILTIN_PMULLD256
,
UNKNOWN
,
(
int
)
V8SI_FTYPE_V8SI_V8SI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_mulv8si3
,
"__builtin_ia32_pmulld256"
,
IX86_BUILTIN_PMULLD256
,
UNKNOWN
,
(
int
)
V8SI_FTYPE_V8SI_V8SI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_umulv4siv4di3
,
"__builtin_ia32_pmuludq256"
,
IX86_BUILTIN_PMULUDQ256
,
UNKNOWN
,
(
int
)
V4DI_FTYPE_V8SI_V8SI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_umulv4siv4di3
,
"__builtin_ia32_pmuludq256"
,
IX86_BUILTIN_PMULUDQ256
,
UNKNOWN
,
(
int
)
V4DI_FTYPE_V8SI_V8SI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_umulv4siv4di3
,
"__builtin_i386_vw_umul_even_v8si"
,
IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI
,
UNKNOWN
,
(
int
)
V4UDI_FTYPE_V8USI_V8USI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_vec_widen_umult_odd_v8si
,
"__builtin_ia32_vw_umul_odd_v8si"
,
IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI
,
UNKNOWN
,
(
int
)
V4UDI_FTYPE_V8USI_V8USI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_iorv4di3
,
"__builtin_ia32_por256"
,
IX86_BUILTIN_POR256
,
UNKNOWN
,
(
int
)
V4DI_FTYPE_V4DI_V4DI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_iorv4di3
,
"__builtin_ia32_por256"
,
IX86_BUILTIN_POR256
,
UNKNOWN
,
(
int
)
V4DI_FTYPE_V4DI_V4DI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_psadbw
,
"__builtin_ia32_psadbw256"
,
IX86_BUILTIN_PSADBW256
,
UNKNOWN
,
(
int
)
V16HI_FTYPE_V32QI_V32QI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_psadbw
,
"__builtin_ia32_psadbw256"
,
IX86_BUILTIN_PSADBW256
,
UNKNOWN
,
(
int
)
V16HI_FTYPE_V32QI_V32QI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_pshufbv32qi3
,
"__builtin_ia32_pshufb256"
,
IX86_BUILTIN_PSHUFB256
,
UNKNOWN
,
(
int
)
V32QI_FTYPE_V32QI_V32QI
},
{
OPTION_MASK_ISA_AVX2
,
CODE_FOR_avx2_pshufbv32qi3
,
"__builtin_ia32_pshufb256"
,
IX86_BUILTIN_PSHUFB256
,
UNKNOWN
,
(
int
)
V32QI_FTYPE_V32QI_V32QI
},
...
@@ -29142,6 +29155,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
...
@@ -29142,6 +29155,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case
V2DI_FTYPE_V2DI_V2DI
:
case
V2DI_FTYPE_V2DI_V2DI
:
case
V2DI_FTYPE_V16QI_V16QI
:
case
V2DI_FTYPE_V16QI_V16QI
:
case
V2DI_FTYPE_V4SI_V4SI
:
case
V2DI_FTYPE_V4SI_V4SI
:
case
V2UDI_FTYPE_V4USI_V4USI
:
case
V2DI_FTYPE_V2DI_V16QI
:
case
V2DI_FTYPE_V2DI_V16QI
:
case
V2DI_FTYPE_V2DF_V2DF
:
case
V2DI_FTYPE_V2DF_V2DF
:
case
V2SI_FTYPE_V2SI_V2SI
:
case
V2SI_FTYPE_V2SI_V2SI
:
...
@@ -29166,6 +29180,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
...
@@ -29166,6 +29180,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case
V8SI_FTYPE_V16HI_V16HI
:
case
V8SI_FTYPE_V16HI_V16HI
:
case
V4DI_FTYPE_V4DI_V4DI
:
case
V4DI_FTYPE_V4DI_V4DI
:
case
V4DI_FTYPE_V8SI_V8SI
:
case
V4DI_FTYPE_V8SI_V8SI
:
case
V4UDI_FTYPE_V8USI_V8USI
:
if
(
comparison
==
UNKNOWN
)
if
(
comparison
==
UNKNOWN
)
return
ix86_expand_binop_builtin
(
icode
,
exp
,
target
);
return
ix86_expand_binop_builtin
(
icode
,
exp
,
target
);
nargs
=
2
;
nargs
=
2
;
...
@@ -31042,6 +31057,78 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
...
@@ -31042,6 +31057,78 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
return
NULL_TREE
;
return
NULL_TREE
;
}
}
}
}
/* Implement TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN.  Return the entry of
   ix86_builtins for the widen-multiply-even builtin that matches the
   vector mode and signedness of TYPE, or NULL when the mode is not
   handled here or the ISA the builtin is gated on is not enabled.  */

static tree
ix86_builtin_mul_widen_even (tree type)
{
  bool is_unsigned = TYPE_UNSIGNED (type);

  switch (TYPE_MODE (type))
    {
    case V4SImode:
      /* The unsigned variant is gated on SSE2, the signed one on SSE4.1.  */
      if (is_unsigned)
        {
          if (TARGET_SSE2)
            return ix86_builtins[IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI];
        }
      else if (TARGET_SSE4_1)
        return ix86_builtins[IX86_BUILTIN_PMULDQ128];
      return NULL;

    case V8SImode:
      /* Both signednesses of the 256-bit form are gated on AVX2.  */
      if (TARGET_AVX2)
        return ix86_builtins[is_unsigned
                             ? IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI
                             : IX86_BUILTIN_PMULDQ256];
      return NULL;

    default:
      return NULL;
    }
}
/* Implement TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD.  Return the entry of
   ix86_builtins for the widen-multiply-odd builtin that matches the
   vector mode and signedness of TYPE, or NULL when the mode is not
   handled here or the ISA the builtin is gated on is not enabled.  */

static tree
ix86_builtin_mul_widen_odd (tree type)
{
  bool is_unsigned = TYPE_UNSIGNED (type);

  switch (TYPE_MODE (type))
    {
    case V4SImode:
      /* The unsigned variant is gated on SSE2, the signed one on SSE4.1.  */
      if (is_unsigned)
        {
          if (TARGET_SSE2)
            return ix86_builtins[IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI];
        }
      else if (TARGET_SSE4_1)
        return ix86_builtins[IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI];
      return NULL;

    case V8SImode:
      /* Both signednesses of the 256-bit form are gated on AVX2.  */
      if (TARGET_AVX2)
        return ix86_builtins[is_unsigned
                             ? IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI
                             : IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI];
      return NULL;

    default:
      return NULL;
    }
}
/* Helper for avx_vpermilps256_operand et al. This is also used by
/* Helper for avx_vpermilps256_operand et al. This is also used by
the expansion functions to turn the parallel back into a mask.
the expansion functions to turn the parallel back into a mask.
...
@@ -38663,6 +38750,7 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
...
@@ -38663,6 +38750,7 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
bool
uns_p
,
bool
odd_p
)
bool
uns_p
,
bool
odd_p
)
{
{
enum
machine_mode
mode
=
GET_MODE
(
op1
);
enum
machine_mode
mode
=
GET_MODE
(
op1
);
enum
machine_mode
wmode
=
GET_MODE
(
dest
);
rtx
x
;
rtx
x
;
/* We only play even/odd games with vectors of SImode. */
/* We only play even/odd games with vectors of SImode. */
...
@@ -38672,8 +38760,12 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
...
@@ -38672,8 +38760,12 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
the even slots. For some cpus this is faster than a PSHUFD. */
the even slots. For some cpus this is faster than a PSHUFD. */
if
(
odd_p
)
if
(
odd_p
)
{
{
enum
machine_mode
wmode
=
GET_MODE
(
dest
);
if
(
TARGET_XOP
&&
mode
==
V4SImode
)
{
x
=
force_reg
(
wmode
,
CONST0_RTX
(
wmode
));
emit_insn
(
gen_xop_pmacsdqh
(
dest
,
op1
,
op2
,
x
));
return
;
}
op1
=
expand_binop
(
wmode
,
lshr_optab
,
gen_lowpart
(
wmode
,
op1
),
op1
=
expand_binop
(
wmode
,
lshr_optab
,
gen_lowpart
(
wmode
,
op1
),
GEN_INT
(
GET_MODE_UNIT_BITSIZE
(
mode
)),
NULL
,
GEN_INT
(
GET_MODE_UNIT_BITSIZE
(
mode
)),
NULL
,
1
,
OPTAB_DIRECT
);
1
,
OPTAB_DIRECT
);
...
@@ -38697,7 +38789,7 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
...
@@ -38697,7 +38789,7 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
x
=
gen_sse4_1_mulv2siv2di3
(
dest
,
op1
,
op2
);
x
=
gen_sse4_1_mulv2siv2di3
(
dest
,
op1
,
op2
);
else
if
(
TARGET_XOP
)
else
if
(
TARGET_XOP
)
{
{
x
=
force_reg
(
V2DImode
,
CONST0_RTX
(
V2DI
mode
));
x
=
force_reg
(
wmode
,
CONST0_RTX
(
w
mode
));
x
=
gen_xop_pmacsdql
(
dest
,
op1
,
op2
,
x
);
x
=
gen_xop_pmacsdql
(
dest
,
op1
,
op2
,
x
);
}
}
else
else
...
@@ -39980,6 +40072,11 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
...
@@ -39980,6 +40072,11 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
#undef TARGET_VECTORIZE_BUILTIN_GATHER
#undef TARGET_VECTORIZE_BUILTIN_GATHER
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN ix86_builtin_mul_widen_even
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD ix86_builtin_mul_widen_odd
#undef TARGET_BUILTIN_RECIPROCAL
#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
gcc/config/i386/sse.md
View file @
93703e79
...
@@ -5708,6 +5708,20 @@
...
@@ -5708,6 +5708,20 @@
DONE;
DONE;
})
})
;; Expander for vec_widen_<s>mult_odd_<mode>.  The preparation code passes
;; operands 0, 1 and 2 to ix86_expand_mul_widen_evenodd, with <u_bool> as
;; the unsignedness argument and `true' as the final argument (named odd_p
;; in that function's parameter list).
(define_expand "vec_widen_
<s>
mult_odd_
<mode>
"
[
(match_operand:
<sseunpackmode>
0 "register_operand")
(any_extend:
<sseunpackmode>
(match_operand:VI124_AVX2 1 "register_operand"))
(match_operand:VI124_AVX2 2 "register_operand")]
; Note that SSE2 does not have signed SI multiply
"TARGET_AVX || TARGET_XOP || TARGET_SSE4_1
|| (TARGET_SSE2 && (
<u
_bool
>
||
<MODE>
mode != V4SImode))"
{
ix86_expand_mul_widen_evenodd (operands
[
0
]
, operands
[
1
]
, operands
[
2
]
,
<u
_bool
>
, true);
DONE;
})
(define_expand "sdot_prod
<mode>
"
(define_expand "sdot_prod
<mode>
"
[
(match_operand:
<sseunpackmode>
0 "register_operand")
[
(match_operand:
<sseunpackmode>
0 "register_operand")
(match_operand:VI2_AVX2 1 "register_operand")
(match_operand:VI2_AVX2 1 "register_operand")
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment