Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
R
riscv-gcc-1
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lvzhengyang
riscv-gcc-1
Commits
916b60b7
Commit
916b60b7
authored
May 04, 2002
by
Bernd Schmidt
Committed by
Bernd Schmidt
May 04, 2002
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix bugs in SSE2 support and add SSE2 functions to xmmintrin.h
From-SVN: r53161
parent
c26fbbca
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
1227 additions
and
46 deletions
+1227
-46
gcc/ChangeLog
+67
-0
gcc/config/i386/i386.c
+102
-15
gcc/config/i386/i386.md
+135
-31
gcc/config/i386/xmmintrin.h
+923
-0
No files found.
gcc/ChangeLog
View file @
916b60b7
2002-05-04 Bernd Schmidt <bernds@redhat.com>
* config/i386/i386.c (bdesc_2arg): Add a couple of missing SSE2
builtins. Use V2DI patterns instead of TI for logical operations.
(ix86_init_mmx_sse_builtins): Add a couple of missing SSE2 builtins.
Correct definitions of psadbw, pmovmskb128, movntdq, cvtdq2ps.
(ix86_expand_builtin): Change the pattern used for movntdq.
* config/i386/i386.md (sse2_andv2di3, sse2_iorv2di3, sse2_xorv2di3,
sse2_nandv2di3): New patterns.
(sse2_anddf3, sse2_nanddf3, sse2_iordf3, sse2_xordf3): Correct modes
on operands.
(sse2_movntv2di): Renamed from sse2_movntti and modes adjusted.
(cvtdq2pd): Correct mode on operand 1.
(sse2_umulsidi3): Describe without unspec.
(sse2_psadbw, mmx_psadbw): Describe with unspec; use more appropriate
machine modes.
(lshrv2di3): Renamed from sse2_lshrv2di3 and removed unspec.
(ashlv2di3): Likewise, from sse2_ashlv2di3.
(ashrv8hi3, ashrv4si3, lshrv8hi3, lshrv4si3, lshrv2di3, ashlv8hi3,
ashlv4si3, ashlv2di3): Use SImode for shift count.
(ashrv8hi3_ti, ashrv4si3_ti, lshrv8hi3_ti, lshrv4si3_ti, lshrv2di3_ti,
ashlv8hi3_ti, ashlv4si3_ti, ashlv2di3_ti):
New patterns.
* config/i386/xmmintrin.h (__v2df, __v2di, __v4si, __v8hi, __v16qi):
New typedefs.
(__m128i, __m128d): New macros.
(_mm_add_pd, _mm_add_sd, _mm_sub_pd, _mm_sub_sd, _mm_mul_pd,
_mm_mul_sd, _mm_div_pd, _mm_div_sd, _mm_sqrt_pd, _mm_sqrt_sd,
_mm_min_pd, _mm_min_sd, _mm_max_sd, _mm_max_pd, _mm_and_pd,
_mm_andnot_pd, _mm_xor_pd, _mm_or_pd, _mm_cmpeq_pd, _mm_cmplt_pd,
_mm_cmple_pd, _mm_cmpgt_pd, _mm_cmpge_pd, _mm_cmpneq_pd,
_mm_cmpnlt_pd, _mm_cmpnle_pd, _mm_cmpngt_pd, _mm_cmpnge_pd,
_mm_cmpord_pd, _mm_cmpunord_pd, _mm_cmpeq_sd, _mm_cmplt_sd,
_mm_cmple_sd, _mm_cmpgt_sd, _mm_cmpge_sd, _mm_cmpneq_sd,
_mm_cmpnlt_sd, _mm_cmpnle_sd, _mm_cmpngt_sd, _mm_cmpnge_sd,
_mm_cmpord_sd, _mm_cmpunord_sd, _mm_comieq_sd, _mm_comilt_sd,
_mm_comile_sd, _mm_comigt_sd, _mm_comige_sd, _mm_comineq_sd,
_mm_ucomieq_sd, _mm_ucomilt_sd, _mm_ucomile_sd,
_mm_ucomigt_sd, _mm_ucomige_sd, _mm_ucomineq_sd, _mm_cvtepi32_pd,
_mm_cvtepi32_ps, _mm_cvtpd_epi32, _mm_cvtpd_pi32, _mm_cvtpd_ps,
_mm_cvttpd_epi32, _mm_cvttpd_pi32, _mm_cvtpi32_pd, _mm_cvtps_epi32,
_mm_cvttps_epi32, _mm_cvtps_pd, _mm_cvtsd_si32, _mm_cvttsd_si32,
_mm_cvtsd_ss, _mm_cvtsi32_sd, _mm_cvtss_sd, _mm_unpackhi_pd,
_mm_unpacklo_pd, _mm_loadh_pd, _mm_storeh_pd, _mm_storel_pd,
_mm_movemask_pd, _mm_packs_epi16, _mm_packs_epi32, _mm_packus_epi16,
_mm_unpackhi_epi8, _mm_unpackhi_epi16, _mm_unpackhi_epi32,
_mm_unpacklo_epi8, _mm_unpacklo_epi16, _mm_unpacklo_epi32,
_mm_add_epi8, _mm_add_epi16, _mm_add_epi32, _mm_add_epi64,
_mm_adds_epi8, _mm_adds_epi16, _mm_adds_epu8, _mm_adds_epu16,
_mm_sub_epi8, _mm_sub_epi16, _mm_sub_epi32, _mm_sub_epi64,
_mm_subs_epi8, _mm_subs_epi16, _mm_subs_epu8, _mm_subs_epu16,
_mm_madd_epi16, _mm_mulhi_epi16, _mm_mullo_epi16, _mm_mul_pu16,
_mm_mul_epu16, _mm_sll_epi16, _mm_sll_epi32, _mm_sll_epi64,
_mm_sra_epi16, _mm_sra_epi32, _mm_srl_epi16, _mm_srl_epi32,
_mm_srl_epi64, _mm_slli_epi16, _mm_slli_epi32, _mm_slli_epi64,
_mm_srai_epi16, _mm_srai_epi32, _mm_srli_epi16, _mm_srli_epi32,
_mm_srli_epi64, _mm_and_si128, _mm_andnot_si128, _mm_or_si128,
_mm_xor_si128, _mm_cmpeq_epi8, _mm_cmpeq_epi16, _mm_cmpeq_epi32,
_mm_cmpgt_epi8, _mm_cmpgt_epi16, _mm_cmpgt_epi32, _mm_max_epi16,
_mm_max_epu8, _mm_min_epi16, _mm_min_epu8, _mm_movemask_epi8,
_mm_mulhi_epu16, _mm_maskmoveu_si128, _mm_avg_epu8, _mm_avg_epu16,
_mm_sad_epu8, _mm_stream_si32, _mm_stream_si128, _mm_stream_pd,
_mm_movpi64_epi64, _mm_clflush, _mm_lfence, _mm_mfence): New
functions.
(_mm_shufflehi_epi16, _mm_shufflelo_epi16, _mm_shuffle_epi32,
_mm_extract_epi16, _mm_insert_epi16, _mm_shuffle_pd): New macros.
2002-05-04 Kazu Hirata <kazu@cs.umass.edu>
* dwarf2out.c: Fix formatting.
...
...
gcc/config/i386/i386.c
View file @
916b60b7
...
...
@@ -11179,10 +11179,10 @@ static const struct builtin_description bdesc_2arg[] =
{
MASK_SSE2
,
CODE_FOR_sse2_umulsidi3
,
"__builtin_ia32_pmuludq"
,
IX86_BUILTIN_PMULUDQ
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_umulv2siv2di3
,
"__builtin_ia32_pmuludq128"
,
IX86_BUILTIN_PMULUDQ128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_and
t
i3
,
"__builtin_ia32_pand128"
,
IX86_BUILTIN_PAND128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_nand
t
i3
,
"__builtin_ia32_pandn128"
,
IX86_BUILTIN_PANDN128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_ior
t
i3
,
"__builtin_ia32_por128"
,
IX86_BUILTIN_POR128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_xor
t
i3
,
"__builtin_ia32_pxor128"
,
IX86_BUILTIN_PXOR128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_and
v2d
i3
,
"__builtin_ia32_pand128"
,
IX86_BUILTIN_PAND128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_nand
v2d
i3
,
"__builtin_ia32_pandn128"
,
IX86_BUILTIN_PANDN128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_ior
v2d
i3
,
"__builtin_ia32_por128"
,
IX86_BUILTIN_POR128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_xor
v2d
i3
,
"__builtin_ia32_pxor128"
,
IX86_BUILTIN_PXOR128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_uavgv16qi3
,
"__builtin_ia32_pavgb128"
,
IX86_BUILTIN_PAVGB128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_uavgv8hi3
,
"__builtin_ia32_pavgw128"
,
IX86_BUILTIN_PAVGW128
,
0
,
0
},
...
...
@@ -11206,6 +11206,34 @@ static const struct builtin_description bdesc_2arg[] =
{
MASK_SSE2
,
CODE_FOR_sse2_punpcklwd
,
"__builtin_ia32_punpcklwd128"
,
IX86_BUILTIN_PUNPCKLWD128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_punpckldq
,
"__builtin_ia32_punpckldq128"
,
IX86_BUILTIN_PUNPCKLDQ128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_packsswb
,
"__builtin_ia32_packsswb128"
,
IX86_BUILTIN_PACKSSWB128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_packssdw
,
"__builtin_ia32_packssdw128"
,
IX86_BUILTIN_PACKSSDW128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_packuswb
,
"__builtin_ia32_packuswb128"
,
IX86_BUILTIN_PACKUSWB128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_umulv8hi3_highpart
,
"__builtin_ia32_pmulhuw128"
,
IX86_BUILTIN_PMULHUW128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_psadbw
,
0
,
IX86_BUILTIN_PSADBW128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_ashlv8hi3_ti
,
0
,
IX86_BUILTIN_PSLLW128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_ashlv8hi3
,
0
,
IX86_BUILTIN_PSLLWI128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_ashlv4si3_ti
,
0
,
IX86_BUILTIN_PSLLD128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_ashlv4si3
,
0
,
IX86_BUILTIN_PSLLDI128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_ashlv2di3_ti
,
0
,
IX86_BUILTIN_PSLLQ128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_ashlv2di3
,
0
,
IX86_BUILTIN_PSLLQI128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_lshrv8hi3_ti
,
0
,
IX86_BUILTIN_PSRLW128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_lshrv8hi3
,
0
,
IX86_BUILTIN_PSRLWI128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_lshrv4si3_ti
,
0
,
IX86_BUILTIN_PSRLD128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_lshrv4si3
,
0
,
IX86_BUILTIN_PSRLDI128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_lshrv2di3_ti
,
0
,
IX86_BUILTIN_PSRLQ128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_lshrv2di3
,
0
,
IX86_BUILTIN_PSRLQI128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_ashrv8hi3_ti
,
0
,
IX86_BUILTIN_PSRAW128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_ashrv8hi3
,
0
,
IX86_BUILTIN_PSRAWI128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_ashrv4si3_ti
,
0
,
IX86_BUILTIN_PSRAD128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_ashrv4si3
,
0
,
IX86_BUILTIN_PSRADI128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_sse2_pmaddwd
,
0
,
IX86_BUILTIN_PMADDWD128
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_cvtsi2sd
,
0
,
IX86_BUILTIN_CVTSI2SD
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_cvtsd2ss
,
0
,
IX86_BUILTIN_CVTSD2SS
,
0
,
0
},
{
MASK_SSE2
,
CODE_FOR_cvtss2sd
,
0
,
IX86_BUILTIN_CVTSS2SD
,
0
,
0
}
...
...
@@ -11270,6 +11298,7 @@ ix86_init_mmx_sse_builtins ()
tree
pchar_type_node
=
build_pointer_type
(
char_type_node
);
tree
pfloat_type_node
=
build_pointer_type
(
float_type_node
);
tree
pv2si_type_node
=
build_pointer_type
(
V2SI_type_node
);
tree
pv2di_type_node
=
build_pointer_type
(
V2DI_type_node
);
tree
pdi_type_node
=
build_pointer_type
(
long_long_unsigned_type_node
);
/* Comparisons. */
...
...
@@ -11334,11 +11363,6 @@ ix86_init_mmx_sse_builtins ()
tree_cons
(
NULL_TREE
,
integer_type_node
,
endlink
))));
tree
v4hi_ftype_v8qi_v8qi
=
build_function_type
(
V4HI_type_node
,
tree_cons
(
NULL_TREE
,
V8QI_type_node
,
tree_cons
(
NULL_TREE
,
V8QI_type_node
,
endlink
)));
tree
v2si_ftype_v4hi_v4hi
=
build_function_type
(
V2SI_type_node
,
tree_cons
(
NULL_TREE
,
V4HI_type_node
,
...
...
@@ -11411,6 +11435,12 @@ ix86_init_mmx_sse_builtins ()
tree_cons
(
NULL_TREE
,
long_long_unsigned_type_node
,
endlink
)));
tree
void_ftype_pv2di_v2di
=
build_function_type
(
void_type_node
,
tree_cons
(
NULL_TREE
,
pv2di_type_node
,
tree_cons
(
NULL_TREE
,
V2DI_type_node
,
endlink
)));
/* Normal vector unops. */
tree
v4sf_ftype_v4sf
=
build_function_type
(
V4SF_type_node
,
...
...
@@ -11629,6 +11659,11 @@ ix86_init_mmx_sse_builtins ()
tree_cons
(
NULL_TREE
,
integer_type_node
,
endlink
))));
tree
v2di_ftype_v2di_int
=
build_function_type
(
V2DI_type_node
,
tree_cons
(
NULL_TREE
,
V2DI_type_node
,
tree_cons
(
NULL_TREE
,
integer_type_node
,
endlink
)));
tree
v4si_ftype_v4si_int
=
build_function_type
(
V4SI_type_node
,
tree_cons
(
NULL_TREE
,
V4SI_type_node
,
...
...
@@ -11639,6 +11674,34 @@ ix86_init_mmx_sse_builtins ()
tree_cons
(
NULL_TREE
,
V8HI_type_node
,
tree_cons
(
NULL_TREE
,
integer_type_node
,
endlink
)));
tree
v8hi_ftype_v8hi_v2di
=
build_function_type
(
V8HI_type_node
,
tree_cons
(
NULL_TREE
,
V8HI_type_node
,
tree_cons
(
NULL_TREE
,
V2DI_type_node
,
endlink
)));
tree
v4si_ftype_v4si_v2di
=
build_function_type
(
V4SI_type_node
,
tree_cons
(
NULL_TREE
,
V4SI_type_node
,
tree_cons
(
NULL_TREE
,
V2DI_type_node
,
endlink
)));
tree
v4si_ftype_v8hi_v8hi
=
build_function_type
(
V4SI_type_node
,
tree_cons
(
NULL_TREE
,
V8HI_type_node
,
tree_cons
(
NULL_TREE
,
V8HI_type_node
,
endlink
)));
tree
di_ftype_v8qi_v8qi
=
build_function_type
(
long_long_unsigned_type_node
,
tree_cons
(
NULL_TREE
,
V8QI_type_node
,
tree_cons
(
NULL_TREE
,
V8QI_type_node
,
endlink
)));
tree
v2di_ftype_v16qi_v16qi
=
build_function_type
(
V2DI_type_node
,
tree_cons
(
NULL_TREE
,
V16QI_type_node
,
tree_cons
(
NULL_TREE
,
V16QI_type_node
,
endlink
)));
tree
int_ftype_v16qi
=
build_function_type
(
integer_type_node
,
tree_cons
(
NULL_TREE
,
V16QI_type_node
,
endlink
));
/* Add all builtins that are more or less simple operations on two
operands. */
...
...
@@ -11775,7 +11838,7 @@ ix86_init_mmx_sse_builtins ()
def_builtin
(
MASK_SSE1
|
MASK_3DNOW_A
,
"__builtin_ia32_sfence"
,
void_ftype_void
,
IX86_BUILTIN_SFENCE
);
def_builtin
(
MASK_SSE1
|
MASK_3DNOW_A
,
"__builtin_ia32_psadbw"
,
v4h
i_ftype_v8qi_v8qi
,
IX86_BUILTIN_PSADBW
);
def_builtin
(
MASK_SSE1
|
MASK_3DNOW_A
,
"__builtin_ia32_psadbw"
,
d
i_ftype_v8qi_v8qi
,
IX86_BUILTIN_PSADBW
);
def_builtin
(
MASK_SSE1
,
"__builtin_ia32_rcpps"
,
v4sf_ftype_v4sf
,
IX86_BUILTIN_RCPPS
);
def_builtin
(
MASK_SSE1
,
"__builtin_ia32_rcpss"
,
v4sf_ftype_v4sf
,
IX86_BUILTIN_RCPSS
);
...
...
@@ -11838,15 +11901,15 @@ ix86_init_mmx_sse_builtins ()
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_storelpd"
,
void_ftype_pv2si_v2df
,
IX86_BUILTIN_STORELPD
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_movmskpd"
,
int_ftype_v2df
,
IX86_BUILTIN_MOVMSKPD
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_pmovmskb128"
,
int_ftype_v
8
qi
,
IX86_BUILTIN_PMOVMSKB128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_pmovmskb128"
,
int_ftype_v
16
qi
,
IX86_BUILTIN_PMOVMSKB128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_movnti"
,
void_ftype_pint_int
,
IX86_BUILTIN_MOVNTI
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_movntpd"
,
void_ftype_pdouble_v2df
,
IX86_BUILTIN_MOVNTPD
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_movntdq"
,
void_ftype_p
di_
di
,
IX86_BUILTIN_MOVNTDQ
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_movntdq"
,
void_ftype_p
v2di_v2
di
,
IX86_BUILTIN_MOVNTDQ
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_pshufd"
,
v4si_ftype_v4si_int
,
IX86_BUILTIN_PSHUFD
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_pshuflw"
,
v8hi_ftype_v8hi_int
,
IX86_BUILTIN_PSHUFLW
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_pshufhw"
,
v8hi_ftype_v8hi_int
,
IX86_BUILTIN_PSHUFHW
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psadbw128"
,
v
4hi_ftype_v8qi_v8
qi
,
IX86_BUILTIN_PSADBW128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psadbw128"
,
v
2di_ftype_v16qi_v16
qi
,
IX86_BUILTIN_PSADBW128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_sqrtpd"
,
v2df_ftype_v2df
,
IX86_BUILTIN_SQRTPD
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_sqrtsd"
,
v2df_ftype_v2df
,
IX86_BUILTIN_SQRTSD
);
...
...
@@ -11854,7 +11917,7 @@ ix86_init_mmx_sse_builtins ()
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_shufpd"
,
v2df_ftype_v2df_v2df_int
,
IX86_BUILTIN_SHUFPD
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_cvtdq2pd"
,
v2df_ftype_v4si
,
IX86_BUILTIN_CVTDQ2PD
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_cvtdq2ps"
,
v4sf_ftype_v4si
,
IX86_BUILTIN_CVTDQ2P
D
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_cvtdq2ps"
,
v4sf_ftype_v4si
,
IX86_BUILTIN_CVTDQ2P
S
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_cvtpd2dq"
,
v4si_ftype_v2df
,
IX86_BUILTIN_CVTPD2DQ
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_cvtpd2pi"
,
v2si_ftype_v2df
,
IX86_BUILTIN_CVTPD2PI
);
...
...
@@ -11886,6 +11949,30 @@ ix86_init_mmx_sse_builtins ()
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_clflush"
,
void_ftype_pvoid
,
IX86_BUILTIN_CLFLUSH
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_lfence"
,
void_ftype_void
,
IX86_BUILTIN_LFENCE
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_mfence"
,
void_ftype_void
,
IX86_BUILTIN_MFENCE
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psllw128"
,
v8hi_ftype_v8hi_v2di
,
IX86_BUILTIN_PSLLW128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_pslld128"
,
v4si_ftype_v4si_v2di
,
IX86_BUILTIN_PSLLD128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psllq128"
,
v2di_ftype_v2di_v2di
,
IX86_BUILTIN_PSLLQ128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psrlw128"
,
v8hi_ftype_v8hi_v2di
,
IX86_BUILTIN_PSRLW128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psrld128"
,
v4si_ftype_v4si_v2di
,
IX86_BUILTIN_PSRLD128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psrlq128"
,
v2di_ftype_v2di_v2di
,
IX86_BUILTIN_PSRLQ128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psraw128"
,
v8hi_ftype_v8hi_v2di
,
IX86_BUILTIN_PSRAW128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psrad128"
,
v4si_ftype_v4si_v2di
,
IX86_BUILTIN_PSRAD128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psllwi128"
,
v8hi_ftype_v8hi_int
,
IX86_BUILTIN_PSLLWI128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_pslldi128"
,
v4si_ftype_v4si_int
,
IX86_BUILTIN_PSLLDI128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psllqi128"
,
v2di_ftype_v2di_int
,
IX86_BUILTIN_PSLLQI128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psrlwi128"
,
v8hi_ftype_v8hi_int
,
IX86_BUILTIN_PSRLWI128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psrldi128"
,
v4si_ftype_v4si_int
,
IX86_BUILTIN_PSRLDI128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psrlqi128"
,
v2di_ftype_v2di_int
,
IX86_BUILTIN_PSRLQI128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psrawi128"
,
v8hi_ftype_v8hi_int
,
IX86_BUILTIN_PSRAWI128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_psradi128"
,
v4si_ftype_v4si_int
,
IX86_BUILTIN_PSRADI128
);
def_builtin
(
MASK_SSE2
,
"__builtin_ia32_pmaddwd128"
,
v4si_ftype_v8hi_v8hi
,
IX86_BUILTIN_PMADDWD128
);
}
/* Errors in the source file can cause expand_expr to return const0_rtx
...
...
@@ -12681,7 +12768,7 @@ ix86_expand_builtin (exp, target, subtarget, mode, ignore)
case
IX86_BUILTIN_MOVNTPD
:
return
ix86_expand_store_builtin
(
CODE_FOR_sse2_movntv2df
,
arglist
);
case
IX86_BUILTIN_MOVNTDQ
:
return
ix86_expand_store_builtin
(
CODE_FOR_sse2_movnt
t
i
,
arglist
);
return
ix86_expand_store_builtin
(
CODE_FOR_sse2_movnt
v2d
i
,
arglist
);
case
IX86_BUILTIN_MOVNTI
:
return
ix86_expand_store_builtin
(
CODE_FOR_sse2_movntsi
,
arglist
);
...
...
gcc/config/i386/i386.md
View file @
916b60b7
...
...
@@ -104,6 +104,7 @@
;; 58 This is a `
sfence' operation.
;; 59 This is a
`mfence' operation.
;; 60 This is a `
lfence' operation.
;; 61 This is a
`psadbw' operation.
;; Insns whose names begin with "x86_" are emitted by gen_FOO calls
;; from i386.c.
...
...
@@ -18593,6 +18594,15 @@
[
(set_attr "type" "sselog")
(set_attr "mode" "TI")])
;; Bitwise AND of two V2DI values, output as `pand'.
;; Operand 1 is tied to the destination ("0") and declared commutative
;; with operand 2 ("%").  The insn condition requires TARGET_SSE2 and
;; rejects the case where operands 1 and 2 are both MEMs.
(define_insn "sse2_andv2di3"
[
(set (match_operand:V2DI 0 "register_operand" "=x")
(and:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
(match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2
&& (GET_CODE (operands
[
1
]
) != MEM || GET_CODE (operands
[
2
]
) != MEM)"
"pand
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sse")
]
)
(define_insn "
*
sse_nandti3_df"
[
(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
(and:TI (not:TI (subreg:TI (match_operand:DF 1 "register_operand" "0") 0))
...
...
@@ -18628,6 +18638,15 @@
"pandn
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sselog")
]
)
;; AND-NOT of two V2DI values, output as `pandn': the result is
;; (~operand 1) & operand 2.
;; Operand 1 is tied to the destination ("0").  Unlike the and/ior/xor
;; patterns it must NOT carry the commutative modifier "%": the
;; operation is not symmetric, and allowing operands 1 and 2 to be
;; exchanged would compute (~operand 2) & operand 1 instead.
;; The insn condition requires TARGET_SSE2 and rejects the case where
;; operands 1 and 2 are both MEMs.
(define_insn "sse2_nandv2di3"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
        (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "0"))
                  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2
   && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
  "pandn\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")])
(define_insn "
*
sse_iorti3_df_1"
[
(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
(ior:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
...
...
@@ -18684,6 +18703,15 @@
[
(set_attr "type" "sselog")
(set_attr "mode" "TI")])
;; Bitwise inclusive OR of two V2DI values, output as `por'.
;; Operand 1 is tied to the destination ("0") and declared commutative
;; with operand 2 ("%").  The insn condition requires TARGET_SSE2 and
;; rejects the case where operands 1 and 2 are both MEMs.
(define_insn "sse2_iorv2di3"
[
(set (match_operand:V2DI 0 "register_operand" "=x")
(ior:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
(match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2
&& (GET_CODE (operands
[
1
]
) != MEM || GET_CODE (operands
[
2
]
) != MEM)"
"por
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sse")
]
)
(define_insn "
*
sse_xorti3_df_1"
[
(set (subreg:TI (match_operand:DF 0 "register_operand" "=Y") 0)
(xor:TI (subreg:TI (match_operand:DF 1 "register_operand" "%0") 0)
...
...
@@ -18740,6 +18768,15 @@
[
(set_attr "type" "sselog")
(set_attr "mode" "TI")])
;; Bitwise exclusive OR of two V2DI values, output as `pxor'.
;; Operand 1 is tied to the destination ("0") and declared commutative
;; with operand 2 ("%").  The insn condition requires TARGET_SSE2 and
;; rejects the case where operands 1 and 2 are both MEMs.
(define_insn "sse2_xorv2di3"
[
(set (match_operand:V2DI 0 "register_operand" "=x")
(xor:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0")
(match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
"TARGET_SSE2
&& (GET_CODE (operands
[
1
]
) != MEM || GET_CODE (operands
[
2
]
) != MEM)"
"pxor
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sse")
]
)
;; Use xor, but don't show input operands so they aren't live before
;; this insn.
(define_insn "sse_clrv4sf"
...
...
@@ -19279,9 +19316,9 @@
(set_attr "mode" "DI")])
(define_insn "mmx_psadbw"
[
(set (match_operand:
V8Q
I 0 "register_operand" "=y")
(
abs:V8QI (minus:V8QI
(match_operand:V8QI 1 "register_operand" "0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym"))
))]
[
(set (match_operand:
D
I 0 "register_operand" "=y")
(
unspec:DI
[
(match_operand:V8QI 1 "register_operand" "0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym")] 61
))]
"TARGET_SSE || TARGET_3DNOW_A"
"psadbw
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "mmxshft")
...
...
@@ -20250,8 +20287,8 @@
(define_insn "sse2_anddf3"
[
(set (match_operand:V2DF 0 "register_operand" "=x")
(subreg:V2DF (and:TI (subreg:TI (match_operand:
TI
1 "register_operand" "%0") 0)
(subreg:TI (match_operand:
TI
2 "nonimmediate_operand" "xm") 0)) 0))]
(subreg:V2DF (and:TI (subreg:TI (match_operand:
V2DF
1 "register_operand" "%0") 0)
(subreg:TI (match_operand:
V2DF
2 "nonimmediate_operand" "xm") 0)) 0))]
"TARGET_SSE2"
"andpd
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sselog")
...
...
@@ -20259,8 +20296,8 @@
(define_insn "sse2_nanddf3"
[
(set (match_operand:V2DF 0 "register_operand" "=x")
(subreg:V2DF (and:TI (not:TI (subreg:TI (match_operand:
TI
1 "register_operand" "0") 0))
(subreg:TI (match_operand:
TI
2 "nonimmediate_operand" "xm") 0)) 0))]
(subreg:V2DF (and:TI (not:TI (subreg:TI (match_operand:
V2DF
1 "register_operand" "0") 0))
(subreg:TI (match_operand:
V2DF
2 "nonimmediate_operand" "xm") 0)) 0))]
"TARGET_SSE2"
"andnpd
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sselog")
...
...
@@ -20268,8 +20305,8 @@
(define_insn "sse2_iordf3"
[
(set (match_operand:V2DF 0 "register_operand" "=x")
(subreg:V2DF (ior:TI (subreg:TI (match_operand:
TI
1 "register_operand" "%0") 0)
(subreg:TI (match_operand:
TI
2 "nonimmediate_operand" "xm") 0)) 0))]
(subreg:V2DF (ior:TI (subreg:TI (match_operand:
V2DF
1 "register_operand" "%0") 0)
(subreg:TI (match_operand:
V2DF
2 "nonimmediate_operand" "xm") 0)) 0))]
"TARGET_SSE2"
"orpd
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sselog")
...
...
@@ -20277,8 +20314,8 @@
(define_insn "sse2_xordf3"
[
(set (match_operand:V2DF 0 "register_operand" "=x")
(subreg:V2DF (xor:TI (subreg:TI (match_operand:
TI
1 "register_operand" "%0") 0)
(subreg:TI (match_operand:
TI
2 "nonimmediate_operand" "xm") 0)) 0))]
(subreg:V2DF (xor:TI (subreg:TI (match_operand:
V2DF
1 "register_operand" "%0") 0)
(subreg:TI (match_operand:
V2DF
2 "nonimmediate_operand" "xm") 0)) 0))]
"TARGET_SSE2"
"xorpd
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sselog")
...
...
@@ -20418,9 +20455,9 @@
[
(set_attr "type" "ssecvt")
(set_attr "mode" "V2DF")])
(define_insn "sse2_movnt
t
i"
[
(set (match_operand:
T
I 0 "memory_operand" "=m")
(unspec:
TI
[
(match_operand:T
I 1 "register_operand" "x")
]
34))]
(define_insn "sse2_movnt
v2d
i"
[
(set (match_operand:
V2D
I 0 "memory_operand" "=m")
(unspec:
V2DI
[
(match_operand:V2D
I 1 "register_operand" "x")
]
34))]
"TARGET_SSE2"
"movntdq
\t
{%1, %0|%0, %1}"
[
(set_attr "type" "ssecvt")
...
...
@@ -20467,7 +20504,7 @@
(define_insn "cvtdq2pd"
[
(set (match_operand:V2DF 0 "register_operand" "=x")
(float:V2DF (vec_select:V2SI
(match_operand:V
2
SI 1 "nonimmediate_operand" "xm")
(match_operand:V
4
SI 1 "nonimmediate_operand" "xm")
(parallel
[
(const_int 0)
(const_int 1)]))))]
...
...
@@ -20784,11 +20821,14 @@
[
(set_attr "type" "sseimul")
(set_attr "mode" "TI")])
;; See the MMX logical operations for the reason for the unspec
(define_insn "sse2_umulsidi3"
[
(set (match_operand:DI 0 "register_operand" "=y")
(unspec:DI
[
(mult:DI (zero_extend:DI (match_operand:DI 1 "register_operand" "0"))
(zero_extend:DI (match_operand:DI 2 "nonimmediate_operand" "ym")))] 45))]
(mult:DI (zero_extend:DI (vec_select:SI
(match_operand:V2SI 1 "register_operand" "0")
(parallel
[
(const_int 0)
]
)))
(zero_extend:DI (vec_select:SI
(match_operand:V2SI 2 "nonimmediate_operand" "ym")
(parallel
[
(const_int 0)
]
)))))]
"TARGET_SSE2"
"pmuludq
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sseimul")
...
...
@@ -20889,9 +20929,9 @@
;; @@@ this isn't the right representation.
(define_insn "sse2_psadbw"
[
(set (match_operand:V
16Q
I 0 "register_operand" "=x")
(
abs:V16QI (minus:V16QI
(match_operand:V16QI 1 "register_operand" "0")
(match_operand:V16QI 2 "nonimmediate_operand" "ym"))
))]
[
(set (match_operand:V
2D
I 0 "register_operand" "=x")
(
unspec:V2DI
[
(match_operand:V16QI 1 "register_operand" "0")
(match_operand:V16QI 2 "nonimmediate_operand" "ym")] 61
))]
"TARGET_SSE2"
"psadbw
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sseiadd")
...
...
@@ -21050,7 +21090,7 @@
(define_insn "ashrv8hi3"
[
(set (match_operand:V8HI 0 "register_operand" "=x")
(ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
(match_operand:
TI 2 "nonmemory_operand" "x
i")))]
(match_operand:
SI 2 "nonmemory_operand" "r
i")))]
"TARGET_SSE2"
"psraw
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sseishft")
...
...
@@ -21059,7 +21099,7 @@
(define_insn "ashrv4si3"
[
(set (match_operand:V4SI 0 "register_operand" "=x")
(ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
(match_operand:
TI 2 "nonmemory_operand" "x
i")))]
(match_operand:
SI 2 "nonmemory_operand" "r
i")))]
"TARGET_SSE2"
"psrad
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sseishft")
...
...
@@ -21068,7 +21108,7 @@
(define_insn "lshrv8hi3"
[
(set (match_operand:V8HI 0 "register_operand" "=x")
(lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
(match_operand:
TI 2 "nonmemory_operand" "x
i")))]
(match_operand:
SI 2 "nonmemory_operand" "r
i")))]
"TARGET_SSE2"
"psrlw
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sseishft")
...
...
@@ -21077,16 +21117,16 @@
(define_insn "lshrv4si3"
[
(set (match_operand:V4SI 0 "register_operand" "=x")
(lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
(match_operand:
TI 2 "nonmemory_operand" "x
i")))]
(match_operand:
SI 2 "nonmemory_operand" "r
i")))]
"TARGET_SSE2"
"psrld
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sseishft")
(set_attr "mode" "TI")])
(define_insn "
sse2_
lshrv2di3"
(define_insn "lshrv2di3"
[
(set (match_operand:V2DI 0 "register_operand" "=x")
(lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
(match_operand:
TI 2 "nonmemory_operand" "x
i")))]
(match_operand:
SI 2 "nonmemory_operand" "r
i")))]
"TARGET_SSE2"
"psrlq
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sseishft")
...
...
@@ -21095,7 +21135,7 @@
(define_insn "ashlv8hi3"
[
(set (match_operand:V8HI 0 "register_operand" "=x")
(ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
(match_operand:
TI 2 "nonmemory_operand" "x
i")))]
(match_operand:
SI 2 "nonmemory_operand" "r
i")))]
"TARGET_SSE2"
"psllw
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sseishft")
...
...
@@ -21104,16 +21144,80 @@
(define_insn "ashlv4si3"
[
(set (match_operand:V4SI 0 "register_operand" "=x")
(ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
(match_operand:TI 2 "nonmemory_operand" "xi")))]
(match_operand:SI 2 "nonmemory_operand" "ri")))]
"TARGET_SSE2"
"pslld
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sse")
]
)
;; Left shift of a V2DI value (operand 1, tied to the destination) by an
;; SImode count in operand 2; output as `psllq'.  Requires TARGET_SSE2.
;; NOTE(review): the count constraint "ri" admits a general register;
;; confirm that psllq can actually take its count in that operand form.
(define_insn "ashlv2di3"
[
(set (match_operand:V2DI 0 "register_operand" "=x")
(ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
(match_operand:SI 2 "nonmemory_operand" "ri")))]
"TARGET_SSE2"
"psllq
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sse")
]
)
;; Arithmetic right shift of a V8HI value (operand 1, tied to the
;; destination), output as `psraw'.  The shift count is V2DI operand 2
;; viewed through a TImode subreg.  Requires TARGET_SSE2.
(define_insn "ashrv8hi3_ti"
[
(set (match_operand:V8HI 0 "register_operand" "=x")
(ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
(subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
"TARGET_SSE2"
"psraw
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sse")
]
)
;; Arithmetic right shift of a V4SI value (operand 1, tied to the
;; destination), output as `psrad'.  The shift count is V2DI operand 2
;; viewed through a TImode subreg.  Requires TARGET_SSE2.
(define_insn "ashrv4si3_ti"
[
(set (match_operand:V4SI 0 "register_operand" "=x")
(ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
(subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
"TARGET_SSE2"
"psrad
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sse")
]
)
;; Logical right shift of a V8HI value (operand 1, tied to the
;; destination), output as `psrlw'.  The shift count is V2DI operand 2
;; viewed through a TImode subreg.  Requires TARGET_SSE2.
(define_insn "lshrv8hi3_ti"
[
(set (match_operand:V8HI 0 "register_operand" "=x")
(lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
(subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
"TARGET_SSE2"
"psrlw
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sse")
]
)
;; Logical right shift of a V4SI value (operand 1, tied to the
;; destination), output as `psrld'.  The shift count is V2DI operand 2
;; viewed through a TImode subreg.  Requires TARGET_SSE2.
(define_insn "lshrv4si3_ti"
[
(set (match_operand:V4SI 0 "register_operand" "=x")
(lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
(subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
"TARGET_SSE2"
"psrld
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sse")
]
)
;; Logical right shift of a V2DI value (operand 1, tied to the
;; destination), output as `psrlq'.  The shift count is V2DI operand 2
;; viewed through a TImode subreg.  Requires TARGET_SSE2.
(define_insn "lshrv2di3_ti"
[
(set (match_operand:V2DI 0 "register_operand" "=x")
(lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
(subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
"TARGET_SSE2"
"psrlq
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sse")
]
)
;; Left shift of a V8HI value (operand 1, tied to the destination),
;; output as `psllw'.  The shift count is V2DI operand 2 viewed through
;; a TImode subreg.  Requires TARGET_SSE2.
(define_insn "ashlv8hi3_ti"
[
(set (match_operand:V8HI 0 "register_operand" "=x")
(ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
(subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
"TARGET_SSE2"
"psllw
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sse")
]
)
;; Left shift of a V4SI value (operand 1, tied to the destination),
;; output as `pslld'.  The shift count is V2DI operand 2 viewed through
;; a TImode subreg.  Requires TARGET_SSE2.
(define_insn "ashlv4si3_ti"
[
(set (match_operand:V4SI 0 "register_operand" "=x")
(ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
(subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0)))]
"TARGET_SSE2"
"pslld
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sseishft")
(set_attr "mode" "TI")])
(define_insn "
sse2_ashlv2di3
"
(define_insn "
ashlv2di3_ti
"
[
(set (match_operand:V2DI 0 "register_operand" "=x")
(ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
(
match_operand:TI 2 "nonmemory_operand" "xi"
)))]
(
subreg:TI (match_operand:V2DI 2 "nonmemory_operand" "xi") 0
)))]
"TARGET_SSE2"
"psllq
\t
{%2, %0|%0, %2}"
[
(set_attr "type" "sseishft")
...
...
gcc/config/i386/xmmintrin.h
View file @
916b60b7
...
...
@@ -1058,4 +1058,927 @@ do { \
(row3) = __builtin_ia32_shufps (__t2, __t3, 0xDD); \
} while (0)
/* SSE2 */
/* Internal vector types used to cast arguments for the SSE2 builtins.
   Each is declared via the GCC `mode' attribute; the mode name selects
   the vector layout.  */

/* Vector type with machine mode V2DF.  */
typedef
int
__v2df
__attribute__
((
mode
(
V2DF
)));
/* Vector type with machine mode V2DI.  */
typedef
int
__v2di
__attribute__
((
mode
(
V2DI
)));
/* Vector type with machine mode V4SI.  */
typedef
int
__v4si
__attribute__
((
mode
(
V4SI
)));
/* Vector type with machine mode V8HI.  */
typedef
int
__v8hi
__attribute__
((
mode
(
V8HI
)));
/* Vector type with machine mode V16QI.  */
typedef
int
__v16qi
__attribute__
((
mode
(
V16QI
)));
/* User-visible SSE2 type names.  __m128i is an alias for __m128 (declared
   outside this section); __m128d is an alias for __v2df.  */
#define __m128i __m128
#define __m128d __v2df
/* Wrapper for __builtin_ia32_addpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_add_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_addsd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_add_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_subpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_sub_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_subsd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_sub_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_mulpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_mul_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_mulsd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_mul_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_divpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_div_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_divsd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_div_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_sqrtpd on __A (viewed as __v2df).  */
static __inline __m128d
_mm_sqrt_pd (__m128d __A)
{
  return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
}
/* Wrapper for __builtin_ia32_sqrtsd on __A (viewed as __v2df).  */
static __inline __m128d
_mm_sqrt_sd (__m128d __A)
{
  return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__A);
}
/* Wrapper for __builtin_ia32_minpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_min_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_minsd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_min_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_maxpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_max_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
}
/* Scalar-double maximum: wrapper for __builtin_ia32_maxsd on __A and __B
   (both viewed as __v2df).  This must use the max builtin, mirroring
   _mm_max_pd; calling the min builtin here would silently compute the
   minimum instead.  */
static __inline __m128d
_mm_max_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_andpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_and_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_andnpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_andnot_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_orpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_or_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_xorpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_xor_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpeqpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpeq_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpltpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmplt_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmplepd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmple_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpgtpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpgt_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpgepd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpge_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpneqpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpneq_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpnltpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpnlt_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpnlepd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpnle_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpngtpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpngt_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpngepd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpnge_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpordpd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpord_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpunordpd on __A and __B (both viewed as
   __v2df).  */
static __inline __m128d
_mm_cmpunord_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpeqsd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpeq_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpltsd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmplt_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmplesd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmple_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpgtsd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpgt_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpgtsd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpgesd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpge_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpgesd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpneqsd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpneq_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpnltsd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpnlt_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpnlesd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpnle_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpngtsd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpngt_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpngtsd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpngesd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpnge_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpngesd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpordsd on __A and __B (both viewed as __v2df).  */
static __inline __m128d
_mm_cmpord_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cmpunordsd on __A and __B (both viewed as
   __v2df).  */
static __inline __m128d
_mm_cmpunord_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
}
/* Returns the int result of __builtin_ia32_comisdeq applied to __A and __B
   (both viewed as __v2df).  */
static __inline int
_mm_comieq_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
}
/* Returns the int result of __builtin_ia32_comisdlt applied to __A and __B
   (both viewed as __v2df).  */
static __inline int
_mm_comilt_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
}
/* Returns the int result of __builtin_ia32_comisdle applied to __A and __B
   (both viewed as __v2df).  */
static __inline int
_mm_comile_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
}
/* Returns the int result of __builtin_ia32_comisdgt applied to __A and __B
   (both viewed as __v2df).  */
static __inline int
_mm_comigt_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
}
/* Returns the int result of __builtin_ia32_comisdge applied to __A and __B
   (both viewed as __v2df).  */
static __inline int
_mm_comige_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
}
/* Returns the int result of __builtin_ia32_comisdneq applied to __A and __B
   (both viewed as __v2df).  */
static __inline int
_mm_comineq_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
}
/* Returns the int result of __builtin_ia32_ucomisdeq applied to __A and __B
   (both viewed as __v2df).  */
static __inline int
_mm_ucomieq_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
}
/* Returns the int result of __builtin_ia32_ucomisdlt applied to __A and __B
   (both viewed as __v2df).  */
static __inline int
_mm_ucomilt_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
}
/* Returns the int result of __builtin_ia32_ucomisdle applied to __A and __B
   (both viewed as __v2df).  */
static __inline int
_mm_ucomile_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
}
/* Returns the int result of __builtin_ia32_ucomisdgt applied to __A and __B
   (both viewed as __v2df).  */
static __inline int
_mm_ucomigt_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
}
/* Returns the int result of __builtin_ia32_ucomisdge applied to __A and __B
   (both viewed as __v2df).  */
static __inline int
_mm_ucomige_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
}
/* Returns the int result of __builtin_ia32_ucomisdneq applied to __A and __B
   (both viewed as __v2df).  */
static __inline int
_mm_ucomineq_sd (__m128d __A, __m128d __B)
{
  return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cvtdq2pd on __A (viewed as __v4si); the result
   is returned as __m128d.  */
static __inline __m128d
_mm_cvtepi32_pd (__m128i __A)
{
  return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si)__A);
}
/* Wrapper for __builtin_ia32_cvtdq2ps on __A (viewed as __v4si).
   NOTE(review): the result is cast to and returned as __m128d although the
   builtin name ends in "2ps" -- confirm whether __m128 was intended.  */
static __inline __m128d
_mm_cvtepi32_ps (__m128i __A)
{
  return (__m128d)__builtin_ia32_cvtdq2ps ((__v4si)__A);
}
/* Wrapper for __builtin_ia32_cvtpd2dq on __A (viewed as __v2df).
   NOTE(review): the result is returned as __m128d although the name says
   "epi32" -- confirm whether __m128i was intended.  */
static __inline __m128d
_mm_cvtpd_epi32 (__m128d __A)
{
  return (__m128d)__builtin_ia32_cvtpd2dq ((__v2df)__A);
}
/* Wrapper for __builtin_ia32_cvtpd2pi on __A (viewed as __v2df); the result
   is returned as __m64.  */
static __inline __m64
_mm_cvtpd_pi32 (__m128d __A)
{
  return (__m64)__builtin_ia32_cvtpd2pi ((__v2df)__A);
}
/* Wrapper for __builtin_ia32_cvtpd2ps on __A (viewed as __v2df).
   NOTE(review): the result is returned as __m128d although the name ends in
   "_ps" -- confirm whether __m128 was intended.  */
static __inline __m128d
_mm_cvtpd_ps (__m128d __A)
{
  return (__m128d)__builtin_ia32_cvtpd2ps ((__v2df)__A);
}
/* Wrapper for __builtin_ia32_cvttpd2dq on __A (viewed as __v2df).
   NOTE(review): the result is returned as __m128d although the name says
   "epi32" -- confirm whether __m128i was intended.  */
static __inline __m128d
_mm_cvttpd_epi32 (__m128d __A)
{
  return (__m128d)__builtin_ia32_cvttpd2dq ((__v2df)__A);
}
/* Wrapper for __builtin_ia32_cvttpd2pi on __A (viewed as __v2df); the result
   is returned as __m64.  */
static __inline __m64
_mm_cvttpd_pi32 (__m128d __A)
{
  return (__m64)__builtin_ia32_cvttpd2pi ((__v2df)__A);
}
/* Wrapper for __builtin_ia32_cvtpi2pd on __A (viewed as __v2si); the result
   is returned as __m128d.  */
static __inline __m128d
_mm_cvtpi32_pd (__m64 __A)
{
  return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si)__A);
}
/* Wrapper for __builtin_ia32_cvtps2dq on __A (viewed as __v4sf).
   NOTE(review): both the parameter and the return are declared __m128d
   although the name reads "ps -> epi32" -- confirm the intended types.  */
static __inline __m128d
_mm_cvtps_epi32 (__m128d __A)
{
  return (__m128d)__builtin_ia32_cvtps2dq ((__v4sf)__A);
}
/* Wrapper for __builtin_ia32_cvttps2dq on __A (viewed as __v4sf).
   NOTE(review): both the parameter and the return are declared __m128d
   although the name reads "ps -> epi32" -- confirm the intended types.  */
static __inline __m128d
_mm_cvttps_epi32 (__m128d __A)
{
  return (__m128d)__builtin_ia32_cvttps2dq ((__v4sf)__A);
}
/* Wrapper for __builtin_ia32_cvtps2pd on __A (viewed as __v4sf); the result
   is returned as __m128d.
   NOTE(review): the parameter is declared __m128d but is cast to __v4sf --
   confirm whether __m128 was intended.  */
static __inline __m128d
_mm_cvtps_pd (__m128d __A)
{
  return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf)__A);
}
/* Returns the int result of __builtin_ia32_cvtsd2si applied to __A (viewed
   as __v2df).  */
static __inline int
_mm_cvtsd_si32 (__m128d __A)
{
  return __builtin_ia32_cvtsd2si ((__v2df)__A);
}
/* Returns the int result of __builtin_ia32_cvttsd2si applied to __A (viewed
   as __v2df).  */
static __inline int
_mm_cvttsd_si32 (__m128d __A)
{
  return __builtin_ia32_cvttsd2si ((__v2df)__A);
}
/* Wrapper for __builtin_ia32_cvtsd2ss: __A is passed as __v4sf and __B as
   __v2df.
   NOTE(review): __A and the return are declared __m128d although __A is
   used as __v4sf -- confirm whether __m128 was intended.  */
static __inline __m128d
_mm_cvtsd_ss (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cvtsd2ss ((__v4sf)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_cvtsi2sd: __A is passed as __v2df and the int
   __B is passed through unchanged.  */
static __inline __m128d
_mm_cvtsi32_sd (__m128d __A, int __B)
{
  return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df)__A, __B);
}
/* Wrapper for __builtin_ia32_cvtss2sd: __A is passed as __v2df and __B as
   __v4sf.
   NOTE(review): __B is declared __m128d but is used as __v4sf -- confirm
   whether __m128 was intended.  */
static __inline __m128d
_mm_cvtss_sd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_cvtss2sd ((__v2df)__A, (__v4sf)__B);
}
/* Expands to __builtin_ia32_shufpd on __A and __B (viewed as __v2df) with
   selector __C.  The third argument must name the macro parameter __C, and
   every argument is parenthesized so that compound expressions are cast and
   passed as a whole.  */
#define _mm_shuffle_pd(__A, __B, __C) ((__m128d)__builtin_ia32_shufpd ((__v2df)(__A), (__v2df)(__B), (__C)))
/* Wrapper for __builtin_ia32_unpckhpd on __A and __B (both viewed as
   __v2df).  */
static __inline __m128d
_mm_unpackhi_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_unpcklpd on __A and __B (both viewed as
   __v2df).  */
static __inline __m128d
_mm_unpacklo_pd (__m128d __A, __m128d __B)
{
  return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_loadhpd: __A is passed as __v2df and the
   pointer __B is passed as __v2si *.  */
static __inline __m128d
_mm_loadh_pd (__m128d __A, __m128d *__B)
{
  return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, (__v2si *)__B);
}
/* Wrapper for __builtin_ia32_storehpd: the pointer __A is passed as
   __v2si * and __B as __v2df.  Returns nothing.  */
static __inline void
_mm_storeh_pd (__m128d *__A, __m128d __B)
{
  __builtin_ia32_storehpd ((__v2si *)__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_loadlpd: __A is passed as __v2df and the
   pointer __B is passed as __v2si *.  */
static __inline __m128d
_mm_loadl_pd (__m128d __A, __m128d *__B)
{
  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, (__v2si *)__B);
}
/* Wrapper for __builtin_ia32_storelpd: the pointer __A is passed as
   __v2si * and __B as __v2df.  Returns nothing.  */
static __inline void
_mm_storel_pd (__m128d *__A, __m128d __B)
{
  __builtin_ia32_storelpd ((__v2si *)__A, (__v2df)__B);
}
/* Returns the int result of __builtin_ia32_movmskpd applied to __A (viewed
   as __v2df).  */
static __inline int
_mm_movemask_pd (__m128d __A)
{
  return __builtin_ia32_movmskpd ((__v2df)__A);
}
/* Wrapper for __builtin_ia32_packsswb128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_packs_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_packssdw128 on __A and __B (both viewed as
   __v4si).  */
static __inline __m128i
_mm_packs_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
}
/* Wrapper for __builtin_ia32_packuswb128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_packus_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_punpckhbw128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
}
/* Wrapper for __builtin_ia32_punpckhwd128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_punpckhdq128 on __A and __B (both viewed as
   __v4si).  */
static __inline __m128i
_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
}
/* Wrapper for __builtin_ia32_punpcklbw128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
}
/* Wrapper for __builtin_ia32_punpcklwd128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_punpckldq128 on __A and __B (both viewed as
   __v4si).  */
static __inline __m128i
_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
}
/* Wrapper for __builtin_ia32_paddb128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_add_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
}
/* Wrapper for __builtin_ia32_paddw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_add_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_paddd128 on __A and __B (both viewed as
   __v4si).  */
static __inline __m128i
_mm_add_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
}
/* Wrapper for __builtin_ia32_paddq128 on __A and __B.  The operands are
   viewed as __v2di, the two-element 64-bit vector type that the other
   quadword wrappers in this file (psllq128, psrlq128, pand128, ...) pass to
   their builtins, rather than as __v4si.  */
static __inline __m128i
_mm_add_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_paddsb128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_adds_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
}
/* Wrapper for __builtin_ia32_paddsw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_adds_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_paddusb128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_adds_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
}
/* Wrapper for __builtin_ia32_paddusw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_adds_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_psubb128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_sub_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
}
/* Wrapper for __builtin_ia32_psubw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_sub_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_psubd128 on __A and __B (both viewed as
   __v4si).  */
static __inline __m128i
_mm_sub_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
}
/* Wrapper for __builtin_ia32_psubq128 on __A and __B.  The operands are
   viewed as __v2di, the two-element 64-bit vector type that the other
   quadword wrappers in this file (psllq128, psrlq128, pand128, ...) pass to
   their builtins, rather than as __v4si.  */
static __inline __m128i
_mm_sub_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_psubsb128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_subs_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
}
/* Wrapper for __builtin_ia32_psubsw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_subs_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_psubusb128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_subs_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
}
/* Wrapper for __builtin_ia32_psubusw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_subs_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_pmaddwd128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_madd_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_pmulhw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_mulhi_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_pmullw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_mullo_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_pmuludq on __A and __B (both viewed as
   __v2si); the result is returned as __m64.
   NOTE(review): the "pu16" suffix does not match the __v2si operands passed
   to the builtin -- confirm the intended function name.  */
static __inline __m64
_mm_mul_pu16 (__m64 __A, __m64 __B)
{
  return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
}
/* Wrapper for __builtin_ia32_pmuludq128 on __A and __B (both viewed as
   __v4si).
   NOTE(review): the "epu16" suffix does not match the __v4si operands
   passed to the builtin -- confirm the intended function name.  */
static __inline __m128i
_mm_mul_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
}
/* Wrapper for __builtin_ia32_psllw128: __A is passed as __v8hi and the
   count vector __B as __v2di.  */
static __inline __m128i
_mm_sll_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psllw128 ((__v8hi)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_pslld128: __A is passed as __v4si and the
   count vector __B as __v2di.  */
static __inline __m128i
_mm_sll_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pslld128 ((__v4si)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_psllq128: both __A and the count vector __B
   are passed as __v2di.  */
static __inline __m128i
_mm_sll_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psllq128 ((__v2di)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_psraw128: __A is passed as __v8hi and the
   count vector __B as __v2di.  */
static __inline __m128i
_mm_sra_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_psrad128: __A is passed as __v4si and the
   count vector __B as __v2di.  */
static __inline __m128i
_mm_sra_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_psrlw128: __A is passed as __v8hi and the
   count vector __B as __v2di.  */
static __inline __m128i
_mm_srl_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_psrld128: __A is passed as __v4si and the
   count vector __B as __v2di.  */
static __inline __m128i
_mm_srl_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_psrlq128: both __A and the count vector __B
   are passed as __v2di.  */
static __inline __m128i
_mm_srl_epi64 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_psllwi128: __A is passed as __v8hi and the
   int count __B unchanged.  */
static __inline __m128i
_mm_slli_epi16 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psllwi128 ((__v8hi)__A, __B);
}
/* Wrapper for __builtin_ia32_pslldi128: __A is passed as __v4si and the
   int count __B unchanged.  */
static __inline __m128i
_mm_slli_epi32 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_pslldi128 ((__v4si)__A, __B);
}
/* Wrapper for __builtin_ia32_psllqi128: __A is passed as __v2di and the
   int count __B unchanged.  */
static __inline __m128i
_mm_slli_epi64 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psllqi128 ((__v2di)__A, __B);
}
/* Wrapper for __builtin_ia32_psrawi128: __A is passed as __v8hi and the
   int count __B unchanged.  */
static __inline __m128i
_mm_srai_epi16 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrawi128 ((__v8hi)__A, __B);
}
/* Wrapper for __builtin_ia32_psradi128: __A is passed as __v4si and the
   int count __B unchanged.  */
static __inline __m128i
_mm_srai_epi32 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psradi128 ((__v4si)__A, __B);
}
/* Wrapper for __builtin_ia32_psrlwi128: __A is passed as __v8hi and the
   int count __B unchanged.  */
static __inline __m128i
_mm_srli_epi16 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrlwi128 ((__v8hi)__A, __B);
}
/* Wrapper for __builtin_ia32_psrldi128: __A is passed as __v4si and the
   int count __B unchanged.  */
static __inline __m128i
_mm_srli_epi32 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrldi128 ((__v4si)__A, __B);
}
/* Wrapper for __builtin_ia32_psrlqi128: __A is passed as __v2di and the
   int count __B unchanged.  */
static __inline __m128i
_mm_srli_epi64 (__m128i __A, int __B)
{
  return (__m128i)__builtin_ia32_psrlqi128 ((__v2di)__A, __B);
}
/* Wrapper for __builtin_ia32_pand128 on __A and __B (both viewed as
   __v2di).  */
static __inline __m128i
_mm_and_si128 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_pandn128 on __A and __B (both viewed as
   __v2di).  */
static __inline __m128i
_mm_andnot_si128 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_por128 on __A and __B (both viewed as
   __v2di).  */
static __inline __m128i
_mm_or_si128 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_pxor128 on __A and __B (both viewed as
   __v2di).  */
static __inline __m128i
_mm_xor_si128 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_pcmpeqb128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
}
/* Wrapper for __builtin_ia32_pcmpeqw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_pcmpeqd128 on __A and __B (both viewed as
   __v4si).  */
static __inline __m128i
_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
}
/* Wrapper for __builtin_ia32_pcmpgtb128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
}
/* Wrapper for __builtin_ia32_pcmpgtw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_pcmpgtd128 on __A and __B (both viewed as
   __v4si).  */
static __inline __m128i
_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
}
/* Expands to __builtin_ia32_pextrw128 on __A (viewed as __v8hi) with
   selector __B.  Arguments are parenthesized so that a compound expression
   is cast and passed as a whole.  */
#define _mm_extract_epi16(__A, __B) __builtin_ia32_pextrw128 ((__v8hi)(__A), (__B))
/* Expands to __builtin_ia32_pinsrw128 on __A (viewed as __v8hi), value __B
   and selector __C, with the result cast to __m128i.  There must be no
   space between the macro name and its parameter list; with a space the
   preprocessor defines an object-like macro instead.  Arguments are
   parenthesized so compound expressions are passed as a whole.  */
#define _mm_insert_epi16(__A, __B, __C) ((__m128i)__builtin_ia32_pinsrw128 ((__v8hi)(__A), (__B), (__C)))
/* Wrapper for __builtin_ia32_pmaxsw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_max_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_pmaxub128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_max_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
}
/* Wrapper for __builtin_ia32_pminsw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_min_epi16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_pminub128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_min_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
}
/* Returns the int result of __builtin_ia32_pmovmskb128 applied to __A
   (viewed as __v16qi).  */
static __inline int
_mm_movemask_epi8 (__m128i __A)
{
  return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
}
/* Wrapper for __builtin_ia32_pmulhuw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_mulhi_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Expands to __builtin_ia32_pshufhw128 on __A (viewed as __v8hi) with
   selector __B, cast to __m128i.  Arguments are parenthesized so that a
   compound expression is cast and passed as a whole.  */
#define _mm_shufflehi_epi16(__A, __B) ((__m128i)__builtin_ia32_pshufhw128 ((__v8hi)(__A), (__B)))
/* Expands to __builtin_ia32_pshuflw128 on __A (viewed as __v8hi) with
   selector __B, cast to __m128i.  Arguments are parenthesized so that a
   compound expression is cast and passed as a whole.  */
#define _mm_shufflelo_epi16(__A, __B) ((__m128i)__builtin_ia32_pshuflw128 ((__v8hi)(__A), (__B)))
/* Expands to __builtin_ia32_pshufd on __A (viewed as __v4si) with selector
   __B, cast to __m128i.  Arguments are parenthesized so that a compound
   expression is cast and passed as a whole.  */
#define _mm_shuffle_epi32(__A, __B) ((__m128i)__builtin_ia32_pshufd ((__v4si)(__A), (__B)))
/* Wrapper for __builtin_ia32_maskmovdqu: __A and __B are passed as __v16qi
   and the char pointer __C unchanged.  Returns nothing.  */
static __inline void
_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
{
  __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
}
/* Wrapper for __builtin_ia32_pavgb128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_avg_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
}
/* Wrapper for __builtin_ia32_pavgw128 on __A and __B (both viewed as
   __v8hi).  */
static __inline __m128i
_mm_avg_epu16 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
}
/* Wrapper for __builtin_ia32_psadbw128 on __A and __B (both viewed as
   __v16qi).  */
static __inline __m128i
_mm_sad_epu8 (__m128i __A, __m128i __B)
{
  return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
}
/* Wrapper for __builtin_ia32_movnti: passes the int pointer __A and the int
   __B through unchanged.  Returns nothing.  */
static __inline void
_mm_stream_si32 (int *__A, int __B)
{
  __builtin_ia32_movnti (__A, __B);
}
/* Wrapper for __builtin_ia32_movntdq: the pointer __A is passed as
   __v2di * and __B as __v2di.  Returns nothing.  */
static __inline void
_mm_stream_si128 (__m128i *__A, __m128i __B)
{
  __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
}
/* Wrapper for __builtin_ia32_movntpd: the pointer __A is passed without a
   cast and __B as __v2df.  Returns nothing.  */
static __inline void
_mm_stream_pd (__m128d *__A, __m128d __B)
{
  __builtin_ia32_movntpd (__A, (__v2df)__B);
}
/* Wrapper for __builtin_ia32_movq2dq: __A is cast to unsigned long long
   before the call and the result is returned as __m128i.  */
static __inline __m128i
_mm_movpi64_epi64 (__m64 __A)
{
  return (__m128i)__builtin_ia32_movq2dq ((unsigned long long)__A);
}
/* Wrapper for __builtin_ia32_clflush on the address __A.  The builtin is
   invoked as a plain statement rather than through `return', because ISO C
   does not allow a return statement with an expression in a function whose
   return type is void.  */
static __inline void
_mm_clflush (void *__A)
{
  __builtin_ia32_clflush (__A);
}
/* Wrapper for __builtin_ia32_lfence; takes no arguments and returns
   nothing.  */
static __inline void
_mm_lfence (void)
{
  __builtin_ia32_lfence ();
}
/* Wrapper for __builtin_ia32_mfence; takes no arguments and returns
   nothing.  */
static __inline void
_mm_mfence (void)
{
  __builtin_ia32_mfence ();
}
/* End of SSE2. */
#endif
/* _XMMINTRIN_H_INCLUDED */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment