Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
R
riscv-gcc-1
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lvzhengyang
riscv-gcc-1
Commits
5e04b3b6
Commit
5e04b3b6
authored
Nov 30, 2009
by
Richard Henderson
Committed by
Richard Henderson
Nov 30, 2009
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Implement vec_perm broadcast, and tidy lots of patterns to help.
From-SVN: r154836
parent
9fda11a2
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
623 additions
and
294 deletions
+623
-294
gcc/ChangeLog
+47
-0
gcc/config/i386/i386-protos.h
+1
-0
gcc/config/i386/i386.c
+218
-128
gcc/config/i386/predicates.md
+17
-0
gcc/config/i386/sse.md
+340
-166
No files found.
gcc/ChangeLog
View file @
5e04b3b6
2009
-
11
-
30
Richard
Henderson
<
rth
@
redhat
.
com
>
*
config
/
i386
/
i386
.
c
(
ix86_vec_interleave_v2df_operator_ok
):
New
.
(
bdesc_special_args
):
Update
insn
codes
.
(
avx_vpermilp_parallel
):
Correct
range
check
.
(
ix86_rtx_costs
):
Handle
vector
permutation
rtx
codes
.
(
struct
expand_vec_perm_d
):
Move
earlier
.
(
get_mode_wider_vector
):
New
.
(
expand_vec_perm_broadcast_1
):
New
.
(
ix86_expand_vector_init_duplicate
):
Use
it
.
Tidy
AVX
modes
.
(
expand_vec_perm_broadcast
):
New
.
(
ix86_expand_vec_perm_builtin_1
):
Use
it
.
*
config
/
i386
/
i386
-
protos
.
h
:
Update
.
*
config
/
i386
/
predicates
.
md
(
avx_vbroadcast_operand
):
New
.
*
config
/
i386
/
sse
.
md
(
AVX256MODE24P
):
New
.
(
ssescalarmodesuffix2s
):
New
.
(
avxhalfvecmode
,
avxscalarmode
):
Fill
out
to
all
modes
.
(
avxmodesuffixf2c
):
Add
V8SI
,
V4DI
.
(
vec_dupv4sf
):
New
expander
.
(*
vec_dupv4sf_avx
):
Add
vbroadcastss
alternative
.
(*
vec_set
<
mode
>
_0_avx
,
**
vec_set
<
mode
>
_0_sse4_1
):
Macro
-
ize
for
V4SF
and
V4SI
.
Move
C
alternatives
to
front
.
Add
insertps
and
pinsrd
alternatives
.
(*
vec_set
<
mode
>
_0_sse2
):
Split
out
from
...
(
vec_set
<
mode
>
_0
):
Macro
-
ize
for
V4SF
and
V4SI
.
(
vec_interleave_highv2df
,
vec_interleave_lowv2df
):
Require
register
destination
;
use
ix86_vec_interleave_v2df_operator_ok
,
instead
of
ix86_fixup_binary_operands
.
(*
avx_interleave_highv2df
,
avx_interleave_lowv2df
):
Add
movddup
.
(*
sse3_interleave_highv2df
,
sse3_interleave_lowv2df
):
New
.
(*
avx_movddup
,
*
sse3_movddup
):
Remove
.
New
splitter
from
vec_select
form
to
vec_duplicate
form
.
(*
sse2_interleave_highv2df
,
sse2_interleave_lowv2df
):
Use
ix86_vec_interleave_v2df_operator_ok
.
(
avx_movddup256
,
avx_unpcklpd256
):
Change
to
expanders
,
merge
into
...
(*
avx_unpcklpd256
):
...
here
.
(*
vec_dupv4si_avx
):
New
.
(*
vec_dupv2di_avx
):
Add
movddup
alternative
.
(*
vec_dupv2di_sse3
):
New
.
(
vec_dup
<
AVX256MODE24P
>):
Replace
avx_vbroadcasts
<
AVXMODEF4P
>
and
avx_vbroadcastss256
;
represent
with
vec_duplicate
instead
of
nested
vec_concat
operations
.
(
avx_vbroadcastf128_
<
mode
>):
Rename
from
avx_vbroadcastf128_p
<
avxmodesuffixf2c
>
256.
(*
avx_vperm_broadcast_v4sf
):
New
.
(*
avx_vperm_broadcast_
<
AVX256MODEF2P
>):
New
.
2009
-
11
-
30
Martin
Jambor
<
mjambor
@
suse
.
cz
>
PR
middle
-
end
/
42196
gcc/config/i386/i386-protos.h
View file @
5e04b3b6
...
...
@@ -86,6 +86,7 @@ extern void ix86_expand_binary_operator (enum rtx_code,
enum
machine_mode
,
rtx
[]);
extern
int
ix86_binary_operator_ok
(
enum
rtx_code
,
enum
machine_mode
,
rtx
[]);
extern
bool
ix86_lea_for_add_ok
(
enum
rtx_code
,
rtx
,
rtx
[]);
extern
bool
ix86_vec_interleave_v2df_operator_ok
(
rtx
operands
[
3
],
bool
high
);
extern
bool
ix86_dep_by_shift_count
(
const_rtx
set_insn
,
const_rtx
use_insn
);
extern
bool
ix86_agi_dependent
(
rtx
set_insn
,
rtx
use_insn
);
extern
void
ix86_expand_unary_operator
(
enum
rtx_code
,
enum
machine_mode
,
...
...
gcc/config/i386/i386.c
View file @
5e04b3b6
...
...
@@ -13849,6 +13849,19 @@ ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
return
TRUE
;
}
/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
are ok, keeping in mind the possible movddup alternative. */
bool
ix86_vec_interleave_v2df_operator_ok
(
rtx
operands
[
3
],
bool
high
)
{
if
(
MEM_P
(
operands
[
0
]))
return
rtx_equal_p
(
operands
[
0
],
operands
[
1
+
high
]);
if
(
MEM_P
(
operands
[
1
])
&&
MEM_P
(
operands
[
2
]))
return
TARGET_SSE3
&&
rtx_equal_p
(
operands
[
1
],
operands
[
2
]);
return
true
;
}
/* Post-reload splitter for converting an SF or DFmode value in an
SSE register into an unsigned SImode. */
...
...
@@ -21480,11 +21493,11 @@ static const struct builtin_description bdesc_special_args[] =
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_avx_vzeroall
,
"__builtin_ia32_vzeroall"
,
IX86_BUILTIN_VZEROALL
,
UNKNOWN
,
(
int
)
VOID_FTYPE_VOID
},
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_avx_vzeroupper
,
"__builtin_ia32_vzeroupper"
,
IX86_BUILTIN_VZEROUPPER
,
UNKNOWN
,
(
int
)
VOID_FTYPE_VOID
},
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_
avx_vbroadcastss
,
"__builtin_ia32_vbroadcastss"
,
IX86_BUILTIN_VBROADCASTSS
,
UNKNOWN
,
(
int
)
V4SF_FTYPE_PCFLOAT
},
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_
avx_vbroadcastsd256
,
"__builtin_ia32_vbroadcastsd256"
,
IX86_BUILTIN_VBROADCASTSD256
,
UNKNOWN
,
(
int
)
V4DF_FTYPE_PCDOUBLE
},
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_
avx_vbroadcastss256
,
"__builtin_ia32_vbroadcastss256"
,
IX86_BUILTIN_VBROADCASTSS256
,
UNKNOWN
,
(
int
)
V8SF_FTYPE_PCFLOAT
},
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_avx_vbroadcastf128_
pd256
,
"__builtin_ia32_vbroadcastf128_pd256"
,
IX86_BUILTIN_VBROADCASTPD256
,
UNKNOWN
,
(
int
)
V4DF_FTYPE_PCV2DF
},
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_avx_vbroadcastf128_
ps256
,
"__builtin_ia32_vbroadcastf128_ps256"
,
IX86_BUILTIN_VBROADCASTPS256
,
UNKNOWN
,
(
int
)
V8SF_FTYPE_PCV4SF
},
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_
vec_dupv4sf
,
"__builtin_ia32_vbroadcastss"
,
IX86_BUILTIN_VBROADCASTSS
,
UNKNOWN
,
(
int
)
V4SF_FTYPE_PCFLOAT
},
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_
vec_dupv4df
,
"__builtin_ia32_vbroadcastsd256"
,
IX86_BUILTIN_VBROADCASTSD256
,
UNKNOWN
,
(
int
)
V4DF_FTYPE_PCDOUBLE
},
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_
vec_dupv8sf
,
"__builtin_ia32_vbroadcastss256"
,
IX86_BUILTIN_VBROADCASTSS256
,
UNKNOWN
,
(
int
)
V8SF_FTYPE_PCFLOAT
},
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_avx_vbroadcastf128_
v4df
,
"__builtin_ia32_vbroadcastf128_pd256"
,
IX86_BUILTIN_VBROADCASTPD256
,
UNKNOWN
,
(
int
)
V4DF_FTYPE_PCV2DF
},
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_avx_vbroadcastf128_
v8sf
,
"__builtin_ia32_vbroadcastf128_ps256"
,
IX86_BUILTIN_VBROADCASTPS256
,
UNKNOWN
,
(
int
)
V8SF_FTYPE_PCV4SF
},
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_avx_movupd256
,
"__builtin_ia32_loadupd256"
,
IX86_BUILTIN_LOADUPD256
,
UNKNOWN
,
(
int
)
V4DF_FTYPE_PCDOUBLE
},
{
OPTION_MASK_ISA_AVX
,
CODE_FOR_avx_movups256
,
"__builtin_ia32_loadups256"
,
IX86_BUILTIN_LOADUPS256
,
UNKNOWN
,
(
int
)
V8SF_FTYPE_PCFLOAT
},
...
...
@@ -24597,7 +24610,7 @@ avx_vpermilp_parallel (rtx par, enum machine_mode mode)
if
(
!
CONST_INT_P
(
er
))
return
0
;
ei
=
INTVAL
(
er
);
if
(
ei
>=
nelt
)
if
(
ei
>=
2
*
nelt
)
return
0
;
ipar
[
i
]
=
ei
;
}
...
...
@@ -25713,6 +25726,16 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
*
total
=
0
;
return
false
;
case
VEC_SELECT
:
case
VEC_CONCAT
:
case
VEC_MERGE
:
case
VEC_DUPLICATE
:
/* ??? Assume all of these vector manipulation patterns are
recognizable. In which case they all pretty much have the
same cost. */
*
total
=
COSTS_N_INSNS
(
1
);
return
true
;
default
:
return
false
;
}
...
...
@@ -26547,16 +26570,43 @@ x86_emit_floatuns (rtx operands[2])
emit_label
(
donelab
);
}
/* AVX does not support 32-byte integer vector operations,
thus the longest vector we are faced with is V16QImode. */
#define MAX_VECT_LEN 16
struct
expand_vec_perm_d
{
rtx
target
,
op0
,
op1
;
unsigned
char
perm
[
MAX_VECT_LEN
];
enum
machine_mode
vmode
;
unsigned
char
nelt
;
bool
testing_p
;
};
static
bool
expand_vec_perm_1
(
struct
expand_vec_perm_d
*
d
);
static
bool
expand_vec_perm_broadcast_1
(
struct
expand_vec_perm_d
*
d
);
/* Get a vector mode of the same size as the original but with elements
twice as wide. This is only guaranteed to apply to integral vectors. */
static
inline
enum
machine_mode
get_mode_wider_vector
(
enum
machine_mode
o
)
{
/* ??? Rely on the ordering that genmodes.c gives to vectors. */
enum
machine_mode
n
=
GET_MODE_WIDER_MODE
(
o
);
gcc_assert
(
GET_MODE_NUNITS
(
o
)
==
GET_MODE_NUNITS
(
n
)
*
2
);
gcc_assert
(
GET_MODE_SIZE
(
o
)
==
GET_MODE_SIZE
(
n
));
return
n
;
}
/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
with all elements equal to VAR. Return true if successful. */
/* ??? Call into the vec_perm support to implement the broadcast. */
static
bool
ix86_expand_vector_init_duplicate
(
bool
mmx_ok
,
enum
machine_mode
mode
,
rtx
target
,
rtx
val
)
{
enum
machine_mode
hmode
,
smode
,
wsmode
,
wvmode
;
rtx
x
;
bool
ok
;
switch
(
mode
)
{
...
...
@@ -26566,13 +26616,28 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
return
false
;
/* FALLTHRU */
case
V4DFmode
:
case
V4DImode
:
case
V8SFmode
:
case
V8SImode
:
case
V2DFmode
:
case
V2DImode
:
case
V4SFmode
:
case
V4SImode
:
val
=
force_reg
(
GET_MODE_INNER
(
mode
),
val
);
x
=
gen_rtx_VEC_DUPLICATE
(
mode
,
val
);
emit_insn
(
gen_rtx_SET
(
VOIDmode
,
target
,
x
));
{
rtx
insn
,
dup
;
/* First attempt to recognize VAL as-is. */
dup
=
gen_rtx_VEC_DUPLICATE
(
mode
,
val
);
insn
=
emit_insn
(
gen_rtx_SET
(
VOIDmode
,
target
,
dup
));
if
(
recog_memoized
(
insn
)
<
0
)
{
/* If that fails, force VAL into a register. */
XEXP
(
dup
,
0
)
=
force_reg
(
GET_MODE_INNER
(
mode
),
val
);
ok
=
recog_memoized
(
insn
)
>=
0
;
gcc_assert
(
ok
);
}
}
return
true
;
case
V4HImode
:
...
...
@@ -26580,130 +26645,87 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
return
false
;
if
(
TARGET_SSE
||
TARGET_3DNOW_A
)
{
rtx
x
;
val
=
gen_lowpart
(
SImode
,
val
);
x
=
gen_rtx_TRUNCATE
(
HImode
,
val
);
x
=
gen_rtx_VEC_DUPLICATE
(
mode
,
x
);
emit_insn
(
gen_rtx_SET
(
VOIDmode
,
target
,
x
));
return
true
;
}
else
{
smode
=
HImode
;
wsmode
=
SImode
;
wvmode
=
V2SImode
;
goto
widen
;
}
goto
widen
;
case
V8QImode
:
if
(
!
mmx_ok
)
return
false
;
smode
=
QImode
;
wsmode
=
HImode
;
wvmode
=
V4HImode
;
goto
widen
;
case
V8HImode
:
if
(
TARGET_SSE2
)
{
struct
expand_vec_perm_d
dperm
;
rtx
tmp1
,
tmp2
;
/* Extend HImode to SImode using a paradoxical SUBREG. */
permute:
memset
(
&
dperm
,
0
,
sizeof
(
dperm
));
dperm
.
target
=
target
;
dperm
.
vmode
=
mode
;
dperm
.
nelt
=
GET_MODE_NUNITS
(
mode
);
dperm
.
op0
=
dperm
.
op1
=
gen_reg_rtx
(
mode
);
/* Extend to SImode using a paradoxical SUBREG. */
tmp1
=
gen_reg_rtx
(
SImode
);
emit_move_insn
(
tmp1
,
gen_lowpart
(
SImode
,
val
));
/* Insert the SImode value as low element of V4SImode vector. */
tmp2
=
gen_reg_rtx
(
V4SImode
);
tmp1
=
gen_rtx_VEC_MERGE
(
V4SImode
,
gen_rtx_VEC_DUPLICATE
(
V4SImode
,
tmp1
),
CONST0_RTX
(
V4SImode
),
const1_rtx
);
emit_insn
(
gen_rtx_SET
(
VOIDmode
,
tmp2
,
tmp1
));
/* Cast the V4SImode vector back to a V8HImode vector. */
tmp1
=
gen_reg_rtx
(
V8HImode
);
emit_move_insn
(
tmp1
,
gen_lowpart
(
V8HImode
,
tmp2
));
/* Duplicate the low short through the whole low SImode word. */
emit_insn
(
gen_vec_interleave_lowv8hi
(
tmp1
,
tmp1
,
tmp1
));
/* Cast the V8HImode vector back to a V4SImode vector. */
tmp2
=
gen_reg_rtx
(
V4SImode
);
emit_move_insn
(
tmp2
,
gen_lowpart
(
V4SImode
,
tmp1
));
/* Replicate the low element of the V4SImode vector. */
emit_insn
(
gen_sse2_pshufd
(
tmp2
,
tmp2
,
const0_rtx
));
/* Cast the V2SImode back to V8HImode, and store in target. */
emit_move_insn
(
target
,
gen_lowpart
(
V8HImode
,
tmp2
));
return
true
;
/* Insert the SImode value as low element of a V4SImode vector. */
tmp2
=
gen_lowpart
(
V4SImode
,
dperm
.
op0
);
emit_insn
(
gen_vec_setv4si_0
(
tmp2
,
CONST0_RTX
(
V4SImode
),
tmp1
));
ok
=
(
expand_vec_perm_1
(
&
dperm
)
||
expand_vec_perm_broadcast_1
(
&
dperm
));
gcc_assert
(
ok
);
return
ok
;
}
smode
=
HImode
;
wsmode
=
SImode
;
wvmode
=
V4SImode
;
goto
widen
;
case
V16QImode
:
if
(
TARGET_SSE2
)
{
rtx
tmp1
,
tmp2
;
/* Extend QImode to SImode using a paradoxical SUBREG. */
tmp1
=
gen_reg_rtx
(
SImode
);
emit_move_insn
(
tmp1
,
gen_lowpart
(
SImode
,
val
));
/* Insert the SImode value as low element of V4SImode vector. */
tmp2
=
gen_reg_rtx
(
V4SImode
);
tmp1
=
gen_rtx_VEC_MERGE
(
V4SImode
,
gen_rtx_VEC_DUPLICATE
(
V4SImode
,
tmp1
),
CONST0_RTX
(
V4SImode
),
const1_rtx
);
emit_insn
(
gen_rtx_SET
(
VOIDmode
,
tmp2
,
tmp1
));
/* Cast the V4SImode vector back to a V16QImode vector. */
tmp1
=
gen_reg_rtx
(
V16QImode
);
emit_move_insn
(
tmp1
,
gen_lowpart
(
V16QImode
,
tmp2
));
/* Duplicate the low byte through the whole low SImode word. */
emit_insn
(
gen_vec_interleave_lowv16qi
(
tmp1
,
tmp1
,
tmp1
));
emit_insn
(
gen_vec_interleave_lowv16qi
(
tmp1
,
tmp1
,
tmp1
));
/* Cast the V16QImode vector back to a V4SImode vector. */
tmp2
=
gen_reg_rtx
(
V4SImode
);
emit_move_insn
(
tmp2
,
gen_lowpart
(
V4SImode
,
tmp1
));
/* Replicate the low element of the V4SImode vector. */
emit_insn
(
gen_sse2_pshufd
(
tmp2
,
tmp2
,
const0_rtx
));
/* Cast the V2SImode back to V16QImode, and store in target. */
emit_move_insn
(
target
,
gen_lowpart
(
V16QImode
,
tmp2
));
return
true
;
}
smode
=
QImode
;
wsmode
=
HImode
;
wvmode
=
V8HImode
;
goto
permute
;
goto
widen
;
widen:
/* Replicate the value once into the next wider mode and recurse. */
val
=
convert_modes
(
wsmode
,
smode
,
val
,
true
);
x
=
expand_simple_binop
(
wsmode
,
ASHIFT
,
val
,
GEN_INT
(
GET_MODE_BITSIZE
(
smode
)),
NULL_RTX
,
1
,
OPTAB_LIB_WIDEN
);
val
=
expand_simple_binop
(
wsmode
,
IOR
,
val
,
x
,
x
,
1
,
OPTAB_LIB_WIDEN
);
x
=
gen_reg_rtx
(
wvmode
);
if
(
!
ix86_expand_vector_init_duplicate
(
mmx_ok
,
wvmode
,
x
,
val
))
gcc_unreachable
();
emit_move_insn
(
target
,
gen_lowpart
(
mode
,
x
));
return
true
;
{
enum
machine_mode
smode
,
wsmode
,
wvmode
;
rtx
x
;
smode
=
GET_MODE_INNER
(
mode
);
wvmode
=
get_mode_wider_vector
(
mode
);
wsmode
=
GET_MODE_INNER
(
wvmode
);
val
=
convert_modes
(
wsmode
,
smode
,
val
,
true
);
x
=
expand_simple_binop
(
wsmode
,
ASHIFT
,
val
,
GEN_INT
(
GET_MODE_BITSIZE
(
smode
)),
NULL_RTX
,
1
,
OPTAB_LIB_WIDEN
);
val
=
expand_simple_binop
(
wsmode
,
IOR
,
val
,
x
,
x
,
1
,
OPTAB_LIB_WIDEN
);
x
=
gen_lowpart
(
wvmode
,
target
);
ok
=
ix86_expand_vector_init_duplicate
(
mmx_ok
,
wvmode
,
x
,
val
);
gcc_assert
(
ok
);
return
ok
;
}
case
V4DFmode
:
hmode
=
V2DFmode
;
goto
half
;
case
V4DImode
:
hmode
=
V2DImode
;
goto
half
;
case
V8SFmode
:
hmode
=
V4SFmode
;
goto
half
;
case
V8SImode
:
hmode
=
V4SImode
;
goto
half
;
case
V16HImode
:
hmode
=
V8HImode
;
goto
half
;
case
V32QImode
:
hmode
=
V16QImode
;
goto
half
;
half
:
{
rtx
tmp
=
gen_reg_rtx
(
hmode
);
ix86_expand_vector_init_duplicate
(
mmx_ok
,
hmode
,
tmp
,
val
);
emit_insn
(
gen_rtx_SET
(
VOIDmode
,
target
,
gen_rtx_VEC_CONCAT
(
mode
,
tmp
,
tmp
)));
enum
machine_mode
hvmode
=
(
mode
==
V16HImode
?
V8HImode
:
V16QImode
);
rtx
x
=
gen_reg_rtx
(
hvmode
);
ok
=
ix86_expand_vector_init_duplicate
(
false
,
hvmode
,
x
,
val
);
gcc_assert
(
ok
);
x
=
gen_rtx_VEC_CONCAT
(
mode
,
x
,
x
);
emit_insn
(
gen_rtx_SET
(
VOIDmode
,
target
,
x
));
}
return
true
;
...
...
@@ -29085,19 +29107,6 @@ ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
return
ix86_builtins
[(
int
)
fcode
];
}
/* AVX does not support 32-byte integer vector operations,
thus the longest vector we are faced with is V16QImode. */
#define MAX_VECT_LEN 16
struct
expand_vec_perm_d
{
rtx
target
,
op0
,
op1
;
unsigned
char
perm
[
MAX_VECT_LEN
];
enum
machine_mode
vmode
;
unsigned
char
nelt
;
bool
testing_p
;
};
/* Return a vector mode with twice as many elements as VMODE. */
/* ??? Consider moving this to a table generated by genmodes.c. */
...
...
@@ -29739,8 +29748,8 @@ expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
return
true
;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.
Pattern match
extract-even
and extract-odd permutations. */
/* A subroutine of ix86_expand_vec_perm_builtin_1.
Implement extract-even
and extract-odd permutations. */
static
bool
expand_vec_perm_even_odd_1
(
struct
expand_vec_perm_d
*
d
,
unsigned
odd
)
...
...
@@ -29855,6 +29864,9 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
return
true
;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
extract-even and extract-odd permutations. */
static
bool
expand_vec_perm_even_odd
(
struct
expand_vec_perm_d
*
d
)
{
...
...
@@ -29871,6 +29883,84 @@ expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
return
expand_vec_perm_even_odd_1
(
d
,
odd
);
}
/* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
permutations. We assume that expand_vec_perm_1 has already failed. */
static
bool
expand_vec_perm_broadcast_1
(
struct
expand_vec_perm_d
*
d
)
{
unsigned
elt
=
d
->
perm
[
0
],
nelt2
=
d
->
nelt
/
2
;
enum
machine_mode
vmode
=
d
->
vmode
;
unsigned
char
perm2
[
4
];
rtx
op0
=
d
->
op0
;
bool
ok
;
switch
(
vmode
)
{
case
V4DFmode
:
case
V8SFmode
:
/* These are special-cased in sse.md so that we can optionally
use the vbroadcast instruction. They expand to two insns
if the input happens to be in a register. */
gcc_unreachable
();
case
V2DFmode
:
case
V2DImode
:
case
V4SFmode
:
case
V4SImode
:
/* These are always implementable using standard shuffle patterns. */
gcc_unreachable
();
case
V8HImode
:
case
V16QImode
:
/* These can be implemented via interleave. We save one insn by
stopping once we have promoted to V4SImode and then use pshufd. */
do
{
optab
otab
=
vec_interleave_low_optab
;
if
(
elt
>=
nelt2
)
{
otab
=
vec_interleave_high_optab
;
elt
-=
nelt2
;
}
nelt2
/=
2
;
op0
=
expand_binop
(
vmode
,
otab
,
op0
,
op0
,
NULL
,
0
,
OPTAB_DIRECT
);
vmode
=
get_mode_wider_vector
(
vmode
);
op0
=
gen_lowpart
(
vmode
,
op0
);
}
while
(
vmode
!=
V4SImode
);
memset
(
perm2
,
elt
,
4
);
ok
=
expand_vselect
(
gen_lowpart
(
V4SImode
,
d
->
target
),
op0
,
perm2
,
4
);
gcc_assert
(
ok
);
return
true
;
default:
gcc_unreachable
();
}
}
/* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
broadcast permutations. */
static
bool
expand_vec_perm_broadcast
(
struct
expand_vec_perm_d
*
d
)
{
unsigned
i
,
elt
,
nelt
=
d
->
nelt
;
if
(
d
->
op0
!=
d
->
op1
)
return
false
;
elt
=
d
->
perm
[
0
];
for
(
i
=
1
;
i
<
nelt
;
++
i
)
if
(
d
->
perm
[
i
]
!=
elt
)
return
false
;
return
expand_vec_perm_broadcast_1
(
d
);
}
/* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
With all of the interface bits taken care of, perform the expansion
in D and return true on success. */
...
...
@@ -29878,8 +29968,7 @@ expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
static
bool
ix86_expand_vec_perm_builtin_1
(
struct
expand_vec_perm_d
*
d
)
{
/* First things first -- check if the instruction is implementable
with a single instruction. */
/* Try a single instruction expansion. */
if
(
expand_vec_perm_1
(
d
))
return
true
;
...
...
@@ -29894,13 +29983,16 @@ ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
if
(
expand_vec_perm_interleave2
(
d
))
return
true
;
if
(
expand_vec_perm_broadcast
(
d
))
return
true
;
/* Try sequences of three instructions. */
if
(
expand_vec_perm_pshufb2
(
d
))
return
true
;
/* ??? Look for narrow permutations whose element orderings would
allow the prom
i
tion to a wider mode. */
allow the prom
o
tion to a wider mode. */
/* ??? Look for sequences of interleave or a wider permute that place
the data into the correct lanes for a half-vector shuffle like
...
...
@@ -29912,8 +30004,6 @@ ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
if
(
expand_vec_perm_even_odd
(
d
))
return
true
;
/* ??? Pattern match broadcast. */
return
false
;
}
...
...
gcc/config/i386/predicates.md
View file @
5e04b3b6
...
...
@@ -1241,3 +1241,20 @@
(define_predicate "avx_vperm2f128_v4df_operand"
(and (match_code "parallel")
(match_test "avx_vperm2f128_parallel (op, V4DFmode)")))
;; Return 1 if OP is a parallel for a vbroadcast permute.
(define_predicate "avx_vbroadcast_operand"
(and (match_code "parallel")
(match_code "const_int" "a"))
{
rtx elt = XVECEXP (op, 0, 0);
int i, nelt = XVECLEN (op, 0);
/
*
Don't bother checking there are the right number of operands,
merely that they're all identical.
*
/
for (i = 1; i < nelt; ++i)
if (XVECEXP (op, 0, i) != elt)
return false;
return true;
})
gcc/config/i386/sse.md
View file @
5e04b3b6
...
...
@@ -54,6 +54,7 @@
(define_mode_iterator AVX256MODEF2P
[
V8SF V4DF
]
)
(define_mode_iterator AVX256MODE2P
[
V8SI V8SF V4DF
]
)
(define_mode_iterator AVX256MODE24P
[
V8SI V8SF V4DI V4DF
]
)
(define_mode_iterator AVX256MODE4P
[
V4DI V4DF
]
)
(define_mode_iterator AVX256MODE8P
[
V8SI V8SF
]
)
(define_mode_iterator AVXMODEF2P
[
V4SF V2DF V8SF V4DF
]
)
...
...
@@ -96,6 +97,8 @@
(define_mode_attr ssemodesuffixf2c
[
(V4SF "s") (V2DF "d")
]
)
(define_mode_attr ssescalarmodesuffix2s
[
(V4SF "ss") (V4SI "d")
]
)
;; Mapping of the max integer size for xop rotate immediate constraint
(define_mode_attr sserotatemax
[
(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")
]
)
...
...
@@ -125,17 +128,18 @@
[
(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
(V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
(define_mode_attr avxhalfvecmode
[
(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI")
(V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")])
[
(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
(V8SF "V4SF") (V4DF "V2DF")
(V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
(define_mode_attr avxscalarmode
[
(V16QI "QI") (V8HI
"HI") (V4SI "S
I") (V4SF "SF") (V2DF "DF")
(V8SF "SF") (V4DF "DF")])
[
(V16QI "QI") (V8HI
"HI") (V4SI "SI") (V2DI "D
I") (V4SF "SF") (V2DF "DF")
(V
32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V
8SF "SF") (V4DF "DF")])
(define_mode_attr avxcvtvecmode
[
(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")
]
)
(define_mode_attr avxpermvecmode
[
(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")
]
)
(define_mode_attr avxmodesuffixf2c
[
(V4SF "s") (V2DF "d") (V8S
F "s
") (V4DF "d")
]
)
[
(V4SF "s") (V2DF "d") (V8S
I "s") (V8SF "s") (V4DI "d
") (V4DF "d")
]
)
(define_mode_attr avxmodesuffixp
[
(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
(V4DF "pd")])
...
...
@@ -4012,14 +4016,27 @@
[
(set_attr "type" "ssemov")
(set_attr "mode" "SF")])
(define_expand "vec_dupv4sf"
[
(set (match_operand:V4SF 0 "register_operand" "")
(vec_duplicate:V4SF
(match_operand:SF 1 "nonimmediate_operand" "")))]
"TARGET_SSE"
{
if (!TARGET_AVX)
operands
[
1
]
= force_reg (V4SFmode, operands
[
1
]
);
})
(define_insn "
*
vec_dupv4sf_avx"
[
(set (match_operand:V4SF 0 "register_operand" "=x")
[
(set (match_operand:V4SF 0 "register_operand" "=x
,x
")
(vec_duplicate:V4SF
(match_operand:SF 1 "
register_operand" "x
")))]
(match_operand:SF 1 "
nonimmediate_operand" "x,m
")))]
"TARGET_AVX"
"vshufps
\t
{$0, %1, %1, %0|%0, %1, %1, 0}"
[
(set_attr "type" "sselog1")
(set_attr "length_immediate" "1")
"@
vshufps
\t
{$0, %1, %1, %0|%0, %1, %1, 0}
vbroadcastss
\t
{%1, %0|%0, %1}"
[
(set_attr "type" "sselog1,ssemov")
(set_attr "length_immediate" "1,0")
(set_attr "prefix_extra" "0,1")
(set_attr "prefix" "vex")
(set_attr "mode" "V4SF")])
...
...
@@ -4125,35 +4142,78 @@
DONE;
})
(define_insn "
*
vec_setv4sf_0_avx"
[
(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m")
(vec_merge:V4SF
(vec_duplicate:V4SF
(match_operand:SF 2 "general_operand" " x,m,
*r,x*
rfF"))
(match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0")
(define_insn "
*
vec_set
<mode>
_0_
avx"
[
(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
(vec_merge:SSEMODE4S
(vec_duplicate:SSEMODE4S
(match_operand:
<ssescalarmode>
2
"general_operand" " x,m,
*r,x,*
rm,x
*
rfF"))
(match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
(const_int 1)))]
"TARGET_AVX"
"@
v
movss
\t
{%2, %1, %0|%0, %1, %2
}
vmov
ss
\t
{%2, %0|%0, %2}
v
insertps
\t
{$0xe, %2, %2, %0|%0, %2, %2, 0xe
}
vmov
<ssescalarmodesuffix2s>
\t
{%2, %0|%0, %2}
vmovd
\t
{%2, %0|%0, %2}
vmovss
\t
{%2, %1, %0|%0, %1, %2}
vpinsrd
\t
{$0, %2, %1, %0|%0, %1, %2, 0}
#"
[
(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,
*
")
(set_attr "prefix_extra" "
*,*
,
*,*
,1,
*
")
(set_attr "length_immediate" "
*,*
,
*,*
,1,
*
")
(set_attr "prefix" "vex")
(set_attr "mode" "SF,
<ssescalarmode>
,SI,SF,TI,
*
")])
(define_insn "
*
vec_set
<mode>
_0_
sse4_1"
[
(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
(vec_merge:SSEMODE4S
(vec_duplicate:SSEMODE4S
(match_operand:
<ssescalarmode>
2
"general_operand" " x,m,
*r,x,*
rm,
*
rfF"))
(match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")
(const_int 1)))]
"TARGET_SSE4_1"
"@
insertps
\t
{$0xe, %2, %0|%0, %2, 0xe}
mov
<ssescalarmodesuffix2s>
\t
{%2, %0|%0, %2}
movd
\t
{%2, %0|%0, %2}
movss
\t
{%2, %0|%0, %2}
pinsrd
\t
{$0, %2, %0|%0, %2, 0}
#"
[
(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,
*
")
(set_attr "prefix_extra" "
*,*
,
*,*
,1,
*
")
(set_attr "length_immediate" "
*,*
,
*,*
,1,
*
")
(set_attr "mode" "SF,
<ssescalarmode>
,SI,SF,TI,
*
")])
(define_insn "
*
vec_set
<mode>
_0_
sse2"
[
(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
(vec_merge:SSEMODE4S
(vec_duplicate:SSEMODE4S
(match_operand:
<ssescalarmode>
2
"general_operand" " m,
*r,x,x*
rfF"))
(match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
(const_int 1)))]
"TARGET_SSE2"
"@
mov
<ssescalarmodesuffix2s>
\t
{%2, %0|%0, %2}
movd
\t
{%2, %0|%0, %2}
movss
\t
{%2, %0|%0, %2}
#"
[
(set_attr "type" "ssemov")
(set_attr "
prefix" "vex"
)
(set_attr "mode" "SF")])
(define_insn "vec_setv4sf_0"
[
(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m")
(vec_merge:V4SF
(vec_duplicate:V4SF
(match_operand:SF 2 "general_operand" " x,m,
*r
,x*
rfF"))
(match_operand:
V4SF 1 "vector_move_operand" " 0,C,C
,0")
(set_attr "
mode" "
<ssescalarmode>
,SI,SF,
*
")]
)
(define_insn "vec_set
<mode>
_0"
[
(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
(vec_merge:SSEMODE4S
(vec_duplicate:SSEMODE4S
(match_operand:
<ssescalarmode>
2
"general_operand" " m,x
,x
*
rfF"))
(match_operand:
SSEMODE4S 1 "vector_move_operand" " C,0
,0")
(const_int 1)))]
"TARGET_SSE"
"@
movss
\t
{%2, %0|%0, %2}
movss
\t
{%2, %0|%0, %2}
movd
\t
{%2, %0|%0, %2}
#"
[
(set_attr "type" "ssemov")
(set_attr "mode" "SF")])
...
...
@@ -4484,7 +4544,7 @@
(set_attr "mode" "V4DF")])
(define_expand "vec_interleave_highv2df"
[
(set (match_operand:V2DF 0 "
nonimmediate
_operand" "")
[
(set (match_operand:V2DF 0 "
register
_operand" "")
(vec_select:V2DF
(vec_concat:V4DF
(match_operand:V2DF 1 "nonimmediate_operand" "")
...
...
@@ -4492,24 +4552,46 @@
(parallel
[
(const_int 1)
(const_int 3)])))]
"TARGET_SSE2"
"ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
{
if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
operands
[
2
]
= force_reg (V2DFmode, operands
[
2
]
);
})
(define_insn "
*
avx_interleave_highv2df"
[
(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
[
(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,
x,
m")
(vec_select:V2DF
(vec_concat:V4DF
(match_operand:V2DF 1 "nonimmediate_operand" " x,o,x")
(match_operand:V2DF 2 "nonimmediate_operand" " x,x,0"))
(match_operand:V2DF 1 "nonimmediate_operand" " x,o,
o,
x")
(match_operand:V2DF 2 "nonimmediate_operand" " x,
1,
x,0"))
(parallel
[
(const_int 1)
(const_int 3)])))]
"TARGET_AVX &&
!(MEM_P (operands
[
1
]
) && MEM_P (operands
[
2
]
)
)"
"TARGET_AVX &&
ix86_vec_interleave_v2df_operator_ok (operands, 1
)"
"@
vunpckhpd
\t
{%2, %1, %0|%0, %1, %2}
vmovddup
\t
{%H1, %0|%0, %H1}
vmovlpd
\t
{%H1, %2, %0|%0, %2, %H1}
vmovhpd
\t
{%1, %0|%0, %1}"
[
(set_attr "type" "sselog,ssemov,ssemov")
[
(set_attr "type" "sselog,sse
log,sse
mov,ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "V2DF,V1DF,V1DF")])
(set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
(define_insn "
*
sse3_interleave_highv2df"
[
(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
(vec_select:V2DF
(vec_concat:V4DF
(match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
(match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
(parallel
[
(const_int 1)
(const_int 3)])))]
"TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
"@
unpckhpd
\t
{%2, %0|%0, %2}
movddup
\t
{%H1, %0|%0, %H1}
movlpd
\t
{%H1, %0|%0, %H1}
movhpd
\t
{%1, %0|%0, %1}"
[
(set_attr "type" "sselog,sselog,ssemov,ssemov")
(set_attr "prefix_data16" "
*,*
,1,1")
(set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
(define_insn "
*
sse2_interleave_highv2df"
[
(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
...
...
@@ -4519,7 +4601,7 @@
(match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
(parallel
[
(const_int 1)
(const_int 3)])))]
"TARGET_SSE2 &&
!(MEM_P (operands
[
1
]
) && MEM_P (operands
[
2
]
)
)"
"TARGET_SSE2 &&
ix86_vec_interleave_v2df_operator_ok (operands, 1
)"
"@
unpckhpd
\t
{%2, %0|%0, %2}
movlpd
\t
{%H1, %0|%0, %H1}
...
...
@@ -4528,85 +4610,48 @@
(set_attr "prefix_data16" "
*
,1,1")
(set_attr "mode" "V2DF,V1DF,V1DF")])
(define_insn "avx_movddup256"
[
(set (match_operand:V4DF 0 "register_operand" "=x")
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_expand "avx_movddup256"
[
(set (match_operand:V4DF 0 "register_operand" "")
(vec_select:V4DF
(vec_concat:V8DF
(match_operand:V4DF 1 "nonimmediate_operand" "
xm
")
(match_operand:V4DF 1 "nonimmediate_operand" "")
(match_dup 1))
(parallel
[
(const_int 0) (const_int
2
)
(const_int
4
) (const_int 6)])))]
(parallel
[
(const_int 0) (const_int
4
)
(const_int
2
) (const_int 6)])))]
"TARGET_AVX"
"vmovddup
\t
{%1, %0|%0, %1}"
[
(set_attr "type" "sselog1")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
(define_insn "
*
avx_movddup"
[
(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
(vec_select:V2DF
(vec_concat:V4DF
(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
(match_dup 1))
(parallel
[
(const_int 0)
(const_int 2)])))]
"TARGET_AVX && !(MEM_P (operands
[
0
]
) && MEM_P (operands
[
1
]
))"
"@
vmovddup
\t
{%1, %0|%0, %1}
#"
[
(set_attr "type" "sselog1,ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "V2DF")])
(define_insn "
*
sse3_movddup"
[
(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
(vec_select:V2DF
(vec_concat:V4DF
(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")
(match_dup 1))
(parallel
[
(const_int 0)
(const_int 2)])))]
"TARGET_SSE3 && !(MEM_P (operands
[
0
]
) && MEM_P (operands
[
1
]
))"
"@
movddup
\t
{%1, %0|%0, %1}
#"
[
(set_attr "type" "sselog1,ssemov")
(set_attr "mode" "V2DF")])
(define_split
[
(set (match_operand:V2DF 0 "memory_operand" "")
(vec_select:V2DF
(vec_concat:V4DF
(match_operand:V2DF 1 "register_operand" "")
(match_dup 1))
(parallel
[
(const_int 0)
(const_int 2)])))]
"TARGET_SSE3 && reload_completed"
[
(const_int 0)
]
{
rtx low = gen_rtx_REG (DFmode, REGNO (operands
[
1
]
));
emit_move_insn (adjust_address (operands
[
0
]
, DFmode, 0), low);
emit_move_insn (adjust_address (operands
[
0
]
, DFmode, 8), low);
DONE;
})
"")
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_insn "avx_unpcklpd256"
[
(set (match_operand:V4DF 0 "register_operand" "=x")
(define_expand "avx_unpcklpd256"
[
(set (match_operand:V4DF 0 "register_operand" "")
(vec_select:V4DF
(vec_concat:V8DF
(match_operand:V4DF 1 "register_operand" "
x
")
(match_operand:V4DF 2 "nonimmediate_operand" "
xm
"))
(match_operand:V4DF 1 "register_operand" "")
(match_operand:V4DF 2 "nonimmediate_operand" ""))
(parallel
[
(const_int 0) (const_int 4)
(const_int 2) (const_int 6)])))]
"TARGET_AVX"
"vunpcklpd
\t
{%2, %1, %0|%0, %1, %2}"
"")
(define_insn "
*
avx_unpcklpd256"
[
(set (match_operand:V4DF 0 "register_operand" "=x,x")
(vec_select:V4DF
(vec_concat:V8DF
(match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
(match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
(parallel
[
(const_int 0) (const_int 4)
(const_int 2) (const_int 6)])))]
"TARGET_AVX
&& (!MEM_P (operands
[
1
]
) || rtx_equal_p (operands
[
1
]
, operands
[
2
]
))"
"@
vmovddup
\t
{%1, %0|%0, %1}
vunpcklpd
\t
{%2, %1, %0|%0, %1, %2}"
[
(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
(define_expand "vec_interleave_lowv2df"
[
(set (match_operand:V2DF 0 "
nonimmediate
_operand" "")
[
(set (match_operand:V2DF 0 "
register
_operand" "")
(vec_select:V2DF
(vec_concat:V4DF
(match_operand:V2DF 1 "nonimmediate_operand" "")
...
...
@@ -4614,24 +4659,46 @@
(parallel
[
(const_int 0)
(const_int 2)])))]
"TARGET_SSE2"
"ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
{
if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
operands
[
1
]
= force_reg (V2DFmode, operands
[
1
]
);
})
(define_insn "
*
avx_interleave_lowv2df"
[
(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
[
(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,
x,
o")
(vec_select:V2DF
(vec_concat:V4DF
(match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
(match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
(match_operand:V2DF 1 "nonimmediate_operand" " x,
m,
x,0")
(match_operand:V2DF 2 "nonimmediate_operand" " x,
1,
m,x"))
(parallel
[
(const_int 0)
(const_int 2)])))]
"TARGET_AVX &&
!(MEM_P (operands
[
1
]
) && MEM_P (operands
[
2
]
)
)"
"TARGET_AVX &&
ix86_vec_interleave_v2df_operator_ok (operands, 0
)"
"@
vunpcklpd
\t
{%2, %1, %0|%0, %1, %2}
vmovddup
\t
{%1, %0|%0, %1}
vmovhpd
\t
{%2, %1, %0|%0, %1, %2}
vmovlpd
\t
{%2, %H0|%H0, %2}"
[
(set_attr "type" "sselog,ssemov,ssemov")
[
(set_attr "type" "sselog,sse
log,sse
mov,ssemov")
(set_attr "prefix" "vex")
(set_attr "mode" "V2DF,V1DF,V1DF")])
(set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
;; SSE3 interleave of the low DF elements of operands 1 and 2.
;; Four alternatives:
;;   0: unpcklpd  reg,reg               (general case, op0 == op1)
;;   1: movddup   mem,reg               (op1 is memory and op2 == op1,
;;                                       i.e. a broadcast of a memory scalar)
;;   2: movhpd    mem,reg               (high half loaded from memory)
;;   3: movlpd    reg,mem-high-half     (%H0 addresses the high 8 bytes
;;                                       of the memory destination)
;; ix86_vec_interleave_v2df_operator_ok validates the operand/alternative
;; combination.
(define_insn "*sse3_interleave_lowv2df"
  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
	(vec_select:V2DF
	  (vec_concat:V4DF
	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
	    (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
	  (parallel [(const_int 0)
		     (const_int 2)])))]
  "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   movddup\t{%1, %0|%0, %1}
   movhpd\t{%2, %0|%0, %2}
   movlpd\t{%2, %H0|%H0, %2}"
  [(set_attr "type" "sselog,sselog,ssemov,ssemov")
   ;; data16 prefix applies only to the movhpd/movlpd alternatives.
   (set_attr "prefix_data16" "*,*,1,1")
   (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
(define_insn "
*
sse2_interleave_lowv2df"
[
(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
...
...
@@ -4641,7 +4708,7 @@
(match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
(parallel
[
(const_int 0)
(const_int 2)])))]
"TARGET_SSE2 &&
!(MEM_P (operands
[
1
]
) && MEM_P (operands
[
2
]
)
)"
"TARGET_SSE2 &&
ix86_vec_interleave_v2df_operator_ok (operands, 0
)"
"@
unpcklpd
\t
{%2, %0|%0, %2}
movhpd
\t
{%2, %0|%0, %2}
...
...
@@ -4650,6 +4717,37 @@
(set_attr "prefix_data16" "
*
,1,1")
(set_attr "mode" "V2DF,V1DF,V1DF")])
;; Split a store of a register V2DF whose two elements are equal
;; (a duplicate of element 0, expressed as vec_select 0/2 of
;; (vec_concat x x)) into two scalar DFmode stores of the low half.
;; Runs after reload only, when operand 1 is a hard register.
(define_split
  [(set (match_operand:V2DF 0 "memory_operand" "")
	(vec_select:V2DF
	  (vec_concat:V4DF
	    (match_operand:V2DF 1 "register_operand" "")
	    (match_dup 1))
	  (parallel [(const_int 0)
		     (const_int 2)])))]
  "TARGET_SSE3 && reload_completed"
  [(const_int 0)]
{
  /* View the low 64 bits of the vector register as a DFmode scalar.  */
  rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
  /* Write it to both 8-byte halves of the memory destination.  */
  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
  DONE;
})
;; Recognize a register destination whose two elements both come from the
;; same element of a memory operand (indices N and N+2 into the concat of
;; the memory operand with itself select the same element twice) and turn
;; it into a vec_duplicate of the scalar at the right memory offset, which
;; the SSE3 movddup patterns can then match.
(define_split
  [(set (match_operand:V2DF 0 "register_operand" "")
	(vec_select:V2DF
	  (vec_concat:V4DF
	    (match_operand:V2DF 1 "memory_operand" "")
	    (match_dup 1))
	  (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
		     (match_operand:SI 3 "const_int_operand" "")])))]
  ;; Indices must be N and N+2, i.e. the same element of both copies.
  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
{
  /* Narrow the memory reference to the selected 8-byte element.  */
  operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
})
(define_expand "avx_shufpd256"
[
(match_operand:V4DF 0 "register_operand" "")
(match_operand:V4DF 1 "register_operand" "")
...
...
@@ -7408,6 +7506,20 @@
[
(set_attr "type" "ssemov")
(set_attr "mode" "V2SF,V4SF,V2SF")])
;; AVX broadcast of a 32-bit scalar into all four V4SI elements.
;; Alternative 0 shuffles a register with vpshufd imm 0; alternative 1
;; broadcasts from memory with vbroadcastss.
;; NOTE(review): alternative 1 pairs the "m" constraint with the
;; register_operand predicate — verify against upstream whether
;; nonimmediate_operand was intended.
(define_insn "*vec_dupv4si_avx"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
	(vec_duplicate:V4SI
	  (match_operand:SI 1 "register_operand" "x,m")))]
  "TARGET_AVX"
  "@
   vpshufd\t{$0, %1, %0|%0, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1,ssemov")
   ;; Only vpshufd carries an immediate; only vbroadcastss needs the
   ;; extra opcode prefix byte.
   (set_attr "length_immediate" "1,0")
   (set_attr "prefix_extra" "0,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "TI,V4SF")])
(define_insn "
*
vec_dupv4si"
[
(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
(vec_duplicate:V4SI
...
...
@@ -7417,19 +7529,31 @@
%vpshufd
\t
{$0, %1, %0|%0, %1, 0}
shufps
\t
{$0, %0, %0|%0, %0, 0}"
[
(set_attr "type" "sselog1")
(set_attr "prefix" "maybe_vex,orig")
(set_attr "length_immediate" "1")
(set_attr "mode" "TI,V4SF")])
(define_insn "
*
vec_dupv2di_avx"
[
(set (match_operand:V2DI 0 "register_operand"
"=
x")
[
(set (match_operand:V2DI 0 "register_operand"
"=x,
x")
(vec_duplicate:V2DI
(match_operand:DI 1 "
register_operand" "x
")))]
(match_operand:DI 1 "
nonimmediate_operand" " x,m
")))]
"TARGET_AVX"
"vpunpcklqdq
\t
{%1, %1, %0|%0, %1, %1}"
"@
vpunpcklqdq
\t
{%1, %1, %0|%0, %1, %1}
vmovddup
\t
{%1, %0|%0, %1}"
[
(set_attr "type" "sselog1")
(set_attr "prefix" "vex")
(set_attr "mode" "TI")])
(set_attr "mode" "TI,DF")])
;; SSE3 broadcast of a 64-bit scalar into both V2DI elements.
;; From a register (matching the destination): punpcklqdq %0,%0.
;; From memory: movddup, which duplicates a 64-bit memory scalar.
(define_insn "*vec_dupv2di_sse3"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
	(vec_duplicate:V2DI
	  (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
  "TARGET_SSE3"
  "@
   punpcklqdq\t%0, %0
   movddup\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI,DF")])
(define_insn "
*
vec_dupv2di"
[
(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
...
...
@@ -11838,6 +11962,108 @@
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
;; 256-bit broadcast of a scalar.  If the scalar is in memory, emit
;; vbroadcasts[sd] directly.  If it is in a register ("?x", discouraged),
;; emit "#" and split after reload into: duplicate into the low 128-bit
;; half of the destination, then vec_concat that half with itself to
;; fill the full 256-bit register.
(define_insn_and_split "vec_dup<mode>"
  [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
	(vec_duplicate:AVX256MODE24P
	  (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
  "TARGET_AVX"
  "@
   vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}
   #"
  ;; Only the register alternative needs splitting.
  "&& reload_completed && REG_P (operands[1])"
  [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
   (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
{
  /* Use the low half of the destination register as the 128-bit
     intermediate.  */
  operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
;; Duplicate a 128-bit operand into both halves of a 256-bit register.
;; From memory: vbroadcastf128.  From a register matching the
;; destination's low half ("0"): vinsertf128 into the high half.
;; From any other register ("?x", discouraged): vperm2f128 selecting the
;; low lane (%t1 prints the 256-bit name of the 128-bit operand) twice.
(define_insn "avx_vbroadcastf128_<mode>"
  [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
	(vec_concat:AVX256MODE
	  (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
	  (match_dup 1)))]
  "TARGET_AVX"
  "@
   vbroadcastf128\t{%1, %0|%0, %1}
   vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
   vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
  [(set_attr "type" "ssemov,sselog1,sselog1")
   (set_attr "prefix_extra" "1")
   ;; vbroadcastf128 takes no immediate; the other two take one byte.
   (set_attr "length_immediate" "0,1,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF,V8SF,V8SF")])
;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
;; If it so happens that the input is in memory, use vbroadcast.
;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; Broadcast of element ELT of a V4SF operand, written as a vec_select
;; with a parallel of four identical indices (avx_vbroadcast_operand).
;; The "C" constraint on operand 3 restricts alternative 0 to index 0
;; (a plain memory reference); alternative 1 handles nonzero indices
;; from offsettable memory; alternative 2 handles a register source.
(define_insn "*avx_vperm_broadcast_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
	(vec_select:V4SF
	  (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
	  (match_parallel 2 "avx_vbroadcast_operand"
	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
{
  int elt = INTVAL (operands[3]);
  switch (which_alternative)
    {
    case 0:
    case 1:
      /* Memory source: point at the selected 4-byte element and
	 broadcast it directly.  */
      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
      return "vbroadcastss\t{%1, %0|%0, %1}";
    case 2:
      /* Register source: replicate the 2-bit element index into all
	 four immediate fields (0x55 = 0b01010101 per index unit).  */
      operands[2] = GEN_INT (elt * 0x55);
      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "ssemov,ssemov,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,0,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SF,SF,V4SF")])
;; 256-bit version of the broadcast-as-vec_select pattern.  Always emits
;; "#" and splits after reload.  A memory source is narrowed to the
;; selected scalar and becomes a vec_duplicate (matched by vbroadcast
;; patterns).  A register source is handled by an explicit in-lane
;; vpermilp shuffle followed by a cross-lane vperm2f128.
(define_insn_and_split "*avx_vperm_broadcast_<mode>"
  [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
	(vec_select:AVX256MODEF2P
	  (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
	  (match_parallel 2 "avx_vbroadcast_operand"
	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
{
  rtx op0 = operands[0], op1 = operands[1];
  int elt = INTVAL (operands[3]);

  if (REG_P (op1))
    {
      int mask;

      /* Shuffle element we care about into all elements of the 128-bit lane.
	 The other lane gets shuffled too, but we don't care.  */
      if (<MODE>mode == V4DFmode)
	mask = (elt & 1 ? 15 : 0);
      else
	mask = (elt & 3) * 0x55;
      emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));

      /* Shuffle the lane we care about into both lanes of the dest.  */
      mask = (elt / (<ssescalarnum> / 2)) * 0x11;
      emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
      DONE;
    }

  /* Memory source: narrow to the selected scalar element so the
     vec_duplicate in the split pattern matches a vbroadcast insn.  */
  operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
				   elt * GET_MODE_SIZE (<avxscalarmode>mode));
})
(define_expand "avx_vpermil
<mode>
"
[
(set (match_operand:AVXMODEFDP 0 "register_operand" "")
(vec_select:AVXMODEFDP
...
...
@@ -11989,58 +12215,6 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
;; vbroadcasts[sd]: duplicate a scalar memory operand into all four
;; elements of a 4-element AVX vector, expressed as nested vec_concats
;; of the same memory scalar.
(define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>"
  [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x")
	(vec_concat:AVXMODEF4P
	  (vec_concat:<avxhalfvecmode>
	    (match_operand:<avxscalarmode> 1 "memory_operand" "m")
	    (match_dup 1))
	  (vec_concat:<avxhalfvecmode>
	    (match_dup 1)
	    (match_dup 1))))]
  "TARGET_AVX"
  "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<avxscalarmode>")])
;; vbroadcastss (256-bit form): duplicate one SFmode memory scalar into
;; all eight V8SF elements, spelled out as a tree of vec_concats of the
;; same operand.
(define_insn "avx_vbroadcastss256"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
	(vec_concat:V8SF
	  (vec_concat:V4SF
	    (vec_concat:V2SF
	      (match_operand:SF 1 "memory_operand" "m")
	      (match_dup 1))
	    (vec_concat:V2SF
	      (match_dup 1)
	      (match_dup 1)))
	  (vec_concat:V4SF
	    (vec_concat:V2SF
	      (match_dup 1)
	      (match_dup 1))
	    (vec_concat:V2SF
	      (match_dup 1)
	      (match_dup 1)))))]
  "TARGET_AVX"
  "vbroadcastss\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SF")])
;; vbroadcastf128: duplicate a 128-bit memory operand into both halves
;; of a 256-bit float register.
(define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
  [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
	(vec_concat:AVX256MODEF2P
	  (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
	  (match_dup 1)))]
  "TARGET_AVX"
  "vbroadcastf128\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4SF")])
(define_expand "avx_vinsertf128
<mode>
"
[
(match_operand:AVX256MODE 0 "register_operand" "")
(match_operand:AVX256MODE 1 "register_operand" "")
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment