Commit 6e9fffcf by H.J. Lu Committed by H.J. Lu

i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX

Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX.  For MMX punpckhXX,
move bits 64:127 to bits 0:63 in SSE register.  Only SSE register source
operand is allowed.

	PR target/89021
	* config/i386/i386-expand.c (ix86_split_mmx_punpck): New function.
	* config/i386/i386-protos.h (ix86_split_mmx_punpck): New
	prototype.
	* config/i386/mmx.md (mmx_punpckhbw): Changed to
	define_insn_and_split to support SSE emulation.
	(mmx_punpcklbw): Likewise.
	(mmx_punpckhwd): Likewise.
	(mmx_punpcklwd): Likewise.
	(mmx_punpckhdq): Likewise.
	(mmx_punpckldq): Likewise.

From-SVN: r271216
parent b74ebb2a
2019-05-15 H.J. Lu <hongjiu.lu@intel.com>
PR target/89021
* config/i386/i386-expand.c (ix86_split_mmx_punpck): New function.
* config/i386/i386-protos.h (ix86_split_mmx_punpck): New
prototype.
* config/i386/mmx.md (mmx_punpckhbw): Changed to
define_insn_and_split to support SSE emulation.
(mmx_punpcklbw): Likewise.
(mmx_punpckhwd): Likewise.
(mmx_punpcklwd): Likewise.
(mmx_punpckhdq): Likewise.
(mmx_punpckldq): Likewise.
2019-05-15 H.J. Lu <hongjiu.lu@intel.com>
Uros Bizjak <ubizjak@gmail.com>
PR target/89021
......
......@@ -716,6 +716,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
ix86_move_vector_high_sse_to_mmx (op0);
}
/* Emit the SSE replacement for an MMX punpcklXX (HIGH_P false) or
   punpckhXX (HIGH_P true).  OPERANDS[0] is the destination and
   OPERANDS[1] / OPERANDS[2] are the two sources; each one is accessed
   through lowpart_subreg in the SSE mode with twice as many elements
   as the mode of OPERANDS[0].

   The selector used below picks element I of the first source followed
   by element I of the second source, for every I in the 64-bit MMX
   part of the sources.  The low half of that result is therefore the
   MMX punpcklXX result and the high half is the MMX punpckhXX result,
   so for HIGH_P a second insn moves the high half down.  */
void
ix86_split_mmx_punpck (rtx operands[], bool high_p)
{
  machine_mode mmx_mode = GET_MODE (operands[0]);
  /* SSE mode of the widened operands, and the mode of the
     concatenation of the two widened sources.  */
  machine_mode sse_mode, double_sse_mode;
  /* Element selector applied to the concatenated sources; indices
     below the SSE element count address the first source, the rest
     address the second source.  */
  rtx interleave;

  switch (mmx_mode)
    {
    case E_V2SImode:
      sse_mode = V4SImode;
      double_sse_mode = V8SImode;
      interleave
	= gen_rtx_PARALLEL (VOIDmode,
			    gen_rtvec (4,
				       GEN_INT (0), GEN_INT (4),
				       GEN_INT (1), GEN_INT (5)));
      break;

    case E_V4HImode:
      sse_mode = V8HImode;
      double_sse_mode = V16HImode;
      interleave
	= gen_rtx_PARALLEL (VOIDmode,
			    gen_rtvec (8,
				       GEN_INT (0), GEN_INT (8),
				       GEN_INT (1), GEN_INT (9),
				       GEN_INT (2), GEN_INT (10),
				       GEN_INT (3), GEN_INT (11)));
      break;

    case E_V8QImode:
      sse_mode = V16QImode;
      double_sse_mode = V32QImode;
      interleave
	= gen_rtx_PARALLEL (VOIDmode,
			    gen_rtvec (16,
				       GEN_INT (0), GEN_INT (16),
				       GEN_INT (1), GEN_INT (17),
				       GEN_INT (2), GEN_INT (18),
				       GEN_INT (3), GEN_INT (19),
				       GEN_INT (4), GEN_INT (20),
				       GEN_INT (5), GEN_INT (21),
				       GEN_INT (6), GEN_INT (22),
				       GEN_INT (7), GEN_INT (23)));
      break;

    default:
      gcc_unreachable ();
    }

  /* View all three operands in the SSE mode and emit the interleave
     of the two sources into the destination.  */
  rtx sse_dest = lowpart_subreg (sse_mode, operands[0], mmx_mode);
  rtx sse_src1 = lowpart_subreg (sse_mode, operands[1],
				 GET_MODE (operands[1]));
  rtx sse_src2 = lowpart_subreg (sse_mode, operands[2],
				 GET_MODE (operands[2]));
  rtx both_srcs = gen_rtx_VEC_CONCAT (double_sse_mode, sse_src1, sse_src2);
  emit_insn (gen_rtx_SET (sse_dest,
			  gen_rtx_VEC_SELECT (sse_mode, both_srcs,
					      interleave)));

  /* punpcklXX: the wanted result is already in the low half.  */
  if (!high_p)
    return;

  /* punpckhXX: move bits 64:127 to bits 0:63.  Viewed as V4SI, the
     new elements 0 and 1 are the old elements 2 and 3; the remaining
     two positions are filled from element 0.  */
  rtx high_to_low
    = gen_rtx_PARALLEL (VOIDmode,
			gen_rtvec (4, GEN_INT (2), GEN_INT (3),
				   GEN_INT (0), GEN_INT (0)));
  rtx v4si_dest = lowpart_subreg (V4SImode, sse_dest, GET_MODE (sse_dest));
  emit_insn (gen_rtx_SET (v4si_dest,
			  gen_rtx_VEC_SELECT (V4SImode, v4si_dest,
					      high_to_low)));
}
/* Helper function of ix86_fixup_binary_operands to canonicalize
operand order. Returns true if the operands should be swapped. */
......
......@@ -202,6 +202,7 @@ extern rtx ix86_split_stack_guard (void);
extern void ix86_move_vector_high_sse_to_mmx (rtx);
extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
extern void ix86_split_mmx_punpck (rtx[], bool);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
......
......@@ -1089,87 +1089,129 @@
(set_attr "type" "mmxshft,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_punpckhbw"
[(set (match_operand:V8QI 0 "register_operand" "=y")
(define_insn_and_split "mmx_punpckhbw"
[(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(vec_select:V8QI
(vec_concat:V16QI
(match_operand:V8QI 1 "register_operand" "0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym"))
(match_operand:V8QI 1 "register_operand" "0,0,Yv")
(match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
"TARGET_MMX"
"punpckhbw\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxcvt")
(set_attr "mode" "DI")])
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
punpckhbw\t{%2, %0|%0, %2}
#
#"
"TARGET_MMX_WITH_SSE && reload_completed"
[(const_int 0)]
"ix86_split_mmx_punpck (operands, true); DONE;"
[(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
(set_attr "type" "mmxcvt,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_punpcklbw"
[(set (match_operand:V8QI 0 "register_operand" "=y")
(define_insn_and_split "mmx_punpcklbw"
[(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(vec_select:V8QI
(vec_concat:V16QI
(match_operand:V8QI 1 "register_operand" "0")
(match_operand:V8QI 2 "nonimmediate_operand" "ym"))
(match_operand:V8QI 1 "register_operand" "0,0,Yv")
(match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
(const_int 3) (const_int 11)])))]
"TARGET_MMX"
"punpcklbw\t{%2, %0|%0, %k2}"
[(set_attr "type" "mmxcvt")
(set_attr "mode" "DI")])
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
punpcklbw\t{%2, %0|%0, %k2}
#
#"
"TARGET_MMX_WITH_SSE && reload_completed"
[(const_int 0)]
"ix86_split_mmx_punpck (operands, false); DONE;"
[(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
(set_attr "type" "mmxcvt,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_punpckhwd"
[(set (match_operand:V4HI 0 "register_operand" "=y")
(define_insn_and_split "mmx_punpckhwd"
[(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_select:V4HI
(vec_concat:V8HI
(match_operand:V4HI 1 "register_operand" "0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym"))
(match_operand:V4HI 1 "register_operand" "0,0,Yv")
(match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
"TARGET_MMX"
"punpckhwd\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxcvt")
(set_attr "mode" "DI")])
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
punpckhwd\t{%2, %0|%0, %2}
#
#"
"TARGET_MMX_WITH_SSE && reload_completed"
[(const_int 0)]
"ix86_split_mmx_punpck (operands, true); DONE;"
[(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
(set_attr "type" "mmxcvt,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_punpcklwd"
[(set (match_operand:V4HI 0 "register_operand" "=y")
(define_insn_and_split "mmx_punpcklwd"
[(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
(vec_select:V4HI
(vec_concat:V8HI
(match_operand:V4HI 1 "register_operand" "0")
(match_operand:V4HI 2 "nonimmediate_operand" "ym"))
(match_operand:V4HI 1 "register_operand" "0,0,Yv")
(match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
"TARGET_MMX"
"punpcklwd\t{%2, %0|%0, %k2}"
[(set_attr "type" "mmxcvt")
(set_attr "mode" "DI")])
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
punpcklwd\t{%2, %0|%0, %k2}
#
#"
"TARGET_MMX_WITH_SSE && reload_completed"
[(const_int 0)]
"ix86_split_mmx_punpck (operands, false); DONE;"
[(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
(set_attr "type" "mmxcvt,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_punpckhdq"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(define_insn_and_split "mmx_punpckhdq"
[(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_select:V2SI
(vec_concat:V4SI
(match_operand:V2SI 1 "register_operand" "0")
(match_operand:V2SI 2 "nonimmediate_operand" "ym"))
(match_operand:V2SI 1 "register_operand" "0,0,Yv")
(match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 1)
(const_int 3)])))]
"TARGET_MMX"
"punpckhdq\t{%2, %0|%0, %2}"
[(set_attr "type" "mmxcvt")
(set_attr "mode" "DI")])
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
punpckhdq\t{%2, %0|%0, %2}
#
#"
"TARGET_MMX_WITH_SSE && reload_completed"
[(const_int 0)]
"ix86_split_mmx_punpck (operands, true); DONE;"
[(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
(set_attr "type" "mmxcvt,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
(define_insn "mmx_punpckldq"
[(set (match_operand:V2SI 0 "register_operand" "=y")
(define_insn_and_split "mmx_punpckldq"
[(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
(vec_select:V2SI
(vec_concat:V4SI
(match_operand:V2SI 1 "register_operand" "0")
(match_operand:V2SI 2 "nonimmediate_operand" "ym"))
(match_operand:V2SI 1 "register_operand" "0,0,Yv")
(match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv"))
(parallel [(const_int 0)
(const_int 2)])))]
"TARGET_MMX"
"punpckldq\t{%2, %0|%0, %k2}"
[(set_attr "type" "mmxcvt")
(set_attr "mode" "DI")])
"TARGET_MMX || TARGET_MMX_WITH_SSE"
"@
punpckldq\t{%2, %0|%0, %k2}
#
#"
"TARGET_MMX_WITH_SSE && reload_completed"
[(const_int 0)]
"ix86_split_mmx_punpck (operands, false); DONE;"
[(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
(set_attr "type" "mmxcvt,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
(define_expand "mmx_pinsrw"
[(set (match_operand:V4HI 0 "register_operand")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment