Commit 9fede15c by Segher Boessenkool Committed by Segher Boessenkool

rs6000: Improve vsx_init_v4si

This changes vsx_init_v4si to be an expander.  That way, no special
cases are needed anymore for special arguments: the normal RTL passes
can deal with it.


	* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Adjust.
	* config/rs6000/rs6000-protos.h (rs6000_split_v4si_init): Delete.
	* config/rs6000/rs6000.c (rs6000_expand_vector_init): Always force
	the elements into a register.
	(rs6000_split_v4si_init_di_reg): Delete.
	(rs6000_split_v4si_init): Delete.
	* config/rs6000/vsx.md (unspec): Delete UNSPEC_VSX_VEC_INIT.
	(vsx_init_v4si): Rewrite as a define_expand.

From-SVN: r262930
parent 268e16e8
2018-07-23 Segher Boessenkool <segher@kernel.crashing.org>
* config/rs6000/rs6000-p8swap.c (rtx_is_swappable_p): Adjust.
* config/rs6000/rs6000-protos.h (rs6000_split_v4si_init): Delete.
* config/rs6000/rs6000.c (rs6000_expand_vector_init): Always force
the elements into a register.
(rs6000_split_v4si_init_di_reg): Delete.
(rs6000_split_v4si_init): Delete.
* config/rs6000/vsx.md (unspec): Delete UNSPEC_VSX_VEC_INIT.
(vsx_init_v4si): Rewrite as a define_expand.
2018-07-23 Segher Boessenkool <segher@kernel.crashing.org>
* config/rs6000/rs6000.md (splitters for rldimi and rlwimi with the
zero_extend argument from memory): New.
......
......@@ -772,7 +772,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
case UNSPEC_VSX_EXTRACT:
case UNSPEC_VSX_SET:
case UNSPEC_VSX_SLDWI:
case UNSPEC_VSX_VEC_INIT:
case UNSPEC_VSX_VSLO:
case UNSPEC_VUNPACK_HI_SIGN:
case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
......
......@@ -61,7 +61,6 @@ extern void rs6000_expand_vector_set (rtx, rtx, int);
extern void rs6000_expand_vector_extract (rtx, rtx, rtx);
extern void rs6000_split_vec_extract_var (rtx, rtx, rtx, rtx, rtx);
extern rtx rs6000_adjust_vec_address (rtx, rtx, rtx, rtx, machine_mode);
extern void rs6000_split_v4si_init (rtx []);
extern void altivec_expand_vec_perm_le (rtx op[4]);
extern void rs6000_expand_extract_even (rtx, rtx, rtx);
extern void rs6000_expand_interleave (rtx, rtx, rtx, bool);
......
......@@ -6857,11 +6857,7 @@ rs6000_expand_vector_init (rtx target, rtx vals)
size_t i;
for (i = 0; i < 4; i++)
{
elements[i] = XVECEXP (vals, 0, i);
if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
elements[i] = copy_to_mode_reg (SImode, elements[i]);
}
elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
elements[2], elements[3]));
......@@ -7568,92 +7564,6 @@ rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
gcc_unreachable ();
}
/* Helper for rs6000_split_v4si_init.  Assemble the DImode value DEST from
   two SImode values: SI1 lands in the upper 32 bits and SI2 in the lower
   32 bits.  SI1 and SI2 are each treated either as a CONST_INT or as a
   register.  TMP is a DImode temporary that receives the zero-extended SI2
   before it is IORed into DEST; it is not used when both inputs are
   constants.  */
static void
rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
{
  const unsigned HOST_WIDE_INT low_word_mask = HOST_WIDE_INT_C (0xffffffff);
  const bool upper_is_const = CONST_INT_P (si1);
  const bool lower_is_const = CONST_INT_P (si2);

  /* Two constants: fold them into one 64-bit immediate and move it.  */
  if (upper_is_const && lower_is_const)
    {
      unsigned HOST_WIDE_INT upper_bits
        = (UINTVAL (si1) & low_word_mask) << 32;
      unsigned HOST_WIDE_INT lower_bits = UINTVAL (si2) & low_word_mask;
      emit_move_insn (dest, GEN_INT (upper_bits | lower_bits));
      return;
    }

  /* Step 1: place SI1 in the upper 32 bits of DEST.  */
  if (!upper_is_const)
    {
      /* Shift-left-by-32 and mask of the register viewed as DImode; the
         original comment names the intended instruction as RLDIC.  */
      rtx wide_src = gen_rtx_REG (DImode, regno_or_subregno (si1));
      rtx shifted = gen_rtx_ASHIFT (DImode, wide_src, GEN_INT (32));
      rtx upper_mask = GEN_INT (low_word_mask << 32);
      rtx masked = gen_rtx_AND (DImode, shifted, upper_mask);

      /* DEST is written here while SI1 is read, so they must not overlap.  */
      gcc_assert (!reg_overlap_mentioned_p (dest, si1));
      emit_insn (gen_rtx_SET (dest, masked));
    }
  else
    emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & low_word_mask) << 32));

  /* Step 2: place the zero-extended SI2 in TMP.  DEST already holds the
     upper half, so TMP must be a distinct register.  */
  gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
  if (!lower_is_const)
    emit_insn (gen_zero_extendsidi2 (tmp, si2));
  else
    emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & low_word_mask));

  /* Step 3: merge the lower half into DEST.  */
  emit_insn (gen_iordi3 (dest, dest, tmp));
}
/* Split a V4SI initialization.  OPERANDS[0] is the destination,
   OPERANDS[1] through OPERANDS[4] are the four SImode scalars, and
   OPERANDS[5] and OPERANDS[6] are DImode temporaries.  The destination
   must be a REG or SUBREG; anything else is unreachable.  */
void
rs6000_split_v4si_init (rtx operands[])
{
  rtx dest = operands[0];

  /* Only a register destination is handled.  */
  if (!REG_P (dest) && !SUBREG_P (dest))
    gcc_unreachable ();

  /* Destination is a GPR pair: build up the two DImode parts in place in
     registers BASE_REGNO and BASE_REGNO + 1.  */
  const int base_regno = regno_or_subregno (dest);

  /* Even though one temporary would be enough (together with the
     destination, which has an early clobber constraint), use two
     temporaries, one per doubleword created.  That way the 2nd insn
     scheduling pass can rearrange things so the two parts are done in
     parallel.  */
  if (BYTES_BIG_ENDIAN)
    {
      rtx first_dword = gen_rtx_REG (DImode, base_regno);
      rtx second_dword = gen_rtx_REG (DImode, base_regno + 1);
      rs6000_split_v4si_init_di_reg (first_dword, operands[1], operands[2],
                                     operands[5]);
      rs6000_split_v4si_init_di_reg (second_dword, operands[3], operands[4],
                                     operands[6]);
    }
  else
    {
      /* Little endian: the register pair and the element order within each
         doubleword are both reversed.  */
      rtx first_dword = gen_rtx_REG (DImode, base_regno + 1);
      rtx second_dword = gen_rtx_REG (DImode, base_regno);
      rs6000_split_v4si_init_di_reg (first_dword, operands[4], operands[3],
                                     operands[5]);
      rs6000_split_v4si_init_di_reg (second_dword, operands[2], operands[1],
                                     operands[6]);
    }
}
/* Return alignment of TYPE. Existing alignment is ALIGN. HOW
selects whether the alignment is abi mandated, optional, or
both abi and optional alignment. */
......@@ -388,7 +388,6 @@
UNSPEC_VSX_VXSIG
UNSPEC_VSX_VIEXP
UNSPEC_VSX_VTSTDC
UNSPEC_VSX_VEC_INIT
UNSPEC_VSX_VSIGNED2
UNSPEC_LXVL
......@@ -2946,23 +2945,41 @@
}
[(set_attr "type" "vecperm")])
;; V4SImode initialization splitter
(define_insn_and_split "vsx_init_v4si"
[(set (match_operand:V4SI 0 "gpc_reg_operand" "=&r")
(unspec:V4SI
[(match_operand:SI 1 "reg_or_cint_operand" "rn")
(match_operand:SI 2 "reg_or_cint_operand" "rn")
(match_operand:SI 3 "reg_or_cint_operand" "rn")
(match_operand:SI 4 "reg_or_cint_operand" "rn")]
UNSPEC_VSX_VEC_INIT))
(clobber (match_scratch:DI 5 "=&r"))
(clobber (match_scratch:DI 6 "=&r"))]
;; Concatenate 4 SImode elements into a V4SImode reg.
(define_expand "vsx_init_v4si"
[(use (match_operand:V4SI 0 "gpc_reg_operand"))
(use (match_operand:SI 1 "gpc_reg_operand"))
(use (match_operand:SI 2 "gpc_reg_operand"))
(use (match_operand:SI 3 "gpc_reg_operand"))
(use (match_operand:SI 4 "gpc_reg_operand"))]
"VECTOR_MEM_VSX_P (V4SImode) && TARGET_DIRECT_MOVE_64BIT"
"#"
"&& reload_completed"
[(const_int 0)]
{
rs6000_split_v4si_init (operands);
rtx a = gen_reg_rtx (DImode);
rtx b = gen_reg_rtx (DImode);
rtx c = gen_reg_rtx (DImode);
rtx d = gen_reg_rtx (DImode);
emit_insn (gen_zero_extendsidi2 (a, operands[1]));
emit_insn (gen_zero_extendsidi2 (b, operands[2]));
emit_insn (gen_zero_extendsidi2 (c, operands[3]));
emit_insn (gen_zero_extendsidi2 (d, operands[4]));
if (!BYTES_BIG_ENDIAN)
{
std::swap (a, b);
std::swap (c, d);
}
rtx aa = gen_reg_rtx (DImode);
rtx ab = gen_reg_rtx (DImode);
rtx cc = gen_reg_rtx (DImode);
rtx cd = gen_reg_rtx (DImode);
emit_insn (gen_ashldi3 (aa, a, GEN_INT (32)));
emit_insn (gen_ashldi3 (cc, c, GEN_INT (32)));
emit_insn (gen_iordi3 (ab, aa, b));
emit_insn (gen_iordi3 (cd, cc, d));
rtx abcd = gen_reg_rtx (V2DImode);
emit_insn (gen_vsx_concat_v2di (abcd, ab, cd));
emit_move_insn (operands[0], gen_lowpart (V4SImode, abcd));
DONE;
})
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment