Commit 0cf68694 by Bill Schmidt Committed by William Schmidt

vector.md (mov<mode>): Emit permuted move sequences for LE VSX loads and stores at expand time.

gcc:

2013-10-07  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* config/rs6000/vector.md (mov<mode>): Emit permuted move
	sequences for LE VSX loads and stores at expand time.
	* config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_move): New
	prototype.
	* config/rs6000/rs6000.c (rs6000_const_vec): New.
	(rs6000_gen_le_vsx_permute): New.
	(rs6000_emit_le_vsx_load): New.
	(rs6000_emit_le_vsx_store): New.
	(rs6000_emit_le_vsx_move): New.
	* config/rs6000/vsx.md (*vsx_le_perm_load_v2di): New.
	(*vsx_le_perm_load_v4si): New.
	(*vsx_le_perm_load_v8hi): New.
	(*vsx_le_perm_load_v16qi): New.
	(*vsx_le_perm_store_v2di): New.
	(*vsx_le_perm_store_v4si): New.
	(*vsx_le_perm_store_v8hi): New.
	(*vsx_le_perm_store_v16qi): New.
	(*vsx_xxpermdi2_le_<mode>): New.
	(*vsx_xxpermdi4_le_<mode>): New.
	(*vsx_xxpermdi8_le_V8HI): New.
	(*vsx_xxpermdi16_le_V16QI): New.
	(*vsx_lxvd2x2_le_<mode>): New.
	(*vsx_lxvd2x4_le_<mode>): New.
	(*vsx_lxvd2x8_le_V8HI): New.
	(*vsx_lxvd2x16_le_V16QI): New.
	(*vsx_stxvd2x2_le_<mode>): New.
	(*vsx_stxvd2x4_le_<mode>): New.
	(*vsx_stxvd2x8_le_V8HI): New.
	(*vsx_stxvd2x16_le_V16QI): New.

gcc/testsuite:

2013-10-07  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* gcc.target/powerpc/pr43154.c: Skip for ppc64 little endian.
	* gcc.target/powerpc/fusion.c: Likewise.

From-SVN: r203246
parent 9520e1eb
2013-10-07 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* config/rs6000/vector.md (mov<mode>): Emit permuted move
sequences for LE VSX loads and stores at expand time.
* config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_move): New
prototype.
* config/rs6000/rs6000.c (rs6000_const_vec): New.
(rs6000_gen_le_vsx_permute): New.
(rs6000_emit_le_vsx_load): New.
(rs6000_emit_le_vsx_store): New.
(rs6000_emit_le_vsx_move): New.
* config/rs6000/vsx.md (*vsx_le_perm_load_v2di): New.
(*vsx_le_perm_load_v4si): New.
(*vsx_le_perm_load_v8hi): New.
(*vsx_le_perm_load_v16qi): New.
(*vsx_le_perm_store_v2di): New.
(*vsx_le_perm_store_v4si): New.
(*vsx_le_perm_store_v8hi): New.
(*vsx_le_perm_store_v16qi): New.
(*vsx_xxpermdi2_le_<mode>): New.
(*vsx_xxpermdi4_le_<mode>): New.
(*vsx_xxpermdi8_le_V8HI): New.
(*vsx_xxpermdi16_le_V16QI): New.
(*vsx_lxvd2x2_le_<mode>): New.
(*vsx_lxvd2x4_le_<mode>): New.
(*vsx_lxvd2x8_le_V8HI): New.
(*vsx_lxvd2x16_le_V16QI): New.
(*vsx_stxvd2x2_le_<mode>): New.
(*vsx_stxvd2x4_le_<mode>): New.
(*vsx_stxvd2x8_le_V8HI): New.
(*vsx_stxvd2x16_le_V16QI): New.
2013-10-07 Renlin Li <Renlin.Li@arm.com>
* config/arm/arm-cores.def (cortex-a53): Use cortex tuning.
......
......@@ -122,6 +122,7 @@ extern rtx rs6000_longcall_ref (rtx);
extern void rs6000_fatal_bad_address (rtx);
extern rtx create_TOC_reference (rtx, rtx);
extern void rs6000_split_multireg_move (rtx, rtx);
extern void rs6000_emit_le_vsx_move (rtx, rtx, enum machine_mode);
extern void rs6000_emit_move (rtx, rtx, enum machine_mode);
extern rtx rs6000_secondary_memory_needed_rtx (enum machine_mode);
extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode,
......
......@@ -7665,6 +7665,106 @@ rs6000_eliminate_indexed_memrefs (rtx operands[2])
copy_addr_to_reg (XEXP (operands[1], 0)));
}
/* Build the selector vector used to permute a value of mode MODE for a
   little-endian storage operation.  The selector has one entry per
   vector element; entry I names the element that lies half a vector
   away from I, so applying it swaps the two halves of the vector.
   Only the modes listed in the switch are supported; any other mode
   reaches gcc_unreachable.  */
static rtvec
rs6000_const_vec (enum machine_mode mode)
{
  int nelts, half, idx;
  rtvec selector;

  /* Number of elements in a vector of MODE.  */
  switch (mode)
    {
    case V16QImode:
      nelts = 16;
      break;

    case V8HImode:
      nelts = 8;
      break;

    case V4SImode:
    case V4SFmode:
      nelts = 4;
      break;

    case V2DImode:
    case V2DFmode:
      nelts = 2;
      break;

    default:
      gcc_unreachable();
    }

  half = nelts / 2;
  selector = rtvec_alloc (nelts);

  /* The low half selects from the high half and vice versa.  */
  for (idx = 0; idx < nelts; ++idx)
    {
      int from = idx < half ? idx + half : idx - half;
      RTVEC_ELT (selector, idx) = gen_rtx_CONST_INT (DImode, from);
    }

  return selector;
}
/* Return a VEC_SELECT rtx in mode MODE that applies the half-swapping
   selector from rs6000_const_vec to SOURCE.  This is the permute that
   represents an lxvd2x, stxvd2x, or xxpermdi in a VSX load or store
   sequence.  */
rtx
rs6000_gen_le_vsx_permute (rtx source, enum machine_mode mode)
{
  rtvec selector = rs6000_const_vec (mode);

  return gen_rtx_VEC_SELECT (mode, source,
			     gen_rtx_PARALLEL (VOIDmode, selector));
}
/* Emit a little-endian load in mode MODE from vector memory location
   SOURCE into VSX register DEST.  Two permuting insns are emitted:
   the first (representing an lxvd2x) loads the permuted memory
   contents into a temporary, and the second (representing an
   xxpermdi) permutes the temporary into DEST.  The temporary is a
   fresh pseudo modelled on DEST when one can be created; otherwise
   DEST itself is used.  */
void
rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode)
{
  rtx swapped;

  if (can_create_pseudo_p ())
    swapped = gen_reg_rtx_and_attrs (dest);
  else
    swapped = dest;

  /* Permuting load from memory into the temporary.  */
  emit_insn (gen_rtx_SET (VOIDmode, swapped,
			  rs6000_gen_le_vsx_permute (source, mode)));

  /* Register permute that undoes the swap.  */
  emit_insn (gen_rtx_SET (VOIDmode, dest,
			  rs6000_gen_le_vsx_permute (swapped, mode)));
}
/* Emit a little-endian store in mode MODE from VSX register SOURCE to
   vector memory location DEST.  Two permuting insns are emitted: the
   first (representing an xxpermdi) permutes SOURCE into a temporary,
   and the second (representing an stxvd2x) stores the permuted
   temporary to DEST.  The temporary is a fresh pseudo modelled on
   SOURCE when one can be created.  Otherwise SOURCE itself serves as
   the temporary, so it is left holding the permuted value after the
   store.  */
void
rs6000_emit_le_vsx_store (rtx dest, rtx source, enum machine_mode mode)
{
  rtx swapped;

  if (can_create_pseudo_p ())
    swapped = gen_reg_rtx_and_attrs (source);
  else
    swapped = source;

  /* Register permute of the value to be stored.  */
  emit_insn (gen_rtx_SET (VOIDmode, swapped,
			  rs6000_gen_le_vsx_permute (source, mode)));

  /* Permuting store of the temporary to memory.  */
  emit_insn (gen_rtx_SET (VOIDmode, dest,
			  rs6000_gen_le_vsx_permute (swapped, mode)));
}
/* Emit the insn sequence for a little-endian VSX load or store that
   moves SOURCE to DEST in mode MODE.  Exactly one of SOURCE and DEST
   must be a MEM.  This is kept separate from rs6000_emit_move so that
   it is called only during expand; LE VSX loads and stores introduced
   later are handled with a split.  Generating the RTL at expand time
   allows redundant pairs of register permutes to be optimized
   away.  */
void
rs6000_emit_le_vsx_move (rtx dest, rtx source, enum machine_mode mode)
{
  gcc_assert (!BYTES_BIG_ENDIAN
	      && VECTOR_MEM_VSX_P (mode)
	      && mode != TImode
	      && (MEM_P (source) ^ MEM_P (dest)));

  if (!MEM_P (source))
    {
      /* Store: the value must be in a register before it is
	 permuted.  */
      rtx value = REG_P (source) ? source : force_reg (mode, source);

      rs6000_emit_le_vsx_store (dest, value, mode);
      return;
    }

  /* Load: the destination has to be a register.  */
  gcc_assert (REG_P (dest));
  rs6000_emit_le_vsx_load (dest, source, mode);
}
/* Emit a move from SOURCE to DEST in mode MODE. */
void
rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
......
......@@ -88,7 +88,8 @@
(smax "smax")])
;; Vector move instructions.
;; Vector move instructions. Little-endian VSX loads and stores require
;; special handling to circumvent "element endianness."
(define_expand "mov<mode>"
[(set (match_operand:VEC_M 0 "nonimmediate_operand" "")
(match_operand:VEC_M 1 "any_operand" ""))]
......@@ -104,6 +105,15 @@
&& !vlogical_operand (operands[1], <MODE>mode))
operands[1] = force_reg (<MODE>mode, operands[1]);
}
if (!BYTES_BIG_ENDIAN
&& VECTOR_MEM_VSX_P (<MODE>mode)
&& <MODE>mode != TImode
&& (memory_operand (operands[0], <MODE>mode)
^ memory_operand (operands[1], <MODE>mode)))
{
rs6000_emit_le_vsx_move (operands[0], operands[1], <MODE>mode);
DONE;
}
})
;; Generic vector floating point load/store instructions. These will match
......
2013-10-07 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* gcc.target/powerpc/pr43154.c: Skip for ppc64 little endian.
* gcc.target/powerpc/fusion.c: Likewise.
2013-10-07 Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
* gcc.target/s390/htm-nofloat-2.c: New testcase.
......
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-skip-if "" { powerpc*le-*-* } { "*" } { "" } } */
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-options "-mcpu=power7 -mtune=power8 -O3" } */
......
/* { dg-do compile { target { powerpc*-*-* } } } */
/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
/* { dg-skip-if "" { powerpc*le-*-* } { "*" } { "" } } */
/* { dg-require-effective-target powerpc_vsx_ok } */
/* { dg-options "-O2 -mcpu=power7" } */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment