Commit f200869a by Bill Schmidt Committed by William Schmidt

vector.md (vec_realign_load<mode>): Generate vperm directly to circumvent…

vector.md (vec_realign_load<mode>): Generate vperm directly to circumvent subtract from splat{31} workaround.

2013-10-11  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* config/rs6000/vector.md (vec_realign_load<mode>): Generate vperm
	directly to circumvent subtract from splat{31} workaround.
	* config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_le): New
	prototype.
	* config/rs6000/rs6000.c (altivec_expand_vec_perm_le): New.
	* config/rs6000/altivec.md (define_c_enum "unspec"): Add
	UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X.
	(altivec_vperm_<mode>): Convert to define_insn_and_split to
	separate big and little endian logic.
	(*altivec_vperm_<mode>_internal): New define_insn.
	(altivec_vperm_<mode>_uns): Convert to define_insn_and_split to
	separate big and little endian logic.
	(*altivec_vperm_<mode>_uns_internal): New define_insn.
	(vec_permv16qi): Add little endian logic.

From-SVN: r203457
parent 39aeb94e
2013-10-11 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* config/rs6000/vector.md (vec_realign_load<mode>): Generate vperm
directly to circumvent subtract from splat{31} workaround.
* config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_le): New
prototype.
* config/rs6000/rs6000.c (altivec_expand_vec_perm_le): New.
* config/rs6000/altivec.md (define_c_enum "unspec"): Add
UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X.
(altivec_vperm_<mode>): Convert to define_insn_and_split to
separate big and little endian logic.
(*altivec_vperm_<mode>_internal): New define_insn.
(altivec_vperm_<mode>_uns): Convert to define_insn_and_split to
separate big and little endian logic.
(*altivec_vperm_<mode>_uns_internal): New define_insn.
(vec_permv16qi): Add little endian logic.
2013-10-11 Marc Glisse <marc.glisse@inria.fr> 2013-10-11 Marc Glisse <marc.glisse@inria.fr>
* doc/extend.texi (returns_nonnull): Remove arguments. * doc/extend.texi (returns_nonnull): Remove arguments.
......
...@@ -59,6 +59,8 @@ ...@@ -59,6 +59,8 @@
UNSPEC_VSUMSWS UNSPEC_VSUMSWS
UNSPEC_VPERM UNSPEC_VPERM
UNSPEC_VPERM_UNS UNSPEC_VPERM_UNS
UNSPEC_VPERM_X
UNSPEC_VPERM_UNS_X
UNSPEC_VRFIN UNSPEC_VRFIN
UNSPEC_VCFUX UNSPEC_VCFUX
UNSPEC_VCFSX UNSPEC_VCFSX
...@@ -1279,21 +1281,91 @@ ...@@ -1279,21 +1281,91 @@
"vrfiz %0,%1" "vrfiz %0,%1"
[(set_attr "type" "vecfloat")]) [(set_attr "type" "vecfloat")])
(define_insn "altivec_vperm_<mode>" (define_insn_and_split "altivec_vperm_<mode>"
[(set (match_operand:VM 0 "register_operand" "=v") [(set (match_operand:VM 0 "register_operand" "=v")
(unspec:VM [(match_operand:VM 1 "register_operand" "v") (unspec:VM [(match_operand:VM 1 "register_operand" "v")
(match_operand:VM 2 "register_operand" "v") (match_operand:VM 2 "register_operand" "v")
(match_operand:V16QI 3 "register_operand" "v")] (match_operand:V16QI 3 "register_operand" "v")]
UNSPEC_VPERM_X))]
"TARGET_ALTIVEC"
"#"
"!reload_in_progress && !reload_completed"
[(set (match_dup 0) (match_dup 4))]
{
if (BYTES_BIG_ENDIAN)
operands[4] = gen_rtx_UNSPEC (<MODE>mode,
gen_rtvec (3, operands[1],
operands[2], operands[3]),
UNSPEC_VPERM);
else
{
/* We want to subtract from 31, but we can't vspltisb 31 since
it's out of range. -1 works as well because only the low-order
five bits of the permute control vector elements are used. */
rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode,
gen_rtx_CONST_INT (QImode, -1));
rtx tmp = gen_reg_rtx (V16QImode);
emit_move_insn (tmp, splat);
rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]);
emit_move_insn (tmp, sel);
operands[4] = gen_rtx_UNSPEC (<MODE>mode,
gen_rtvec (3, operands[2],
operands[1], tmp),
UNSPEC_VPERM);
}
}
[(set_attr "type" "vecperm")])
(define_insn "*altivec_vperm_<mode>_internal"
[(set (match_operand:VM 0 "register_operand" "=v")
(unspec:VM [(match_operand:VM 1 "register_operand" "v")
(match_operand:VM 2 "register_operand" "v")
(match_operand:V16QI 3 "register_operand" "+v")]
UNSPEC_VPERM))] UNSPEC_VPERM))]
"TARGET_ALTIVEC" "TARGET_ALTIVEC"
"vperm %0,%1,%2,%3" "vperm %0,%1,%2,%3"
[(set_attr "type" "vecperm")]) [(set_attr "type" "vecperm")])
(define_insn "altivec_vperm_<mode>_uns" (define_insn_and_split "altivec_vperm_<mode>_uns"
[(set (match_operand:VM 0 "register_operand" "=v") [(set (match_operand:VM 0 "register_operand" "=v")
(unspec:VM [(match_operand:VM 1 "register_operand" "v") (unspec:VM [(match_operand:VM 1 "register_operand" "v")
(match_operand:VM 2 "register_operand" "v") (match_operand:VM 2 "register_operand" "v")
(match_operand:V16QI 3 "register_operand" "v")] (match_operand:V16QI 3 "register_operand" "v")]
UNSPEC_VPERM_UNS_X))]
"TARGET_ALTIVEC"
"#"
"!reload_in_progress && !reload_completed"
[(set (match_dup 0) (match_dup 4))]
{
if (BYTES_BIG_ENDIAN)
operands[4] = gen_rtx_UNSPEC (<MODE>mode,
gen_rtvec (3, operands[1],
operands[2], operands[3]),
UNSPEC_VPERM_UNS);
else
{
/* We want to subtract from 31, but we can't vspltisb 31 since
it's out of range. -1 works as well because only the low-order
five bits of the permute control vector elements are used. */
rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode,
gen_rtx_CONST_INT (QImode, -1));
rtx tmp = gen_reg_rtx (V16QImode);
emit_move_insn (tmp, splat);
rtx sel = gen_rtx_MINUS (V16QImode, tmp, operands[3]);
emit_move_insn (tmp, sel);
operands[4] = gen_rtx_UNSPEC (<MODE>mode,
gen_rtvec (3, operands[2],
operands[1], tmp),
UNSPEC_VPERM_UNS);
}
}
[(set_attr "type" "vecperm")])
(define_insn "*altivec_vperm_<mode>_uns_internal"
[(set (match_operand:VM 0 "register_operand" "=v")
(unspec:VM [(match_operand:VM 1 "register_operand" "v")
(match_operand:VM 2 "register_operand" "v")
(match_operand:V16QI 3 "register_operand" "+v")]
UNSPEC_VPERM_UNS))] UNSPEC_VPERM_UNS))]
"TARGET_ALTIVEC" "TARGET_ALTIVEC"
"vperm %0,%1,%2,%3" "vperm %0,%1,%2,%3"
...@@ -1306,7 +1378,12 @@ ...@@ -1306,7 +1378,12 @@
(match_operand:V16QI 3 "register_operand" "")] (match_operand:V16QI 3 "register_operand" "")]
UNSPEC_VPERM))] UNSPEC_VPERM))]
"TARGET_ALTIVEC" "TARGET_ALTIVEC"
"") {
if (!BYTES_BIG_ENDIAN) {
altivec_expand_vec_perm_le (operands);
DONE;
}
})
(define_expand "vec_perm_constv16qi" (define_expand "vec_perm_constv16qi"
[(match_operand:V16QI 0 "register_operand" "") [(match_operand:V16QI 0 "register_operand" "")
......
...@@ -56,6 +56,7 @@ extern void paired_expand_vector_init (rtx, rtx); ...@@ -56,6 +56,7 @@ extern void paired_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int); extern void rs6000_expand_vector_set (rtx, rtx, int);
extern void rs6000_expand_vector_extract (rtx, rtx, int); extern void rs6000_expand_vector_extract (rtx, rtx, int);
extern bool altivec_expand_vec_perm_const (rtx op[4]); extern bool altivec_expand_vec_perm_const (rtx op[4]);
extern void altivec_expand_vec_perm_le (rtx op[4]);
extern bool rs6000_expand_vec_perm_const (rtx op[4]); extern bool rs6000_expand_vec_perm_const (rtx op[4]);
extern void rs6000_expand_extract_even (rtx, rtx, rtx); extern void rs6000_expand_extract_even (rtx, rtx, rtx);
extern void rs6000_expand_interleave (rtx, rtx, rtx, bool); extern void rs6000_expand_interleave (rtx, rtx, rtx, bool);
......
...@@ -28608,6 +28608,54 @@ altivec_expand_vec_perm_const_le (rtx operands[4]) ...@@ -28608,6 +28608,54 @@ altivec_expand_vec_perm_const_le (rtx operands[4])
emit_move_insn (target, unspec); emit_move_insn (target, unspec);
} }
/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
   permute control vector for little endian.  But here it's not a
   constant, so we must generate a vector splat/subtract to do the
   adjustment at run time.

   OPERANDS[0] is the destination, OPERANDS[1] and OPERANDS[2] are the
   two V16QI input vectors, and OPERANDS[3] is the V16QI permute
   control vector.  The emitted sequence is

	adj_sel = splat (-1) - sel
	target  = vperm (op1, op0, adj_sel)

   i.e. the inputs are swapped and the selector is adjusted.  */
void
altivec_expand_vec_perm_le (rtx operands[4])
{
  rtx splat, unspec;
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx sel = operands[3];
  rtx tmp = target;
  /* The adjusted selector always lives in a fresh pseudo.  Building it
     in TARGET would overwrite TARGET before OP0, OP1 and SEL have been
     read, which destroys an input whenever TARGET is the same register
     as one of them.  */
  rtx adj_sel = gen_reg_rtx (V16QImode);

  /* Get everything in regs so the pattern matches.  */
  if (!REG_P (op0))
    op0 = force_reg (V16QImode, op0);
  if (!REG_P (op1))
    op1 = force_reg (V16QImode, op1);
  if (!REG_P (sel))
    sel = force_reg (V16QImode, sel);
  if (!REG_P (target))
    tmp = gen_reg_rtx (V16QImode);

  /* ADJ_SEL = splat (-1) - SEL.
     We want to subtract from 31, but we can't vspltisb 31 since
     it's out of range.  -1 works as well because only the low-order
     five bits of the permute control vector elements are used.  */
  splat = gen_rtx_VEC_DUPLICATE (V16QImode,
                                 gen_rtx_CONST_INT (QImode, -1));
  emit_move_insn (adj_sel, splat);
  sel = gen_rtx_MINUS (V16QImode, adj_sel, sel);
  emit_move_insn (adj_sel, sel);

  /* Permute with operands reversed and adjusted selector.  */
  unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, adj_sel),
                           UNSPEC_VPERM);

  /* Copy into target, possibly by way of a register.  */
  if (!REG_P (target))
    {
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}
/* Expand an Altivec constant permutation. Return true if we match /* Expand an Altivec constant permutation. Return true if we match
an efficient implementation; false to fall back to VPERM. */ an efficient implementation; false to fall back to VPERM. */
......
...@@ -950,8 +950,15 @@ ...@@ -950,8 +950,15 @@
emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1], emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[1],
operands[2], operands[3])); operands[2], operands[3]));
else else
emit_insn (gen_altivec_vperm_<mode> (operands[0], operands[2], {
operands[1], operands[3])); /* Avoid the "subtract from splat31" workaround for vperm since
we have changed lvsr to lvsl instead. */
rtx unspec = gen_rtx_UNSPEC (<MODE>mode,
gen_rtvec (3, operands[2],
operands[1], operands[3]),
UNSPEC_VPERM);
emit_move_insn (operands[0], unspec);
}
DONE; DONE;
}) })
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment