Commit d958ae79 by Robin Dapp Committed by Andreas Krebbel

S/390: Simplify vector conditionals

gcc/ChangeLog:

2015-12-18  Robin Dapp  <rdapp@linux.vnet.ibm.com>

	* config/s390/s390.c (s390_expand_vcond): Convert vector
	conditional into shift.
	* config/s390/vector.md: Change operand predicate.

gcc/testsuite/ChangeLog:

2015-12-18  Robin Dapp  <rdapp@linux.vnet.ibm.com>

	* gcc.target/s390/vcond-shift.c: New test to check vcond
	simplification.

From-SVN: r231808
parent 33247762
2015-12-18 Robin Dapp <rdapp@linux.vnet.ibm.com>
* config/s390/s390.c (s390_expand_vcond): Convert vector
conditional into shift.
* config/s390/vector.md: Change operand predicate.
2015-12-18 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
* config/s390/driver-native.c (s390_host_detect_local_cpu): Pick
......@@ -6139,19 +6139,60 @@ s390_expand_vcond (rtx target, rtx then, rtx els,
machine_mode result_mode;
rtx result_target;
machine_mode target_mode = GET_MODE (target);
machine_mode cmp_mode = GET_MODE (cmp_op1);
rtx op = (cond == LT) ? els : then;
/* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
for short and byte (x >> 15 and x >> 7 respectively). */
if ((cond == LT || cond == GE)
&& target_mode == cmp_mode
&& cmp_op2 == CONST0_RTX (cmp_mode)
&& op == CONST0_RTX (target_mode)
&& s390_vector_mode_supported_p (target_mode)
&& GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
{
rtx negop = (cond == LT) ? then : els;
int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
/* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
if (negop == CONST1_RTX (target_mode))
{
rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
GEN_INT (shift), target,
1, OPTAB_DIRECT);
if (res != target)
emit_move_insn (target, res);
return;
}
/* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
else if (constm1_operand (negop, target_mode))
{
rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
GEN_INT (shift), target,
0, OPTAB_DIRECT);
if (res != target)
emit_move_insn (target, res);
return;
}
}
/* We always use an integral type vector to hold the comparison
result. */
result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1);
result_mode = cmp_mode == V2DFmode ? V2DImode : cmp_mode;
result_target = gen_reg_rtx (result_mode);
/* Alternatively this could be done by reload by lowering the cmp*
predicates. But it appears to be better for scheduling etc. to
have that in early. */
/* We allow vector immediates as comparison operands that
can be handled by the optimization above but not by the
following code. Hence, force them into registers here. */
if (!REG_P (cmp_op1))
cmp_op1 = force_reg (GET_MODE (target), cmp_op1);
cmp_op1 = force_reg (target_mode, cmp_op1);
if (!REG_P (cmp_op2))
cmp_op2 = force_reg (GET_MODE (target), cmp_op2);
cmp_op2 = force_reg (target_mode, cmp_op2);
s390_expand_vec_compare (result_target, cond,
cmp_op1, cmp_op2);
......@@ -6161,7 +6202,7 @@ s390_expand_vcond (rtx target, rtx then, rtx els,
if (constm1_operand (then, GET_MODE (then))
&& const0_operand (els, GET_MODE (els)))
{
emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target),
emit_move_insn (target, gen_rtx_SUBREG (target_mode,
result_target, 0));
return;
}
......@@ -6170,10 +6211,10 @@ s390_expand_vcond (rtx target, rtx then, rtx els,
/* This gets triggered e.g.
with gcc.c-torture/compile/pr53410-1.c */
if (!REG_P (then))
then = force_reg (GET_MODE (target), then);
then = force_reg (target_mode, then);
if (!REG_P (els))
els = force_reg (GET_MODE (target), els);
els = force_reg (target_mode, els);
tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
result_target,
......@@ -6181,9 +6222,9 @@ s390_expand_vcond (rtx target, rtx then, rtx els,
/* We compared the result against zero above so we have to swap then
and els here. */
tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then);
tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
gcc_assert (GET_MODE (target) == GET_MODE (then));
gcc_assert (target_mode == GET_MODE (then));
emit_insn (gen_rtx_SET (target, tmp));
}
......
......@@ -403,7 +403,7 @@
(if_then_else:V_HW
(match_operator 3 "comparison_operator"
[(match_operand:V_HW2 4 "register_operand" "")
(match_operand:V_HW2 5 "register_operand" "")])
(match_operand:V_HW2 5 "nonmemory_operand" "")])
(match_operand:V_HW 1 "nonmemory_operand" "")
(match_operand:V_HW 2 "nonmemory_operand" "")))]
"TARGET_VX && GET_MODE_NUNITS (<V_HW:MODE>mode) == GET_MODE_NUNITS (<V_HW2:MODE>mode)"
......@@ -418,7 +418,7 @@
(if_then_else:V_HW
(match_operator 3 "comparison_operator"
[(match_operand:V_HW2 4 "register_operand" "")
(match_operand:V_HW2 5 "register_operand" "")])
(match_operand:V_HW2 5 "nonmemory_operand" "")])
(match_operand:V_HW 1 "nonmemory_operand" "")
(match_operand:V_HW 2 "nonmemory_operand" "")))]
"TARGET_VX && GET_MODE_NUNITS (<V_HW:MODE>mode) == GET_MODE_NUNITS (<V_HW2:MODE>mode)"
......
2015-12-18 Robin Dapp <rdapp@linux.vnet.ibm.com>
* gcc.target/s390/vcond-shift.c: New test to check vcond
simplification.
2015-12-18 Paul Thomas <pault@gcc.gnu.org>
PR fortran/68196
......
/* Check if conditional vector instructions are simplified
into shift operations. */
/* { dg-do compile { target { s390*-*-* } } } */
/* { dg-options "-O3 -march=z13 -mzarch" } */
/* { dg-final { scan-assembler "vesraf\t%v.?,%v.?,31" } } */
/* { dg-final { scan-assembler "vesrah\t%v.?,%v.?,15" } } */
/* { dg-final { scan-assembler "vesrab\t%v.?,%v.?,7" } } */
/* { dg-final { scan-assembler-not "vzero\t*" } } */
/* { dg-final { scan-assembler "vesrlf\t%v.?,%v.?,31" } } */
/* { dg-final { scan-assembler "vesrlh\t%v.?,%v.?,15" } } */
/* { dg-final { scan-assembler "vesrlb\t%v.?,%v.?,7" } } */
/* Loop trip counts used by the int, short and signed char functions
   below (SZ, SZ2 and SZ3 respectively).  With the 32-, 16- and 8-bit
   element widths implied by the shift counts 31, 15 and 7 in the
   scan-assembler directives, each count covers 128 bits of data
   (4 x 32, 8 x 16, 16 x 8) -- presumably exactly one vector register,
   so that each loop can become a single vector operation; TODO
   confirm.  */
#define SZ 4
#define SZ2 8
#define SZ3 16
/* Divide each of the first SZ ints of W by two, in place.  Signed
   division truncates toward zero, so negative elements need a
   correction term before the final shift; that correction is the
   vector conditional this test is about.  */
void foo(int *w)
{
int i;
/* Should expand to (w + (w < 0 ? 1 : 0)) >> 1
   which in turn should get simplified to
   (w + ((unsigned) w >> 31)) >> 1, i.e. the conditional
   "w < 0 ? 1 : 0" becomes a logical right shift by 31.  */
for (i = 0; i < SZ; i++)
w[i] = w[i] / 2;
}
/* Divide each of the first SZ2 shorts of W by two, in place.  Same
   shape as the int division loop, but on short elements; the
   conditional correction term is presumably meant to be simplified
   to a shift by 15 here (see the halfword scan-assembler directives
   at the top of the file) -- TODO confirm which directive this
   function satisfies.  */
void foo2(short *w)
{
int i;
for (i = 0; i < SZ2; i++)
w[i] = w[i] / 2;
}
/* Divide each of the first SZ3 signed chars of W by two, in place.
   Same shape as the int division loop, but on byte elements; the
   conditional correction term is presumably meant to be simplified
   to a shift by 7 here (see the byte scan-assembler directives at
   the top of the file) -- TODO confirm which directive this function
   satisfies.  */
void foo3(signed char *w)
{
int i;
for (i = 0; i < SZ3; i++)
w[i] = w[i] / 2;
}
/* Replace each of the first SZ ints of X, in place, by -1 if it is
   negative and by 0 otherwise.  This is the "x < 0 ? -1 : 0" form of
   the vector conditional, which is expected to be turned into an
   arithmetic right shift by 31 instead of a compare-and-select.

   Always returns 0.  The return value carries no information; it
   exists only because the function is declared to return int.  */
int baz(int *x)
{
  int i;

  for (i = 0; i < SZ; i++)
    x[i] = x[i] < 0 ? -1 : 0;

  /* Do not fall off the end of a non-void function: that triggers
     -Wreturn-type and is undefined if a caller uses the value.  */
  return 0;
}
/* Replace each of the first SZ2 shorts of X, in place, by 0 if it is
   non-negative and by 1 otherwise.  This is the "x >= 0 ? 0 : 1" form
   of the vector conditional, which is expected to be turned into a
   logical right shift by 15 instead of a compare-and-select.

   Always returns 0.  The return value carries no information; it
   exists only because the function is declared to return int.  */
int baf(short *x)
{
  int i;

  for (i = 0; i < SZ2; i++)
    x[i] = x[i] >= 0 ? 0 : 1;

  /* Do not fall off the end of a non-void function: that triggers
     -Wreturn-type and is undefined if a caller uses the value.  */
  return 0;
}
/* Replace each of the first SZ3 signed chars of X, in place, by 0 if
   it is non-negative and by -1 otherwise.  This is the
   "x >= 0 ? 0 : -1" form of the vector conditional, which is expected
   to be turned into an arithmetic right shift by 7 instead of a
   compare-and-select.

   Always returns 0.  The return value carries no information; it
   exists only because the function is declared to return int.  */
int bal(signed char *x)
{
  int i;

  for (i = 0; i < SZ3; i++)
    x[i] = x[i] >= 0 ? 0 : -1;

  /* Do not fall off the end of a non-void function: that triggers
     -Wreturn-type and is undefined if a caller uses the value.  */
  return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment