Commit 062468db by Robin Dapp Committed by Robin Dapp

S/390: Implement vector copysign.

This patch implements the vector copysign operation using vector select and a
signbit mask.

gcc/ChangeLog:

2019-02-08  Robin Dapp  <rdapp@linux.ibm.com>

	* config/s390/vector.md: Implement vector copysign.

gcc/testsuite/ChangeLog:

2019-02-08  Robin Dapp  <rdapp@linux.ibm.com>

	* gcc.target/s390/vector/vec-copysign-execute.c: New test.
	* gcc.target/s390/vector/vec-copysign.c: New test.

From-SVN: r268697
parent 94e35e0b
2019-02-08 Robin Dapp <rdapp@linux.ibm.com>
* config/s390/vector.md: Implement vector copysign.
2019-02-08 H.J. Lu <hongjiu.lu@intel.com>
* expr.c (expand_constructor): Correct indentations.
......
......@@ -1362,6 +1362,31 @@
operands[4] = CONST0_RTX (V2DImode);
})
; Vector copysign, implemented using vector select.
; Operand 0 receives the magnitude of operand 1 and the sign of operand 2.
; Operand 3 is an integer vector with only the sign bit set in each
; element; operand 4 is the zero vector it is compared against.  The
; if_then_else is presumably matched by the vector select insn, taking
; the bits of operand 1 where the mask is zero and the bits of operand 2
; where it is set -- confirm against the vsel pattern.
(define_expand "copysign<mode>3"
  [(set (match_operand:VFT 0 "register_operand" "")
	(if_then_else:VFT
	 (eq (match_dup 3)
	     (match_dup 4))
	 (match_operand:VFT 1 "register_operand" "")
	 (match_operand:VFT 2 "register_operand" "")))]
  "TARGET_VX"
{
  /* Build the per-element sign-bit constant.  wi::set_bit_in_zero is
     used instead of the host shift "1l << (sz - 1)": that shift
     overflows a signed long as soon as SZ reaches the width of long,
     i.e. always for 128-bit elements and for 64-bit elements on hosts
     whose long has 32 bits.  */
  int sz = GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
  int prec = GET_MODE_PRECISION (GET_MODE_INNER (<tointvec>mode));
  wide_int mask_val = wi::set_bit_in_zero (sz - 1, prec);

  /* immed_wide_int_const also copes with values that do not fit into a
     HOST_WIDE_INT, which GEN_INT (mask_val.to_shwi ()) cannot.  */
  rtx mask_elt
    = immed_wide_int_const (mask_val, GET_MODE_INNER (<tointvec>mode));

  /* Replicate the element constant across the whole integer vector.  */
  int nunits = GET_MODE_NUNITS (<tointvec>mode);
  rtvec v = rtvec_alloc (nunits);
  for (int i = 0; i < nunits; i++)
    RTVEC_ELT (v, i) = mask_elt;

  /* No gen_reg_rtx is needed for the mask: force_reg below provides the
     register that holds the constant vector.  */
  rtx mask = gen_rtx_CONST_VECTOR (<tointvec>mode, v);
  operands[3] = force_reg (<tointvec>mode, mask);
  operands[4] = CONST0_RTX (<tointvec>mode);
})
;;
;; Integer compares
......
2019-02-08 Robin Dapp <rdapp@linux.ibm.com>
* gcc.target/s390/vector/vec-copysign-execute.c: New test.
* gcc.target/s390/vector/vec-copysign.c: New test.
2019-02-08 Richard Biener <rguenther@suse.de>
PR tree-optimization/89247
......
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -mzarch -march=z13" } */
#include <math.h>
#include <assert.h>
/* Number of elements in each of the test arrays below.  */
#define N 20
/* Magnitude inputs for the double test: ordinary negative and positive
   values followed by special cases -- a NaN from nan ("123"), half of
   __DBL_MIN__, a negated NaN, and two products whose exact result is
   larger in magnitude than __DBL_MAX__ (presumably +/-infinity under
   IEEE 754 arithmetic).  */
double a[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9,
27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan("123"), __DBL_MIN__ / 2.0,
-nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199};
/* Sign inputs for the double test: values of mixed sign, including both
   0. and -0. near the end.  */
double b[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0,
7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3};
/* Results written by foo ().  */
double r[N];
/* Reference results written by check ().  */
double r2[N];
/* Store copysign (a[i], b[i]) into r[i] for every i in [0, N).
   The file is compiled with -ftree-vectorize, so this is presumably the
   loop expected to use the vector copysign expansion -- confirm against
   the generated code.  */
void
foo (void)
{
for (int i = 0; i < N; i++)
r[i] = copysign (a[i], b[i]);
}
/* Recompute copysign (a[i], b[i]) into r2[i] with tree vectorization
   disabled for this function, and assert that each element agrees with
   r[i] as produced by foo ().  */
__attribute__((optimize("no-tree-vectorize")))
void
check (void)
{
for (int i = 0; i < N; i++)
{
r2[i] = copysign (a[i], b[i]);
/* NaN never compares equal, so NaN results are accepted when both
   values are NaN and their sign bits match.  */
assert (r[i] == r2[i]
|| (isnan (r[i]) && isnan (r2[i])
&& signbit (r[i]) == signbit (r2[i])));
}
}
/* Magnitude inputs for the float test.  The initializers are the same
   double expressions used for a[], converted to float on initialization;
   the __DBL_MIN__ and __DBL_MAX__ based entries are outside the range of
   float and presumably become zero and +/-infinity -- confirm.  */
float af[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9,
27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan("123"), __DBL_MIN__ / 2.0,
-nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199};
/* Sign inputs for the float test, same values as b[].  */
float bf[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0,
7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3};
/* Results written by foof ().  */
float rf[N];
/* Reference results written by checkf ().  */
float rf2[N];
/* Store copysignf (af[i], bf[i]) into rf[i] for every i in [0, N).
   This function is compiled for arch=z14 via the target attribute.
   NOTE(review): the file's options select -march=z13 and the test is
   executed; running this function on hardware older than z14 would
   presumably fault -- confirm whether an effective-target guard is
   needed.  */
__attribute__ ((__target__ ("arch=z14")))
void
foof (void)
{
for (int i = 0; i < N; i++)
rf[i] = copysignf (af[i], bf[i]);
}
/* Float counterpart of check (): recompute copysignf (af[i], bf[i]) into
   rf2[i] with tree vectorization disabled for this function, and assert
   that each element agrees with rf[i] as produced by foof ().  */
__attribute__((optimize("no-tree-vectorize")))
void
checkf (void)
{
for (int i = 0; i < N; i++)
{
rf2[i] = copysignf (af[i], bf[i]);
/* NaN never compares equal, so NaN results are accepted when both
   values are NaN and their sign bits match.  */
assert (rf[i] == rf2[i]
|| (isnan (rf[i]) && isnan (rf2[i])
&& signbit (rf[i]) == signbit (rf2[i])));
}
}
/* Run the double test followed by the float test; a mismatch aborts via
   assert in check () or checkf ().  */
int main()
{
foo ();
check ();
foof ();
checkf ();
/* r[0] is copysign (-0.1, -1.2); converting that to int truncates to 0,
   so the exit status is 0 when the assertions passed.  */
return r[0];
}
/* { dg-do compile { target { s390*-*-* } } } */
/* { dg-options "-O2 -ftree-vectorize -mzarch" } */
/* { dg-final { scan-assembler-times "vgmg" 1 } } */
/* { dg-final { scan-assembler-times "vgmf" 1 } } */
/* { dg-final { scan-assembler-times "vsel" 2 } } */
#include <math.h>
/* Number of elements in each of the test arrays below.  */
#define N 20
/* Magnitude inputs for the double loop: ordinary negative and positive
   values followed by NaNs of either sign, half of __DBL_MIN__, and two
   products whose exact result is larger in magnitude than
   __DBL_MAX__.  */
double a[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9,
27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan ("123"), __DBL_MIN__ / 2.0,
-nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199};
/* Sign inputs for the double loop, including both 0. and -0.  */
double b[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0,
7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3};
/* Results written by foo ().  */
double r[N];
/* Magnitude inputs for the float loop; the double initializers are
   converted to float on initialization.  */
float af[N] = {-0.1, -3.2, -6.3, -9.4, -12.5, -15.6, -18.7, -21.8, 24.9,
27.1, 30.2, 33.3, 36.4, 39.5, 42.6, nan ("123"), __DBL_MIN__ / 2.0,
-nan ("1"), __DBL_MAX__ * 2.0, -__DBL_MAX__ * 1e199};
/* Sign inputs for the float loop, same values as b[].  */
float bf[N] = {-1.2, 3.4, -5.6, 7.8, -9.0, 1.0, -2.0, 3.0, -4.0, -5.0, 6.0,
7.0, -8.0, -9.0, 10.0, -11.0, -1., 0., -0., 1.3};
/* Results written by foof ().  */
float rf[N];
/* Double copysign loop, compiled for arch=z13 via the target attribute.
   The scan-assembler directives of this file expect one "vgmg" and two
   "vsel" in total; presumably this loop accounts for the "vgmg" and one
   of the "vsel" -- confirm against the assembler output.  */
__attribute__ ((__target__ ("arch=z13")))
void
foo (void)
{
for (int i = 0; i < N; i++)
r[i] = copysign (a[i], b[i]);
}
/* Float copysign loop, compiled for arch=z14 via the target attribute.
   The scan-assembler directives of this file expect one "vgmf" and two
   "vsel" in total; presumably this loop accounts for the "vgmf" and one
   of the "vsel" -- confirm against the assembler output.  */
__attribute__ ((__target__ ("arch=z14")))
void
foof (void)
{
for (int i = 0; i < N; i++)
rf[i] = copysignf (af[i], bf[i]);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment