Commit 70c67693 by James Greenhalgh Committed by James Greenhalgh

[AArch64] Fix unordered comparisons to floating-point vcond.

gcc/
	* config/aarch64/aarch64-simd.md
	(aarch64_vcond_internal<mode>): Handle unordered cases.
	* config/aarch64/iterators.md (v_cmp_result): New.

gcc/testsuite/
	* gcc.target/aarch64/vect-fcm-gt-f.c: Change expected output.
	* gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
	* gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
	* gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
	* gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.

From-SVN: r195297
parent f5ce60b6
2013-01-18 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-simd.md
(aarch64_vcond_internal<mode>): Handle unordered cases.
* config/aarch64/iterators.md (v_cmp_result): New.
2013-01-18 Yi-Hsiu Hsu <ahsu@marvell.com>
Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
......
......@@ -1586,37 +1586,128 @@
"TARGET_SIMD"
{
int inverse = 0;
int swap_bsl_operands = 0;
rtx mask = gen_reg_rtx (<V_cmp_result>mode);
rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
if (!REG_P (operands[5])
&& (operands[5] != CONST0_RTX (<MODE>mode)))
operands[5] = force_reg (<MODE>mode, operands[5]);
rtx (*base_comparison) (rtx, rtx, rtx);
rtx (*complimentary_comparison) (rtx, rtx, rtx);
switch (GET_CODE (operands[3]))
{
case GE:
case LE:
case EQ:
if (!REG_P (operands[5])
&& (operands[5] != CONST0_RTX (<MODE>mode)))
operands[5] = force_reg (<MODE>mode, operands[5]);
break;
default:
if (!REG_P (operands[5]))
operands[5] = force_reg (<MODE>mode, operands[5]);
}
switch (GET_CODE (operands[3]))
{
case LT:
case UNLT:
inverse = 1;
/* Fall through. */
case GE:
emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
case UNGE:
case ORDERED:
case UNORDERED:
base_comparison = gen_aarch64_cmge<mode>;
complimentary_comparison = gen_aarch64_cmgt<mode>;
break;
case LE:
case UNLE:
inverse = 1;
/* Fall through. */
case GT:
emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
case UNGT:
base_comparison = gen_aarch64_cmgt<mode>;
complimentary_comparison = gen_aarch64_cmge<mode>;
break;
case EQ:
case NE:
inverse = 1;
/* Fall through. */
case UNEQ:
base_comparison = gen_aarch64_cmeq<mode>;
complimentary_comparison = gen_aarch64_cmeq<mode>;
break;
default:
gcc_unreachable ();
}
switch (GET_CODE (operands[3]))
{
case LT:
case LE:
case GT:
case GE:
case EQ:
emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
/* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
a GE b -> a GE b
a GT b -> a GT b
a LE b -> b GE a
a LT b -> b GT a
a EQ b -> a EQ b */
if (!inverse)
emit_insn (base_comparison (mask, operands[4], operands[5]));
else
emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
break;
case UNLT:
case UNLE:
case UNGT:
case UNGE:
case NE:
/* FCM returns false for lanes which are unordered, so if we use
the inverse of the comparison we actually want to emit, then
swap the operands to BSL, we will end up with the correct result.
Note that a NE NaN and NaN NE b are true for all a, b.
Our transformations are:
a UNGE b -> !(b GT a)
a UNGT b -> !(b GE a)
a UNLE b -> !(a GT b)
a UNLT b -> !(a GE b)
a NE b -> !(a EQ b) */
if (inverse)
emit_insn (base_comparison (mask, operands[4], operands[5]));
else
emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
swap_bsl_operands = 1;
break;
case UNEQ:
/* We check (a > b || b > a). Combining these comparisons gives us
true iff (a != b && a ORDERED b); swapping the operands to BSL
will then give us (a == b || a UNORDERED b) as intended. */
emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[5], operands[4]));
emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
swap_bsl_operands = 1;
break;
case UNORDERED:
/* Operands are ORDERED iff (a > b || b >= a).
Swapping the operands to BSL will give the UNORDERED case. */
swap_bsl_operands = 1;
/* Fall through. */
case ORDERED:
emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[4], operands[5]));
emit_insn (gen_aarch64_cmge<mode> (mask, operands[5], operands[4]));
emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
break;
default:
gcc_unreachable ();
}
if (inverse)
if (swap_bsl_operands)
emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[2],
operands[1]));
else
......
......@@ -430,6 +430,14 @@
(V2SF "V2SI") (V4SF "V4SI")
(V2DF "V2DI")])
;; Lower case mode of results of comparison operations.
;; This is the lower-case spelling used when building pattern names from
;; the comparison-result mode (e.g. gen_ior<v_cmp_result>3).  Integer modes
;; map to themselves; each floating-point vector mode maps to the integer
;; vector mode with the same number of lanes and lane width
;; (V2SF -> v2si, V4SF -> v4si, V2DF -> v2di).
(define_mode_attr v_cmp_result [(V8QI "v8qi") (V16QI "v16qi")
(V4HI "v4hi") (V8HI "v8hi")
(V2SI "v2si") (V4SI "v4si")
(DI "di") (V2DI "v2di")
(V2SF "v2si") (V4SF "v4si")
(V2DF "v2di")])
;; Vm for lane instructions is restricted to FP_LO_REGS.
(define_mode_attr vwx [(V4HI "x") (V8HI "x") (HI "x")
(V2SI "w") (V4SI "w") (SI "w")])
......
2013-01-18 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/vect-fcm-gt-f.c: Change expected output.
* gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
* gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
* gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
* gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
2013-01-17 Jeff Law <law@redhat.com>
* gcc.dg/pr52573.c: Move to...
......
......@@ -8,7 +8,7 @@
#include "vect-fcm.x"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */
/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */
......@@ -8,7 +8,7 @@
#include "vect-fcm.x"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */
/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */
......@@ -8,7 +8,7 @@
#include "vect-fcm.x"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */
/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */
......@@ -8,7 +8,7 @@
#include "vect-fcm.x"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */
/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */
......@@ -8,7 +8,7 @@
#include "vect-fcm.x"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */
/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */
......@@ -8,7 +8,7 @@
#include "vect-fcm.x"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */
/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment