[PATCH, AARCH64] Fix unrecognizable insn issue with vcond against 0.0f

gcc/ * config/aarch64/aarch64-simd.md (aarch64_vcond_internal): Fix floating-point vector comparisons against 0. gcc/testsuite/ * gcc.target/aarch64/vect-fcm.x: Add check for zero forms of inverse operands. * gcc.target/aarch64/vect-fcm-eq-d.c: Check that new zero form loop is vectorized. * gcc.target/aarch64/vect-fcm-eq-f.c: Likewise. * gcc.target/aarch64/vect-fcm-ge-d.c: Check that new zero form loop is vectorized and that the correct instruction is generated. * gcc.target/aarch64/vect-fcm-ge-f.c: Likewise. * gcc.target/aarch64/vect-fcm-gt-d.c: Likewise. * gcc.target/aarch64/vect-fcm-gt-f.c: Likewise. From-SVN: r197741

[PATCH, AARCH64] Fix unrecognizable insn issue with vcond against 0.0f
gcc/ * config/aarch64/aarch64-simd.md (aarch64_vcond_internal): Fix floating-point vector comparisons against 0. gcc/testsuite/ * gcc.target/aarch64/vect-fcm.x: Add check for zero forms of inverse operands. * gcc.target/aarch64/vect-fcm-eq-d.c: Check that new zero form loop is vectorized. * gcc.target/aarch64/vect-fcm-eq-f.c: Likewise. * gcc.target/aarch64/vect-fcm-ge-d.c: Check that new zero form loop is vectorized and that the correct instruction is generated. * gcc.target/aarch64/vect-fcm-ge-f.c: Likewise. * gcc.target/aarch64/vect-fcm-gt-d.c: Likewise. * gcc.target/aarch64/vect-fcm-gt-f.c: Likewise. From-SVN: r197741
d07458be · James Greenhalgh · James Greenhalgh · 146b8692 · d07458be · d07458be
Commit d07458be authored Apr 11, 2013 by James Greenhalgh Committed by James Greenhalgh Apr 11, 2013
10 changed files
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2013-04-11  James Greenhalgh  <james.greenhalgh@arm.com>
+	* config/aarch64/aarch64-simd.md (aarch64_vcond_internal): Fix
+	floating-point vector comparisons against 0.
 2013-04-11  Jakub Jelinek  <jakub@redhat.com>
 	PR tree-optimization/56899

--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1622,6 +1622,7 @@
  "TARGET_SIMD"
 {
  int inverse = 0;
+  int use_zero_form = 0;
  int swap_bsl_operands = 0;
  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
  rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
@@ -1632,12 +1633,16 @@
  switch (GET_CODE (operands[3]))
    {
    case GE:
+    case GT:
    case LE:
+    case LT:
    case EQ:
-      if (!REG_P (operands[5])
+      if (operands[5] == CONST0_RTX (<MODE>mode))
-	  && (operands[5] != CONST0_RTX (<MODE>mode)))
+	{
-	operands[5] = force_reg (<MODE>mode, operands[5]);
+	  use_zero_form = 1;
-      break;
+	  break;
+	}
+      /* Fall through.  */
    default:
      if (!REG_P (operands[5]))
 	operands[5] = force_reg (<MODE>mode, operands[5]);
@@ -1688,7 +1693,26 @@
 	 a GT b -> a GT b
 	 a LE b -> b GE a
 	 a LT b -> b GT a
-	 a EQ b -> a EQ b  */
+	 a EQ b -> a EQ b
+	 Note that there also exist direct comparison against 0 forms,
+	 so catch those as a special case.  */
+      if (use_zero_form)
+	{
+	  inverse = 0;
+	  switch (GET_CODE (operands[3]))
+	    {
+	    case LT:
+	      base_comparison = gen_aarch64_cmlt<mode>;
+	      break;
+	    case LE:
+	      base_comparison = gen_aarch64_cmle<mode>;
+	      break;
+	    default:
+	      /* Do nothing, other zero form cases already have the correct
+		 base_comparison.  */
+	      break;
+	    }
+	}
      if (!inverse)
 	emit_insn (base_comparison (mask, operands[4], operands[5]));

--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
+2013-04-11  James Greenhalgh  <james.greenhalgh@arm.com>
+	* gcc.target/aarch64/vect-fcm.x: Add check for zero forms of
+	inverse operands.
+	* gcc.target/aarch64/vect-fcm-eq-d.c: Check that new zero form
+	loop is vectorized.
+	* gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
+	* gcc.target/aarch64/vect-fcm-ge-d.c: Check that new zero form
+	loop is vectorized and that the correct instruction is generated.
+	* gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
+	* gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
+	* gcc.target/aarch64/vect-fcm-gt-f.c: Likewise.
 2013-04-11  Jakub Jelinek  <jakub@redhat.com>
 	PR tree-optimization/56899

--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-d.c
@@ -7,7 +7,7 @@
 #include "vect-fcm.x"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
 /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
 /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-eq-f.c
@@ -7,7 +7,7 @@
 #include "vect-fcm.x"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
 /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
 /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */

--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-d.c
@@ -7,8 +7,9 @@
 #include "vect-fcm.x"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
 /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
 /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
+/* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-ge-f.c
@@ -7,8 +7,9 @@
 #include "vect-fcm.x"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
 /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
 /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
+/* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-d.c
@@ -7,8 +7,9 @@
 #include "vect-fcm.x"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
 /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
 /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
+/* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm-gt-f.c
@@ -7,8 +7,9 @@
 #include "vect-fcm.x"
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
 /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
 /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
+/* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
 /* { dg-final { cleanup-tree-dump "vect" } } */
 /* { dg-final { cleanup-saved-temps } } */
--- a/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fcm.x
@@ -40,6 +40,15 @@ foobar (FTYPE *in1, FTYPE *in2, FTYPE *output)
    output[i] = (in1[i] OP 0.0) ? 4.0 : 2.0;
 }
+void
+foobarbar (FTYPE *in1, FTYPE *in2, FTYPE *output)
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = (in1[i] INV_OP 0.0) ? 4.0 : 2.0;
+}
 int
 main (int argc, char **argv)
 {
@@ -51,6 +60,11 @@ main (int argc, char **argv)
  for (i = 0; i < N; i++)
    if (out1[i] != out2[i])
      abort ();
+  foobar (input1, input2, out1);
+  foobarbar (input1, input2, out2);
+  for (i = 0; i < N; i++)
+    if (out1[i] == out2[i])
+      abort ();
  return 0;
 }