Commit d07458be by James Greenhalgh Committed by James Greenhalgh

[PATCH, AARCH64] Fix unrecognizable insn issue with vcond against 0.0f

gcc/
	* config/aarch64/aarch64-simd.md (aarch64_vcond_internal): Fix
	floating-point vector comparisons against 0.

gcc/testsuite/
	* gcc.target/aarch64/vect-fcm.x: Add check for zero forms of
	inverse operands.
	* gcc.target/aarch64/vect-fcm-eq-d.c: Check that new zero form
	loop is vectorized.
	* gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
	* gcc.target/aarch64/vect-fcm-ge-d.c: Check that new zero form
	loop is vectorized and that the correct instruction is generated.
	* gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
	* gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
	* gcc.target/aarch64/vect-fcm-gt-f.c: Likewise.

From-SVN: r197741
parent 146b8692
2013-04-11 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-simd.md (aarch64_vcond_internal): Fix
floating-point vector comparisons against 0.
2013-04-11 Jakub Jelinek <jakub@redhat.com> 2013-04-11 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/56899 PR tree-optimization/56899
......
...@@ -1622,6 +1622,7 @@ ...@@ -1622,6 +1622,7 @@
"TARGET_SIMD" "TARGET_SIMD"
{ {
int inverse = 0; int inverse = 0;
int use_zero_form = 0;
int swap_bsl_operands = 0; int swap_bsl_operands = 0;
rtx mask = gen_reg_rtx (<V_cmp_result>mode); rtx mask = gen_reg_rtx (<V_cmp_result>mode);
rtx tmp = gen_reg_rtx (<V_cmp_result>mode); rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
...@@ -1632,12 +1633,16 @@ ...@@ -1632,12 +1633,16 @@
switch (GET_CODE (operands[3])) switch (GET_CODE (operands[3]))
{ {
case GE: case GE:
case GT:
case LE: case LE:
case LT:
case EQ: case EQ:
if (!REG_P (operands[5]) if (operands[5] == CONST0_RTX (<MODE>mode))
&& (operands[5] != CONST0_RTX (<MODE>mode))) {
operands[5] = force_reg (<MODE>mode, operands[5]); use_zero_form = 1;
break; break;
}
/* Fall through. */
default: default:
if (!REG_P (operands[5])) if (!REG_P (operands[5]))
operands[5] = force_reg (<MODE>mode, operands[5]); operands[5] = force_reg (<MODE>mode, operands[5]);
...@@ -1688,7 +1693,26 @@ ...@@ -1688,7 +1693,26 @@
a GT b -> a GT b a GT b -> a GT b
a LE b -> b GE a a LE b -> b GE a
a LT b -> b GT a a LT b -> b GT a
a EQ b -> a EQ b */ a EQ b -> a EQ b
Note that there also exist direct comparison against 0 forms,
so catch those as a special case. */
if (use_zero_form)
{
inverse = 0;
switch (GET_CODE (operands[3]))
{
case LT:
base_comparison = gen_aarch64_cmlt<mode>;
break;
case LE:
base_comparison = gen_aarch64_cmle<mode>;
break;
default:
/* Do nothing, other zero form cases already have the correct
base_comparison. */
break;
}
}
if (!inverse) if (!inverse)
emit_insn (base_comparison (mask, operands[4], operands[5])); emit_insn (base_comparison (mask, operands[4], operands[5]));
......
2013-04-11 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/vect-fcm.x: Add check for zero forms of
inverse operands.
* gcc.target/aarch64/vect-fcm-eq-d.c: Check that new zero form
loop is vectorized.
* gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
* gcc.target/aarch64/vect-fcm-ge-d.c: Check that new zero form
loop is vectorized and that the correct instruction is generated.
* gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
* gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
* gcc.target/aarch64/vect-fcm-gt-f.c: Likewise.
2013-04-11 Jakub Jelinek <jakub@redhat.com> 2013-04-11 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/56899 PR tree-optimization/56899
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#include "vect-fcm.x" #include "vect-fcm.x"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */ /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#include "vect-fcm.x" #include "vect-fcm.x"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */ /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
/* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */ /* { dg-final { scan-assembler "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
......
...@@ -7,8 +7,9 @@ ...@@ -7,8 +7,9 @@
#include "vect-fcm.x" #include "vect-fcm.x"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */ /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
/* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */ /* { dg-final { cleanup-saved-temps } } */
...@@ -7,8 +7,9 @@ ...@@ -7,8 +7,9 @@
#include "vect-fcm.x" #include "vect-fcm.x"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */ /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
/* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */ /* { dg-final { scan-assembler "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
/* { dg-final { scan-assembler "fcmlt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */ /* { dg-final { cleanup-saved-temps } } */
...@@ -7,8 +7,9 @@ ...@@ -7,8 +7,9 @@
#include "vect-fcm.x" #include "vect-fcm.x"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */ /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" } } */
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */ /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
/* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */ /* { dg-final { cleanup-saved-temps } } */
...@@ -7,8 +7,9 @@ ...@@ -7,8 +7,9 @@
#include "vect-fcm.x" #include "vect-fcm.x"
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */ /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */ /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" } } */
/* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */ /* { dg-final { scan-assembler "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
/* { dg-final { scan-assembler "fcmle\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */ /* { dg-final { cleanup-saved-temps } } */
...@@ -40,6 +40,15 @@ foobar (FTYPE *in1, FTYPE *in2, FTYPE *output) ...@@ -40,6 +40,15 @@ foobar (FTYPE *in1, FTYPE *in2, FTYPE *output)
output[i] = (in1[i] OP 0.0) ? 4.0 : 2.0; output[i] = (in1[i] OP 0.0) ? 4.0 : 2.0;
} }
/* Zero-form comparison test using the inverse operator: for each of the
   first N elements, store 4.0 in OUTPUT[i] when IN1[i] INV_OP 0.0 holds
   and 2.0 otherwise.  IN2 is accepted but never read.  FTYPE, N and
   INV_OP are defined outside this excerpt (presumably by the including
   test file -- confirm).  main compares this result element-wise against
   foobar's and aborts if any pair is equal.  */
void
foobarbar (FTYPE *in1, FTYPE *in2, FTYPE *output)
{
int i = 0;
/* Vectorizable. */
for (i = 0; i < N; i++)
/* Compare directly against the literal 0.0 rather than a second array.  */
output[i] = (in1[i] INV_OP 0.0) ? 4.0 : 2.0;
}
int int
main (int argc, char **argv) main (int argc, char **argv)
{ {
...@@ -51,6 +60,11 @@ main (int argc, char **argv) ...@@ -51,6 +60,11 @@ main (int argc, char **argv)
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
if (out1[i] != out2[i]) if (out1[i] != out2[i])
abort (); abort ();
foobar (input1, input2, out1);
foobarbar (input1, input2, out2);
for (i = 0; i < N; i++)
if (out1[i] == out2[i])
abort ();
return 0; return 0;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment