Commit 6c553b76 by Bin Cheng

iterators.md (V_cmp_mixed, [...]): New.

	* config/aarch64/iterators.md (V_cmp_mixed, v_cmp_mixed): New.
	* config/aarch64/aarch64-simd.md (<su><maxmin>v2di3): Call
	gen_vcondv2div2di instead of gen_aarch64_vcond_internalv2div2di.
	(aarch64_vcond_internal<mode><mode>): Delete pattern.
	(aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>): Ditto.
	(vcond<v_cmp_result><mode>): Ditto.
	(vcond<mode><mode>): Re-implement using vec_cmp and vcond_mask.
	(vcondu<mode><mode>): Ditto.
	(vcond<v_cmp_mixed><mode>): New pattern.
	(vcondu<mode><v_cmp_mixed>): New pattern.
	(aarch64_cmtst<mode>): Revise comment using aarch64_vcond instead
	of aarch64_vcond_internal.

	gcc/testsuite
	* gcc.target/aarch64/simd/vcond-ne.c: New test.

From-SVN: r239328
parent 45d569f3
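
Per the ChangeLog below, the vcond/vcondu expanders are re-implemented in terms of vec_cmp (build a per-lane comparison mask) and vcond_mask (select between the two value vectors under that mask). A minimal sketch of the same two steps in C with GCC vector extensions; illustrative only, not code from this patch:

typedef float v4sf __attribute__ ((vector_size (16)));
typedef int   v4si __attribute__ ((vector_size (16)));

/* Per lane: (a[i] > b[i]) ? x[i] : y[i].  */
v4si
vcond_like (v4sf a, v4sf b, v4si x, v4si y)
{
  v4si mask = a > b;                /* vec_cmp step: -1 where true, 0 where false.  */
  return (x & mask) | (y & ~mask);  /* vcond_mask step: a BSL-style bitwise select.  */
}

On AArch64 at -O2 a function like this typically compiles to a single compare (fcmgt) feeding a bsl, which is the instruction pair the new expansion emits.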
gcc/ChangeLog
@@ -2,6 +2,23 @@
	    Renlin Li  <renlin.li@arm.com>
	    Bin Cheng  <bin.cheng@arm.com>

	* config/aarch64/iterators.md (V_cmp_mixed, v_cmp_mixed): New.
	* config/aarch64/aarch64-simd.md (<su><maxmin>v2di3): Call
	gen_vcondv2div2di instead of gen_aarch64_vcond_internalv2div2di.
	(aarch64_vcond_internal<mode><mode>): Delete pattern.
	(aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>): Ditto.
	(vcond<v_cmp_result><mode>): Ditto.
	(vcond<mode><mode>): Re-implement using vec_cmp and vcond_mask.
	(vcondu<mode><mode>): Ditto.
	(vcond<v_cmp_mixed><mode>): New pattern.
	(vcondu<mode><v_cmp_mixed>): New pattern.
	(aarch64_cmtst<mode>): Revise comment using aarch64_vcond instead
	of aarch64_vcond_internal.

2016-08-10  Alan Lawrence  <alan.lawrence@arm.com>
	    Renlin Li  <renlin.li@arm.com>
	    Bin Cheng  <bin.cheng@arm.com>

	* config/aarch64/aarch64-simd.md (vec_cmp<mode><mode>): New pattern.
	(vec_cmp<mode><v_cmp_result>): New pattern.
	(vec_cmpu<mode><mode>): New pattern.

gcc/config/aarch64/aarch64-simd.md
@@ -1087,7 +1087,7 @@
    }
  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
-  emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
+  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
	      operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})
@@ -2560,314 +2560,6 @@
  DONE;
})
(define_expand "aarch64_vcond_internal<mode><mode>"
[(set (match_operand:VSDQ_I_DI 0 "register_operand")
(if_then_else:VSDQ_I_DI
(match_operator 3 "comparison_operator"
[(match_operand:VSDQ_I_DI 4 "register_operand")
(match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
(match_operand:VSDQ_I_DI 1 "nonmemory_operand")
(match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
rtx op1 = operands[1];
rtx op2 = operands[2];
rtx mask = gen_reg_rtx (<MODE>mode);
enum rtx_code code = GET_CODE (operands[3]);
/* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
and desirable for other comparisons if it results in FOO ? -1 : 0
(this allows direct use of the comparison result without a bsl). */
if (code == NE
|| (code != EQ
&& op1 == CONST0_RTX (<V_cmp_result>mode)
&& op2 == CONSTM1_RTX (<V_cmp_result>mode)))
{
op1 = operands[2];
op2 = operands[1];
switch (code)
{
case LE: code = GT; break;
case LT: code = GE; break;
case GE: code = LT; break;
case GT: code = LE; break;
/* No case EQ. */
case NE: code = EQ; break;
case LTU: code = GEU; break;
case LEU: code = GTU; break;
case GTU: code = LEU; break;
case GEU: code = LTU; break;
default: gcc_unreachable ();
}
}
/* Make sure we can handle the last operand. */
switch (code)
{
case NE:
/* Normalized to EQ above. */
gcc_unreachable ();
case LE:
case LT:
case GE:
case GT:
case EQ:
/* These instructions have a form taking an immediate zero. */
if (operands[5] == CONST0_RTX (<MODE>mode))
break;
/* Fall through, as may need to load into register. */
default:
if (!REG_P (operands[5]))
operands[5] = force_reg (<MODE>mode, operands[5]);
break;
}
switch (code)
{
case LT:
emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
break;
case GE:
emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
break;
case LE:
emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
break;
case GT:
emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
break;
case LTU:
emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
break;
case GEU:
emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
break;
case LEU:
emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
break;
case GTU:
emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
break;
/* NE has been normalized to EQ above. */
case EQ:
emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
break;
default:
gcc_unreachable ();
}
/* If we have (a = (b CMP c) ? -1 : 0);
Then we can simply move the generated mask. */
if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
&& op2 == CONST0_RTX (<V_cmp_result>mode))
emit_move_insn (operands[0], mask);
else
{
if (!REG_P (op1))
op1 = force_reg (<MODE>mode, op1);
if (!REG_P (op2))
op2 = force_reg (<MODE>mode, op2);
emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
op1, op2));
}
DONE;
})
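
The early-out above — when the requested values are literally -1 and 0, the comparison mask is already the final result and no bsl is needed — is easy to see at the C level. A small illustration with GCC vector extensions, not taken from the patch:

typedef int v4si __attribute__ ((vector_size (16)));

/* (a[i] > b[i]) ? -1 : 0 is exactly the comparison mask, so no separate
   select instruction is required.  */
v4si
cmp_mask (v4si a, v4si b)
{
  return a > b;
}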
(define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
[(set (match_operand:VDQF_COND 0 "register_operand")
(if_then_else:VDQF
(match_operator 3 "comparison_operator"
[(match_operand:VDQF 4 "register_operand")
(match_operand:VDQF 5 "nonmemory_operand")])
(match_operand:VDQF_COND 1 "nonmemory_operand")
(match_operand:VDQF_COND 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
int inverse = 0;
int use_zero_form = 0;
int swap_bsl_operands = 0;
rtx op1 = operands[1];
rtx op2 = operands[2];
rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
rtx (*base_comparison) (rtx, rtx, rtx);
rtx (*complimentary_comparison) (rtx, rtx, rtx);
switch (GET_CODE (operands[3]))
{
case GE:
case GT:
case LE:
case LT:
case EQ:
if (operands[5] == CONST0_RTX (<MODE>mode))
{
use_zero_form = 1;
break;
}
/* Fall through. */
default:
if (!REG_P (operands[5]))
operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
}
switch (GET_CODE (operands[3]))
{
case LT:
case UNLT:
inverse = 1;
/* Fall through. */
case GE:
case UNGE:
case ORDERED:
case UNORDERED:
base_comparison = gen_aarch64_cmge<VDQF:mode>;
complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
break;
case LE:
case UNLE:
inverse = 1;
/* Fall through. */
case GT:
case UNGT:
base_comparison = gen_aarch64_cmgt<VDQF:mode>;
complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
break;
case EQ:
case NE:
case UNEQ:
base_comparison = gen_aarch64_cmeq<VDQF:mode>;
complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
break;
default:
gcc_unreachable ();
}
switch (GET_CODE (operands[3]))
{
case LT:
case LE:
case GT:
case GE:
case EQ:
/* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
a GE b -> a GE b
a GT b -> a GT b
a LE b -> b GE a
a LT b -> b GT a
a EQ b -> a EQ b
Note that there also exist direct comparison against 0 forms,
so catch those as a special case. */
if (use_zero_form)
{
inverse = 0;
switch (GET_CODE (operands[3]))
{
case LT:
base_comparison = gen_aarch64_cmlt<VDQF:mode>;
break;
case LE:
base_comparison = gen_aarch64_cmle<VDQF:mode>;
break;
default:
/* Do nothing, other zero form cases already have the correct
base_comparison. */
break;
}
}
if (!inverse)
emit_insn (base_comparison (mask, operands[4], operands[5]));
else
emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
break;
case UNLT:
case UNLE:
case UNGT:
case UNGE:
case NE:
/* FCM returns false for lanes which are unordered, so if we use
the inverse of the comparison we actually want to emit, then
swap the operands to BSL, we will end up with the correct result.
Note that a NE NaN and NaN NE b are true for all a, b.
Our transformations are:
a GE b -> !(b GT a)
a GT b -> !(b GE a)
a LE b -> !(a GT b)
a LT b -> !(a GE b)
a NE b -> !(a EQ b) */
if (inverse)
emit_insn (base_comparison (mask, operands[4], operands[5]));
else
emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
swap_bsl_operands = 1;
break;
case UNEQ:
/* We check (a > b || b > a). combining these comparisons give us
true iff !(a != b && a ORDERED b), swapping the operands to BSL
will then give us (a == b || a UNORDERED b) as intended. */
emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
swap_bsl_operands = 1;
break;
case UNORDERED:
/* Operands are ORDERED iff (a > b || b >= a).
Swapping the operands to BSL will give the UNORDERED case. */
swap_bsl_operands = 1;
/* Fall through. */
case ORDERED:
emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
break;
default:
gcc_unreachable ();
}
if (swap_bsl_operands)
{
op1 = operands[2];
op2 = operands[1];
}
/* If we have (a = (b CMP c) ? -1 : 0);
Then we can simply move the generated mask. */
if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
&& op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
emit_move_insn (operands[0], mask);
else
{
if (!REG_P (op1))
op1 = force_reg (<VDQF_COND:MODE>mode, op1);
if (!REG_P (op2))
op2 = force_reg (<VDQF_COND:MODE>mode, op2);
emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
op1, op2));
}
DONE;
})
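
The ORDERED/UNORDERED cases in the deleted expander above rest on the identity that two lanes are ordered iff a > b || b >= a, since any comparison involving a NaN lane is false. The same identity in C with GCC vector extensions; an illustration only, not part of the patch:

typedef float v4sf __attribute__ ((vector_size (16)));
typedef int   v4si __attribute__ ((vector_size (16)));

v4si
ordered (v4sf a, v4sf b)
{
  return (a > b) | (b >= a);   /* all-ones lanes where neither input is NaN */
}

v4si
unordered (v4sf a, v4sf b)
{
  return ~ordered (a, b);      /* all-ones lanes where either input is NaN */
}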
(define_expand "vcond<mode><mode>" (define_expand "vcond<mode><mode>"
[(set (match_operand:VALLDI 0 "register_operand") [(set (match_operand:VALLDI 0 "register_operand")
(if_then_else:VALLDI (if_then_else:VALLDI
...@@ -2878,26 +2570,36 @@ ...@@ -2878,26 +2570,36 @@
(match_operand:VALLDI 2 "nonmemory_operand")))] (match_operand:VALLDI 2 "nonmemory_operand")))]
"TARGET_SIMD" "TARGET_SIMD"
{ {
emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1], rtx mask = gen_reg_rtx (<V_cmp_result>mode);
operands[2], operands[3], enum rtx_code code = GET_CODE (operands[3]);
operands[4], operands[5]));
emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
operands[4], operands[5]));
emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
operands[2], mask));
DONE; DONE;
}) })
(define_expand "vcond<v_cmp_result><mode>" (define_expand "vcond<v_cmp_mixed><mode>"
[(set (match_operand:<V_cmp_result> 0 "register_operand") [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
(if_then_else:<V_cmp_result> (if_then_else:<V_cmp_mixed>
(match_operator 3 "comparison_operator" (match_operator 3 "comparison_operator"
[(match_operand:VDQF 4 "register_operand") [(match_operand:VDQF_COND 4 "register_operand")
(match_operand:VDQF 5 "nonmemory_operand")]) (match_operand:VDQF_COND 5 "nonmemory_operand")])
(match_operand:<V_cmp_result> 1 "nonmemory_operand") (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
(match_operand:<V_cmp_result> 2 "nonmemory_operand")))] (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
"TARGET_SIMD" "TARGET_SIMD"
{ {
emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> ( rtx mask = gen_reg_rtx (<V_cmp_result>mode);
enum rtx_code code = GET_CODE (operands[3]);
emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
operands[4], operands[5]));
emit_insn (gen_vcond_mask_<v_cmp_mixed><v_cmp_result> (
operands[0], operands[1], operands[0], operands[1],
operands[2], operands[3], operands[2], mask));
operands[4], operands[5]));
DONE; DONE;
}) })

@@ -2911,9 +2613,34 @@
	  (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
  "TARGET_SIMD"
{
-  emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
-						     operands[2], operands[3],
-						     operands[4], operands[5]));
+  rtx mask = gen_reg_rtx (<MODE>mode);
+  enum rtx_code code = GET_CODE (operands[3]);
+
+  emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
+				      operands[4], operands[5]));
+  emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
+						  operands[2], mask));
+  DONE;
+})
+
+(define_expand "vcondu<mode><v_cmp_mixed>"
+  [(set (match_operand:VDQF 0 "register_operand")
+	(if_then_else:VDQF
+	  (match_operator 3 "comparison_operator"
+	    [(match_operand:<V_cmp_mixed> 4 "register_operand")
+	     (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
+	  (match_operand:VDQF 1 "nonmemory_operand")
+	  (match_operand:VDQF 2 "nonmemory_operand")))]
+  "TARGET_SIMD"
+{
+  rtx mask = gen_reg_rtx (<V_cmp_result>mode);
+  enum rtx_code code = GET_CODE (operands[3]);
+
+  emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
+						mask, operands[3],
+						operands[4], operands[5]));
+  emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
+						  operands[2], mask));
  DONE;
})

@@ -4507,7 +4234,7 @@
;; cmtst
;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
-;; we don't have any insns using ne, and aarch64_vcond_internal outputs
+;; we don't have any insns using ne, and aarch64_vcond outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1.
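
The simplify_rtx rewrite quoted in this comment is ordinary two's-complement arithmetic: since ~x == -x - 1, we have not (neg e) == e - 1, i.e. plus e -1. A trivial C check of that identity; illustrative, not part of the patch:

#include <assert.h>

int
main (void)
{
  for (int e = -5; e <= 5; e++)
    assert (~(-e) == e - 1);   /* not (neg e) == plus e -1 */
  return 0;
}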

gcc/config/aarch64/iterators.md
@@ -662,6 +662,16 @@
				(V2DF "v2di") (DF   "di")
				(SF   "si")])

;; Mode for vector conditional operations where the comparison has
;; different type from the lhs.
(define_mode_attr V_cmp_mixed [(V2SI "V2SF") (V4SI "V4SF")
(V2DI "V2DF") (V2SF "V2SI")
(V4SF "V4SI") (V2DF "V2DI")])
(define_mode_attr v_cmp_mixed [(V2SI "v2sf") (V4SI "v4sf")
(V2DI "v2df") (V2SF "v2si")
(V4SF "v4si") (V2DF "v2di")])

;; Lower case element modes (as used in shift immediate patterns).
(define_mode_attr ve_mode [(V8QI "qi") (V16QI "qi")
			   (V4HI "hi") (V8HI  "hi")

gcc/testsuite/ChangeLog
2016-08-10  Bin Cheng  <bin.cheng@arm.com>

	* gcc.target/aarch64/simd/vcond-ne.c: New test.

2016-08-10  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/72853

gcc/testsuite/gcc.target/aarch64/simd/vcond-ne.c
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_float } */
/* { dg-require-effective-target vect_condition } */
float a,b,c;
float z[1024]; int ok[1024];
void foo()
{
int i;
for (i=0; i!=1024; ++i)
{
float rR = a*z[i];
float rL = b*z[i];
float rMin = (rR!=rL) ? rR : 0.0;
ok[i] = rMin-c;
}
}
/* { dg-final { scan-assembler-not "\[ \t\]not\[ \t\]" } } */
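
The dg-final line checks that no not/mvn instruction is emitted for the != controlled select: a select driven by != can always be expressed as an == compare with the select arms swapped. That equivalence, written with NEON intrinsics; illustrative only, not part of the test:

#include <arm_neon.h>

/* (a != b) ? x : y  is the same as  (a == b) ? y : x, so a bsl driven by
   the fcmeq mask with swapped arms needs no extra not/mvn.  */
float32x4_t
select_ne (float32x4_t a, float32x4_t b, float32x4_t x, float32x4_t y)
{
  uint32x4_t eq = vceqq_f32 (a, b);
  return vbslq_f32 (eq, y, x);
}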