Commit dd4ba939 by Ben Elliston

spu.c (spu_emit_branch_or_set): Remove PROCESSOR_CELL condition from HONOR_NANS test.

	* config/spu/spu.c (spu_emit_branch_or_set): Remove PROCESSOR_CELL
	condition from HONOR_NANS test.
	* config/spu/spu.md (ceq_df): Always generate comparison code
	inline, including checks for NaNs and infinities.
	(cgt_df): Likewise.
	(cgt_v2df): Likewise.
	(cmpdf): Make this expander unconditional.

testsuite/
	* gcc.target/spu/compare-dp.c: New test.

From-SVN: r130198
parent 4a648c5d
2007-11-15 Sa Liu <saliu@de.ibm.com>
* config/spu/spu.c (spu_emit_branch_or_set): Remove PROCESSOR_CELL
condition from HONOR_NANS test.
* config/spu/spu.md (ceq_df): Always generate comparison code
inline, including checks for NaNs and infinities.
(cgt_df): Likewise.
(cgt_v2df): Likewise.
(cmpdf): Make this expander unconditional.
2007-11-15 Richard Guenther <rguenther@suse.de>
* tree-ssa-alias.c (create_overlap_variables_for): Make sure
...@@ -765,7 +765,7 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[]) ...@@ -765,7 +765,7 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
{ {
case GE: case GE:
scode = SPU_GT; scode = SPU_GT;
if (HONOR_NANS (op_mode) && spu_arch == PROCESSOR_CELLEDP) if (HONOR_NANS (op_mode))
{ {
reverse_compare = 0; reverse_compare = 0;
reverse_test = 0; reverse_test = 0;
...@@ -780,7 +780,7 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[]) ...@@ -780,7 +780,7 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
break; break;
case LE: case LE:
scode = SPU_GT; scode = SPU_GT;
if (HONOR_NANS (op_mode) && spu_arch == PROCESSOR_CELLEDP) if (HONOR_NANS (op_mode))
{ {
reverse_compare = 1; reverse_compare = 1;
reverse_test = 0; reverse_test = 0;
...@@ -883,23 +883,9 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[]) ...@@ -883,23 +883,9 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
abort (); abort ();
} }
if (GET_MODE (spu_compare_op1) == DFmode) if (GET_MODE (spu_compare_op1) == DFmode
{ && (scode != SPU_GT && scode != SPU_EQ))
rtx reg = gen_reg_rtx (DFmode);
if ((!flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
|| (scode != SPU_GT && scode != SPU_EQ))
abort (); abort ();
if (spu_arch == PROCESSOR_CELL)
{
if (reverse_compare)
emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
else
emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
reverse_compare = 0;
spu_compare_op0 = reg;
spu_compare_op1 = CONST0_RTX (DFmode);
}
}
if (is_set == 0 && spu_compare_op1 == const0_rtx if (is_set == 0 && spu_compare_op1 == const0_rtx
&& (GET_MODE (spu_compare_op0) == SImode && (GET_MODE (spu_compare_op0) == SImode
......
...@@ -2534,32 +2534,70 @@ ...@@ -2534,32 +2534,70 @@
"" ""
"fcmeq\t%0,%1,%2") "fcmeq\t%0,%1,%2")
;; These implementations of ceq_df and cgt_df do not correctly handle ;; These implementations will ignore checking of NaN or INF if
;; NAN or INF. We will also get incorrect results when the result ;; compiled with option -ffinite-math-only.
;; of the double subtract is too small.
(define_expand "ceq_df" (define_expand "ceq_df"
[(set (match_operand:SI 0 "spu_reg_operand" "=r") [(set (match_operand:SI 0 "spu_reg_operand" "=r")
(eq:SI (match_operand:DF 1 "spu_reg_operand" "r") (eq:SI (match_operand:DF 1 "spu_reg_operand" "r")
(match_operand:DF 2 "const_zero_operand" "i")))] (match_operand:DF 2 "const_zero_operand" "i")))]
"" ""
{ {
if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL) if (spu_arch == PROCESSOR_CELL)
{
rtx ra = gen_reg_rtx (V4SImode);
rtx rb = gen_reg_rtx (V4SImode);
rtx temp = gen_reg_rtx (TImode);
rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
rtx temp2 = gen_reg_rtx (V4SImode);
rtx biteq = gen_reg_rtx (V4SImode);
rtx ahi_inf = gen_reg_rtx (V4SImode);
rtx a_nan = gen_reg_rtx (V4SImode);
rtx a_abs = gen_reg_rtx (V4SImode);
rtx b_abs = gen_reg_rtx (V4SImode);
rtx iszero = gen_reg_rtx (V4SImode);
rtx sign_mask = gen_reg_rtx (V4SImode);
rtx nan_mask = gen_reg_rtx (V4SImode);
rtx hihi_promote = gen_reg_rtx (TImode);
rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
0x7FFFFFFF, 0xFFFFFFFF);
emit_move_insn (sign_mask, pat);
pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
0x7FF00000, 0x0);
emit_move_insn (nan_mask, pat);
pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
0x08090A0B, 0x18191A1B);
emit_move_insn (hihi_promote, pat);
emit_insn (gen_spu_convert (ra, operands[1]));
emit_insn (gen_spu_convert (rb, operands[2]));
emit_insn (gen_ceq_v4si (biteq, ra, rb));
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
GEN_INT (4 * 8)));
emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
if (!flag_finite_math_only)
{
emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
GEN_INT (4 * 8)));
emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
}
emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs));
emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode)));
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
GEN_INT (4 * 8)));
emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
emit_insn (gen_iorv4si3 (temp2, biteq, iszero));
if (!flag_finite_math_only)
{ {
rtx s0_ti = gen_reg_rtx(TImode); emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
rtx s1_v4 = gen_reg_rtx(V4SImode); }
rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti); emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
rtx to_ti = gen_reg_rtx(TImode);
rtx to_v4 = gen_reg_rtx(V4SImode);
rtx l_v4 = gen_reg_rtx(V4SImode);
emit_insn (gen_spu_convert (l_v4, operands[1]));
emit_insn (gen_movv4si(s1_v4, spu_const(V4SImode, -0x80000000ll)));
emit_insn (gen_ceq_v4si(s0_v4, l_v4, CONST0_RTX(V4SImode)));
emit_insn (gen_ceq_v4si(s1_v4, l_v4, s1_v4));
emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
emit_insn (gen_spu_convert (to_v4, to_ti));
emit_insn (gen_iorv4si3(s1_v4, s0_v4, s1_v4));
emit_insn (gen_andv4si3(to_v4, to_v4, s1_v4));
emit_insn (gen_spu_convert (operands[0], to_v4));
DONE; DONE;
} }
}) })
...@@ -2777,22 +2815,100 @@ selb\t%0,%5,%0,%3" ...@@ -2777,22 +2815,100 @@ selb\t%0,%5,%0,%3"
(match_operand:DF 2 "const_zero_operand" "i")))] (match_operand:DF 2 "const_zero_operand" "i")))]
"" ""
{ {
if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL) if (spu_arch == PROCESSOR_CELL)
{
rtx ra = gen_reg_rtx (V4SImode);
rtx rb = gen_reg_rtx (V4SImode);
rtx zero = gen_reg_rtx (V4SImode);
rtx temp = gen_reg_rtx (TImode);
rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
rtx temp2 = gen_reg_rtx (V4SImode);
rtx hi_inf = gen_reg_rtx (V4SImode);
rtx a_nan = gen_reg_rtx (V4SImode);
rtx b_nan = gen_reg_rtx (V4SImode);
rtx a_abs = gen_reg_rtx (V4SImode);
rtx b_abs = gen_reg_rtx (V4SImode);
rtx asel = gen_reg_rtx (V4SImode);
rtx bsel = gen_reg_rtx (V4SImode);
rtx abor = gen_reg_rtx (V4SImode);
rtx bbor = gen_reg_rtx (V4SImode);
rtx gt_hi = gen_reg_rtx (V4SImode);
rtx gt_lo = gen_reg_rtx (V4SImode);
rtx sign_mask = gen_reg_rtx (V4SImode);
rtx nan_mask = gen_reg_rtx (V4SImode);
rtx hi_promote = gen_reg_rtx (TImode);
rtx borrow_shuffle = gen_reg_rtx (TImode);
rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
0x7FFFFFFF, 0xFFFFFFFF);
emit_move_insn (sign_mask, pat);
pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
0x7FF00000, 0x0);
emit_move_insn (nan_mask, pat);
pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
0x08090A0B, 0x08090A0B);
emit_move_insn (hi_promote, pat);
pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0,
0x0C0D0E0F, 0xC0C0C0C0);
emit_move_insn (borrow_shuffle, pat);
emit_insn (gen_spu_convert (ra, operands[1]));
emit_insn (gen_spu_convert (rb, operands[2]));
emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
if (!flag_finite_math_only)
{
/* check if ra is NaN */
emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
GEN_INT (4 * 8)));
emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
/* check if rb is NaN */
emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
GEN_INT (4 * 8)));
emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
/* check if ra or rb is NaN */
emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
}
emit_move_insn (zero, CONST0_RTX (V4SImode));
emit_insn (gen_ashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
emit_insn (gen_shufb (asel, asel, asel, hi_promote));
emit_insn (gen_bg_v4si (abor, zero, a_abs));
emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
emit_insn (gen_selb (abor, a_abs, abor, asel));
emit_insn (gen_ashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
emit_insn (gen_bg_v4si (bbor, zero, b_abs));
emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
emit_insn (gen_selb (bbor, b_abs, bbor, bsel));
emit_insn (gen_cgt_v4si (gt_hi, abor, bbor));
emit_insn (gen_clgt_v4si (gt_lo, abor, bbor));
emit_insn (gen_ceq_v4si (temp2, abor, bbor));
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
GEN_INT (4 * 8)));
emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
if (!flag_finite_math_only)
{ {
rtx s0_ti = gen_reg_rtx(TImode); /* correct for NaNs */
rtx s1_v4 = gen_reg_rtx(V4SImode); emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti); }
rtx to_ti = gen_reg_rtx(TImode); emit_insn (gen_spu_convert (operands[0], temp2));
rtx to_v4 = gen_reg_rtx(V4SImode);
rtx l_v4 = gen_reg_rtx(V4SImode);
emit_insn (gen_spu_convert(l_v4, operands[1]));
emit_insn (gen_ceq_v4si(s0_v4, l_v4, const0_rtx));
emit_insn (gen_cgt_v4si(s1_v4, l_v4, const0_rtx));
emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
emit_insn (gen_spu_convert(to_v4, to_ti));
emit_insn (gen_andc_v4si(to_v4, s0_v4, to_v4));
emit_insn (gen_iorv4si3(to_v4, to_v4, s1_v4));
emit_insn (gen_spu_convert(operands[0], to_v4));
DONE; DONE;
} }
}) })
...@@ -2855,17 +2971,17 @@ selb\t%0,%5,%0,%3" ...@@ -2855,17 +2971,17 @@ selb\t%0,%5,%0,%3"
0x0C0D0E0F, 0xC0C0C0C0); 0x0C0D0E0F, 0xC0C0C0C0);
emit_move_insn (borrow_shuffle, pat); emit_move_insn (borrow_shuffle, pat);
emit_insn (gen_andv4si3 (a_nan, ra, sign_mask)); emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
emit_insn (gen_ceq_v4si (hi_inf, a_nan, nan_mask)); emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
emit_insn (gen_clgt_v4si (a_nan, a_nan, nan_mask)); emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan), emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
GEN_INT (4 * 8))); GEN_INT (4 * 8)));
emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2)); emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote)); emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
emit_insn (gen_andv4si3 (b_nan, rb, sign_mask)); emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
emit_insn (gen_ceq_v4si (hi_inf, b_nan, nan_mask)); emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
emit_insn (gen_clgt_v4si (b_nan, b_nan, nan_mask)); emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan), emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
GEN_INT (4 * 8))); GEN_INT (4 * 8)));
emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf)); emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
...@@ -2875,14 +2991,12 @@ selb\t%0,%5,%0,%3" ...@@ -2875,14 +2991,12 @@ selb\t%0,%5,%0,%3"
emit_move_insn (zero, CONST0_RTX (V4SImode)); emit_move_insn (zero, CONST0_RTX (V4SImode));
emit_insn (gen_ashrv4si3 (asel, ra, spu_const (V4SImode, 31))); emit_insn (gen_ashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
emit_insn (gen_shufb (asel, asel, asel, hi_promote)); emit_insn (gen_shufb (asel, asel, asel, hi_promote));
emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
emit_insn (gen_bg_v4si (abor, zero, a_abs)); emit_insn (gen_bg_v4si (abor, zero, a_abs));
emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle)); emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor)); emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
emit_insn (gen_selb (abor, a_abs, abor, asel)); emit_insn (gen_selb (abor, a_abs, abor, asel));
emit_insn (gen_ashrv4si3 (bsel, rb, spu_const (V4SImode, 31))); emit_insn (gen_ashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote)); emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
emit_insn (gen_bg_v4si (bbor, zero, b_abs)); emit_insn (gen_bg_v4si (bbor, zero, b_abs));
emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle)); emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor)); emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
...@@ -3267,8 +3381,7 @@ selb\t%0,%4,%0,%3" ...@@ -3267,8 +3381,7 @@ selb\t%0,%4,%0,%3"
[(set (cc0) [(set (cc0)
(compare (match_operand:DF 0 "register_operand" "") (compare (match_operand:DF 0 "register_operand" "")
(match_operand:DF 1 "register_operand" "")))] (match_operand:DF 1 "register_operand" "")))]
"(flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL) ""
|| spu_arch == PROCESSOR_CELLEDP "
"{ "{
spu_compare_op0 = operands[0]; spu_compare_op0 = operands[0];
spu_compare_op1 = operands[1]; spu_compare_op1 = operands[1];
......
2007-11-15 Ben Elliston <bje@au.ibm.com>
* gcc.target/spu/compare-dp.c: New test.
2007-11-14 Eric Botcazou <ebotcazou@libertysurf.fr>
* gcc.dg/pr33923.c: New test.
/* { dg-do compile } */
/* { dg-final { scan-assembler-not "__eqdf2" } } */
/* Ensure double precision comparisons are always inlined. */
int test (double a, double b) __attribute__((noinline));

/* Return 1 when A compares equal to B and 0 otherwise.  The function is
   declared noinline, so the double-precision comparison stays in a
   function of its own.  */
int
test (double a, double b)
{
  int is_equal = (a == b);

  return is_equal;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment