Commit 467e9f38 by H.J. Lu Committed by Hongtao Liu

re PR target/89750 (Wrong code for _mm_comi_round_ss)

2019-05-06  H.J. Lu  <hongjiu.lu@intel.com>
	    Hongtao Liu  <hongtao.liu@intel.com>

	PR target/89750
	PR target/86444
	* config/i386/i386-expand.c (ix86_expand_sse_comi_round):
	Modified, original implementation isn't correct.

2019-05-06  H.J. Lu  <hongjiu.lu@intel.com>
	    Hongtao Liu  <hongtao.liu@intel.com>

	PR target/89750
	PR target/86444
	* gcc.target/i386/avx512f-vcomisd-2.c: New.
	* gcc.target/i386/avx512f-vcomisd-2.c: Likewise.

Co-Authored-By: Hongtao Liu <hongtao.liu@intel.com>

From-SVN: r271853
parent 4bd04267
......@@ -2576,6 +2576,14 @@ Fix test-suite.
* tree-ssa-phiopt.c (two_value_replacement): Fix a typo in parameter
detection.
2019-05-06 H.J. Lu <hongjiu.lu@intel.com>
Hongtao Liu <hongtao.liu@intel.com>
PR target/89750
PR target/86444
* config/i386/i386-expand.c (ix86_expand_sse_comi_round):
Modified, original implementation isn't correct.
2019-05-06 Segher Boessenkool <segher@kernel.crashing.org>
* config/rs6000/rs6000.md (FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO)
......
......@@ -10007,17 +10007,23 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
const struct insn_data_d *insn_p = &insn_data[icode];
machine_mode mode0 = insn_p->operand[0].mode;
machine_mode mode1 = insn_p->operand[1].mode;
enum rtx_code comparison = UNEQ;
bool need_ucomi = false;
/* See avxintrin.h for values. */
enum rtx_code comi_comparisons[32] =
static const enum rtx_code comparisons[32] =
{
UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
EQ, LT, LE, UNORDERED, NE, UNGE, UNGT, ORDERED,
UNEQ, UNLT, UNLE, UNORDERED, LTGT, GE, GT, ORDERED,
EQ, LT, LE, UNORDERED, NE, UNGE, UNGT, ORDERED,
UNEQ, UNLT, UNLE, UNORDERED, LTGT, GE, GT, ORDERED
};
bool need_ucomi_values[32] =
static const bool ordereds[32] =
{
true, true, true, false, false, false, false, true,
false, false, false, true, true, true, true, false,
true, true, true, false, false, false, false, true,
false, false, false, true, true, true, true, false
};
static const bool non_signalings[32] =
{
true, false, false, true, true, false, false, true,
true, false, false, true, true, false, false, true,
......@@ -10042,16 +10048,94 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
return const0_rtx;
}
comparison = comi_comparisons[INTVAL (op2)];
need_ucomi = need_ucomi_values[INTVAL (op2)];
if (VECTOR_MODE_P (mode0))
op0 = safe_vector_operand (op0, mode0);
if (VECTOR_MODE_P (mode1))
op1 = safe_vector_operand (op1, mode1);
enum rtx_code comparison = comparisons[INTVAL (op2)];
bool ordered = ordereds[INTVAL (op2)];
bool non_signaling = non_signalings[INTVAL (op2)];
rtx const_val = const0_rtx;
bool check_unordered = false;
machine_mode mode = CCFPmode;
switch (comparison)
{
case ORDERED:
if (!ordered)
{
/* NB: Use CCSmode/NE for _CMP_TRUE_UQ/_CMP_TRUE_US. */
if (!non_signaling)
ordered = true;
mode = CCSmode;
}
else
{
/* NB: Use CCPmode/NE for _CMP_ORD_Q/_CMP_ORD_S. */
if (non_signaling)
ordered = false;
mode = CCPmode;
}
comparison = NE;
break;
case UNORDERED:
if (ordered)
{
/* NB: Use CCSmode/EQ for _CMP_FALSE_OQ/_CMP_FALSE_OS. */
if (non_signaling)
ordered = false;
mode = CCSmode;
}
else
{
/* NB: Use CCPmode/NE for _CMP_UNORD_Q/_CMP_UNORD_S. */
if (!non_signaling)
ordered = true;
mode = CCPmode;
}
comparison = EQ;
break;
case LE: /* -> GE */
case LT: /* -> GT */
case UNGE: /* -> UNLE */
case UNGT: /* -> UNLT */
std::swap (op0, op1);
comparison = swap_condition (comparison);
/* FALLTHRU */
case GT:
case GE:
case UNEQ:
case UNLT:
case UNLE:
case LTGT:
/* These are supported by CCFPmode. NB: Use ordered/signaling
COMI or unordered/non-signaling UCOMI. Both set ZF, PF, CF
with NAN operands. */
if (ordered == non_signaling)
ordered = !ordered;
break;
case EQ:
/* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for
_CMP_EQ_OQ/_CMP_EQ_OS. */
check_unordered = true;
mode = CCZmode;
break;
case NE:
/* NB: COMI/UCOMI will set ZF with NAN operands. Use CCZmode for
_CMP_NEQ_UQ/_CMP_NEQ_US. */
gcc_assert (!ordered);
check_unordered = true;
mode = CCZmode;
const_val = const1_rtx;
break;
default:
gcc_unreachable ();
}
target = gen_reg_rtx (SImode);
emit_move_insn (target, const0_rtx);
emit_move_insn (target, const_val);
target = gen_rtx_SUBREG (QImode, target, 0);
if ((optimize && !register_operand (op0, mode0))
......@@ -10061,10 +10145,14 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
|| !insn_p->operand[1].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);
if (need_ucomi)
icode = icode == CODE_FOR_sse_comi_round
? CODE_FOR_sse_ucomi_round
: CODE_FOR_sse2_ucomi_round;
/*
1. COMI: ordered and signaling.
2. UCOMI: unordered and non-signaling.
*/
if (non_signaling)
icode = (icode == CODE_FOR_sse_comi_round
? CODE_FOR_sse_ucomi_round
: CODE_FOR_sse2_ucomi_round);
pat = GEN_FCN (icode) (op0, op1, op3);
if (! pat)
......@@ -10086,11 +10174,42 @@ ix86_expand_sse_comi_round (const struct builtin_description *d,
}
emit_insn (pat);
rtx_code_label *label = NULL;
/* NB: For ordered EQ or unordered NE, check ZF alone isn't sufficient
with NAN operands. */
if (check_unordered)
{
gcc_assert (comparison == EQ || comparison == NE);
rtx flag = gen_rtx_REG (CCFPmode, FLAGS_REG);
label = gen_label_rtx ();
rtx tmp = gen_rtx_fmt_ee (UNORDERED, VOIDmode, flag, const0_rtx);
tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
gen_rtx_LABEL_REF (VOIDmode, label),
pc_rtx);
emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
}
/* NB: Set CCFPmode and check a different CCmode which is in subset
of CCFPmode. */
if (GET_MODE (set_dst) != mode)
{
gcc_assert (mode == CCAmode || mode == CCCmode
|| mode == CCOmode || mode == CCPmode
|| mode == CCSmode || mode == CCZmode);
set_dst = gen_rtx_REG (mode, FLAGS_REG);
}
emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target),
gen_rtx_fmt_ee (comparison, QImode,
set_dst,
const0_rtx)));
if (label)
emit_label (label);
return SUBREG_REG (target);
}
......
......@@ -1563,6 +1563,14 @@
optimization.
* gcc.dg/tree-ssa/pr88676-2.c: New testcase.
2019-05-06 H.J. Lu <hongjiu.lu@intel.com>
Hongtao Liu <hongtao.liu@intel.com>
PR target/89750
PR target/86444
* gcc.target/i386/avx512f-vcomisd-2.c: New.
* gcc.target/i386/avx512f-vcomisd-2.c: Likewise.
2019-05-06 Steven G. Kargl <kargl@gcc.gnu.org>
PR fortran/90290
......
/* { dg-do run } */
/* { dg-require-effective-target avx512f } */
/* { dg-options "-O2 -mavx512f" } */
#include "avx512f-check.h"
static inline void __attribute__ ((__always_inline__))
check_cmp (double s1, double s2, const int imm, int expected)
{
__m128d source1 = _mm_load_sd (&s1);
__m128d source2 = _mm_load_sd (&s2);
int res = _mm_comi_round_sd (source1, source2, imm,
_MM_FROUND_NO_EXC);
if (expected != res)
abort();
}
static void
do_check (double s1, double s2)
{
check_cmp (s1, s2, _CMP_EQ_OQ,
!__builtin_isunordered (s1, s2) && s1 == s2);
check_cmp (s1, s2, _CMP_LT_OS,
!__builtin_isunordered (s1, s2) && s1 < s2);
check_cmp (s1, s2, _CMP_LE_OS,
!__builtin_isunordered (s1, s2) && s1 <= s2);
check_cmp (s1, s2, _CMP_UNORD_Q,
__builtin_isunordered (s1, s2));
check_cmp (s1, s2, _CMP_NEQ_UQ,
__builtin_isunordered (s1, s2) || s1 != s2);
check_cmp (s1, s2, _CMP_NLT_US,
__builtin_isunordered (s1, s2) || s1 >= s2);
check_cmp (s1, s2, _CMP_NLE_US,
__builtin_isunordered (s1, s2) || s1 > s2);
check_cmp (s1, s2, _CMP_ORD_Q,
!__builtin_isunordered (s1, s2));
check_cmp (s1, s2, _CMP_EQ_UQ,
__builtin_isunordered (s1, s2) || s1 == s2);
check_cmp (s1, s2, _CMP_NGE_US,
__builtin_isunordered (s1, s2) || s1 < s2);
check_cmp (s1, s2, _CMP_NGT_US,
__builtin_isunordered (s1, s2) || s1 <= s2);
check_cmp (s1, s2, _CMP_FALSE_OQ, 0);
check_cmp (s1, s2, _CMP_NEQ_OQ,
!__builtin_isunordered (s1, s2) && s1 != s2);
check_cmp (s1, s2, _CMP_GE_OS,
!__builtin_isunordered (s1, s2) && s1 >= s2);
check_cmp (s1, s2, _CMP_GT_OS,
!__builtin_isunordered (s1, s2) && s1 > s2);
check_cmp (s1, s2, _CMP_TRUE_UQ, 1);
check_cmp (s1, s2, _CMP_EQ_OS,
!__builtin_isunordered (s1, s2) && s1 == s2);
check_cmp (s1, s2, _CMP_LT_OQ,
!__builtin_isunordered (s1, s2) && s1 < s2);
check_cmp (s1, s2, _CMP_LE_OQ,
!__builtin_isunordered (s1, s2) && s1 <= s2);
check_cmp (s1, s2, _CMP_UNORD_S,
__builtin_isunordered (s1, s2));
check_cmp (s1, s2, _CMP_NEQ_US,
__builtin_isunordered (s1, s2) || s1 != s2);
check_cmp (s1, s2, _CMP_NLT_UQ,
__builtin_isunordered (s1, s2) || s1 >= s2);
check_cmp (s1, s2, _CMP_NLE_UQ,
__builtin_isunordered (s1, s2) || s1 > s2);
check_cmp (s1, s2, _CMP_ORD_S, !__builtin_isunordered (s1, s2));
check_cmp (s1, s2, _CMP_EQ_US,
__builtin_isunordered (s1, s2) || s1 == s2);
check_cmp (s1, s2, _CMP_NGE_UQ,
__builtin_isunordered (s1, s2) || s1 < s2);
check_cmp (s1, s2, _CMP_NGT_UQ,
__builtin_isunordered (s1, s2) || s1 <= s2);
check_cmp (s1, s2, _CMP_FALSE_OS, 0);
check_cmp (s1, s2, _CMP_NEQ_OS,
!__builtin_isunordered (s1, s2) && s1 != s2);
check_cmp (s1, s2, _CMP_GE_OQ,
!__builtin_isunordered (s1, s2) && s1 >= s2);
check_cmp (s1, s2, _CMP_GT_OQ,
!__builtin_isunordered (s1, s2) && s1 > s2);
check_cmp (s1, s2, _CMP_TRUE_US, 1);
}
static void
avx512f_test (void)
{
struct
{
double x1;
double x2;
}
inputs[] =
{
{ 4.3, 2.18 },
{ -4.3, 3.18 },
{ __builtin_nan (""), -5.8 },
{ -4.8, __builtin_nans ("") },
{ 3.8, __builtin_nans ("") },
{ 4.2, 4.2 },
{ __builtin_nan (""), __builtin_nans ("") },
};
int i;
for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
do_check (inputs[i].x1, inputs[i].x2);
}
/* { dg-do run } */
/* { dg-require-effective-target avx512f } */
/* { dg-options "-O2 -mavx512f" } */
#include "avx512f-check.h"
static inline void __attribute__ ((__always_inline__))
check_cmp (float s1, float s2, const int imm, int expected)
{
__m128 source1 = _mm_load_ss (&s1);
__m128 source2 = _mm_load_ss (&s2);
int res = _mm_comi_round_ss (source1, source2, imm,
_MM_FROUND_NO_EXC);
if (expected != res)
abort();
}
static void
do_check (float s1, float s2)
{
check_cmp (s1, s2, _CMP_EQ_OQ,
!__builtin_isunordered (s1, s2) && s1 == s2);
check_cmp (s1, s2, _CMP_LT_OS,
!__builtin_isunordered (s1, s2) && s1 < s2);
check_cmp (s1, s2, _CMP_LE_OS,
!__builtin_isunordered (s1, s2) && s1 <= s2);
check_cmp (s1, s2, _CMP_UNORD_Q,
__builtin_isunordered (s1, s2));
check_cmp (s1, s2, _CMP_NEQ_UQ,
__builtin_isunordered (s1, s2) || s1 != s2);
check_cmp (s1, s2, _CMP_NLT_US,
__builtin_isunordered (s1, s2) || s1 >= s2);
check_cmp (s1, s2, _CMP_NLE_US,
__builtin_isunordered (s1, s2) || s1 > s2);
check_cmp (s1, s2, _CMP_ORD_Q,
!__builtin_isunordered (s1, s2));
check_cmp (s1, s2, _CMP_EQ_UQ,
__builtin_isunordered (s1, s2) || s1 == s2);
check_cmp (s1, s2, _CMP_NGE_US,
__builtin_isunordered (s1, s2) || s1 < s2);
check_cmp (s1, s2, _CMP_NGT_US,
__builtin_isunordered (s1, s2) || s1 <= s2);
check_cmp (s1, s2, _CMP_FALSE_OQ, 0);
check_cmp (s1, s2, _CMP_NEQ_OQ,
!__builtin_isunordered (s1, s2) && s1 != s2);
check_cmp (s1, s2, _CMP_GE_OS,
!__builtin_isunordered (s1, s2) && s1 >= s2);
check_cmp (s1, s2, _CMP_GT_OS,
!__builtin_isunordered (s1, s2) && s1 > s2);
check_cmp (s1, s2, _CMP_TRUE_UQ, 1);
check_cmp (s1, s2, _CMP_EQ_OS,
!__builtin_isunordered (s1, s2) && s1 == s2);
check_cmp (s1, s2, _CMP_LT_OQ,
!__builtin_isunordered (s1, s2) && s1 < s2);
check_cmp (s1, s2, _CMP_LE_OQ,
!__builtin_isunordered (s1, s2) && s1 <= s2);
check_cmp (s1, s2, _CMP_UNORD_S,
__builtin_isunordered (s1, s2));
check_cmp (s1, s2, _CMP_NEQ_US,
__builtin_isunordered (s1, s2) || s1 != s2);
check_cmp (s1, s2, _CMP_NLT_UQ,
__builtin_isunordered (s1, s2) || s1 >= s2);
check_cmp (s1, s2, _CMP_NLE_UQ,
__builtin_isunordered (s1, s2) || s1 > s2);
check_cmp (s1, s2, _CMP_ORD_S, !__builtin_isunordered (s1, s2));
check_cmp (s1, s2, _CMP_EQ_US,
__builtin_isunordered (s1, s2) || s1 == s2);
check_cmp (s1, s2, _CMP_NGE_UQ,
__builtin_isunordered (s1, s2) || s1 < s2);
check_cmp (s1, s2, _CMP_NGT_UQ,
__builtin_isunordered (s1, s2) || s1 <= s2);
check_cmp (s1, s2, _CMP_FALSE_OS, 0);
check_cmp (s1, s2, _CMP_NEQ_OS,
!__builtin_isunordered (s1, s2) && s1 != s2);
check_cmp (s1, s2, _CMP_GE_OQ,
!__builtin_isunordered (s1, s2) && s1 >= s2);
check_cmp (s1, s2, _CMP_GT_OQ,
!__builtin_isunordered (s1, s2) && s1 > s2);
check_cmp (s1, s2, _CMP_TRUE_US, 1);
}
static void
avx512f_test (void)
{
struct
{
float x1;
float x2;
}
inputs[] =
{
{ 4.3, 2.18 },
{ -4.3, 3.18 },
{ __builtin_nanf (""), -5.8 },
{ -4.8, __builtin_nansf ("") },
{ 3.8, __builtin_nansf ("") },
{ 4.2, 4.2 },
{ __builtin_nanf (""), __builtin_nansf ("") },
};
int i;
for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++)
do_check (inputs[i].x1, inputs[i].x2);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment