Commit 93cf5515 by Richard Biener Committed by Richard Biener

re PR rtl-optimization/91154 (456.hmmer regression on Haswell caused by r272922)

2019-08-14  Richard Biener  <rguenther@suse.de>
        Uroš Bizjak  <ubizjak@gmail.com>

	PR target/91154
	* config/i386/i386-features.h (scalar_chain::scalar_chain): Add
	mode arguments.
	(scalar_chain::smode): New member.
	(scalar_chain::vmode): Likewise.
	(dimode_scalar_chain): Rename to...
	(general_scalar_chain): ... this.
	(general_scalar_chain::general_scalar_chain): Take mode arguments.
	(timode_scalar_chain::timode_scalar_chain): Initialize scalar_chain
	base with TImode and V1TImode.
	* config/i386/i386-features.c (scalar_chain::scalar_chain): Adjust.
	(general_scalar_chain::vector_const_cost): Adjust for SImode
	chains.
	(general_scalar_chain::compute_convert_gain): Likewise.  Add
	{S,U}{MIN,MAX} support.
	(general_scalar_chain::replace_with_subreg): Use vmode/smode.
	(general_scalar_chain::make_vector_copies): Likewise.  Handle
	non-DImode chains appropriately.
	(general_scalar_chain::convert_reg): Likewise.
	(general_scalar_chain::convert_op): Likewise.
	(general_scalar_chain::convert_insn): Likewise.  Add
	fatal_insn_not_found if the result is not recognized.
	(convertible_comparison_p): Pass in the scalar mode and use that.
	(general_scalar_to_vector_candidate_p): Likewise.  Rename from
	dimode_scalar_to_vector_candidate_p.  Add {S,U}{MIN,MAX} support.
	(scalar_to_vector_candidate_p): Remove by inlining into single
	caller.
	(general_remove_non_convertible_regs): Rename from
	dimode_remove_non_convertible_regs.
	(remove_non_convertible_regs): Remove by inlining into single caller.
	(convert_scalars_to_vector): Handle SImode and DImode chains
	in addition to TImode chains.
	* config/i386/i386.md (<maxmin><MAXMIN_IMODE>3): New expander.
	(*<maxmin><MAXMIN_IMODE>3_1): New insn-and-split.
	(*<maxmin>di3_doubleword): Likewise.

	* gcc.target/i386/pr91154.c: New testcase.
	* gcc.target/i386/minmax-3.c: Likewise.
	* gcc.target/i386/minmax-4.c: Likewise.
	* gcc.target/i386/minmax-5.c: Likewise.
	* gcc.target/i386/minmax-6.c: Likewise.
	* gcc.target/i386/minmax-1.c: Add -mno-stv.
	* gcc.target/i386/minmax-2.c: Likewise.

Co-Authored-By: Uros Bizjak <ubizjak@gmail.com>

From-SVN: r274481
parent 1b187f36
2019-08-14 Richard Biener <rguenther@suse.de>
Uroš Bizjak <ubizjak@gmail.com>
PR target/91154
* config/i386/i386-features.h (scalar_chain::scalar_chain): Add
mode arguments.
(scalar_chain::smode): New member.
(scalar_chain::vmode): Likewise.
(dimode_scalar_chain): Rename to...
(general_scalar_chain): ... this.
(general_scalar_chain::general_scalar_chain): Take mode arguments.
(timode_scalar_chain::timode_scalar_chain): Initialize scalar_chain
base with TImode and V1TImode.
* config/i386/i386-features.c (scalar_chain::scalar_chain): Adjust.
(general_scalar_chain::vector_const_cost): Adjust for SImode
chains.
(general_scalar_chain::compute_convert_gain): Likewise. Add
{S,U}{MIN,MAX} support.
(general_scalar_chain::replace_with_subreg): Use vmode/smode.
(general_scalar_chain::make_vector_copies): Likewise. Handle
non-DImode chains appropriately.
(general_scalar_chain::convert_reg): Likewise.
(general_scalar_chain::convert_op): Likewise.
(general_scalar_chain::convert_insn): Likewise. Add
fatal_insn_not_found if the result is not recognized.
(convertible_comparison_p): Pass in the scalar mode and use that.
(general_scalar_to_vector_candidate_p): Likewise. Rename from
dimode_scalar_to_vector_candidate_p. Add {S,U}{MIN,MAX} support.
(scalar_to_vector_candidate_p): Remove by inlining into single
caller.
(general_remove_non_convertible_regs): Rename from
dimode_remove_non_convertible_regs.
(remove_non_convertible_regs): Remove by inlining into single caller.
(convert_scalars_to_vector): Handle SImode and DImode chains
in addition to TImode chains.
* config/i386/i386.md (<maxmin><MAXMIN_IMODE>3): New expander.
(*<maxmin><MAXMIN_IMODE>3_1): New insn-and-split.
(*<maxmin>di3_doubleword): Likewise.
2019-08-14 Richard Sandiford <richard.sandiford@arm.com> 2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
......
...@@ -127,11 +127,16 @@ namespace { ...@@ -127,11 +127,16 @@ namespace {
class scalar_chain class scalar_chain
{ {
public: public:
scalar_chain (); scalar_chain (enum machine_mode, enum machine_mode);
virtual ~scalar_chain (); virtual ~scalar_chain ();
static unsigned max_id; static unsigned max_id;
/* Scalar mode. */
enum machine_mode smode;
/* Vector mode. */
enum machine_mode vmode;
/* ID of a chain. */ /* ID of a chain. */
unsigned int chain_id; unsigned int chain_id;
/* A queue of instructions to be included into a chain. */ /* A queue of instructions to be included into a chain. */
...@@ -159,9 +164,11 @@ class scalar_chain ...@@ -159,9 +164,11 @@ class scalar_chain
virtual void convert_registers () = 0; virtual void convert_registers () = 0;
}; };
class dimode_scalar_chain : public scalar_chain class general_scalar_chain : public scalar_chain
{ {
public: public:
general_scalar_chain (enum machine_mode smode_, enum machine_mode vmode_)
: scalar_chain (smode_, vmode_) {}
int compute_convert_gain (); int compute_convert_gain ();
private: private:
void mark_dual_mode_def (df_ref def); void mark_dual_mode_def (df_ref def);
...@@ -178,6 +185,8 @@ class dimode_scalar_chain : public scalar_chain ...@@ -178,6 +185,8 @@ class dimode_scalar_chain : public scalar_chain
class timode_scalar_chain : public scalar_chain class timode_scalar_chain : public scalar_chain
{ {
public: public:
timode_scalar_chain () : scalar_chain (TImode, V1TImode) {}
/* Convert from TImode to V1TImode is always faster. */ /* Convert from TImode to V1TImode is always faster. */
int compute_convert_gain () { return 1; } int compute_convert_gain () { return 1; }
......
...@@ -17719,6 +17719,110 @@ ...@@ -17719,6 +17719,110 @@
(match_operand:SWI 3 "const_int_operand")] (match_operand:SWI 3 "const_int_operand")]
"" ""
"if (ix86_expand_int_addcc (operands)) DONE; else FAIL;") "if (ix86_expand_int_addcc (operands)) DONE; else FAIL;")
;; min/max patterns
;; Integer modes for which the min/max patterns below are provided, each
;; with its enabling condition: SImode under TARGET_SSE4_1 and DImode
;; under TARGET_AVX512VL.
(define_mode_iterator MAXMIN_IMODE
[(SI "TARGET_SSE4_1") (DI "TARGET_AVX512VL")])
;; Comparison code associated with each min/max code.  The splitters below
;; use it to build the condition that selects operand 1 over operand 2.
(define_code_attr maxmin_rel
[(smax "GE") (smin "LE") (umax "GEU") (umin "LEU")])
;; Named expander for integer min/max over the MAXMIN_IMODE modes.  It emits
;; the maxmin operation in a parallel with a clobber of the flags register,
;; and is only enabled when TARGET_STV holds.
(define_expand "<code><mode>3"
[(parallel
[(set (match_operand:MAXMIN_IMODE 0 "register_operand")
(maxmin:MAXMIN_IMODE
(match_operand:MAXMIN_IMODE 1 "register_operand")
(match_operand:MAXMIN_IMODE 2 "nonimmediate_operand")))
(clobber (reg:CC FLAGS_REG))])]
"TARGET_STV")
;; Insn form of the integer min/max operation.  DImode is accepted here only
;; on TARGET_64BIT, and the pattern matches only while new pseudos can still
;; be created.  It has no assembler output of its own ("#") and is always
;; split ("&& 1"): the preparation code emits a compare of operands 1 and 2
;; into the flags register, and the split pattern then selects between
;; operands 1 and 2 with an if_then_else on the condition in operand 3.
(define_insn_and_split "*<code><mode>3_1"
[(set (match_operand:MAXMIN_IMODE 0 "register_operand")
(maxmin:MAXMIN_IMODE
(match_operand:MAXMIN_IMODE 1 "register_operand")
(match_operand:MAXMIN_IMODE 2 "nonimmediate_operand")))
(clobber (reg:CC FLAGS_REG))]
"(TARGET_64BIT || <MODE>mode != DImode) && TARGET_STV
&& can_create_pseudo_p ()"
"#"
"&& 1"
[(set (match_dup 0)
(if_then_else:MAXMIN_IMODE (match_dup 3)
(match_dup 1)
(match_dup 2)))]
{
machine_mode mode = <MODE>mode;
/* Operand 2 may be a non-register operand; load it into a register
if it is not one already.  */
if (!register_operand (operands[2], mode))
operands[2] = force_reg (mode, operands[2]);
/* Comparison code for this min/max code, taken from maxmin_rel.  */
enum rtx_code code = <maxmin_rel>;
/* Emit the compare of operands 1 and 2 that sets the flags register,
using the CC mode chosen by SELECT_CC_MODE for this comparison.  */
machine_mode cmpmode = SELECT_CC_MODE (code, operands[1], operands[2]);
rtx flags = gen_rtx_REG (cmpmode, FLAGS_REG);
rtx tmp = gen_rtx_COMPARE (cmpmode, operands[1], operands[2]);
emit_insn (gen_rtx_SET (flags, tmp));
/* Operand 3 is the flags test used as the if_then_else condition in
the split pattern above.  */
operands[3] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
})
;; DImode min/max for !TARGET_64BIT with TARGET_STV and TARGET_AVX512VL,
;; matched only while new pseudos can still be created.  It is always split
;; ("&& 1") into two SImode if_then_else sets, one for operands 0/1/2 and
;; one for operands 3/4/5, both testing the single condition in operand 6.
;; The preparation code emits the flag-setting insns for that condition:
;; a compare of the cmplo pair followed by sbb_insn on the cmphi pair.
(define_insn_and_split "*<code>di3_doubleword"
[(set (match_operand:DI 0 "register_operand")
(maxmin:DI (match_operand:DI 1 "register_operand")
(match_operand:DI 2 "nonimmediate_operand")))
(clobber (reg:CC FLAGS_REG))]
"!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL
&& can_create_pseudo_p ()"
"#"
"&& 1"
[(set (match_dup 0)
(if_then_else:SI (match_dup 6)
(match_dup 1)
(match_dup 2)))
(set (match_dup 3)
(if_then_else:SI (match_dup 6)
(match_dup 4)
(match_dup 5)))]
{
/* Load operand 2 into a register if it is not one already.  */
if (!register_operand (operands[2], DImode))
operands[2] = force_reg (DImode, operands[2]);
/* Split the three DImode operands in place.  Judging by the cmplo/cmphi
names below, operands[0..2] become the low halves and operands[3..5]
the high halves -- NOTE(review): confirm against split_double_mode.  */
split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);
rtx cmplo[2] = { operands[1], operands[2] };
rtx cmphi[2] = { operands[4], operands[5] };
enum rtx_code code = <maxmin_rel>;
switch (code)
{
/* Reduce LE/LEU to GE/GEU by swapping the comparison operands and the
condition, so only the GE/GEU sequence has to be emitted.  */
case LE: case LEU:
std::swap (cmplo[0], cmplo[1]);
std::swap (cmphi[0], cmphi[1]);
code = swap_condition (code);
/* FALLTHRU */
case GE: case GEU:
{
/* The unsigned comparison uses the CCCmode variant of the sub3_carry
generator and tests flags in CCCmode; the signed one uses the
CCGZmode variant and CCGZmode.  */
bool uns = (code == GEU);
rtx (*sbb_insn) (machine_mode, rtx, rtx, rtx)
= uns ? gen_sub3_carry_ccc : gen_sub3_carry_ccgz;
emit_insn (gen_cmp_1 (SImode, cmplo[0], cmplo[1]));
/* The result of sbb_insn goes to a scratch; the insn is emitted for the
flags that operand 6 tests.  */
rtx tmp = gen_rtx_SCRATCH (SImode);
emit_insn (sbb_insn (SImode, tmp, cmphi[0], cmphi[1]));
rtx flags = gen_rtx_REG (uns ? CCCmode : CCGZmode, FLAGS_REG);
/* Operand 6 is the condition shared by both if_then_else sets.  */
operands[6] = gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
break;
}
default:
gcc_unreachable ();
}
})
;; Misc patterns (?) ;; Misc patterns (?)
......
2019-08-14 Richard Biener <rguenther@suse.de>
PR target/91154
* gcc.target/i386/pr91154.c: New testcase.
* gcc.target/i386/minmax-3.c: Likewise.
* gcc.target/i386/minmax-4.c: Likewise.
* gcc.target/i386/minmax-5.c: Likewise.
* gcc.target/i386/minmax-6.c: Likewise.
* gcc.target/i386/minmax-1.c: Add -mno-stv.
* gcc.target/i386/minmax-2.c: Likewise.
2019-08-14 Richard Sandiford <richard.sandiford@arm.com> 2019-08-14 Richard Sandiford <richard.sandiford@arm.com>
Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org> Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O2 -march=opteron" } */ /* { dg-options "-O2 -march=opteron -mno-stv" } */
/* { dg-final { scan-assembler "test" } } */ /* { dg-final { scan-assembler "test" } } */
/* { dg-final { scan-assembler-not "cmp" } } */ /* { dg-final { scan-assembler-not "cmp" } } */
#define max(a,b) (((a) > (b))? (a) : (b)) #define max(a,b) (((a) > (b))? (a) : (b))
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O2" } */ /* { dg-options "-O2 -mno-stv" } */
/* { dg-final { scan-assembler "test" } } */ /* { dg-final { scan-assembler "test" } } */
/* { dg-final { scan-assembler-not "cmp" } } */ /* { dg-final { scan-assembler-not "cmp" } } */
#define max(a,b) (((a) > (b))? (a) : (b)) #define max(a,b) (((a) > (b))? (a) : (b))
......
/* { dg-do compile } */
/* { dg-options "-O2 -mstv" } */
/* Min/max written as plain conditional expressions.  Each argument may be
   evaluated twice, so only side-effect-free operands are passed below.  */
#define max(a,b) (((a) > (b))? (a) : (b))
#define min(a,b) (((a) < (b))? (a) : (b))
/* One array per element type exercised: signed and unsigned int, signed and
   unsigned long long.  */
int ssi[1024];
unsigned int usi[1024];
long long sdi[1024];
unsigned long long udi[1024];
/* Define a function named FN##VARIANT that walks array VARIANT from index 1
   and stores FN (previous element, current element) into the current
   element, so each iteration consumes the value written by the previous
   one.  */
#define CHECK(FN, VARIANT) \
void \
FN ## VARIANT (void) \
{ \
for (int i = 1; i < 1024; ++i) \
VARIANT[i] = FN(VARIANT[i-1], VARIANT[i]); \
}
/* Instantiate max and min for each of the four arrays.  */
CHECK(max, ssi);
CHECK(min, ssi);
CHECK(max, usi);
CHECK(min, usi);
CHECK(max, sdi);
CHECK(min, sdi);
CHECK(max, udi);
CHECK(min, udi);
/* { dg-do compile } */
/* { dg-options "-O2 -mstv -msse4.1" } */
#include "minmax-3.c"
/* { dg-final { scan-assembler-times "pmaxsd" 1 } } */
/* { dg-final { scan-assembler-times "pmaxud" 1 } } */
/* { dg-final { scan-assembler-times "pminsd" 1 } } */
/* { dg-final { scan-assembler-times "pminud" 1 } } */
/* { dg-do compile } */
/* { dg-options "-O2 -mstv -mavx512vl" } */
#include "minmax-3.c"
/* { dg-final { scan-assembler-times "vpmaxsd" 1 } } */
/* { dg-final { scan-assembler-times "vpmaxud" 1 } } */
/* { dg-final { scan-assembler-times "vpminsd" 1 } } */
/* { dg-final { scan-assembler-times "vpminud" 1 } } */
/* { dg-final { scan-assembler-times "vpmaxsq" 1 { target lp64 } } } */
/* { dg-final { scan-assembler-times "vpmaxuq" 1 { target lp64 } } } */
/* { dg-final { scan-assembler-times "vpminsq" 1 { target lp64 } } } */
/* { dg-final { scan-assembler-times "vpminuq" 1 { target lp64 } } } */
/* { dg-do compile } */
/* { dg-options "-O2 -march=haswell" } */
/* Return Pic[y] when y equals width.  Otherwise clamp y to be non-negative
   and return Pic[y * width].  */
unsigned short
UMVLine16Y_11 (short unsigned int * Pic, int y, int width)
{
if (y != width)
{
/* Signed max (y, 0); y is also used afterwards in the index
   computation.  */
y = y < 0 ? 0 : y;
return Pic[y * width];
}
return Pic[y];
}
/* We do not want the RA to spill %esi for its dual-use but using
pmaxsd is OK. */
/* { dg-final { scan-assembler-not "rsp" { target { ! { ia32 } } } } } */
/* { dg-final { scan-assembler "pmaxsd" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -msse4.1 -mstv" } */
/* For each k from 1 to M inclusive, set dc[k] to the larger of
   dc[k-1] + tpdd[k-1] and mc[k-1] + tpmd[k-1], then raise it to
   -987654321 if it is below that value.  Each iteration reads the dc
   element written by the previous one.  */
void foo (int *dc, int *mc, int *tpdd, int *tpmd, int M)
{
int sc;
int k;
for (k = 1; k <= M; k++)
{
dc[k] = dc[k-1] + tpdd[k-1];
/* First signed max: keep the larger of the two sums.  */
if ((sc = mc[k-1] + tpmd[k-1]) > dc[k]) dc[k] = sc;
/* Second signed max: clamp from below at the constant.  */
if (dc[k] < -987654321) dc[k] = -987654321;
}
}
/* We want to convert the loop to SSE since SSE pmaxsd is faster than
compare + conditional move. */
/* { dg-final { scan-assembler-not "cmov" } } */
/* { dg-final { scan-assembler-times "pmaxsd" 2 } } */
/* { dg-final { scan-assembler-times "paddd" 2 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment