Commit 4ca47ced by Jan Hubicka Committed by Jan Hubicka

re PR target/81616 (Update -mtune=generic for the current Intel and AMD processors)


	PR target/81616
	* x86-tune.def: Remove obsolete FIXMEs.
	(X86_TUNE_PARTIAL_FLAG_REG_STALL): Disable for generic.
	(X86_TUNE_FUSE_CMP_AND_BRANCH_32, X86_TUNE_FUSE_CMP_AND_BRANCH_64,
	X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, X86_TUNE_FUSE_ALU_AND_BRANCH):
	Enable for generic.
	(X86_TUNE_PAD_RETURNS): Disable for generic.
	* gcc.target/i386/pad-1.c: Compile for amdfam10.
	* gcc.target/i386/align-limit.c: Likewise.

From-SVN: r255357
parent 30fa8e9c
2017-12-01 Jan Hubicka <hubicka@ucw.cz>
PR target/81616
* x86-tune.def: Remove obsolete FIXMEs.
(X86_TUNE_PARTIAL_FLAG_REG_STALL): Disable for generic.
(X86_TUNE_FUSE_CMP_AND_BRANCH_32, X86_TUNE_FUSE_CMP_AND_BRANCH_64,
X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, X86_TUNE_FUSE_ALU_AND_BRANCH):
Enable for generic.
(X86_TUNE_PAD_RETURNS): Disable for generic.
2017-12-02 Jakub Jelinek <jakub@redhat.com> 2017-12-02 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/83170 PR tree-optimization/83170
...@@ -75,12 +75,10 @@ DEF_TUNE (X86_TUNE_SSE_SPLIT_REGS, "sse_split_regs", m_ATHLON_K8) ...@@ -75,12 +75,10 @@ DEF_TUNE (X86_TUNE_SSE_SPLIT_REGS, "sse_split_regs", m_ATHLON_K8)
setting full flags. setting full flags.
The flags does not affect generation of INC and DEC that is controlled The flags does not affect generation of INC and DEC that is controlled
by X86_TUNE_USE_INCDEC. by X86_TUNE_USE_INCDEC. */
This flag may be dropped from generic once core2-corei5 machines are
rare enough. */
DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall", DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall",
m_CORE2 | m_GENERIC) m_CORE2)
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
partial dependencies. */ partial dependencies. */
...@@ -96,28 +94,26 @@ DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", ...@@ -96,28 +94,26 @@ DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
| m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC) | m_KNL | m_KNM | m_AMD_MULTIPLE | m_GENERIC)
/* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent
conditional jump instruction for 32 bit TARGET. conditional jump instruction for 32 bit TARGET. */
FIXME: revisit for generic. */
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32", DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32",
m_CORE_ALL | m_BDVER | m_ZNVER1) m_CORE_ALL | m_BDVER | m_ZNVER1 | m_GENERIC)
/* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent /* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent
conditional jump instruction for TARGET_64BIT. conditional jump instruction for TARGET_64BIT. */
FIXME: revisit for generic. */
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64", DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64",
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER | m_ZNVER1) m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER | m_ZNVER1 | m_GENERIC)
/* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a /* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a
subsequent conditional jump instruction when the condition jump subsequent conditional jump instruction when the condition jump
check sign flag (SF) or overflow flag (OF). */ check sign flag (SF) or overflow flag (OF). */
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags", DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags",
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER | m_ZNVER1) m_NEHALEM | m_SANDYBRIDGE | m_HASWELL | m_BDVER | m_ZNVER1 | m_GENERIC)
/* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional
jump instruction when the alu instruction produces the CCFLAG consumed by jump instruction when the alu instruction produces the CCFLAG consumed by
the conditional jump instruction. */ the conditional jump instruction. */
DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch", DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
m_SANDYBRIDGE | m_HASWELL) m_SANDYBRIDGE | m_HASWELL | m_GENERIC)
/*****************************************************************************/ /*****************************************************************************/
...@@ -130,12 +126,8 @@ DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch", ...@@ -130,12 +126,8 @@ DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
This increase code size by about 5% in 32bit mode, less so in 64bit mode This increase code size by about 5% in 32bit mode, less so in 64bit mode
because parameters are passed in registers. It is considerable because parameters are passed in registers. It is considerable
win for targets without stack engine that prevents multple push operations win for targets without stack engine that prevents multple push operations
to happen in parallel. to happen in parallel. */
FIXME: the flags is incorrectly enabled for amdfam10, Bulldozer,
Bobcat and Generic. This is because disabling it causes large
regression on mgrid due to IRA limitation leading to unecessary
use of the frame pointer in 32bit mode. */
DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args", DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args",
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
| m_ATHLON_K8) | m_ATHLON_K8)
...@@ -194,7 +186,7 @@ DEF_TUNE (X86_TUNE_PAD_SHORT_FUNCTION, "pad_short_function", m_BONNELL) ...@@ -194,7 +186,7 @@ DEF_TUNE (X86_TUNE_PAD_SHORT_FUNCTION, "pad_short_function", m_BONNELL)
architecture expect at most one jump per 2 byte window. Failing to architecture expect at most one jump per 2 byte window. Failing to
pad returns leads to misaligned return stack. */ pad returns leads to misaligned return stack. */
DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns", DEF_TUNE (X86_TUNE_PAD_RETURNS, "pad_returns",
m_ATHLON_K8 | m_AMDFAM10 | m_GENERIC) m_ATHLON_K8 | m_AMDFAM10)
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
than 4 branch instructions in the 16 byte window. */ than 4 branch instructions in the 16 byte window. */
...@@ -380,8 +372,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, "inter_unit_moves_from_vec", ...@@ -380,8 +372,7 @@ DEF_TUNE (X86_TUNE_INTER_UNIT_MOVES_FROM_VEC, "inter_unit_moves_from_vec",
~m_ATHLON_K8) ~m_ATHLON_K8)
/* X86_TUNE_INTER_UNIT_CONVERSIONS: Enable float<->integer conversions /* X86_TUNE_INTER_UNIT_CONVERSIONS: Enable float<->integer conversions
to use both SSE and integer registers at a same time. to use both SSE and integer registers at a same time. */
FIXME: revisit importance of this for generic. */
DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions", DEF_TUNE (X86_TUNE_INTER_UNIT_CONVERSIONS, "inter_unit_conversions",
~(m_AMDFAM10 | m_BDVER)) ~(m_AMDFAM10 | m_BDVER))
......
2017-12-01 Jan Hubicka <hubicka@ucw.cz>
PR target/81616
* gcc.target/i386/pad-1.c: Compile for amdfam10.
* gcc.target/i386/align-limit.c: Likewise.
2017-12-02 Jakub Jelinek <jakub@redhat.com> 2017-12-02 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/83170 PR tree-optimization/83170
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O2 -falign-functions=64 -flimit-function-alignment" } */ /* { dg-options "-O2 -falign-functions=64 -flimit-function-alignment -march=amdfam10" } */
/* { dg-final { scan-assembler ".p2align 6,,1" } } */ /* { dg-final { scan-assembler ".p2align 6,,1" } } */
/* { dg-final { scan-assembler-not ".p2align 6,,63" } } */ /* { dg-final { scan-assembler-not ".p2align 6,,63" } } */
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O2 -fomit-frame-pointer -mtune=generic" } */ /* { dg-options "-O2 -fomit-frame-pointer -mtune=amdfam10" } */
/* { dg-final { scan-assembler "rep" { target { ! x86_64-*-mingw* } } } } */ /* { dg-final { scan-assembler "rep" { target { ! x86_64-*-mingw* } } } } */
/* { dg-final { scan-assembler-not "nop" } } */ /* { dg-final { scan-assembler-not "nop" } } */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment