Commit a813c280 by Jan Hubicka Committed by Jan Hubicka

i386.c (ix86_size_cost, [...]): Set reassociation width to 1.

	* i386.c (ix86_size_cost, i386_cost, i486_cost, pentium_cost,
	lakemont_cost, pentiumpro_cost, geode_cost, k6_cost,
	athlon_cost, k8_cost, amdfam10_cost, btver1_cost, btver2_cost,
	pentium4_cost, nocona_cost): Set reassociation width to 1.
	(bdver1_cost, bdver2_cost, bdver3_cost, bdver4_cost): Set reassociation
	width to 2 for fp operations and 1 otherwise.
	(znver1_cost): Set scalar reassoc width to 4 and vector to 3 and 6
	for int and fp.
	(atom_cost): Set reassociation width to 2.
	(slm_cost, generic_cost): Set fp reassociation width to 2 and 1 otherwise.
	(intel_cost): Set fp reassociation width to 4 and 1 otherwise.
	(core_cost): Set fp reassociation width to 4 and vector to 2.
	(ix86_reassociation_width): Rewrite using cost table; special case
	plus/minus on Zen; honor X86_TUNE_SSE_SPLIT_REGS
	and TARGET_AVX128_OPTIMAL.
	* i386.h (processor_costs): Add
	reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp.
	(TARGET_VECTOR_PARALLEL_EXECUTION, TARGET_REASSOC_INT_TO_PARALLEL,
	TARGET_REASSOC_FP_TO_PARALLEL): Remove.
	* x86-tune.def (X86_TUNE_REASSOC_INT_TO_PARALLEL): Remove.
	(X86_TUNE_REASSOC_FP_TO_PARALLEL): Remove.
	(X86_TUNE_VECTOR_PARALLEL_EXECUTION):  Remove.

From-SVN: r253448
parent 807e3be2
2017-10-05 Jan Hubicka <hubicka@ucw.cz>
* i386.c (ix86_size_cost, i386_cost, i486_cost, pentium_cost,
lakemont_cost, pentiumpro_cost, geode_cost, k6_cost,
athlon_cost, k8_cost, amdfam10_cost, btver1_cost, btver2_cost,
pentium4_cost, nocona_cost): Set reassociation width to 1.
(bdver1_cost, bdver2_cost, bdver3_cost, bdver4_cost): Set reassociation
width to 2 for fp operations and 1 otherwise.
(znver1_cost): Set scalar reassoc width to 4 and vector to 3 and 6
for int and fp.
(atom_cost): Set reassociation width to 2.
(slm_cost, generic_cost): Set fp reassociation width to 2 and 1 otherwise.
(intel_cost): Set fp reassociation width to 4 and 1 otherwise.
(core_cost): Set fp reassociation width to 4 and vector to 2.
(ix86_reassociation_width): Rewrite using cost table; special case
plus/minus on Zen; honor X86_TUNE_SSE_SPLIT_REGS
and TARGET_AVX128_OPTIMAL.
* i386.h (processor_costs): Add
reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp.
(TARGET_VECTOR_PARALLEL_EXECUTION, TARGET_REASSOC_INT_TO_PARALLEL,
TARGET_REASSOC_FP_TO_PARALLEL): Remove.
* x86-tune.def (X86_TUNE_REASSOC_INT_TO_PARALLEL): Remove.
(X86_TUNE_REASSOC_FP_TO_PARALLEL): Remove.
(X86_TUNE_VECTOR_PARALLEL_EXECUTION): Remove.
2017-10-05 Nathan Sidwell <nathan@acm.org>
* doc/invoke.texi (Wparentheses): Document C++ MVP behaviour.
......@@ -257,6 +257,13 @@ struct processor_costs {
const int fsqrt; /* cost of FSQRT instruction. */
/* Specify what algorithm
to use for stringops on unknown size. */
const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
/* Specify reassociation width for integer,
fp, vector integer and vector fp
operations. Generally should correspond
to number of instructions executed in
parallel. See also
ix86_reassociation_width. */
struct stringop_algs *memcpy, *memset;
const int scalar_stmt_cost; /* Cost of any scalar operation, excluding
load and store. */
......@@ -466,8 +473,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
#define TARGET_SLOW_PSHUFB \
ix86_tune_features[X86_TUNE_SLOW_PSHUFB]
#define TARGET_VECTOR_PARALLEL_EXECUTION \
ix86_tune_features[X86_TUNE_VECTOR_PARALLEL_EXECUTION]
#define TARGET_AVOID_4BYTE_PREFIXES \
ix86_tune_features[X86_TUNE_AVOID_4BYTE_PREFIXES]
#define TARGET_FUSE_CMP_AND_BRANCH_32 \
......@@ -488,10 +493,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
#define TARGET_AVX128_OPTIMAL \
ix86_tune_features[X86_TUNE_AVX128_OPTIMAL]
#define TARGET_REASSOC_INT_TO_PARALLEL \
ix86_tune_features[X86_TUNE_REASSOC_INT_TO_PARALLEL]
#define TARGET_REASSOC_FP_TO_PARALLEL \
ix86_tune_features[X86_TUNE_REASSOC_FP_TO_PARALLEL]
#define TARGET_GENERAL_REGS_SSE_SPILL \
ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
#define TARGET_AVOID_MEM_OPND_FOR_CMOVE \
......
......@@ -117,16 +117,6 @@ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags",
DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
m_SANDYBRIDGE | m_HASWELL)
/* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
during reassociation of integer computation. */
DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, "reassoc_int_to_parallel",
m_BONNELL)
/* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
during reassociation of fp computation. */
DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL | m_KNM |m_INTEL | m_BDVER1
| m_BDVER2 | m_ZNVER1 | m_GENERIC)
/*****************************************************************************/
/* Function prologue, epilogue and function calling sequences. */
......@@ -391,11 +381,6 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10)
DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL)
/* X86_TUNE_VECTOR_PARALLEL_EXECUTION: Indicates tunings with ability to
execute 2 or more vector instructions in parallel. */
DEF_TUNE (X86_TUNE_VECTOR_PARALLEL_EXECUTION, "vec_parallel",
m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
/* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes. */
DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
m_SILVERMONT | m_INTEL)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment