i386.c (ix86_size_cost, [...]): Set reassociation width to 1.

* i386.c (ix86_size_cost, i386_cost, i486_cost, pentium_cost, lakemont_cost, pentiumpro_cost, geode_cost, k6_cost, athlon_cost, k8_cost, amdfam10_cost, btver1_cost, btver2_cost, pentium4_cost, nocona_cost): Set reassociation width to 1. (bdver1_cost, bdver2_cost, bdver3_cost, bdver4_cost): Set reassociation width to 2 for fp operations and 1 otherwise. (znver1_cost): Set scalar reassoc width to 4 and vector reassoc width to 3 for int and 6 for fp. (atom_cost): Set reassociation width to 2. (slm_cost, generic_cost): Set fp reassociation width to 2 and 1 otherwise. (intel_cost): Set fp reassociation width to 4 and 1 otherwise. (core_cost): Set fp reassociation width to 4 and vector to 2. (ix86_reassociation_width): Rewrite using cost table; special case plus/minus on Zen; honor X86_TUNE_SSE_SPLIT_REGS and TARGET_AVX128_OPTIMAL. * i386.h (processor_costs): Add reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp. (TARGET_VECTOR_PARALLEL_EXECUTION, TARGET_REASSOC_INT_TO_PARALLEL, TARGET_REASSOC_FP_TO_PARALLEL): Remove. * x86-tune.def (X86_TUNE_REASSOC_INT_TO_PARALLEL): Remove. (X86_TUNE_REASSOC_FP_TO_PARALLEL): Remove. (X86_TUNE_VECTOR_PARALLEL_EXECUTION): Remove. From-SVN: r253448

i386.c (ix86_size_cost, [...]): Set reassociation width to 1.
* i386.c (ix86_size_cost, i386_cost, i486_cost, pentium_cost, lakemont_cost, pentiumpro_cost, geode_cost, k6_cost, athlon_cost, k8_cost, amdfam10_cost, btver1_cost, btver2_cost, pentium4_cost, nocona_cost): Set reassociation width to 1. (bdver1_cost, bdver2_cost, bdver3_cost, bdver4_cost): Set reassociation width to 2 for fp operations and 1 otherwise. (znver1_cost): Set scalar reassoc width to 4 and vector reassoc width to 3 for int and 6 for fp. (atom_cost): Set reassociation width to 2. (slm_cost, generic_cost): Set fp reassociation width to 2 and 1 otherwise. (intel_cost): Set fp reassociation width to 4 and 1 otherwise. (core_cost): Set fp reassociation width to 4 and vector to 2. (ix86_reassociation_width): Rewrite using cost table; special case plus/minus on Zen; honor X86_TUNE_SSE_SPLIT_REGS and TARGET_AVX128_OPTIMAL. * i386.h (processor_costs): Add reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp. (TARGET_VECTOR_PARALLEL_EXECUTION, TARGET_REASSOC_INT_TO_PARALLEL, TARGET_REASSOC_FP_TO_PARALLEL): Remove. * x86-tune.def (X86_TUNE_REASSOC_INT_TO_PARALLEL): Remove. (X86_TUNE_REASSOC_FP_TO_PARALLEL): Remove. (X86_TUNE_VECTOR_PARALLEL_EXECUTION): Remove. From-SVN: r253448
a813c280 · Jan Hubicka · Jan Hubicka · 807e3be2 · a813c280 · a813c280
Commit a813c280 authored Oct 05, 2017 by Jan Hubicka Committed by Jan Hubicka Oct 05, 2017
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 97 additions and 48 deletions

gcc/ChangeLog
+25 -0

gcc/config/i386/i386.c
+65 -27

gcc/config/i386/i386.h
+7 -6

gcc/config/i386/x86-tune.def
+0 -15

No files found.
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2017-10-05  Jan Hubicka <hubicka@ucw.cz>
+
+	* i386.c (ix86_size_cost, i386_cost, i486_cost, pentium_cost,
+	lakemont_cost, pentiumpro_cost, geode_cost, k6_cost,
+	athlon_cost, k8_cost, amdfam10_cost, btver1_cost, btver2_cost,
+	pentium4_cost, nocona_cost): Set reassociation width to 1.
+	(bdver1_cost, bdver2_cost, bdver3_cost, bdver4_cost): Set reassociation
+	width to 2 for fp operations and 1 otherwise.
+	(znver1_cost): Set scalar reassoc width to 4 and vector reassoc width
+	to 3 for int and 6 for fp.
+	(atom_cost): Set reassociation width to 2.
+	(slm_cost, generic_cost): Set fp reassociation width to 2 and 1 otherwise.
+	(intel_cost): Set fp reassociation width to 4 and 1 otherwise.
+	(core_cost): Set fp reassociation width to 4 and vector to 2.
+	(ix86_reassociation_width): Rewrite using cost table; special case
+	plus/minus on Zen; honor X86_TUNE_SSE_SPLIT_REGS
+	and TARGET_AVX128_OPTIMAL.
+	* i386.h (processor_costs): Add
+	reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp.
+	(TARGET_VECTOR_PARALLEL_EXECUTION, TARGET_REASSOC_INT_TO_PARALLEL,
+	TARGET_REASSOC_FP_TO_PARALLEL): Remove.
+	* x86-tune.def (X86_TUNE_REASSOC_INT_TO_PARALLEL): Remove.
+	(X86_TUNE_REASSOC_FP_TO_PARALLEL): Remove.
+	(X86_TUNE_VECTOR_PARALLEL_EXECUTION): Remove.
+
 2017-10-05  Nathan Sidwell  <nathan@acm.org>

 	* doc/invoke.texi (Wparentheses): Document C++ MVP behaviour.
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -257,6 +257,13 @@ struct processor_costs {
  const int fsqrt;		/* cost of FSQRT instruction.  */
 				/* Specify what algorithm
 				   to use for stringops on unknown size.  */
+  const int reassoc_int, reassoc_fp, reassoc_vec_int, reassoc_vec_fp;
+				/* Specify reassociation widths for integer,
+				   fp, vector integer and vector fp
+				   operations.  Generally should correspond
+				   to the number of instructions executed in
+				   parallel.  See also
+				   ix86_reassociation_width.  */
  struct stringop_algs *memcpy, *memset;
  const int scalar_stmt_cost;   /* Cost of any scalar operation, excluding
 				   load and store.  */
@@ -466,8 +473,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS]
 #define TARGET_SLOW_PSHUFB \
 	ix86_tune_features[X86_TUNE_SLOW_PSHUFB]
-#define TARGET_VECTOR_PARALLEL_EXECUTION \
-	ix86_tune_features[X86_TUNE_VECTOR_PARALLEL_EXECUTION]
 #define TARGET_AVOID_4BYTE_PREFIXES \
 	ix86_tune_features[X86_TUNE_AVOID_4BYTE_PREFIXES]
 #define TARGET_FUSE_CMP_AND_BRANCH_32 \
@@ -488,10 +493,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
 	ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
 #define TARGET_AVX128_OPTIMAL \
 	ix86_tune_features[X86_TUNE_AVX128_OPTIMAL]
-#define TARGET_REASSOC_INT_TO_PARALLEL \
-	ix86_tune_features[X86_TUNE_REASSOC_INT_TO_PARALLEL]
-#define TARGET_REASSOC_FP_TO_PARALLEL \
-	ix86_tune_features[X86_TUNE_REASSOC_FP_TO_PARALLEL]
 #define TARGET_GENERAL_REGS_SSE_SPILL \
 	ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
 #define TARGET_AVOID_MEM_OPND_FOR_CMOVE \

--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -117,16 +117,6 @@ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags",
 DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
          m_SANDYBRIDGE | m_HASWELL)

-/* X86_TUNE_REASSOC_INT_TO_PARALLEL: Try to produce parallel computations
-   during reassociation of integer computation.  */
-DEF_TUNE (X86_TUNE_REASSOC_INT_TO_PARALLEL, "reassoc_int_to_parallel",
-          m_BONNELL)
-
-/* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
-   during reassociation of fp computation.  */
-DEF_TUNE (X86_TUNE_REASSOC_FP_TO_PARALLEL, "reassoc_fp_to_parallel",
-          m_BONNELL | m_SILVERMONT | m_HASWELL | m_KNL | m_KNM |m_INTEL | m_BDVER1
-	  | m_BDVER2 | m_ZNVER1 | m_GENERIC)

 /*****************************************************************************/
 /* Function prologue, epilogue and function calling sequences.               */
@@ -391,11 +381,6 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10)
 DEF_TUNE (X86_TUNE_SLOW_PSHUFB, "slow_pshufb",
          m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL)

-/* X86_TUNE_VECTOR_PARALLEL_EXECUTION: Indicates tunings with ability to
-   execute 2 or more vector instructions in parallel.  */
-DEF_TUNE (X86_TUNE_VECTOR_PARALLEL_EXECUTION, "vec_parallel",
-          m_NEHALEM | m_SANDYBRIDGE | m_HASWELL)
-
 /* X86_TUNE_AVOID_4BYTE_PREFIXES: Avoid instructions requiring 4+ bytes of prefixes.  */
 DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
          m_SILVERMONT | m_INTEL)