Commit f6fd8f2b by Jan Hubicka Committed by Jan Hubicka

pr79683.c: Disable costmodel.


	* gcc.target/i386/pr79683.c: Disable costmodel.
	* i386.c (ix86_builtin_vectorization_cost): Use existing rtx_cost
	latencies instead of having separate table; make difference between
	integer and float costs.
	* i386.h (processor_costs): Remove scalar_stmt_cost,
	scalar_load_cost, scalar_store_cost, vec_stmt_cost, vec_to_scalar_cost,
	scalar_to_vec_cost, vec_align_load_cost, vec_unalign_load_cost,
	vec_store_cost.
	* x86-tune-costs.h: Remove entries which have been removed in
	processor_costs from all tables; make cond_taken_branch_cost
	and cond_not_taken_branch_cost COST_N_INSNS based.
Index: testsuite/gcc.target/i386/pr79683.c
===================================================================
--- testsuite/gcc.target/i386/pr79683.c	(revision 253957)
+++ testsuite/gcc.target/i386/pr79683.c	(working copy)
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O3 -msse2" } */
+/* { dg-options "-O3 -msse2 -fvect-cost-model=unlimited" } */
 
 struct s {
     __INT64_TYPE__ a;
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 253957)
+++ config/i386/i386.c	(working copy)
@@ -44051,37 +44051,61 @@ static int
 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                  tree vectype, int)
 {
+  bool fp = false;
+  machine_mode mode = TImode;
+  if (vectype != NULL)
+    {
+      fp = FLOAT_TYPE_P (vectype);
+      mode = TYPE_MODE (vectype);
+    }
+
   switch (type_of_cost)
     {
       case scalar_stmt:
-        return ix86_cost->scalar_stmt_cost;
+        return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
 
       case scalar_load:
-        return ix86_cost->scalar_load_cost;
+	/* load/store costs are relative to register move which is 2. Recompute
+ 	   it to COSTS_N_INSNS so everything have same base.  */
+        return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
+			      : ix86_cost->int_load [2]) / 2;
 
       case scalar_store:
-        return ix86_cost->scalar_store_cost;
+        return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
+			      : ix86_cost->int_store [2]) / 2;
 
       case vector_stmt:
-        return ix86_cost->vec_stmt_cost;
+        return ix86_vec_cost (mode,
+			      fp ? ix86_cost->addss : ix86_cost->sse_op,
+			      true);
 
       case vector_load:
-        return ix86_cost->vec_align_load_cost;
+        return ix86_vec_cost (mode,
+			      COSTS_N_INSNS (ix86_cost->sse_load[2]) / 2,
+			      true);
 
       case vector_store:
-        return ix86_cost->vec_store_cost;
+        return ix86_vec_cost (mode,
+			      COSTS_N_INSNS (ix86_cost->sse_store[2]) / 2,
+			      true);
 
       case vec_to_scalar:
-        return ix86_cost->vec_to_scalar_cost;
-
       case scalar_to_vec:
-        return ix86_cost->scalar_to_vec_cost;
+        return ix86_vec_cost (mode, ix86_cost->sse_op, true);
 
+      /* We should have separate costs for unaligned loads and gather/scatter.
+	 Do that incrementally.  */
       case unaligned_load:
-      case unaligned_store:
       case vector_gather_load:
+        return ix86_vec_cost (mode,
+			      COSTS_N_INSNS (ix86_cost->sse_load[2]),
+			      true);
+
+      case unaligned_store:
       case vector_scatter_store:
-        return ix86_cost->vec_unalign_load_cost;
+        return ix86_vec_cost (mode,
+			      COSTS_N_INSNS (ix86_cost->sse_store[2]),
+			      true);
 
       case cond_branch_taken:
         return ix86_cost->cond_taken_branch_cost;
@@ -44091,10 +44115,11 @@ ix86_builtin_vectorization_cost (enum ve
 
       case vec_perm:
       case vec_promote_demote:
-        return ix86_cost->vec_stmt_cost;
+        return ix86_vec_cost (mode,
+			      ix86_cost->sse_op, true);
 
       case vec_construct:
-	return ix86_cost->vec_stmt_cost * (TYPE_VECTOR_SUBPARTS (vectype) - 1);
+	return ix86_vec_cost (mode, ix86_cost->sse_op, false);
 
       default:
         gcc_unreachable ();
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 253957)
+++ config/i386/i386.h	(working copy)
@@ -277,18 +277,6 @@ struct processor_costs {
 				   parallel.  See also
 				   ix86_reassociation_width.  */
   struct stringop_algs *memcpy, *memset;
-  const int scalar_stmt_cost;   /* Cost of any scalar operation, excluding
-				   load and store.  */
-  const int scalar_load_cost;   /* Cost of scalar load.  */
-  const int scalar_store_cost;  /* Cost of scalar store.  */
-  const int vec_stmt_cost;      /* Cost of any vector operation, excluding
-                                   load, store, vector-to-scalar and
-                                   scalar-to-vector operation.  */
-  const int vec_to_scalar_cost;    /* Cost of vect-to-scalar operation.  */
-  const int scalar_to_vec_cost;    /* Cost of scalar-to-vector operation.  */
-  const int vec_align_load_cost;   /* Cost of aligned vector load.  */
-  const int vec_unalign_load_cost; /* Cost of unaligned vector load.  */
-  const int vec_store_cost;        /* Cost of vector store.  */
   const int cond_taken_branch_cost;    /* Cost of taken branch for vectorizer
 					  cost model.  */
   const int cond_not_taken_branch_cost;/* Cost of not taken branch for
Index: config/i386/x86-tune-costs.h
===================================================================
--- config/i386/x86-tune-costs.h	(revision 253958)
+++ config/i386/x86-tune-costs.h	(working copy)
@@ -79,17 +79,8 @@ struct processor_costs ix86_size_cost =
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   ix86_size_memcpy,
   ix86_size_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  1,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  1,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_BYTES (1),			/* cond_taken_branch_cost.  */
+  COSTS_N_BYTES (1),			/* cond_not_taken_branch_cost.  */
 };
 
 /* Processor costs (relative to an add) */
@@ -167,17 +158,8 @@ struct processor_costs i386_cost = {	/*
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   i386_memcpy,
   i386_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs i486_memcpy[2] = {
@@ -256,17 +238,8 @@ struct processor_costs i486_cost = {	/*
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   i486_memcpy,
   i486_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs pentium_memcpy[2] = {
@@ -343,17 +316,8 @@ struct processor_costs pentium_cost = {
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   pentium_memcpy,
   pentium_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 static const
@@ -423,17 +387,8 @@ struct processor_costs lakemont_cost = {
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   pentium_memcpy,
   pentium_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
@@ -518,17 +473,8 @@ struct processor_costs pentiumpro_cost =
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   pentiumpro_memcpy,
   pentiumpro_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs geode_memcpy[2] = {
@@ -605,17 +551,8 @@ struct processor_costs geode_cost = {
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   geode_memcpy,
   geode_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs k6_memcpy[2] = {
@@ -694,17 +631,8 @@ struct processor_costs k6_cost = {
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   k6_memcpy,
   k6_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 /* For some reason, Athlon deals better with REP prefix (relative to loops)
@@ -784,17 +712,8 @@ struct processor_costs athlon_cost = {
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   athlon_memcpy,
   athlon_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 /* K8 has optimized REP instruction for medium sized blocks, but for very
@@ -883,17 +802,8 @@ struct processor_costs k8_cost = {
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   k8_memcpy,
   k8_memset,
-  4,					/* scalar_stmt_cost.  */
-  2,					/* scalar load_cost.  */
-  2,					/* scalar_store_cost.  */
-  5,					/* vec_stmt_cost.  */
-  0,					/* vec_to_scalar_cost.  */
-  2,					/* scalar_to_vec_cost.  */
-  2,					/* vec_align_load_cost.  */
-  3,					/* vec_unalign_load_cost.  */
-  3,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  2,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2),			/* cond_not_taken_branch_cost.  */
 };
 
 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
@@ -989,17 +899,8 @@ struct processor_costs amdfam10_cost = {
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   amdfam10_memcpy,
   amdfam10_memset,
-  4,					/* scalar_stmt_cost.  */
-  2,					/* scalar load_cost.  */
-  2,					/* scalar_store_cost.  */
-  6,					/* vec_stmt_cost.  */
-  0,					/* vec_to_scalar_cost.  */
-  2,					/* scalar_to_vec_cost.  */
-  2,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  2,					/* vec_store_cost.  */
-  2,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (2),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 /*  BDVER1 has optimized REP instruction for medium sized blocks, but for
@@ -1097,17 +998,8 @@ const struct processor_costs bdver1_cost
   1, 2, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   bdver1_memcpy,
   bdver1_memset,
-  6,					/* scalar_stmt_cost.  */
-  4,					/* scalar load_cost.  */
-  4,					/* scalar_store_cost.  */
-  6,					/* vec_stmt_cost.  */
-  0,					/* vec_to_scalar_cost.  */
-  2,					/* scalar_to_vec_cost.  */
-  4,					/* vec_align_load_cost.  */
-  4,					/* vec_unalign_load_cost.  */
-  4,					/* vec_store_cost.  */
-  4,					/* cond_taken_branch_cost.  */
-  2,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2),			/* cond_not_taken_branch_cost.  */
 };
 
 /*  BDVER2 has optimized REP instruction for medium sized blocks, but for
@@ -1206,17 +1098,8 @@ const struct processor_costs bdver2_cost
   1, 2, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   bdver2_memcpy,
   bdver2_memset,
-  6,					/* scalar_stmt_cost.  */
-  4,					/* scalar load_cost.  */
-  4,					/* scalar_store_cost.  */
-  6,					/* vec_stmt_cost.  */
-  0,					/* vec_to_scalar_cost.  */
-  2,					/* scalar_to_vec_cost.  */
-  4,					/* vec_align_load_cost.  */
-  4,					/* vec_unalign_load_cost.  */
-  4,					/* vec_store_cost.  */
-  4,					/* cond_taken_branch_cost.  */
-  2,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2),			/* cond_not_taken_branch_cost.  */
 };
 
 
@@ -1306,17 +1189,8 @@ struct processor_costs bdver3_cost = {
   1, 2, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   bdver3_memcpy,
   bdver3_memset,
-  6,					/* scalar_stmt_cost.  */
-  4,					/* scalar load_cost.  */
-  4,					/* scalar_store_cost.  */
-  6,					/* vec_stmt_cost.  */
-  0,					/* vec_to_scalar_cost.  */
-  2,					/* scalar_to_vec_cost.  */
-  4,					/* vec_align_load_cost.  */
-  4,					/* vec_unalign_load_cost.  */
-  4,					/* vec_store_cost.  */
-  4,					/* cond_taken_branch_cost.  */
-  2,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2),			/* cond_not_taken_branch_cost.  */
 };
 
 /*  BDVER4 has optimized REP instruction for medium sized blocks, but for
@@ -1405,17 +1279,8 @@ struct processor_costs bdver4_cost = {
   1, 2, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   bdver4_memcpy,
   bdver4_memset,
-  6,					/* scalar_stmt_cost.  */
-  4,					/* scalar load_cost.  */
-  4,					/* scalar_store_cost.  */
-  6,					/* vec_stmt_cost.  */
-  0,					/* vec_to_scalar_cost.  */
-  2,					/* scalar_to_vec_cost.  */
-  4,					/* vec_align_load_cost.  */
-  4,					/* vec_unalign_load_cost.  */
-  4,					/* vec_store_cost.  */
-  4,					/* cond_taken_branch_cost.  */
-  2,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2),			/* cond_not_taken_branch_cost.  */
 };
 
 
@@ -1524,17 +1389,8 @@ struct processor_costs znver1_cost = {
   4, 4, 3, 6,				/* reassoc int, fp, vec_int, vec_fp.  */
   znver1_memcpy,
   znver1_memset,
-  6,					/* scalar_stmt_cost.  */
-  4,					/* scalar load_cost.  */
-  4,					/* scalar_store_cost.  */
-  6,					/* vec_stmt_cost.  */
-  0,					/* vec_to_scalar_cost.  */
-  2,					/* scalar_to_vec_cost.  */
-  4,					/* vec_align_load_cost.  */
-  4,					/* vec_unalign_load_cost.  */
-  4,					/* vec_store_cost.  */
-  4,					/* cond_taken_branch_cost.  */
-  2,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (4),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (2),			/* cond_not_taken_branch_cost.  */
 };
 
   /* BTVER1 has optimized REP instruction for medium sized blocks, but for
@@ -1624,17 +1480,8 @@ const struct processor_costs btver1_cost
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   btver1_memcpy,
   btver1_memset,
-  4,					/* scalar_stmt_cost.  */
-  2,					/* scalar load_cost.  */
-  2,					/* scalar_store_cost.  */
-  6,					/* vec_stmt_cost.  */
-  0,					/* vec_to_scalar_cost.  */
-  2,					/* scalar_to_vec_cost.  */
-  2,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  2,					/* vec_store_cost.  */
-  2,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (2),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs btver2_memcpy[2] = {
@@ -1721,17 +1568,8 @@ const struct processor_costs btver2_cost
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   btver2_memcpy,
   btver2_memset,
-  4,					/* scalar_stmt_cost.  */
-  2,					/* scalar load_cost.  */
-  2,					/* scalar_store_cost.  */
-  6,					/* vec_stmt_cost.  */
-  0,					/* vec_to_scalar_cost.  */
-  2,					/* scalar_to_vec_cost.  */
-  2,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  2,					/* vec_store_cost.  */
-  2,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (2),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs pentium4_memcpy[2] = {
@@ -1809,17 +1647,8 @@ struct processor_costs pentium4_cost = {
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   pentium4_memcpy,
   pentium4_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs nocona_memcpy[2] = {
@@ -1900,17 +1729,8 @@ struct processor_costs nocona_cost = {
   1, 1, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   nocona_memcpy,
   nocona_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs atom_memcpy[2] = {
@@ -1989,17 +1809,8 @@ struct processor_costs atom_cost = {
   2, 2, 2, 2,				/* reassoc int, fp, vec_int, vec_fp.  */
   atom_memcpy,
   atom_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs slm_memcpy[2] = {
@@ -2078,17 +1889,8 @@ struct processor_costs slm_cost = {
   1, 2, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   slm_memcpy,
   slm_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  4,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 static stringop_algs intel_memcpy[2] = {
@@ -2167,17 +1969,8 @@ struct processor_costs intel_cost = {
   1, 4, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   intel_memcpy,
   intel_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  4,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 /* Generic should produce code tuned for Core-i7 (and newer chips)
@@ -2265,17 +2058,8 @@ struct processor_costs generic_cost = {
   1, 2, 1, 1,				/* reassoc int, fp, vec_int, vec_fp.  */
   generic_memcpy,
   generic_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };
 
 /* core_cost should produce code tuned for Core familly of CPUs.  */
@@ -2366,16 +2150,7 @@ struct processor_costs core_cost = {
   1, 4, 2, 2,				/* reassoc int, fp, vec_int, vec_fp.  */
   core_memcpy,
   core_memset,
-  1,					/* scalar_stmt_cost.  */
-  1,					/* scalar load_cost.  */
-  1,					/* scalar_store_cost.  */
-  1,					/* vec_stmt_cost.  */
-  1,					/* vec_to_scalar_cost.  */
-  1,					/* scalar_to_vec_cost.  */
-  1,					/* vec_align_load_cost.  */
-  2,					/* vec_unalign_load_cost.  */
-  1,					/* vec_store_cost.  */
-  3,					/* cond_taken_branch_cost.  */
-  1,					/* cond_not_taken_branch_cost.  */
+  COSTS_N_INSNS (3),			/* cond_taken_branch_cost.  */
+  COSTS_N_INSNS (1),			/* cond_not_taken_branch_cost.  */
 };

From-SVN: r253975
parent 0071b8a1
...@@ -44051,37 +44051,61 @@ static int ...@@ -44051,37 +44051,61 @@ static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
tree vectype, int) tree vectype, int)
{ {
bool fp = false;
machine_mode mode = TImode;
if (vectype != NULL)
{
fp = FLOAT_TYPE_P (vectype);
mode = TYPE_MODE (vectype);
}
switch (type_of_cost) switch (type_of_cost)
{ {
case scalar_stmt: case scalar_stmt:
return ix86_cost->scalar_stmt_cost; return fp ? ix86_cost->addss : COSTS_N_INSNS (1);
case scalar_load: case scalar_load:
return ix86_cost->scalar_load_cost; /* load/store costs are relative to register move which is 2. Recompute
it to COSTS_N_INSNS so everything have same base. */
return COSTS_N_INSNS (fp ? ix86_cost->sse_load[0]
: ix86_cost->int_load [2]) / 2;
case scalar_store: case scalar_store:
return ix86_cost->scalar_store_cost; return COSTS_N_INSNS (fp ? ix86_cost->sse_store[0]
: ix86_cost->int_store [2]) / 2;
case vector_stmt: case vector_stmt:
return ix86_cost->vec_stmt_cost; return ix86_vec_cost (mode,
fp ? ix86_cost->addss : ix86_cost->sse_op,
true);
case vector_load: case vector_load:
return ix86_cost->vec_align_load_cost; return ix86_vec_cost (mode,
COSTS_N_INSNS (ix86_cost->sse_load[2]) / 2,
true);
case vector_store: case vector_store:
return ix86_cost->vec_store_cost; return ix86_vec_cost (mode,
COSTS_N_INSNS (ix86_cost->sse_store[2]) / 2,
true);
case vec_to_scalar: case vec_to_scalar:
return ix86_cost->vec_to_scalar_cost;
case scalar_to_vec: case scalar_to_vec:
return ix86_cost->scalar_to_vec_cost; return ix86_vec_cost (mode, ix86_cost->sse_op, true);
/* We should have separate costs for unaligned loads and gather/scatter.
Do that incrementally. */
case unaligned_load: case unaligned_load:
case unaligned_store:
case vector_gather_load: case vector_gather_load:
return ix86_vec_cost (mode,
COSTS_N_INSNS (ix86_cost->sse_load[2]),
true);
case unaligned_store:
case vector_scatter_store: case vector_scatter_store:
return ix86_cost->vec_unalign_load_cost; return ix86_vec_cost (mode,
COSTS_N_INSNS (ix86_cost->sse_store[2]),
true);
case cond_branch_taken: case cond_branch_taken:
return ix86_cost->cond_taken_branch_cost; return ix86_cost->cond_taken_branch_cost;
...@@ -44091,10 +44115,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, ...@@ -44091,10 +44115,11 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
case vec_perm: case vec_perm:
case vec_promote_demote: case vec_promote_demote:
return ix86_cost->vec_stmt_cost; return ix86_vec_cost (mode,
ix86_cost->sse_op, true);
case vec_construct: case vec_construct:
return ix86_cost->vec_stmt_cost * (TYPE_VECTOR_SUBPARTS (vectype) - 1); return ix86_vec_cost (mode, ix86_cost->sse_op, false);
default: default:
gcc_unreachable (); gcc_unreachable ();
...@@ -277,18 +277,6 @@ struct processor_costs { ...@@ -277,18 +277,6 @@ struct processor_costs {
parallel. See also parallel. See also
ix86_reassociation_width. */ ix86_reassociation_width. */
struct stringop_algs *memcpy, *memset; struct stringop_algs *memcpy, *memset;
const int scalar_stmt_cost; /* Cost of any scalar operation, excluding
load and store. */
const int scalar_load_cost; /* Cost of scalar load. */
const int scalar_store_cost; /* Cost of scalar store. */
const int vec_stmt_cost; /* Cost of any vector operation, excluding
load, store, vector-to-scalar and
scalar-to-vector operation. */
const int vec_to_scalar_cost; /* Cost of vect-to-scalar operation. */
const int scalar_to_vec_cost; /* Cost of scalar-to-vector operation. */
const int vec_align_load_cost; /* Cost of aligned vector load. */
const int vec_unalign_load_cost; /* Cost of unaligned vector load. */
const int vec_store_cost; /* Cost of vector store. */
const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer
cost model. */ cost model. */
const int cond_not_taken_branch_cost;/* Cost of not taken branch for const int cond_not_taken_branch_cost;/* Cost of not taken branch for
......
...@@ -79,17 +79,8 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ ...@@ -79,17 +79,8 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
ix86_size_memcpy, ix86_size_memcpy,
ix86_size_memset, ix86_size_memset,
1, /* scalar_stmt_cost. */ COSTS_N_BYTES (1), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_BYTES (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
1, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
1, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
/* Processor costs (relative to an add) */ /* Processor costs (relative to an add) */
...@@ -167,17 +158,8 @@ struct processor_costs i386_cost = { /* 386 specific costs */ ...@@ -167,17 +158,8 @@ struct processor_costs i386_cost = { /* 386 specific costs */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i386_memcpy, i386_memcpy,
i386_memset, i386_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
static stringop_algs i486_memcpy[2] = { static stringop_algs i486_memcpy[2] = {
...@@ -256,17 +238,8 @@ struct processor_costs i486_cost = { /* 486 specific costs */ ...@@ -256,17 +238,8 @@ struct processor_costs i486_cost = { /* 486 specific costs */
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
i486_memcpy, i486_memcpy,
i486_memset, i486_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
static stringop_algs pentium_memcpy[2] = { static stringop_algs pentium_memcpy[2] = {
...@@ -343,17 +316,8 @@ struct processor_costs pentium_cost = { ...@@ -343,17 +316,8 @@ struct processor_costs pentium_cost = {
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy, pentium_memcpy,
pentium_memset, pentium_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
static const static const
...@@ -423,17 +387,8 @@ struct processor_costs lakemont_cost = { ...@@ -423,17 +387,8 @@ struct processor_costs lakemont_cost = {
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium_memcpy, pentium_memcpy,
pentium_memset, pentium_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
...@@ -518,17 +473,8 @@ struct processor_costs pentiumpro_cost = { ...@@ -518,17 +473,8 @@ struct processor_costs pentiumpro_cost = {
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentiumpro_memcpy, pentiumpro_memcpy,
pentiumpro_memset, pentiumpro_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
static stringop_algs geode_memcpy[2] = { static stringop_algs geode_memcpy[2] = {
...@@ -605,17 +551,8 @@ struct processor_costs geode_cost = { ...@@ -605,17 +551,8 @@ struct processor_costs geode_cost = {
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
geode_memcpy, geode_memcpy,
geode_memset, geode_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
static stringop_algs k6_memcpy[2] = { static stringop_algs k6_memcpy[2] = {
...@@ -694,17 +631,8 @@ struct processor_costs k6_cost = { ...@@ -694,17 +631,8 @@ struct processor_costs k6_cost = {
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k6_memcpy, k6_memcpy,
k6_memset, k6_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
/* For some reason, Athlon deals better with REP prefix (relative to loops) /* For some reason, Athlon deals better with REP prefix (relative to loops)
...@@ -784,17 +712,8 @@ struct processor_costs athlon_cost = { ...@@ -784,17 +712,8 @@ struct processor_costs athlon_cost = {
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
athlon_memcpy, athlon_memcpy,
athlon_memset, athlon_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
/* K8 has optimized REP instruction for medium sized blocks, but for very /* K8 has optimized REP instruction for medium sized blocks, but for very
...@@ -883,17 +802,8 @@ struct processor_costs k8_cost = { ...@@ -883,17 +802,8 @@ struct processor_costs k8_cost = {
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
k8_memcpy, k8_memcpy,
k8_memset, k8_memset,
4, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
2, /* scalar load_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
2, /* scalar_store_cost. */
5, /* vec_stmt_cost. */
0, /* vec_to_scalar_cost. */
2, /* scalar_to_vec_cost. */
2, /* vec_align_load_cost. */
3, /* vec_unalign_load_cost. */
3, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
2, /* cond_not_taken_branch_cost. */
}; };
/* AMDFAM10 has optimized REP instruction for medium sized blocks, but for /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
...@@ -989,17 +899,8 @@ struct processor_costs amdfam10_cost = { ...@@ -989,17 +899,8 @@ struct processor_costs amdfam10_cost = {
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
amdfam10_memcpy, amdfam10_memcpy,
amdfam10_memset, amdfam10_memset,
4, /* scalar_stmt_cost. */ COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
2, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
2, /* scalar_store_cost. */
6, /* vec_stmt_cost. */
0, /* vec_to_scalar_cost. */
2, /* scalar_to_vec_cost. */
2, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
2, /* vec_store_cost. */
2, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
/* BDVER1 has optimized REP instruction for medium sized blocks, but for /* BDVER1 has optimized REP instruction for medium sized blocks, but for
...@@ -1097,17 +998,8 @@ const struct processor_costs bdver1_cost = { ...@@ -1097,17 +998,8 @@ const struct processor_costs bdver1_cost = {
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver1_memcpy, bdver1_memcpy,
bdver1_memset, bdver1_memset,
6, /* scalar_stmt_cost. */ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
4, /* scalar load_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
4, /* scalar_store_cost. */
6, /* vec_stmt_cost. */
0, /* vec_to_scalar_cost. */
2, /* scalar_to_vec_cost. */
4, /* vec_align_load_cost. */
4, /* vec_unalign_load_cost. */
4, /* vec_store_cost. */
4, /* cond_taken_branch_cost. */
2, /* cond_not_taken_branch_cost. */
}; };
/* BDVER2 has optimized REP instruction for medium sized blocks, but for /* BDVER2 has optimized REP instruction for medium sized blocks, but for
...@@ -1206,17 +1098,8 @@ const struct processor_costs bdver2_cost = { ...@@ -1206,17 +1098,8 @@ const struct processor_costs bdver2_cost = {
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver2_memcpy, bdver2_memcpy,
bdver2_memset, bdver2_memset,
6, /* scalar_stmt_cost. */ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
4, /* scalar load_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
4, /* scalar_store_cost. */
6, /* vec_stmt_cost. */
0, /* vec_to_scalar_cost. */
2, /* scalar_to_vec_cost. */
4, /* vec_align_load_cost. */
4, /* vec_unalign_load_cost. */
4, /* vec_store_cost. */
4, /* cond_taken_branch_cost. */
2, /* cond_not_taken_branch_cost. */
}; };
...@@ -1306,17 +1189,8 @@ struct processor_costs bdver3_cost = { ...@@ -1306,17 +1189,8 @@ struct processor_costs bdver3_cost = {
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver3_memcpy, bdver3_memcpy,
bdver3_memset, bdver3_memset,
6, /* scalar_stmt_cost. */ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
4, /* scalar load_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
4, /* scalar_store_cost. */
6, /* vec_stmt_cost. */
0, /* vec_to_scalar_cost. */
2, /* scalar_to_vec_cost. */
4, /* vec_align_load_cost. */
4, /* vec_unalign_load_cost. */
4, /* vec_store_cost. */
4, /* cond_taken_branch_cost. */
2, /* cond_not_taken_branch_cost. */
}; };
/* BDVER4 has optimized REP instruction for medium sized blocks, but for /* BDVER4 has optimized REP instruction for medium sized blocks, but for
...@@ -1405,17 +1279,8 @@ struct processor_costs bdver4_cost = { ...@@ -1405,17 +1279,8 @@ struct processor_costs bdver4_cost = {
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
bdver4_memcpy, bdver4_memcpy,
bdver4_memset, bdver4_memset,
6, /* scalar_stmt_cost. */ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
4, /* scalar load_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
4, /* scalar_store_cost. */
6, /* vec_stmt_cost. */
0, /* vec_to_scalar_cost. */
2, /* scalar_to_vec_cost. */
4, /* vec_align_load_cost. */
4, /* vec_unalign_load_cost. */
4, /* vec_store_cost. */
4, /* cond_taken_branch_cost. */
2, /* cond_not_taken_branch_cost. */
}; };
...@@ -1524,17 +1389,8 @@ struct processor_costs znver1_cost = { ...@@ -1524,17 +1389,8 @@ struct processor_costs znver1_cost = {
4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
znver1_memcpy, znver1_memcpy,
znver1_memset, znver1_memset,
6, /* scalar_stmt_cost. */ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
4, /* scalar load_cost. */ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
4, /* scalar_store_cost. */
6, /* vec_stmt_cost. */
0, /* vec_to_scalar_cost. */
2, /* scalar_to_vec_cost. */
4, /* vec_align_load_cost. */
4, /* vec_unalign_load_cost. */
4, /* vec_store_cost. */
4, /* cond_taken_branch_cost. */
2, /* cond_not_taken_branch_cost. */
}; };
/* BTVER1 has optimized REP instruction for medium sized blocks, but for /* BTVER1 has optimized REP instruction for medium sized blocks, but for
...@@ -1624,17 +1480,8 @@ const struct processor_costs btver1_cost = { ...@@ -1624,17 +1480,8 @@ const struct processor_costs btver1_cost = {
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver1_memcpy, btver1_memcpy,
btver1_memset, btver1_memset,
4, /* scalar_stmt_cost. */ COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
2, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
2, /* scalar_store_cost. */
6, /* vec_stmt_cost. */
0, /* vec_to_scalar_cost. */
2, /* scalar_to_vec_cost. */
2, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
2, /* vec_store_cost. */
2, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
static stringop_algs btver2_memcpy[2] = { static stringop_algs btver2_memcpy[2] = {
...@@ -1721,17 +1568,8 @@ const struct processor_costs btver2_cost = { ...@@ -1721,17 +1568,8 @@ const struct processor_costs btver2_cost = {
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
btver2_memcpy, btver2_memcpy,
btver2_memset, btver2_memset,
4, /* scalar_stmt_cost. */ COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
2, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
2, /* scalar_store_cost. */
6, /* vec_stmt_cost. */
0, /* vec_to_scalar_cost. */
2, /* scalar_to_vec_cost. */
2, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
2, /* vec_store_cost. */
2, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
static stringop_algs pentium4_memcpy[2] = { static stringop_algs pentium4_memcpy[2] = {
...@@ -1809,17 +1647,8 @@ struct processor_costs pentium4_cost = { ...@@ -1809,17 +1647,8 @@ struct processor_costs pentium4_cost = {
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
pentium4_memcpy, pentium4_memcpy,
pentium4_memset, pentium4_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
static stringop_algs nocona_memcpy[2] = { static stringop_algs nocona_memcpy[2] = {
...@@ -1900,17 +1729,8 @@ struct processor_costs nocona_cost = { ...@@ -1900,17 +1729,8 @@ struct processor_costs nocona_cost = {
1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
nocona_memcpy, nocona_memcpy,
nocona_memset, nocona_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
static stringop_algs atom_memcpy[2] = { static stringop_algs atom_memcpy[2] = {
...@@ -1989,17 +1809,8 @@ struct processor_costs atom_cost = { ...@@ -1989,17 +1809,8 @@ struct processor_costs atom_cost = {
2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
atom_memcpy, atom_memcpy,
atom_memset, atom_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
static stringop_algs slm_memcpy[2] = { static stringop_algs slm_memcpy[2] = {
...@@ -2078,17 +1889,8 @@ struct processor_costs slm_cost = { ...@@ -2078,17 +1889,8 @@ struct processor_costs slm_cost = {
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
slm_memcpy, slm_memcpy,
slm_memset, slm_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
4, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
static stringop_algs intel_memcpy[2] = { static stringop_algs intel_memcpy[2] = {
...@@ -2167,17 +1969,8 @@ struct processor_costs intel_cost = { ...@@ -2167,17 +1969,8 @@ struct processor_costs intel_cost = {
1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
intel_memcpy, intel_memcpy,
intel_memset, intel_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
4, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
/* Generic should produce code tuned for Core-i7 (and newer chips) /* Generic should produce code tuned for Core-i7 (and newer chips)
...@@ -2265,17 +2058,8 @@ struct processor_costs generic_cost = { ...@@ -2265,17 +2058,8 @@ struct processor_costs generic_cost = {
1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
generic_memcpy, generic_memcpy,
generic_memset, generic_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
/* core_cost should produce code tuned for Core familly of CPUs. */ /* core_cost should produce code tuned for Core familly of CPUs. */
...@@ -2366,16 +2150,7 @@ struct processor_costs core_cost = { ...@@ -2366,16 +2150,7 @@ struct processor_costs core_cost = {
1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
core_memcpy, core_memcpy,
core_memset, core_memset,
1, /* scalar_stmt_cost. */ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
1, /* scalar load_cost. */ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
1, /* scalar_store_cost. */
1, /* vec_stmt_cost. */
1, /* vec_to_scalar_cost. */
1, /* scalar_to_vec_cost. */
1, /* vec_align_load_cost. */
2, /* vec_unalign_load_cost. */
1, /* vec_store_cost. */
3, /* cond_taken_branch_cost. */
1, /* cond_not_taken_branch_cost. */
}; };
2017-10-20 Jan Hubicka <hubicka@ucw.cz>
* gcc.target/i386/pr79683.c: Disable costmodel.
2017-10-21 Eric Botcazou <ebotcazou@adacore.com> 2017-10-21 Eric Botcazou <ebotcazou@adacore.com>
* gnat.dg/specs/discr_private.ads: Rename into ... * gnat.dg/specs/discr_private.ads: Rename into ...
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O3 -msse2" } */ /* { dg-options "-O3 -msse2 -fvect-cost-model=unlimited" } */
struct s { struct s {
__INT64_TYPE__ a; __INT64_TYPE__ a;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment