Commit 7dc58b50 by Martin Liska Committed by Martin Liska

i386: move alignment defaults to processor_costs.

2018-09-17  Martin Liska  <mliska@suse.cz>

	* common/config/i386/i386-common.c (ix86_get_valid_option_values):
	Use processor_names table.
	* config/i386/i386.c (ix86_default_align): Use
	processor_cost_table for alignment values.
	(ix86_option_override_internal): Use processor_names.
	(ix86_function_specific_print): Likewise.
	* config/i386/i386.h (struct processor_costs):
	Add alignment values.
	(struct ptt): Remove and replace with const char *.
	* config/i386/x86-tune-costs.h (struct processor_costs):
	Declare default alignments for all costs.

From-SVN: r264359
parent ca32d61b
2018-09-17 Martin Liska <mliska@suse.cz>
* common/config/i386/i386-common.c (ix86_get_valid_option_values):
Use processor_names table.
* config/i386/i386.c (ix86_default_align): Use
processor_cost_table for alignment values.
(ix86_option_override_internal): Use processor_names.
(ix86_function_specific_print): Likewise.
* config/i386/i386.h (struct processor_costs):
Add alignment values.
(struct ptt): Remove and replace with const char *.
* config/i386/x86-tune-costs.h (struct processor_costs):
Declare default alignments for all costs.
2018-09-17 Aldy Hernandez <aldyh@redhat.com>
* tree-vrp.c (extract_range_from_unary_expr): Do not special case
......
......@@ -1461,49 +1461,45 @@ i386_except_unwind_info (struct gcc_options *opts)
#define TARGET_SUPPORTS_SPLIT_STACK ix86_supports_split_stack
/* This table must be in sync with enum processor_type in i386.h. */
const struct ptt processor_target_table[PROCESSOR_max] =
const char *const processor_names[PROCESSOR_max] =
{
/* The "0:0:8" label alignment specified for some processors generates
secondary 8-byte alignment only for those label/jump/loop targets
which have primary alignment. */
{"generic", "16:11:8", "16:11:8", "0:0:8", "16"},
{"i386", "4", "4", NULL, "4" },
{"i486", "16", "16", "0:0:8", "16"},
{"pentium", "16:8:8", "16:8:8", "0:0:8", "16"},
{"lakemont", "16:8:8", "16:8:8", "0:0:8", "16"},
{"pentiumpro", "16", "16:11:8", "0:0:8", "16"},
{"pentium4", NULL, NULL, NULL, NULL},
{"nocona", NULL, NULL, NULL, NULL},
{"core2", "16:11:8", "16:11:8", "0:0:8", "16"},
{"nehalem", "16:11:8", "16:11:8", "0:0:8", "16"},
{"sandybridge", "16:11:8", "16:11:8", "0:0:8", "16"},
{"haswell", "16:11:8", "16:11:8", "0:0:8", "16"},
{"bonnell", "16", "16:8:8", "0:0:8", "16"},
{"silvermont", "16", "16:8:8", "0:0:8", "16"},
{"goldmont", "16", "16:8:8", "0:0:8", "16"},
{"goldmont-plus", "16", "16:8:8", "0:0:8", "16"},
{"tremont", "16", "16:8:8", "0:0:8", "16"},
{"knl", "16", "16:8:8", "0:0:8", "16"},
{"knm", "16", "16:8:8", "0:0:8", "16"},
{"skylake", "16:11:8", "16:11:8", "0:0:8", "16"},
{"skylake-avx512", "16:11:8", "16:11:8", "0:0:8", "16"},
{"cannonlake", "16:11:8", "16:11:8", "0:0:8", "16"},
{"icelake-client", "16:11:8", "16:11:8", "0:0:8", "16"},
{"icelake-server", "16:11:8", "16:11:8", "0:0:8", "16"},
{"intel", "16", "16:8:8", "0:0:8", "16"},
{"geode", NULL, NULL, NULL, NULL},
{"k6", "32:8:8", "32:8:8", "0:0:8", "32"},
{"athlon", "16:8:8", "16:8:8", "0:0:8", "16"},
{"k8", "16:8:8", "16:8:8", "0:0:8", "16"},
{"amdfam10", "32:25:8", "32:8:8", "0:0:8", "32"},
{"bdver1", "16:11:8", "16:8:8", "0:0:8", "11"},
{"bdver2", "16:11:8", "16:8:8", "0:0:8", "11"},
{"bdver3", "16:11:8", "16:8:8", "0:0:8", "11"},
{"bdver4", "16:11:8", "16:8:8", "0:0:8", "11"},
{"btver1", "16:11:8", "16:8:8", "0:0:8", "11"},
{"btver2", "16:11:8", "16:8:8", "0:0:8", "11"},
{"znver1", "16", "16", "0:0:8", "16"}
"generic",
"i386",
"i486",
"pentium",
"lakemont",
"pentiumpro",
"pentium4",
"nocona",
"core2",
"nehalem",
"sandybridge",
"haswell",
"bonnell",
"silvermont",
"goldmont",
"goldmont-plus",
"tremont",
"knl",
"knm",
"skylake",
"skylake-avx512",
"cannonlake",
"icelake-client",
"icelake-server",
"intel",
"geode",
"k6",
"athlon",
"k8",
"amdfam10",
"bdver1",
"bdver2",
"bdver3",
"bdver4",
"btver1",
"btver2",
"znver1"
};
const pta processor_alias_table[] =
......@@ -1715,7 +1711,7 @@ ix86_get_valid_option_values (int option_code,
break;
case OPT_mtune_:
for (unsigned i = 0; i < PROCESSOR_max; i++)
v.safe_push (processor_target_table[i].name);
v.safe_push (processor_names[i]);
break;
default:
break;
......
......@@ -3352,13 +3352,13 @@ ix86_default_align (struct gcc_options *opts)
{
/* -falign-foo without argument: supply one. */
if (opts->x_flag_align_loops && !opts->x_str_align_loops)
opts->x_str_align_loops = processor_target_table[ix86_tune].align_loop;
opts->x_str_align_loops = processor_cost_table[ix86_tune]->align_loop;
if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
opts->x_str_align_jumps = processor_target_table[ix86_tune].align_jump;
opts->x_str_align_jumps = processor_cost_table[ix86_tune]->align_jump;
if (opts->x_flag_align_labels && !opts->x_str_align_labels)
opts->x_str_align_labels = processor_target_table[ix86_tune].align_label;
opts->x_str_align_labels = processor_cost_table[ix86_tune]->align_label;
if (opts->x_flag_align_functions && !opts->x_str_align_functions)
opts->x_str_align_functions = processor_target_table[ix86_tune].align_func;
opts->x_str_align_functions = processor_cost_table[ix86_tune]->align_func;
}
/* Implement TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE hook. */
......@@ -3488,8 +3488,7 @@ ix86_option_override_internal (bool main_args_p,
opts->x_ix86_tune_string = opts->x_ix86_arch_string;
if (!opts->x_ix86_tune_string)
{
opts->x_ix86_tune_string
= processor_target_table[TARGET_CPU_DEFAULT].name;
opts->x_ix86_tune_string = processor_names[TARGET_CPU_DEFAULT];
ix86_tune_defaulted = 1;
}
......@@ -4940,12 +4939,12 @@ ix86_function_specific_print (FILE *file, int indent,
gcc_assert (ptr->arch < PROCESSOR_max);
fprintf (file, "%*sarch = %d (%s)\n",
indent, "",
ptr->arch, processor_target_table[ptr->arch].name);
ptr->arch, processor_names[ptr->arch]);
gcc_assert (ptr->tune < PROCESSOR_max);
fprintf (file, "%*stune = %d (%s)\n",
indent, "",
ptr->tune, processor_target_table[ptr->tune].name);
ptr->tune, processor_names[ptr->tune]);
fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
......@@ -311,6 +311,14 @@ struct processor_costs {
cost model. */
const int cond_not_taken_branch_cost;/* Cost of not taken branch for
vectorizer cost model. */
/* The "0:0:8" label alignment specified for some processors generates
secondary 8-byte alignment only for those label/jump/loop targets
which have primary alignment. */
const char *const align_loop; /* Loop alignment. */
const char *const align_jump; /* Jump alignment. */
const char *const align_label; /* Label alignment. */
const char *const align_func; /* Function alignment. */
};
extern const struct processor_costs *ix86_cost;
......@@ -2278,19 +2286,7 @@ enum processor_type
};
#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
/* Processor target table, indexed by processor number */
struct ptt
{
const char *const name; /* processor name */
/* Default alignments. */
const char *const align_loop;
const char *const align_jump;
const char *const align_label;
const char *const align_func;
};
extern const struct ptt processor_target_table[PROCESSOR_max];
extern const char *const processor_names[PROCESSOR_max];
#include "wide-int-bitmask.h"
......
......@@ -111,6 +111,10 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
ix86_size_memset,
COSTS_N_BYTES (1), /* cond_taken_branch_cost. */
COSTS_N_BYTES (1), /* cond_not_taken_branch_cost. */
NULL, /* Loop alignment. */
NULL, /* Jump alignment. */
NULL, /* Label alignment. */
NULL, /* Func alignment. */
};
/* Processor costs (relative to an add) */
......@@ -197,6 +201,10 @@ struct processor_costs i386_cost = { /* 386 specific costs */
i386_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"4", /* Loop alignment. */
"4", /* Jump alignment. */
NULL, /* Label alignment. */
"4", /* Func alignment. */
};
static stringop_algs i486_memcpy[2] = {
......@@ -284,6 +292,10 @@ struct processor_costs i486_cost = { /* 486 specific costs */
i486_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"16", /* Loop alignment. */
"16", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
};
static stringop_algs pentium_memcpy[2] = {
......@@ -369,6 +381,10 @@ struct processor_costs pentium_cost = {
pentium_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"16:8:8", /* Loop alignment. */
"16:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
};
static const
......@@ -447,6 +463,10 @@ struct processor_costs lakemont_cost = {
pentium_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"16:8:8", /* Loop alignment. */
"16:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
};
/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
......@@ -540,6 +560,10 @@ struct processor_costs pentiumpro_cost = {
pentiumpro_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"16", /* Loop alignment. */
"16:11:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
};
static stringop_algs geode_memcpy[2] = {
......@@ -625,6 +649,10 @@ struct processor_costs geode_cost = {
geode_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
NULL, /* Loop alignment. */
NULL, /* Jump alignment. */
NULL, /* Label alignment. */
NULL, /* Func alignment. */
};
static stringop_algs k6_memcpy[2] = {
......@@ -712,6 +740,10 @@ struct processor_costs k6_cost = {
k6_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"32:8:8", /* Loop alignment. */
"32:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"32", /* Func alignment. */
};
/* For some reason, Athlon deals better with REP prefix (relative to loops)
......@@ -800,6 +832,10 @@ struct processor_costs athlon_cost = {
athlon_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"16:8:8", /* Loop alignment. */
"16:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
};
/* K8 has optimized REP instruction for medium sized blocks, but for very
......@@ -897,6 +933,10 @@ struct processor_costs k8_cost = {
k8_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
"16:8:8", /* Loop alignment. */
"16:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
};
/* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
......@@ -1001,6 +1041,10 @@ struct processor_costs amdfam10_cost = {
amdfam10_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"32:25:8", /* Loop alignment. */
"32:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"32", /* Func alignment. */
};
/* BDVER1 has optimized REP instruction for medium sized blocks, but for
......@@ -1099,6 +1143,10 @@ const struct processor_costs bdver1_cost = {
bdver1_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
"16:11:8", /* Loop alignment. */
"16:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"11", /* Func alignment. */
};
/* BDVER2 has optimized REP instruction for medium sized blocks, but for
......@@ -1198,6 +1246,10 @@ const struct processor_costs bdver2_cost = {
bdver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
"16:11:8", /* Loop alignment. */
"16:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"11", /* Func alignment. */
};
......@@ -1296,6 +1348,10 @@ struct processor_costs bdver3_cost = {
bdver3_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
"16:11:8", /* Loop alignment. */
"16:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"11", /* Func alignment. */
};
/* BDVER4 has optimized REP instruction for medium sized blocks, but for
......@@ -1393,6 +1449,10 @@ struct processor_costs bdver4_cost = {
bdver4_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
"16:11:8", /* Loop alignment. */
"16:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"11", /* Func alignment. */
};
......@@ -1513,6 +1573,10 @@ struct processor_costs znver1_cost = {
znver1_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
"16", /* Loop alignment. */
"16", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
};
/* skylake_cost should produce code tuned for Skylake family of CPUs. */
......@@ -1605,6 +1669,10 @@ struct processor_costs skylake_cost = {
skylake_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"16:11:8", /* Loop alignment. */
"16:11:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
};
/* BTVER1 has optimized REP instruction for medium sized blocks, but for
very small blocks it is better to use loop. For large blocks, libcall can
......@@ -1694,6 +1762,10 @@ const struct processor_costs btver1_cost = {
btver1_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"16:11:8", /* Loop alignment. */
"16:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"11", /* Func alignment. */
};
static stringop_algs btver2_memcpy[2] = {
......@@ -1781,6 +1853,10 @@ const struct processor_costs btver2_cost = {
btver2_memset,
COSTS_N_INSNS (2), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"16:11:8", /* Loop alignment. */
"16:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"11", /* Func alignment. */
};
static stringop_algs pentium4_memcpy[2] = {
......@@ -1867,6 +1943,10 @@ struct processor_costs pentium4_cost = {
pentium4_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
NULL, /* Loop alignment. */
NULL, /* Jump alignment. */
NULL, /* Label alignment. */
NULL, /* Func alignment. */
};
static stringop_algs nocona_memcpy[2] = {
......@@ -1956,6 +2036,10 @@ struct processor_costs nocona_cost = {
nocona_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
NULL, /* Loop alignment. */
NULL, /* Jump alignment. */
NULL, /* Label alignment. */
NULL, /* Func alignment. */
};
static stringop_algs atom_memcpy[2] = {
......@@ -2043,6 +2127,10 @@ struct processor_costs atom_cost = {
atom_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"16", /* Loop alignment. */
"16:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
};
static stringop_algs slm_memcpy[2] = {
......@@ -2130,6 +2218,10 @@ struct processor_costs slm_cost = {
slm_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"16", /* Loop alignment. */
"16:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
};
static stringop_algs intel_memcpy[2] = {
......@@ -2217,6 +2309,10 @@ struct processor_costs intel_cost = {
intel_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"16", /* Loop alignment. */
"16:8:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
};
/* Generic should produce code tuned for Core-i7 (and newer chips)
......@@ -2313,6 +2409,10 @@ struct processor_costs generic_cost = {
generic_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */
"16:11:8", /* Loop alignment. */
"16:11:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
};
/* core_cost should produce code tuned for Core family of CPUs. */
......@@ -2416,5 +2516,9 @@ struct processor_costs core_cost = {
core_memset,
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
"16:11:8", /* Loop alignment. */
"16:11:8", /* Jump alignment. */
"0:0:8", /* Label alignment. */
"16", /* Func alignment. */
};
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment