Commit 71d9b493 by Richard Henderson Committed by Richard Henderson

genattrtab.c (expand_units): For large nr opclasses, expand function_units_used…

genattrtab.c (expand_units): For large nr opclasses, expand function_units_used with ORX to prevent blowups.

* genattrtab.c (expand_units): For large nr opclasses, expand
function_units_used with ORX to prevent blowups.  Tag with FFS.
(num_unit_opclasses): New variable.
(gen_unit): Update it.
(enum operator): Add ORX_OP.
(operate_exp): Treat ORX as or, except don't expand across an if.
Reuse number rtx's after operating on them.
(check_attr_value): Accept IOR, AND, & FFS.
(write_test_expr): Transmute `in_comparison' to `flags'.  Allow
for attribute value caching.  Handle CONST_STRING, IF_THEN_ELSE.
(write_expr_attr_cache, write_toplevel_expr): New functions.
(write_attr_get): Handle FFS-tagged expressions.
(make_canonical): Don't expand const attributes.
(convert_const_symbol_ref): Dike out.
(evaluate_eq_attr): Handle SYMBOL_REF.
(main): Don't emit get_attr_foo for const attributes.
* alpha.c (override_options): Reinstate PROCESSOR_EV6.
(alpha_adjust_cost): Add EV6 tuning; streamline EV5 tests.
* alpha.h (REGISTER_MOVE_COST): Increase ftoi/itof cost slightly.
* alpha.md: Redo all of the scheduling, adding EV6 support, and
combining function units where possible.
(attr "type"): Split loads, stores, cmov into int/fp.  Combine
multiplies and divides.  Add EV6 sqrt, ftoi, itof.
(attr "opsize"): New attribute.
(sqrtsf2-1, sqrtdf2-1): Provide proper TP_INSN patterns.
(movsf2-[12], movdf2-[12]): Provide CIX variants; don't allow CIX
to control register allocation.
(movsi2-1, movdi2-1): Likewise.

From-SVN: r17212
parent b0435cf4
Tue Dec 23 05:17:28 1997 Richard Henderson <rth@cygnus.com>
* genattrtab.c (expand_units): For large nr opclasses, expand
function_units_used with ORX to prevent blowups. Tag with FFS.
(num_unit_opclasses): New variable.
(gen_unit): Update it.
(enum operator): Add ORX_OP.
(operate_exp): Treat ORX as or, except don't expand across an if.
Reuse number rtx's after operating on them.
(check_attr_value): Accept IOR, AND, & FFS.
(write_test_expr): Transmute `in_comparison' to `flags'. Allow
for attribute value caching. Handle CONST_STRING, IF_THEN_ELSE.
(write_expr_attr_cache, write_toplevel_expr): New functions.
(write_attr_get): Handle FFS-tagged expressions.
(make_canonical): Don't expand const attributes.
(convert_const_symbol_ref): Dike out.
(evaluate_eq_attr): Handle SYMBOL_REF.
(main): Don't emit get_attr_foo for const attributes.
* alpha.c (override_options): Reinstate PROCESSOR_EV6.
(alpha_adjust_cost): Add EV6 tuning; streamline EV5 tests.
* alpha.h (REGISTER_MOVE_COST): Increase ftoi/itof cost slightly.
* alpha.md: Redo all of the scheduling, adding EV6 support, and
combining function units where possible.
(attr "type"): Split loads, stores, cmov into int/fp. Combine
multiplies and divides. Add EV6 sqrt, ftoi, itof.
(attr "opsize"): New attribute.
(sqrtsf2-1, sqrtdf2-1): Provide proper TP_INSN patterns.
(movsf2-[12], movdf2-[12]): Provide CIX variants; don't allow CIX
to control register allocation.
(movsi2-1, movdi2-1): Likewise.
Tue Dec 23 03:53:21 1997 Richard Henderson <rth@cygnus.com> Tue Dec 23 03:53:21 1997 Richard Henderson <rth@cygnus.com>
* alpha.h (CPP_PREDEFINES, LIB_SPEC, LINK_SPEC, STARTFILE_SPEC, * alpha.h (CPP_PREDEFINES, LIB_SPEC, LINK_SPEC, STARTFILE_SPEC,
......
...@@ -134,7 +134,7 @@ override_options () ...@@ -134,7 +134,7 @@ override_options ()
/* 971208 -- EV6 scheduling parameters are still secret, so don't even /* 971208 -- EV6 scheduling parameters are still secret, so don't even
pretend and just schedule for an EV5 for now. -- r~ */ pretend and just schedule for an EV5 for now. -- r~ */
alpha_cpu alpha_cpu
= TARGET_CPU_DEFAULT & MASK_CPU_EV6 ? PROCESSOR_EV5 = TARGET_CPU_DEFAULT & MASK_CPU_EV6 ? PROCESSOR_EV6
: (TARGET_CPU_DEFAULT & MASK_CPU_EV5 ? PROCESSOR_EV5 : PROCESSOR_EV4); : (TARGET_CPU_DEFAULT & MASK_CPU_EV5 ? PROCESSOR_EV5 : PROCESSOR_EV4);
if (alpha_cpu_string) if (alpha_cpu_string)
...@@ -169,7 +169,7 @@ override_options () ...@@ -169,7 +169,7 @@ override_options ()
else if (! strcmp (alpha_cpu_string, "ev6") else if (! strcmp (alpha_cpu_string, "ev6")
|| ! strcmp (alpha_cpu_string, "21264")) || ! strcmp (alpha_cpu_string, "21264"))
{ {
alpha_cpu = PROCESSOR_EV5; alpha_cpu = PROCESSOR_EV6;
target_flags |= MASK_BWX | MASK_CIX | MASK_MAX; target_flags |= MASK_BWX | MASK_CIX | MASK_MAX;
} }
else else
...@@ -274,7 +274,7 @@ override_options () ...@@ -274,7 +274,7 @@ override_options ()
{ {
{ 3, 30, -1 }, /* ev4 -- Bcache is a guess */ { 3, 30, -1 }, /* ev4 -- Bcache is a guess */
{ 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */ { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
{ 3, 12, -1 }, /* ev6 -- Ho hum, doesn't exist yet */ { 3, 13, -1 }, /* ev6 -- Ho hum, doesn't exist yet */
}; };
lat = alpha_mlat_string[1] - '0'; lat = alpha_mlat_string[1] - '0';
...@@ -1291,80 +1291,30 @@ alpha_adjust_cost (insn, link, dep_insn, cost) ...@@ -1291,80 +1291,30 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
dep_insn_type = get_attr_type (dep_insn); dep_insn_type = get_attr_type (dep_insn);
/* Bring in the user-defined memory latency. */ /* Bring in the user-defined memory latency. */
if (dep_insn_type == TYPE_LD || dep_insn_type == TYPE_LDSYM) if (dep_insn_type == TYPE_ILD
|| dep_insn_type == TYPE_FLD
|| dep_insn_type == TYPE_LDSYM)
cost += alpha_memory_latency-1; cost += alpha_memory_latency-1;
if (alpha_cpu == PROCESSOR_EV5) switch (alpha_cpu)
{
/* And the lord DEC saith: "A special bypass provides an effective
latency of 0 cycles for an ICMP or ILOG insn producing the test
operand of an IBR or CMOV insn." */
if ((dep_insn_type == TYPE_ICMP
|| dep_insn_type == TYPE_ILOG)
&& (insn_type == TYPE_IBR
|| (insn_type == TYPE_CMOV
&& !((set = single_set (dep_insn)) != 0
&& GET_CODE (PATTERN (insn)) == SET
&& (set_src = SET_SRC (PATTERN (insn)),
GET_CODE (set_src) == IF_THEN_ELSE)
&& (set = SET_DEST (set),
rtx_equal_p (set, XEXP (set_src, 1))
|| rtx_equal_p (set, XEXP (set_src, 2)))))))
return 0;
/* "The multiplier is unable to receive data from IEU bypass paths.
The instruction issues at the expected time, but its latency is
increased by the time it takes for the input data to become
available to the multiplier" -- which happens in pipeline stage
six, when results are committed to the register file. */
if ((insn_type == TYPE_IMULL
|| insn_type == TYPE_IMULQ
|| insn_type == TYPE_IMULH)
&& (set = single_set (dep_insn)) != 0
&& GET_CODE (PATTERN (insn)) == SET
&& (set_src = SET_SRC (PATTERN (insn)),
GET_CODE (set_src) == MULT)
&& (set = SET_DEST (set),
rtx_equal_p (set, XEXP (set_src, 0))
|| rtx_equal_p (set, XEXP (set_src, 1))))
{
switch (dep_insn_type)
{
/* These insns produce their results in pipeline stage five. */
case TYPE_LD:
case TYPE_CMOV:
case TYPE_IMULL:
case TYPE_IMULQ:
case TYPE_IMULH:
case TYPE_MVI:
return cost + 1;
/* Other integer insns produce results in pipeline stage four. */
default:
return cost + 2;
}
}
}
else
{ {
case PROCESSOR_EV4:
/* On EV4, if INSN is a store insn and DEP_INSN is setting the data /* On EV4, if INSN is a store insn and DEP_INSN is setting the data
being stored, we can sometimes lower the cost. */ being stored, we can sometimes lower the cost. */
if (insn_type == TYPE_ST if ((insn_type == TYPE_IST || insn_type == TYPE_FST)
&& (set = single_set (dep_insn)) != 0 && (set = single_set (dep_insn)) != 0
&& GET_CODE (PATTERN (insn)) == SET && GET_CODE (PATTERN (insn)) == SET
&& rtx_equal_p (SET_DEST (set), SET_SRC (PATTERN (insn)))) && rtx_equal_p (SET_DEST (set), SET_SRC (PATTERN (insn))))
{ {
switch (dep_insn_type) switch (dep_insn_type)
{ {
case TYPE_LD: case TYPE_ILD:
case TYPE_FLD:
/* No savings here. */ /* No savings here. */
return cost; return cost;
case TYPE_IMULL: case TYPE_IMUL:
case TYPE_IMULQ:
case TYPE_IMULH:
/* In these cases, we save one cycle. */ /* In these cases, we save one cycle. */
return cost - 1; return cost - 1;
...@@ -1377,14 +1327,17 @@ alpha_adjust_cost (insn, link, dep_insn, cost) ...@@ -1377,14 +1327,17 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
/* Another case that needs adjustment is an arithmetic or logical /* Another case that needs adjustment is an arithmetic or logical
operation. Its cost is usually one cycle, but we default it to operation. Its cost is usually one cycle, but we default it to
two in the MD file. The only case that it is actually two is two in the MD file. The only case that it is actually two is
for the address in loads and stores. */ for the address in loads, stores, and jumps. */
if (dep_insn_type == TYPE_IADD || dep_insn_type == TYPE_ILOG) if (dep_insn_type == TYPE_IADD || dep_insn_type == TYPE_ILOG)
{ {
switch (insn_type) switch (insn_type)
{ {
case TYPE_LD: case TYPE_ILD:
case TYPE_ST: case TYPE_IST:
case TYPE_FLD:
case TYPE_FST:
case TYPE_JSR:
return cost; return cost;
default: default:
return 1; return 1;
...@@ -1396,6 +1349,62 @@ alpha_adjust_cost (insn, link, dep_insn, cost) ...@@ -1396,6 +1349,62 @@ alpha_adjust_cost (insn, link, dep_insn, cost)
if (dep_insn_type == TYPE_ICMP && insn_type == TYPE_IBR) if (dep_insn_type == TYPE_ICMP && insn_type == TYPE_IBR)
return 1; return 1;
break;
case PROCESSOR_EV5:
/* And the lord DEC saith: "A special bypass provides an effective
latency of 0 cycles for an ICMP or ILOG insn producing the test
operand of an IBR or ICMOV insn." */
if ((dep_insn_type == TYPE_ICMP || dep_insn_type == TYPE_ILOG)
&& (set = single_set (dep_insn)) != 0)
{
/* A branch only has one input. This must be it. */
if (insn_type == TYPE_IBR)
return 0;
/* A conditional move has three, make sure it is the test. */
if (insn_type == TYPE_ICMOV
&& GET_CODE (set_src = PATTERN (insn)) == SET
&& GET_CODE (set_src = SET_SRC (set_src)) == IF_THEN_ELSE
&& rtx_equal_p (SET_DEST (set), XEXP (set_src, 0)))
return 0;
}
/* "The multiplier is unable to receive data from IEU bypass paths.
The instruction issues at the expected time, but its latency is
increased by the time it takes for the input data to become
available to the multiplier" -- which happens in pipeline stage
six, when results are committed to the register file. */
if (insn_type == TYPE_IMUL)
{
switch (dep_insn_type)
{
/* These insns produce their results in pipeline stage five. */
case TYPE_ILD:
case TYPE_ICMOV:
case TYPE_IMUL:
case TYPE_MVI:
return cost + 1;
/* Other integer insns produce results in pipeline stage four. */
default:
return cost + 2;
}
}
break;
case PROCESSOR_EV6:
/* There is additional latency to move the result of (most) FP
operations anywhere but the FP register file. */
if ((insn_type == TYPE_FST || insn_type == TYPE_FTOI)
&& (dep_insn_type == TYPE_FADD ||
dep_insn_type == TYPE_FMUL ||
dep_insn_type == TYPE_FCMOV))
return cost + 2;
break;
} }
/* Otherwise, return the default cost. */ /* Otherwise, return the default cost. */
......
...@@ -769,9 +769,10 @@ enum reg_class { NO_REGS, GENERAL_REGS, FLOAT_REGS, ALL_REGS, ...@@ -769,9 +769,10 @@ enum reg_class { NO_REGS, GENERAL_REGS, FLOAT_REGS, ALL_REGS,
reduce the impact of not being able to allocate a pseudo to a reduce the impact of not being able to allocate a pseudo to a
hard register. */ hard register. */
#define REGISTER_MOVE_COST(CLASS1, CLASS2) \ #define REGISTER_MOVE_COST(CLASS1, CLASS2) \
(TARGET_CIX || ((CLASS1) == FLOAT_REGS) == ((CLASS2) == FLOAT_REGS) \ (((CLASS1) == FLOAT_REGS) == ((CLASS2) == FLOAT_REGS) \
? 2 : 4+2*alpha_memory_latency) ? 2 \
: TARGET_CIX ? 3 : 4+2*alpha_memory_latency)
/* A C expression returning the cost of moving data of MODE from a register to /* A C expression returning the cost of moving data of MODE from a register to
or from memory. or from memory.
...@@ -1596,7 +1597,7 @@ extern void alpha_init_expanders (); ...@@ -1596,7 +1597,7 @@ extern void alpha_init_expanders ();
our own exit function. */ our own exit function. */
#define HAVE_ATEXIT #define HAVE_ATEXIT
/* The EV4 is dual issue; EV5 is quad issue. */ /* The EV4 is dual issue; EV5/EV6 are quad issue. */
#define ISSUE_RATE (alpha_cpu == PROCESSOR_EV4 ? 2 : 4) #define ISSUE_RATE (alpha_cpu == PROCESSOR_EV4 ? 2 : 4)
/* Compute the cost of computing a constant rtl expression RTX /* Compute the cost of computing a constant rtl expression RTX
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment