Commit b5c82fa1 by Paolo Bonzini Committed by Paolo Bonzini

re PR target/19653 (x87 reg allocated for constants for -mfpmath=sse)

2005-08-08  Paolo Bonzini  <bonzini@gnu.org>
	    Dale Johannesen  <dalej@apple.com>

	PR target/19653
	* regclass.c (struct reg_pref): Update documentation.
	(regclass): Set prefclass to NO_REGS if memory is the best option.
	(record_reg_classes): Cope with a prefclass set to NO_REGS.
	* reload.c (find_reloads): Take PREFERRED_OUTPUT_RELOAD_CLASS
	into account.  For non-registers, equate an empty preferred
	reload class to a `!' in the constraint; move the if clause to
	do so after those that reject the insn.
	(push_reload): Allow PREFERRED_*_RELOAD_CLASS to liberally
	return NO_REGS.
	(find_dummy_reload): Likewise.
	* doc/tm.texi (Register Classes): Document what it means
	if PREFERRED_*_RELOAD_CLASS return NO_REGS.
	* config/i386/i386.c (ix86_preferred_reload_class): Force
	using SSE registers (and return NO_REGS for floating-point
	constants) if math is done with SSE.
	(ix86_preferred_output_reload_class): New.
	* config/i386/i386-protos.h (ix86_preferred_output_reload_class): New.
	* config/i386/i386.h (PREFERRED_OUTPUT_RELOAD_CLASS): New.
	* config/i386/i386.md: Remove # register preferences.


Co-Authored-By: Dale Johannesen <dalej@apple.com>

From-SVN: r112637
parent f096660b
2005-08-08 Paolo Bonzini <bonzini@gnu.org>
Dale Johannesen <dalej@apple.com>
PR target/19653
* regclass.c (struct reg_pref): Update documentation.
(regclass): Set prefclass to NO_REGS if memory is the best option.
(record_reg_classes): Cope with a prefclass set to NO_REGS.
* reload.c (find_reloads): Take PREFERRED_OUTPUT_RELOAD_CLASS
into account. For non-registers, equate an empty preferred
reload class to a `!' in the constraint; move the if clause to
do so after those that reject the insn.
(push_reload): Allow PREFERRED_*_RELOAD_CLASS to liberally
return NO_REGS.
(find_dummy_reload): Likewise.
* doc/tm.texi (Register Classes): Document what it means
if PREFERRED_*_RELOAD_CLASS return NO_REGS.
* config/i386/i386.c (ix86_preferred_reload_class): Force
using SSE registers (and return NO_REGS for floating-point
constants) if math is done with SSE.
(ix86_preferred_output_reload_class): New.
* config/i386/i386-protos.h (ix86_preferred_output_reload_class): New.
* config/i386/i386.h (PREFERRED_OUTPUT_RELOAD_CLASS): New.
* config/i386/i386.md: Remove # register preferences.
2006-04-02 Sebastian Pop <pop@cri.ensmp.fr> 2006-04-02 Sebastian Pop <pop@cri.ensmp.fr>
PR bootstrap/26992 PR bootstrap/26992
......
...@@ -142,6 +142,7 @@ extern int ix86_secondary_memory_needed (enum reg_class, enum reg_class, ...@@ -142,6 +142,7 @@ extern int ix86_secondary_memory_needed (enum reg_class, enum reg_class,
extern bool ix86_cannot_change_mode_class (enum machine_mode, extern bool ix86_cannot_change_mode_class (enum machine_mode,
enum machine_mode, enum reg_class); enum machine_mode, enum reg_class);
extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class); extern enum reg_class ix86_preferred_reload_class (rtx, enum reg_class);
extern enum reg_class ix86_preferred_output_reload_class (rtx, enum reg_class);
extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int); extern int ix86_memory_move_cost (enum machine_mode, enum reg_class, int);
extern int ix86_mode_needed (int, rtx); extern int ix86_mode_needed (int, rtx);
extern void emit_i387_cw_initialization (int); extern void emit_i387_cw_initialization (int);
......
...@@ -16361,15 +16361,28 @@ ix86_free_from_memory (enum machine_mode mode) ...@@ -16361,15 +16361,28 @@ ix86_free_from_memory (enum machine_mode mode)
enum reg_class enum reg_class
ix86_preferred_reload_class (rtx x, enum reg_class class) ix86_preferred_reload_class (rtx x, enum reg_class class)
{ {
enum machine_mode mode = GET_MODE (x);
/* We're only allowed to return a subclass of CLASS. Many of the /* We're only allowed to return a subclass of CLASS. Many of the
following checks fail for NO_REGS, so eliminate that early. */ following checks fail for NO_REGS, so eliminate that early. */
if (class == NO_REGS) if (class == NO_REGS)
return NO_REGS; return NO_REGS;
/* All classes can load zeros. */ /* All classes can load zeros. */
if (x == CONST0_RTX (GET_MODE (x))) if (x == CONST0_RTX (mode))
return class; return class;
/* Force constants into memory if we are loading a (non-zero) constant into
an MMX or SSE register. This is because there are no MMX/SSE instructions
to load from a constant. */
if (CONSTANT_P (x)
&& (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
return NO_REGS;
/* Prefer SSE regs only, if we can use them for math. */
if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
return SSE_CLASS_P (class) ? class : NO_REGS;
/* Floating-point constants need more complex checks. */ /* Floating-point constants need more complex checks. */
if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
{ {
...@@ -16381,8 +16394,6 @@ ix86_preferred_reload_class (rtx x, enum reg_class class) ...@@ -16381,8 +16394,6 @@ ix86_preferred_reload_class (rtx x, enum reg_class class)
zero above. We only want to wind up preferring 80387 registers if zero above. We only want to wind up preferring 80387 registers if
we plan on doing computation with them. */ we plan on doing computation with them. */
if (TARGET_80387 if (TARGET_80387
&& (TARGET_MIX_SSE_I387
|| !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
&& standard_80387_constant_p (x)) && standard_80387_constant_p (x))
{ {
/* Limit class to non-sse. */ /* Limit class to non-sse. */
...@@ -16398,10 +16409,6 @@ ix86_preferred_reload_class (rtx x, enum reg_class class) ...@@ -16398,10 +16409,6 @@ ix86_preferred_reload_class (rtx x, enum reg_class class)
return NO_REGS; return NO_REGS;
} }
if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
return NO_REGS;
if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
return NO_REGS;
/* Generally when we see PLUS here, it's the function invariant /* Generally when we see PLUS here, it's the function invariant
(plus soft-fp const_int). Which can only be computed into general (plus soft-fp const_int). Which can only be computed into general
...@@ -16423,6 +16430,33 @@ ix86_preferred_reload_class (rtx x, enum reg_class class) ...@@ -16423,6 +16430,33 @@ ix86_preferred_reload_class (rtx x, enum reg_class class)
return class; return class;
} }
/* Return the register class preferred for an output reload of X, given
   that the insn alternative asks for CLASS.

   Floating-point values are steered towards the register bank that math
   is done on: SSE registers when SSE math is being used, and likewise the
   387 registers when the 387 is available.  NO_REGS is returned when we
   would rather not hand back a subset of CLASS; reload then rejects the
   alternative, and if it cannot do so it still uses its own choice.  */
enum reg_class
ix86_preferred_output_reload_class (rtx x, enum reg_class class)
{
  enum machine_mode mode = GET_MODE (x);

  /* SSE math: any class that may contain SSE registers is narrowed to
     SSE_REGS; every other class is turned down.  */
  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
    return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;

  if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
    {
      /* Map the combined FP/SSE classes to their FP-only counterparts.  */
      if (class == FP_TOP_SSE_REGS)
        return FP_TOP_REG;
      if (class == FP_SECOND_SSE_REGS)
        return FP_SECOND_REG;

      /* Otherwise accept CLASS only if it is a floating-point class.  */
      return FLOAT_CLASS_P (class) ? class : NO_REGS;
    }

  /* Neither case applies: leave CLASS unchanged.  */
  return class;
}
/* If we are copying between general and FP registers, we need a memory /* If we are copying between general and FP registers, we need a memory
location. The same is true for SSE and MMX registers. location. The same is true for SSE and MMX registers.
......
...@@ -669,7 +669,9 @@ extern int x86_prefetch_sse; ...@@ -669,7 +669,9 @@ extern int x86_prefetch_sse;
#define STACK_REGS #define STACK_REGS
#define IS_STACK_MODE(MODE) \ #define IS_STACK_MODE(MODE) \
((MODE) == DFmode || (MODE) == SFmode || (MODE) == XFmode) \ (((MODE) == SFmode && (!TARGET_SSE || !TARGET_SSE_MATH)) \
|| ((MODE) == DFmode && (!TARGET_SSE2 || !TARGET_SSE_MATH)) \
|| (MODE) == XFmode)
/* Number of actual hardware registers. /* Number of actual hardware registers.
The hardware registers are assigned numbers for the compiler The hardware registers are assigned numbers for the compiler
...@@ -1222,6 +1224,12 @@ enum reg_class ...@@ -1222,6 +1224,12 @@ enum reg_class
#define PREFERRED_RELOAD_CLASS(X, CLASS) \ #define PREFERRED_RELOAD_CLASS(X, CLASS) \
ix86_preferred_reload_class ((X), (CLASS)) ix86_preferred_reload_class ((X), (CLASS))
/* Choose the register class for an output reload of X when the insn
alternative asks for CLASS; the decision is delegated to
ix86_preferred_output_reload_class. This is used to discourage
putting floating-point values in SSE registers unless SSE math is
being used, and likewise for the 387 registers. */
#define PREFERRED_OUTPUT_RELOAD_CLASS(X, CLASS) \
ix86_preferred_output_reload_class ((X), (CLASS))
/* If we are copying between general and FP registers, we need a memory /* If we are copying between general and FP registers, we need a memory
location. The same is true for SSE and MMX registers. */ location. The same is true for SSE and MMX registers. */
#define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \ #define SECONDARY_MEMORY_NEEDED(CLASS1, CLASS2, MODE) \
......
...@@ -2404,12 +2404,22 @@ register, so @code{PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when ...@@ -2404,12 +2404,22 @@ register, so @code{PREFERRED_RELOAD_CLASS} returns @code{NO_REGS} when
into any kind of register, code generation will be better if into any kind of register, code generation will be better if
@code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead @code{LEGITIMATE_CONSTANT_P} makes the constant illegitimate instead
of using @code{PREFERRED_RELOAD_CLASS}. of using @code{PREFERRED_RELOAD_CLASS}.
If an insn has pseudos in it after register allocation, reload will go
through the alternatives and repeatedly call @code{PREFERRED_RELOAD_CLASS}
to find the best one. Returning @code{NO_REGS}, in this case, makes
reload add a @code{!} in front of the constraint: the x86 back-end uses
this feature to discourage usage of 387 registers when math is done in
the SSE registers (and vice versa).
@end defmac @end defmac
@defmac PREFERRED_OUTPUT_RELOAD_CLASS (@var{x}, @var{class}) @defmac PREFERRED_OUTPUT_RELOAD_CLASS (@var{x}, @var{class})
Like @code{PREFERRED_RELOAD_CLASS}, but for output reloads instead of Like @code{PREFERRED_RELOAD_CLASS}, but for output reloads instead of
input reloads. If you don't define this macro, the default is to use input reloads. If you don't define this macro, the default is to use
@var{class}, unchanged. @var{class}, unchanged.
You can also use @code{PREFERRED_OUTPUT_RELOAD_CLASS} to discourage
reload from using some alternatives, like @code{PREFERRED_RELOAD_CLASS}.
@end defmac @end defmac
@defmac LIMIT_RELOAD_CLASS (@var{mode}, @var{class}) @defmac LIMIT_RELOAD_CLASS (@var{mode}, @var{class})
......
...@@ -811,7 +811,8 @@ struct costs ...@@ -811,7 +811,8 @@ struct costs
/* Structure used to record preferences of given pseudo. */ /* Structure used to record preferences of given pseudo. */
struct reg_pref struct reg_pref
{ {
/* (enum reg_class) prefclass is the preferred class. */ /* (enum reg_class) prefclass is the preferred class. May be
NO_REGS if no class is better than memory. */
char prefclass; char prefclass;
/* altclass is a register class that we should use for allocating /* altclass is a register class that we should use for allocating
...@@ -1314,6 +1315,10 @@ regclass (rtx f, int nregs) ...@@ -1314,6 +1315,10 @@ regclass (rtx f, int nregs)
best = reg_class_subunion[(int) best][class]; best = reg_class_subunion[(int) best][class];
} }
/* If no register class is better than memory, use memory. */
if (p->mem_cost < best_cost)
best = NO_REGS;
/* Record the alternate register class; i.e., a class for which /* Record the alternate register class; i.e., a class for which
every register in it is better than using memory. If adding a every register in it is better than using memory. If adding a
class would make a smaller class (i.e., no union of just those class would make a smaller class (i.e., no union of just those
...@@ -1524,7 +1529,7 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, ...@@ -1524,7 +1529,7 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
to what we would add if this register were not in the to what we would add if this register were not in the
appropriate class. */ appropriate class. */
if (reg_pref) if (reg_pref && reg_pref[REGNO (op)].prefclass != NO_REGS)
alt_cost alt_cost
+= (may_move_in_cost[mode] += (may_move_in_cost[mode]
[(unsigned char) reg_pref[REGNO (op)].prefclass] [(unsigned char) reg_pref[REGNO (op)].prefclass]
...@@ -1750,7 +1755,7 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, ...@@ -1750,7 +1755,7 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
to what we would add if this register were not in the to what we would add if this register were not in the
appropriate class. */ appropriate class. */
if (reg_pref) if (reg_pref && reg_pref[REGNO (op)].prefclass != NO_REGS)
alt_cost alt_cost
+= (may_move_in_cost[mode] += (may_move_in_cost[mode]
[(unsigned char) reg_pref[REGNO (op)].prefclass] [(unsigned char) reg_pref[REGNO (op)].prefclass]
...@@ -1836,7 +1841,8 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, ...@@ -1836,7 +1841,8 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
int class; int class;
unsigned int nr; unsigned int nr;
if (regno >= FIRST_PSEUDO_REGISTER && reg_pref != 0) if (regno >= FIRST_PSEUDO_REGISTER && reg_pref != 0
&& reg_pref[regno].prefclass != NO_REGS)
{ {
enum reg_class pref = reg_pref[regno].prefclass; enum reg_class pref = reg_pref[regno].prefclass;
......
...@@ -1184,15 +1184,24 @@ push_reload (rtx in, rtx out, rtx *inloc, rtx *outloc, ...@@ -1184,15 +1184,24 @@ push_reload (rtx in, rtx out, rtx *inloc, rtx *outloc,
/* Narrow down the class of register wanted if that is /* Narrow down the class of register wanted if that is
desirable on this machine for efficiency. */ desirable on this machine for efficiency. */
{
enum reg_class preferred_class = class;
if (in != 0) if (in != 0)
class = PREFERRED_RELOAD_CLASS (in, class); preferred_class = PREFERRED_RELOAD_CLASS (in, class);
/* Output reloads may need analogous treatment, different in detail. */ /* Output reloads may need analogous treatment, different in detail. */
#ifdef PREFERRED_OUTPUT_RELOAD_CLASS #ifdef PREFERRED_OUTPUT_RELOAD_CLASS
if (out != 0) if (out != 0)
class = PREFERRED_OUTPUT_RELOAD_CLASS (out, class); preferred_class = PREFERRED_OUTPUT_RELOAD_CLASS (out, preferred_class);
#endif #endif
/* Discard what the target said if we cannot do it. */
if (preferred_class != NO_REGS
|| (optional && type == RELOAD_FOR_OUTPUT))
class = preferred_class;
}
/* Make sure we use a class that can handle the actual pseudo /* Make sure we use a class that can handle the actual pseudo
inside any subreg. For example, on the 386, QImode regs inside any subreg. For example, on the 386, QImode regs
can appear within SImode subregs. Although GENERAL_REGS can appear within SImode subregs. Although GENERAL_REGS
...@@ -1885,7 +1894,11 @@ find_dummy_reload (rtx real_in, rtx real_out, rtx *inloc, rtx *outloc, ...@@ -1885,7 +1894,11 @@ find_dummy_reload (rtx real_in, rtx real_out, rtx *inloc, rtx *outloc,
/* Narrow down the reg class, the same way push_reload will; /* Narrow down the reg class, the same way push_reload will;
otherwise we might find a dummy now, but push_reload won't. */ otherwise we might find a dummy now, but push_reload won't. */
class = PREFERRED_RELOAD_CLASS (in, class); {
enum reg_class preferred_class = PREFERRED_RELOAD_CLASS (in, class);
if (class != NO_REGS)
class = preferred_class;
}
/* See if OUT will do. */ /* See if OUT will do. */
if (REG_P (out) if (REG_P (out)
...@@ -3401,22 +3414,10 @@ find_reloads (rtx insn, int replace, int ind_levels, int live_known, ...@@ -3401,22 +3414,10 @@ find_reloads (rtx insn, int replace, int ind_levels, int live_known,
losers++; losers++;
} }
/* If we can't reload this value at all, reject this
alternative. Note that we could also lose due to
LIMIT_RELOAD_RELOAD_CLASS, but we don't check that
here. */
if (! CONSTANT_P (operand)
&& (enum reg_class) this_alternative[i] != NO_REGS
&& (PREFERRED_RELOAD_CLASS (operand,
(enum reg_class) this_alternative[i])
== NO_REGS))
bad = 1;
/* Alternative loses if it requires a type of reload not /* Alternative loses if it requires a type of reload not
permitted for this insn. We can always reload SCRATCH permitted for this insn. We can always reload SCRATCH
and objects with a REG_UNUSED note. */ and objects with a REG_UNUSED note. */
else if (GET_CODE (operand) != SCRATCH if (GET_CODE (operand) != SCRATCH
&& modified[i] != RELOAD_READ && no_output_reloads && modified[i] != RELOAD_READ && no_output_reloads
&& ! find_reg_note (insn, REG_UNUSED, operand)) && ! find_reg_note (insn, REG_UNUSED, operand))
bad = 1; bad = 1;
...@@ -3424,6 +3425,28 @@ find_reloads (rtx insn, int replace, int ind_levels, int live_known, ...@@ -3424,6 +3425,28 @@ find_reloads (rtx insn, int replace, int ind_levels, int live_known,
&& ! const_to_mem) && ! const_to_mem)
bad = 1; bad = 1;
/* If the target would rather not reload this value into a
register of this class, heavily discourage this alternative,
as a `!' in the constraint would. Note that we could also
lose due to LIMIT_RELOAD_CLASS, but we don't check that
here. */
if (! CONSTANT_P (operand)
&& (enum reg_class) this_alternative[i] != NO_REGS)
{
if (PREFERRED_RELOAD_CLASS
(operand, (enum reg_class) this_alternative[i])
== NO_REGS)
reject = 600;
#ifdef PREFERRED_OUTPUT_RELOAD_CLASS
if (operand_type[i] == RELOAD_FOR_OUTPUT
&& PREFERRED_OUTPUT_RELOAD_CLASS
(operand, (enum reg_class) this_alternative[i])
== NO_REGS)
reject = 600;
#endif
}
/* We prefer to reload pseudos over reloading other things, /* We prefer to reload pseudos over reloading other things,
since such reloads may be able to be eliminated later. since such reloads may be able to be eliminated later.
If we are reloading a SCRATCH, we won't be generating any If we are reloading a SCRATCH, we won't be generating any
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment