Commit 01b1efaa by Vladimir Makarov Committed by Vladimir Makarov

2013-10-25 Vladimir Makarov <vmakarov@redhat.com>

	* config/rs6000/rs6000-protos.h
	(rs6000_secondary_memory_needed_mode): New prototype.
	* config/rs6000/rs6000.c: Include ira.h.
	(TARGET_LRA_P): Redefine.
	(rs6000_legitimate_offset_address_p): Call
	legitimate_constant_pool_address_p in strict mode for LRA.
	(rs6000_legitimate_address_p): Ditto.
	(legitimate_lo_sum_address_p): Add code for LRA.
	Use lra_in_progress.
	(rs6000_emit_move): Add LRA version of code to generate load/store
	of SDmode values.
	(rs6000_secondary_memory_needed_mode): New.
	(rs6000_alloc_sdmode_stack_slot): Do nothing for LRA.
	(rs6000_secondary_reload_class): Return NO_REGS for LRA for
	constants, memory, and FP registers.
	(rs6000_lra_p): New.
	* config/rs6000/rs6000.h (SECONDARY_MEMORY_NEEDED_MODE): New
	macro.
	* config/rs6000/rs6000.opt (mlra): New option.
	* lra-spills.c (lra_final_code_change): Remove useless move insns.

From-SVN: r204079
parent 3ff4317f
2013-10-25 Vladimir Makarov <vmakarov@redhat.com>
* config/rs6000/rs6000-protos.h
(rs6000_secondary_memory_needed_mode): New prototype.
* config/rs6000/rs6000.c: Include ira.h.
(TARGET_LRA_P): Redefine.
(rs6000_legitimate_offset_address_p): Call
legitimate_constant_pool_address_p in strict mode for LRA.
(rs6000_legitimate_address_p): Ditto.
(legitimate_lo_sum_address_p): Add code for LRA.
Use lra_in_progress.
(rs6000_emit_move): Add LRA version of code to generate load/store
of SDmode values.
(rs6000_secondary_memory_needed_mode): New.
(rs6000_alloc_sdmode_stack_slot): Do nothing for LRA.
(rs6000_secondary_reload_class): Return NO_REGS for LRA for
constants, memory, and FP registers.
(rs6000_lra_p): New.
* config/rs6000/rs6000.h (SECONDARY_MEMORY_NEEDED_MODE): New
macro.
* config/rs6000/rs6000.opt (mlra): New option.
* lra-spills.c (lra_final_code_change): Remove useless move insns.
2013-10-25 Yufeng Zhang <yufeng.zhang@arm.com> 2013-10-25 Yufeng Zhang <yufeng.zhang@arm.com>
* tree-ssa-math-opts.c (convert_plusminus_to_widen): Call * tree-ssa-math-opts.c (convert_plusminus_to_widen): Call
...@@ -126,6 +126,8 @@ extern void rs6000_split_multireg_move (rtx, rtx); ...@@ -126,6 +126,8 @@ extern void rs6000_split_multireg_move (rtx, rtx);
extern void rs6000_emit_le_vsx_move (rtx, rtx, enum machine_mode); extern void rs6000_emit_le_vsx_move (rtx, rtx, enum machine_mode);
extern void rs6000_emit_move (rtx, rtx, enum machine_mode); extern void rs6000_emit_move (rtx, rtx, enum machine_mode);
extern rtx rs6000_secondary_memory_needed_rtx (enum machine_mode); extern rtx rs6000_secondary_memory_needed_rtx (enum machine_mode);
extern enum machine_mode rs6000_secondary_memory_needed_mode (enum
machine_mode);
extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode, extern rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode,
int, int, int, int *); int, int, int, int *);
extern bool rs6000_legitimate_offset_address_p (enum machine_mode, rtx, extern bool rs6000_legitimate_offset_address_p (enum machine_mode, rtx,
......
...@@ -55,6 +55,7 @@ ...@@ -55,6 +55,7 @@
#include "intl.h" #include "intl.h"
#include "params.h" #include "params.h"
#include "tm-constrs.h" #include "tm-constrs.h"
#include "ira.h"
#include "opts.h" #include "opts.h"
#include "tree-vectorizer.h" #include "tree-vectorizer.h"
#include "dumpfile.h" #include "dumpfile.h"
...@@ -1554,6 +1555,9 @@ static const struct attribute_spec rs6000_attribute_table[] = ...@@ -1554,6 +1555,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_MODE_DEPENDENT_ADDRESS_P #undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
#undef TARGET_LRA_P
#define TARGET_LRA_P rs6000_lra_p
#undef TARGET_CAN_ELIMINATE #undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
...@@ -6226,7 +6230,7 @@ rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x, ...@@ -6226,7 +6230,7 @@ rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x,
return false; return false;
if (!reg_offset_addressing_ok_p (mode)) if (!reg_offset_addressing_ok_p (mode))
return virtual_stack_registers_memory_p (x); return virtual_stack_registers_memory_p (x);
if (legitimate_constant_pool_address_p (x, mode, strict)) if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
return true; return true;
if (GET_CODE (XEXP (x, 1)) != CONST_INT) if (GET_CODE (XEXP (x, 1)) != CONST_INT)
return false; return false;
...@@ -6366,19 +6370,31 @@ legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict) ...@@ -6366,19 +6370,31 @@ legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict)
if (TARGET_ELF || TARGET_MACHO) if (TARGET_ELF || TARGET_MACHO)
{ {
bool large_toc_ok;
if (DEFAULT_ABI != ABI_AIX && DEFAULT_ABI != ABI_DARWIN && flag_pic) if (DEFAULT_ABI != ABI_AIX && DEFAULT_ABI != ABI_DARWIN && flag_pic)
return false; return false;
if (TARGET_TOC) /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
recognizes some LO_SUM addresses as valid although this
function says the opposite. In most cases, LRA through different
transformations can generate correct code for address reloads.
The only cases it cannot manage are some LO_SUM ones. So we need to add
code analogous to that in rs6000_legitimize_reload_address for
LO_SUM here, saying that some addresses are still valid. */
large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
&& small_toc_ref (x, VOIDmode));
if (TARGET_TOC && ! large_toc_ok)
return false; return false;
if (GET_MODE_NUNITS (mode) != 1) if (GET_MODE_NUNITS (mode) != 1)
return false; return false;
if (GET_MODE_SIZE (mode) > UNITS_PER_WORD if (! lra_in_progress && GET_MODE_SIZE (mode) > UNITS_PER_WORD
&& !(/* ??? Assume floating point reg based on mode? */ && !(/* ??? Assume floating point reg based on mode? */
TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
&& (mode == DFmode || mode == DDmode))) && (mode == DFmode || mode == DDmode)))
return false; return false;
return CONSTANT_P (x); return CONSTANT_P (x) || large_toc_ok;
} }
return false; return false;
...@@ -7368,7 +7384,8 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict) ...@@ -7368,7 +7384,8 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
if (reg_offset_p && legitimate_small_data_p (mode, x)) if (reg_offset_p && legitimate_small_data_p (mode, x))
return 1; return 1;
if (reg_offset_p if (reg_offset_p
&& legitimate_constant_pool_address_p (x, mode, reg_ok_strict)) && legitimate_constant_pool_address_p (x, mode,
reg_ok_strict || lra_in_progress))
return 1; return 1;
/* For TImode, if we have load/store quad and TImode in VSX registers, only /* For TImode, if we have load/store quad and TImode in VSX registers, only
allow register indirect addresses. This will allow the values to go in allow register indirect addresses. This will allow the values to go in
...@@ -7654,6 +7671,7 @@ rs6000_conditional_register_usage (void) ...@@ -7654,6 +7671,7 @@ rs6000_conditional_register_usage (void)
fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1; fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
} }
} }
/* Try to output insns to set TARGET equal to the constant C if it can /* Try to output insns to set TARGET equal to the constant C if it can
be done in less than N insns. Do all computations in MODE. be done in less than N insns. Do all computations in MODE.
...@@ -8058,6 +8076,68 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) ...@@ -8058,6 +8076,68 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
cfun->machine->sdmode_stack_slot = cfun->machine->sdmode_stack_slot =
eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX); eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
if (lra_in_progress
&& mode == SDmode
&& REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
&& reg_preferred_class (REGNO (operands[0])) == NO_REGS
&& (REG_P (operands[1])
|| (GET_CODE (operands[1]) == SUBREG
&& REG_P (SUBREG_REG (operands[1])))))
{
int regno = REGNO (GET_CODE (operands[1]) == SUBREG
? SUBREG_REG (operands[1]) : operands[1]);
enum reg_class cl;
if (regno >= FIRST_PSEUDO_REGISTER)
{
cl = reg_preferred_class (regno);
gcc_assert (cl != NO_REGS);
regno = ira_class_hard_regs[cl][0];
}
if (FP_REGNO_P (regno))
{
if (GET_MODE (operands[0]) != DDmode)
operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
emit_insn (gen_movsd_store (operands[0], operands[1]));
}
else if (INT_REGNO_P (regno))
emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
else
gcc_unreachable();
return;
}
if (lra_in_progress
&& mode == SDmode
&& (REG_P (operands[0])
|| (GET_CODE (operands[0]) == SUBREG
&& REG_P (SUBREG_REG (operands[0]))))
&& REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
&& reg_preferred_class (REGNO (operands[1])) == NO_REGS)
{
int regno = REGNO (GET_CODE (operands[0]) == SUBREG
? SUBREG_REG (operands[0]) : operands[0]);
enum reg_class cl;
if (regno >= FIRST_PSEUDO_REGISTER)
{
cl = reg_preferred_class (regno);
gcc_assert (cl != NO_REGS);
regno = ira_class_hard_regs[cl][0];
}
if (FP_REGNO_P (regno))
{
if (GET_MODE (operands[1]) != DDmode)
operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
emit_insn (gen_movsd_load (operands[0], operands[1]));
}
else if (INT_REGNO_P (regno))
emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
else
gcc_unreachable();
return;
}
if (reload_in_progress if (reload_in_progress
&& mode == SDmode && mode == SDmode
&& cfun->machine->sdmode_stack_slot != NULL_RTX && cfun->machine->sdmode_stack_slot != NULL_RTX
...@@ -14905,6 +14985,17 @@ rs6000_secondary_memory_needed_rtx (enum machine_mode mode) ...@@ -14905,6 +14985,17 @@ rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
return ret; return ret;
} }
/* Pick the machine mode in which a secondary memory location is
   accessed for a value of mode MODE.  SDmode is widened to DDmode;
   every other mode is returned unchanged.  */
enum machine_mode
rs6000_secondary_memory_needed_mode (enum machine_mode mode)
{
  return (mode == SDmode) ? DDmode : mode;
}
static tree static tree
rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
{ {
...@@ -15798,6 +15889,10 @@ rs6000_alloc_sdmode_stack_slot (void) ...@@ -15798,6 +15889,10 @@ rs6000_alloc_sdmode_stack_slot (void)
gimple_stmt_iterator gsi; gimple_stmt_iterator gsi;
gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX); gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
/* We use a different approach for dealing with the secondary
memory in LRA. */
if (ira_use_lra_p)
return;
if (TARGET_NO_SDMODE_STACK) if (TARGET_NO_SDMODE_STACK)
return; return;
...@@ -16019,7 +16114,7 @@ rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode, ...@@ -16019,7 +16114,7 @@ rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode,
/* Constants, memory, and FP registers can go into FP registers. */ /* Constants, memory, and FP registers can go into FP registers. */
if ((regno == -1 || FP_REGNO_P (regno)) if ((regno == -1 || FP_REGNO_P (regno))
&& (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS)) && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
return (mode != SDmode) ? NO_REGS : GENERAL_REGS; return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
/* Memory, and FP/altivec registers can go into fp/altivec registers under /* Memory, and FP/altivec registers can go into fp/altivec registers under
VSX. However, for scalar variables, use the traditional floating point VSX. However, for scalar variables, use the traditional floating point
...@@ -29364,6 +29459,13 @@ rs6000_libcall_value (enum machine_mode mode) ...@@ -29364,6 +29459,13 @@ rs6000_libcall_value (enum machine_mode mode)
} }
/* Implement TARGET_LRA_P.  Return true when LRA is to be used in
   place of the reload pass, i.e. when rs6000_lra_flag (the variable
   behind the -mlra option) is nonzero.  */
static bool
rs6000_lra_p (void)
{
  return rs6000_lra_flag != 0;
}
/* Given FROM and TO register numbers, say whether this elimination is allowed. /* Given FROM and TO register numbers, say whether this elimination is allowed.
Frame pointer elimination is automatically handled. Frame pointer elimination is automatically handled.
......
...@@ -1488,6 +1488,13 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; ...@@ -1488,6 +1488,13 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
#define SECONDARY_MEMORY_NEEDED_RTX(MODE) \ #define SECONDARY_MEMORY_NEEDED_RTX(MODE) \
rs6000_secondary_memory_needed_rtx (MODE) rs6000_secondary_memory_needed_rtx (MODE)
/* Specify the mode to be used for memory when a secondary memory
   location is needed.  For cpus that cannot load/store SDmode values
   from the 64-bit FP registers without using a full 64-bit
   load/store, we need a wider mode.  The choice is delegated to
   rs6000_secondary_memory_needed_mode, which maps SDmode to DDmode
   and returns every other mode unchanged.  */
#define SECONDARY_MEMORY_NEEDED_MODE(MODE) \
rs6000_secondary_memory_needed_mode (MODE)
/* Return the maximum number of consecutive registers /* Return the maximum number of consecutive registers
needed to represent mode MODE in a register of class CLASS. needed to represent mode MODE in a register of class CLASS.
......
...@@ -449,6 +449,10 @@ mlong-double- ...@@ -449,6 +449,10 @@ mlong-double-
Target RejectNegative Joined UInteger Var(rs6000_long_double_type_size) Save Target RejectNegative Joined UInteger Var(rs6000_long_double_type_size) Save
-mlong-double-<n> Specify size of long double (64 or 128 bits) -mlong-double-<n> Specify size of long double (64 or 128 bits)
mlra
Target Report Var(rs6000_lra_flag) Init(0) Save
Use LRA instead of reload
msched-costly-dep= msched-costly-dep=
Target RejectNegative Joined Var(rs6000_sched_costly_dep_str) Target RejectNegative Joined Var(rs6000_sched_costly_dep_str)
Determine which dependences between insns are considered costly Determine which dependences between insns are considered costly
......
...@@ -625,7 +625,7 @@ lra_final_code_change (void) ...@@ -625,7 +625,7 @@ lra_final_code_change (void)
{ {
int i, hard_regno; int i, hard_regno;
basic_block bb; basic_block bb;
rtx insn, curr; rtx insn, curr, set;
int max_regno = max_reg_num (); int max_regno = max_reg_num ();
for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
...@@ -661,5 +661,19 @@ lra_final_code_change (void) ...@@ -661,5 +661,19 @@ lra_final_code_change (void)
} }
if (insn_change_p) if (insn_change_p)
lra_update_operator_dups (id); lra_update_operator_dups (id);
if ((set = single_set (insn)) != NULL
&& REG_P (SET_SRC (set)) && REG_P (SET_DEST (set))
&& REGNO (SET_SRC (set)) == REGNO (SET_DEST (set)))
{
/* Remove a useless move insn. IRA can generate move
insns involving pseudos. It is better to remove them
earlier to speed up the compiler a bit. It is also
better to do it here as they might not pass the final RTL
check in LRA (e.g. an insn moving a control register
into itself). */
lra_invalidate_insn_data (insn);
delete_insn (insn);
}
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment