Commit 55805e54 by Yuri Rumyantsev Committed by H.J. Lu

Silvermont (SLM) architecture performance tuning

2013-05-31  Yuri Rumyantsev  <yuri.s.rumyantsev@intel.com>
	    Igor Zamyatin  <igor.zamyatin@intel.com>

	* config/i386/i386.h (enum ix86_tune_indices): Add
	X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS.
	(TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS): New define.

	* config/i386/i386.c (initial_ix86_tune_features)
	<X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS>: Initialize.
	(ix86_lea_outperforms): Handle Silvermont tuning.
	(ix86_avoid_lea_for_add): Add new argument to ix86_lea_outperforms
	call.
	(ix86_use_lea_for_mov): Likewise.
	(ix86_avoid_lea_for_addr): Likewise.
	(ix86_lea_for_add_ok): Likewise.
	(exact_dependency_1): New function.
	(exact_store_load_dependency): Likewise.
	(ix86_adjust_cost): Handle Silvermont tuning.
	(do_reorder_for_imul): Likewise.
	(swap_top_of_ready_list): New function.
	(ix86_sched_reorder): Changed to handle Silvermont tuning.

	* config/i386/i386.md (peepholes that split memory operand in fp
	converts): New.

From-SVN: r199546
parent e19c9de2
2013-05-31 Yuri Rumyantsev <yuri.s.rumyantsev@intel.com>
Igor Zamyatin <igor.zamyatin@intel.com>
Silvermont (SLM) architecture performance tuning.
* config/i386/i386.h (enum ix86_tune_indices): Add
X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS.
(TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS): New define.
* config/i386/i386.c (initial_ix86_tune_features)
<X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS>: Initialize.
(ix86_lea_outperforms): Handle Silvermont tuning.
(ix86_avoid_lea_for_add): Add new argument to ix86_lea_outperforms
call.
(ix86_use_lea_for_mov): Likewise.
(ix86_avoid_lea_for_addr): Likewise.
(ix86_lea_for_add_ok): Likewise.
(exact_dependency_1): New function.
(exact_store_load_dependency): Likewise.
(ix86_adjust_cost): Handle Silvermont tuning.
(do_reorder_for_imul): Likewise.
(swap_top_of_ready_list): New function.
(ix86_sched_reorder): Changed to handle Silvermont tuning.
* config/i386/i386.md (peepholes that split memory operand in fp
converts): New.
2013-05-31 Marcus Shawcroft <marcus.shawcroft@arm.com> 2013-05-31 Marcus Shawcroft <marcus.shawcroft@arm.com>
* config/aarch64/aarch64.c (aarch64_load_symref_appropriately): * config/aarch64/aarch64.c (aarch64_load_symref_appropriately):
...@@ -718,11 +744,11 @@ ...@@ -718,11 +744,11 @@
2013-05-24 Vladimir Makarov <vmakarov@redhat.com> 2013-05-24 Vladimir Makarov <vmakarov@redhat.com>
* lra-constraints.c (emit_spill_move): Use smaller mode for * lra-constraints.c (emit_spill_move): Use smaller mode for
mem-mem moves. mem-mem moves.
(check_and_process_move): Consider mem-reg moves for secondary (check_and_process_move): Consider mem-reg moves for secondary
too. too.
(curr_insn_transform): Don't lose insns emitted before for (curr_insn_transform): Don't lose insns emitted before for
secondary memory moves. secondary memory moves.
(inherit_in_ebb): Mark defined reg. Add usage only if it is not a (inherit_in_ebb): Mark defined reg. Add usage only if it is not a
reg set up in the current insn. reg set up in the current insn.
...@@ -1085,8 +1111,8 @@ ...@@ -1085,8 +1111,8 @@
2013-05-21 Christian Bruel <christian.bruel@st.com> 2013-05-21 Christian Bruel <christian.bruel@st.com>
* dwarf2out.c (multiple_reg_loc_descriptor): Use dbx_reg_number for * dwarf2out.c (multiple_reg_loc_descriptor): Use dbx_reg_number for
spanning registers. LEAF_REG_REMAP is supported only for contiguous spanning registers. LEAF_REG_REMAP is supported only for contiguous
registers. Set register size out of the PARALLEL loop. registers. Set register size out of the PARALLEL loop.
2013-05-20 Oleg Endo <olegendo@gcc.gnu.org> 2013-05-20 Oleg Endo <olegendo@gcc.gnu.org>
......
...@@ -333,6 +333,7 @@ enum ix86_tune_indices { ...@@ -333,6 +333,7 @@ enum ix86_tune_indices {
X86_TUNE_REASSOC_FP_TO_PARALLEL, X86_TUNE_REASSOC_FP_TO_PARALLEL,
X86_TUNE_GENERAL_REGS_SSE_SPILL, X86_TUNE_GENERAL_REGS_SSE_SPILL,
X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE,
X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS,
X86_TUNE_LAST X86_TUNE_LAST
}; };
...@@ -443,6 +444,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ...@@ -443,6 +444,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL] ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
#define TARGET_AVOID_MEM_OPND_FOR_CMOVE \ #define TARGET_AVOID_MEM_OPND_FOR_CMOVE \
ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE] ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE]
/* Tuning flag: reads the X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS entry of
   ix86_tune_features.  */
#define TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS \
ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS]
/* Feature tests against the various architecture variations. */ /* Feature tests against the various architecture variations. */
enum ix86_arch_indices { enum ix86_arch_indices {
......
...@@ -3625,6 +3625,18 @@ ...@@ -3625,6 +3625,18 @@
CONST0_RTX (V4SFmode), operands[1])); CONST0_RTX (V4SFmode), operands[1]));
}) })
;; Peephole: split the SFmode memory operand out of a DFmode float_extend.
;; Matches (set reg:DF (float_extend:DF mem:SF)) and replaces it with a
;; plain load of the memory operand into operand 2 followed by a
;; register-to-register float_extend into operand 0.  Operand 2 is an
;; SFmode REG with the same register number as operand 0, so the load and
;; the extend use the destination register itself.
;; Applied only when TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS is set, the insn
;; is optimized for speed, and operand 0 satisfies SSE_REG_P.
;; It's more profitable to split and then extend in the same register.
(define_peephole2
[(set (match_operand:DF 0 "register_operand")
(float_extend:DF
(match_operand:SF 1 "memory_operand")))]
"TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
&& optimize_insn_for_speed_p ()
&& SSE_REG_P (operands[0])"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (float_extend:DF (match_dup 2)))]
"operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));")
(define_insn "*extendsfdf2_mixed" (define_insn "*extendsfdf2_mixed"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x") [(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
(float_extend:DF (float_extend:DF
...@@ -3766,6 +3778,18 @@ ...@@ -3766,6 +3778,18 @@
CONST0_RTX (V2DFmode), operands[1])); CONST0_RTX (V2DFmode), operands[1]));
}) })
;; Peephole: split the DFmode memory operand out of an SFmode float_truncate.
;; Matches (set reg:SF (float_truncate:SF mem:DF)) and replaces it with a
;; plain load of the memory operand into operand 2 followed by a
;; register-to-register float_truncate into operand 0.  Operand 2 is a
;; DFmode REG with the same register number as operand 0, so the load and
;; the truncate use the destination register itself.
;; Applied only when TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS is set, the insn
;; is optimized for speed, and operand 0 satisfies SSE_REG_P.
;; It's more profitable to split and then truncate in the same register.
(define_peephole2
[(set (match_operand:SF 0 "register_operand")
(float_truncate:SF
(match_operand:DF 1 "memory_operand")))]
"TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
&& optimize_insn_for_speed_p ()
&& SSE_REG_P (operands[0])"
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (float_truncate:SF (match_dup 2)))]
"operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));")
(define_expand "truncdfsf2_with_temp" (define_expand "truncdfsf2_with_temp"
[(parallel [(set (match_operand:SF 0) [(parallel [(set (match_operand:SF 0)
(float_truncate:SF (match_operand:DF 1))) (float_truncate:SF (match_operand:DF 1)))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment