Commit 7b38ee83 by Teresa Johnson Committed by Teresa Johnson

Avoid instructions that incur expensive length-changing prefix (LCP) stalls on…

Avoid instructions that incur expensive length-changing prefix (LCP) stalls on some x86-64 implementations...

Avoid instructions that incur expensive length-changing prefix (LCP) stalls
on some x86-64 implementations, notably Core2 and Corei7. Specifically, a move of
a 16-bit constant into memory requires a length-changing prefix and can incur significant
penalties. Modified an old patch written by H.J. Lu to split such instructions
during peephole2.

2012-04-05  Teresa Johnson  <tejohnson@google.com>
	    H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.h (ix86_tune_indices): Add
	X86_TUNE_LCP_STALL.
	* config/i386/i386.md (move immediate to memory peephole2):
	Add cases for HImode move when LCP stall avoidance is needed.
	* config/i386/i386.c (initial_ix86_tune_features): Initialize
	X86_TUNE_LCP_STALL entry.

Co-Authored-By: H.J. Lu <hongjiu.lu@intel.com>

From-SVN: r186176
parent 939c8f05
2012-04-05 Teresa Johnson <tejohnson@google.com>
H.J. Lu <hongjiu.lu@intel.com>
* config/i386/i386.h (ix86_tune_indices): Add
X86_TUNE_LCP_STALL.
* config/i386/i386.md (move immediate to memory peephole2):
Add cases for HImode move when LCP stall avoidance is needed.
* config/i386/i386.c (initial_ix86_tune_features): Initialize
X86_TUNE_LCP_STALL entry.
2012-04-05 Uros Bizjak <ubizjak@gmail.com> 2012-04-05 Uros Bizjak <ubizjak@gmail.com>
PR target/52882 PR target/52882
......
...@@ -1964,6 +1964,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { ...@@ -1964,6 +1964,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_PARTIAL_FLAG_REG_STALL */ /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
m_CORE2I7 | m_GENERIC, m_CORE2I7 | m_GENERIC,
/* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
* on 16-bit immediate moves into memory on Core2 and Corei7. */
m_CORE2I7 | m_GENERIC,
/* X86_TUNE_USE_HIMODE_FIOP */ /* X86_TUNE_USE_HIMODE_FIOP */
m_386 | m_486 | m_K6_GEODE, m_386 | m_486 | m_K6_GEODE,
...@@ -262,6 +262,7 @@ enum ix86_tune_indices { ...@@ -262,6 +262,7 @@ enum ix86_tune_indices {
X86_TUNE_MOVX, X86_TUNE_MOVX,
X86_TUNE_PARTIAL_REG_STALL, X86_TUNE_PARTIAL_REG_STALL,
X86_TUNE_PARTIAL_FLAG_REG_STALL, X86_TUNE_PARTIAL_FLAG_REG_STALL,
X86_TUNE_LCP_STALL,
X86_TUNE_USE_HIMODE_FIOP, X86_TUNE_USE_HIMODE_FIOP,
X86_TUNE_USE_SIMODE_FIOP, X86_TUNE_USE_SIMODE_FIOP,
X86_TUNE_USE_MOV0, X86_TUNE_USE_MOV0,
...@@ -340,6 +341,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ...@@ -340,6 +341,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
#define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL] #define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
#define TARGET_PARTIAL_FLAG_REG_STALL \ #define TARGET_PARTIAL_FLAG_REG_STALL \
ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL] ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
#define TARGET_LCP_STALL \
ix86_tune_features[X86_TUNE_LCP_STALL]
#define TARGET_USE_HIMODE_FIOP ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP] #define TARGET_USE_HIMODE_FIOP ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP]
#define TARGET_USE_SIMODE_FIOP ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP] #define TARGET_USE_SIMODE_FIOP ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP]
#define TARGET_USE_MOV0 ix86_tune_features[X86_TUNE_USE_MOV0] #define TARGET_USE_MOV0 ix86_tune_features[X86_TUNE_USE_MOV0]
......
...@@ -16971,15 +16971,17 @@ ...@@ -16971,15 +16971,17 @@
(set (match_dup 0) (match_dup 2))]) (set (match_dup 0) (match_dup 2))])
;; Don't move an immediate directly to memory when the instruction ;; Don't move an immediate directly to memory when the instruction
;; gets too big. ;; gets too big, or if LCP stalls are a problem for 16-bit moves.
(define_peephole2 (define_peephole2
[(match_scratch:SWI124 1 "<r>") [(match_scratch:SWI124 1 "<r>")
(set (match_operand:SWI124 0 "memory_operand") (set (match_operand:SWI124 0 "memory_operand")
(const_int 0))] (const_int 0))]
"optimize_insn_for_speed_p () "optimize_insn_for_speed_p ()
&& !TARGET_USE_MOV0 && ((<MODE>mode == HImode
&& TARGET_LCP_STALL)
|| (!TARGET_USE_MOV0
&& TARGET_SPLIT_LONG_MOVES && TARGET_SPLIT_LONG_MOVES
&& get_attr_length (insn) >= ix86_cur_cost ()->large_insn && get_attr_length (insn) >= ix86_cur_cost ()->large_insn))
&& peep2_regno_dead_p (0, FLAGS_REG)" && peep2_regno_dead_p (0, FLAGS_REG)"
[(parallel [(set (match_dup 2) (const_int 0)) [(parallel [(set (match_dup 2) (const_int 0))
(clobber (reg:CC FLAGS_REG))]) (clobber (reg:CC FLAGS_REG))])
...@@ -16991,8 +16993,10 @@ ...@@ -16991,8 +16993,10 @@
(set (match_operand:SWI124 0 "memory_operand") (set (match_operand:SWI124 0 "memory_operand")
(match_operand:SWI124 1 "immediate_operand"))] (match_operand:SWI124 1 "immediate_operand"))]
"optimize_insn_for_speed_p () "optimize_insn_for_speed_p ()
&& TARGET_SPLIT_LONG_MOVES && ((<MODE>mode == HImode
&& get_attr_length (insn) >= ix86_cur_cost ()->large_insn" && TARGET_LCP_STALL)
|| (TARGET_SPLIT_LONG_MOVES
&& get_attr_length (insn) >= ix86_cur_cost ()->large_insn))"
[(set (match_dup 2) (match_dup 1)) [(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (match_dup 2))]) (set (match_dup 0) (match_dup 2))])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment