Commit e7ed95a2 by H.J. Lu Committed by H.J. Lu

Pad short functions with NOPs for Atom.

gcc/

2010-09-17  H.J. Lu  <hongjiu.lu@intel.com>
	    Richard Henderson  <rth@redhat.com>

	* config/i386/i386.c (initial_ix86_tune_features): Add
	X86_TUNE_PAD_SHORT_FUNCTION.
	(ix86_code_end): Pad with 8 NOPs for TARGET_PAD_SHORT_FUNCTION.
	(ix86_count_insn): New.
	(ix86_pad_short_function): Likewise.
	(ix86_reorg): Support TARGET_PAD_SHORT_FUNCTION.

	* config/i386/i386.h (ix86_tune_indices): Add
	X86_TUNE_PAD_SHORT_FUNCTION.
	(TARGET_PAD_SHORT_FUNCTION): New.

	* config/i386/i386.md (UNSPEC_NOPS): New.
	(nops): Likewise.

gcc/testsuite/

2010-09-17  H.J. Lu  <hongjiu.lu@intel.com>

	* gcc.target/i386/pad-1.c: New.
	* gcc.target/i386/pad-2.c: Likewise.
	* gcc.target/i386/pad-3.c: Likewise.
	* gcc.target/i386/pad-4.c: Likewise.
	* gcc.target/i386/pad-5a.c: Likewise.
	* gcc.target/i386/pad-5b.c: Likewise.
	* gcc.target/i386/pad-6a.c: Likewise.
	* gcc.target/i386/pad-6b.c: Likewise.
	* gcc.target/i386/pad-7.c: Likewise.
	* gcc.target/i386/pad-8.c: Likewise.
	* gcc.target/i386/pad-9.c: Likewise.
	* gcc.target/i386/pad-10.c: Likewise.

Co-Authored-By: Richard Henderson <rth@redhat.com>

From-SVN: r164379
parent 8cab7c13
2010-09-17 H.J. Lu <hongjiu.lu@intel.com>
Richard Henderson <rth@redhat.com>
* config/i386/i386.c (initial_ix86_tune_features): Add
X86_TUNE_PAD_SHORT_FUNCTION.
(ix86_code_end): Pad with 8 NOPs for TARGET_PAD_SHORT_FUNCTION.
(ix86_count_insn): New.
(ix86_pad_short_function): Likewise.
(ix86_reorg): Support TARGET_PAD_SHORT_FUNCTION.
* config/i386/i386.h (ix86_tune_indices): Add
X86_TUNE_PAD_SHORT_FUNCTION.
(TARGET_PAD_SHORT_FUNCTION): New.
* config/i386/i386.md (UNSPEC_NOPS): New.
(nops): Likewise.
2010-09-17 H.J. Lu <hongjiu.lu@intel.com>
PR middle-end/45234
* calls.c (expand_call): Make sure that all variable sized
......
......@@ -1576,6 +1576,9 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_PAD_RETURNS */
m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
/* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */
m_ATOM,
/* X86_TUNE_EXT_80387_CONSTANTS */
m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
| m_CORE2 | m_GENERIC,
......@@ -8021,6 +8024,11 @@ ix86_code_end (void)
xops[0] = gen_rtx_REG (Pmode, regno);
xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
/* Pad stack IP move with 4 instructions. 2 NOPs count as 1
instruction. */
if (TARGET_PAD_SHORT_FUNCTION)
output_asm_insn ("nop; nop; nop; nop; nop; nop; nop; nop",
xops);
output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
output_asm_insn ("ret", xops);
final_end_function ();
......@@ -27882,6 +27890,120 @@ ix86_pad_returns (void)
}
}
/* Return the number of counted instructions in BB, stopping as soon as
   the count reaches 4 (so the result is never larger than 4).  Debug
   insns and insns whose pattern is a USE or a CLOBBER are not counted.
   Scanning also stops at a RETURN jump.  */
static int
ix86_count_insn_bb (basic_block bb)
{
  int seen = 0;
  rtx cur;

  FOR_BB_INSNS (bb, cur)
    {
      /* A RETURN jump only happens in exit blocks; it ends the scan.  */
      if (JUMP_P (cur) && GET_CODE (PATTERN (cur)) == RETURN)
        break;

      /* Skip everything that does not count as an instruction.  */
      if (!NONDEBUG_INSN_P (cur)
          || GET_CODE (PATTERN (cur)) == USE
          || GET_CODE (PATTERN (cur)) == CLOBBER)
        continue;

      seen++;

      /* 4 is all the caller needs to know about; stop counting.  */
      if (seen >= 4)
        break;
    }

  return seen;
}
/* Estimate the minimum number of instructions on a code path from the
   function entry through BB.  A result of 4 or more means the path is
   long enough; the exact value is then not meaningful (it may be larger
   than 4).  */
static int
ix86_count_insn (basic_block bb)
{
  edge pred;
  edge_iterator pred_it;
  /* Start from the saturated value; it stays at 4 when no predecessor
     of BB is the entry block or is entered directly from it.  */
  int shortest = 4;

  /* Only paths with at most 2 basic blocks between entry and exit are
     examined.  BB is expected to have an edge to exit.  For every
     predecessor of BB that is itself entered from the entry block,
     count its instructions and remember the smallest count.  */
  FOR_EACH_EDGE (pred, pred_it, bb->preds)
    {
      basic_block src = pred->src;
      edge grand;
      edge_iterator grand_it;

      /* BB is entered straight from entry: nothing precedes it.  */
      if (src == ENTRY_BLOCK_PTR)
        {
          shortest = 0;
          break;
        }

      FOR_EACH_EDGE (grand, grand_it, src->preds)
        {
          int n;

          if (grand->src != ENTRY_BLOCK_PTR)
            continue;

          n = ix86_count_insn_bb (src);
          if (n < shortest)
            shortest = n;

          /* One edge from entry is enough to classify SRC.  */
          break;
        }
    }

  /* Add the instructions of BB itself only when a short path into BB
     was found.  */
  return shortest < 4 ? shortest + ix86_count_insn_bb (bb) : shortest;
}
/* Pad short functions to 4 instructions.  For each predecessor of the
   exit block that ends in a RETURN jump and lies on a path of fewer
   than 4 instructions, emit a "nops" insn that makes up the difference,
   using two NOPs for every missing instruction.  */
static void
ix86_pad_short_function (void)
{
  edge exit_edge;
  edge_iterator exit_it;

  FOR_EACH_EDGE (exit_edge, exit_it, EXIT_BLOCK_PTR->preds)
    {
      rtx ret = BB_END (exit_edge->src);
      rtx where;
      int n_insns;

      /* Only blocks that end in a RETURN jump are padded.  */
      if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN)
        continue;

      n_insns = ix86_count_insn (exit_edge->src);
      if (n_insns >= 4)
        continue;

      /* Walk backwards from the return to the NOTE_INSN_EPILOGUE_BEG
         note; the padding is emitted before it.  */
      for (where = ret; where; where = PREV_INSN (where))
        if (NOTE_P (where)
            && NOTE_KIND (where) == NOTE_INSN_EPILOGUE_BEG)
          break;

      /* No such note found: emit right before the return instead.  */
      if (!where)
        where = ret;

      /* Two NOPs are counted as one instruction.  */
      emit_insn_before (gen_nops (GEN_INT (2 * (4 - n_insns))), where);
    }
}
/* Implement machine specific optimizations. We pad short functions for
Atom, pad returns for K8 CPUs, and run a pass to avoid 4 jumps in a single
16 byte window. */
static void
......@@ -27889,7 +28011,9 @@ ix86_reorg (void)
{
if (optimize && optimize_function_for_speed_p (cfun))
{
if (TARGET_PAD_RETURNS)
if (TARGET_PAD_SHORT_FUNCTION)
ix86_pad_short_function ();
else if (TARGET_PAD_RETURNS)
ix86_pad_returns ();
#ifdef ASM_OUTPUT_MAX_SKIP_PAD
if (TARGET_FOUR_JUMP_LIMIT)
......
......@@ -299,6 +299,7 @@ enum ix86_tune_indices {
X86_TUNE_USE_BT,
X86_TUNE_USE_INCDEC,
X86_TUNE_PAD_RETURNS,
X86_TUNE_PAD_SHORT_FUNCTION,
X86_TUNE_EXT_80387_CONSTANTS,
X86_TUNE_SHORTEN_X87_SSE,
X86_TUNE_AVOID_VECTOR_DECODE,
......@@ -385,6 +386,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
#define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT]
#define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC]
#define TARGET_PAD_RETURNS ix86_tune_features[X86_TUNE_PAD_RETURNS]
#define TARGET_PAD_SHORT_FUNCTION \
ix86_tune_features[X86_TUNE_PAD_SHORT_FUNCTION]
#define TARGET_EXT_80387_CONSTANTS \
ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]
#define TARGET_SHORTEN_X87_SSE ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE]
......
......@@ -104,6 +104,7 @@
UNSPEC_LD_MPIC ; load_macho_picbase
UNSPEC_TRUNC_NOOP
UNSPEC_DIV_ALREADY_SPLIT
UNSPEC_NOPS
;; For SSE/MMX support:
UNSPEC_FIX_NOTRUNC
......@@ -11465,6 +11466,39 @@
(set_attr "length_immediate" "0")
(set_attr "modrm" "0")])
;; Generate nops. Operand 0 is the number of nops, up to 8.
;; Operand 0 must be a constant integer from 1 to 8; any other value
;; reaches gcc_unreachable. The insn condition is reload_completed.
;; All requested nops are returned as one output template.
(define_insn "nops"
[(unspec [(match_operand 0 "const_int_operand" "")]
UNSPEC_NOPS)]
"reload_completed"
{
switch (INTVAL (operands[0]))
{
case 1:
return "nop";
case 2:
return "nop; nop";
case 3:
return "nop; nop; nop";
case 4:
return "nop; nop; nop; nop";
case 5:
return "nop; nop; nop; nop; nop";
case 6:
return "nop; nop; nop; nop; nop; nop";
case 7:
return "nop; nop; nop; nop; nop; nop; nop";
case 8:
return "nop; nop; nop; nop; nop; nop; nop; nop";
default:
gcc_unreachable ();
break;
}
}
;; The length attribute is taken directly from the nop count in operand 0.
[(set (attr "length") (symbol_ref "INTVAL (operands[0])"))
(set_attr "length_immediate" "0")
(set_attr "modrm" "0")])
;; Pad to 16-byte boundary, max skip in op0. Used to avoid
;; branch prediction penalty for the third jump in a 16-byte
;; block on K8.
......
2010-09-17 H.J. Lu <hongjiu.lu@intel.com>
* gcc.target/i386/pad-1.c: New.
* gcc.target/i386/pad-2.c: Likewise.
* gcc.target/i386/pad-3.c: Likewise.
* gcc.target/i386/pad-4.c: Likewise.
* gcc.target/i386/pad-5a.c: Likewise.
* gcc.target/i386/pad-5b.c: Likewise.
* gcc.target/i386/pad-6a.c: Likewise.
* gcc.target/i386/pad-6b.c: Likewise.
* gcc.target/i386/pad-7.c: Likewise.
* gcc.target/i386/pad-8.c: Likewise.
* gcc.target/i386/pad-9.c: Likewise.
* gcc.target/i386/pad-10.c: Likewise.
2010-09-17 H.J. Lu <hongjiu.lu@intel.com>
PR middle-end/45234
* gcc.dg/torture/stackalign/alloca-5.c: New.
......
/* { dg-do compile } */
/* { dg-options "-O2 -fomit-frame-pointer -mtune=generic -S" } */
/* { dg-final { scan-assembler "rep" } } */
/* { dg-final { scan-assembler-not "nop" } } */
void
foo ()
{
}
/* { dg-do compile } */
/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
/* { dg-final { scan-assembler-not "nop" } } */
/* { dg-final { scan-assembler-not "rep" } } */
extern void bar ();
int
foo2 (int z, int x)
{
if (x == 1)
{
bar ();
return z;
}
else
return x + z;
}
/* { dg-do compile } */
/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop; nop; nop" 1 } } */
/* { dg-final { scan-assembler-not "rep" } } */
void
foo ()
{
}
/* { dg-do compile } */
/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
/* { dg-final { scan-assembler-not "nop" } } */
/* { dg-final { scan-assembler-not "rep" } } */
int s[8] = {1, 2, 3, 4, 5, 6, 7, 8};
int d[8] = {11, 22, 33, 44, 55, 66, 77, 88};
void
foo ()
{
int i;
for (i = 0; i < 8; i++)
d[i] = s[i] + 0x1000;
}
/* { dg-do compile } */
/* { dg-require-effective-target ilp32 } */
/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S -fPIC" } */
/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop; nop; nop" 1 } } */
/* { dg-final { scan-assembler-not "rep" } } */
extern int bar;
int
foo ()
{
return bar;
}
/* { dg-do compile } */
/* { dg-require-effective-target ilp32 } */
/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
/* { dg-final { scan-assembler-times "nop; nop" 1 } } */
/* { dg-final { scan-assembler-not "nop; nop; nop" } } */
/* { dg-final { scan-assembler-not "rep" } } */
/* ilp32 three-operand sum, compiled for Atom: a pair of nops must be
   matched exactly once, a run of three nops must not appear, and "rep"
   must not appear.  */
int
foo (int x, int y, int z)
{
return x + y + z;
}
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */
/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */
/* { dg-final { scan-assembler-not "rep" } } */
/* lp64 three-operand sum, compiled for Atom: a run of four nops must be
   matched exactly once, a run of five nops must not appear, and "rep"
   must not appear.  */
int
foo (int x, int y, int z)
{
return x + y + z;
}
/* { dg-do compile } */
/* { dg-require-effective-target ilp32 } */
/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */
/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */
/* { dg-final { scan-assembler-not "rep" } } */
/* ilp32 two-operand sum, compiled for Atom: a run of four nops must be
   matched exactly once, a run of five nops must not appear, and "rep"
   must not appear.  */
int
foo (int x, int y)
{
return x + y;
}
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop" 1 } } */
/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop; nop; nop" } } */
/* { dg-final { scan-assembler-not "rep" } } */
/* lp64 two-operand sum, compiled for Atom: a run of six nops must be
   matched exactly once, a run of seven nops must not appear, and "rep"
   must not appear.  */
int
foo (int x, int y)
{
return x + y;
}
/* { dg-do compile } */
/* { dg-require-effective-target ilp32 } */
/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
/* { dg-final { scan-assembler-not "nop" } } */
/* { dg-final { scan-assembler-not "rep" } } */
int
foo (int x, int y, int z)
{
return x + y + z + y;
}
/* { dg-do compile } */
/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
/* { dg-final { scan-assembler-times "nop; nop; nop; nop; nop; nop" 1 } } */
/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop; nop; nop" } } */
/* { dg-final { scan-assembler-not "rep" } } */
/* Function returning its second argument, compiled for Atom: a run of
   six nops must be matched exactly once, a run of seven nops must not
   appear, and "rep" must not appear.  */
int
foo (int x, int y)
{
return y;
}
/* { dg-do compile } */
/* { dg-require-effective-target lp64 } */
/* { dg-options "-O2 -fomit-frame-pointer -march=atom -S" } */
/* { dg-final { scan-assembler-times "nop; nop; nop; nop" 1 } } */
/* { dg-final { scan-assembler-not "nop; nop; nop; nop; nop" } } */
/* { dg-final { scan-assembler-not "rep" } } */
/* lp64 function with a conditional call, compiled for Atom: a run of
   four nops must be matched exactly once, a run of five nops must not
   appear, and "rep" must not appear.  */
extern void bar (void);
void
foo (int x)
{
if (x)
bar ();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment