Commit 80cab2f0 by Jan Hubicka, committed by Jan Hubicka

i386.c (ix86_expand_movmem, [...]): Avoid processing too many bytes on misaligned blocks.

	* i386.c (ix86_expand_movmem, ix86_expand_movstr): Avoid processing too
	many bytes on misaligned blocks.

From-SVN: r140348
parent d1facce0
2008-09-13  Jan Hubicka  <jh@suse.cz>

	* i386.c (ix86_expand_movmem, ix86_expand_movstr): Avoid processing too
	many bytes on misaligned blocks.

2008-09-13  Ralf Wildenhues  <Ralf.Wildenhues@gmx.de>

	* doc/cfg.texi: Fix some typos.
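Background for the change below: both ix86_expand_movmem and ix86_expand_setmem first emit an alignment prologue (expand_movmem_prologue / expand_setmem_prologue) that handles up to desired_align - align bytes, and then a main loop that handles size_needed bytes per iteration. When the byte count is not a compile-time constant, a small misaligned block can be consumed almost entirely by the prologue; without a guard the main loop would still run and process too many bytes. The new need_zero_guard flag marks the algorithms that need such a guard, and the emitted emit_cmp_and_jump_insns skips the loop when fewer than size_needed bytes remain. The stand-alone C sketch below models that control flow only; it is not GCC code, and copy_block, STEP, DESIRED_ALIGN, and the byte-wise prologue are made-up stand-ins for size_needed, desired_align, and the real prologue.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Illustrative sketch only, not GCC code: STEP stands in for size_needed
   (bytes handled per main-loop iteration) and DESIRED_ALIGN for
   desired_align.  */
#define STEP 16
#define DESIRED_ALIGN 16

static void
copy_block (char *dst, const char *src, size_t count)
{
  /* Alignment prologue (stand-in for expand_movmem_prologue): copy single
     bytes until dst is aligned.  On a misaligned block this can consume up
     to DESIRED_ALIGN - 1 of the requested bytes.  */
  while (count && ((uintptr_t) dst % DESIRED_ALIGN) != 0)
    {
      *dst++ = *src++;
      count--;
    }

  /* The zero guard that need_zero_guard requests: if the prologue left
     fewer than STEP bytes, skip the main loop entirely; otherwise its first
     iteration would process too many bytes and run past the block.  */
  if (count >= STEP)
    {
      size_t iters = count / STEP;
      for (size_t i = 0; i < iters; i++)
        {
          memcpy (dst, src, STEP);   /* main loop: STEP bytes per pass */
          dst += STEP;
          src += STEP;
        }
      count %= STEP;
    }

  /* Epilogue: finish the remaining tail one byte at a time.  */
  while (count--)
    *dst++ = *src++;
}

int
main (void)
{
  char src[64], dst[64];
  for (int i = 0; i < 64; i++)
    src[i] = (char) i;
  /* A small, misaligned copy: the prologue may eat most of the 20 bytes,
     leaving fewer than STEP, so the guard keeps the main loop from
     overrunning the destination.  */
  copy_block (dst + 1, src + 1, 20);
  return memcmp (dst + 1, src + 1, 20) != 0;
}

In the actual expanders the guard branch also receives a static prediction through predict_jump, chosen from the expected block size, as visible in the hunks below.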
@@ -17222,6 +17222,7 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
   int desired_align = 0;
   enum stringop_alg alg;
   int dynamic_check;
+  bool need_zero_guard = false;

   if (CONST_INT_P (align_exp))
     align = INTVAL (align_exp);
@@ -17260,9 +17261,11 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
     case no_stringop:
       gcc_unreachable ();
     case loop:
+      need_zero_guard = true;
       size_needed = GET_MODE_SIZE (Pmode);
       break;
     case unrolled_loop:
+      need_zero_guard = true;
       size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
       break;
     case rep_prefix_8_byte:
@@ -17272,7 +17275,10 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
       size_needed = 4;
       break;
     case rep_prefix_1_byte:
+      size_needed = 1;
+      break;
     case loop_1_byte:
+      need_zero_guard = true;
       size_needed = 1;
       break;
     }
@@ -17350,6 +17356,19 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
       dst = change_address (dst, BLKmode, destreg);
       expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
                               desired_align);
+      if (need_zero_guard && !count)
+        {
+          /* It is possible that we copied enough so the main loop will not
+             execute.  */
+          emit_cmp_and_jump_insns (count_exp,
+                                   GEN_INT (size_needed),
+                                   LTU, 0, counter_mode (count_exp), 1, label);
+          if (expected_size == -1
+              || expected_size < (desired_align - align) / 2 + size_needed)
+            predict_jump (REG_BR_PROB_BASE * 20 / 100);
+          else
+            predict_jump (REG_BR_PROB_BASE * 60 / 100);
+        }
     }
   if (label && size_needed == 1)
     {
@@ -17550,6 +17569,7 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
   rtx promoted_val = NULL;
   bool force_loopy_epilogue = false;
   int dynamic_check;
+  bool need_zero_guard = false;

   if (CONST_INT_P (align_exp))
     align = INTVAL (align_exp);
@@ -17587,9 +17607,11 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
     case no_stringop:
       gcc_unreachable ();
     case loop:
+      need_zero_guard = true;
       size_needed = GET_MODE_SIZE (Pmode);
       break;
     case unrolled_loop:
+      need_zero_guard = true;
       size_needed = GET_MODE_SIZE (Pmode) * 4;
       break;
     case rep_prefix_8_byte:
@@ -17599,7 +17621,10 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
       size_needed = 4;
       break;
     case rep_prefix_1_byte:
+      size_needed = 1;
+      break;
     case loop_1_byte:
+      need_zero_guard = true;
       size_needed = 1;
       break;
     }
@@ -17675,6 +17700,19 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
       dst = change_address (dst, BLKmode, destreg);
       expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
                              desired_align);
+      if (need_zero_guard && !count)
+        {
+          /* It is possible that we copied enough so the main loop will not
+             execute.  */
+          emit_cmp_and_jump_insns (count_exp,
+                                   GEN_INT (size_needed),
+                                   LTU, 0, counter_mode (count_exp), 1, label);
+          if (expected_size == -1
+              || expected_size < (desired_align - align) / 2 + size_needed)
+            predict_jump (REG_BR_PROB_BASE * 20 / 100);
+          else
+            predict_jump (REG_BR_PROB_BASE * 60 / 100);
+        }
     }
   if (label && size_needed == 1)
     {