Commit 587790e6 by Andreas Krebbel Committed by Andreas Krebbel

S/390: memset: Avoid overlapping MVC operands between iterations.

A memset with a value != 0 is currently implemented using the mvc
instruction propagating the first byte through 256 byte blocks.  While
for the first mvc the byte is written with a separate instruction
subsequent MVCs used the last byte of the previous 256 byte block.

Starting with z13 this causes a major performance degradation.  With
this patch we always set the first byte with an mvi or stc in order to
avoid the overlapping of the MVC operands between loop iterations.

On older machines this basically makes no measurable difference so the
patch enables the new behavior for all machine levels in order to make
sure that code built for older machine levels runs well when moved to
a z13.

Bootstrapped and regression tested on s390 and s390x using z900 and z13
as default -march level. No regressions.

gcc/ChangeLog:

2017-01-05  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* config/s390/s390.c (s390_expand_setmem): Avoid overlapping bytes
	between loop iterations.

From-SVN: r244096
parent 6ff92497
2017-01-05 Andreas Krebbel <krebbel@linux.vnet.ibm.com>
* config/s390/s390.c (s390_expand_setmem): Avoid overlapping bytes
between loop iterations.
2017-01-05 Martin Liska <mliska@suse.cz>
PR sanitizer/78815
......
......@@ -5346,6 +5346,8 @@ s390_expand_movmem (rtx dst, rtx src, rtx len)
void
s390_expand_setmem (rtx dst, rtx len, rtx val)
{
const int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
return;
......@@ -5391,13 +5393,14 @@ s390_expand_setmem (rtx dst, rtx len, rtx val)
{
rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
rtx_code_label *loop_start_label = gen_label_rtx ();
rtx_code_label *loop_end_label = gen_label_rtx ();
rtx_code_label *end_label = gen_label_rtx ();
rtx_code_label *onebyte_end_label = gen_label_rtx ();
rtx_code_label *zerobyte_end_label = gen_label_rtx ();
rtx_code_label *restbyte_end_label = gen_label_rtx ();
machine_mode mode;
mode = GET_MODE (len);
if (mode == VOIDmode)
mode = Pmode;
mode = Pmode;
dst_addr = gen_reg_rtx (Pmode);
count = gen_reg_rtx (mode);
......@@ -5405,39 +5408,56 @@ s390_expand_setmem (rtx dst, rtx len, rtx val)
convert_move (count, len, 1);
emit_cmp_and_jump_insns (count, const0_rtx,
EQ, NULL_RTX, mode, 1, end_label);
EQ, NULL_RTX, mode, 1, zerobyte_end_label,
very_unlikely);
/* We need to make a copy of the target address since memset is
supposed to return it unmodified. We have to make it here
already since the new reg is used at onebyte_end_label. */
emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
dst = change_address (dst, VOIDmode, dst_addr);
if (val == const0_rtx)
temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
OPTAB_DIRECT);
else
if (val != const0_rtx)
{
dstp1 = adjust_address (dst, VOIDmode, 1);
/* When using the overlapping mvc the original target
address is only accessed as single byte entity (even by
the mvc reading this value). */
set_mem_size (dst, 1);
/* Initialize memory by storing the first byte. */
emit_move_insn (adjust_address (dst, QImode, 0), val);
/* If count is 1 we are done. */
emit_cmp_and_jump_insns (count, const1_rtx,
EQ, NULL_RTX, mode, 1, end_label);
temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
OPTAB_DIRECT);
}
dstp1 = adjust_address (dst, VOIDmode, 1);
emit_cmp_and_jump_insns (count,
const1_rtx, EQ, NULL_RTX, mode, 1,
onebyte_end_label, very_unlikely);
}
/* There is one unconditional (mvi+mvc)/xc after the loop
dealing with the rest of the bytes, subtracting two (mvi+mvc)
or one (xc) here leaves this number of bytes to be handled by
it. */
temp = expand_binop (mode, add_optab, count,
val == const0_rtx ? constm1_rtx : GEN_INT (-2),
count, 1, OPTAB_DIRECT);
if (temp != count)
emit_move_insn (count, temp);
emit_move_insn (count, temp);
temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
OPTAB_DIRECT);
if (temp != blocks)
emit_move_insn (blocks, temp);
emit_move_insn (blocks, temp);
emit_cmp_and_jump_insns (blocks, const0_rtx,
EQ, NULL_RTX, mode, 1, loop_end_label);
EQ, NULL_RTX, mode, 1, restbyte_end_label);
emit_jump (loop_start_label);
if (val != const0_rtx)
{
/* The 1 byte != 0 special case. Not handled efficiently
since we require two jumps for that. However, this
should be very rare. */
emit_label (onebyte_end_label);
emit_move_insn (adjust_address (dst, QImode, 0), val);
emit_jump (zerobyte_end_label);
}
emit_label (loop_start_label);
......@@ -5455,26 +5475,39 @@ s390_expand_setmem (rtx dst, rtx len, rtx val)
if (val == const0_rtx)
emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
else
emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
{
/* Set the first byte in the block to the value and use an
overlapping mvc for the block. */
emit_move_insn (adjust_address (dst, QImode, 0), val);
emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
}
s390_load_address (dst_addr,
gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
OPTAB_DIRECT);
if (temp != blocks)
emit_move_insn (blocks, temp);
emit_move_insn (blocks, temp);
emit_cmp_and_jump_insns (blocks, const0_rtx,
EQ, NULL_RTX, mode, 1, loop_end_label);
NE, NULL_RTX, mode, 1, loop_start_label);
emit_jump (loop_start_label);
emit_label (loop_end_label);
emit_label (restbyte_end_label);
if (val == const0_rtx)
emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
else
emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
emit_label (end_label);
{
/* Set the first byte in the block to the value and use an
overlapping mvc for the block. */
emit_move_insn (adjust_address (dst, QImode, 0), val);
/* execute only uses the lowest 8 bits of count that's
exactly what we need here. */
emit_insn (gen_movmem_short (dstp1, dst,
convert_to_mode (Pmode, count, 1)));
}
emit_label (zerobyte_end_label);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment