Commit 8eed4721 by Andreas Krebbel Committed by Andreas Krebbel

S/390: Don't unroll memory blk op loops

gcc/ChangeLog:

2018-08-06  Andreas Krebbel  <krebbel@linux.ibm.com>

	* config/s390/s390.c (s390_loop_unroll_adjust): Prevent small
	loops with memory block operations from getting unrolled.

gcc/testsuite/ChangeLog:

2018-08-06  Andreas Krebbel  <krebbel@linux.ibm.com>

	* gcc.target/s390/nomemloopunroll-1.c: New test.

From-SVN: r263336
parent 2f171da9
2018-08-06 Andreas Krebbel <krebbel@linux.ibm.com>
* config/s390/s390.c (s390_loop_unroll_adjust): Prevent small
loops with memory block operations from getting unrolled.
2018-08-06 Ulrich Weigand <uweigand@de.ibm.com> 2018-08-06 Ulrich Weigand <uweigand@de.ibm.com>
PR target/86807 PR target/86807
......
...@@ -390,6 +390,11 @@ static unsigned vfu_longrunning[NUM_SIDES]; ...@@ -390,6 +390,11 @@ static unsigned vfu_longrunning[NUM_SIDES];
base and index are registers of the class ADDR_REGS, base and index are registers of the class ADDR_REGS,
displacement is an unsigned 12-bit immediate constant. */ displacement is an unsigned 12-bit immediate constant. */
/* The max number of insns of backend generated memset/memcpy/memcmp
loops. This value is used in the unroll adjust hook to detect such
loops. Current max is 9 coming from the memcmp loop. */
#define BLOCK_MEM_OPS_LOOP_INSNS 9
struct s390_address struct s390_address
{ {
rtx base; rtx base;
...@@ -15385,9 +15390,29 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop) ...@@ -15385,9 +15390,29 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
for (i = 0; i < loop->num_nodes; i++) for (i = 0; i < loop->num_nodes; i++)
FOR_BB_INSNS (bbs[i], insn) FOR_BB_INSNS (bbs[i], insn)
if (INSN_P (insn) && INSN_CODE (insn) != -1) if (INSN_P (insn) && INSN_CODE (insn) != -1)
FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) {
if (MEM_P (*iter)) rtx set;
mem_count += 1;
/* The runtime of small loops with memory block operations
will be determined by the memory operation. Doing
unrolling doesn't help here. Measurements to confirm
this were only done on recent CPU levels. So better do
not change anything for older CPUs. */
if (s390_tune >= PROCESSOR_2964_Z13
&& loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
&& ((set = single_set (insn)) != NULL_RTX)
&& ((GET_MODE (SET_DEST (set)) == BLKmode
&& (GET_MODE (SET_SRC (set)) == BLKmode
|| SET_SRC (set) == const0_rtx))
|| (GET_CODE (SET_SRC (set)) == COMPARE
&& GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
&& GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
return 1;
FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
if (MEM_P (*iter))
mem_count += 1;
}
free (bbs); free (bbs);
/* Prevent division by zero, and we do not need to adjust nunroll in this case. */ /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
......
2018-08-06 Andreas Krebbel <krebbel@linux.ibm.com>
* gcc.target/s390/nomemloopunroll-1.c: New test.
2018-08-05 H.J. Lu <hongjiu.lu@intel.com> 2018-08-05 H.J. Lu <hongjiu.lu@intel.com>
PR target/86386 PR target/86386
......
/* { dg-do compile } */
/* { dg-options "-O3 -funroll-loops -march=z13" } */
/* memset test case: fill value C and length LEN are both run-time
   values.  Expected to contribute 2x mvc to the assembler output;
   together with the 2x mvc expected from bar () this accounts for the
   4 mvc matches required by the dg-final directive at the end of the
   file.
   NOTE(review): a higher mvc count presumably means the backend
   generated block-operation loop got unrolled -- confirm against the
   s390 memset expansion.  */
void *
foo (char *a, int c, long len)
{
return __builtin_memset (a, c, len);
}
/* memcpy test case: copies a compile-time constant 30000 bytes from B
   to A.  Expected to contribute 2x mvc to the assembler output;
   together with the 2x mvc expected from foo () this accounts for the
   4 mvc matches required by the dg-final directive at the end of the
   file.
   NOTE(review): the constant is presumably chosen large enough that
   the backend emits a loop rather than straight-line code -- confirm
   against the s390 memcpy expansion.  */
void
bar (char *a, char *b)
{
__builtin_memcpy (a, b, 30000);
}
/* memcmp test case: compares a compile-time constant 30000 bytes of A
   and B and returns the result.  Expected to produce 2x clc in the
   assembler output, which is the exact clc count required by the
   dg-final directive at the end of the file.
   NOTE(review): a higher clc count presumably means the backend
   generated compare loop got unrolled -- confirm against the s390
   memcmp expansion.  */
int
baz (char *a, char *b)
{
return __builtin_memcmp (a, b, 30000);
}
/* { dg-final { scan-assembler-times "\\\smvc\\\s" 4 } } */
/* { dg-final { scan-assembler-times "\\\sclc\\\s" 2 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment