Commit 40ac4f73 by Christian Borntraeger Committed by Andreas Krebbel

Implement target hook for loop unrolling

2010-04-08  Christian Borntraeger  <borntraeger@de.ibm.com>
            Wolfgang Gellerich  <gellerich@de.ibm.com>

        Implement target hook for loop unrolling
        * target.h (loop_unroll_adjust): Add a new target hook function.
        * target-def.h (TARGET_LOOP_UNROLL_ADJUST): Likewise.
        * doc/tm.texi (TARGET_LOOP_UNROLL_ADJUST): Document it.
        * config/s390/s390.c (TARGET_LOOP_UNROLL_ADJUST): Define it.
        (s390_loop_unroll_adjust): Implement the new target hook for s390.
        * loop-unroll.c (decide_unroll_runtime_iterations): Call loop unroll target hook.
        (decide_unroll_stupid): Likewise.


Co-Authored-By: Wolfgang Gellerich <gellerich@de.ibm.com>

From-SVN: r158132
parent 07c60ef7
2010-04-08 Christian Borntraeger <borntraeger@de.ibm.com>
Wolfgang Gellerich <gellerich@de.ibm.com>
Implement target hook for loop unrolling
* target.h (loop_unroll_adjust): Add a new target hook function.
* target-def.h (TARGET_LOOP_UNROLL_ADJUST): Likewise.
* doc/tm.texi (TARGET_LOOP_UNROLL_ADJUST): Document it.
* config/s390/s390.c (TARGET_LOOP_UNROLL_ADJUST): Define it.
(s390_loop_unroll_adjust): Implement the new target hook for s390.
* loop-unroll.c (decide_unroll_runtime_iterations): Call loop unroll target hook.
(decide_unroll_stupid): Likewise.
2010-04-08 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> 2010-04-08 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
PR target/43643 PR target/43643
......
...@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3. If not see
#include "gimple.h" #include "gimple.h"
#include "df.h" #include "df.h"
#include "params.h" #include "params.h"
#include "cfgloop.h"
/* Define the specific costs for a given cpu. */ /* Define the specific costs for a given cpu. */
...@@ -10245,6 +10246,62 @@ s390_sched_init (FILE *file ATTRIBUTE_UNUSED, ...@@ -10245,6 +10246,62 @@ s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
last_scheduled_insn = NULL_RTX; last_scheduled_insn = NULL_RTX;
} }
/* Callback for for_each_rtx that counts memory references.

   X points to the sub-rtx currently being visited.  DATA points to the
   unsigned counter to update; the counter is incremented whenever *X
   is a MEM.

   The function always returns zero because the framework it is called
   from would stop recursively analyzing the insn upon a return value
   other than zero.  The real result of this function is the updated
   counter.

   DATA is declared as void * so that the function matches the generic
   (rtx *, void *) callback signature it is invoked through, instead of
   being called via a cast to an incompatible function pointer type.  */
static int
check_dpu (rtx *x, void *data)
{
  unsigned *mem_count = (unsigned *) data;

  if (*x != NULL_RTX && MEM_P (*x))
    (*mem_count)++;

  return 0;
}
/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
a new number struct loop *loop should be unrolled if tuned for the z10
cpu. The loop is analyzed for memory accesses by calling check_dpu for
each rtx of the loop. Depending on the loop_depth and the amount of
memory accesses a new number <=nunroll is returned to improve the
behaviour of the hardware prefetch unit. */
static unsigned
s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
basic_block *bbs;
rtx insn;
unsigned i;
unsigned mem_count = 0;
/* Only z10 needs special handling. */
if (s390_tune != PROCESSOR_2097_Z10)
return nunroll;
/* Count the number of memory references within the loop body. */
bbs = get_loop_body (loop);
for (i = 0; i < loop->num_nodes; i++)
{
for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
if (INSN_P (insn) && INSN_CODE (insn) != -1)
for_each_rtx (&insn, (rtx_function) check_dpu, &mem_count);
}
free (bbs);
/* Prevent division by zero, and we do not need to adjust nunroll in this case. */
if (mem_count == 0)
return nunroll;
switch (loop_depth(loop))
{
case 1:
return MIN (nunroll, 28 / mem_count);
case 2:
return MIN (nunroll, 22 / mem_count);
default:
return MIN (nunroll, 16 / mem_count);
}
}
/* Initialize GCC target structure. */ /* Initialize GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP #undef TARGET_ASM_ALIGNED_HI_OP
...@@ -10373,6 +10430,9 @@ s390_sched_init (FILE *file ATTRIBUTE_UNUSED, ...@@ -10373,6 +10430,9 @@ s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
#undef TARGET_CAN_ELIMINATE #undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate #define TARGET_CAN_ELIMINATE s390_can_eliminate
#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
#undef TARGET_ASM_TRAMPOLINE_TEMPLATE #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT #undef TARGET_TRAMPOLINE_INIT
......
...@@ -10881,6 +10881,15 @@ This target hook is required only when the target has several different ...@@ -10881,6 +10881,15 @@ This target hook is required only when the target has several different
modes and they have different conditional execution capability, such as ARM. modes and they have different conditional execution capability, such as ARM.
@end deftypefn @end deftypefn
@deftypefn {Target Hook} unsigned TARGET_LOOP_UNROLL_ADJUST (unsigned @var{nunroll}, struct loop *@var{loop})
This target hook returns a new value for the number of times @var{loop}
should be unrolled. The parameter @var{nunroll} is the number of times
the loop is to be unrolled. The parameter @var{loop} is a pointer to
the loop, which is going to be checked for unrolling. This target hook
is required only when the target has special constraints, such as a
maximum number of memory accesses.
@end deftypefn
@defmac POWI_MAX_MULTS @defmac POWI_MAX_MULTS
If defined, this macro is interpreted as a signed integer C expression If defined, this macro is interpreted as a signed integer C expression
that specifies the maximum number of floating point multiplications that specifies the maximum number of floating point multiplications
......
...@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -33,6 +33,7 @@ along with GCC; see the file COPYING3. If not see
#include "expr.h" #include "expr.h"
#include "hashtab.h" #include "hashtab.h"
#include "recog.h" #include "recog.h"
#include "target.h"
/* This pass performs loop unrolling and peeling. We only perform these /* This pass performs loop unrolling and peeling. We only perform these
optimizations on innermost loops (with single exception) because optimizations on innermost loops (with single exception) because
...@@ -826,6 +827,9 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags) ...@@ -826,6 +827,9 @@ decide_unroll_runtime_iterations (struct loop *loop, int flags)
if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
if (targetm.loop_unroll_adjust)
nunroll = targetm.loop_unroll_adjust (nunroll, loop);
/* Skip big loops. */ /* Skip big loops. */
if (nunroll <= 1) if (nunroll <= 1)
{ {
...@@ -1366,6 +1370,9 @@ decide_unroll_stupid (struct loop *loop, int flags) ...@@ -1366,6 +1370,9 @@ decide_unroll_stupid (struct loop *loop, int flags)
if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES))
nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
if (targetm.loop_unroll_adjust)
nunroll = targetm.loop_unroll_adjust (nunroll, loop);
/* Skip big loops. */ /* Skip big loops. */
if (nunroll <= 1) if (nunroll <= 1)
{ {
......
...@@ -545,6 +545,7 @@ ...@@ -545,6 +545,7 @@
default_branch_target_register_class default_branch_target_register_class
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED hook_bool_bool_false #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED hook_bool_bool_false
#define TARGET_HAVE_CONDITIONAL_EXECUTION default_have_conditional_execution #define TARGET_HAVE_CONDITIONAL_EXECUTION default_have_conditional_execution
#define TARGET_LOOP_UNROLL_ADJUST NULL
#define TARGET_CANNOT_FORCE_CONST_MEM hook_bool_rtx_false #define TARGET_CANNOT_FORCE_CONST_MEM hook_bool_rtx_false
#define TARGET_CANNOT_COPY_INSN_P NULL #define TARGET_CANNOT_COPY_INSN_P NULL
#define TARGET_COMMUTATIVE_P hook_bool_const_rtx_commutative_p #define TARGET_COMMUTATIVE_P hook_bool_const_rtx_commutative_p
...@@ -947,6 +948,7 @@ ...@@ -947,6 +948,7 @@
TARGET_BRANCH_TARGET_REGISTER_CLASS, \ TARGET_BRANCH_TARGET_REGISTER_CLASS, \
TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED, \ TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED, \
TARGET_HAVE_CONDITIONAL_EXECUTION, \ TARGET_HAVE_CONDITIONAL_EXECUTION, \
TARGET_LOOP_UNROLL_ADJUST, \
TARGET_CANNOT_FORCE_CONST_MEM, \ TARGET_CANNOT_FORCE_CONST_MEM, \
TARGET_CANNOT_COPY_INSN_P, \ TARGET_CANNOT_COPY_INSN_P, \
TARGET_COMMUTATIVE_P, \ TARGET_COMMUTATIVE_P, \
......
...@@ -97,6 +97,9 @@ struct _dep; ...@@ -97,6 +97,9 @@ struct _dep;
/* This is defined in ddg.h . */ /* This is defined in ddg.h . */
struct ddg; struct ddg;
/* This is defined in cfgloop.h . */
struct loop;
/* Assembler instructions for creating various kinds of integer object. */ /* Assembler instructions for creating various kinds of integer object. */
struct asm_int_op struct asm_int_op
...@@ -637,6 +640,9 @@ struct gcc_target ...@@ -637,6 +640,9 @@ struct gcc_target
/* Return true if the target supports conditional execution. */ /* Return true if the target supports conditional execution. */
bool (* have_conditional_execution) (void); bool (* have_conditional_execution) (void);
/* Return a new value for loop unroll size. */
unsigned (* loop_unroll_adjust) (unsigned nunroll, struct loop *loop);
/* True if the constant X cannot be placed in the constant pool. */ /* True if the constant X cannot be placed in the constant pool. */
bool (* cannot_force_const_mem) (rtx); bool (* cannot_force_const_mem) (rtx);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment