Commit 46dc0789 by Mircea Namolaru Committed by Revital Eres

Modulo-scheduling improvements. Patch 2 of 2

Co-Authored-By: Andrey Belevantsev <abel@ispras.ru>
Co-Authored-By: Revital Eres <eres@il.ibm.com>
Co-Authored-By: Vladimir Yanovsky <yanov@il.ibm.com>

From-SVN: r127848
parent 7368348c
2007-08-28 Mircea Namolaru <namolaru@il.ibm.com>
Vladimir Yanovsky <yanov@il.ibm.com>
Revital Eres <eres@il.ibm.com>
Andrey Belevantsev <abel@ispras.ru>
* config/spu/spu.md: Recognize doloop pattern when -fmodulo-sched
is set.
* modulo-sched.c: Add documentation regarding do-loop.
(doloop_register_get): Change number of arguments to support
the new do-loop pattern and check whether COUNT_REG has no other
occurrences in the loop besides in the control part.
(duplicate_insns_of_cycles): Do not duplicate the insn which
changes count_reg as it is already adjusted.
(generate_prolog_epilog): New argument to support the new
do-loop pattern. Change the subtract instruction to use
expand_simple_binop. Call duplicate_insns_of_cycles with new
argument.
(sms_schedule): Call doloop_register_get and
generate_prolog_epilog with new argument. Do not handle loops
with single sets insns with subreg in their lhs.
* loop-doloop.c (doloop_optimize): Support for another do-loop
pattern.
(doloop_condition_get): Gets an instruction instead of a pattern
and change the return condition when the do-loop pattern is
not parallel.
* ddg.c (create_ddg_dep_from_intra_loop_link): Handle only reg
deps when considering to not create edges.
2007-08-27 Alexandre Oliva <aoliva@redhat.com>
* doc/extend.texi (gnu_inline function attribute): Document C++
......
......@@ -3887,6 +3887,48 @@ selb\t%0,%4,%0,%3"
[(set_attr "type" "br")])
;; Define the subtract-one-and-jump insns so loop.c
;; knows what to generate.
(define_expand "doloop_end"
  [(use (match_operand 0 "" ""))      ; loop pseudo
   (use (match_operand 1 "" ""))      ; iterations; zero if unknown
   (use (match_operand 2 "" ""))      ; max iterations
   (use (match_operand 3 "" ""))      ; loop level
   (use (match_operand 4 "" ""))]     ; label
  ""
  "
 {
    /* Currently SMS relies on the do-loop pattern to recognize loops
       where (1) the control part comprises all insns defining and/or
       using a certain 'count' register and (2) the loop count can be
       adjusted by modifying this register prior to the loop.
       ??? The possible introduction of a new block to initialize the
       new IV can potentially affect branch optimizations.  */
    if (optimize > 0 && flag_modulo_sched)
      {
        rtx s0;
        rtx bcomp;
        rtx loc_ref;

        /* Only use this on innermost loops.  */
        if (INTVAL (operands[3]) > 1)
          FAIL;
        /* The decrement below is built in SImode, so reject any other
           counter mode.  */
        if (GET_MODE (operands[0]) != SImode)
          FAIL;

        /* Emit the two-insn form of the do-loop: first decrement the
           counter, then branch back to the label while the counter is
           non-zero.  */
        s0 = operands [0];
        emit_move_insn (s0, gen_rtx_PLUS (SImode, s0, GEN_INT (-1)));
        bcomp = gen_rtx_NE(SImode, s0, const0_rtx);
        loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands [4]);
        emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
                                     gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
                                                           loc_ref, pc_rtx)));
        DONE;
      }
    else
      /* Without modulo scheduling there is nothing to emit.  Fail
         explicitly instead of falling through, which would emit the
         placeholder USE template above as if it were the loop-end
         sequence.  */
      FAIL;
 }")
;; convert between any two modes, avoiding any GCC assumptions
(define_expand "spu_convert"
[(set (match_operand 0 "spu_reg_operand" "")
......
......@@ -176,13 +176,17 @@ create_ddg_dep_from_intra_loop_link (ddg_ptr g, ddg_node_ptr src_node,
rtx set;
set = single_set (dest_node->insn);
if (set)
/* TODO: Handle registers that REG_P is not true for them, i.e.
subregs and special registers. */
if (set && REG_P (SET_DEST (set)))
{
int regno = REGNO (SET_DEST (set));
struct df_ref *first_def =
df_bb_regno_first_def_find (g->bb, regno);
struct df_ref *first_def;
struct df_rd_bb_info *bb_info = DF_RD_BB_INFO (g->bb);
first_def = df_bb_regno_first_def_find (g->bb, regno);
gcc_assert (first_def);
if (bitmap_bit_p (bb_info->gen, first_def->id))
return;
}
......
......@@ -69,35 +69,59 @@ along with GCC; see the file COPYING3. If not see
if it is not a decrement and branch jump insn. */
rtx
doloop_condition_get (rtx pattern)
doloop_condition_get (rtx doloop_pat)
{
rtx cmp;
rtx inc;
rtx reg;
rtx inc_src;
rtx condition;
rtx pattern;
/* The canonical doloop pattern we expect is:
/* The canonical doloop pattern we expect has one of the following
forms:
(parallel [(set (pc) (if_then_else (condition)
(label_ref (label))
(pc)))
(set (reg) (plus (reg) (const_int -1)))
(additional clobbers and uses)])
1) (parallel [(set (pc) (if_then_else (condition)
(label_ref (label))
(pc)))
(set (reg) (plus (reg) (const_int -1)))
(additional clobbers and uses)])
Some targets (IA-64) wrap the set of the loop counter in
an if_then_else too.
The branch must be the first entry of the parallel (also required
by jump.c), and the second entry of the parallel must be a set of
the loop counter register. Some targets (IA-64) wrap the set of
the loop counter in an if_then_else too.
In summary, the branch must be the first entry of the
parallel (also required by jump.c), and the second
entry of the parallel must be a set of the loop counter
register. */
2) (set (reg) (plus (reg) (const_int -1))
(set (pc) (if_then_else (reg != 0)
(label_ref (label))
(pc))). */
pattern = PATTERN (doloop_pat);
if (GET_CODE (pattern) != PARALLEL)
return 0;
{
rtx cond;
/* We expect the decrement to immediately precede the branch. */
cmp = XVECEXP (pattern, 0, 0);
inc = XVECEXP (pattern, 0, 1);
if ((PREV_INSN (doloop_pat) == NULL_RTX)
|| !INSN_P (PREV_INSN (doloop_pat)))
return 0;
cmp = pattern;
inc = PATTERN (PREV_INSN (doloop_pat));
/* We expect the condition to be of the form (reg != 0) */
cond = XEXP (SET_SRC (cmp), 0);
if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
return 0;
}
else
{
cmp = XVECEXP (pattern, 0, 0);
inc = XVECEXP (pattern, 0, 1);
}
/* Check for (set (reg) (something)). */
if (GET_CODE (inc) != SET)
......@@ -139,7 +163,29 @@ doloop_condition_get (rtx pattern)
if ((XEXP (condition, 0) == reg)
|| (GET_CODE (XEXP (condition, 0)) == PLUS
&& XEXP (XEXP (condition, 0), 0) == reg))
{
if (GET_CODE (pattern) != PARALLEL)
/* The second form we expect:
(set (reg) (plus (reg) (const_int -1))
(set (pc) (if_then_else (reg != 0)
(label_ref (label))
(pc))).
is equivalent to the following:
(parallel [(set (pc) (if_then_else (reg != 1)
(label_ref (label))
(pc)))
(set (reg) (plus (reg) (const_int -1)))
(additional clobbers and uses)])
So we return that form instead.
*/
condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
return condition;
}
/* ??? If a machine uses a funny comparison, we could return a
canonicalized form here. */
......@@ -597,9 +643,7 @@ doloop_optimize (struct loop *loop)
{
while (NEXT_INSN (doloop_pat) != NULL_RTX)
doloop_pat = NEXT_INSN (doloop_pat);
if (JUMP_P (doloop_pat))
doloop_pat = PATTERN (doloop_pat);
else
if (!JUMP_P (doloop_pat))
doloop_pat = NULL_RTX;
}
......
2007-08-28 Mircea Namolaru <namolaru@il.ibm.com>
Vladimir Yanovsky <yanov@il.ibm.com>
Revital Eres <eres@il.ibm.com>
Andrey Belevantsev <abel@ispras.ru>
* gcc.dg/sms-1.c: New test.
2007-08-27 Alexandre Oliva <aoliva@redhat.com>
* g++.dg/ext/gnu-inline-common.h: New.
/* The same test as loop-3c.c.  It used to fail on ia64 because a
subreg in the lhs was not handled; that is now fixed.  */
/* { dg-do run } */
/* { dg-options "-O2 -fmodulo-sched -fmodulo-sched-allow-regmoves -w" } */
#include <limits.h>
void * a[255];

/* Fill slots of A starting from the loop counter M.

   SH starts at 0x100 and is halved before every store, so successive
   iterations write a[0x80], a[0x40], a[0x20], ...  The counter I
   advances by 4 per iteration and the loop stops once I reaches
   INT_MAX/2 + 1 + 4 * 4; being a do-while, the body always runs at
   least once.  The stored value is the address of A offset by
   (unsigned) I << 3.

   The return value carries no information; it is kept as int (what the
   old implicit-int definition returned) and callers ignore it.  */
int
f (int m)
{
  int i;
  int sh = 0x100;

  i = m;
  do
    {
      a[sh >>= 1] = ((unsigned)i << 3) + (char*)a;
      i += 4;
    }
  while (i < INT_MAX/2 + 1 + 4 * 4);

  return 0;
}
/* Run F twice and check which slots of A were written.

   The first call starts above the loop bound, so the loop body runs
   once and neither a[0x10] nor a[0x08] may be touched.  The second
   call starts at INT_MAX/2 + 1, which must reach a[0x10] but stop
   before a[0x08].  Any other outcome aborts.

   __builtin_abort and a plain return are used so that no library
   declarations are required.  */
int
main (void)
{
  a[0x10] = 0;
  a[0x08] = 0;
  f (INT_MAX/2 + INT_MAX/4 + 2);
  if (a[0x10] || a[0x08])
    __builtin_abort ();

  a[0x10] = 0;
  a[0x08] = 0;
  f (INT_MAX/2 + 1);
  if (! a[0x10] || a[0x08])
    __builtin_abort ();

  return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment