Commit 2130b7fb
authored Dec 21, 2000 by Bernd Schmidt, committed by Bernd Schmidt on Dec 21, 2000
ia64 specific scheduling bits
From-SVN: r38419
parent 5f446d21
Showing 8 changed files with 1687 additions and 255 deletions
gcc/ChangeLog                  +51    -0
gcc/Makefile.in                +2     -1
gcc/config/ia64/ia64-protos.h  +8     -0
gcc/config/ia64/ia64.c         +1441  -175
gcc/config/ia64/ia64.h         +35    -18
gcc/config/ia64/ia64.md        +146   -57
gcc/rtl.h                      +2     -1
gcc/rtlanal.c                  +2     -3
gcc/ChangeLog
2000-12-21 Bernd Schmidt <bernds@redhat.com>
* Makefile.in (out_object_file): Depend on sched-int.h.
* rtl.h (single_set_1): New macro.
(single_set_2): Renamed from single_set_1 and extra argument added.
* rtlanal.c (single_set_2): Likewise.
* config/ia64/ia64-protos.h (get_bundle_name, ia64_issue_rate,
ia64_adjust_cost, ia64_sched_init, ia64_sched_finish,
ia64_sched_reorder, ia64_sched_reorder2, ia64_variable_issue):
Declare.
* config/ia64/ia64.c: Include "sched-int.h".
(hard_regno_rename_ok): Also disallow renaming from the various
reg_save_* regs.
(ia64_safe_itanium_requires_unit0, ia64_safe_itanium_class,
ia64_safe_type, init_insn_group_barriers, group_barrier_needed_p,
safe_group_barrier_needed_p, fixup_errata): New static functions.
(rtx_needs_barrier): Handle bundle selector and cycle display
insns.
(emit_insn_group_barriers): Accept additional FILE * arg. All
callers changed. Rework to only generate stop bits between
basic blocks that haven't been inserted by scheduling.
(struct bundle, struct ia64_packet): New structures.
(NR_BUNDLES, NR_PACKETS): New macros.
(bundle, packets, type_names): New static arrays.
(ia64_final_schedule): New variable.
(ia64_single_set, insn_matches_slot, ia64_emit_insn_before,
gen_nop_type, finish_last_head, rotate_one_bundle, rotate_two_bundles,
cycle_end_fill_slots, packet_matches_p, get_split, find_best_insn,
find_best_packet, itanium_reorder, dump_current_packet, schedule_stop):
New static functions.
(ia64_issue_rate, ia64_sched_init, ia64_sched_reorder,
ia64_sched_finish, ia64_sched_reorder2, ia64_variable_issue): New
functions.
(ia64_reorg): Perform a final scheduling pass.
* config/ia64/ia64.h (CONST_COSTS): Slightly increase SYMBOL_REF costs.
(MAX_CONDITIONAL_EXECUTE, ADJUST_COST, ISSUE_RATE, MD_SCHED_INIT,
MD_SCHED_REORDER, MD_SCHED_REORDER2, MD_SCHED_FINISH,
MD_SCHED_VARIABLE_ISSUE): Define macros.
(ia64_final_schedule): Declare variable.
* config/ia64/ia64.md (attr itanium_class): Add some more classes.
(attr type): Account for them.
(itanium_requires_unit0): New attribute.
(function units): Rewrite.
(some splitters): Don't create scheduling barriers here.
(gr_spill_internal, gr_restore_internal): Don't predicate the
pseudo-op.
(nop_m, nop_i, nop_f, nop_b, nop_x, cycle_display, cycle_display_1,
bundle_selector): New patterns.
(insn_group_barrier): Now has an operand.
2000-12-21 DJ Delorie <dj@redhat.com>
* dwarf2out.c (simple_decl_align_in_bits): new
...
...
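The entries above add a full machine-dependent scheduling pass for ia64; the ia64.h hunk later in this commit wires the new routines into the scheduler through the MD_SCHED_* target macros. As a rough, illustrative sketch of that calling protocol — every name ending in _toy below is a hypothetical stand-in, not a GCC interface — the scheduler calls an init hook once per block, a reorder hook when a cycle starts, and a variable-issue hook after each issued insn:

#include <stdio.h>

/* Toy model of the hook protocol this patch implements.  All _toy
   names are hypothetical stand-ins, not GCC interfaces.  */

typedef struct { int uid; } insn_toy;

static void sched_init_toy (FILE *dump, int verbose, int max_ready)
{
  if (verbose)
    fprintf (dump, "init: up to %d insns ready\n", max_ready);
}

static int sched_reorder_toy (insn_toy *ready, int n_ready)
{
  return n_ready > 0;           /* nonzero: something can issue now */
}

static int variable_issue_toy (FILE *dump, insn_toy *insn)
{
  fprintf (dump, "issued insn %d\n", insn->uid);
  return 1;                     /* insns left that may issue this cycle */
}

int main (void)
{
  insn_toy ready[2] = { { 1 }, { 2 } };
  int i;

  sched_init_toy (stdout, 1, 2);
  if (sched_reorder_toy (ready, 2))
    for (i = 1; i >= 0; i--)    /* the ready list issues from the end */
      if (! variable_issue_toy (stdout, &ready[i]))
        break;
  return 0;
}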
gcc/Makefile.in
...
@@ -1499,7 +1499,8 @@ dependence.o : dependence.c $(CONFIG_H) system.h $(RTL_H) $(TREE_H) \
 $(out_object_file): $(out_file) $(CONFIG_H) $(TREE_H) $(GGC_H) \
   $(RTL_H) $(REGS_H) hard-reg-set.h real.h insn-config.h conditions.h \
-  insn-flags.h output.h $(INSN_ATTR_H) insn-codes.h system.h toplev.h function.h
+  insn-flags.h output.h $(INSN_ATTR_H) insn-codes.h system.h toplev.h \
+  function.h sched-int.h
	$(CC) -c $(ALL_CFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
	  $(out_file) $(OUTPUT_OPTION)
...
...
gcc/config/ia64/ia64-protos.h
...
@@ -92,6 +92,14 @@ extern enum reg_class ia64_secondary_reload_class PARAMS((enum reg_class, rtx));
 extern void ia64_reorg PARAMS((rtx));
 extern void process_for_unwind_directive PARAMS((FILE *, rtx));
+extern const char *get_bundle_name PARAMS((int));
+extern int ia64_issue_rate PARAMS((void));
+extern int ia64_adjust_cost PARAMS((rtx, rtx, rtx, int));
+extern void ia64_sched_init PARAMS((FILE *, int, int));
+extern void ia64_sched_finish PARAMS((FILE *, int));
+extern int ia64_sched_reorder PARAMS((FILE *, int, rtx *, int *, int));
+extern int ia64_sched_reorder2 PARAMS((FILE *, int, rtx *, int *, int));
+extern int ia64_variable_issue PARAMS((FILE *, int, rtx, int));
 #endif /* RTX_CODE */

 #ifdef TREE_CODE
...
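A note on notation: the PARAMS macro wrapping each argument list is GCC's prototype-portability wrapper of that era. A minimal sketch of the convention, assuming roughly the historical gansidecl.h definition (the real macro tests ANSI_PROTOTYPES rather than __STDC__ directly):

/* Sketch of the PARAMS convention; the real macro lives in
   gansidecl.h.  */
#ifndef PARAMS
# ifdef __STDC__
#  define PARAMS(args) args      /* ANSI: keep the prototype */
# else
#  define PARAMS(args) ()        /* K&R: drop the argument list */
# endif
#endif

/* With an ANSI compiler, this declaration ...  */
extern int ia64_issue_rate PARAMS ((void));
/* ... preprocesses to:  extern int ia64_issue_rate (void);  */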
gcc/config/ia64/ia64.c
...
@@ -42,6 +42,7 @@ Boston, MA 02111-1307, USA.  */
 #include "ggc.h"
 #include "basic-block.h"
 #include "toplev.h"
+#include "sched-int.h"

 /* This is used for communication between ASM_OUTPUT_LABEL and
    ASM_OUTPUT_LABELREF.  */
...
...
@@ -114,7 +115,7 @@ static void fix_range PARAMS ((const char *));
 static void ia64_add_gc_roots PARAMS ((void));
 static void ia64_init_machine_status PARAMS ((struct function *));
 static void ia64_mark_machine_status PARAMS ((struct function *));
-static void emit_insn_group_barriers PARAMS ((rtx));
+static void emit_insn_group_barriers PARAMS ((FILE *, rtx));
 static void emit_predicate_relation_info PARAMS ((void));
 static int process_set PARAMS ((FILE *, rtx));
...
...
@@ -128,7 +129,6 @@ static rtx ia64_expand_lock_test_and_set PARAMS ((enum machine_mode,
                                                   tree, rtx));
 static rtx ia64_expand_lock_release PARAMS ((enum machine_mode, tree, rtx));

 /* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

 int
...
...
@@ -2401,6 +2401,14 @@ ia64_hard_regno_rename_ok (from, to)
       || to == current_frame_info.reg_save_ar_lc)
     return 0;

+  if (from == current_frame_info.reg_fp
+      || from == current_frame_info.reg_save_b0
+      || from == current_frame_info.reg_save_pr
+      || from == current_frame_info.reg_save_ar_pfs
+      || from == current_frame_info.reg_save_ar_unat
+      || from == current_frame_info.reg_save_ar_lc)
+    return 0;
+
   /* Don't use output registers outside the register frame.  */
   if (OUT_REGNO_P (to) && to >= OUT_REG (current_frame_info.n_output_regs))
     return 0;
...
...
@@ -3674,6 +3682,40 @@ ia64_override_options ()
   ia64_add_gc_roots ();
 }

+static enum attr_itanium_requires_unit0 ia64_safe_itanium_requires_unit0 PARAMS ((rtx));
+static enum attr_itanium_class ia64_safe_itanium_class PARAMS ((rtx));
+static enum attr_type ia64_safe_type PARAMS ((rtx));
+
+static enum attr_itanium_requires_unit0
+ia64_safe_itanium_requires_unit0 (insn)
+     rtx insn;
+{
+  if (recog_memoized (insn) >= 0)
+    return get_attr_itanium_requires_unit0 (insn);
+  else
+    return ITANIUM_REQUIRES_UNIT0_NO;
+}
+
+static enum attr_itanium_class
+ia64_safe_itanium_class (insn)
+     rtx insn;
+{
+  if (recog_memoized (insn) >= 0)
+    return get_attr_itanium_class (insn);
+  else
+    return ITANIUM_CLASS_UNKNOWN;
+}
+
+static enum attr_type
+ia64_safe_type (insn)
+     rtx insn;
+{
+  if (recog_memoized (insn) >= 0)
+    return get_attr_type (insn);
+  else
+    return TYPE_UNKNOWN;
+}
+
 /* The following collection of routines emit instruction group stop bits as
    necessary to avoid dependencies.  */
...
...
@@ -3744,6 +3786,9 @@ static void rws_update PARAMS ((struct reg_write_state *, int,
 static int rws_access_regno PARAMS ((int, struct reg_flags, int));
 static int rws_access_reg PARAMS ((rtx, struct reg_flags, int));
 static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int));
+static void init_insn_group_barriers PARAMS ((void));
+static int group_barrier_needed_p PARAMS ((rtx));
+static int safe_group_barrier_needed_p PARAMS ((rtx));

 /* Update *RWS for REGNO, which is being written by the current instruction,
    with predicate PRED, and associated register flags in FLAGS.  */
...
...
@@ -4189,6 +4234,8 @@ rtx_needs_barrier (x, flags, pred)
         case 19: /* fetchadd_acq */
         case 20: /* mov = ar.bsp */
         case 21: /* flushrs */
+        case 22: /* bundle selector */
+        case 23: /* cycle display */
           break;

         case 5: /* recip_approx */
...
...
@@ -4279,6 +4326,179 @@ rtx_needs_barrier (x, flags, pred)
   return need_barrier;
 }

 /* Cumulative info for the current instruction group.  */
 struct reg_write_state rws_sum[NUM_REGS];

+/* Clear out the state for group_barrier_needed_p at the start of a
+   sequence of insns.  */
+
+static void
+init_insn_group_barriers ()
+{
+  memset (rws_sum, 0, sizeof (rws_sum));
+}
+
+/* Given the current state, recorded by previous calls to this function,
+   determine whether a group barrier (a stop bit) is necessary before INSN.
+   Return nonzero if so.  */
+
+static int
+group_barrier_needed_p (insn)
+     rtx insn;
+{
+  rtx pat;
+  int need_barrier = 0;
+  struct reg_flags flags;
+
+  memset (&flags, 0, sizeof (flags));
+  switch (GET_CODE (insn))
+    {
+    case NOTE:
+      break;
+
+    case BARRIER:
+      /* A barrier doesn't imply an instruction group boundary.  */
+      break;
+
+    case CODE_LABEL:
+      memset (rws_insn, 0, sizeof (rws_insn));
+      return 1;
+
+    case CALL_INSN:
+      flags.is_branch = 1;
+      flags.is_sibcall = SIBLING_CALL_P (insn);
+      memset (rws_insn, 0, sizeof (rws_insn));
+      need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
+      break;
+
+    case JUMP_INSN:
+      flags.is_branch = 1;
+      /* FALLTHRU */
+
+    case INSN:
+      if (GET_CODE (PATTERN (insn)) == USE
+          || GET_CODE (PATTERN (insn)) == CLOBBER)
+        /* Don't care about USE and CLOBBER "insns"---those are used to
+           indicate to the optimizer that it shouldn't get rid of
+           certain operations.  */
+        break;
+
+      pat = PATTERN (insn);
+
+      /* Ug.  Hack hacks hacked elsewhere.  */
+      switch (recog_memoized (insn))
+        {
+          /* We play dependency tricks with the epilogue in order
+             to get proper schedules.  Undo this for dv analysis.  */
+        case CODE_FOR_epilogue_deallocate_stack:
+          pat = XVECEXP (pat, 0, 0);
+          break;
+
+          /* The pattern we use for br.cloop confuses the code above.
+             The second element of the vector is representative.  */
+        case CODE_FOR_doloop_end_internal:
+          pat = XVECEXP (pat, 0, 1);
+          break;
+
+          /* Doesn't generate code.  */
+        case CODE_FOR_pred_rel_mutex:
+          return 0;
+
+        default:
+          break;
+        }
+
+      memset (rws_insn, 0, sizeof (rws_insn));
+      need_barrier = rtx_needs_barrier (pat, flags, 0);
+
+      /* Check to see if the previous instruction was a volatile
+         asm.  */
+      if (! need_barrier)
+        need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
+
+      break;
+
+    default:
+      abort ();
+    }
+  return need_barrier;
+}
+
+/* Like group_barrier_needed_p, but do not clobber the current state.  */
+
+static int
+safe_group_barrier_needed_p (insn)
+     rtx insn;
+{
+  struct reg_write_state rws_saved[NUM_REGS];
+  int t;
+  memcpy (rws_saved, rws_sum, NUM_REGS * sizeof *rws_saved);
+  t = group_barrier_needed_p (insn);
+  memcpy (rws_sum, rws_saved, NUM_REGS * sizeof *rws_saved);
+  return t;
+}
+
+/* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
+   as necessary to eliminate dependencies.  */
+
+static void
+emit_insn_group_barriers (dump, insns)
+     FILE *dump;
+     rtx insns;
+{
+  rtx insn;
+  rtx last_label = 0;
+  int insns_since_last_label = 0;
+
+  init_insn_group_barriers ();
+
+  for (insn = insns; insn; insn = NEXT_INSN (insn))
+    {
+      if (GET_CODE (insn) == CODE_LABEL)
+        {
+          if (insns_since_last_label)
+            last_label = insn;
+          insns_since_last_label = 0;
+        }
+      else if (GET_CODE (insn) == NOTE
+               && NOTE_LINE_NUMBER (insn) == NOTE_INSN_BASIC_BLOCK)
+        {
+          if (insns_since_last_label)
+            last_label = insn;
+          insns_since_last_label = 0;
+        }
+      else if (GET_CODE (insn) == INSN
+               && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
+               && XINT (PATTERN (insn), 1) == 2)
+        {
+          init_insn_group_barriers ();
+          last_label = 0;
+        }
+      else if (INSN_P (insn))
+        {
+          insns_since_last_label = 1;
+
+          if (group_barrier_needed_p (insn))
+            {
+              if (last_label)
+                {
+                  if (dump)
+                    fprintf (dump, "Emitting stop before label %d\n",
+                             INSN_UID (last_label));
+                  emit_insn_before (gen_insn_group_barrier (GEN_INT (3)),
+                                    last_label);
+                  insn = last_label;
+                }
+              init_insn_group_barriers ();
+              last_label = 0;
+            }
+        }
+    }
+}
+
+static int errata_find_address_regs PARAMS ((rtx *, void *));
+static void errata_emit_nops PARAMS ((rtx));
+static void fixup_errata PARAMS ((void));
+
 /* This structure is used to track some details about the previous insns
    groups so we can determine if it may be necessary to insert NOPs to
    workaround hardware errata.  */
...
...
@@ -4291,20 +4511,6 @@ static struct group
 /* Index into the last_group array.  */
 static int group_idx;

-static void emit_group_barrier_after PARAMS ((rtx));
-static int errata_find_address_regs PARAMS ((rtx *, void *));
-static void errata_emit_nops PARAMS ((rtx));
-
-/* Create a new group barrier, emit it after AFTER, and advance group_idx.  */
-
-static void
-emit_group_barrier_after (after)
-     rtx after;
-{
-  emit_insn_after (gen_insn_group_barrier (), after);
-  group_idx = (group_idx + 1) % 3;
-  memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
-}
-
 /* Called through for_each_rtx; determines if a hard register that was
    conditionally set in the previous group is used as an address register.
    It ensures that for_each_rtx returns 1 in that case.  */
...
...
@@ -4395,197 +4601,1249 @@ errata_emit_nops (insn)
     }

   if (for_each_rtx (&real_pat, errata_find_address_regs, NULL))
     {
-      emit_insn_before (gen_insn_group_barrier (), insn);
+      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
       emit_insn_before (gen_nop (), insn);
-      emit_insn_before (gen_insn_group_barrier (), insn);
+      emit_insn_before (gen_insn_group_barrier (GEN_INT (3)), insn);
     }
 }

-/* INSNS is a chain of instructions.  Scan the chain, and insert stop bits
-   as necessary to eliminate dependencies.  */
-
-static void
-emit_insn_group_barriers (insns)
-     rtx insns;
-{
-  rtx insn, prev_insn;
-
-  memset (rws_sum, 0, sizeof (rws_sum));
-
-  prev_insn = 0;
-  for (insn = insns; insn; insn = NEXT_INSN (insn))
-    {
-      int need_barrier = 0;
-      struct reg_flags flags;
-
-      if ((TARGET_B_STEP || TARGET_A_STEP) && INSN_P (insn))
-        errata_emit_nops (insn);
-
-      memset (&flags, 0, sizeof (flags));
-      switch (GET_CODE (insn))
-        {
-        case NOTE:
-          /* For very small loops we can wind up with extra stop bits
-             inside the loop because of not putting a stop after the
-             assignment to ar.lc before the loop label.  */
-          /* ??? Ideally we'd do this for any register used in the first
-             insn group that's been written recently.  */
-          if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
-            {
-              need_barrier = rws_access_regno (AR_LC_REGNUM, flags, 0);
-              if (need_barrier)
-                {
-                  emit_group_barrier_after (insn);
-                  memset (rws_sum, 0, sizeof (rws_sum));
-                  prev_insn = NULL_RTX;
-                }
-            }
-          break;
-
-        case CALL_INSN:
-          flags.is_branch = 1;
-          flags.is_sibcall = SIBLING_CALL_P (insn);
-          memset (rws_insn, 0, sizeof (rws_insn));
-          need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0);
-          if (need_barrier)
-            {
-              /* PREV_INSN null can happen if the very first insn is a
-                 volatile asm.  */
-              if (prev_insn)
-                emit_group_barrier_after (prev_insn);
-              memcpy (rws_sum, rws_insn, sizeof (rws_sum));
-            }
-
-          /* A call must end a bundle, otherwise the assembler might pack
-             it in with a following branch and then the function return
-             goes to the wrong place.  Do this unconditionally for
-             unconditional calls, simply because it (1) looks nicer and
-             (2) keeps the data structures more accurate for the insns
-             following the call.  */
-          /* ??? A call doesn't have to end a bundle if it is followed by
-             a mutex call or branch.  Two mutex calls/branches can be put in
-             the same bundle.  */
-          need_barrier = 1;
-          if (GET_CODE (PATTERN (insn)) == COND_EXEC)
-            {
-              rtx next_insn = insn;
-              enum attr_type type = TYPE_A;
-
-              do
-                next_insn = next_nonnote_insn (next_insn);
-              while (next_insn
-                     && GET_CODE (next_insn) == INSN
-                     && (GET_CODE (PATTERN (next_insn)) == USE
-                         || GET_CODE (PATTERN (next_insn)) == CLOBBER));
-
-              /* A call ends a bundle if there is a stop bit after it,
-                 or if it is followed by a non-B-type instruction.
-                 In the latter case, we can elide the stop bit, and get
-                 faster code when the predicate is false.  */
-              /* ??? The proper solution for this problem is to make gcc
-                 explicitly bundle instructions.  Then we don't need to
-                 emit stop bits to force the assembler to start a new
-                 bundle.  */
-              /* Check the instruction type if it is not a branch or call.  */
-              if (next_insn && GET_CODE (next_insn) == INSN)
-                type = get_attr_type (next_insn);
-              if (next_insn
-                  && GET_CODE (next_insn) != JUMP_INSN
-                  && GET_CODE (next_insn) != CALL_INSN
-                  && type != TYPE_B
-                  && type != TYPE_UNKNOWN)
-                need_barrier = 0;
-            }
-          if (need_barrier)
-            {
-              emit_group_barrier_after (insn);
-              memset (rws_sum, 0, sizeof (rws_sum));
-              prev_insn = NULL_RTX;
-            }
-          else
-            prev_insn = insn;
-          break;
-
-        case JUMP_INSN:
-          flags.is_branch = 1;
-          /* FALLTHRU */
-
-        case INSN:
-          if (GET_CODE (PATTERN (insn)) == USE)
-            /* Don't care about USE "insns"---those are used to
-               indicate to the optimizer that it shouldn't get rid of
-               certain operations.  */
-            break;
-          else
-            {
-              rtx pat = PATTERN (insn);
-
-              /* Ug.  Hack hacks hacked elsewhere.  */
-              switch (recog_memoized (insn))
-                {
-                  /* We play dependency tricks with the epilogue in order
-                     to get proper schedules.  Undo this for dv analysis.  */
-                case CODE_FOR_epilogue_deallocate_stack:
-                  pat = XVECEXP (pat, 0, 0);
-                  break;
-
-                  /* The pattern we use for br.cloop confuses the code above.
-                     The second element of the vector is representative.  */
-                case CODE_FOR_doloop_end_internal:
-                  pat = XVECEXP (pat, 0, 1);
-                  break;
-
-                  /* Doesn't generate code.  */
-                case CODE_FOR_pred_rel_mutex:
-                  continue;
-
-                default:
-                  break;
-                }
-
-              memset (rws_insn, 0, sizeof (rws_insn));
-              need_barrier |= rtx_needs_barrier (pat, flags, 0);
-
-              /* Check to see if the previous instruction was a volatile
-                 asm.  */
-              if (! need_barrier)
-                need_barrier = rws_access_regno (REG_VOLATILE, flags, 0);
-
-              if (need_barrier)
-                {
-                  /* PREV_INSN null can happen if the very first insn is a
-                     volatile asm.  */
-                  if (prev_insn)
-                    emit_group_barrier_after (prev_insn);
-                  memcpy (rws_sum, rws_insn, sizeof (rws_sum));
-                }
-              prev_insn = insn;
-            }
-          break;
-
-        case BARRIER:
-          /* A barrier doesn't imply an instruction group boundary.  */
-          break;
-
-        case CODE_LABEL:
-          /* Leave prev_insn alone so the barrier gets generated in front
-             of the label, if one is needed.  */
-          break;
-
-        default:
-          abort ();
-        }
-    }
-}

+/* Emit extra nops if they are required to work around hardware errata.  */
+
+static void
+fixup_errata ()
+{
+  rtx insn;
+
+  group_idx = 0;
+  memset (last_group, 0, sizeof last_group);
+
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
+      if (INSN_P (insn) && ia64_safe_type (insn) == TYPE_S)
+        {
+          group_idx = (group_idx + 1) % 3;
+          memset (last_group + group_idx, 0, sizeof last_group[group_idx]);
+        }
+      if ((TARGET_B_STEP || TARGET_A_STEP) && INSN_P (insn))
+        errata_emit_nops (insn);
+    }
+}
+
+/* Instruction scheduling support.  */
+
+/* Describe one bundle.  */
+
+struct bundle
+{
+  /* Zero if there's no possibility of a stop in this bundle other than
+     at the end, otherwise the position of the optional stop bit.  */
+  int possible_stop;
+  /* The types of the three slots.  */
+  enum attr_type t[3];
+  /* The pseudo op to be emitted into the assembler output.  */
+  const char *name;
+};
+
+#define NR_BUNDLES 10
+
+/* A list of all available bundles.  */
+
+static const struct bundle bundle[NR_BUNDLES] =
+{
+  { 2, { TYPE_M, TYPE_I, TYPE_I }, ".mii" },
+  { 1, { TYPE_M, TYPE_M, TYPE_I }, ".mmi" },
+  { 0, { TYPE_M, TYPE_F, TYPE_I }, ".mfi" },
+  { 0, { TYPE_M, TYPE_M, TYPE_F }, ".mmf" },
+#if NR_BUNDLES == 10
+  { 0, { TYPE_B, TYPE_B, TYPE_B }, ".bbb" },
+  { 0, { TYPE_M, TYPE_B, TYPE_B }, ".mbb" },
+#endif
+  { 0, { TYPE_M, TYPE_I, TYPE_B }, ".mib" },
+  { 0, { TYPE_M, TYPE_M, TYPE_B }, ".mmb" },
+  { 0, { TYPE_M, TYPE_F, TYPE_B }, ".mfb" },
+  /* .mfi needs to occur earlier than .mlx, so that we only generate it if
+     it matches an L type insn.  Otherwise we'll try to generate L type
+     nops.  */
+  { 0, { TYPE_M, TYPE_L, TYPE_X }, ".mlx" }
+};
+
+/* Describe a packet of instructions.  Packets consist of two bundles that
+   are visible to the hardware in one scheduling window.  */
+
+struct ia64_packet
+{
+  const struct bundle *t1, *t2;
+  /* Precomputed value of the first split issue in this packet if a cycle
+     starts at its beginning.  */
+  int first_split;
+  /* For convenience, the insn types are replicated here so we don't have
+     to go through T1 and T2 all the time.  */
+  enum attr_type t[6];
+};
+
+/* An array containing all possible packets.  */
+#define NR_PACKETS (NR_BUNDLES * NR_BUNDLES)
+static struct ia64_packet packets[NR_PACKETS];
+
+/* Map attr_type to a string with the name.  */
+static const char *type_names[] =
+{
+  "UNKNOWN", "A", "I", "M", "F", "B", "L", "X", "S"
+};
+
+/* Nonzero if we should insert stop bits into the schedule.  */
+int ia64_final_schedule = 0;
+
+static rtx ia64_single_set PARAMS ((rtx));
+static int insn_matches_slot PARAMS ((const struct ia64_packet *,
+                                      enum attr_type, int, rtx));
+static void ia64_emit_insn_before PARAMS ((rtx, rtx));
+static rtx gen_nop_type PARAMS ((enum attr_type));
+static void finish_last_head PARAMS ((FILE *, int));
+static void rotate_one_bundle PARAMS ((FILE *));
+static void rotate_two_bundles PARAMS ((FILE *));
+static void cycle_end_fill_slots PARAMS ((FILE *));
+static int packet_matches_p PARAMS ((const struct ia64_packet *, int, int *));
+static int get_split PARAMS ((const struct ia64_packet *, int));
+static int find_best_insn PARAMS ((rtx *, enum attr_type *, int,
+                                   const struct ia64_packet *, int));
+static void find_best_packet PARAMS ((int *, const struct ia64_packet **,
+                                      rtx *, enum attr_type *, int));
+static int itanium_reorder PARAMS ((FILE *, rtx *, rtx *, int));
+static void dump_current_packet PARAMS ((FILE *));
+static void schedule_stop PARAMS ((FILE *));
+
+/* Map a bundle number to its pseudo-op.  */
+
+const char *
+get_bundle_name (b)
+     int b;
+{
+  return bundle[b].name;
+}
+
+/* Compute the slot which will cause a split issue in packet P if the
+   current cycle begins at slot BEGIN.  */
+
+static int
+itanium_split_issue (p, begin)
+     const struct ia64_packet *p;
+     int begin;
+{
+  int type_count[TYPE_S];
+  int i;
+  int split = 6;
+
+  if (begin < 3)
+    {
+      /* Always split before and after MMF.  */
+      if (p->t[0] == TYPE_M && p->t[1] == TYPE_M && p->t[2] == TYPE_F)
+        return 3;
+      if (p->t[3] == TYPE_M && p->t[4] == TYPE_M && p->t[5] == TYPE_F)
+        return 3;
+      /* Always split after MBB and BBB.  */
+      if (p->t[1] == TYPE_B)
+        return 3;
+      /* Split after first bundle in MIB BBB combination.  */
+      if (p->t[2] == TYPE_B && p->t[3] == TYPE_B)
+        return 3;
+    }
+
+  memset (type_count, 0, sizeof type_count);
+  for (i = begin; i < split; i++)
+    {
+      enum attr_type t0 = p->t[i];
+      /* An MLX bundle reserves the same units as an MFI bundle.  */
+      enum attr_type t = (t0 == TYPE_L ? TYPE_F
+                          : t0 == TYPE_X ? TYPE_I
+                          : t0);
+      int max = (t == TYPE_B ? 3 : t == TYPE_F ? 1 : 2);
+      if (type_count[t] == max)
+        return i;
+      type_count[t]++;
+    }
+  return split;
+}
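itanium_split_issue models per-cycle issue limits by counting slot types against unit capacities (two M, two I, one F, three B; L and X consume F and I units). The following self-contained program re-implements just that counting loop with local stand-in types — illustrative only, not the GCC enums — and shows that a .mii .mii packet split-issues at slot 4, where the third I insn finds no free unit:

#include <stdio.h>

/* Standalone re-implementation of the resource-counting loop above,
   with local stand-ins for enum attr_type.  Illustrative only.  */
enum type_toy { M_T, I_T, F_T, B_T };

static int split_toy (const enum type_toy t[6], int begin)
{
  int count[4] = { 0, 0, 0, 0 };
  int i;
  for (i = begin; i < 6; i++)
    {
      /* Per cycle: at most 2 M, 2 I, 1 F, 3 B, as in the code above.  */
      int max = (t[i] == B_T ? 3 : t[i] == F_T ? 1 : 2);
      if (count[t[i]] == max)
        return i;
      count[t[i]]++;
    }
  return 6;
}

int main (void)
{
  /* Two .mii bundles: M I I M I I -- the third I has no issue port,
     so the cycle splits at slot 4.  */
  enum type_toy mii_mii[6] = { M_T, I_T, I_T, M_T, I_T, I_T };
  printf ("split at slot %d\n", split_toy (mii_mii, 0));
  return 0;
}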
+/* Return the maximum number of instructions a cpu can issue.  */
+
+int
+ia64_issue_rate ()
+{
+  return 6;
+}
+
+/* Helper function - like single_set, but look inside COND_EXEC.  */
+
+static rtx
+ia64_single_set (insn)
+     rtx insn;
+{
+  rtx x = PATTERN (insn);
+  if (GET_CODE (x) == COND_EXEC)
+    x = COND_EXEC_CODE (x);
+  if (GET_CODE (x) == SET)
+    return x;
+  return single_set_2 (insn, x);
+}
+
+/* Adjust the cost of a scheduling dependency.  Return the new cost of
+   a dependency LINK or INSN on DEP_INSN.  COST is the current cost.  */
+
+int
+ia64_adjust_cost (insn, link, dep_insn, cost)
+     rtx insn, link, dep_insn;
+     int cost;
+{
+  enum attr_type dep_type;
+  enum attr_itanium_class dep_class;
+  enum attr_itanium_class insn_class;
+  rtx dep_set, set, src, addr;
+
+  if (GET_CODE (PATTERN (insn)) == CLOBBER
+      || GET_CODE (PATTERN (insn)) == USE
+      || GET_CODE (PATTERN (dep_insn)) == CLOBBER
+      || GET_CODE (PATTERN (dep_insn)) == USE
+      /* @@@ Not accurate for indirect calls.  */
+      || GET_CODE (insn) == CALL_INSN
+      || ia64_safe_type (insn) == TYPE_S)
+    return 0;
+
+  if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT
+      || REG_NOTE_KIND (link) == REG_DEP_ANTI)
+    return 0;
+
+  dep_type = ia64_safe_type (dep_insn);
+  dep_class = ia64_safe_itanium_class (dep_insn);
+  insn_class = ia64_safe_itanium_class (insn);
+
+  /* Compares that feed a conditional branch can execute in the same
+     cycle.  */
+  dep_set = ia64_single_set (dep_insn);
+  set = ia64_single_set (insn);
+
+  if (dep_type != TYPE_F
+      && dep_set
+      && GET_CODE (SET_DEST (dep_set)) == REG
+      && PR_REG (REGNO (SET_DEST (dep_set)))
+      && GET_CODE (insn) == JUMP_INSN)
+    return 0;
+
+  if (dep_set && GET_CODE (SET_DEST (dep_set)) == MEM)
+    {
+      /* ??? Can't find any information in the documentation about whether
+         a sequence
+           st [rx] = ra
+           ld rb = [ry]
+         splits issue.  Assume it doesn't.  */
+      return 0;
+    }
+
+  src = set ? SET_SRC (set) : 0;
+  addr = 0;
+  if (set && GET_CODE (SET_DEST (set)) == MEM)
+    addr = XEXP (SET_DEST (set), 0);
+  else if (set && GET_CODE (src) == MEM)
+    addr = XEXP (src, 0);
+  else if (set && GET_CODE (src) == ZERO_EXTEND
+           && GET_CODE (XEXP (src, 0)) == MEM)
+    addr = XEXP (XEXP (src, 0), 0);
+  else if (set && GET_CODE (src) == UNSPEC
+           && XVECLEN (XEXP (src, 0), 0) > 0
+           && GET_CODE (XVECEXP (src, 0, 0)) == MEM)
+    addr = XEXP (XVECEXP (src, 0, 0), 0);
+  if (addr && GET_CODE (addr) == POST_MODIFY)
+    addr = XEXP (addr, 0);
+
+  set = ia64_single_set (dep_insn);
+
+  if ((dep_class == ITANIUM_CLASS_IALU
+       || dep_class == ITANIUM_CLASS_ILOG
+       || dep_class == ITANIUM_CLASS_LD)
+      && (insn_class == ITANIUM_CLASS_LD
+          || insn_class == ITANIUM_CLASS_ST))
+    {
+      if (! addr || ! set)
+        abort ();
+      /* This isn't completely correct - an IALU that feeds an address has
+         a latency of 1 cycle if it's issued in an M slot, but 2 cycles
+         otherwise.  Unfortunately there's no good way to describe this.  */
+      if (reg_overlap_mentioned_p (SET_DEST (set), addr))
+        return cost + 1;
+    }
+  if ((dep_class == ITANIUM_CLASS_IALU
+       || dep_class == ITANIUM_CLASS_ILOG
+       || dep_class == ITANIUM_CLASS_LD)
+      && (insn_class == ITANIUM_CLASS_MMMUL
+          || insn_class == ITANIUM_CLASS_MMSHF
+          || insn_class == ITANIUM_CLASS_MMSHFI))
+    return 3;
+  if (dep_class == ITANIUM_CLASS_FMAC
+      && (insn_class == ITANIUM_CLASS_FMISC
+          || insn_class == ITANIUM_CLASS_FCVTFX
+          || insn_class == ITANIUM_CLASS_XMPY))
+    return 7;
+  if ((dep_class == ITANIUM_CLASS_FMAC
+       || dep_class == ITANIUM_CLASS_FMISC
+       || dep_class == ITANIUM_CLASS_FCVTFX
+       || dep_class == ITANIUM_CLASS_XMPY)
+      && insn_class == ITANIUM_CLASS_STF)
+    return 8;
+  if ((dep_class == ITANIUM_CLASS_MMMUL
+       || dep_class == ITANIUM_CLASS_MMSHF
+       || dep_class == ITANIUM_CLASS_MMSHFI)
+      && (insn_class == ITANIUM_CLASS_LD
+          || insn_class == ITANIUM_CLASS_ST
+          || insn_class == ITANIUM_CLASS_IALU
+          || insn_class == ITANIUM_CLASS_ILOG
+          || insn_class == ITANIUM_CLASS_ISHF))
+    return 4;
+
+  return cost;
+}
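The chain of class tests above amounts to a small bypass-latency table keyed on (producer class, consumer class). A table-driven restatement of one rule — the 8-cycle FMAC-to-STF entry comes from the code above, while the class names and everything else here are local stand-ins, not GCC's attribute enums:

/* Table-driven restatement of one bypass rule from ia64_adjust_cost:
   an FMAC result consumed by a floating-point store (STF) costs 8
   cycles.  Classes are illustrative stand-ins.  */
enum cls_toy { IALU_C, LD_C, FMAC_C, STF_C, NCLS_C };

static int bypass_cost (enum cls_toy dep, enum cls_toy insn, int cost)
{
  static const int table[NCLS_C][NCLS_C] = {
    /* consumer:  IALU LD FMAC STF */
    /* IALU */  {  0,  0,  0,  0 },
    /* LD   */  {  0,  0,  0,  0 },
    /* FMAC */  {  0,  0,  0,  8 },
    /* STF  */  {  0,  0,  0,  0 },
  };
  return table[dep][insn] ? table[dep][insn] : cost;
}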
+/* Describe the current state of the Itanium pipeline.  */
+static struct
+{
+  /* The first slot that is used in the current cycle.  */
+  int first_slot;
+  /* The next slot to fill.  */
+  int cur;
+  /* The packet we have selected for the current issue window.  */
+  const struct ia64_packet *packet;
+  /* The position of the split issue that occurs due to issue width
+     limitations (6 if there's no split issue).  */
+  int split;
+  /* Record data about the insns scheduled so far in the same issue
+     window.  The elements up to but not including FIRST_SLOT belong
+     to the previous cycle, the ones starting with FIRST_SLOT belong
+     to the current cycle.  */
+  enum attr_type types[6];
+  rtx insns[6];
+  int stopbit[6];
+  /* Nonzero if we decided to schedule a stop bit.  */
+  int last_was_stop;
+} sched_data;
+
+/* Temporary arrays; they have enough elements to hold all insns that
+   can be ready at the same time while scheduling of the current block.
+   SCHED_READY can hold ready insns, SCHED_TYPES their types.  */
+static rtx *sched_ready;
+static enum attr_type *sched_types;
+
+/* Determine whether an insn INSN of type ITYPE can fit into slot SLOT
+   of packet P.  */
+
+static int
+insn_matches_slot (p, itype, slot, insn)
+     const struct ia64_packet *p;
+     enum attr_type itype;
+     int slot;
+     rtx insn;
+{
+  enum attr_itanium_requires_unit0 u0;
+  enum attr_type stype = p->t[slot];
+
+  if (insn)
+    {
+      u0 = ia64_safe_itanium_requires_unit0 (insn);
+      if (u0 == ITANIUM_REQUIRES_UNIT0_YES)
+        {
+          int i;
+          for (i = sched_data.first_slot; i < slot; i++)
+            if (p->t[i] == stype)
+              return 0;
+        }
+      if (GET_CODE (insn) == CALL_INSN)
+        {
+          /* Reject calls in multiway branch packets.  We want to limit
+             the number of multiway branches we generate (since the branch
+             predictor is limited), and this seems to work fairly well.
+             (If we didn't do this, we'd have to add another test here to
+             force calls into the third slot of the bundle.)  */
+          if (slot < 3)
+            {
+              if (p->t[1] == TYPE_B)
+                return 0;
+            }
+          else
+            {
+              if (p->t[4] == TYPE_B)
+                return 0;
+            }
+        }
+    }
+
+  if (itype == stype)
+    return 1;
+  if (itype == TYPE_A)
+    return stype == TYPE_M || stype == TYPE_I;
+  return 0;
+}
+
+/* Like emit_insn_before, but skip cycle_display insns.  This makes the
+   assembly output a bit prettier.  */
+
+static void
+ia64_emit_insn_before (insn, before)
+     rtx insn, before;
+{
+  rtx prev = PREV_INSN (before);
+  if (prev && GET_CODE (prev) == INSN
+      && GET_CODE (PATTERN (prev)) == UNSPEC
+      && XINT (PATTERN (prev), 1) == 23)
+    before = prev;
+  emit_insn_before (insn, before);
+}
+
+/* Generate a nop insn of the given type.  Note we never generate L type
+   nops.  */
+
+static rtx
+gen_nop_type (t)
+     enum attr_type t;
+{
+  switch (t)
+    {
+    case TYPE_M:
+      return gen_nop_m ();
+    case TYPE_I:
+      return gen_nop_i ();
+    case TYPE_B:
+      return gen_nop_b ();
+    case TYPE_F:
+      return gen_nop_f ();
+    case TYPE_X:
+      return gen_nop_x ();
+    default:
+      abort ();
+    }
+}
+/* When rotating a bundle out of the issue window, insert a bundle selector
+   insn in front of it.  DUMP is the scheduling dump file or NULL.  START
+   is either 0 or 3, depending on whether we want to emit a bundle selector
+   for the first bundle or the second bundle in the current issue window.
+   The selector insns are emitted this late because the selected packet can
+   be changed until parts of it get rotated out.  */
+
+static void
+finish_last_head (dump, start)
+     FILE *dump;
+     int start;
+{
+  const struct ia64_packet *p = sched_data.packet;
+  const struct bundle *b = start == 0 ? p->t1 : p->t2;
+  int bundle_type = b - bundle;
+  rtx insn;
+  int i;
+
+  if (! ia64_final_schedule)
+    return;
+
+  for (i = start; sched_data.insns[i] == 0; i++)
+    if (i == start + 3)
+      abort ();
+  insn = sched_data.insns[i];
+
+  if (dump)
+    fprintf (dump, "// Emitting template before %d: %s\n",
+             INSN_UID (insn), b->name);
+
+  ia64_emit_insn_before (gen_bundle_selector (GEN_INT (bundle_type)), insn);
+}
+
+/* We can't schedule more insns this cycle.  Fix up the scheduling state
+   and advance FIRST_SLOT and CUR.
+   We have to distribute the insns that are currently found between
+   FIRST_SLOT and CUR into the slots of the packet we have selected.  So
+   far, they are stored successively in the fields starting at FIRST_SLOT;
+   now they must be moved to the correct slots.
+   DUMP is the current scheduling dump file, or NULL.  */
+
+static void
+cycle_end_fill_slots (dump)
+     FILE *dump;
+{
+  const struct ia64_packet *packet = sched_data.packet;
+  int slot, i;
+  enum attr_type tmp_types[6];
+  rtx tmp_insns[6];
+
+  memcpy (tmp_types, sched_data.types, 6 * sizeof (enum attr_type));
+  memcpy (tmp_insns, sched_data.insns, 6 * sizeof (rtx));
+
+  for (i = slot = sched_data.first_slot; i < sched_data.cur; i++)
+    {
+      enum attr_type t = tmp_types[i];
+      if (t != ia64_safe_type (tmp_insns[i]))
+        abort ();
+      while (! insn_matches_slot (packet, t, slot, tmp_insns[i]))
+        {
+          if (slot > sched_data.split)
+            abort ();
+          if (dump)
+            fprintf (dump, "// Packet needs %s, have %s\n",
+                     type_names[packet->t[slot]], type_names[t]);
+          sched_data.types[slot] = packet->t[slot];
+          sched_data.insns[slot] = 0;
+          sched_data.stopbit[slot] = 0;
+          slot++;
+        }
+      /* Do _not_ use T here.  If T == TYPE_A, then we'd risk changing the
+         actual slot type later.  */
+      sched_data.types[slot] = packet->t[slot];
+      sched_data.insns[slot] = tmp_insns[i];
+      sched_data.stopbit[slot] = 0;
+      slot++;
+    }
+
+  /* This isn't right - there's no need to pad out until the forced split;
+     the CPU will automatically split if an insn isn't ready.  */
+#if 0
+  while (slot < sched_data.split)
+    {
+      sched_data.types[slot] = packet->t[slot];
+      sched_data.insns[slot] = 0;
+      sched_data.stopbit[slot] = 0;
+      slot++;
+    }
+#endif
+
+  sched_data.first_slot = sched_data.cur = slot;
+}
+
+/* Bundle rotations, as described in the Itanium optimization manual.
+   We can rotate either one or both bundles out of the issue window.
+   DUMP is the current scheduling dump file, or NULL.  */
+
+static void
+rotate_one_bundle (dump)
+     FILE *dump;
+{
+  if (dump)
+    fprintf (dump, "// Rotating one bundle.\n");
+
+  finish_last_head (dump, 0);
+  if (sched_data.cur > 3)
+    {
+      sched_data.cur -= 3;
+      sched_data.first_slot -= 3;
+      memmove (sched_data.types, sched_data.types + 3,
+               sched_data.cur * sizeof *sched_data.types);
+      memmove (sched_data.stopbit, sched_data.stopbit + 3,
+               sched_data.cur * sizeof *sched_data.stopbit);
+      memmove (sched_data.insns, sched_data.insns + 3,
+               sched_data.cur * sizeof *sched_data.insns);
+    }
+  else
+    {
+      sched_data.cur = 0;
+      sched_data.first_slot = 0;
+    }
+}
+
+static void
+rotate_two_bundles (dump)
+     FILE *dump;
+{
+  if (dump)
+    fprintf (dump, "// Rotating two bundles.\n");
+
+  if (sched_data.cur == 0)
+    return;
+
+  finish_last_head (dump, 0);
+  if (sched_data.cur > 3)
+    finish_last_head (dump, 3);
+  sched_data.cur = 0;
+  sched_data.first_slot = 0;
+}
+/* We're beginning a new block.  Initialize data structures as necessary.  */
+
+void
+ia64_sched_init (dump, sched_verbose, max_ready)
+     FILE *dump ATTRIBUTE_UNUSED;
+     int sched_verbose ATTRIBUTE_UNUSED;
+     int max_ready;
+{
+  static int initialized = 0;
+
+  if (! initialized)
+    {
+      int b1, b2, i;
+
+      initialized = 1;
+
+      for (i = b1 = 0; b1 < NR_BUNDLES; b1++)
+        {
+          const struct bundle *t1 = bundle + b1;
+          for (b2 = 0; b2 < NR_BUNDLES; b2++, i++)
+            {
+              const struct bundle *t2 = bundle + b2;
+
+              packets[i].t1 = t1;
+              packets[i].t2 = t2;
+            }
+        }
+      for (i = 0; i < NR_PACKETS; i++)
+        {
+          int j;
+          for (j = 0; j < 3; j++)
+            packets[i].t[j] = packets[i].t1->t[j];
+          for (j = 0; j < 3; j++)
+            packets[i].t[j + 3] = packets[i].t2->t[j];
+          packets[i].first_split = itanium_split_issue (packets + i, 0);
+        }
+    }
+
+  init_insn_group_barriers ();
+
+  memset (&sched_data, 0, sizeof sched_data);
+  sched_types = (enum attr_type *) xmalloc (max_ready
+                                            * sizeof (enum attr_type));
+  sched_ready = (rtx *) xmalloc (max_ready * sizeof (rtx));
+}
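The one-time setup above enumerates all NR_BUNDLES x NR_BUNDLES bundle pairs row-major, so the packet for bundles (b1, b2) lives at index b1 * NR_BUNDLES + b2. A small self-contained sketch of that indexing (NR_BUNDLES_TOY is a stand-in for NR_BUNDLES):

#include <assert.h>

/* Row-major packet indexing as built by the init loop above;
   NR_BUNDLES_TOY mirrors NR_BUNDLES == 10.  Illustrative only.  */
#define NR_BUNDLES_TOY 10
#define NR_PACKETS_TOY (NR_BUNDLES_TOY * NR_BUNDLES_TOY)

static int packet_index (int b1, int b2)
{
  return b1 * NR_BUNDLES_TOY + b2;
}

int main (void)
{
  assert (packet_index (0, 0) == 0);
  assert (packet_index (9, 9) == NR_PACKETS_TOY - 1);   /* 99 */
  return 0;
}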
+/* See if the packet P can match the insns we have already scheduled.  Return
+   nonzero if so.  In *PSLOT, we store the first slot that is available for
+   more instructions if we choose this packet.
+   SPLIT holds the last slot we can use, there's a split issue after it so
+   scheduling beyond it would cause us to use more than one cycle.  */
+
+static int
+packet_matches_p (p, split, pslot)
+     const struct ia64_packet *p;
+     int split;
+     int *pslot;
+{
+  int filled = sched_data.cur;
+  int first = sched_data.first_slot;
+  int i, slot;
+
+  /* First, check if the first of the two bundles must be a specific one (due
+     to stop bits).  */
+  if (first > 0 && sched_data.stopbit[0] && p->t1->possible_stop != 1)
+    return 0;
+  if (first > 1 && sched_data.stopbit[1] && p->t1->possible_stop != 2)
+    return 0;
+
+  for (i = 0; i < first; i++)
+    if (! insn_matches_slot (p, sched_data.types[i], i,
+                             sched_data.insns[i]))
+      return 0;
+  for (i = slot = first; i < filled; i++)
+    {
+      while (slot < split)
+        {
+          if (insn_matches_slot (p, sched_data.types[i], slot,
+                                 sched_data.insns[i]))
+            break;
+          slot++;
+        }
+      if (slot == split)
+        return 0;
+      slot++;
+    }
+
+  if (pslot)
+    *pslot = slot;
+  return 1;
+}
+
+/* A frontend for itanium_split_issue.  For a packet P and a slot
+   number FIRST that describes the start of the current clock cycle,
+   return the slot number of the first split issue.  This function
+   uses the cached number found in P if possible.  */
+
+static int
+get_split (p, first)
+     const struct ia64_packet *p;
+     int first;
+{
+  if (first == 0)
+    return p->first_split;
+  return itanium_split_issue (p, first);
+}
+/* Given N_READY insns in the array READY, whose types are found in the
+   corresponding array TYPES, return the insn that is best suited to be
+   scheduled in slot SLOT of packet P.  */
+
+static int
+find_best_insn (ready, types, n_ready, p, slot)
+     rtx *ready;
+     enum attr_type *types;
+     int n_ready;
+     const struct ia64_packet *p;
+     int slot;
+{
+  int best = -1;
+  int best_pri = 0;
+  while (n_ready-- > 0)
+    {
+      rtx insn = ready[n_ready];
+      if (! insn)
+        continue;
+      if (best >= 0 && INSN_PRIORITY (ready[n_ready]) < best_pri)
+        break;
+      /* If we have equally good insns, one of which has a stricter
+         slot requirement, prefer the one with the stricter requirement.  */
+      if (best >= 0 && types[n_ready] == TYPE_A)
+        continue;
+      if (insn_matches_slot (p, types[n_ready], slot, insn))
+        {
+          best = n_ready;
+          best_pri = INSN_PRIORITY (ready[best]);
+
+          /* If there's no way we could get a stricter requirement, stop
+             looking now.  */
+          if (types[n_ready] != TYPE_A
+              && ia64_safe_itanium_requires_unit0 (ready[n_ready]))
+            break;
+          break;
+        }
+    }
+  return best;
+}
+/* Select the best packet to use given the current scheduler state and the
+   current ready list.
+   READY is an array holding N_READY ready insns; TYPES is a corresponding
+   array that holds their types.  Store the best packet in *PPACKET and the
+   number of insns that can be scheduled in the current cycle in *PBEST.  */
+
+static void
+find_best_packet (pbest, ppacket, ready, types, n_ready)
+     int *pbest;
+     const struct ia64_packet **ppacket;
+     rtx *ready;
+     enum attr_type *types;
+     int n_ready;
+{
+  int first = sched_data.first_slot;
+  int best = 0;
+  int lowest_end = 6;
+  const struct ia64_packet *best_packet;
+  int i;
+
+  for (i = 0; i < NR_PACKETS; i++)
+    {
+      const struct ia64_packet *p = packets + i;
+      int slot;
+      int split = get_split (p, first);
+      int win = 0;
+      int first_slot, last_slot;
+      int b_nops = 0;
+
+      if (! packet_matches_p (p, split, &first_slot))
+        continue;
+
+      memcpy (sched_ready, ready, n_ready * sizeof (rtx));
+
+      win = 0;
+      last_slot = 6;
+      for (slot = first_slot; slot < split; slot++)
+        {
+          int insn_nr;
+
+          /* Disallow a degenerate case where the first bundle doesn't
+             contain anything but NOPs!  */
+          if (first_slot == 0 && win == 0 && slot == 3)
+            {
+              win = -1;
+              break;
+            }
+
+          insn_nr = find_best_insn (sched_ready, types, n_ready, p, slot);
+          if (insn_nr >= 0)
+            {
+              sched_ready[insn_nr] = 0;
+              last_slot = slot;
+              win++;
+            }
+          else if (p->t[slot] == TYPE_B)
+            b_nops++;
+        }
+      /* We must disallow MBB/BBB packets if any of their B slots would be
+         filled with nops.  */
+      if (last_slot < 3)
+        {
+          if (p->t[1] == TYPE_B && (b_nops || last_slot < 2))
+            win = -1;
+        }
+      else
+        {
+          if (p->t[4] == TYPE_B && (b_nops || last_slot < 5))
+            win = -1;
+        }
+
+      if (win > best
+          || (win == best && last_slot < lowest_end))
+        {
+          best = win;
+          lowest_end = last_slot;
+          best_packet = p;
+        }
+    }
+  *pbest = best;
+  *ppacket = best_packet;
+}
+/* Reorder the ready list so that the insns that can be issued in this cycle
+   are found in the correct order at the end of the list.
+   DUMP is the scheduling dump file, or NULL.  READY points to the start,
+   E_READY to the end of the ready list.  MAY_FAIL determines what should be
+   done if no insns can be scheduled in this cycle: if it is zero, we abort,
+   otherwise we return 0.
+   Return 1 if any insns can be scheduled in this cycle.  */
+
+static int
+itanium_reorder (dump, ready, e_ready, may_fail)
+     FILE *dump;
+     rtx *ready;
+     rtx *e_ready;
+     int may_fail;
+{
+  const struct ia64_packet *best_packet;
+  int n_ready = e_ready - ready;
+  int first = sched_data.first_slot;
+  int i, best, best_split, filled;
+
+  for (i = 0; i < n_ready; i++)
+    sched_types[i] = ia64_safe_type (ready[i]);
+
+  find_best_packet (&best, &best_packet, ready, sched_types, n_ready);
+
+  if (best == 0)
+    {
+      if (may_fail)
+        return 0;
+      abort ();
+    }
+
+  if (dump)
+    {
+      fprintf (dump, "// Selected bundles: %s %s (%d insns)\n",
+               best_packet->t1->name,
+               best_packet->t2 ? best_packet->t2->name : NULL, best);
+    }
+
+  best_split = itanium_split_issue (best_packet, first);
+  packet_matches_p (best_packet, best_split, &filled);
+
+  for (i = filled; i < best_split; i++)
+    {
+      int insn_nr;
+
+      insn_nr = find_best_insn (ready, sched_types, n_ready, best_packet, i);
+      if (insn_nr >= 0)
+        {
+          rtx insn = ready[insn_nr];
+          memmove (ready + insn_nr, ready + insn_nr + 1,
+                   (n_ready - insn_nr - 1) * sizeof (rtx));
+          memmove (sched_types + insn_nr, sched_types + insn_nr + 1,
+                   (n_ready - insn_nr - 1) * sizeof (enum attr_type));
+          ready[--n_ready] = insn;
+        }
+    }
+
+  sched_data.packet = best_packet;
+  sched_data.split = best_split;
+  return 1;
+}
+/* Dump information about the current scheduling state to file DUMP.  */
+
+static void
+dump_current_packet (dump)
+     FILE *dump;
+{
+  int i;
+  fprintf (dump, "// %d slots filled:", sched_data.cur);
+  for (i = 0; i < sched_data.first_slot; i++)
+    {
+      rtx insn = sched_data.insns[i];
+      fprintf (dump, " %s", type_names[sched_data.types[i]]);
+      if (insn)
+        fprintf (dump, "/%s", type_names[ia64_safe_type (insn)]);
+      if (sched_data.stopbit[i])
+        fprintf (dump, " ;;");
+    }
+  fprintf (dump, " :::");
+  for (i = sched_data.first_slot; i < sched_data.cur; i++)
+    {
+      rtx insn = sched_data.insns[i];
+      enum attr_type t = ia64_safe_type (insn);
+      fprintf (dump, " (%d) %s", INSN_UID (insn), type_names[t]);
+    }
+  fprintf (dump, "\n");
+}
+/* Schedule a stop bit.  DUMP is the current scheduling dump file, or
+   NULL.  */
+
+static void
+schedule_stop (dump)
+     FILE *dump;
+{
+  const struct ia64_packet *best = sched_data.packet;
+  int i;
+  int best_stop = 6;
+
+  if (dump)
+    fprintf (dump, "// Stop bit, cur = %d.\n", sched_data.cur);
+
+  if (sched_data.cur == 0)
+    {
+      if (dump)
+        fprintf (dump, "// At start of bundle, so nothing to do.\n");
+      rotate_two_bundles (NULL);
+      return;
+    }
+
+  for (i = -1; i < NR_PACKETS; i++)
+    {
+      /* This is a slight hack to give the current packet the first chance.
+         This is done to avoid e.g. switching from MIB to MBB bundles.  */
+      const struct ia64_packet *p = (i >= 0 ? packets + i
+                                     : sched_data.packet);
+      int split = get_split (p, sched_data.first_slot);
+      const struct bundle *compare;
+      int next, stoppos;
+
+      if (! packet_matches_p (p, split, &next))
+        continue;
+
+      compare = next > 3 ? p->t2 : p->t1;
+
+      stoppos = 3;
+      if (compare->possible_stop)
+        stoppos = compare->possible_stop;
+      if (next > 3)
+        stoppos += 3;
+
+      if (stoppos < next || stoppos >= best_stop)
+        {
+          if (compare->possible_stop == 0)
+            continue;
+          stoppos = (next > 3 ? 6 : 3);
+        }
+      if (stoppos < next || stoppos >= best_stop)
+        continue;
+
+      if (dump)
+        fprintf (dump, "// switching from %s %s to %s %s (stop at %d)\n",
+                 best->t1->name, best->t2->name, p->t1->name, p->t2->name,
+                 stoppos);
+
+      best_stop = stoppos;
+      best = p;
+    }
+
+  sched_data.packet = best;
+  cycle_end_fill_slots (dump);
+  while (sched_data.cur < best_stop)
+    {
+      sched_data.types[sched_data.cur] = best->t[sched_data.cur];
+      sched_data.insns[sched_data.cur] = 0;
+      sched_data.stopbit[sched_data.cur] = 0;
+      sched_data.cur++;
+    }
+  sched_data.stopbit[sched_data.cur - 1] = 1;
+  sched_data.first_slot = best_stop;
+
+  if (dump)
+    dump_current_packet (dump);
+}
+/* We are about to begin issuing insns for this clock cycle.
+   Override the default sort algorithm to better slot instructions.  */
+
+int
+ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, reorder_type)
+     FILE *dump ATTRIBUTE_UNUSED;
+     int sched_verbose ATTRIBUTE_UNUSED;
+     rtx *ready;
+     int *pn_ready;
+     int reorder_type;
+{
+  int n_ready = *pn_ready;
+  rtx *e_ready = ready + n_ready;
+  rtx *insnp;
+  rtx highest;
+
+  if (sched_verbose)
+    {
+      fprintf (dump, "// ia64_sched_reorder (type %d):\n", reorder_type);
+      dump_current_packet (dump);
+    }
+
+  /* First, move all USEs, CLOBBERs and other crud out of the way.  */
+  highest = ready[n_ready - 1];
+  for (insnp = ready; insnp < e_ready; insnp++)
+    if (insnp < e_ready)
+      {
+        rtx insn = *insnp;
+        enum attr_type t = ia64_safe_type (insn);
+        if (t == TYPE_UNKNOWN)
+          {
+            highest = ready[n_ready - 1];
+            ready[n_ready - 1] = insn;
+            *insnp = highest;
+            if (group_barrier_needed_p (insn))
+              {
+                schedule_stop (sched_verbose ? dump : NULL);
+                sched_data.last_was_stop = 1;
+              }
+            return 1;
+          }
+      }
+
+  if (ia64_final_schedule)
+    {
+      int nr_need_stop = 0;
+
+      for (insnp = ready; insnp < e_ready; insnp++)
+        if (safe_group_barrier_needed_p (*insnp))
+          nr_need_stop++;
+
+      /* Schedule a stop bit if
+          - all insns require a stop bit, or
+          - we are starting a new cycle and _any_ insns require a stop bit.
+         The reason for the latter is that if our schedule is accurate, then
+         the additional stop won't decrease performance at this point (since
+         there's a split issue at this point anyway), but it gives us more
+         freedom when scheduling the currently ready insns.  */
+      if ((reorder_type == 0 && nr_need_stop)
+          || (reorder_type == 1 && n_ready == nr_need_stop))
+        {
+          schedule_stop (sched_verbose ? dump : NULL);
+          sched_data.last_was_stop = 1;
+          if (reorder_type == 1)
+            return 0;
+        }
+      else
+        {
+          int deleted = 0;
+          insnp = e_ready;
+          /* Move down everything that needs a stop bit, preserving relative
+             order.  */
+          while (insnp-- > ready + deleted)
+            while (insnp >= ready + deleted)
+              {
+                rtx insn = *insnp;
+                if (! safe_group_barrier_needed_p (insn))
+                  break;
+                memmove (ready + 1, ready, (insnp - ready) * sizeof (rtx));
+                *ready = insn;
+                deleted++;
+              }
+          n_ready -= deleted;
+          ready += deleted;
+          if (deleted != nr_need_stop)
+            abort ();
+        }
+    }
+
+  if (reorder_type == 0)
+    {
+      if (sched_data.cur == 6)
+        rotate_two_bundles (sched_verbose ? dump : NULL);
+      else if (sched_data.cur >= 3)
+        rotate_one_bundle (sched_verbose ? dump : NULL);
+      sched_data.first_slot = sched_data.cur;
+    }
+
+  return itanium_reorder (sched_verbose ? dump : NULL,
+                          ready, e_ready, reorder_type == 1);
+}
+/* Like ia64_sched_reorder, but called after issuing each insn.
+   Override the default sort algorithm to better slot instructions.  */
+
+int
+ia64_sched_reorder2 (dump, sched_verbose, ready, pn_ready, clock_var)
+     FILE *dump ATTRIBUTE_UNUSED;
+     int sched_verbose ATTRIBUTE_UNUSED;
+     rtx *ready;
+     int *pn_ready;
+     int clock_var ATTRIBUTE_UNUSED;
+{
+  if (sched_data.last_was_stop)
+    return 0;
+
+  /* Detect one special case and try to optimize it.
+     If we have 1.M;;MI 2.MIx, and slots 2.1 (M) and 2.2 (I) are both NOPs,
+     then we can get better code by transforming this to 1.MFB;; 2.MIx.  */
+  if (sched_data.first_slot == 1
+      && sched_data.stopbit[0]
+      && ((sched_data.cur == 4
+           && (sched_data.types[1] == TYPE_M || sched_data.types[1] == TYPE_A)
+           && (sched_data.types[2] == TYPE_I || sched_data.types[2] == TYPE_A)
+           && (sched_data.types[3] != TYPE_M && sched_data.types[3] != TYPE_A))
+          || (sched_data.cur == 3
+              && (sched_data.types[1] == TYPE_M
+                  || sched_data.types[1] == TYPE_A)
+              && (sched_data.types[2] != TYPE_M
+                  && sched_data.types[2] != TYPE_I
+                  && sched_data.types[2] != TYPE_A))))
+    {
+      int i, best;
+      rtx stop = PREV_INSN (sched_data.insns[1]);
+      rtx pat;
+
+      sched_data.stopbit[0] = 0;
+      sched_data.stopbit[2] = 1;
+      if (GET_CODE (stop) != INSN)
+        abort ();
+
+      pat = PATTERN (stop);
+      /* Ignore cycle displays.  */
+      if (GET_CODE (pat) == UNSPEC && XINT (pat, 1) == 23)
+        stop = PREV_INSN (stop);
+      pat = PATTERN (stop);
+      if (GET_CODE (pat) != UNSPEC_VOLATILE
+          || XINT (pat, 1) != 2
+          || INTVAL (XVECEXP (pat, 0, 0)) != 1)
+        abort ();
+      XVECEXP (pat, 0, 0) = GEN_INT (3);
+
+      sched_data.types[5] = sched_data.types[3];
+      sched_data.types[4] = sched_data.types[2];
+      sched_data.types[3] = sched_data.types[1];
+      sched_data.insns[5] = sched_data.insns[3];
+      sched_data.insns[4] = sched_data.insns[2];
+      sched_data.insns[3] = sched_data.insns[1];
+      sched_data.stopbit[5] = sched_data.stopbit[4] = sched_data.stopbit[3] = 0;
+      sched_data.cur += 2;
+      sched_data.first_slot = 3;
+      for (i = 0; i < NR_PACKETS; i++)
+        {
+          const struct ia64_packet *p = packets + i;
+          if (p->t[0] == TYPE_M && p->t[1] == TYPE_F && p->t[2] == TYPE_B)
+            {
+              sched_data.packet = p;
+              break;
+            }
+        }
+      rotate_one_bundle (sched_verbose ? dump : NULL);
+
+      best = 6;
+      for (i = 0; i < NR_PACKETS; i++)
+        {
+          const struct ia64_packet *p = packets + i;
+          int split = get_split (p, sched_data.first_slot);
+          int next;
+
+          /* Disallow multiway branches here.  */
+          if (p->t[1] == TYPE_B)
+            continue;
+
+          if (packet_matches_p (p, split, &next) && next < best)
+            {
+              best = next;
+              sched_data.packet = p;
+              sched_data.split = split;
+            }
+        }
+      if (best == 6)
+        abort ();
+    }
+
+  if (*pn_ready > 0)
+    {
+      int more = ia64_sched_reorder (dump, sched_verbose, ready, pn_ready, 1);
+      if (more)
+        return more;
+      /* Did we schedule a stop?  If so, finish this cycle.  */
+      if (sched_data.cur == sched_data.first_slot)
+        return 0;
+    }
+
+  if (sched_verbose)
+    fprintf (dump, "// Can't issue more this cycle; updating type array.\n");
+
+  cycle_end_fill_slots (sched_verbose ? dump : NULL);
+  if (sched_verbose)
+    dump_current_packet (dump);
+  return 0;
+}
+/* We are about to issue INSN.  Return the number of insns left on the
+   ready queue that can be issued this cycle.  */
+
+int
+ia64_variable_issue (dump, sched_verbose, insn, can_issue_more)
+     FILE *dump;
+     int sched_verbose;
+     rtx insn;
+     int can_issue_more ATTRIBUTE_UNUSED;
+{
+  enum attr_type t = ia64_safe_type (insn);
+
+  if (sched_data.last_was_stop)
+    {
+      int t = sched_data.first_slot;
+      if (t == 0)
+        t = 3;
+      ia64_emit_insn_before (gen_insn_group_barrier (GEN_INT (t)), insn);
+      init_insn_group_barriers ();
+      sched_data.last_was_stop = 0;
+    }
+
+  if (t == TYPE_UNKNOWN)
+    {
+      if (sched_verbose)
+        fprintf (dump, "// Ignoring type %s\n", type_names[t]);
+      return 1;
+    }
+
+  /* This is _not_ just a sanity check.  group_barrier_needed_p will update
+     important state info.  Don't delete this test.  */
+  if (ia64_final_schedule
+      && group_barrier_needed_p (insn))
+    abort ();
+
+  sched_data.stopbit[sched_data.cur] = 0;
+  sched_data.insns[sched_data.cur] = insn;
+  sched_data.types[sched_data.cur] = t;
+
+  sched_data.cur++;
+  if (sched_verbose)
+    fprintf (dump, "// Scheduling insn %d of type %s\n",
+             INSN_UID (insn), type_names[t]);
+
+  if (GET_CODE (insn) == CALL_INSN && ia64_final_schedule)
+    {
+      schedule_stop (sched_verbose ? dump : NULL);
+      sched_data.last_was_stop = 1;
+    }
+
+  return 1;
+}
+/* Free data allocated by ia64_sched_init.  */
+
+void
+ia64_sched_finish (dump, sched_verbose)
+     FILE *dump;
+     int sched_verbose;
+{
+  if (sched_verbose)
+    fprintf (dump, "// Finishing schedule.\n");
+  rotate_two_bundles (NULL);
+  free (sched_types);
+  free (sched_ready);
+}

 /* Emit pseudo-ops for the assembler to describe predicate relations.
    At present this assumes that we only consider predicate pairs to
    be mutex, and that the assembler can deduce proper values from
    straight-line code.  */

 static void
...
...
@@ -4660,9 +5918,17 @@ ia64_reorg (insns)
       /* Make sure the CFG and global_live_at_start are correct
          for emit_predicate_relation_info.  */
       find_basic_blocks (insns, max_reg_num (), NULL);
-      life_analysis (insns, NULL, 0);
+      life_analysis (insns, NULL, PROP_DEATH_NOTES);
     }

-  emit_insn_group_barriers (insns);
+  ia64_final_schedule = 1;
+  schedule_ebbs (rtl_dump_file);
+  ia64_final_schedule = 0;
+
+  /* This relies on the NOTE_INSN_BASIC_BLOCK notes to be in the same
+     place as they were during scheduling.  */
+  emit_insn_group_barriers (rtl_dump_file, insns);
+  fixup_errata ();

   emit_predicate_relation_info ();
 }
...
...
gcc/config/ia64/ia64.h
...
...
@@ -1849,7 +1849,7 @@ do { \
     case CONST:							\
     case SYMBOL_REF:						\
     case LABEL_REF:						\
-      return COSTS_N_INSNS (2);					\
+      return COSTS_N_INSNS (3);					\

 /* Like `CONST_COSTS' but applies to nonconstant RTL expressions.  */
...
...
@@ -1916,19 +1916,6 @@ do { \
 #define NO_FUNCTION_CSE

-/* A C statement (sans semicolon) to update the integer variable COST based on
-   the relationship between INSN that is dependent on DEP_INSN through the
-   dependence LINK.  */
-
-/* ??? Investigate.  */
-/* #define ADJUST_COST(INSN, LINK, DEP_INSN, COST) */
-
-/* A C statement (sans semicolon) to update the integer scheduling
-   priority `INSN_PRIORITY(INSN)'.  */
-
-/* ??? Investigate.  */
-/* #define ADJUST_PRIORITY (INSN) */
-
 /* Dividing the output into sections.  */
...
...
@@ -2816,13 +2803,43 @@ do { \
    BRANCH_COST+1 is the default if the machine does not use
    cc0, and 1 if it does use cc0.  */
 /* ??? Investigate.  */

-/* #define MAX_CONDITIONAL_EXECUTE */
+#define MAX_CONDITIONAL_EXECUTE 12

+/* A C statement (sans semicolon) to update the integer scheduling
+   priority `INSN_PRIORITY(INSN)'.  */
+
+/* ??? Investigate.  */
+/* #define ADJUST_PRIORITY (INSN) */
+
+/* A C statement (sans semicolon) to update the integer variable COST
+   based on the relationship between INSN that is dependent on
+   DEP_INSN through the dependence LINK.  The default is to make no
+   adjustment to COST.  This can be used for example to specify to
+   the scheduler that an output- or anti-dependence does not incur
+   the same cost as a data-dependence.  */
+
+#define ADJUST_COST(insn,link,dep_insn,cost) \
+  (cost) = ia64_adjust_cost(insn, link, dep_insn, cost)
+
-/* Indicate how many instructions can be issued at the same time.  */
-/* ??? For now, we just schedule to fill bundles.  */
-#define ISSUE_RATE 3
+#define ISSUE_RATE ia64_issue_rate ()
+
+#define MD_SCHED_INIT(DUMP, SCHED_VERBOSE, MAX_READY) \
+  ia64_sched_init (DUMP, SCHED_VERBOSE, MAX_READY)
+
+#define MD_SCHED_REORDER(DUMP, SCHED_VERBOSE, READY, N_READY, CLOCK, CIM) \
+  (CIM) = ia64_sched_reorder (DUMP, SCHED_VERBOSE, READY, &N_READY, 0)
+
+#define MD_SCHED_REORDER2(DUMP, SCHED_VERBOSE, READY, N_READY, CLOCK, CIM) \
+  (CIM) = ia64_sched_reorder2 (DUMP, SCHED_VERBOSE, READY, &N_READY, 1)
+
+#define MD_SCHED_FINISH(DUMP, SCHED_VERBOSE) \
+  ia64_sched_finish (DUMP, SCHED_VERBOSE)
+
+#define MD_SCHED_VARIABLE_ISSUE(DUMP, SCHED_VERBOSE, INSN, CAN_ISSUE_MORE) \
+  ((CAN_ISSUE_MORE) \
+   = ia64_variable_issue (DUMP, SCHED_VERBOSE, INSN, CAN_ISSUE_MORE))
+
+extern int ia64_final_schedule;

 #define IA64_UNWIND_INFO	1
 #define HANDLER_SECTION fprintf (asm_out_file, "\t.personality\t__ia64_personality_v1\n\t.handlerdata\n");
...
...
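For context on how these macros are consumed: haifa-sched.c expands MD_SCHED_REORDER-style macros in place, assigning the hook's return value to the scheduler's can-issue counter. The following compilable toy reproduces that macro-glue pattern; the _TOY/_toy names and stub types are stand-ins, and the real call site in haifa-sched.c may differ in detail:

#include <stdio.h>

/* Minimal demonstration of the MD_SCHED_REORDER macro pattern defined
   above: the macro assigns the hook's return value to the scheduler's
   counter in place.  Stub types and the _toy hook are stand-ins.  */
typedef int rtx_toy;

static int sched_reorder_toy (FILE *dump, int verbose, rtx_toy *ready,
                              int *pn_ready, int type)
{
  return *pn_ready;             /* pretend everything can issue */
}

#define MD_SCHED_REORDER_TOY(DUMP, VERBOSE, READY, N_READY, CLOCK, CIM) \
  (CIM) = sched_reorder_toy (DUMP, VERBOSE, READY, &N_READY, 0)

int main (void)
{
  rtx_toy ready[3] = { 1, 2, 3 };
  int n_ready = 3, clock_var = 0, can_issue_more = 0;

  MD_SCHED_REORDER_TOY (stdout, 0, ready, n_ready, clock_var,
                        can_issue_more);
  printf ("can_issue_more = %d\n", can_issue_more);     /* 3 */
  return 0;
}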
gcc/config/ia64/ia64.md
...
...
@@ -68,6 +68,8 @@
 ;; 19	fetchadd_acq
 ;; 20	bsp_value
 ;; 21	flushrs
+;; 22	bundle selector
+;; 23	cycle display
 ;;
 ;; unspec_volatile:
 ;; 0	alloc
...
...
@@ -99,23 +101,35 @@
 ;; multiple instructions, patterns which emit 0 instructions, and patterns
 ;; which emit instruction that can go in any slot (e.g. nop).

-(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd"
+(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop_b,nop_f,nop_i,nop_m,nop_x"
   (const_string "unknown"))

-(define_attr "type" "unknown,A,I,M,F,B,L,S"
-  (cond [(eq_attr "itanium_class" "ld,st,fld,stf,sem") (const_string "M")
+;; chk_s has an I and an M form; use type A for convenience.
+(define_attr "type" "unknown,A,I,M,F,B,L,X,S"
+  (cond [(eq_attr "itanium_class" "ld,st,fld,stf,sem,nop_m") (const_string "M")
	 (eq_attr "itanium_class" "rse_m,syst_m,syst_m0") (const_string "M")
	 (eq_attr "itanium_class" "frar_m,toar_m,frfr,tofr") (const_string "M")
-	 (eq_attr "itanium_class" "ialu,icmp,ilog") (const_string "A")
-	 (eq_attr "itanium_class" "fmisc,fmac,fcmp,xmpy,fcvtfx") (const_string "F")
+	 (eq_attr "itanium_class" "chk_s,ialu,icmp,ilog") (const_string "A")
+	 (eq_attr "itanium_class" "fmisc,fmac,fcmp,xmpy") (const_string "F")
+	 (eq_attr "itanium_class" "fcvtfx,nop_f") (const_string "F")
	 (eq_attr "itanium_class" "frar_i,toar_i,frbr,tobr") (const_string "I")
	 (eq_attr "itanium_class" "frpr,topr,ishf,xtd,tbit") (const_string "I")
-	 (eq_attr "itanium_class" "mmmul,mmshf,mmshfi") (const_string "I")
-	 (eq_attr "itanium_class" "br,scall") (const_string "B")
+	 (eq_attr "itanium_class" "mmmul,mmshf,mmshfi,nop_i") (const_string "I")
+	 (eq_attr "itanium_class" "br,scall,nop_b") (const_string "B")
	 (eq_attr "itanium_class" "stop_bit") (const_string "S")
+	 (eq_attr "itanium_class" "nop_x") (const_string "X")
	 (eq_attr "itanium_class" "long_i") (const_string "L")]
	(const_string "unknown")))

+(define_attr "itanium_requires_unit0" "no,yes"
+  (cond [(eq_attr "itanium_class" "syst_m0,sem,frfr,rse_m") (const_string "yes")
+	 (eq_attr "itanium_class" "toar_m,frar_m") (const_string "yes")
+	 (eq_attr "itanium_class" "frbr,tobr,mmmul") (const_string "yes")
+	 (eq_attr "itanium_class" "tbit,ishf,topr,frpr") (const_string "yes")
+	 (eq_attr "itanium_class" "toar_i,frar_i") (const_string "yes")
+	 (eq_attr "itanium_class" "fmisc,fcmp") (const_string "yes")]
+	(const_string "no")))
+
 ;; Predication.  True iff this instruction can be predicated.

 (define_attr "predicable" "no,yes" (const_string "yes"))
...
...
@@ -127,47 +141,70 @@
;; ::
;; ::::::::::::::::::::
;; Each usage of a function unit by a class of insns is specified with a
;; `define_function_unit' expression, which looks like this:
;; (define_function_unit NAME MULTIPLICITY SIMULTANEITY TEST READY-DELAY
;; ISSUE-DELAY [CONFLICT-LIST])
;; This default scheduling info seeks to pack instructions into bundles
;; efficiently to reduce code size, so we just list how many of each
;; instruction type can go in a bundle. ISSUE_RATE is set to 3.
;; ??? Add scheduler ready-list hook (MD_SCHED_REORDER) that orders
;; instructions, so that the next instruction can fill the next bundle slot.
;; This really needs to know where the stop bits are though.
;; ??? Use MD_SCHED_REORDER to put alloc first instead of using an unspec
;; volatile. Use ADJUST_PRIORITY to set the priority of alloc very high to
;; make it schedule first.
;; ??? Modify the md_reorg code that emits stop bits so that instead of putting
;; them in the last possible place, we put them in places where bundles allow
;; them. This should reduce code size, but may decrease performance if we end
;; up with more stop bits than the minimum we need.
;; Alu instructions can execute on either the integer or memory function
;; unit.  We indicate this by defining an alu function unit, and then marking
;; it as busy every time we issue an integer or memory type instruction.
(define_function_unit "alu" 3 1 (eq_attr "type" "A,I,M") 1 0)
(define_function_unit "integer" 2 1 (eq_attr "type" "I") 1 0)
(define_function_unit "memory" 3 1 (eq_attr "type" "M") 1 0)
(define_function_unit "floating_point" 1 1 (eq_attr "type" "F") 1 0)
(define_function_unit "branch" 3 1 (eq_attr "type" "B") 1 0)
;; ??? This isn't quite right, because we can only fit two insns in a bundle
;; when using an L type instruction. That isn't modeled currently.
(define_function_unit "long_immediate" 1 1 (eq_attr "type" "L") 1 0)
;; We define 6 "dummy" functional units. All the real work to decide which
;; insn uses which unit is done by our MD_SCHED_REORDER hooks. We only
;; have to ensure here that there are enough copies of the dummy unit so
;; that the scheduler doesn't get confused by MD_SCHED_REORDER.
;; Other than the 6 dummies for normal insns, we also add a single dummy unit
;; for stop bits.
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "br") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "scall") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcmp") 2 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fcvtfx") 7 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fld") 9 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmac") 5 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "fmisc") 5 0)
;; There is only one insn `mov = ar.bsp' for frar_i:
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_i") 13 0)
;; There is only one insn `mov = ar.unat' for frar_m:
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frar_m") 6 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frbr") 2 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frfr") 2 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "frpr") 2 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ialu") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "icmp") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ilog") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ishf") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ld") 2 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "long_i") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmmul") 2 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshf") 2 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "mmshfi") 2 0)
;; Currently there is only one insn (flushrs) in this class.  We assume that
;; flushrs is the first syllable of the bundle after a stop bit.
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "rse_m") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "sem") 11 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "stf") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "st") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m0") 1 0)
;; Now we use only one insn, `mf'.  Therefore the latency time is set to 0.
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "syst_m") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tbit") 1 0)
;; There is only one insn `mov ar.pfs =' for toar_i, therefore we use a
;; latency time equal to 0:
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_i") 0 0)
;; There are only 2 insns `mov ar.ccv =' and `mov ar.unat =' for toar_m:
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "toar_m") 5 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tobr") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "tofr") 9 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "topr") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xmpy") 7 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "xtd") 1 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_m") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_i") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_f") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_b") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "nop_x") 0 0)
(define_function_unit "stop_bit" 1 1 (eq_attr "itanium_class" "stop_bit") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "ignore") 0 0)
(define_function_unit "dummy" 6 1 (eq_attr "itanium_class" "unknown") 0 0)
;; ::::::::::::::::::::
;; ::
...
...
@@ -1411,7 +1448,6 @@
(clobber (match_operand:DI 2 "register_operand" ""))]
"reload_completed"
  [(set (match_dup 3) (ashift:DI (match_dup 1) (const_int 32)))
   (unspec_volatile [(const_int 0)] 2)
   (set (zero_extract:DI (match_dup 0) (const_int 32) (const_int 0))
        (lshiftrt:DI (match_dup 3) (const_int 32)))]
  "operands[3] = operands[2];")
...
...
@@ -2408,9 +2444,6 @@
"#"
  [(set_attr "itanium_class" "unknown")])
;; ??? Need to emit an instruction group barrier here because this gets split
;; after md_reorg.
(define_split
  [(set (match_operand:DI 0 "register_operand" "")
(plus:DI (plus:DI (mult:DI (match_operand:DI 1 "register_operand" "")
...
...
@@ -2422,9 +2455,7 @@
  [(parallel [(set (match_dup 5) (plus:DI (mult:DI (match_dup 1) (match_dup 2))
                                          (match_dup 3)))
              (clobber (match_dup 0))])
   (unspec_volatile [(const_int 0)] 2)
   (set (match_dup 0) (match_dup 5))
   (unspec_volatile [(const_int 0)] 2)
   (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))]
"")
...
...
@@ -5122,7 +5153,10 @@
(match_operand:DI 2 "const_int_operand" "")] 1))
(clobber (match_operand:DI 3 "register_operand" ""))]
""
".mem.offset %2, 0
\;
st8.spill %0 = %1%P0"
"
*
{
return
\"
.mem.offset %2, 0
\;
%,st8.spill %0 = %1%P0
\"
;
}"
[
(set_attr "itanium_class" "st")
]
)
;; Reads ar.unat
...
...
@@ -5140,7 +5174,10 @@
(match_operand:DI 2 "const_int_operand" "")] 2))
(use (match_operand:DI 3 "register_operand" ""))]
""
".mem.offset %2, 0
\;
ld8.fill %0 = %1%P1"
"
*
{
return
\"
.mem.offset %2, 0
\;
%,ld8.fill %0 = %1%P1
\"
;
}"
[
(set_attr "itanium_class" "ld")
]
)
(define_insn "fr_spill"
...
...
@@ -5193,6 +5230,58 @@
"nop 0"
  [(set_attr "itanium_class" "unknown")])
(define_insn "nop_m"
[
(const_int 1)
]
""
"nop.m 0"
[
(set_attr "itanium_class" "nop_m")
]
)
(define_insn "nop_i"
[
(const_int 2)
]
""
"nop.i 0"
[
(set_attr "itanium_class" "nop_i")
]
)
(define_insn "nop_f"
[
(const_int 3)
]
""
"nop.f 0"
[
(set_attr "itanium_class" "nop_f")
]
)
(define_insn "nop_b"
[
(const_int 4)
]
""
"nop.b 0"
[
(set_attr "itanium_class" "nop_b")
]
)
(define_insn "nop_x"
[
(const_int 5)
]
""
""
[
(set_attr "itanium_class" "nop_x")
]
)
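The ChangeLog lists a new static gen_nop_type helper in ia64.c; presumably it dispatches on the slot type to one of the patterns above, along these lines (a sketch only, assuming the TYPE_* values generated from the "type" attribute):
/* Sketch of a gen_nop_type-style dispatcher: return a nop insn of the
   requested slot type.  TYPE_M etc. come from the "type" attribute;
   gen_nop_m etc. are generated from the named patterns above.  */
static rtx
gen_nop_type (type)
     enum attr_type type;
{
  switch (type)
    {
    case TYPE_M: return gen_nop_m ();
    case TYPE_I: return gen_nop_i ();
    case TYPE_F: return gen_nop_f ();
    case TYPE_B: return gen_nop_b ();
    case TYPE_X: return gen_nop_x ();
    default: abort ();
    }
}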
(define_expand "cycle_display"
[
(unspec [(match_operand 0 "const_int_operand" "")
]
23)]
"ia64_final_schedule"
"")
(define_insn "
*
cycle_display_1"
[
(unspec [(match_operand 0 "const_int_operand" "")
]
23)]
""
"// cycle %0"
[
(set_attr "itanium_class" "ignore")
(set_attr "predicable" "no")])
(define_insn "bundle_selector"
[
(unspec [(match_operand 0 "const_int_operand" "")
]
22)]
""
"
*
{
return get_bundle_name (INTVAL (operands
[
0
]
));
}"
[
(set_attr "itanium_class" "ignore")
(set_attr "predicable" "no")])
;; Pseudo instruction that prevents the scheduler from moving code above this
;; point.
(define_insn "blockage"
...
...
@@ -5203,7 +5292,7 @@
(set_attr "predicable" "no")])
(define_insn "insn_group_barrier"
  [(unspec_volatile [(const_int 0)] 2)]
  [(unspec_volatile [(match_operand 0 "const_int_operand" "")] 2)]
""
";;"
  [(set_attr "itanium_class" "stop_bit")
...
...
gcc/rtl.h
View file @ 2130b7fb
...
...
@@ -1346,6 +1346,7 @@ extern void set_unique_reg_note PARAMS ((rtx, enum reg_note, rtx));
? (GET_CODE (PATTERN (I)) == SET \
? PATTERN (I) : single_set_1 (I)) \
: NULL_RTX)
#define single_set_1(I) single_set_2 (I, PATTERN (I))
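For context, the usual consumer pattern for the single_set macro (unchanged by this commit) is simply:
/* Typical single_set usage: get the lone SET of an insn, or NULL_RTX
   if the insn does more than one thing.  */
rtx set = single_set (insn);
if (set != NULL_RTX)
  {
    rtx dest = SET_DEST (set);
    rtx src = SET_SRC (set);
    /* ... analyze DEST and SRC ...  */
  }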
extern int rtx_unstable_p	PARAMS ((rtx));
extern int rtx_varies_p		PARAMS ((rtx));
...
...
@@ -1365,7 +1366,7 @@ extern int no_jumps_between_p PARAMS ((rtx, rtx));
extern int modified_in_p	PARAMS ((rtx, rtx));
extern int insn_dependent_p	PARAMS ((rtx, rtx));
extern int reg_set_p		PARAMS ((rtx, rtx));
extern rtx single_set_1		PARAMS ((rtx));
extern rtx single_set_2		PARAMS ((rtx, rtx));
extern int multiple_sets	PARAMS ((rtx));
extern rtx find_last_value	PARAMS ((rtx, rtx *, rtx, int));
extern int refers_to_regno_p	PARAMS ((unsigned int, unsigned int,
...
...
gcc/rtlanal.c
View file @ 2130b7fb
...
...
@@ -860,12 +860,11 @@ insn_dependent_p_1 (x, pat, data)
will not be used, which we ignore. */
rtx
single_set_1 (insn)
     rtx insn;
single_set_2 (insn, pat)
     rtx insn, pat;
{
  rtx set = NULL;
  int set_verified = 1;
  rtx pat = PATTERN (insn);
  int i;

  if (GET_CODE (pat) == PARALLEL)
...
...