Commit 279c9bde by Jeff Law

* Automatic generation of inline long call sequences when needed.

	* pa.h (TARGET_LONG_CALLS): Delete.  Replace all uses with
	TARGET_PORTABLE_RUNTIME.
	(TARGET_MILLICODE_LONG_CALLS): New target flag.
	* pa.c (output_function_prologue): Keep track of the total number
	of code bytes emitted for each source file.
	(output_call): Handle TARGET_PORTABLE_RUNTIME and millicode calls
	for TARGET_MILLICODE_LONG_CALLS.  Emit an inline long-call if
	needed.  If emitting an inline long-call, perform argument relocations
	before the call if they are needed, and unfill the delay slot of the
	call if necessary.
	* pa.md (define_delay for millicode): Disable the delay slot if
	TARGET_MILLICODE_LONG_CALLS.
	(millicode insns and indirect calls): Properly compute length for
	both TARGET_PORTABLE_RUNTIME and TARGET_MILLICODE_LONG_CALLS.
	(call_internal_symref): Properly compute the length when more
	than 240000 bytes of code have already been output.  Take
	TARGET_MILLICODE_LONG_CALLS into account in the length
	computation.

From-SVN: r8422
parent ba25ac36
......@@ -52,6 +52,11 @@ static int gr_saved, fr_saved;
static rtx find_addr_reg ();
/* Keep track of the number of bytes we have output in the CODE subspaces
during this compilation so we'll know when to emit inline long-calls. */
unsigned int total_code_bytes;
/* Return non-zero only if OP is a register of mode MODE,
or CONST0_RTX. */
int
......@@ -72,7 +77,7 @@ call_operand_address (op, mode)
rtx op;
enum machine_mode mode;
{
return (CONSTANT_P (op) && ! TARGET_LONG_CALLS);
return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
}
/* Return 1 if X contains a symbolic expression. We know these
......@@ -2044,6 +2049,19 @@ output_function_prologue (file, size)
if (profile_flag)
ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
hp_profile_labelno);
if (insn_addresses)
{
unsigned int old_total = total_code_bytes;
total_code_bytes += insn_addresses[INSN_UID (get_last_insn())];
total_code_bytes += FUNCTION_BOUNDARY /BITS_PER_UNIT;
/* Be prepared to handle overflows. */
total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
}
else
total_code_bytes = -1;
}
void
......@@ -3760,9 +3778,9 @@ output_movb (operands, insn, which_alternative, reverse_comparison)
RETURN_POINTER is the register which will hold the return address.
%r2 for most calls, %r31 for millicode calls.
When TARGET_LONG_CALLS is true, output_call is only called for
millicode calls. In addition, no delay slots are available when
TARGET_LONG_CALLS is true. */
When TARGET_MILLICODE_LONG_CALLS is true, then we have to assume
that two instruction sequences must be used to reach the millicode
routines (including dyncall!). */
char *
output_call (insn, call_dest, return_pointer)
......@@ -3775,21 +3793,124 @@ output_call (insn, call_dest, return_pointer)
rtx xoperands[4];
rtx seq_insn;
/* Handle common case -- empty delay slot or no jump in the delay slot. */
if (dbr_sequence_length () == 0
/* Handle long millicode calls for mod, div, and mul. */
if (TARGET_PORTABLE_RUNTIME
|| (TARGET_MILLICODE_LONG_CALLS && REGNO (return_pointer) == 31))
{
xoperands[0] = call_dest;
xoperands[1] = return_pointer;
output_asm_insn ("ldil L%%%0,%%r29", xoperands);
output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
output_asm_insn ("blr 0,%r1\n\tbv,n 0(%%r29)\n\tnop", xoperands);
return "";
}
/* Handle common case -- empty delay slot or no jump in the delay slot,
and we're sure that the branch will reach the beginning of the $CODE$
subspace. */
if ((dbr_sequence_length () == 0
&& get_attr_length (insn) == 8)
|| (dbr_sequence_length () != 0
&& GET_CODE (NEXT_INSN (insn)) != JUMP_INSN))
&& GET_CODE (NEXT_INSN (insn)) != JUMP_INSN
&& get_attr_length (insn) == 4))
{
xoperands[0] = call_dest;
xoperands[1] = return_pointer;
if (TARGET_LONG_CALLS)
output_asm_insn ("bl %0,%r1%#", xoperands);
return "";
}
/* This call may not reach the beginning of the $CODE$ subspace. */
if (get_attr_length (insn) > 8)
{
int delay_insn_deleted = 0;
rtx xoperands[2];
rtx link;
/* We need to emit an inline long-call branch. Furthermore,
because we're changing a named function call into an indirect
function call well after the parameters have been set up, we
need to make sure any FP args appear in both the integer
and FP registers. Also, we need to move any delay slot insn
out of the delay slot -- Yuk! */
if (dbr_sequence_length () != 0
&& GET_CODE (NEXT_INSN (insn)) != JUMP_INSN)
{
output_asm_insn ("ldil L%%%0,%%r29", xoperands);
output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands);
output_asm_insn ("blr 0,%r1\n\tbv,n 0(%%r29)\n\tnop", xoperands);
/* A non-jump insn in the delay slot. By definition we can
emit this insn before the call (and in fact before argument
relocating). */
final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0);
/* Now delete the delay insn. */
PUT_CODE (NEXT_INSN (insn), NOTE);
NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
delay_insn_deleted = 1;
}
/* Now copy any FP arguments into integer registers. */
for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
{
int arg_mode, regno;
rtx use = XEXP (link, 0);
if (! (GET_CODE (use) == USE
&& GET_CODE (XEXP (use, 0)) == REG
&& FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0)))))
continue;
arg_mode = GET_MODE (XEXP (use, 0));
regno = REGNO (XEXP (use, 0));
/* Is it a floating point register? */
if (regno >= 32 && regno <= 39)
{
/* Copy from the FP register into an integer register
(via memory). */
if (arg_mode == SFmode)
{
xoperands[0] = XEXP (use, 0);
xoperands[1] = gen_rtx (REG, SImode, 26 - (regno - 32) / 2);
output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands);
output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
}
else
{
xoperands[0] = XEXP (use, 0);
xoperands[1] = gen_rtx (REG, DImode, 25 - (regno - 34) / 2);
output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands);
output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands);
output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands);
}
}
}
/* Now emit the inline long-call. */
xoperands[0] = call_dest;
output_asm_insn ("ldil L%%%0,%%r22\n\tldo R%%%0(%%r22),%%r22", xoperands);
/* If TARGET_MILLICODE_LONG_CALLS, then we must use a long-call sequence
to call dyncall! */
if (TARGET_MILLICODE_LONG_CALLS)
{
output_asm_insn ("ldil L%%$$dyncall,%%r31", xoperands);
output_asm_insn ("ldo R%%$$dyncall(%%r31),%%r31", xoperands);
output_asm_insn ("blr 0,%%r2\n\tbv,n 0(%%r31)\n\tnop", xoperands);
}
else
output_asm_insn ("bl %0,%r1%#", xoperands);
output_asm_insn ("bl $$dyncall,%%r31\n\tcopy %%r31,%%r2", xoperands);
/* If we had a jump in the call's delay slot, output it now. */
if (dbr_sequence_length () != 0
&& !delay_insn_deleted)
{
xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
output_asm_insn ("b,n %0", xoperands);
/* Now delete the delay insn. */
PUT_CODE (NEXT_INSN (insn), NOTE);
NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
}
return "";
}
......
......@@ -28,6 +28,9 @@ enum cmp_type /* comparison type */
CMP_MAX /* max comparison type */
};
/* For long call handling. */
extern unsigned int total_code_bytes;
/* Print subsidiary information on the compiler version in use. */
#define TARGET_VERSION fprintf (stderr, " (hppa)");
......@@ -57,13 +60,18 @@ extern int target_flags;
/* Allow unconditional jumps in the delay slots of call instructions. */
#define TARGET_JUMP_IN_DELAY (target_flags & 8)
/* Force all function calls to indirect addressing via a register. This
avoids lossage when the function is very far away from the current PC.
/* In rare cases, a millicode call via "bl" can not be turned into
a millicode call using "ble" (when SHLIB_INFO subspace is very large).
This option forces just millicode calls to use inline long-calls.
This is far more efficient than the old long-call option which forced
every function to be called indirectly (as is still the case for
TARGET_PORTABLE_RUNTIME).
??? What about simple jumps, they can suffer from the same problem.
Would require significant surgery in pa.md. */
#define TARGET_LONG_CALLS (target_flags & 16)
#define TARGET_MILLICODE_LONG_CALLS (target_flags & 16)
/* Disable indexed addressing modes. */
......@@ -73,7 +81,8 @@ extern int target_flags;
HP wants everyone to use for ELF objects. If at all possible you want
to avoid this since it's a performance loss for non-prototyped code.
Note TARGET_PORTABLE_RUNTIME also implies TARGET_LONG_CALLS. */
Note TARGET_PORTABLE_RUNTIME also forces all calls to use inline
long-call stubs which is quite expensive. */
#define TARGET_PORTABLE_RUNTIME (target_flags & 64)
......@@ -100,8 +109,8 @@ extern int target_flags;
{"no-fast-indirect-calls", -4},\
{"jump-in-delay", 8}, \
{"no-jump-in-delay", -8}, \
{"long-calls", 16}, \
{"no-long-calls", -16}, \
{"millicode-long-calls", 16},\
{"no-millicode-long-calls", -16},\
{"disable-indexing", 32}, \
{"no-disable-indexing", -32},\
{"portable-runtime", 64+16},\
......@@ -832,9 +841,7 @@ struct hppa_args {int words, nargs_prototype; };
The caller must make a distinction between calls to explicitly named
functions and calls through pointers to functions -- the conventions
are different! Calls through pointers to functions only use general
registers for the first four argument words. Note the indirect function
calling conventions are in effect during TARGET_LONG_CALLS, but
current_call_is_indirect will not be set in such situations.
registers for the first four argument words.
Of course all this is different for the portable runtime model
HP wants everyone to use for ELF. Ugh. Here's a quick description
......@@ -869,12 +876,12 @@ struct hppa_args {int words, nargs_prototype; };
|| !FLOAT_MODE_P (MODE) || (CUM).nargs_prototype > 0) \
? gen_rtx (REG, (MODE), \
(FUNCTION_ARG_SIZE ((MODE), (TYPE)) > 1 \
? (((!(current_call_is_indirect || TARGET_LONG_CALLS) \
? (((!current_call_is_indirect \
|| TARGET_PORTABLE_RUNTIME) \
&& (MODE) == DFmode) \
? ((CUM).words ? 38 : 34) \
: ((CUM).words ? 23 : 25)) \
: (((!(current_call_is_indirect || TARGET_LONG_CALLS) \
: (((!current_call_is_indirect \
|| TARGET_PORTABLE_RUNTIME) \
&& (MODE) == SFmode) \
? (32 + 2 * (CUM).words) \
......
......@@ -93,10 +93,11 @@
[(eq_attr "in_call_delay" "true") (nil) (nil)])
;; millicode call delay slot description. Note it disallows delay slot
;; when TARGET_LONG_CALLS is true.
;; when TARGET_PORTABLE_RUNTIME or TARGET_MILLICODE_LONG_CALLS is true.
(define_delay (eq_attr "type" "milli")
[(and (eq_attr "in_call_delay" "true")
(eq (symbol_ref "TARGET_LONG_CALLS") (const_int 0)))
(and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME") (const_int 0))
(eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS") (const_int 0))))
(nil) (nil)])
;; Unconditional branch, return and other similar instructions.
......@@ -2268,10 +2269,13 @@
""
"* return output_mul_insn (0, insn);"
[(set_attr "type" "milli")
(set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
(const_int 0))
(const_int 4)
(const_int 24)))])
(set (attr "length")
(if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
(const_int 0))
(eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
(const_int 0)))
(const_int 4)
(const_int 24)))])
;;; Division and mod.
(define_expand "divsi3"
......@@ -2318,10 +2322,13 @@
"*
return output_div_insn (operands, 0, insn);"
[(set_attr "type" "milli")
(set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
(const_int 0))
(const_int 4)
(const_int 24)))])
(set (attr "length")
(if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
(const_int 0))
(eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
(const_int 0)))
(const_int 4)
(const_int 24)))])
(define_expand "udivsi3"
[(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
......@@ -2367,10 +2374,13 @@
"*
return output_div_insn (operands, 1, insn);"
[(set_attr "type" "milli")
(set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
(const_int 0))
(const_int 4)
(const_int 24)))])
(set (attr "length")
(if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
(const_int 0))
(eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
(const_int 0)))
(const_int 4)
(const_int 24)))])
(define_expand "modsi3"
[(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
......@@ -2412,10 +2422,13 @@
"*
return output_mod_insn (0, insn);"
[(set_attr "type" "milli")
(set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
(const_int 0))
(const_int 4)
(const_int 24)))])
(set (attr "length")
(if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
(const_int 0))
(eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
(const_int 0)))
(const_int 4)
(const_int 24)))])
(define_expand "umodsi3"
[(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
......@@ -2457,10 +2470,13 @@
"*
return output_mod_insn (1, insn);"
[(set_attr "type" "milli")
(set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
(const_int 0))
(const_int 4)
(const_int 24)))])
(set (attr "length")
(if_then_else (and (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
(const_int 0))
(eq (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
(const_int 0)))
(const_int 4)
(const_int 24)))])
;;- and instructions
;; We define DImode `and` so with DImode `not` we can get
......@@ -3143,7 +3159,7 @@
rtx op;
rtx call_insn;
if (TARGET_LONG_CALLS)
if (TARGET_PORTABLE_RUNTIME)
op = force_reg (SImode, XEXP (operands[0], 0));
else
op = XEXP (operands[0], 0);
......@@ -3185,14 +3201,21 @@
(match_operand 1 "" "i"))
(clobber (reg:SI 2))
(use (const_int 0))]
"! TARGET_LONG_CALLS"
"! TARGET_PORTABLE_RUNTIME"
"*
{
output_arg_descriptor (insn);
return output_call (insn, operands[0], gen_rtx (REG, SImode, 2));
}"
[(set_attr "type" "call")
(set_attr "length" "4")])
(set (attr "length")
(if_then_else (lt (plus (symbol_ref "total_code_bytes") (pc))
(const_int 240000))
(const_int 4)
(if_then_else (ne (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
(const_int 0))
(const_int 64)
(const_int 52))))])
(define_insn "call_internal_reg"
[(call (mem:SI (match_operand:SI 0 "register_operand" "r"))
......@@ -3206,16 +3229,19 @@
return \"blr 0,%%r2\;bv,n 0(%r0)\;ldo 4(%%r2),%%r2\";
/* Yuk! bl may not be able to reach $$dyncall. */
if (TARGET_LONG_CALLS)
if (TARGET_PORTABLE_RUNTIME || TARGET_MILLICODE_LONG_CALLS)
return \"copy %r0,%%r22\;ldil L%%$$dyncall,%%r31\;ldo R%%$$dyncall(%%r31),%%r31\;blr 0,%%r2\;bv,n 0(%%r31)\;nop\";
else
return \"copy %r0,%%r22\;.CALL\\tARGW0=GR\;bl $$dyncall,%%r31\;copy %%r31,%%r2\";
}"
[(set_attr "type" "dyncall")
(set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
(const_int 0))
(const_int 12)
(const_int 24)))])
(set (attr "length")
(if_then_else (and (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
(const_int 0))
(ne (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
(const_int 0)))
(const_int 12)
(const_int 24)))])
(define_expand "call_value"
[(parallel [(set (match_operand 0 "" "")
......@@ -3228,7 +3254,7 @@
rtx op;
rtx call_insn;
if (TARGET_LONG_CALLS)
if (TARGET_PORTABLE_RUNTIME)
op = force_reg (SImode, XEXP (operands[1], 0));
else
op = XEXP (operands[1], 0);
......@@ -3275,14 +3301,21 @@
(clobber (reg:SI 2))
(use (const_int 0))]
;;- Don't use operand 1 for most machines.
"! TARGET_LONG_CALLS"
"! TARGET_PORTABLE_RUNTIME"
"*
{
output_arg_descriptor (insn);
return output_call (insn, operands[1], gen_rtx (REG, SImode, 2));
}"
[(set_attr "type" "call")
(set_attr "length" "4")])
(set (attr "length")
(if_then_else (lt (plus (symbol_ref "total_code_bytes") (pc))
(const_int 240000))
(const_int 4)
(if_then_else (ne (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
(const_int 0))
(const_int 64)
(const_int 52))))])
(define_insn "call_value_internal_reg"
[(set (match_operand 0 "" "=rf")
......@@ -3297,16 +3330,19 @@
return \"blr 0,%%r2\;bv,n 0(%r1)\;ldo 4(%%r2),%%r2\";
/* Yuk! bl may not be able to reach $$dyncall. */
if (TARGET_LONG_CALLS)
if (TARGET_PORTABLE_RUNTIME || TARGET_MILLICODE_LONG_CALLS)
return \"copy %r1,%%r22\;ldil L%%$$dyncall,%%r31\;ldo R%%$$dyncall(%%r31),%%r31\;blr 0,%%r2\;bv,n 0(%%r31)\;nop\";
else
return \"copy %r1,%%r22\;.CALL\\tARGW0=GR\;bl $$dyncall,%%r31\;copy %%r31,%%r2\";
}"
[(set_attr "type" "dyncall")
(set (attr "length") (if_then_else (ne (symbol_ref "TARGET_LONG_CALLS")
(const_int 0))
(const_int 12)
(const_int 24)))])
(set (attr "length")
(if_then_else (and (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
(const_int 0))
(ne (symbol_ref "TARGET_MILLICODE_LONG_CALLS")
(const_int 0)))
(const_int 12)
(const_int 24)))])
;; Call subroutine returning any type.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment