Commit 3914c31f by John David Anglin Committed by John David Anglin

pa.h (TRAMPOLINE_TEMPLATE): Shorten sequence when generating PA 2.0 code.

	* pa.h (TRAMPOLINE_TEMPLATE): Shorten sequence when generating PA
	2.0 code.
	(TRAMPOLINE_CODE_SIZE, MIN_CACHELINE_SIZE): New defines.
	(INITIALIZE_TRAMPOLINE): Rework to pass line length, and aligned start
	and end addresses to I and D cache instruction patterns.
	* pa.md (anddi3, iordi3): Change predicates of operands 1 and 2 to
	and_operand and ior_operand, respectively.  When generating 64-bit
	code, only one operand needs to be a register operand.
	(xordi3): Change predicates of operands 1 and 2 to register_operand.
	(one_cmpldi2): Change predicate of operand 1 to register_operand.
	(dcacheflush, icacheflush): Revise to flush an arbitrary number of
	cache lines.

From-SVN: r74888
parent 6ae533cf
2003-12-20  John David Anglin  <dave.anglin@nrc-cnrc.gc.ca>

	* pa.h (TRAMPOLINE_TEMPLATE): Shorten sequence when generating PA
	2.0 code.
	(TRAMPOLINE_CODE_SIZE, MIN_CACHELINE_SIZE): New defines.
	(INITIALIZE_TRAMPOLINE): Rework to pass line length, and aligned start
	and end addresses to I and D cache instruction patterns.
	* pa.md (anddi3, iordi3): Change predicates of operands 1 and 2 to
	and_operand and ior_operand, respectively.  When generating 64-bit
	code, only one operand needs to be a register operand.
	(xordi3): Change predicates of operands 1 and 2 to register_operand.
	(one_cmpldi2): Change predicate of operand 1 to register_operand.
	(dcacheflush, icacheflush): Revise to flush an arbitrary number of
	cache lines.
2003-12-20 Josef Zlomek <zlomekj@suse.cz>
PR optimization/13430, PR optimization/12322
......
......@@ -1003,10 +1003,20 @@ extern int may_call_alloca;
fputs ("\tdepwi 0,31,2,%r21\n", FILE); \
fputs ("\tldw 4(%r21),%r19\n", FILE); \
fputs ("\tldw 0(%r21),%r21\n", FILE); \
fputs ("\tldsid (%r21),%r1\n", FILE); \
fputs ("\tmtsp %r1,%sr0\n", FILE); \
fputs ("\tbe 0(%sr0,%r21)\n", FILE); \
fputs ("\tldw 40(%r22),%r29\n", FILE); \
if (TARGET_PA_20) \
{ \
fputs ("\tbve (%r21)\n", FILE); \
fputs ("\tldw 40(%r22),%r29\n", FILE); \
fputs ("\t.word 0\n", FILE); \
fputs ("\t.word 0\n", FILE); \
} \
else \
{ \
fputs ("\tldsid (%r21),%r1\n", FILE); \
fputs ("\tmtsp %r1,%sr0\n", FILE); \
fputs ("\tbe 0(%sr0,%r21)\n", FILE); \
fputs ("\tldw 40(%r22),%r29\n", FILE); \
} \
fputs ("\t.word 0\n", FILE); \
fputs ("\t.word 0\n", FILE); \
fputs ("\t.word 0\n", FILE); \
......@@ -1029,16 +1039,21 @@ extern int may_call_alloca;
} \
}
/* Length in units of the trampoline for entering a nested function.
/* Length in units of the trampoline for entering a nested function. */
#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 52)
Flush the cache entries corresponding to the first and last addresses
of the trampoline. This is necessary as the trampoline may cross two
cache lines.
/* Length in units of the trampoline instruction code. */
If the code part of the trampoline ever grows to > 32 bytes, then it
will become necessary to hack on the cacheflush pattern in pa.md. */
#define TRAMPOLINE_CODE_SIZE (TARGET_64BIT ? 24 : (TARGET_PA_20 ? 32 : 40))
#define TRAMPOLINE_SIZE (TARGET_64BIT ? 72 : 52)
/* Minimum length of a cache line. A length of 16 will work on all
PA-RISC processors. All PA 1.1 processors have a cache line of
32 bytes. Most but not all PA 2.0 processors have a cache line
of 64 bytes. As cache flushes are expensive and we don't support
PA 1.0, we use a minimum length of 32. */
#define MIN_CACHELINE_SIZE 32
/* Emit RTL insns to initialize the variable parts of a trampoline.
FNADDR is an RTX for the address of the function's pure code.
......@@ -1048,54 +1063,85 @@ extern int may_call_alloca;
Move the static chain value to trampoline template at offset 40.
Move the trampoline address to trampoline template at offset 44.
Move r19 to trampoline template at offset 48. The latter two
words create a plabel for the indirect call to the trampoline. */
words create a plabel for the indirect call to the trampoline.
A similar sequence is used for the 64-bit port but the plabel is
at the beginning of the trampoline.
Finally, the cache entries for the trampoline code are flushed.
This is necessary to ensure that the trampoline instruction sequence
is written to memory prior to any attempts at prefetching the code
sequence. */
#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
{ \
rtx start_addr = gen_reg_rtx (Pmode); \
rtx end_addr = gen_reg_rtx (Pmode); \
rtx line_length = gen_reg_rtx (Pmode); \
rtx tmp; \
\
if (!TARGET_64BIT) \
{ \
rtx start_addr, end_addr; \
tmp = memory_address (Pmode, plus_constant ((TRAMP), 36)); \
emit_move_insn (gen_rtx_MEM (Pmode, tmp), (FNADDR)); \
tmp = memory_address (Pmode, plus_constant ((TRAMP), 40)); \
emit_move_insn (gen_rtx_MEM (Pmode, tmp), (CXT)); \
\
start_addr = memory_address (Pmode, plus_constant ((TRAMP), 36)); \
emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (FNADDR)); \
start_addr = memory_address (Pmode, plus_constant ((TRAMP), 40)); \
emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (CXT)); \
start_addr = memory_address (Pmode, plus_constant ((TRAMP), 44)); \
emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (TRAMP)); \
start_addr = memory_address (Pmode, plus_constant ((TRAMP), 48)); \
emit_move_insn (gen_rtx_MEM (Pmode, start_addr), \
/* Create a fat pointer for the trampoline. */ \
tmp = memory_address (Pmode, plus_constant ((TRAMP), 44)); \
emit_move_insn (gen_rtx_MEM (Pmode, tmp), (TRAMP)); \
tmp = memory_address (Pmode, plus_constant ((TRAMP), 48)); \
emit_move_insn (gen_rtx_MEM (Pmode, tmp), \
gen_rtx_REG (Pmode, 19)); \
\
/* fdc and fic only use registers for the address to flush, \
they do not accept integer displacements. */ \
start_addr = force_reg (Pmode, (TRAMP)); \
end_addr = force_reg (Pmode, plus_constant ((TRAMP), 32)); \
emit_insn (gen_dcacheflush (start_addr, end_addr)); \
emit_insn (gen_icacheflush (start_addr, end_addr, start_addr, \
they do not accept integer displacements. We align the \
start and end addresses to the beginning of their respective \
cache lines to minimize the number of lines flushed. */ \
tmp = force_reg (Pmode, (TRAMP)); \
emit_insn (gen_andsi3 (start_addr, tmp, \
GEN_INT (-MIN_CACHELINE_SIZE))); \
tmp = force_reg (Pmode, \
plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1)); \
emit_insn (gen_andsi3 (end_addr, tmp, \
GEN_INT (-MIN_CACHELINE_SIZE))); \
emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); \
emit_insn (gen_dcacheflush (start_addr, end_addr, line_length)); \
emit_insn (gen_icacheflush (start_addr, end_addr, line_length, \
gen_reg_rtx (Pmode), \
gen_reg_rtx (Pmode), \
gen_reg_rtx (Pmode))); \
} \
else \
{ \
rtx start_addr, end_addr; \
tmp = memory_address (Pmode, plus_constant ((TRAMP), 56)); \
emit_move_insn (gen_rtx_MEM (Pmode, tmp), (FNADDR)); \
tmp = memory_address (Pmode, plus_constant ((TRAMP), 64)); \
emit_move_insn (gen_rtx_MEM (Pmode, tmp), (CXT)); \
\
start_addr = memory_address (Pmode, plus_constant ((TRAMP), 56)); \
emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (FNADDR)); \
start_addr = memory_address (Pmode, plus_constant ((TRAMP), 64)); \
emit_move_insn (gen_rtx_MEM (Pmode, start_addr), (CXT)); \
/* Create a fat pointer for the trampoline. */ \
end_addr = force_reg (Pmode, plus_constant ((TRAMP), 32)); \
start_addr = memory_address (Pmode, plus_constant ((TRAMP), 16)); \
emit_move_insn (gen_rtx_MEM (Pmode, start_addr), end_addr); \
end_addr = gen_rtx_REG (Pmode, 27); \
start_addr = memory_address (Pmode, plus_constant ((TRAMP), 24)); \
emit_move_insn (gen_rtx_MEM (Pmode, start_addr), end_addr); \
tmp = memory_address (Pmode, plus_constant ((TRAMP), 16)); \
emit_move_insn (gen_rtx_MEM (Pmode, tmp), \
force_reg (Pmode, plus_constant ((TRAMP), 32))); \
tmp = memory_address (Pmode, plus_constant ((TRAMP), 24)); \
emit_move_insn (gen_rtx_MEM (Pmode, tmp), \
gen_rtx_REG (Pmode, 27)); \
\
/* fdc and fic only use registers for the address to flush, \
they do not accept integer displacements. PA 2.0 cache \
lines are 64 bytes. */ \
start_addr = force_reg (Pmode, (TRAMP)); \
end_addr = force_reg (Pmode, plus_constant ((TRAMP), 64)); \
emit_insn (gen_dcacheflush (start_addr, end_addr)); \
emit_insn (gen_icacheflush (start_addr, end_addr, start_addr, \
they do not accept integer displacements. We align the \
start and end addresses to the beginning of their respective \
cache lines to minimize the number of lines flushed. */ \
tmp = force_reg (Pmode, plus_constant ((TRAMP), 32)); \
emit_insn (gen_anddi3 (start_addr, tmp, \
GEN_INT (-MIN_CACHELINE_SIZE))); \
tmp = force_reg (Pmode, \
plus_constant (tmp, TRAMPOLINE_CODE_SIZE - 1)); \
emit_insn (gen_anddi3 (end_addr, tmp, \
GEN_INT (-MIN_CACHELINE_SIZE))); \
emit_move_insn (line_length, GEN_INT (MIN_CACHELINE_SIZE)); \
emit_insn (gen_dcacheflush (start_addr, end_addr, line_length)); \
emit_insn (gen_icacheflush (start_addr, end_addr, line_length, \
gen_reg_rtx (Pmode), \
gen_reg_rtx (Pmode), \
gen_reg_rtx (Pmode))); \
} \
......
......@@ -5322,15 +5322,25 @@
(define_expand "anddi3"
[(set (match_operand:DI 0 "register_operand" "")
(and:DI (match_operand:DI 1 "arith_double_operand" "")
(match_operand:DI 2 "arith_double_operand" "")))]
(and:DI (match_operand:DI 1 "and_operand" "")
(match_operand:DI 2 "and_operand" "")))]
""
"
{
if (! register_operand (operands[1], DImode)
|| ! register_operand (operands[2], DImode))
/* Let GCC break this into word-at-a-time operations. */
FAIL;
if (TARGET_64BIT)
{
/* One operand must be a register operand. */
if (!register_operand (operands[1], DImode)
&& !register_operand (operands[2], DImode))
FAIL;
}
else
{
/* Both operands must be register operands. */
if (!register_operand (operands[1], DImode)
|| !register_operand (operands[2], DImode))
FAIL;
}
}")
(define_insn ""
......@@ -5391,15 +5401,25 @@
(define_expand "iordi3"
[(set (match_operand:DI 0 "register_operand" "")
(ior:DI (match_operand:DI 1 "arith_double_operand" "")
(match_operand:DI 2 "arith_double_operand" "")))]
(ior:DI (match_operand:DI 1 "ior_operand" "")
(match_operand:DI 2 "ior_operand" "")))]
""
"
{
if (! register_operand (operands[1], DImode)
|| ! register_operand (operands[2], DImode))
/* Let GCC break this into word-at-a-time operations. */
FAIL;
if (TARGET_64BIT)
{
/* One operand must be a register operand. */
if (!register_operand (operands[1], DImode)
&& !register_operand (operands[2], DImode))
FAIL;
}
else
{
/* Both operands must be register operands. */
if (!register_operand (operands[1], DImode)
|| !register_operand (operands[2], DImode))
FAIL;
}
}")
(define_insn ""
......@@ -5462,15 +5482,11 @@
(define_expand "xordi3"
[(set (match_operand:DI 0 "register_operand" "")
(xor:DI (match_operand:DI 1 "arith_double_operand" "")
(match_operand:DI 2 "arith_double_operand" "")))]
(xor:DI (match_operand:DI 1 "register_operand" "")
(match_operand:DI 2 "register_operand" "")))]
""
"
{
if (! register_operand (operands[1], DImode)
|| ! register_operand (operands[2], DImode))
/* Let GCC break this into word-at-a-time operations. */
FAIL;
}")
(define_insn ""
......@@ -5532,12 +5548,10 @@
(define_expand "one_cmpldi2"
[(set (match_operand:DI 0 "register_operand" "")
(not:DI (match_operand:DI 1 "arith_double_operand" "")))]
(not:DI (match_operand:DI 1 "register_operand" "")))]
""
"
{
if (! register_operand (operands[1], DImode))
FAIL;
}")
(define_insn ""
......@@ -8828,29 +8842,59 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
return \"\";
}")
;; Flush the I and D cache line found at the address in operand 0.
;; Flush the I and D cache lines from the start address (operand0)
;; to the end address (operand1). No lines are flushed if the end
;; address is less than the start address (unsigned).
;;
;; Because the range of memory flushed is variable and the size of
;; a MEM can only be a CONST_INT, the patterns specify that they
;; perform an unspecified volatile operation on all memory.
;;
;; The address range for an icache flush must lie within a single
;; space on targets with non-equivalent space registers.
;;
;; This is used by the trampoline code for nested functions.
;; So long as the trampoline itself is less than 32 bytes this
;; is sufficient.
;;
;; Operand 0 contains the start address.
;; Operand 1 contains the end address.
;; Operand 2 contains the line length to use.
;; Operand 3 contains the start address (clobbered).
;; Operands 4 and 5 (icacheflush) are clobbered scratch registers.
(define_insn "dcacheflush"
[(unspec_volatile [(const_int 1)] 0)
(use (mem:SI (match_operand 0 "pmode_register_operand" "r")))
(use (mem:SI (match_operand 1 "pmode_register_operand" "r")))]
[(const_int 1)
(unspec_volatile [(mem:BLK (scratch))] 0)
(use (match_operand 0 "pmode_register_operand" "r"))
(use (match_operand 1 "pmode_register_operand" "r"))
(use (match_operand 2 "pmode_register_operand" "r"))
(clobber (match_scratch 3 "=&0"))]
""
"fdc 0(%0)\;fdc 0(%1)\;sync"
"*
{
if (TARGET_64BIT)
return \"cmpb,*<<=,n %3,%1,.\;fdc,m %2(%3)\;sync\";
else
return \"cmpb,<<=,n %3,%1,.\;fdc,m %2(%3)\;sync\";
}"
[(set_attr "type" "multi")
(set_attr "length" "12")])
(define_insn "icacheflush"
[(unspec_volatile [(const_int 2)] 0)
(use (mem:SI (match_operand 0 "pmode_register_operand" "r")))
(use (mem:SI (match_operand 1 "pmode_register_operand" "r")))
[(const_int 2)
(unspec_volatile [(mem:BLK (scratch))] 0)
(use (match_operand 0 "pmode_register_operand" "r"))
(use (match_operand 1 "pmode_register_operand" "r"))
(use (match_operand 2 "pmode_register_operand" "r"))
(clobber (match_operand 3 "pmode_register_operand" "=&r"))
(clobber (match_operand 4 "pmode_register_operand" "=&r"))]
(clobber (match_scratch 3 "=&0"))
(clobber (match_operand 4 "pmode_register_operand" "=&r"))
(clobber (match_operand 5 "pmode_register_operand" "=&r"))]
""
"mfsp %%sr0,%4\;ldsid (%2),%3\;mtsp %3,%%sr0\;fic 0(%%sr0,%0)\;fic 0(%%sr0,%1)\;sync\;mtsp %4,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop"
"*
{
if (TARGET_64BIT)
return \"mfsp %%sr0,%5\;ldsid (%3),%4\;mtsp %4,%%sr0\;cmpb,*<<=,n %3,%1,.\;fic,m %2(%%sr0,%3)\;sync\;mtsp %5,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop\";
else
return \"mfsp %%sr0,%5\;ldsid (%3),%4\;mtsp %4,%%sr0\;cmpb,<<=,n %3,%1,.\;fic,m %2(%%sr0,%3)\;sync\;mtsp %5,%%sr0\;nop\;nop\;nop\;nop\;nop\;nop\";
}"
[(set_attr "type" "multi")
(set_attr "length" "52")])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment