Commit efcc2e30 by Claudiu Zissulescu Committed by Claudiu Zissulescu

[ARC] Revamp trampoline implementation

The new implementation attempts to clean up the existing trampoline
implementation for ARC, making it work on Linux-type systems.

gcc/
2018-01-08  Claudiu Zissulescu  <claziss@synopsys.com>

        * config/arc/arc.c (TARGET_TRAMPOLINE_ADJUST_ADDRESS): Delete.
        (emit_store_direct): Likewise.
        (arc_trampoline_adjust_address): Likewise.
        (arc_asm_trampoline_template): New function.
        (arc_initialize_trampoline): Use asm_trampoline_template.
        (TARGET_ASM_TRAMPOLINE_TEMPLATE): Define.
        * config/arc/arc.h (TRAMPOLINE_SIZE): Adjust to 16.
        * config/arc/arc.md (flush_icache): Delete pattern.

From-SVN: r256334
parent 85e5629f
2018-01-08 Claudiu Zissulescu <claziss@synopsys.com> 2018-01-08 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc.c (TARGET_TRAMPOLINE_ADJUST_ADDRESS): Delete.
(emit_store_direct): Likewise.
(arc_trampoline_adjust_address): Likewise.
(arc_asm_trampoline_template): New function.
(arc_initialize_trampoline): Use asm_trampoline_template.
(TARGET_ASM_TRAMPOLINE_TEMPLATE): Define.
* config/arc/arc.h (TRAMPOLINE_SIZE): Adjust to 16.
* config/arc/arc.md (flush_icache): Delete pattern.
2018-01-08 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/arc-c.def (__ARC_UNALIGNED__): New define. * config/arc/arc-c.def (__ARC_UNALIGNED__): New define.
* config/arc/arc.h (STRICT_ALIGNMENT): Control this macro using * config/arc/arc.h (STRICT_ALIGNMENT): Control this macro using
munaligned-access. munaligned-access.
......
...@@ -548,8 +548,6 @@ static void arc_finalize_pic (void); ...@@ -548,8 +548,6 @@ static void arc_finalize_pic (void);
#define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline #define TARGET_TRAMPOLINE_INIT arc_initialize_trampoline
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arc_trampoline_adjust_address
#define TARGET_CAN_ELIMINATE arc_can_eliminate #define TARGET_CAN_ELIMINATE arc_can_eliminate
#define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required #define TARGET_FRAME_POINTER_REQUIRED arc_frame_pointer_required
...@@ -3672,69 +3670,62 @@ output_shift (rtx *operands) ...@@ -3672,69 +3670,62 @@ output_shift (rtx *operands)
/* Nested function support. */ /* Nested function support. */
/* Directly store VALUE into memory object BLOCK at OFFSET. */ /* Output assembler code for a block containing the constant parts of
a trampoline, leaving space for variable parts. A trampoline looks
like this:
ld_s r12,[pcl,8]
ld r11,[pcl,12]
j_s [r12]
.word function's address
.word static chain value
*/
static void static void
emit_store_direct (rtx block, int offset, int value) arc_asm_trampoline_template (FILE *f)
{ {
emit_insn (gen_store_direct (adjust_address (block, SImode, offset), asm_fprintf (f, "\tld_s\t%s,[pcl,8]\n", ARC_TEMP_SCRATCH_REG);
force_reg (SImode, asm_fprintf (f, "\tld\t%s,[pcl,12]\n", reg_names[STATIC_CHAIN_REGNUM]);
gen_int_mode (value, SImode)))); asm_fprintf (f, "\tj_s\t[%s]\n", ARC_TEMP_SCRATCH_REG);
assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
} }
/* Emit RTL insns to initialize the variable parts of a trampoline. /* Emit RTL insns to initialize the variable parts of a trampoline.
FNADDR is an RTX for the address of the function's pure code. FNADDR is an RTX for the address of the function's pure code. CXT
CXT is an RTX for the static chain value for the function. */ is an RTX for the static chain value for the function.
/* With potentially multiple shared objects loaded, and multiple stacks
present for multiple threads where trampolines might reside, a simple
range check will likely not suffice for the profiler to tell if a callee
is a trampoline. We enable a speedier check by making the trampoline start at
an address that is not 4-byte aligned.
A trampoline looks like this:
nop_s 0x78e0
entry:
ld_s r12,[pcl,12] 0xd403
ld r11,[pcl,12] 0x170c 700b
j_s [r12] 0x7c00
nop_s 0x78e0
The fastest trampoline to execute for trampolines within +-8KB of CTX The fastest trampoline to execute for trampolines within +-8KB of CTX
would be: would be:
add2 r11,pcl,s12 add2 r11,pcl,s12
j [limm] 0x20200f80 limm j [limm] 0x20200f80 limm
and that would also be faster to write to the stack by computing the offset
from CTX to TRAMP at compile time. However, it would really be better to and that would also be faster to write to the stack by computing
get rid of the high cost of cache invalidation when generating trampolines, the offset from CTX to TRAMP at compile time. However, it would
which requires that the code part of trampolines stays constant, and really be better to get rid of the high cost of cache invalidation
additionally either when generating trampolines, which requires that the code part of
- making sure that no executable code but trampolines is on the stack, trampolines stays constant, and additionally either making sure
no icache entries linger for the area of the stack from when before the that no executable code but trampolines is on the stack, no icache
stack was allocated, and allocating trampolines in trampoline-only entries linger for the area of the stack from when before the stack
cache lines was allocated, and allocating trampolines in trampoline-only cache
or lines or allocate trampolines from a special pool of pre-allocated
- allocate trampolines from a special pool of pre-allocated trampolines. */ trampolines. */
static void static void
arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt) arc_initialize_trampoline (rtx tramp, tree fndecl, rtx cxt)
{ {
rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
emit_store_direct (tramp, 0, TARGET_BIG_ENDIAN ? 0x78e0d403 : 0xd40378e0); emit_block_move (tramp, assemble_trampoline_template (),
emit_store_direct (tramp, 4, TARGET_BIG_ENDIAN ? 0x170c700b : 0x700b170c); GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
emit_store_direct (tramp, 8, TARGET_BIG_ENDIAN ? 0x7c0078e0 : 0x78e07c00); emit_move_insn (adjust_address (tramp, SImode, 8), fnaddr);
emit_move_insn (adjust_address (tramp, SImode, 12), fnaddr); emit_move_insn (adjust_address (tramp, SImode, 12), cxt);
emit_move_insn (adjust_address (tramp, SImode, 16), cxt); emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
emit_insn (gen_flush_icache (adjust_address (tramp, SImode, 0))); LCT_NORMAL, VOIDmode, XEXP (tramp, 0), Pmode,
} plus_constant (Pmode, XEXP (tramp, 0), TRAMPOLINE_SIZE),
Pmode);
/* Allow the profiler to easily distinguish trampolines from normal
functions. */
static rtx
arc_trampoline_adjust_address (rtx addr)
{
return plus_constant (Pmode, addr, 2);
} }
/* This is set briefly to 1 when we output a ".as" address modifier, and then /* This is set briefly to 1 when we output a ".as" address modifier, and then
...@@ -10870,6 +10861,9 @@ arc_cannot_substitute_mem_equiv_p (rtx) ...@@ -10870,6 +10861,9 @@ arc_cannot_substitute_mem_equiv_p (rtx)
#undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
#define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P arc_cannot_substitute_mem_equiv_p #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P arc_cannot_substitute_mem_equiv_p
#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arc_asm_trampoline_template
struct gcc_target targetm = TARGET_INITIALIZER; struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-arc.h" #include "gt-arc.h"
...@@ -829,7 +829,7 @@ extern int arc_initial_elimination_offset(int from, int to); ...@@ -829,7 +829,7 @@ extern int arc_initial_elimination_offset(int from, int to);
/* Trampolines. */ /* Trampolines. */
/* Length in units of the trampoline for entering a nested function. */ /* Length in units of the trampoline for entering a nested function. */
#define TRAMPOLINE_SIZE 20 #define TRAMPOLINE_SIZE 16
/* Alignment required for a trampoline in bits . */ /* Alignment required for a trampoline in bits . */
/* For actual data alignment we just need 32, no more than the stack; /* For actual data alignment we just need 32, no more than the stack;
......
...@@ -4325,15 +4325,6 @@ ...@@ -4325,15 +4325,6 @@
(set_attr "iscompact" "true") (set_attr "iscompact" "true")
(set_attr "length" "2")]) (set_attr "length" "2")])
;; Special pattern to flush the icache.
;; ??? Not sure what to do here. Some ARC's are known to support this.
(define_insn "flush_icache"
[(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 0)]
""
"* return \"\";"
[(set_attr "type" "misc")])
;; Split up troublesome insns for better scheduling. ;; Split up troublesome insns for better scheduling.
;; Peepholes go at the end. ;; Peepholes go at the end.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment