Commit 9129c8cf by Oleg Endo

crt1.S: Remove SH5 support.

libgcc/
	* config/sh/crt1.S: Remove SH5 support.
	* config/sh/crti.S: Likewise.
	* config/sh/crtn.S: Likewise.
	* config/sh/lib1funcs-4-300.S: Likewise.
	* config/sh/lib1funcs-Os-4-200.S: Likewise.
	* config/sh/lib1funcs.S: Likewise.
	* config/sh/linux-unwind.h: Likewise.
	* config/sh/t-sh64: Delete.

From-SVN: r235640
parent cca535a4
2016-04-29 Oleg Endo <olegendo@gcc.gnu.org>
* config/sh/crt1.S: Remove SH5 support.
* config/sh/crti.S: Likewise.
* config/sh/crtn.S: Likewise.
* config/sh/lib1funcs-4-300.S: Likewise.
* config/sh/lib1funcs-Os-4-200.S: Likewise.
* config/sh/lib1funcs.S: Likewise.
* config/sh/linux-unwind.h: Likewise.
* config/sh/t-sh64: Delete.
2016-04-29 Claudiu Zissulescu <claziss@synopsys.com>
* config/arc/ieee-754/eqdf2.S: Handle FPX NaN.
......
......@@ -38,648 +38,6 @@ __timer_stack:
/* ;----------------------------------------
Normal newlib crt1.S */
#ifdef __SH5__
.section .data,"aw"
.global ___data
___data:
.section .rodata,"a"
.global ___rodata
___rodata:
#define ICCR_BASE 0x01600000
#define OCCR_BASE 0x01e00000
#define MMUIR_BASE 0x00000000
#define MMUDR_BASE 0x00800000
#define PTE_ENABLED 1
#define PTE_DISABLED 0
#define PTE_SHARED (1 << 1)
#define PTE_NOT_SHARED 0
#define PTE_CB_UNCACHEABLE 0
#define PTE_CB_DEVICE 1
#define PTE_CB_CACHEABLE_WB 2
#define PTE_CB_CACHEABLE_WT 3
#define PTE_SZ_4KB (0 << 3)
#define PTE_SZ_64KB (1 << 3)
#define PTE_SZ_1MB (2 << 3)
#define PTE_SZ_512MB (3 << 3)
#define PTE_PRR (1 << 6)
#define PTE_PRX (1 << 7)
#define PTE_PRW (1 << 8)
#define PTE_PRU (1 << 9)
#define SR_MMU_BIT 31
#define SR_BL_BIT 28
#define ALIGN_4KB (0xfff)
#define ALIGN_1MB (0xfffff)
#define ALIGN_512MB (0x1fffffff)
#define DYNACON_BASE 0x0f000000
#define DM_CB_DLINK_BASE 0x0c000000
#define DM_DB_DLINK_BASE 0x0b000000
#define FEMI_AREA_0 0x00000000
#define FEMI_AREA_1 0x04000000
#define FEMI_AREA_2 0x05000000
#define FEMI_AREA_3 0x06000000
#define FEMI_AREA_4 0x07000000
#define FEMI_CB 0x08000000
#define EMI_BASE 0X80000000
#define DMA_BASE 0X0e000000
#define CPU_BASE 0X0d000000
#define PERIPH_BASE 0X09000000
#define DMAC_BASE 0x0e000000
#define INTC_BASE 0x0a000000
#define CPRC_BASE 0x0a010000
#define TMU_BASE 0x0a020000
#define SCIF_BASE 0x0a030000
#define RTC_BASE 0x0a040000
/* LOAD_CONST32 (val, reg): emit a movi/shori pair that builds VAL in REG
   from its two 16-bit halves.  movi is given bits 31..16 of VAL and shori
   is given bits 15..0; any bits of VAL above bit 31 are masked off and are
   never emitted.  */
#define LOAD_CONST32(val, reg) \
movi ((val) >> 16) & 65535, reg; \
shori (val) & 65535, reg
/* LOAD_PTEH_VAL (sym, align, bits, scratch_reg, reg)
   LOAD_PTEL_VAL (sym, align, bits, scratch_reg, reg)

   Both macros expand to exactly the same sequence:
     1. load SYM into REG with LOAD_ADDR;
     2. load the ALIGN mask into SCRATCH_REG and combine it with REG using
	andc (presumably REG & ~ALIGN, i.e. round the address down to the
	page boundary - confirm against the SH-5 manual);
     3. load BITS into SCRATCH_REG and OR it into REG.
   SCRATCH_REG is therefore overwritten twice and holds BITS on exit.
   The two names only document which PTE word the value is meant for: the
   call sites pass the enable/shared flags to LOAD_PTEH_VAL and the
   cache-behaviour/size/protection flags to LOAD_PTEL_VAL.  */
#define LOAD_PTEH_VAL(sym, align, bits, scratch_reg, reg) \
LOAD_ADDR (sym, reg); \
LOAD_CONST32 ((align), scratch_reg); \
andc reg, scratch_reg, reg; \
LOAD_CONST32 ((bits), scratch_reg); \
or reg, scratch_reg, reg
#define LOAD_PTEL_VAL(sym, align, bits, scratch_reg, reg) \
LOAD_ADDR (sym, reg); \
LOAD_CONST32 ((align), scratch_reg); \
andc reg, scratch_reg, reg; \
LOAD_CONST32 ((bits), scratch_reg); \
or reg, scratch_reg, reg
/* SET_PTE (pte_addr_reg, pteh_val_reg, ptel_val_reg): program the PTE slot
   whose configuration address is held in PTE_ADDR_REG.  Three putcfg
   writes are issued, in this order:
     offset 0 <- r63
     offset 1 <- PTEL_VAL_REG
     offset 0 <- PTEH_VAL_REG
   NOTE(review): presumably r63 reads as zero, so the first write disables
   the slot while the other word is replaced and the last write re-enables
   it - confirm against the SH-5 manual.  */
#define SET_PTE(pte_addr_reg, pteh_val_reg, ptel_val_reg) \
putcfg pte_addr_reg, 0, r63; \
putcfg pte_addr_reg, 1, ptel_val_reg; \
putcfg pte_addr_reg, 0, pteh_val_reg
/* LOAD_ADDR (sym, reg): build the value of SYM in REG, 16 bits per
   instruction, starting with a movi for the topmost chunk and following
   with one shori per remaining chunk.

   __SH5__ == 64: four instructions cover bits 63..0 of SYM and the code
   that follows is placed in .text.
   otherwise:     two instructions cover bits 31..0 of SYM and the code
   that follows is assembled in SHmedia mode into .text..SHmedia32.

   SYM is used unparenthesised in the expansion, so compound expressions
   must be passed in parentheses, as the call sites in this file do.  */
#if __SH5__ == 64
.section .text,"ax"
#define LOAD_ADDR(sym, reg) \
movi (sym >> 48) & 65535, reg; \
shori (sym >> 32) & 65535, reg; \
shori (sym >> 16) & 65535, reg; \
shori sym & 65535, reg
#else
.mode SHmedia
.section .text..SHmedia32,"ax"
#define LOAD_ADDR(sym, reg) \
movi (sym >> 16) & 65535, reg; \
shori sym & 65535, reg
#endif
.global start
start:
LOAD_ADDR (_stack, r15)
#ifdef MMU_SUPPORT
! Set up the VM using the MMU and caches
! .vm_ep is first instruction to execute
! after VM initialization
pt/l .vm_ep, tr1
! Configure instruction cache (ICCR)
movi 3, r2
movi 0, r3
LOAD_ADDR (ICCR_BASE, r1)
putcfg r1, 0, r2
putcfg r1, 1, r3
! movi 7, r2 ! write through
! Configure operand cache (OCCR)
LOAD_ADDR (OCCR_BASE, r1)
putcfg r1, 0, r2
putcfg r1, 1, r3
! Disable all PTE translations
/* r1 and r2 walk the configuration space starting at MMUIR_BASE and
   MMUDR_BASE respectively (used below for the instruction and the data
   mappings); r3 is the number of slots still to clear.  */
LOAD_ADDR (MMUIR_BASE, r1)
LOAD_ADDR (MMUDR_BASE, r2)
movi 64, r3
pt/l .disable_ptes_loop, tr0
.disable_ptes_loop:
/* Write r63 to offset 0 of the current slot in both tables, then step
   both cursors by 16 to the next slot.  */
putcfg r1, 0, r63
putcfg r2, 0, r63
addi r1, 16, r1
addi r2, 16, r2
/* Count down from 64; loop again while r3 is still greater than r63.  */
addi r3, -1, r3
bgt r3, r63, tr0
LOAD_ADDR (MMUIR_BASE, r1)
! FEMI instruction mappings
! Area 0 - 1Mb cacheable at 0x00000000
! Area 1 - None
! Area 2 - 1Mb cacheable at 0x05000000
! - 1Mb cacheable at 0x05100000
! Area 3 - None
! Area 4 - None
! Map a 1Mb page for instructions at 0x00000000
LOAD_PTEH_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 1Mb page for instructions at 0x05000000
addi r1, 16, r1
LOAD_PTEH_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 1Mb page for instructions at 0x05100000
addi r1, 16, r1
LOAD_PTEH_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRX | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 512M page for instructions at EMI base
addi r1, 16, r1
LOAD_PTEH_VAL (EMI_BASE, ALIGN_512MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (EMI_BASE, ALIGN_512MB, PTE_CB_CACHEABLE_WB | PTE_SZ_512MB | PTE_PRX | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 4K page for instructions at DM_DB_DLINK_BASE
addi r1, 16, r1
LOAD_PTEH_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRX | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
LOAD_ADDR (MMUDR_BASE, r1)
! FEMI data mappings
! Area 0 - 1Mb cacheable at 0x00000000
! Area 1 - 1Mb device at 0x04000000
! Area 2 - 1Mb cacheable at 0x05000000
! - 1Mb cacheable at 0x05100000
! Area 3 - None
! Area 4 - None
! CB - 4Kb device at 0x08000000
! Map a 1Mb page for data at 0x00000000
LOAD_PTEH_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (FEMI_AREA_0, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 1Mb page for data at 0x04000000
addi r1, 16, r1
LOAD_PTEH_VAL (FEMI_AREA_1, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (FEMI_AREA_1, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 1Mb page for data at 0x05000000
addi r1, 16, r1
LOAD_PTEH_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (FEMI_AREA_2, ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 1Mb page for data at 0x05100000
addi r1, 16, r1
LOAD_PTEH_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL ((FEMI_AREA_2+0x100000), ALIGN_1MB, PTE_CB_CACHEABLE_WB | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 4K page for registers at 0x08000000
addi r1, 16, r1
LOAD_PTEH_VAL (FEMI_CB, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (FEMI_CB, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 512M page for data at EMI
addi r1, 16, r1
LOAD_PTEH_VAL (EMI_BASE, ALIGN_512MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (EMI_BASE, ALIGN_512MB, PTE_CB_CACHEABLE_WB | PTE_SZ_512MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 4K page for DYNACON at DYNACON_BASE
addi r1, 16, r1
LOAD_PTEH_VAL (DYNACON_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (DYNACON_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 4K page for data at DM_DB_DLINK_BASE
addi r1, 16, r1
LOAD_PTEH_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (DM_DB_DLINK_BASE, ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 4K page for data at DM_DB_DLINK_BASE+0x1000
addi r1, 16, r1
LOAD_PTEH_VAL ((DM_DB_DLINK_BASE+0x1000), ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL ((DM_DB_DLINK_BASE+0x1000), ALIGN_4KB, PTE_CB_UNCACHEABLE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 4K page for stack DM_DB_DLINK_BASE+0x2000
addi r1, 16, r1
LOAD_PTEH_VAL ((DM_DB_DLINK_BASE+0x2000), ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL ((DM_DB_DLINK_BASE+0x2000), ALIGN_4KB, PTE_CB_CACHEABLE_WB | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK
! 0x0c000000 - 0x0c0fffff
addi r1, 16, r1
LOAD_PTEH_VAL (DM_CB_DLINK_BASE, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (DM_CB_DLINK_BASE, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK
! 0x0c100000 - 0x0c1fffff
addi r1, 16, r1
LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x100000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK
! 0x0c200000 - 0x0c2fffff
addi r1, 16, r1
LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x200000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x200000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK
! 0x0c400000 - 0x0c4fffff
addi r1, 16, r1
LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x400000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x400000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 1M page for DM_CB_BASE2 at DM_CB_DLINK
! 0x0c800000 - 0x0c8fffff
addi r1, 16, r1
LOAD_PTEH_VAL ((DM_CB_DLINK_BASE+0x800000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL ((DM_CB_DLINK_BASE+0x800000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map a 4K page for DMA control registers
addi r1, 16, r1
LOAD_PTEH_VAL (DMA_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (DMA_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map lots of 4K pages for peripherals
! /* peripheral */
addi r1, 16, r1
LOAD_PTEH_VAL (PERIPH_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (PERIPH_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! /* dmac */
addi r1, 16, r1
LOAD_PTEH_VAL (DMAC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (DMAC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! /* intc */
addi r1, 16, r1
LOAD_PTEH_VAL (INTC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (INTC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! /* rtc */
addi r1, 16, r1
LOAD_PTEH_VAL (RTC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (RTC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! /* tmu */
addi r1, 16, r1
LOAD_PTEH_VAL (TMU_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (TMU_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! /* scif */
addi r1, 16, r1
LOAD_PTEH_VAL (SCIF_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (SCIF_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! /* cprc */
addi r1, 16, r1
LOAD_PTEH_VAL (CPRC_BASE, ALIGN_4KB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (CPRC_BASE, ALIGN_4KB, PTE_CB_DEVICE | PTE_SZ_4KB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Map CPU WPC registers
addi r1, 16, r1
LOAD_PTEH_VAL (CPU_BASE, ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL (CPU_BASE, ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
addi r1, 16, r1
LOAD_PTEH_VAL ((CPU_BASE+0x100000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL ((CPU_BASE+0x100000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
addi r1, 16, r1
LOAD_PTEH_VAL ((CPU_BASE+0x200000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL ((CPU_BASE+0x200000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
addi r1, 16, r1
LOAD_PTEH_VAL ((CPU_BASE+0x400000), ALIGN_1MB, PTE_ENABLED | PTE_NOT_SHARED, r25, r2)
LOAD_PTEL_VAL ((CPU_BASE+0x400000), ALIGN_1MB, PTE_CB_DEVICE | PTE_SZ_1MB | PTE_PRR | PTE_PRW | PTE_PRU, r25, r3)
SET_PTE (r1, r2, r3)
! Switch over to virtual addressing and enabled cache
/* First write: ssr = sr | (1 << SR_BL_BIT).  */
getcon sr, r1
movi 1, r2
shlli r2, SR_BL_BIT, r2
or r1, r2, r1
putcon r1, ssr
/* Second write: ssr = sr | (1 << SR_MMU_BIT).
   NOTE(review): sr is read again here instead of reusing the value just
   written, so this putcon replaces the ssr value stored above.  The BL
   bit therefore ends up in ssr only if it is already set in sr at this
   point.  The code at .vm_ep clears bit 28 of sr afterwards, which
   suggests BL was meant to be set across the rte - confirm whether the
   re-read of sr is intended.  */
getcon sr, r1
movi 1, r2
shlli r2, SR_MMU_BIT, r2
or r1, r2, r1
putcon r1, ssr
/* spc = address of .vm_ep (tr1 was loaded with it by the pt/l at the top
   of the MMU_SUPPORT block).  */
gettr tr1, r1
putcon r1, spc
/* Presumably rte resumes at spc with sr reloaded from ssr, which is what
   turns the MMU on - confirm against the SH-5 manual.  */
synco
rte
! VM entry point. From now on, we are in VM mode.
.vm_ep:
! Install the trap handler, by seeding vbr with the
! correct value, and by assigning sr.bl = 0.
LOAD_ADDR (vbr_start, r1)
putcon r1, vbr
movi ~(1<<28), r1
getcon sr, r2
and r1, r2, r2
putcon r2, sr
#endif /* MMU_SUPPORT */
/* Preload the branch targets used by the rest of start: tr0 for the bss
   loop just below, tr5 for _init, tr6 for the argv/main wrapper and tr7
   for _exit.  tr0 is reloaded for other purposes further down.  */
pt/l .Lzero_bss_loop, tr0
pt/l GLOBAL(_init), tr5
pt/l ___setup_argv_and_call_main, tr6
pt/l _exit, tr7
! zero out bss
/* r0 is the write cursor, starting at _edata; r1 is the limit, _end.  */
LOAD_ADDR (_edata, r0)
LOAD_ADDR (_end, r1)
.Lzero_bss_loop:
/* Store r63 at r0 (index register r63), advance the cursor by 8 and loop
   while the limit is still greater than the cursor.
   NOTE(review): the store comes before the test, so one store is done
   even when _edata == _end, and since the cursor moves in steps of 8 the
   last store can reach past _end when the span is not a multiple of 8
   (assuming stx.q writes 8 bytes).  Presumably the linker script keeps
   both symbols suitably aligned - confirm.  */
stx.q r0, r63, r63
addi r0, 8, r0
bgt/l r1, r0, tr0
/* r26 / r27 receive the addresses of the ___data and ___rodata markers
   defined at the start of .data and .rodata near the top of this file.
   Their consumer is not visible in this file section.  */
LOAD_ADDR (___data, r26)
LOAD_ADDR (___rodata, r27)
#ifdef __SH_FPU_ANY__
getcon sr, r0
! enable the FP unit, by resetting SR.FD
! also zero out SR.FR, SR.SZ and SR.PR, as mandated by the ABI
movi 0, r1
shori 0xf000, r1
andc r0, r1, r0
putcon r0, sr
#if __SH5__ == 32
pt/l ___set_fpscr, tr0
movi 0, r4
blink tr0, r18
#endif
#endif
! arrange for exit to call fini
pt/l _atexit, tr1
LOAD_ADDR (GLOBAL(_fini), r2)
blink tr1, r18
! call init
blink tr5, r18
! call the mainline
blink tr6, r18
! call exit
blink tr7, r18
! We should never return from _exit but in case we do we would enter the
! following tight loop. This avoids executing any data that might follow.
limbo:
pt/l limbo, tr0
blink tr0, r63
#ifdef MMU_SUPPORT
! All these traps are handled in the same place.
.balign 256
vbr_start:
pt/l handler, tr0 ! tr0 trashed.
blink tr0, r63
.balign 256
vbr_100:
pt/l handler, tr0 ! tr0 trashed.
blink tr0, r63
vbr_100_end:
.balign 256
vbr_200:
pt/l handler, tr0 ! tr0 trashed.
blink tr0, r63
.balign 256
vbr_300:
pt/l handler, tr0 ! tr0 trashed.
blink tr0, r63
.balign 256
/* Common trap entry.  Each of the vector stubs above (vbr_start, vbr_100,
   vbr_200, vbr_300) branches to handler, and vbr_400 falls straight into
   it.  */
vbr_400: ! Should be at vbr+0x400
handler:
/* If the trap handler is there call it */
/* NOTE(review): __superh_trap_handler is defined further down in this
   same MMU_SUPPORT block, so its address is presumably never zero and the
   branch to chandler below is not expected to be taken - confirm.  */
LOAD_ADDR (__superh_trap_handler, r2)
pta chandler,tr2
beq r2, r63, tr2 /* If zero, ie not present branch around to chandler */
/* Now call the trap handler with as much of the context unchanged as possible.
Move trapping address into R18 to make it look like the trap point */
getcon spc, r18
pt/l __superh_trap_handler, tr0
/* The return address is linked into r7 (not r18); the handler below
   returns through r7 accordingly.  */
blink tr0, r7
/* Reached either directly from the beq above or when the trap handler
   returns: read spc into r62 and expevt into r2, then branch to _exit
   without recording a return address (link register r63).  */
chandler:
getcon spc, r62
getcon expevt, r2
pt/l _exit, tr0
blink tr0, r63
/* Simulated trap handler */
.section .text..SHmedia32,"ax"
gcc2_compiled.:
.section .debug_abbrev
.Ldebug_abbrev0:
.section .text..SHmedia32
.Ltext0:
.section .debug_info
.Ldebug_info0:
.section .debug_line
.Ldebug_line0:
.section .text..SHmedia32,"ax"
.align 5
/* __superh_trap_handler: the simulated trap handler called from handler
   above with the return address in r7.  The hand-written debug info that
   follows describes it as a function with a single "trap_reason"
   parameter of type unsigned int.

   The body only builds a small frame, stores r2 into it and tears the
   frame down again before returning through r7; r14 and r15 are restored
   to their entry values.  */
.global __superh_trap_handler
.type __superh_trap_handler,@function
__superh_trap_handler:
.LFB1:
/* NOTE(review): this tr0 value is replaced by the ptabs r7 below before
   any branch uses it.  */
ptabs r18, tr0
/* Push r14: allocate 8 bytes and save r14 at offset 4.  */
addi.l r15, -8, r15
st.l r15, 4, r14
/* Allocate 8 more bytes and point r14 at the new frame.  */
addi.l r15, -8, r15
add.l r15, r63, r14
/* Spill r2 to offset 0 of the frame.  */
st.l r14, 0, r2
/* Return target is the address the caller linked into r7.  */
ptabs r7, tr0
/* Release the inner 8 bytes, reload the saved r14, release the outer 8
   bytes and return.  */
addi.l r14, 8, r14
add.l r14, r63, r15
ld.l r15, 4, r14
addi.l r15, 8, r15
blink tr0, r63
.LFE1:
.Lfe1:
.size __superh_trap_handler,.Lfe1-__superh_trap_handler
.section .text..SHmedia32
.Letext0:
.section .debug_info
.ualong 0xa7
.uaword 0x2
.ualong .Ldebug_abbrev0
.byte 0x4
.byte 0x1
.ualong .Ldebug_line0
.ualong .Letext0
.ualong .Ltext0
.string "trap_handler.c"
.string "xxxxxxxxxxxxxxxxxxxxxxxxxxxx"
.string "GNU C 2.97-sh5-010522"
.byte 0x1
.byte 0x2
.ualong 0x9a
.byte 0x1
.string "_superh_trap_handler"
.byte 0x1
.byte 0x2
.byte 0x1
.ualong .LFB1
.ualong .LFE1
.byte 0x1
.byte 0x5e
.byte 0x3
.string "trap_reason"
.byte 0x1
.byte 0x1
.ualong 0x9a
.byte 0x2
.byte 0x91
.byte 0x0
.byte 0x0
.byte 0x4
.string "unsigned int"
.byte 0x4
.byte 0x7
.byte 0x0
.section .debug_abbrev
.byte 0x1
.byte 0x11
.byte 0x1
.byte 0x10
.byte 0x6
.byte 0x12
.byte 0x1
.byte 0x11
.byte 0x1
.byte 0x3
.byte 0x8
.byte 0x1b
.byte 0x8
.byte 0x25
.byte 0x8
.byte 0x13
.byte 0xb
.byte 0,0
.byte 0x2
.byte 0x2e
.byte 0x1
.byte 0x1
.byte 0x13
.byte 0x3f
.byte 0xc
.byte 0x3
.byte 0x8
.byte 0x3a
.byte 0xb
.byte 0x3b
.byte 0xb
.byte 0x27
.byte 0xc
.byte 0x11
.byte 0x1
.byte 0x12
.byte 0x1
.byte 0x40
.byte 0xa
.byte 0,0
.byte 0x3
.byte 0x5
.byte 0x0
.byte 0x3
.byte 0x8
.byte 0x3a
.byte 0xb
.byte 0x3b
.byte 0xb
.byte 0x49
.byte 0x13
.byte 0x2
.byte 0xa
.byte 0,0
.byte 0x4
.byte 0x24
.byte 0x0
.byte 0x3
.byte 0x8
.byte 0xb
.byte 0xb
.byte 0x3e
.byte 0xb
.byte 0,0
.byte 0
.section .debug_pubnames
.ualong 0x27
.uaword 0x2
.ualong .Ldebug_info0
.ualong 0xab
.ualong 0x5b
.string "_superh_trap_handler"
.ualong 0x0
.section .debug_aranges
.ualong 0x1c
.uaword 0x2
.ualong .Ldebug_info0
.byte 0x4
.byte 0x0
.uaword 0x0,0
.ualong .Ltext0
.ualong .Letext0-.Ltext0
.ualong 0x0
.ualong 0x0
.ident "GCC: (GNU) 2.97-sh5-010522"
#endif /* MMU_SUPPORT */
#else /* ! __SH5__ */
! make a place to keep any previous value of the vbr register
! this will only have a value if it has been set by redboot (for example)
.section .bss
......@@ -1364,4 +722,3 @@ __superh_trap_handler:
.ualong 0x0
.ualong 0x0
#endif /* VBR_SETUP */
#endif /* ! __SH5__ */
......@@ -47,19 +47,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#endif
.global GLOBAL(_init)
GLOBAL(_init):
#if __SHMEDIA__
addi r15, -16, r15
st.q r15, 8, r14
st.q r15, 0, r18
add r15, r63, r14
#elif __SH5__ && ! __SHMEDIA__
mov r15,r0
add #-8,r15
mov.l r14,@-r0
sts.l pr,@-r0
mov r15,r14
nop
#else
#ifdef __ELF__
mov.l r12,@-r15
mova 0f,r0
......@@ -78,7 +65,6 @@ GLOBAL(_init):
0: .long _GLOBAL_OFFSET_TABLE_
1:
#endif
#endif /* __SHMEDIA__ */
.section .fini
/* The alignment below can't be smaller, otherwise the mova below
......@@ -92,19 +78,6 @@ GLOBAL(_init):
#endif
.global GLOBAL(_fini)
GLOBAL(_fini):
#if __SHMEDIA__
addi r15, -16, r15
st.q r15, 8, r14
st.q r15, 0, r18
add r15, r63, r14
#elif __SH5__ && ! __SHMEDIA__
mov r15,r0
add #-8,r15
mov.l r14,@-r0
sts.l pr,@-r0
mov r15,r14
nop
#else
#ifdef __ELF__
mov.l r12,@-r15
mova 0f,r0
......@@ -123,4 +96,3 @@ GLOBAL(_fini):
0: .long _GLOBAL_OFFSET_TABLE_
1:
#endif
#endif /* __SHMEDIA__ */
......@@ -25,20 +25,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
/* See an explanation about .init and .fini in crti.S. */
.section .init
#if __SHMEDIA__
add r14, r63, r15
ld.q r15, 0, r18
ptabs r18, tr0
ld.q r15, 8, r14
addi r15, 16, r15
blink tr0, r63
#elif __SH5__ && ! __SHMEDIA__
mov r14,r15
lds.l @r14+,pr
mov.l @r14,r14
rts
add #8,r15
#else
mov r14,r15
lds.l @r15+,pr
mov.l @r15+,r14
......@@ -48,23 +34,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#else
nop
#endif
#endif /* __SHMEDIA__ */
.section .fini
#if __SHMEDIA__
add r14, r63, r15
ld.q r15, 0, r18
ptabs r18, tr0
ld.q r15, 8, r14
addi r15, 16, r15
blink tr0, r63
#elif __SH5__ && ! __SHMEDIA__
mov r14,r15
lds.l @r14+,pr
mov.l @r14,r14
rts
add #8,r15
#else
mov r14,r15
lds.l @r15+,pr
mov.l @r15+,r14
......@@ -74,4 +45,3 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#else
nop
#endif
#endif /* __SHMEDIA__ */
......@@ -25,7 +25,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include "lib1funcs.h"
#if !__SHMEDIA__
#ifdef L_div_table
#if defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
/* This code used shld, thus is not suitable for SH1 / SH2. */
......@@ -933,4 +932,3 @@ LOCAL(div_table_inv):
#endif /* SH3 / SH4 */
#endif /* L_div_table */
#endif /* !__SHMEDIA__ */
......@@ -25,7 +25,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include "lib1funcs.h"
#if !__SHMEDIA__
#ifdef L_udivsi3_i4i
/* 88 bytes; sh4-200 cycle counts:
......@@ -319,4 +318,3 @@ L1:
ENDFUNC(GLOBAL(sdivsi3_i4i))
#endif /* __SH_FPU_DOUBLE__ */
#endif /* L_sdivsi3_i4i */
#endif /* !__SHMEDIA__ */
......@@ -46,7 +46,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define NO_FPSCR_VALUES
#endif
#if ! __SH5__
#ifdef L_ashiftrt
.global GLOBAL(ashiftrt_r4_0)
.global GLOBAL(ashiftrt_r4_1)
......@@ -1002,7 +1001,6 @@ hiset: sts macl,r0 ! r0 = bb*dd
ENDFUNC(GLOBAL(mulsi3))
#endif
#endif /* ! __SH5__ */
/*------------------------------------------------------------------------------
32 bit signed integer division that uses FPU double precision division. */
......@@ -1028,17 +1026,13 @@ GLOBAL(sdivsi3_i4):
ENDFUNC(GLOBAL(sdivsi3_i4))
#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__) || (defined (__SH5__) && ! defined __SH4_NOFPU__)
#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
/* This variant is used when FPSCR.PR = 0 (single precision) is the default
setting.
Args in r4 and r5, result in fpul, clobber r2, dr0, dr2.
For this to work, we must temporarily switch the FPU to double precision,
but we had better not touch FPSCR.FR. See PR 6526. */
#if ! __SH5__ || __SH5__ == 32
#if __SH5__
.mode SHcompact
#endif
.global GLOBAL(sdivsi3_i4)
HIDDEN_FUNC(GLOBAL(sdivsi3_i4))
GLOBAL(sdivsi3_i4):
......@@ -1074,7 +1068,6 @@ GLOBAL(sdivsi3_i4):
#endif /* __SH4A__ */
ENDFUNC(GLOBAL(sdivsi3_i4))
#endif /* ! __SH5__ || __SH5__ == 32 */
#endif /* ! __SH4__ || __SH2A__ */
#endif /* L_sdivsi3_i4 */
......@@ -1091,226 +1084,8 @@ GLOBAL(sdivsi3_i4):
!! args in r4 and r5, result in r0 clobber r1, r2, r3, and t bit
.global GLOBAL(sdivsi3)
#if __SHMEDIA__
#if __SH5__ == 32
.section .text..SHmedia32,"ax"
#else
.text
#endif
.align 2
#if 0
/* The assembly code that follows is a hand-optimized version of the C
code that follows. Note that the registers that are modified are
exactly those listed as clobbered in the patterns divsi3_i1 and
divsi3_i1_media.
int __sdivsi3 (i, j)
int i, j;
{
register unsigned long long r18 asm ("r18");
register unsigned long long r19 asm ("r19");
register unsigned long long r0 asm ("r0") = 0;
register unsigned long long r1 asm ("r1") = 1;
register int r2 asm ("r2") = i >> 31;
register int r3 asm ("r3") = j >> 31;
r2 = r2 ? r2 : r1;
r3 = r3 ? r3 : r1;
r18 = i * r2;
r19 = j * r3;
r2 *= r3;
r19 <<= 31;
r1 <<= 31;
do
if (r18 >= r19)
r0 |= r1, r18 -= r19;
while (r19 >>= 1, r1 >>= 1);
return r2 * (int)r0;
}
*/
GLOBAL(sdivsi3):
pt/l LOCAL(sdivsi3_dontadd), tr2
pt/l LOCAL(sdivsi3_loop), tr1
ptabs/l r18, tr0
movi 0, r0
movi 1, r1
shari.l r4, 31, r2
shari.l r5, 31, r3
cmveq r2, r1, r2
cmveq r3, r1, r3
muls.l r4, r2, r18
muls.l r5, r3, r19
muls.l r2, r3, r2
shlli r19, 31, r19
shlli r1, 31, r1
LOCAL(sdivsi3_loop):
bgtu r19, r18, tr2
or r0, r1, r0
sub r18, r19, r18
LOCAL(sdivsi3_dontadd):
shlri r1, 1, r1
shlri r19, 1, r19
bnei r1, 0, tr1
muls.l r0, r2, r0
add.l r0, r63, r0
blink tr0, r63
#elif 0 /* ! 0 */
// inputs: r4,r5
// clobbered: r1,r2,r3,r18,r19,r20,r21,r25,tr0
// result in r0
GLOBAL(sdivsi3):
// can create absolute value without extra latency,
// but dependent on proper sign extension of inputs:
// shari.l r5,31,r2
// xor r5,r2,r20
// sub r20,r2,r20 // r20 is now absolute value of r5, zero-extended.
shari.l r5,31,r2
ori r2,1,r2
muls.l r5,r2,r20 // r20 is now absolute value of r5, zero-extended.
movi 0xffffffffffffbb0c,r19 // shift count eqiv 76
shari.l r4,31,r3
nsb r20,r0
shlld r20,r0,r25
shlri r25,48,r25
sub r19,r25,r1
mmulfx.w r1,r1,r2
mshflo.w r1,r63,r1
// If r4 was to be used in-place instead of r21, could use this sequence
// to compute absolute:
// sub r63,r4,r19 // compute absolute value of r4
// shlri r4,32,r3 // into lower 32 bit of r4, keeping
// mcmv r19,r3,r4 // the sign in the upper 32 bits intact.
ori r3,1,r3
mmulfx.w r25,r2,r2
sub r19,r0,r0
muls.l r4,r3,r21
msub.w r1,r2,r2
addi r2,-2,r1
mulu.l r21,r1,r19
mmulfx.w r2,r2,r2
shlli r1,15,r1
shlrd r19,r0,r19
mulu.l r19,r20,r3
mmacnfx.wl r25,r2,r1
ptabs r18,tr0
sub r21,r3,r25
mulu.l r25,r1,r2
addi r0,14,r0
xor r4,r5,r18
shlrd r2,r0,r2
mulu.l r2,r20,r3
add r19,r2,r19
shari.l r18,31,r18
sub r25,r3,r25
mulu.l r25,r1,r2
sub r25,r20,r25
add r19,r18,r19
shlrd r2,r0,r2
mulu.l r2,r20,r3
addi r25,1,r25
add r19,r2,r19
cmpgt r25,r3,r25
add.l r19,r25,r0
xor r0,r18,r0
blink tr0,r63
#else /* ! 0 && ! 0 */
// inputs: r4,r5
// clobbered: r1,r18,r19,r20,r21,r25,tr0
// result in r0
HIDDEN_FUNC(GLOBAL(sdivsi3_2))
#ifndef __pic__
FUNC(GLOBAL(sdivsi3))
GLOBAL(sdivsi3): /* this is the shcompact entry point */
// The special SHmedia entry point sdivsi3_1 prevents accidental linking
// with the SHcompact implementation, which clobbers tr1 / tr2.
.global GLOBAL(sdivsi3_1)
GLOBAL(sdivsi3_1):
.global GLOBAL(div_table_internal)
movi (GLOBAL(div_table_internal) >> 16) & 65535, r20
shori GLOBAL(div_table_internal) & 65535, r20
#endif
.global GLOBAL(sdivsi3_2)
// div_table in r20
// clobbered: r1,r18,r19,r21,r25,tr0
GLOBAL(sdivsi3_2):
nsb r5, r1
shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
ldx.ub r20, r21, r19 // u0.8
shari r25, 32, r25 // normalize to s2.30
shlli r21, 1, r21
muls.l r25, r19, r19 // s2.38
ldx.w r20, r21, r21 // s2.14
ptabs r18, tr0
shari r19, 24, r19 // truncate to s2.14
sub r21, r19, r19 // some 11 bit inverse in s1.14
muls.l r19, r19, r21 // u0.28
sub r63, r1, r1
addi r1, 92, r1
muls.l r25, r21, r18 // s2.58
shlli r19, 45, r19 // multiply by two and convert to s2.58
/* bubble */
sub r19, r18, r18
shari r18, 28, r18 // some 22 bit inverse in s1.30
muls.l r18, r25, r0 // s2.60
muls.l r18, r4, r25 // s32.30
/* bubble */
shari r0, 16, r19 // s-16.44
muls.l r19, r18, r19 // s-16.74
shari r25, 63, r0
shari r4, 14, r18 // s19.-14
shari r19, 30, r19 // s-16.44
muls.l r19, r18, r19 // s15.30
xor r21, r0, r21 // You could also use the constant 1 << 27.
add r21, r25, r21
sub r21, r19, r21
shard r21, r1, r21
sub r21, r0, r0
blink tr0, r63
#ifndef __pic__
ENDFUNC(GLOBAL(sdivsi3))
#endif
ENDFUNC(GLOBAL(sdivsi3_2))
#endif
#elif __SHMEDIA__
/* m5compact-nofpu */
// clobbered: r18,r19,r20,r21,r25,tr0,tr1,tr2
.mode SHmedia
.section .text..SHmedia32,"ax"
.align 2
FUNC(GLOBAL(sdivsi3))
GLOBAL(sdivsi3):
pt/l LOCAL(sdivsi3_dontsub), tr0
pt/l LOCAL(sdivsi3_loop), tr1
ptabs/l r18,tr2
shari.l r4,31,r18
shari.l r5,31,r19
xor r4,r18,r20
xor r5,r19,r21
sub.l r20,r18,r20
sub.l r21,r19,r21
xor r18,r19,r19
shlli r21,32,r25
addi r25,-1,r21
addz.l r20,r63,r20
LOCAL(sdivsi3_loop):
shlli r20,1,r20
bgeu/u r21,r20,tr0
sub r20,r21,r20
LOCAL(sdivsi3_dontsub):
addi.l r25,-1,r25
bnei r25,-32,tr1
xor r20,r19,r20
sub.l r20,r19,r0
blink tr2,r63
ENDFUNC(GLOBAL(sdivsi3))
#else /* ! __SHMEDIA__ */
FUNC(GLOBAL(sdivsi3))
GLOBAL(sdivsi3):
mov r4,r1
......@@ -1397,7 +1172,6 @@ div0: rts
mov #0,r0
ENDFUNC(GLOBAL(sdivsi3))
#endif /* ! __SHMEDIA__ */
#endif /* L_sdivsi3 */
/*------------------------------------------------------------------------------
......@@ -1451,28 +1225,6 @@ L1:
ENDFUNC(GLOBAL(udivsi3_i4))
#elif defined (__SH5__) && ! defined (__SH4_NOFPU__) && ! defined (__SH2A_NOFPU__)
#if ! __SH5__ || __SH5__ == 32
!! args in r4 and r5, result in fpul, clobber r20, r21, dr0, fr33
.mode SHmedia
.global GLOBAL(udivsi3_i4)
HIDDEN_FUNC(GLOBAL(udivsi3_i4))
GLOBAL(udivsi3_i4):
addz.l r4,r63,r20
addz.l r5,r63,r21
fmov.qd r20,dr0
fmov.qd r21,dr32
ptabs r18,tr0
float.qd dr0,dr0
float.qd dr32,dr32
fdiv.d dr0,dr32,dr0
ftrc.dq dr0,dr32
fmov.s fr33,fr32
blink tr0,r63
ENDFUNC(GLOBAL(udivsi3_i4))
#endif /* ! __SH5__ || __SH5__ == 32 */
#elif defined (__SH2A_SINGLE__) || defined (__SH2A_SINGLE_ONLY__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
/* This variant is used when FPSCR.PR = 0 (single precision) is the default
setting.
......@@ -1581,133 +1333,6 @@ L1:
.global GLOBAL(udivsi3)
HIDDEN_FUNC(GLOBAL(udivsi3))
#if __SHMEDIA__
#if __SH5__ == 32
.section .text..SHmedia32,"ax"
#else
.text
#endif
.align 2
#if 0
/* The assembly code that follows is a hand-optimized version of the C
code that follows. Note that the registers that are modified are
exactly those listed as clobbered in the patterns udivsi3_i1 and
udivsi3_i1_media.
unsigned
__udivsi3 (i, j)
unsigned i, j;
{
register unsigned long long r0 asm ("r0") = 0;
register unsigned long long r18 asm ("r18") = 1;
register unsigned long long r4 asm ("r4") = i;
register unsigned long long r19 asm ("r19") = j;
r19 <<= 31;
r18 <<= 31;
do
if (r4 >= r19)
r0 |= r18, r4 -= r19;
while (r19 >>= 1, r18 >>= 1);
return r0;
}
*/
GLOBAL(udivsi3):
pt/l LOCAL(udivsi3_dontadd), tr2
pt/l LOCAL(udivsi3_loop), tr1
ptabs/l r18, tr0
movi 0, r0
movi 1, r18
addz.l r5, r63, r19
addz.l r4, r63, r4
shlli r19, 31, r19
shlli r18, 31, r18
LOCAL(udivsi3_loop):
bgtu r19, r4, tr2
or r0, r18, r0
sub r4, r19, r4
LOCAL(udivsi3_dontadd):
shlri r18, 1, r18
shlri r19, 1, r19
bnei r18, 0, tr1
blink tr0, r63
#else
GLOBAL(udivsi3):
// inputs: r4,r5
// clobbered: r18,r19,r20,r21,r22,r25,tr0
// result in r0.
addz.l r5,r63,r22
nsb r22,r0
shlld r22,r0,r25
shlri r25,48,r25
movi 0xffffffffffffbb0c,r20 // shift count eqiv 76
sub r20,r25,r21
mmulfx.w r21,r21,r19
mshflo.w r21,r63,r21
ptabs r18,tr0
mmulfx.w r25,r19,r19
sub r20,r0,r0
/* bubble */
msub.w r21,r19,r19
addi r19,-2,r21 /* It would be nice for scheduling to do this add to r21
before the msub.w, but we need a different value for
r19 to keep errors under control. */
mulu.l r4,r21,r18
mmulfx.w r19,r19,r19
shlli r21,15,r21
shlrd r18,r0,r18
mulu.l r18,r22,r20
mmacnfx.wl r25,r19,r21
/* bubble */
sub r4,r20,r25
mulu.l r25,r21,r19
addi r0,14,r0
/* bubble */
shlrd r19,r0,r19
mulu.l r19,r22,r20
add r18,r19,r18
/* bubble */
sub.l r25,r20,r25
mulu.l r25,r21,r19
addz.l r25,r63,r25
sub r25,r22,r25
shlrd r19,r0,r19
mulu.l r19,r22,r20
addi r25,1,r25
add r18,r19,r18
cmpgt r25,r20,r25
add.l r18,r25,r0
blink tr0,r63
#endif
#elif __SHMEDIA__
/* m5compact-nofpu - more emphasis on code size than on speed, but don't
ignore speed altogether - div1 needs 9 cycles, subc 7 and rotcl 4.
So use a short shmedia loop. */
// clobbered: r20,r21,r25,tr0,tr1,tr2
.mode SHmedia
.section .text..SHmedia32,"ax"
.align 2
GLOBAL(udivsi3):
pt/l LOCAL(udivsi3_dontsub), tr0
pt/l LOCAL(udivsi3_loop), tr1
ptabs/l r18,tr2
shlli r5,32,r25
addi r25,-1,r21
addz.l r4,r63,r20
LOCAL(udivsi3_loop):
shlli r20,1,r20
bgeu/u r21,r20,tr0
sub r20,r21,r20
LOCAL(udivsi3_dontsub):
addi.l r25,-1,r25
bnei r25,-32,tr1
add.l r20,r63,r0
blink tr2,r63
#else /* ! __SHMEDIA__ */
LOCAL(div8):
div1 r5,r4
LOCAL(div7):
......@@ -1769,320 +1394,11 @@ LOCAL(large_divisor):
rotcl r0
ENDFUNC(GLOBAL(udivsi3))
#endif /* ! __SHMEDIA__ */
#endif /* L_udivsi3 */
#ifdef L_udivdi3
#if __SHMEDIA__
.mode SHmedia
.section .text..SHmedia32,"ax"
.align 2
.global GLOBAL(udivdi3)
FUNC(GLOBAL(udivdi3))
/* Unsigned 64-bit division, SHmedia.
   Operands are in r2 and r3 and the result is left in r2.  r3 is the value
   that is normalized and multiplied back, so this computes r2 / r3.
   As the inline comments state, a 31-bit unsigned reciprocal of the
   normalized divisor is built in r1 and then used for successive divide
   steps.  Two paths exist:
     - small divisor (fall-through): quotient accumulated in r8 over three
       divide steps;
     - LOCAL(large_divisor): taken when r9 = 32 - r22 is greater than zero
       (r22 being the normalization shift from nsb); needs fewer steps plus
       a final adjustment.
   Only tr0 is used as a target register, which divdi3 relies on when it
   keeps its own return address in tr1 across the call.
   NOTE(review): hand-scheduled and numerically delicate; see Note 1 after
   ENDFUNC for the mcmpgt.l / andc trick.  */
GLOBAL(udivdi3):
HIDDEN_ALIAS(udivdi3_internal,udivdi3)
/* Normalize the divisor: r22 = shift count, r6 = r3 << r22. */
shlri r3,1,r4
nsb r4,r22
shlld r3,r22,r6
shlri r6,49,r5
movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
sub r21,r5,r1
mmulfx.w r1,r1,r4
mshflo.w r1,r63,r1
sub r63,r22,r20 // r63 == 64 % 64
mmulfx.w r5,r4,r4
pta LOCAL(large_divisor),tr0
addi r20,32,r9
msub.w r1,r4,r1
madd.w r1,r1,r1
mmulfx.w r1,r1,r4
shlri r6,32,r7
bgt/u r9,r63,tr0 // large_divisor
mmulfx.w r5,r4,r4
shlri r2,32+14,r19
addi r22,-31,r0
msub.w r1,r4,r1
mulu.l r1,r7,r4
addi r1,-3,r5
mulu.l r5,r19,r5
sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
the case may be, %0000000000000000 000.11111111111, still */
muls.l r1,r4,r4 /* leaving at least one sign bit. */
mulu.l r5,r3,r8
mshalds.l r1,r21,r1
shari r4,26,r4
shlld r8,r0,r8
add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
sub r2,r8,r2
/* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
shlri r2,22,r21
mulu.l r21,r1,r21
shlld r5,r0,r8
addi r20,30-22,r0
shlrd r21,r0,r21
mulu.l r21,r3,r5
add r8,r21,r8
mcmpgt.l r21,r63,r21 // See Note 1
addi r20,30,r0
mshfhi.l r63,r21,r21
sub r2,r5,r2
andc r2,r21,r2
/* small divisor: need a third divide step */
mulu.l r2,r1,r7
ptabs r18,tr0
addi r2,1,r2
shlrd r7,r0,r7
mulu.l r7,r3,r5
add r8,r7,r8
sub r2,r3,r2
/* Final 0/1 correction of the quotient, then return it in r2. */
cmpgt r2,r5,r5
add r8,r5,r2
/* could test r3 here to check for divide by zero. */
blink tr0,r63
LOCAL(large_divisor):
/* Large-divisor path: r25 holds the dividend shifted right by r9. */
mmulfx.w r5,r4,r4
shlrd r2,r9,r25
shlri r25,32,r8
msub.w r1,r4,r1
mulu.l r1,r7,r4
addi r1,-3,r5
mulu.l r5,r8,r5
sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
the case may be, %0000000000000000 000.11111111111, still */
muls.l r1,r4,r4 /* leaving at least one sign bit. */
shlri r5,14-1,r8
mulu.l r8,r7,r5
mshalds.l r1,r21,r1
shari r4,26,r4
add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
sub r25,r5,r25
/* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
shlri r25,22,r21
mulu.l r21,r1,r21
pta LOCAL(no_lo_adj),tr0
addi r22,32,r0
shlri r21,40,r21
mulu.l r21,r7,r5
add r8,r21,r8
shlld r2,r0,r2
sub r25,r5,r25
/* If r7 > r25 (unsigned) no extra quotient increment is needed. */
bgtu/u r7,r25,tr0 // no_lo_adj
addi r8,1,r8
sub r25,r7,r25
LOCAL(no_lo_adj):
mextr4 r2,r25,r2
/* large_divisor: only needs a few adjustments. */
mulu.l r8,r6,r5
ptabs r18,tr0
/* bubble */
/* Subtract 1 from the quotient when r5 > r2 (unsigned); result in r2. */
cmpgtu r5,r2,r5
sub r8,r5,r2
blink tr0,r63
ENDFUNC(GLOBAL(udivdi3))
/* Note 1: To shift the result of the second divide stage so that the result
always fits into 32 bits, yet we still reduce the rest sufficiently
would require a lot of instructions to do the shifts just right. Using
the full 64 bit shift result to multiply with the divisor would require
four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
Fortunately, if the upper 32 bits of the shift result are nonzero, we
know that the rest after taking this partial result into account will
fit into 32 bits. So we just clear the upper 32 bits of the rest if the
upper 32 bits of the partial result are nonzero. */
#endif /* __SHMEDIA__ */
#endif /* L_udivdi3 */
#ifdef L_divdi3
#if __SHMEDIA__
.mode SHmedia
.section .text..SHmedia32,"ax"
.align 2
.global GLOBAL(divdi3)
FUNC(GLOBAL(divdi3))
/* Signed 64-bit division built on udivdi3_internal.
   Operands are in r2 and r3; the result is returned in r2.
   r22 / r23 become all-ones or zero sign masks of r2 / r3; xor followed by
   sub with the mask yields the absolute value of each operand.
   When both signs match, control branches straight to the unsigned routine,
   which returns to our caller through the untouched r18.  Otherwise the
   unsigned routine is called with the link written to r18, and the
   quotient is negated afterwards.  */
GLOBAL(divdi3):
pta GLOBAL(udivdi3_internal),tr0
shari r2,63,r22
shari r3,63,r23
xor r2,r22,r2
xor r3,r23,r3
sub r2,r22,r2
sub r3,r23,r3
/* Same sign: tail-branch, the unsigned quotient is already correct. */
beq/u r22,r23,tr0
/* Save our return address in tr1 before blink overwrites r18. */
ptabs r18,tr1
blink tr0,r18
/* Signs differed: negate the quotient and return via tr1. */
sub r63,r2,r2
blink tr1,r63
ENDFUNC(GLOBAL(divdi3))
#endif /* __SHMEDIA__ */
#endif /* L_divdi3 */
#ifdef L_umoddi3
#if __SHMEDIA__
.mode SHmedia
.section .text..SHmedia32,"ax"
.align 2
.global GLOBAL(umoddi3)
FUNC(GLOBAL(umoddi3))
/* Unsigned 64-bit remainder, SHmedia.
   Operands are in r2 and r3 and the result is left in r2.  The structure
   mirrors udivdi3: a 31-bit reciprocal of the normalized divisor (r3) is
   built in r1, then the rest (remainder) is reduced in successive divide
   steps.  No quotient is accumulated; only the rest is tracked.
     - small divisor (fall-through): three reduction steps, rest in r2;
     - LOCAL(large_divisor): rest is reduced in normalized form and shifted
       back down by r22 just before returning.
   Only tr0 is used as a target register, which moddi3 relies on when it
   keeps its own return address in tr1 across the call.
   NOTE(review): hand-scheduled and numerically delicate; see Note 1 after
   ENDFUNC for the mcmpgt.l / andc trick.  */
GLOBAL(umoddi3):
HIDDEN_ALIAS(umoddi3_internal,umoddi3)
/* Normalize the divisor: r22 = shift count, r6 = r3 << r22. */
shlri r3,1,r4
nsb r4,r22
shlld r3,r22,r6
shlri r6,49,r5
movi 0xffffffffffffbaf1,r21 /* .l shift count 17. */
sub r21,r5,r1
mmulfx.w r1,r1,r4
mshflo.w r1,r63,r1
sub r63,r22,r20 // r63 == 64 % 64
mmulfx.w r5,r4,r4
pta LOCAL(large_divisor),tr0
addi r20,32,r9
msub.w r1,r4,r1
madd.w r1,r1,r1
mmulfx.w r1,r1,r4
shlri r6,32,r7
bgt/u r9,r63,tr0 // large_divisor
mmulfx.w r5,r4,r4
shlri r2,32+14,r19
addi r22,-31,r0
msub.w r1,r4,r1
mulu.l r1,r7,r4
addi r1,-3,r5
mulu.l r5,r19,r5
sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
the case may be, %0000000000000000 000.11111111111, still */
muls.l r1,r4,r4 /* leaving at least one sign bit. */
mulu.l r5,r3,r5
mshalds.l r1,r21,r1
shari r4,26,r4
shlld r5,r0,r5
add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
sub r2,r5,r2
/* Can do second step of 64 : 32 div now, using r1 and the rest in r2. */
shlri r2,22,r21
mulu.l r21,r1,r21
addi r20,30-22,r0
/* bubble */ /* could test r3 here to check for divide by zero. */
shlrd r21,r0,r21
mulu.l r21,r3,r5
mcmpgt.l r21,r63,r21 // See Note 1
addi r20,30,r0
mshfhi.l r63,r21,r21
sub r2,r5,r2
andc r2,r21,r2
/* small divisor: need a third divide step */
mulu.l r2,r1,r7
ptabs r18,tr0
sub r2,r3,r8 /* re-use r8 here for rest - r3 */
shlrd r7,r0,r7
mulu.l r7,r3,r5
/* bubble */
/* If (rest - r3 + 1) > r5, use rest - r3 instead of rest (cmvne). */
addi r8,1,r7
cmpgt r7,r5,r7
cmvne r7,r8,r2
sub r2,r5,r2
blink tr0,r63
LOCAL(large_divisor):
/* Large-divisor path: r25 holds the dividend shifted right by r9. */
mmulfx.w r5,r4,r4
shlrd r2,r9,r25
shlri r25,32,r8
msub.w r1,r4,r1
mulu.l r1,r7,r4
addi r1,-3,r5
mulu.l r5,r8,r5
sub r63,r4,r4 // Negate to make sure r1 ends up <= 1/r2
shlri r4,2,r4 /* chop off leading %0000000000000000 001.00000000000 - or, as
the case may be, %0000000000000000 000.11111111111, still */
muls.l r1,r4,r4 /* leaving at least one sign bit. */
shlri r5,14-1,r8
mulu.l r8,r7,r5
mshalds.l r1,r21,r1
shari r4,26,r4
add r1,r4,r1 // 31 bit unsigned reciprocal now in r1 (msb equiv. 0.5)
sub r25,r5,r25
/* Can do second step of 64 : 32 div now, using r1 and the rest in r25. */
shlri r25,22,r21
mulu.l r21,r1,r21
pta LOCAL(no_lo_adj),tr0
addi r22,32,r0
shlri r21,40,r21
mulu.l r21,r7,r5
add r8,r21,r8
shlld r2,r0,r2
sub r25,r5,r25
/* If r7 > r25 (unsigned) no extra adjustment is needed. */
bgtu/u r7,r25,tr0 // no_lo_adj
addi r8,1,r8
sub r25,r7,r25
LOCAL(no_lo_adj):
mextr4 r2,r25,r2
/* large_divisor: only needs a few adjustments. */
mulu.l r8,r6,r5
ptabs r18,tr0
/* When r5 > r2 (unsigned), add the normalized divisor r6 back first. */
add r2,r6,r7
cmpgtu r5,r2,r8
cmvne r8,r7,r2
sub r2,r5,r2
/* Undo the normalization shift so r2 is the remainder of the original
   operands. */
shlrd r2,r22,r2
blink tr0,r63
ENDFUNC(GLOBAL(umoddi3))
/* Note 1: To shift the result of the second divide stage so that the result
always fits into 32 bits, yet we still reduce the rest sufficiently
would require a lot of instructions to do the shifts just right. Using
the full 64 bit shift result to multiply with the divisor would require
four extra instructions for the upper 32 bits (shift / mulu / shift / sub).
Fortunately, if the upper 32 bits of the shift result are nonzero, we
know that the rest after taking this partial result into account will
fit into 32 bits. So we just clear the upper 32 bits of the rest if the
upper 32 bits of the partial result are nonzero. */
#endif /* __SHMEDIA__ */
#endif /* L_umoddi3 */
#ifdef L_moddi3
#if __SHMEDIA__
.mode SHmedia
.section .text..SHmedia32,"ax"
.align 2
.global GLOBAL(moddi3)
FUNC(GLOBAL(moddi3))
/* Signed 64-bit remainder built on umoddi3_internal.
   Operands are in r2 and r3; the result is returned in r2.
   r22 / r23 become all-ones or zero sign masks of r2 / r3; xor followed by
   sub with the mask yields the absolute value of each operand.
   Only the sign of r2 (the value being reduced) decides whether the
   result is negated: the branch below compares r22 against zero and never
   looks at r23 again.  */
GLOBAL(moddi3):
pta GLOBAL(umoddi3_internal),tr0
shari r2,63,r22
shari r3,63,r23
xor r2,r22,r2
xor r3,r23,r3
sub r2,r22,r2
sub r3,r23,r3
/* r2 was non-negative: tail-branch, the unsigned remainder is the answer. */
beq/u r22,r63,tr0
/* Save our return address in tr1 before blink overwrites r18. */
ptabs r18,tr1
blink tr0,r18
/* r2 was negative: negate the remainder and return via tr1. */
sub r63,r2,r2
blink tr1,r63
ENDFUNC(GLOBAL(moddi3))
#endif /* __SHMEDIA__ */
#endif /* L_moddi3 */
#ifdef L_set_fpscr
#if !defined (__SH2A_NOFPU__)
#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || __SH5__ == 32
#ifdef __SH5__
.mode SHcompact
#endif
#if defined (__SH2E__) || defined (__SH2A__) || defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
.global GLOBAL(set_fpscr)
HIDDEN_FUNC(GLOBAL(set_fpscr))
GLOBAL(set_fpscr):
......@@ -2161,39 +1477,8 @@ LOCAL(set_fpscr_L1):
#endif /* __SH2A_NOFPU__ */
#endif /* L_set_fpscr */
#ifdef L_ic_invalidate
#if __SH5__ == 32
.mode SHmedia
.section .text..SHmedia32,"ax"
.align 2
.global GLOBAL(init_trampoline)
HIDDEN_FUNC(GLOBAL(init_trampoline))
/* Fill in a 16-byte trampoline at the address in r0:
     bytes 0..7   a fixed 64-bit pattern assembled in r20,
     bytes 8..11  the 32-bit value from r2,
     bytes 12..15 the 32-bit value from r3.
   The four 16-bit pieces (0xd002, 0xd101, 0x402b, 0x0009) are assembled in
   opposite orders for little- and big-endian so that they land in memory
   in the same order.  NOTE(review): they look like SHcompact opcodes
   (two pc-relative loads, a jump and a nop) -- confirm against the
   instruction encoding.
   There is no return instruction here: execution falls through past
   ENDFUNC into whatever follows (ic_invalidate in this listing), with r0
   still holding the trampoline address.  */
GLOBAL(init_trampoline):
st.l r0,8,r2
#ifdef __LITTLE_ENDIAN__
movi 9,r20
shori 0x402b,r20
shori 0xd101,r20
shori 0xd002,r20
#else
movi 0xffffffffffffd002,r20
shori 0xd101,r20
shori 0x402b,r20
shori 9,r20
#endif
st.q r0,0,r20
st.l r0,12,r3
ENDFUNC(GLOBAL(init_trampoline))
.global GLOBAL(ic_invalidate)
HIDDEN_FUNC(GLOBAL(ic_invalidate))
/* Make code just written at the address in r0 visible to instruction
   fetch.  Only offset 0 from r0 is named in the cache instructions below.
   NOTE(review): the sequence reads as "write back the data cache block,
   synchronize, invalidate the instruction cache block, synchronize" --
   confirm the exact semantics of ocbwb/synco/icbi/synci in the SH-5
   manual.  Returns through r18.  */
GLOBAL(ic_invalidate):
ocbwb r0,0
synco
icbi r0, 0
ptabs r18, tr0
synci
blink tr0, r63
ENDFUNC(GLOBAL(ic_invalidate))
#elif defined(__SH4A__)
#if defined(__SH4A__)
.global GLOBAL(ic_invalidate)
HIDDEN_FUNC(GLOBAL(ic_invalidate))
GLOBAL(ic_invalidate):
......@@ -2203,7 +1488,7 @@ GLOBAL(ic_invalidate):
rts
nop
ENDFUNC(GLOBAL(ic_invalidate))
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__)
/* For system code, we use ic_invalidate_line_i, but user code
needs a different mechanism. A kernel call is generally not
available, and it would also be slow. Different SH4 variants use
......@@ -2259,7 +1544,7 @@ GLOBAL(ic_invalidate):
#endif /* L_ic_invalidate */
#ifdef L_ic_invalidate_array
#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))))
#if defined(__SH4A__) || (defined (__FORCE_SH4A__) && (defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__)))
.global GLOBAL(ic_invalidate_array)
/* This is needed when an SH4 dso with trampolines is used on SH4A. */
.global GLOBAL(ic_invalidate_array)
......@@ -2273,7 +1558,7 @@ GLOBAL(ic_invalidate_array):
.align 2
.long 0
ENDFUNC(GLOBAL(ic_invalidate_array))
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || (defined(__SH4_NOFPU__) && !defined(__SH5__))
#elif defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__) || defined(__SH4_NOFPU__)
.global GLOBAL(ic_invalidate_array)
.p2align 5
FUNC(GLOBAL(ic_invalidate_array))
......@@ -2329,1047 +1614,10 @@ GLOBAL(ic_invalidate_array):
#endif /* SH4 */
#endif /* L_ic_invalidate_array */
#if defined (__SH5__) && __SH5__ == 32
#ifdef L_shcompact_call_trampoline
.section .rodata
.align 1
/* Dispatch table for GCC_shcompact_call_trampoline.
   Each 16-bit entry is the offset of a handler relative to
   LOCAL(ct_main_label), the ptrel instruction that consumes it.
   The trampoline indexes the table with the nsb result of the
   zero-extended 32-bit cookie and biases the table base by -31 * 2, so
   entry 0 corresponds to an nsb result of 31 (cookie bit 31 set) and the
   last entry (ct_call_func) to an nsb result of 63 (cookie == 0).
   There are 33 entries: 3 each for r2..r5, 4 each for r6..r9, then the
   pop-sequence, wide-return and plain-call entries.  */
LOCAL(ct_main_table):
.word LOCAL(ct_r2_fp) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r2_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r2_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r3_fp) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r3_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r3_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r4_fp) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r4_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r4_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r5_fp) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r5_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r5_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r6_fph) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r6_fpl) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r6_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r6_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r7_fph) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r7_fpl) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r7_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r7_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r8_fph) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r8_fpl) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r8_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r8_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r9_fph) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r9_fpl) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r9_ld) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_pop_seq) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_r9_pop) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_ret_wide) - datalabel LOCAL(ct_main_label)
.word LOCAL(ct_call_func) - datalabel LOCAL(ct_main_label)
.mode SHmedia
.section .text..SHmedia32, "ax"
.align 2
/* This function loads 64-bit general-purpose registers from the
stack, from a memory address contained in them or from an FP
register, according to a cookie passed in r1. Its execution
time is linear on the number of registers that actually have
to be copied. See sh.h for details on the actual bit pattern.
The function to be called is passed in r0. If a 32-bit return
value is expected, the actual function will be tail-called,
otherwise the return address will be stored in r10 (that the
caller should expect to be clobbered) and the return value
will be expanded into r2/r3 upon return. */
.global GLOBAL(GCC_shcompact_call_trampoline)
FUNC(GLOBAL(GCC_shcompact_call_trampoline))
/* Entry: r0 = address of the function to call, r1 = argument cookie.
   tr0 is set to the callee, tr1 to LOCAL(ct_loop).
   Main loop: nsb of the zero-extended cookie picks the handler for the
   highest cookie bit still set (via ct_main_table).  Every handler clears
   its own bits in r1 and then branches back to tr1, so the loop ends when
   the cookie is exhausted and the table selects ct_ret_wide or
   ct_call_func.
   Scratch registers used by the handlers: r28..r34 and tr2.  */
GLOBAL(GCC_shcompact_call_trampoline):
ptabs/l r0, tr0 /* Prepare to call the actual function. */
/* r0 is now free: build the biased table address (table - 31 * 2) in it. */
movi ((datalabel LOCAL(ct_main_table) - 31 * 2) >> 16) & 65535, r0
pt/l LOCAL(ct_loop), tr1
addz.l r1, r63, r1
shori ((datalabel LOCAL(ct_main_table) - 31 * 2)) & 65535, r0
LOCAL(ct_loop):
/* r28 = nsb(cookie); doubled to index the table of 16-bit offsets. */
nsb r1, r28
shlli r28, 1, r29
ldx.w r0, r29, r30
LOCAL(ct_main_label):
ptrel/l r30, tr2
blink tr2, r63
LOCAL(ct_r2_fp): /* Copy r2 from an FP register. */
/* It must be dr0, so just do it. */
fmov.dq dr0, r2
movi 7, r30
shlli r30, 29, r31
andc r1, r31, r1
blink tr1, r63
LOCAL(ct_r3_fp): /* Copy r3 from an FP register. */
/* It is either dr0 or dr2. */
movi 7, r30
shlri r1, 26, r32
shlli r30, 26, r31
andc r1, r31, r1
fmov.dq dr0, r3
beqi/l r32, 4, tr1
fmov.dq dr2, r3
blink tr1, r63
/* For r4 and up the FP source is chosen by a computed branch: a 2-bit
   field of the cookie, scaled by 8 bytes (one fmov.dq + blink pair),
   indexes into the ct_rN_fp_copy sequence.  */
LOCAL(ct_r4_fp): /* Copy r4 from an FP register. */
shlri r1, 23 - 3, r34
andi r34, 3 << 3, r33
addi r33, LOCAL(ct_r4_fp_copy) - datalabel LOCAL(ct_r4_fp_base), r32
LOCAL(ct_r4_fp_base):
ptrel/l r32, tr2
movi 7, r30
shlli r30, 23, r31
andc r1, r31, r1
blink tr2, r63
LOCAL(ct_r4_fp_copy):
fmov.dq dr0, r4
blink tr1, r63
fmov.dq dr2, r4
blink tr1, r63
fmov.dq dr4, r4
blink tr1, r63
LOCAL(ct_r5_fp): /* Copy r5 from an FP register. */
shlri r1, 20 - 3, r34
andi r34, 3 << 3, r33
addi r33, LOCAL(ct_r5_fp_copy) - datalabel LOCAL(ct_r5_fp_base), r32
LOCAL(ct_r5_fp_base):
ptrel/l r32, tr2
movi 7, r30
shlli r30, 20, r31
andc r1, r31, r1
blink tr2, r63
LOCAL(ct_r5_fp_copy):
fmov.dq dr0, r5
blink tr1, r63
fmov.dq dr2, r5
blink tr1, r63
fmov.dq dr4, r5
blink tr1, r63
fmov.dq dr6, r5
blink tr1, r63
LOCAL(ct_r6_fph): /* Copy r6 from a high FP register. */
/* It must be dr8. */
fmov.dq dr8, r6
movi 15, r30
shlli r30, 16, r31
andc r1, r31, r1
blink tr1, r63
LOCAL(ct_r6_fpl): /* Copy r6 from a low FP register. */
shlri r1, 16 - 3, r34
andi r34, 3 << 3, r33
addi r33, LOCAL(ct_r6_fp_copy) - datalabel LOCAL(ct_r6_fp_base), r32
LOCAL(ct_r6_fp_base):
ptrel/l r32, tr2
movi 7, r30
shlli r30, 16, r31
andc r1, r31, r1
blink tr2, r63
LOCAL(ct_r6_fp_copy):
fmov.dq dr0, r6
blink tr1, r63
fmov.dq dr2, r6
blink tr1, r63
fmov.dq dr4, r6
blink tr1, r63
fmov.dq dr6, r6
blink tr1, r63
LOCAL(ct_r7_fph): /* Copy r7 from a high FP register. */
/* It is either dr8 or dr10. */
movi 15 << 12, r31
shlri r1, 12, r32
andc r1, r31, r1
fmov.dq dr8, r7
beqi/l r32, 8, tr1
fmov.dq dr10, r7
blink tr1, r63
LOCAL(ct_r7_fpl): /* Copy r7 from a low FP register. */
shlri r1, 12 - 3, r34
andi r34, 3 << 3, r33
addi r33, LOCAL(ct_r7_fp_copy) - datalabel LOCAL(ct_r7_fp_base), r32
LOCAL(ct_r7_fp_base):
ptrel/l r32, tr2
movi 7 << 12, r31
andc r1, r31, r1
blink tr2, r63
LOCAL(ct_r7_fp_copy):
fmov.dq dr0, r7
blink tr1, r63
fmov.dq dr2, r7
blink tr1, r63
fmov.dq dr4, r7
blink tr1, r63
fmov.dq dr6, r7
blink tr1, r63
LOCAL(ct_r8_fph): /* Copy r8 from a high FP register. */
/* It is either dr8 or dr10. */
movi 15 << 8, r31
andi r1, 1 << 8, r32
andc r1, r31, r1
fmov.dq dr8, r8
beq/l r32, r63, tr1
fmov.dq dr10, r8
blink tr1, r63
LOCAL(ct_r8_fpl): /* Copy r8 from a low FP register. */
shlri r1, 8 - 3, r34
andi r34, 3 << 3, r33
addi r33, LOCAL(ct_r8_fp_copy) - datalabel LOCAL(ct_r8_fp_base), r32
LOCAL(ct_r8_fp_base):
ptrel/l r32, tr2
movi 7 << 8, r31
andc r1, r31, r1
blink tr2, r63
LOCAL(ct_r8_fp_copy):
fmov.dq dr0, r8
blink tr1, r63
fmov.dq dr2, r8
blink tr1, r63
fmov.dq dr4, r8
blink tr1, r63
fmov.dq dr6, r8
blink tr1, r63
LOCAL(ct_r9_fph): /* Copy r9 from a high FP register. */
/* It is either dr8 or dr10. */
movi 15 << 4, r31
andi r1, 1 << 4, r32
andc r1, r31, r1
fmov.dq dr8, r9
beq/l r32, r63, tr1
fmov.dq dr10, r9
blink tr1, r63
LOCAL(ct_r9_fpl): /* Copy r9 from a low FP register. */
shlri r1, 4 - 3, r34
andi r34, 3 << 3, r33
addi r33, LOCAL(ct_r9_fp_copy) - datalabel LOCAL(ct_r9_fp_base), r32
LOCAL(ct_r9_fp_base):
ptrel/l r32, tr2
movi 7 << 4, r31
andc r1, r31, r1
blink tr2, r63
LOCAL(ct_r9_fp_copy):
fmov.dq dr0, r9
blink tr1, r63
fmov.dq dr2, r9
blink tr1, r63
fmov.dq dr4, r9
blink tr1, r63
fmov.dq dr6, r9
blink tr1, r63
/* Memory-load handlers.  Each tests a 2-bit mask in the cookie: when both
   bits are set it branches to the matching ct_rN_load stub, which loads
   only rN and returns to the main loop.  Otherwise it seeds the next
   register with rN + 8, loads rN, and falls through into the handler for
   the next register, so one chain can fill several registers from
   consecutive 8-byte slots.  */
LOCAL(ct_r2_ld): /* Copy r2 from a memory address. */
pt/l LOCAL(ct_r2_load), tr2
movi 3, r30
shlli r30, 29, r31
and r1, r31, r32
andc r1, r31, r1
beq/l r31, r32, tr2
addi.l r2, 8, r3
ldx.q r2, r63, r2
/* Fall through. */
LOCAL(ct_r3_ld): /* Copy r3 from a memory address. */
pt/l LOCAL(ct_r3_load), tr2
movi 3, r30
shlli r30, 26, r31
and r1, r31, r32
andc r1, r31, r1
beq/l r31, r32, tr2
addi.l r3, 8, r4
ldx.q r3, r63, r3
LOCAL(ct_r4_ld): /* Copy r4 from a memory address. */
pt/l LOCAL(ct_r4_load), tr2
movi 3, r30
shlli r30, 23, r31
and r1, r31, r32
andc r1, r31, r1
beq/l r31, r32, tr2
addi.l r4, 8, r5
ldx.q r4, r63, r4
LOCAL(ct_r5_ld): /* Copy r5 from a memory address. */
pt/l LOCAL(ct_r5_load), tr2
movi 3, r30
shlli r30, 20, r31
and r1, r31, r32
andc r1, r31, r1
beq/l r31, r32, tr2
addi.l r5, 8, r6
ldx.q r5, r63, r5
LOCAL(ct_r6_ld): /* Copy r6 from a memory address. */
pt/l LOCAL(ct_r6_load), tr2
movi 3 << 16, r31
and r1, r31, r32
andc r1, r31, r1
beq/l r31, r32, tr2
addi.l r6, 8, r7
ldx.q r6, r63, r6
LOCAL(ct_r7_ld): /* Copy r7 from a memory address. */
pt/l LOCAL(ct_r7_load), tr2
movi 3 << 12, r31
and r1, r31, r32
andc r1, r31, r1
beq/l r31, r32, tr2
addi.l r7, 8, r8
ldx.q r7, r63, r7
LOCAL(ct_r8_ld): /* Copy r8 from a memory address. */
pt/l LOCAL(ct_r8_load), tr2
movi 3 << 8, r31
and r1, r31, r32
andc r1, r31, r1
beq/l r31, r32, tr2
addi.l r8, 8, r9
ldx.q r8, r63, r8
LOCAL(ct_r9_ld): /* Copy r9 from a memory address. */
/* r9 is the last register: load it and go straight to the final check
   instead of back to the main loop. */
pt/l LOCAL(ct_check_tramp), tr2
ldx.q r9, r63, r9
blink tr2, r63
LOCAL(ct_r2_load):
ldx.q r2, r63, r2
blink tr1, r63
LOCAL(ct_r3_load):
ldx.q r3, r63, r3
blink tr1, r63
LOCAL(ct_r4_load):
ldx.q r4, r63, r4
blink tr1, r63
LOCAL(ct_r5_load):
ldx.q r5, r63, r5
blink tr1, r63
LOCAL(ct_r6_load):
ldx.q r6, r63, r6
blink tr1, r63
LOCAL(ct_r7_load):
ldx.q r7, r63, r7
blink tr1, r63
LOCAL(ct_r8_load):
ldx.q r8, r63, r8
blink tr1, r63
/* Single-register pops: load 8 bytes from the stack pointer (r15), bump
   r15 by 8, clear the one cookie bit, return to the main loop.  */
LOCAL(ct_r2_pop): /* Pop r2 from the stack. */
movi 1, r30
ldx.q r15, r63, r2
shlli r30, 29, r31
addi.l r15, 8, r15
andc r1, r31, r1
blink tr1, r63
LOCAL(ct_r3_pop): /* Pop r3 from the stack. */
movi 1, r30
ldx.q r15, r63, r3
shlli r30, 26, r31
addi.l r15, 8, r15
andc r1, r31, r1
blink tr1, r63
LOCAL(ct_r4_pop): /* Pop r4 from the stack. */
movi 1, r30
ldx.q r15, r63, r4
shlli r30, 23, r31
addi.l r15, 8, r15
andc r1, r31, r1
blink tr1, r63
LOCAL(ct_r5_pop): /* Pop r5 from the stack. */
movi 1, r30
ldx.q r15, r63, r5
shlli r30, 20, r31
addi.l r15, 8, r15
andc r1, r31, r1
blink tr1, r63
LOCAL(ct_r6_pop): /* Pop r6 from the stack. */
movi 1, r30
ldx.q r15, r63, r6
shlli r30, 16, r31
addi.l r15, 8, r15
andc r1, r31, r1
blink tr1, r63
LOCAL(ct_r7_pop): /* Pop r7 from the stack. */
ldx.q r15, r63, r7
movi 1 << 12, r31
addi.l r15, 8, r15
andc r1, r31, r1
blink tr1, r63
LOCAL(ct_r8_pop): /* Pop r8 from the stack. */
ldx.q r15, r63, r8
movi 1 << 8, r31
addi.l r15, 8, r15
andc r1, r31, r1
blink tr1, r63
LOCAL(ct_pop_seq): /* Pop a sequence of registers off the stack. */
/* Cookie bits 1..3 give the number of registers to pop.  Each pop below is
   two 4-byte instructions, so the entry point is
   ct_end_of_pop_seq - count * 8.  */
andi r1, 7 << 1, r30
movi (LOCAL(ct_end_of_pop_seq) >> 16) & 65535, r32
shlli r30, 2, r31
shori LOCAL(ct_end_of_pop_seq) & 65535, r32
sub.l r32, r31, r33
ptabs/l r33, tr2
blink tr2, r63
LOCAL(ct_start_of_pop_seq): /* Beginning of pop sequence. */
ldx.q r15, r63, r3
addi.l r15, 8, r15
ldx.q r15, r63, r4
addi.l r15, 8, r15
ldx.q r15, r63, r5
addi.l r15, 8, r15
ldx.q r15, r63, r6
addi.l r15, 8, r15
ldx.q r15, r63, r7
addi.l r15, 8, r15
ldx.q r15, r63, r8
addi.l r15, 8, r15
LOCAL(ct_r9_pop): /* Pop r9 from the stack. */
ldx.q r15, r63, r9
addi.l r15, 8, r15
LOCAL(ct_end_of_pop_seq): /* Label used to compute first pop instruction. */
LOCAL(ct_check_tramp): /* Check whether we need a trampoline. */
/* Only cookie bit 0 matters from here on: set means the callee returns a
   64-bit value that has to be unpacked. */
pt/u LOCAL(ct_ret_wide), tr2
andi r1, 1, r1
bne/u r1, r63, tr2
LOCAL(ct_call_func): /* Just branch to the function. */
blink tr0, r63
LOCAL(ct_ret_wide): /* Call the function, so that we can unpack its
64-bit return value. */
/* Keep the caller's return address in r10 across the call; the callee's
   64-bit result in r2 is then split into r2 / r3 according to
   endianness. */
add.l r18, r63, r10
blink tr0, r18
ptabs r10, tr0
#if __LITTLE_ENDIAN__
shari r2, 32, r3
add.l r2, r63, r2
#else
add.l r2, r63, r3
shari r2, 32, r2
#endif
blink tr0, r63
ENDFUNC(GLOBAL(GCC_shcompact_call_trampoline))
#endif /* L_shcompact_call_trampoline */
#ifdef L_shcompact_return_trampoline
/* This function does the converse of the code in `ret_wide'
above. It is tail-called by SHcompact functions returning
64-bit non-floating-point values, to pack the 32-bit values in
r2 and r3 into r2. */
.mode SHmedia
.section .text..SHmedia32, "ax"
.align 2
.global GLOBAL(GCC_shcompact_return_trampoline)
HIDDEN_FUNC(GLOBAL(GCC_shcompact_return_trampoline))
/* Pack the two 32-bit halves held in r2 and r3 into one 64-bit value in
   r2, then return through r18.
   The half that becomes the low word is zero-extended (addz.l) and the
   other half is shifted up by 32 before the two are OR-ed together:
     little-endian: r2 = low word,  r3 = high word;
     big-endian:    r3 = low word,  r2 = high word.  */
GLOBAL(GCC_shcompact_return_trampoline):
ptabs/l r18, tr0
#if __LITTLE_ENDIAN__
addz.l r2, r63, r2
shlli r3, 32, r3
#else
addz.l r3, r63, r3
shlli r2, 32, r2
#endif
or r3, r2, r2
blink tr0, r63
ENDFUNC(GLOBAL(GCC_shcompact_return_trampoline))
#endif /* L_shcompact_return_trampoline */
#ifdef L_shcompact_incoming_args
.section .rodata
.align 1
/* Dispatch table for GCC_shcompact_incoming_args.
   Each 16-bit entry is the offset of a handler relative to
   LOCAL(ia_main_label), the ptrel instruction that consumes it.  The
   function indexes the table with the nsb result of the cookie and biases
   the table base by -31 * 2, so entry 0 corresponds to an nsb result of
   31.  The layout parallels ct_main_table (3 slots each for r2..r5, 4 each
   for r6..r9, then 5 trailing entries); slots that have no meaning for
   incoming arguments hold the placeholder value 1.  */
LOCAL(ia_main_table):
.word 1 /* Invalid, just loop */
.word LOCAL(ia_r2_ld) - datalabel LOCAL(ia_main_label)
.word LOCAL(ia_r2_push) - datalabel LOCAL(ia_main_label)
.word 1 /* Invalid, just loop */
.word LOCAL(ia_r3_ld) - datalabel LOCAL(ia_main_label)
.word LOCAL(ia_r3_push) - datalabel LOCAL(ia_main_label)
.word 1 /* Invalid, just loop */
.word LOCAL(ia_r4_ld) - datalabel LOCAL(ia_main_label)
.word LOCAL(ia_r4_push) - datalabel LOCAL(ia_main_label)
.word 1 /* Invalid, just loop */
.word LOCAL(ia_r5_ld) - datalabel LOCAL(ia_main_label)
.word LOCAL(ia_r5_push) - datalabel LOCAL(ia_main_label)
.word 1 /* Invalid, just loop */
.word 1 /* Invalid, just loop */
.word LOCAL(ia_r6_ld) - datalabel LOCAL(ia_main_label)
.word LOCAL(ia_r6_push) - datalabel LOCAL(ia_main_label)
.word 1 /* Invalid, just loop */
.word 1 /* Invalid, just loop */
.word LOCAL(ia_r7_ld) - datalabel LOCAL(ia_main_label)
.word LOCAL(ia_r7_push) - datalabel LOCAL(ia_main_label)
.word 1 /* Invalid, just loop */
.word 1 /* Invalid, just loop */
.word LOCAL(ia_r8_ld) - datalabel LOCAL(ia_main_label)
.word LOCAL(ia_r8_push) - datalabel LOCAL(ia_main_label)
.word 1 /* Invalid, just loop */
.word 1 /* Invalid, just loop */
.word LOCAL(ia_r9_ld) - datalabel LOCAL(ia_main_label)
.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
.word LOCAL(ia_push_seq) - datalabel LOCAL(ia_main_label)
.word LOCAL(ia_r9_push) - datalabel LOCAL(ia_main_label)
.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
.word LOCAL(ia_return) - datalabel LOCAL(ia_main_label)
.mode SHmedia
.section .text..SHmedia32, "ax"
.align 2
/* This function stores 64-bit general-purpose registers back in
the stack, and loads the address in which each register
was stored into itself. The lower 32 bits of r17 hold the address
to begin storing, and the upper 32 bits of r17 hold the cookie.
Its execution time is linear on the
number of registers that actually have to be copied, and it is
optimized for structures larger than 64 bits, as opposed to
individual `long long' arguments. See sh.h for details on the
actual bit pattern. */
.global GLOBAL(GCC_shcompact_incoming_args)
FUNC(GLOBAL(GCC_shcompact_incoming_args))
/* Entry: r17 holds the store address in its low 32 bits and the cookie in
   its high 32 bits; r18 is the return address.
   tr0 is set to the return address, tr1 to LOCAL(ia_loop); the cookie is
   moved to r0 and the store pointer stays in r17.
   Main loop: nsb of the cookie selects a handler through ia_main_table.
   The handlers clear their own cookie bits in r0 and branch back to tr1,
   or return through tr0 once the last register has been handled.
   Scratch registers used: r36..r41, r43 and tr2.  */
GLOBAL(GCC_shcompact_incoming_args):
ptabs/l r18, tr0 /* Prepare to return. */
shlri r17, 32, r0 /* Load the cookie. */
movi ((datalabel LOCAL(ia_main_table) - 31 * 2) >> 16) & 65535, r43
pt/l LOCAL(ia_loop), tr1
/* Reduce r17 to its low 32 bits (the store address). */
add.l r17, r63, r17
shori ((datalabel LOCAL(ia_main_table) - 31 * 2)) & 65535, r43
LOCAL(ia_loop):
/* r36 = nsb(cookie); doubled to index the table of 16-bit offsets. */
nsb r0, r36
shlli r36, 1, r37
ldx.w r43, r37, r38
LOCAL(ia_main_label):
ptrel/l r38, tr2
blink tr2, r63
/* "ld" handlers: store rN at the current slot, replace rN with the address
   of that slot, and advance r17 by 8.  When both bits of the 2-bit mask
   were set in the cookie the handler returns to the main loop; otherwise
   it falls through into the handler for the next register.  */
LOCAL(ia_r2_ld): /* Store r2 and load its address. */
movi 3, r38
shlli r38, 29, r39
and r0, r39, r40
andc r0, r39, r0
stx.q r17, r63, r2
add.l r17, r63, r2
addi.l r17, 8, r17
beq/u r39, r40, tr1
LOCAL(ia_r3_ld): /* Store r3 and load its address. */
movi 3, r38
shlli r38, 26, r39
and r0, r39, r40
andc r0, r39, r0
stx.q r17, r63, r3
add.l r17, r63, r3
addi.l r17, 8, r17
beq/u r39, r40, tr1
LOCAL(ia_r4_ld): /* Store r4 and load its address. */
movi 3, r38
shlli r38, 23, r39
and r0, r39, r40
andc r0, r39, r0
stx.q r17, r63, r4
add.l r17, r63, r4
addi.l r17, 8, r17
beq/u r39, r40, tr1
LOCAL(ia_r5_ld): /* Store r5 and load its address. */
movi 3, r38
shlli r38, 20, r39
and r0, r39, r40
andc r0, r39, r0
stx.q r17, r63, r5
add.l r17, r63, r5
addi.l r17, 8, r17
beq/u r39, r40, tr1
LOCAL(ia_r6_ld): /* Store r6 and load its address. */
movi 3, r38
shlli r38, 16, r39
and r0, r39, r40
andc r0, r39, r0
stx.q r17, r63, r6
add.l r17, r63, r6
addi.l r17, 8, r17
beq/u r39, r40, tr1
LOCAL(ia_r7_ld): /* Store r7 and load its address. */
movi 3 << 12, r39
and r0, r39, r40
andc r0, r39, r0
stx.q r17, r63, r7
add.l r17, r63, r7
addi.l r17, 8, r17
beq/u r39, r40, tr1
LOCAL(ia_r8_ld): /* Store r8 and load its address. */
movi 3 << 8, r39
and r0, r39, r40
andc r0, r39, r0
stx.q r17, r63, r8
add.l r17, r63, r8
addi.l r17, 8, r17
beq/u r39, r40, tr1
LOCAL(ia_r9_ld): /* Store r9 and load its address. */
/* r9 is the last register, so return directly instead of looping. */
stx.q r17, r63, r9
add.l r17, r63, r9
blink tr0, r63
/* "push" handlers: store rN at the current slot, advance r17 by 8, clear
   the single cookie bit and return to the main loop.  */
LOCAL(ia_r2_push): /* Push r2 onto the stack. */
movi 1, r38
shlli r38, 29, r39
andc r0, r39, r0
stx.q r17, r63, r2
addi.l r17, 8, r17
blink tr1, r63
LOCAL(ia_r3_push): /* Push r3 onto the stack. */
movi 1, r38
shlli r38, 26, r39
andc r0, r39, r0
stx.q r17, r63, r3
addi.l r17, 8, r17
blink tr1, r63
LOCAL(ia_r4_push): /* Push r4 onto the stack. */
movi 1, r38
shlli r38, 23, r39
andc r0, r39, r0
stx.q r17, r63, r4
addi.l r17, 8, r17
blink tr1, r63
LOCAL(ia_r5_push): /* Push r5 onto the stack. */
movi 1, r38
shlli r38, 20, r39
andc r0, r39, r0
stx.q r17, r63, r5
addi.l r17, 8, r17
blink tr1, r63
LOCAL(ia_r6_push): /* Push r6 onto the stack. */
movi 1, r38
shlli r38, 16, r39
andc r0, r39, r0
stx.q r17, r63, r6
addi.l r17, 8, r17
blink tr1, r63
LOCAL(ia_r7_push): /* Push r7 onto the stack. */
movi 1 << 12, r39
andc r0, r39, r0
stx.q r17, r63, r7
addi.l r17, 8, r17
blink tr1, r63
LOCAL(ia_r8_push): /* Push r8 onto the stack. */
movi 1 << 8, r39
andc r0, r39, r0
stx.q r17, r63, r8
addi.l r17, 8, r17
blink tr1, r63
LOCAL(ia_push_seq): /* Push a sequence of registers onto the stack. */
/* Cookie bits 1..3 give the number of registers to push.  Each push below
   occupies two 4-byte instructions (for r9 the second one is the return
   blink), so the entry point is ia_end_of_push_seq - count * 8.  */
andi r0, 7 << 1, r38
movi (LOCAL(ia_end_of_push_seq) >> 16) & 65535, r40
shlli r38, 2, r39
shori LOCAL(ia_end_of_push_seq) & 65535, r40
sub.l r40, r39, r41
ptabs/l r41, tr2
blink tr2, r63
LOCAL(ia_stack_of_push_seq): /* Beginning of push sequence. */
stx.q r17, r63, r3
addi.l r17, 8, r17
stx.q r17, r63, r4
addi.l r17, 8, r17
stx.q r17, r63, r5
addi.l r17, 8, r17
stx.q r17, r63, r6
addi.l r17, 8, r17
stx.q r17, r63, r7
addi.l r17, 8, r17
stx.q r17, r63, r8
addi.l r17, 8, r17
LOCAL(ia_r9_push): /* Push r9 onto the stack. */
stx.q r17, r63, r9
LOCAL(ia_return): /* Return. */
blink tr0, r63
LOCAL(ia_end_of_push_seq): /* Label used to compute the first push instruction. */
ENDFUNC(GLOBAL(GCC_shcompact_incoming_args))
#endif /* L_shcompact_incoming_args */
#endif
#if __SH5__
#ifdef L_nested_trampoline
#if __SH5__ == 32
.section .text..SHmedia32,"ax"
#else
.text
#endif
.align 3 /* It is copied in units of 8 bytes in SHmedia mode. */
.global GLOBAL(GCC_nested_trampoline)
HIDDEN_FUNC(GLOBAL(GCC_nested_trampoline))
/* Template code for a nested-function trampoline; the alignment comment
   above notes that it is copied in 8-byte units.
   The code first obtains an address inside itself (ptrel with a zero
   offset, read back with gettr) and then loads two values stored at fixed
   offsets from that address:
     offset 24       -> branch target (moved to tr1),
     offset 32 / 28  -> value left in r1 when control reaches the target
                        (64-bit / 32-bit ABI respectively).
   NOTE(review): presumably the first is the nested function's address and
   the second its static chain -- confirm against the code that fills in
   the trampoline.  */
GLOBAL(GCC_nested_trampoline):
.mode SHmedia
ptrel/u r63, tr0
gettr tr0, r0
#if __SH5__ == 64
ld.q r0, 24, r1
#else
ld.l r0, 24, r1
#endif
ptabs/l r1, tr1
#if __SH5__ == 64
ld.q r0, 32, r1
#else
ld.l r0, 28, r1
#endif
blink tr1, r63
ENDFUNC(GLOBAL(GCC_nested_trampoline))
#endif /* L_nested_trampoline */
#endif /* __SH5__ */
#if __SH5__ == 32
#ifdef L_push_pop_shmedia_regs
/* Bulk save / restore of SHmedia registers on the stack (r15).
   Frame layout, in 8-byte slots counted from the final r15:
     0..7    r28..r35
     8..23   r44..r59
     24..26  tr5, tr6, tr7 (moved through r60, r61, r62)
     27..40  dr36..dr62 (even-numbered; FPU build only)
   The FPU build exports GCC_push_shmedia_regs / GCC_pop_shmedia_regs; when
   __SH4_NOFPU__ is defined the same integer code is exported under the
   *_nofpu names and the floating-point part is omitted.
   r60..r62 are overwritten by both routines; the pop routine also uses r0
   for the frame size.  */
.section .text..SHmedia32,"ax"
.mode SHmedia
.align 2
#ifndef __SH4_NOFPU__
.global GLOBAL(GCC_push_shmedia_regs)
FUNC(GLOBAL(GCC_push_shmedia_regs))
GLOBAL(GCC_push_shmedia_regs):
/* FPU build: reserve 14 slots and save the FP registers first; the integer
   part below then reserves 27 more slots beneath them. */
addi.l r15, -14*8, r15
fst.d r15, 13*8, dr62
fst.d r15, 12*8, dr60
fst.d r15, 11*8, dr58
fst.d r15, 10*8, dr56
fst.d r15, 9*8, dr54
fst.d r15, 8*8, dr52
fst.d r15, 7*8, dr50
fst.d r15, 6*8, dr48
fst.d r15, 5*8, dr46
fst.d r15, 4*8, dr44
fst.d r15, 3*8, dr42
fst.d r15, 2*8, dr40
fst.d r15, 1*8, dr38
fst.d r15, 0*8, dr36
#else /* ! __SH4_NOFPU__ */
.global GLOBAL(GCC_push_shmedia_regs_nofpu)
FUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
GLOBAL(GCC_push_shmedia_regs_nofpu):
#endif /* ! __SH4_NOFPU__ */
/* Common integer part: tr5..tr7 are read into r60..r62 so that they can be
   stored with ordinary stores. */
ptabs/l r18, tr0
addi.l r15, -27*8, r15
gettr tr7, r62
gettr tr6, r61
gettr tr5, r60
st.q r15, 26*8, r62
st.q r15, 25*8, r61
st.q r15, 24*8, r60
st.q r15, 23*8, r59
st.q r15, 22*8, r58
st.q r15, 21*8, r57
st.q r15, 20*8, r56
st.q r15, 19*8, r55
st.q r15, 18*8, r54
st.q r15, 17*8, r53
st.q r15, 16*8, r52
st.q r15, 15*8, r51
st.q r15, 14*8, r50
st.q r15, 13*8, r49
st.q r15, 12*8, r48
st.q r15, 11*8, r47
st.q r15, 10*8, r46
st.q r15, 9*8, r45
st.q r15, 8*8, r44
st.q r15, 7*8, r35
st.q r15, 6*8, r34
st.q r15, 5*8, r33
st.q r15, 4*8, r32
st.q r15, 3*8, r31
st.q r15, 2*8, r30
st.q r15, 1*8, r29
st.q r15, 0*8, r28
blink tr0, r63
#ifndef __SH4_NOFPU__
ENDFUNC(GLOBAL(GCC_push_shmedia_regs))
#else
ENDFUNC(GLOBAL(GCC_push_shmedia_regs_nofpu))
#endif
#ifndef __SH4_NOFPU__
.global GLOBAL(GCC_pop_shmedia_regs)
FUNC(GLOBAL(GCC_pop_shmedia_regs))
GLOBAL(GCC_pop_shmedia_regs):
/* FPU build: the frame is 41 slots.  Restore the FP registers from slots
   27..40, then branch to .L0, skipping the "movi 27*8" that sets the
   integer-only frame size. */
pt .L0, tr1
movi 41*8, r0
fld.d r15, 40*8, dr62
fld.d r15, 39*8, dr60
fld.d r15, 38*8, dr58
fld.d r15, 37*8, dr56
fld.d r15, 36*8, dr54
fld.d r15, 35*8, dr52
fld.d r15, 34*8, dr50
fld.d r15, 33*8, dr48
fld.d r15, 32*8, dr46
fld.d r15, 31*8, dr44
fld.d r15, 30*8, dr42
fld.d r15, 29*8, dr40
fld.d r15, 28*8, dr38
fld.d r15, 27*8, dr36
blink tr1, r63
#else /* ! __SH4_NOFPU__ */
.global GLOBAL(GCC_pop_shmedia_regs_nofpu)
FUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
GLOBAL(GCC_pop_shmedia_regs_nofpu):
#endif /* ! __SH4_NOFPU__ */
movi 27*8, r0
.L0:
/* Common integer part: r0 holds the total frame size to release. */
ptabs r18, tr0
ld.q r15, 26*8, r62
ld.q r15, 25*8, r61
ld.q r15, 24*8, r60
ptabs r62, tr7
ptabs r61, tr6
ptabs r60, tr5
ld.q r15, 23*8, r59
ld.q r15, 22*8, r58
ld.q r15, 21*8, r57
ld.q r15, 20*8, r56
ld.q r15, 19*8, r55
ld.q r15, 18*8, r54
ld.q r15, 17*8, r53
ld.q r15, 16*8, r52
ld.q r15, 15*8, r51
ld.q r15, 14*8, r50
ld.q r15, 13*8, r49
ld.q r15, 12*8, r48
ld.q r15, 11*8, r47
ld.q r15, 10*8, r46
ld.q r15, 9*8, r45
ld.q r15, 8*8, r44
ld.q r15, 7*8, r35
ld.q r15, 6*8, r34
ld.q r15, 5*8, r33
ld.q r15, 4*8, r32
ld.q r15, 3*8, r31
ld.q r15, 2*8, r30
ld.q r15, 1*8, r29
ld.q r15, 0*8, r28
add.l r15, r0, r15
blink tr0, r63
#ifndef __SH4_NOFPU__
ENDFUNC(GLOBAL(GCC_pop_shmedia_regs))
#else
ENDFUNC(GLOBAL(GCC_pop_shmedia_regs_nofpu))
#endif
#endif /* L_push_pop_shmedia_regs */
#endif /* __SH5__ == 32 */
#ifdef L_div_table
#if __SH5__
#if defined(__pic__) && __SHMEDIA__
.global GLOBAL(sdivsi3)
FUNC(GLOBAL(sdivsi3))
#if __SH5__ == 32
.section .text..SHmedia32,"ax"
#else
.text
#endif
#if 0
/* ??? FIXME: Presumably due to a linker bug, exporting data symbols
in a text section does not work (at least for shared libraries):
the linker sets the LSB of the address as if this was SHmedia code. */
#define TEXT_DATA_BUG
#endif
.align 2
// inputs: r4,r5
// clobbered: r1,r18,r19,r20,r21,r25,tr0
// result in r0
.global GLOBAL(sdivsi3)
/* Signed 32-bit division for position-independent SHmedia code, driven by
   the division table.
   The register notes above this label give r4 and r5 as inputs and r0 as
   the result; r5 is the operand that is normalized and looked up, so this
   computes r4 / r5.
   Outline, following the fixed-point annotations on each line: normalize
   the divisor, use its top bits to index the table (a byte "factor" via
   ldx.ub and a 16-bit "constant" via ldx.w), form an ~11-bit inverse,
   refine it to ~22 bits, multiply by the dividend and shift the product
   back by the normalization amount held in r1.
   The table address is taken through tr0 (ptb + gettr) before tr0 is
   re-used for the return address.
   NOTE(review): hand-scheduled; the correctness of the rounding depends on
   the table generated by divtab.c.  */
GLOBAL(sdivsi3):
#ifdef TEXT_DATA_BUG
ptb datalabel Local_div_table,tr0
#else
ptb GLOBAL(div_table_internal),tr0
#endif
nsb r5, r1
shlld r5, r1, r25 // normalize; [-2 ..1, 1..2) in s2.62
shari r25, 58, r21 // extract 5(6) bit index (s2.4 with hole -1..1)
/* bubble */
gettr tr0,r20
ldx.ub r20, r21, r19 // u0.8
shari r25, 32, r25 // normalize to s2.30
shlli r21, 1, r21
muls.l r25, r19, r19 // s2.38
ldx.w r20, r21, r21 // s2.14
/* The table base now lives in r20, so tr0 can take the return address. */
ptabs r18, tr0
shari r19, 24, r19 // truncate to s2.14
sub r21, r19, r19 // some 11 bit inverse in s1.14
muls.l r19, r19, r21 // u0.28
/* r1 = 92 - normalization shift: the final right-shift count. */
sub r63, r1, r1
addi r1, 92, r1
muls.l r25, r21, r18 // s2.58
shlli r19, 45, r19 // multiply by two and convert to s2.58
/* bubble */
sub r19, r18, r18
shari r18, 28, r18 // some 22 bit inverse in s1.30
muls.l r18, r25, r0 // s2.60
muls.l r18, r4, r25 // s32.30
/* bubble */
shari r0, 16, r19 // s-16.44
muls.l r19, r18, r19 // s-16.74
shari r25, 63, r0
shari r4, 14, r18 // s19.-14
shari r19, 30, r19 // s-16.44
muls.l r19, r18, r19 // s15.30
xor r21, r0, r21 // You could also use the constant 1 << 27.
add r21, r25, r21
sub r21, r19, r21
shard r21, r1, r21
sub r21, r0, r0
blink tr0, r63
ENDFUNC(GLOBAL(sdivsi3))
/* This table has been generated by divtab.c .
Defects for bias -330:
Max defect: 6.081536e-07 at -1.000000e+00
Min defect: 2.849516e-08 at 1.030651e+00
Max 2nd step defect: 9.606539e-12 at -1.000000e+00
Min 2nd step defect: 0.000000e+00 at 0.000000e+00
Defect at 1: 1.238659e-07
Defect at -2: 1.061708e-07 */
#else /* ! __pic__ || ! __SHMEDIA__ */
.section .rodata
#endif /* __pic__ */
/* Private copy of the division table, emitted only when TEXT_DATA_BUG is
   defined for PIC SHmedia code; it carries the same values as div_table.
   The label sits in the middle of the data so that it can be indexed with
   a signed offset.  Layout relative to Local_div_table, in bytes:
     -64 .. -33  16 words, negative division constants
     -32 .. -17  16 bytes, negative division factors
     -16 .. +15  padding (the two .skip 16 directives)
     +16 .. +31  16 bytes, positive division factors
     +32 .. +63  16 words, positive division constants
   The padding appears to correspond to the "hole -1..1" of the index
   computed in sdivsi3 - confirm.  */
#if defined(TEXT_DATA_BUG) && defined(__pic__) && __SHMEDIA__
.balign 2
.type Local_div_table,@object
.size Local_div_table,128
/* negative division constants */
.word -16638
.word -17135
.word -17737
.word -18433
.word -19103
.word -19751
.word -20583
.word -21383
.word -22343
.word -23353
.word -24407
.word -25582
.word -26863
.word -28382
.word -29965
.word -31800
/* negative division factors */
.byte 66
.byte 70
.byte 75
.byte 81
.byte 87
.byte 93
.byte 101
.byte 109
.byte 119
.byte 130
.byte 142
.byte 156
.byte 172
.byte 192
.byte 214
.byte 241
/* Padding on the negative side of the label.  */
.skip 16
Local_div_table:
/* Padding on the positive side of the label.  */
.skip 16
/* positive division factors */
.byte 241
.byte 214
.byte 192
.byte 172
.byte 156
.byte 142
.byte 130
.byte 119
.byte 109
.byte 101
.byte 93
.byte 87
.byte 81
.byte 75
.byte 70
.byte 66
/* positive division constants */
.word 31801
.word 29966
.word 28383
.word 26864
.word 25583
.word 24408
.word 23354
.word 22344
.word 21384
.word 20584
.word 19752
.word 19104
.word 18434
.word 17738
.word 17136
.word 16639
/* Switch to .rodata for the global table that follows.  */
.section .rodata
#endif /* TEXT_DATA_BUG */
/* Global division table, also reachable through the hidden alias
   div_table_internal.  The label sits in the middle of the data so that it
   can be indexed with a signed offset.  Layout relative to div_table, in
   bytes:
     -64 .. -33  16 words, negative division constants
     -32 .. -17  16 bytes, negative division factors
     -16 .. +15  padding (the two .skip 16 directives)
     +16 .. +31  16 bytes, positive division factors
     +32 .. +63  16 words, positive division constants  */
.balign 2
.type GLOBAL(div_table),@object
.size GLOBAL(div_table),128
/* negative division constants */
.word -16638
.word -17135
.word -17737
.word -18433
.word -19103
.word -19751
.word -20583
.word -21383
.word -22343
.word -23353
.word -24407
.word -25582
.word -26863
.word -28382
.word -29965
.word -31800
/* negative division factors */
.byte 66
.byte 70
.byte 75
.byte 81
.byte 87
.byte 93
.byte 101
.byte 109
.byte 119
.byte 130
.byte 142
.byte 156
.byte 172
.byte 192
.byte 214
.byte 241
/* Padding on the negative side of the label.  */
.skip 16
.global GLOBAL(div_table)
GLOBAL(div_table):
/* Hidden alias; sdivsi3 takes the table address through this name when
   TEXT_DATA_BUG is not defined.  */
HIDDEN_ALIAS(div_table_internal,div_table)
/* Padding on the positive side of the label.  */
.skip 16
/* positive division factors */
.byte 241
.byte 214
.byte 192
.byte 172
.byte 156
.byte 142
.byte 130
.byte 119
.byte 109
.byte 101
.byte 93
.byte 87
.byte 81
.byte 75
.byte 70
.byte 66
/* positive division constants */
.word 31801
.word 29966
.word 28383
.word 26864
.word 25583
.word 24408
.word 23354
.word 22344
.word 21384
.word 20584
.word 19752
.word 19104
.word 18434
.word 17738
.word 17136
.word 16639
#elif defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
#if defined (__SH2A__) || defined (__SH3__) || defined (__SH3E__) || defined (__SH4__) || defined (__SH4_SINGLE__) || defined (__SH4_SINGLE_ONLY__) || defined (__SH4_NOFPU__)
/* This code uses shld, thus is not suitable for SH1 / SH2. */
/* Signed / unsigned division without use of FPU, optimized for SH4.
......@@ -3999,7 +2247,6 @@ LOCAL(div_table_inv):
#endif /* L_div_table */
#ifdef L_udiv_qrnnd_16
#if !__SHMEDIA__
HIDDEN_FUNC(GLOBAL(udiv_qrnnd_16))
/* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */
/* n1 < d, but n1 might be larger than d1. */
......@@ -4043,5 +2290,4 @@ GLOBAL(udiv_qrnnd_16):
0: rts
nop
ENDFUNC(GLOBAL(udiv_qrnnd_16))
#endif /* !__SHMEDIA__ */
#endif /* L_udiv_qrnnd_16 */
......@@ -33,14 +33,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include <sys/ucontext.h>
#include "insn-constants.h"
# if defined (__SH5__)
#define SH_DWARF_FRAME_GP0 0
#define SH_DWARF_FRAME_FP0 77
#define SH_DWARF_FRAME_BT0 68
#define SH_DWARF_FRAME_PR_MEDIA 18
#define SH_DWARF_FRAME_SR 65
#define SH_DWARF_FRAME_FPSCR 76
#else
#define SH_DWARF_FRAME_GP0 0
#define SH_DWARF_FRAME_FP0 25
#define SH_DWARF_FRAME_XD0 87
......@@ -52,96 +44,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define SH_DWARF_FRAME_SR 22
#define SH_DWARF_FRAME_FPUL 23
#define SH_DWARF_FRAME_FPSCR 24
#endif /* defined (__SH5__) */
#if defined (__SH5__)
#define MD_FALLBACK_FRAME_STATE_FOR shmedia_fallback_frame_state
/* Fallback unwinder for frames that have no unwind information because
   they return into a signal trampoline.

   The code at CONTEXT->ra is compared with the two known trampolines:
     movi 0x10,r9; shori 0x77,r9; trapa r9; nop   (sigreturn)
     movi 0x10,r9; shori 0xad,r9; trapa r9; nop   (rt_sigreturn)
   Only the second instruction word differs between them.  If neither
   matches, _URC_END_OF_STACK is returned.  Otherwise FS is filled in so
   that every saved register is located inside the signal context relative
   to the interrupted frame's r15, and _URC_NO_REASON is returned.  */
static _Unwind_Reason_Code
shmedia_fallback_frame_state (struct _Unwind_Context *context,
                              _Unwind_FrameState *fs)
{
  unsigned char *pc = context->ra;
  struct sigcontext *sc;
  unsigned long shori_word;
  long new_cfa;
  int n;

/* Mark unwind column COL as saved in memory at the address of SLOT,
   expressed as an offset from the new CFA.  */
#define SHMEDIA_SAVED_AT(COL, SLOT)                                   \
  do                                                                  \
    {                                                                 \
      fs->regs.reg[(COL)].how = REG_SAVED_OFFSET;                     \
      fs->regs.reg[(COL)].loc.offset = (long) &(SLOT) - new_cfa;      \
    }                                                                 \
  while (0)

  /* The instruction words are read at pc - 1, pc + 3, pc + 7 and pc + 11
     (presumably because SHmedia code addresses carry a set low bit -
     confirm).  They are examined strictly in that order, and a later word
     is only read once the earlier ones have matched.  */
  if (*(unsigned long *) (pc - 1) != 0xcc004090)
    return _URC_END_OF_STACK;

  shori_word = *(unsigned long *) (pc + 3);
  if (shori_word != 0xc801dc90 && shori_word != 0xc802b490)
    return _URC_END_OF_STACK;

  if (*(unsigned long *) (pc + 7) != 0x6c91fff0
      || *(unsigned long *) (pc + 11) != 0x6ff0fff0)
    return _URC_END_OF_STACK;

  if (shori_word == 0xc801dc90)
    {
      /* sigreturn: the CFA points directly at the sigcontext.  */
      sc = context->cfa;
    }
  else
    {
      /* rt_sigreturn: the sigcontext is the machine context embedded in
         the ucontext of the rt signal frame found at the CFA.  */
      struct rt_sigframe {
        siginfo_t *pinfo;
        void *puc;
        siginfo_t info;
        struct ucontext uc;
      } *rt_ = context->cfa;

      /* Going through void * silences an aliasing warning.  The warning
         is correct, but harmless here because nothing is aliased.  */
      sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext;
    }

  /* The new CFA is the interrupted code's r15, described as r15 plus the
     distance from the current CFA.  */
  new_cfa = sc->sc_regs[15];
  fs->regs.cfa_how = CFA_REG_OFFSET;
  fs->regs.cfa_reg = 15;
  fs->regs.cfa_offset = new_cfa - (long) context->cfa;

  /* General registers 0..62, except r15 which defines the CFA.  */
  for (n = 0; n < 63; n++)
    if (n != 15)
      SHMEDIA_SAVED_AT (n, sc->sc_regs[n]);

  SHMEDIA_SAVED_AT (SH_DWARF_FRAME_SR, sc->sc_sr);

  /* The eight branch-target registers.  */
  for (n = 0; n < 8; n++)
    SHMEDIA_SAVED_AT (SH_DWARF_FRAME_BT0 + n, sc->sc_tregs[n]);

  /* The 32 floating-point register slots.  */
  for (n = 0; n < 32; n++)
    SHMEDIA_SAVED_AT (SH_DWARF_FRAME_FP0 + n, sc->sc_fpregs[n]);

  SHMEDIA_SAVED_AT (SH_DWARF_FRAME_FPSCR, sc->sc_fpscr);

  /* The column of the zero register (63) is reused to carry the return
     address, which is the saved pc.  */
  SHMEDIA_SAVED_AT (63, sc->sc_pc);
  fs->retaddr_column = 63;
  fs->signal_frame = 1;

#undef SHMEDIA_SAVED_AT

  return _URC_NO_REASON;
}
#else /* defined (__SH5__) */
#define MD_FALLBACK_FRAME_STATE_FOR sh_fallback_frame_state
......@@ -250,6 +152,5 @@ sh_fallback_frame_state (struct _Unwind_Context *context,
fs->signal_frame = 1;
return _URC_NO_REASON;
}
#endif /* defined (__SH5__) */
#endif /* inhibit_libc */
# Routines to build from the SH lib1funcs assembly source.  Each `_name`
# entry presumably enables the section guarded by `#ifdef L_name` there
# (e.g. L_div_table, L_push_pop_shmedia_regs) - confirm against the libgcc
# build rules.
LIB1ASMFUNCS = \
_sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr \
_shcompact_call_trampoline _shcompact_return_trampoline \
_shcompact_incoming_args _ic_invalidate _nested_trampoline \
_push_pop_shmedia_regs \
_udivdi3 _divdi3 _umoddi3 _moddi3 _div_table
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment