Commit aadb5b43 by Oleg Endo

re PR target/54089 ([SH] Refactor shift patterns)

	PR target/54089
	* config/sh/sh.h (SH_DYNAMIC_SHIFT_COST): Set always to 1 if
	dynamic shifts are available.
	(SHIFT_COUNT_TRUNCATED): Always define to 0.  Correct comment.
	* config/sh/sh.c (ashl_lshr_seq, ext_ashl_lshr_seq): Add comments.
	* config/sh/predicates.md (shift_count_operand): Allow
	arith_reg_operand even if TARGET_DYNSHIFT is false.
	* config/sh/sh.md (ashlsi3, lshrsi3): Expand library call patterns
	if needed.
	(ashlsi3_d_call, lshrsi3_d_call): New insns.

	PR target/54089
	* config/sh/lib1funcs.S (ashlsi3): Reimplement as ashlsi3_r0.
	(lshrsi3): Reimplement as lshrsi3_r0.

	PR target/54089
	* gcc.target/sh/pr54089-3.c: New.

From-SVN: r191161
parent d5d66749
2012-09-10 Oleg Endo <olegendo@gcc.gnu.org>
PR target/54089
* config/sh/sh.h (SH_DYNAMIC_SHIFT_COST): Set always to 1 if
dynamic shifts are available.
(SHIFT_COUNT_TRUNCATED): Always define to 0. Correct comment.
* config/sh/sh.c (ashl_lshr_seq, ext_ashl_lshr_seq): Add comments.
* config/sh/predicates.md (shift_count_operand): Allow
arith_reg_operand even if TARGET_DYNSHIFT is false.
* config/sh/sh.md (ashlsi3, lshrsi3): Expand library call patterns
if needed.
(ashlsi3_d_call, lshrsi3_d_call): New insns.
2012-09-10 Richard Sandiford <rdsandiford@googlemail.com> 2012-09-10 Richard Sandiford <rdsandiford@googlemail.com>
* ira.c (setup_pressure_classes): Handle synonymous classes. * ira.c (setup_pressure_classes): Handle synonymous classes.
......
...@@ -791,9 +791,8 @@ ...@@ -791,9 +791,8 @@
/* Allow T_REG as shift count for dynamic shifts, although it is not /* Allow T_REG as shift count for dynamic shifts, although it is not
really possible. It will then be copied to a general purpose reg. */ really possible. It will then be copied to a general purpose reg. */
if (! TARGET_SHMEDIA) if (! TARGET_SHMEDIA)
return const_int_operand (op, mode) return const_int_operand (op, mode) || arith_reg_operand (op, mode)
|| (TARGET_DYNSHIFT && (arith_reg_operand (op, mode) || (TARGET_DYNSHIFT && t_reg_operand (op, mode));
|| t_reg_operand (op, mode)));
return (CONSTANT_P (op) return (CONSTANT_P (op)
? (CONST_INT_P (op) ? (CONST_INT_P (op)
......
...@@ -2871,35 +2871,35 @@ struct ashl_lshr_sequence ...@@ -2871,35 +2871,35 @@ struct ashl_lshr_sequence
static const struct ashl_lshr_sequence ashl_lshr_seq[32] = static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
{ {
{ 0, { 0 }, 0 }, { 0, { 0 }, 0 }, // 0
{ 1, { 1 }, LSHR_CLOBBERS_T }, { 1, { 1 }, LSHR_CLOBBERS_T },
{ 1, { 2 }, 0 }, { 1, { 2 }, 0 },
{ 2, { 2, 1 }, LSHR_CLOBBERS_T }, { 2, { 2, 1 }, LSHR_CLOBBERS_T },
{ 2, { 2, 2 }, 0 }, { 2, { 2, 2 }, 0 }, // 4
{ 3, { 2, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 2, 2, 2 }, 0 }, { 3, { 2, 2, 2 }, 0 },
{ 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T }, { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
{ 1, { 8 }, 0 }, { 1, { 8 }, 0 }, // 8
{ 2, { 8, 1 }, LSHR_CLOBBERS_T }, { 2, { 8, 1 }, LSHR_CLOBBERS_T },
{ 2, { 8, 2 }, 0 }, { 2, { 8, 2 }, 0 },
{ 3, { 8, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 8, 2, 2 }, 0 }, { 3, { 8, 2, 2 }, 0 }, // 12
{ 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T }, { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 8, -2, 8 }, 0 }, { 3, { 8, -2, 8 }, 0 },
{ 3, { 8, -1, 8 }, ASHL_CLOBBERS_T }, { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
{ 1, { 16 }, 0 }, { 1, { 16 }, 0 }, // 16
{ 2, { 16, 1 }, LSHR_CLOBBERS_T }, { 2, { 16, 1 }, LSHR_CLOBBERS_T },
{ 2, { 16, 2 }, 0 }, { 2, { 16, 2 }, 0 },
{ 3, { 16, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 16, 2, 2 }, 0 }, { 3, { 16, 2, 2 }, 0 }, // 20
{ 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T }, { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 16, -2, 8 }, 0 }, { 3, { 16, -2, 8 }, 0 },
{ 3, { 16, -1, 8 }, ASHL_CLOBBERS_T }, { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
{ 2, { 16, 8 }, 0 }, { 2, { 16, 8 }, 0 }, // 24
{ 3, { 16, 1, 8 }, LSHR_CLOBBERS_T }, { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
{ 3, { 16, 8, 2 }, 0 }, { 3, { 16, 8, 2 }, 0 },
{ 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T }, { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
{ 4, { 16, 8, 2, 2 }, 0 }, { 4, { 16, 8, 2, 2 }, 0 }, // 28
{ 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T }, { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
{ 3, { 16, -2, 16 }, 0 }, { 3, { 16, -2, 16 }, 0 },
...@@ -2915,35 +2915,35 @@ static const struct ashl_lshr_sequence ashl_lshr_seq[32] = ...@@ -2915,35 +2915,35 @@ static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
kind of sign or zero extension. */ kind of sign or zero extension. */
static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] = static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
{ {
{ 0, { 0 }, 0 }, { 0, { 0 }, 0 }, // 0
{ 1, { 1 }, LSHR_CLOBBERS_T }, { 1, { 1 }, LSHR_CLOBBERS_T },
{ 1, { 2 }, 0 }, { 1, { 2 }, 0 },
{ 2, { 2, 1 }, LSHR_CLOBBERS_T }, { 2, { 2, 1 }, LSHR_CLOBBERS_T },
{ 2, { 2, 2 }, 0 }, { 2, { 2, 2 }, 0 }, // 4
{ 3, { 2, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
{ 2, { 8, -2 }, 0 }, { 2, { 8, -2 }, 0 },
{ 2, { 8, -1 }, ASHL_CLOBBERS_T }, { 2, { 8, -1 }, ASHL_CLOBBERS_T },
{ 1, { 8 }, 0 }, { 1, { 8 }, 0 }, // 8
{ 2, { 8, 1 }, LSHR_CLOBBERS_T }, { 2, { 8, 1 }, LSHR_CLOBBERS_T },
{ 2, { 8, 2 }, 0 }, { 2, { 8, 2 }, 0 },
{ 3, { 8, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 8, 2, 2 }, 0 }, { 3, { 8, 2, 2 }, 0 }, // 12
{ 3, { 16, -2, -1 }, ASHL_CLOBBERS_T }, { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
{ 2, { 16, -2 }, 0 }, { 2, { 16, -2 }, 0 },
{ 2, { 16, -1 }, ASHL_CLOBBERS_T }, { 2, { 16, -1 }, ASHL_CLOBBERS_T },
{ 1, { 16 }, 0 }, { 1, { 16 }, 0 }, // 16
{ 2, { 16, 1 }, LSHR_CLOBBERS_T }, { 2, { 16, 1 }, LSHR_CLOBBERS_T },
{ 2, { 16, 2 }, 0 }, { 2, { 16, 2 }, 0 },
{ 3, { 16, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 16, 2, 2 }, 0 }, { 3, { 16, 2, 2 }, 0 }, // 20
{ 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T }, { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 16, -2, 8 }, 0 }, { 3, { 16, -2, 8 }, 0 },
{ 3, { 16, -1, 8 }, ASHL_CLOBBERS_T }, { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
{ 2, { 16, 8 }, 0 }, { 2, { 16, 8 }, 0 }, // 24
{ 3, { 16, 1, 8 }, LSHR_CLOBBERS_T }, { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
{ 3, { 16, 8, 2 }, 0 }, { 3, { 16, 8, 2 }, 0 },
{ 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T }, { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
{ 4, { 16, 8, 2, 2 }, 0 }, { 4, { 16, 8, 2, 2 }, 0 }, // 28
{ 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T }, { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
{ 3, { 16, -2, 16 }, 0 }, { 3, { 16, -2, 16 }, 0 },
{ 3, { 16, -1, 16 }, ASHL_CLOBBERS_T } { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
......
...@@ -1932,19 +1932,27 @@ struct sh_args { ...@@ -1932,19 +1932,27 @@ struct sh_args {
like shad and shld. */ like shad and shld. */
#define TARGET_DYNSHIFT (TARGET_SH3 || TARGET_SH2A) #define TARGET_DYNSHIFT (TARGET_SH3 || TARGET_SH2A)
#define SH_DYNAMIC_SHIFT_COST \ /* The costs of using the dynamic shift insns (shad, shld) are the same
(TARGET_HARD_SH4 ? 1 : TARGET_DYNSHIFT ? (optimize_size ? 1 : 2) : 20) if they are available. If they are not available a library function will
be emitted instead, which is more expensive. */
/* Immediate shift counts are truncated by the output routines (or was it #define SH_DYNAMIC_SHIFT_COST (TARGET_DYNSHIFT ? 1 : 20)
the assembler?). Shift counts in a register are truncated by SH. Note
that the native compiler puts too large (> 32) immediate shift counts /* Defining SHIFT_COUNT_TRUNCATED tells the combine pass that code like
into a register and shifts by the register, letting the SH decide what (X << (Y % 32)) for register X, Y is equivalent to (X << Y).
to do instead of doing that itself. */ This is not generally true when hardware dynamic shifts (shad, shld) are
/* ??? The library routines in lib1funcs.S truncate the shift count. used, because they check the sign bit _before_ the modulo op. The sign
However, the SH3 has hardware shifts that do not truncate exactly as gcc bit determines whether it is a left shift or a right shift:
expects - the sign bit is significant - so it appears that we need to if (Y >= 0)
leave this zero for correct SH3 code. */ return X << (Y & 31);
#define SHIFT_COUNT_TRUNCATED (! TARGET_SH3 && ! TARGET_SH2A) else
return X >> ((-Y) & 31);
The dynamic shift library routines in lib1funcs.S do not use the sign bit
like the hardware dynamic shifts and truncate the shift count to 5 bits (0..31).
We define SHIFT_COUNT_TRUNCATED to 0 and express the implied shift count
truncation in the library function call patterns, as this gives slightly
more compact code. */
#define SHIFT_COUNT_TRUNCATED (0)
/* CANONICALIZE_COMPARISON macro for the combine pass. */ /* CANONICALIZE_COMPARISON macro for the combine pass. */
#define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \ #define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \
......
...@@ -4023,6 +4023,17 @@ label: ...@@ -4023,6 +4023,17 @@ label:
operands[2])); operands[2]));
DONE; DONE;
} }
/* Expand a library call for the dynamic shift. */
if (!CONST_INT_P (operands[2]) && !TARGET_DYNSHIFT)
{
emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
rtx funcaddr = gen_reg_rtx (Pmode);
function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC);
emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr));
DONE;
}
}) })
(define_insn "ashlsi3_k" (define_insn "ashlsi3_k"
...@@ -4067,6 +4078,23 @@ label: ...@@ -4067,6 +4078,23 @@ label:
} }
[(set_attr "type" "dyn_shift")]) [(set_attr "type" "dyn_shift")])
;; If dynamic shifts are not available use a library function.
;; By specifying the pattern we reduce the number of call clobbered regs.
;; In order to make combine understand the truncation of the shift amount
;; operand we have to allow it to use pseudo regs for the shift operands.
;;
;; Logical/arithmetic left shift by a variable count, implemented as a call
;; to a library routine.  Only enabled for TARGET_SH1 && !TARGET_DYNSHIFT.
;;   operand 0: destination of the shifted value (constraint "=z").
;;   operand 1: shift count (constraint "z").  It is wrapped in
;;              (and:SI ... (const_int 31)) so that the count truncation
;;              done by the library routine is expressed in the RTL.
;;   operand 2: register holding the address of the routine; it is only
;;              'use'd by the pattern and is the target of the emitted
;;              "jsr @%2".
;; The value to be shifted is read from the hard register R4_REG.
;; Besides operand 0, only T_REG and PR_REG are declared as clobbered.
;; NOTE(review): "z" is presumably the r0 register constraint, matching the
;; r0 in / r0 out convention documented for ashlsi3_r0 in lib1funcs.S, and
;; "%#" presumably fills the delay slot of the jsr -- confirm both against
;; the SH constraint and print-operand definitions.
(define_insn "ashlsi3_d_call"
[(set (match_operand:SI 0 "arith_reg_dest" "=z")
(ashift:SI (reg:SI R4_REG)
(and:SI (match_operand:SI 1 "arith_reg_operand" "z")
(const_int 31))))
(use (match_operand:SI 2 "arith_reg_operand" "r"))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))]
"TARGET_SH1 && !TARGET_DYNSHIFT"
"jsr @%2%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn_and_split "ashlsi3_n" (define_insn_and_split "ashlsi3_n"
[(set (match_operand:SI 0 "arith_reg_dest" "=r") [(set (match_operand:SI 0 "arith_reg_dest" "=r")
(ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
...@@ -4512,6 +4540,16 @@ label: ...@@ -4512,6 +4540,16 @@ label:
operands[2])); operands[2]));
DONE; DONE;
} }
/* Expand a library call for the dynamic shift. */
if (!CONST_INT_P (operands[2]) && !TARGET_DYNSHIFT)
{
emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
rtx funcaddr = gen_reg_rtx (Pmode);
function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC);
emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr));
DONE;
}
}) })
(define_insn "lshrsi3_k" (define_insn "lshrsi3_k"
...@@ -4556,6 +4594,23 @@ label: ...@@ -4556,6 +4594,23 @@ label:
} }
[(set_attr "type" "dyn_shift")]) [(set_attr "type" "dyn_shift")])
;; If dynamic shifts are not available use a library function.
;; By specifying the pattern we reduce the number of call clobbered regs.
;; In order to make combine understand the truncation of the shift amount
;; operand we have to allow it to use pseudo regs for the shift operands.
;;
;; Logical right shift by a variable count, implemented as a call to a
;; library routine.  Only enabled for TARGET_SH1 && !TARGET_DYNSHIFT.
;;   operand 0: destination of the shifted value (constraint "=z").
;;   operand 1: shift count (constraint "z").  It is wrapped in
;;              (and:SI ... (const_int 31)) so that the count truncation
;;              done by the library routine is expressed in the RTL.
;;   operand 2: register holding the address of the routine; it is only
;;              'use'd by the pattern and is the target of the emitted
;;              "jsr @%2".
;; The value to be shifted is read from the hard register R4_REG.
;; Besides operand 0, only T_REG and PR_REG are declared as clobbered.
;; NOTE(review): "z" is presumably the r0 register constraint, matching the
;; r0 in / r0 out convention documented for lshrsi3_r0 in lib1funcs.S, and
;; "%#" presumably fills the delay slot of the jsr -- confirm both against
;; the SH constraint and print-operand definitions.
(define_insn "lshrsi3_d_call"
[(set (match_operand:SI 0 "arith_reg_dest" "=z")
(lshiftrt:SI (reg:SI R4_REG)
(and:SI (match_operand:SI 1 "arith_reg_operand" "z")
(const_int 31))))
(use (match_operand:SI 2 "arith_reg_operand" "r"))
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))]
"TARGET_SH1 && !TARGET_DYNSHIFT"
"jsr @%2%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn_and_split "lshrsi3_n" (define_insn_and_split "lshrsi3_n"
[(set (match_operand:SI 0 "arith_reg_dest" "=r") [(set (match_operand:SI 0 "arith_reg_dest" "=r")
(lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
......
2012-09-10 Oleg Endo <olegendo@gcc.gnu.org>
PR target/54089
* gcc.target/sh/pr54089-3.c: New.
2012-09-10 Marc Glisse <marc.glisse@inria.fr> 2012-09-10 Marc Glisse <marc.glisse@inria.fr>
* gcc.dg/tree-ssa/forwprop-21.c: New testcase. * gcc.dg/tree-ssa/forwprop-21.c: New testcase.
......
/* The dynamic shift library functions truncate the shift count to 5 bits.
Verify that this is taken into account and no extra shift count
truncations are generated before the library call. */
/* { dg-do compile { target "sh*-*-*" } } */
/* { dg-options "-O1" } */
/* { dg-skip-if "" { "sh*-*-*" } { "*" } { "-m1*" "-m2" "-m2e*" } } */
/* { dg-final { scan-assembler-not "and" } } */
/* { dg-final { scan-assembler-not "31" } } */
/* Left shift inside a loop, with a shift count that is explicitly masked
   with 31.  Returns the sum of d[i] + b[i] + (e << (i & 31)) over all i in
   [0, c).  Parameter 'a' is not used by the body.  */
int
test00 (unsigned int a, int* b, int c, int* d, unsigned int e)
{
int s = 0;
int i;
for (i = 0; i < c; ++i)
/* The '& 31' below is the truncation that the scan-assembler-not
   directives of this file expect not to show up in the output.  */
s += d[i] + b[i] + (e << (i & 31));
return s;
}
/* Right shift counterpart of the loop test: 'e' is unsigned, so this is a
   logical right shift with a count that is explicitly masked with 31.
   Returns the sum of d[i] + b[i] + (e >> (i & 31)) over all i in [0, c).
   Parameter 'a' is not used by the body.  */
int
test01 (unsigned int a, int* b, int c, int* d, unsigned int e)
{
int s = 0;
int i;
for (i = 0; i < c; ++i)
/* The '& 31' below is the truncation that the scan-assembler-not
   directives of this file expect not to show up in the output.  */
s += d[i] + b[i] + (e >> (i & 31));
return s;
}
/* Straight-line left shift: returns 'b' shifted left by the low 5 bits of
   'a'.  The explicit '& 31' is the truncation that the scan-assembler-not
   directives of this file expect not to show up in the output.  */
int
test03 (unsigned int a, unsigned int b)
{
return b << (a & 31);
}
/* Straight-line logical right shift: returns the unsigned value 'a' shifted
   right by the low 5 bits of 'b'.  The explicit '& 31' is the truncation
   that the scan-assembler-not directives of this file expect not to show up
   in the output.  */
unsigned int
test04 (unsigned int a, int b)
{
return a >> (b & 31);
}
2012-09-10 Oleg Endo <olegendo@gcc.gnu.org>
PR target/54089
* config/sh/lib1funcs.S (ashlsi3): Reimplement as ashlsi3_r0.
(lshrsi3): Reimplement as lshrsi3_r0.
2012-09-10 Andreas Schwab <schwab@linux-m68k.org> 2012-09-10 Andreas Schwab <schwab@linux-m68k.org>
PR target/46191 PR target/46191
......
/* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, /* Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
2004, 2005, 2006, 2009 2004, 2005, 2006, 2009, 2012
Free Software Foundation, Inc. Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it This file is free software; you can redistribute it and/or modify it
...@@ -241,7 +241,7 @@ GLOBAL(ashiftrt_r4_0): ...@@ -241,7 +241,7 @@ GLOBAL(ashiftrt_r4_0):
! Entry: ! Entry:
! !
! r4: Value to shift ! r4: Value to shift
! r5: Shifts ! r5: Shift count
! !
! Exit: ! Exit:
! !
...@@ -249,7 +249,7 @@ GLOBAL(ashiftrt_r4_0): ...@@ -249,7 +249,7 @@ GLOBAL(ashiftrt_r4_0):
! !
! Destroys: ! Destroys:
! !
! (none) ! T bit, r5
! !
.global GLOBAL(ashrsi3) .global GLOBAL(ashrsi3)
...@@ -388,318 +388,353 @@ LOCAL(ashrsi3_0): ...@@ -388,318 +388,353 @@ LOCAL(ashrsi3_0):
! !
! GLOBAL(ashlsi3) ! GLOBAL(ashlsi3)
! (For compatibility with older binaries, not used by compiler)
! !
! Entry: ! Entry:
!
! r4: Value to shift ! r4: Value to shift
! r5: Shifts ! r5: Shift count
! !
! Exit: ! Exit:
!
! r0: Result ! r0: Result
! !
! Destroys: ! Destroys:
! T bit
!
! !
! (none) ! GLOBAL(ashlsi3_r0)
!
! Entry:
! r4: Value to shift
! r0: Shift count
! !
! Exit:
! r0: Result
!
! Destroys:
! T bit
.global GLOBAL(ashlsi3) .global GLOBAL(ashlsi3)
.global GLOBAL(ashlsi3_r0)
HIDDEN_FUNC(GLOBAL(ashlsi3)) HIDDEN_FUNC(GLOBAL(ashlsi3))
.align 2 HIDDEN_FUNC(GLOBAL(ashlsi3_r0))
GLOBAL(ashlsi3): GLOBAL(ashlsi3):
mov #31,r0 mov r5,r0
and r0,r5 .align 2
mova LOCAL(ashlsi3_table),r0 GLOBAL(ashlsi3_r0):
mov.b @(r0,r5),r5
#ifdef __sh1__ #ifdef __sh1__
add r5,r0 and #31,r0
shll2 r0
mov.l r4,@-r15
mov r0,r4
mova LOCAL(ashlsi3_table),r0
add r4,r0
mov.l @r15+,r4
jmp @r0 jmp @r0
mov r4,r0
.align 2
#else #else
braf r5 and #31,r0
#endif shll2 r0
braf r0
mov r4,r0 mov r4,r0
#endif
.align 2
LOCAL(ashlsi3_table): LOCAL(ashlsi3_table):
.byte LOCAL(ashlsi3_0)-LOCAL(ashlsi3_table) rts // << 0
.byte LOCAL(ashlsi3_1)-LOCAL(ashlsi3_table) nop
.byte LOCAL(ashlsi3_2)-LOCAL(ashlsi3_table) LOCAL(ashlsi_1):
.byte LOCAL(ashlsi3_3)-LOCAL(ashlsi3_table) rts // << 1
.byte LOCAL(ashlsi3_4)-LOCAL(ashlsi3_table) shll r0
.byte LOCAL(ashlsi3_5)-LOCAL(ashlsi3_table) LOCAL(ashlsi_2): // << 2
.byte LOCAL(ashlsi3_6)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_7)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_8)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_9)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_10)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_11)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_12)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_13)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_14)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_15)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_16)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_17)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_18)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_19)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_20)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_21)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_22)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_23)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_24)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_25)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_26)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_27)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_28)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_29)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_30)-LOCAL(ashlsi3_table)
.byte LOCAL(ashlsi3_31)-LOCAL(ashlsi3_table)
LOCAL(ashlsi3_6):
shll2 r0
LOCAL(ashlsi3_4):
shll2 r0
LOCAL(ashlsi3_2):
rts rts
shll2 r0 shll2 r0
bra LOCAL(ashlsi_1) // << 3
LOCAL(ashlsi3_7):
shll2 r0
LOCAL(ashlsi3_5):
shll2 r0 shll2 r0
LOCAL(ashlsi3_3): bra LOCAL(ashlsi_2) // << 4
shll2 r0 shll2 r0
LOCAL(ashlsi3_1): bra LOCAL(ashlsi_5) // << 5
rts
shll r0 shll r0
bra LOCAL(ashlsi_6) // << 6
LOCAL(ashlsi3_14):
shll2 r0
LOCAL(ashlsi3_12):
shll2 r0 shll2 r0
LOCAL(ashlsi3_10): bra LOCAL(ashlsi_7) // << 7
shll2 r0 shll r0
LOCAL(ashlsi3_8): LOCAL(ashlsi_8): // << 8
rts rts
shll8 r0 shll8 r0
bra LOCAL(ashlsi_8) // << 9
LOCAL(ashlsi3_15): shll r0
shll2 r0 bra LOCAL(ashlsi_8) // << 10
LOCAL(ashlsi3_13):
shll2 r0 shll2 r0
LOCAL(ashlsi3_11): bra LOCAL(ashlsi_11) // << 11
shll r0
bra LOCAL(ashlsi_12) // << 12
shll2 r0 shll2 r0
LOCAL(ashlsi3_9): bra LOCAL(ashlsi_13) // << 13
shll r0
bra LOCAL(ashlsi_14) // << 14
shll8 r0
bra LOCAL(ashlsi_15) // << 15
shll8 r0 shll8 r0
LOCAL(ashlsi_16): // << 16
rts rts
shll16 r0
bra LOCAL(ashlsi_16) // << 17
shll r0 shll r0
bra LOCAL(ashlsi_16) // << 18
LOCAL(ashlsi3_22):
shll2 r0
LOCAL(ashlsi3_20):
shll2 r0 shll2 r0
LOCAL(ashlsi3_18): bra LOCAL(ashlsi_19) // << 19
shll r0
bra LOCAL(ashlsi_20) // << 20
shll2 r0 shll2 r0
LOCAL(ashlsi3_16): bra LOCAL(ashlsi_21) // << 21
rts shll r0
bra LOCAL(ashlsi_22) // << 22
shll16 r0 shll16 r0
bra LOCAL(ashlsi_23) // << 23
LOCAL(ashlsi3_23): shll16 r0
shll2 r0 bra LOCAL(ashlsi_16) // << 24
LOCAL(ashlsi3_21): shll8 r0
bra LOCAL(ashlsi_25) // << 25
shll r0
bra LOCAL(ashlsi_26) // << 26
shll2 r0 shll2 r0
LOCAL(ashlsi3_19): bra LOCAL(ashlsi_27) // << 27
shll r0
bra LOCAL(ashlsi_28) // << 28
shll2 r0 shll2 r0
LOCAL(ashlsi3_17): bra LOCAL(ashlsi_29) // << 29
shll16 r0
bra LOCAL(ashlsi_30) // << 30
shll16 r0 shll16 r0
and #1,r0 // << 31
rts rts
shll r0 rotr r0
LOCAL(ashlsi3_30): LOCAL(ashlsi_7):
shll2 r0
LOCAL(ashlsi3_28):
shll2 r0 shll2 r0
LOCAL(ashlsi3_26): LOCAL(ashlsi_5):
LOCAL(ashlsi_6):
shll2 r0 shll2 r0
LOCAL(ashlsi3_24):
shll16 r0
rts rts
shll8 r0 LOCAL(ashlsi_13):
LOCAL(ashlsi3_31):
shll2 r0 shll2 r0
LOCAL(ashlsi3_29): LOCAL(ashlsi_12):
LOCAL(ashlsi_11):
shll8 r0
rts
LOCAL(ashlsi_21):
shll2 r0 shll2 r0
LOCAL(ashlsi3_27): LOCAL(ashlsi_20):
LOCAL(ashlsi_19):
shll16 r0
rts
LOCAL(ashlsi_28):
LOCAL(ashlsi_27):
shll2 r0 shll2 r0
LOCAL(ashlsi3_25): LOCAL(ashlsi_26):
LOCAL(ashlsi_25):
shll16 r0 shll16 r0
rts
shll8 r0 shll8 r0
LOCAL(ashlsi_22):
LOCAL(ashlsi_14):
shlr2 r0
rts rts
shll r0 shll8 r0
LOCAL(ashlsi3_0): LOCAL(ashlsi_23):
LOCAL(ashlsi_15):
shlr r0
rts rts
nop shll8 r0
LOCAL(ashlsi_29):
shlr r0
LOCAL(ashlsi_30):
shlr2 r0
rts
shll16 r0
ENDFUNC(GLOBAL(ashlsi3)) ENDFUNC(GLOBAL(ashlsi3))
ENDFUNC(GLOBAL(ashlsi3_r0))
#endif #endif
#ifdef L_lshiftrt #ifdef L_lshiftrt
! !
! GLOBAL(lshrsi3) ! GLOBAL(lshrsi3)
! (For compatibility with older binaries, not used by compiler)
! !
! Entry: ! Entry:
!
! r4: Value to shift ! r4: Value to shift
! r5: Shifts ! r5: Shift count
! !
! Exit: ! Exit:
!
! r0: Result ! r0: Result
! !
! Destroys: ! Destroys:
! T bit
!
!
! GLOBAL(lshrsi3_r0)
! !
! (none) ! Entry:
! r4: Value to shift
! r0: Shift count
! !
! Exit:
! r0: Result
!
! Destroys:
! T bit
.global GLOBAL(lshrsi3) .global GLOBAL(lshrsi3)
.global GLOBAL(lshrsi3_r0)
HIDDEN_FUNC(GLOBAL(lshrsi3)) HIDDEN_FUNC(GLOBAL(lshrsi3))
.align 2 HIDDEN_FUNC(GLOBAL(lshrsi3_r0))
GLOBAL(lshrsi3): GLOBAL(lshrsi3):
mov #31,r0 mov r5,r0
and r0,r5 .align 2
mova LOCAL(lshrsi3_table),r0 GLOBAL(lshrsi3_r0):
mov.b @(r0,r5),r5
#ifdef __sh1__ #ifdef __sh1__
add r5,r0 and #31,r0
shll2 r0
mov.l r4,@-r15
mov r0,r4
mova LOCAL(lshrsi3_table),r0
add r4,r0
mov.l @r15+,r4
jmp @r0 jmp @r0
#else
braf r5
#endif
mov r4,r0 mov r4,r0
.align 2 .align 2
#else
and #31,r0
shll2 r0
braf r0
mov r4,r0
#endif
LOCAL(lshrsi3_table): LOCAL(lshrsi3_table):
.byte LOCAL(lshrsi3_0)-LOCAL(lshrsi3_table) rts // >> 0
.byte LOCAL(lshrsi3_1)-LOCAL(lshrsi3_table) nop
.byte LOCAL(lshrsi3_2)-LOCAL(lshrsi3_table) LOCAL(lshrsi_1): // >> 1
.byte LOCAL(lshrsi3_3)-LOCAL(lshrsi3_table) rts
.byte LOCAL(lshrsi3_4)-LOCAL(lshrsi3_table) shlr r0
.byte LOCAL(lshrsi3_5)-LOCAL(lshrsi3_table) LOCAL(lshrsi_2): // >> 2
.byte LOCAL(lshrsi3_6)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_7)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_8)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_9)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_10)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_11)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_12)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_13)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_14)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_15)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_16)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_17)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_18)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_19)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_20)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_21)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_22)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_23)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_24)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_25)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_26)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_27)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_28)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_29)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_30)-LOCAL(lshrsi3_table)
.byte LOCAL(lshrsi3_31)-LOCAL(lshrsi3_table)
LOCAL(lshrsi3_6):
shlr2 r0
LOCAL(lshrsi3_4):
shlr2 r0
LOCAL(lshrsi3_2):
rts rts
shlr2 r0 shlr2 r0
bra LOCAL(lshrsi_1) // >> 3
LOCAL(lshrsi3_7):
shlr2 r0
LOCAL(lshrsi3_5):
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_3): bra LOCAL(lshrsi_2) // >> 4
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_1): bra LOCAL(lshrsi_5) // >> 5
rts
shlr r0 shlr r0
bra LOCAL(lshrsi_6) // >> 6
LOCAL(lshrsi3_14):
shlr2 r0
LOCAL(lshrsi3_12):
shlr2 r0
LOCAL(lshrsi3_10):
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_8): bra LOCAL(lshrsi_7) // >> 7
shlr r0
LOCAL(lshrsi_8): // >> 8
rts rts
shlr8 r0 shlr8 r0
bra LOCAL(lshrsi_8) // >> 9
LOCAL(lshrsi3_15): shlr r0
shlr2 r0 bra LOCAL(lshrsi_8) // >> 10
LOCAL(lshrsi3_13):
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_11): bra LOCAL(lshrsi_11) // >> 11
shlr r0
bra LOCAL(lshrsi_12) // >> 12
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_9): bra LOCAL(lshrsi_13) // >> 13
shlr r0
bra LOCAL(lshrsi_14) // >> 14
shlr8 r0 shlr8 r0
bra LOCAL(lshrsi_15) // >> 15
shlr8 r0
LOCAL(lshrsi_16): // >> 16
rts rts
shlr16 r0
bra LOCAL(lshrsi_16) // >> 17
shlr r0 shlr r0
bra LOCAL(lshrsi_16) // >> 18
LOCAL(lshrsi3_22):
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_20): bra LOCAL(lshrsi_19) // >> 19
shlr2 r0 shlr r0
LOCAL(lshrsi3_18): bra LOCAL(lshrsi_20) // >> 20
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_16): bra LOCAL(lshrsi_21) // >> 21
rts shlr r0
bra LOCAL(lshrsi_22) // >> 22
shlr16 r0 shlr16 r0
bra LOCAL(lshrsi_23) // >> 23
LOCAL(lshrsi3_23): shlr16 r0
shlr2 r0 bra LOCAL(lshrsi_16) // >> 24
LOCAL(lshrsi3_21): shlr8 r0
bra LOCAL(lshrsi_25) // >> 25
shlr r0
bra LOCAL(lshrsi_26) // >> 26
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_19): bra LOCAL(lshrsi_27) // >> 27
shlr r0
bra LOCAL(lshrsi_28) // >> 28
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_17): bra LOCAL(lshrsi_29) // >> 29
shlr16 r0 shlr16 r0
bra LOCAL(lshrsi_30) // >> 30
shlr16 r0
shll r0 // >> 31
rts rts
shlr r0 movt r0
LOCAL(lshrsi3_30): LOCAL(lshrsi_7):
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_28): LOCAL(lshrsi_5):
LOCAL(lshrsi_6):
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_26):
shlr2 r0
LOCAL(lshrsi3_24):
shlr16 r0
rts rts
shlr8 r0 LOCAL(lshrsi_13):
LOCAL(lshrsi3_31):
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_29): LOCAL(lshrsi_12):
LOCAL(lshrsi_11):
shlr8 r0
rts
LOCAL(lshrsi_21):
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_27): LOCAL(lshrsi_20):
LOCAL(lshrsi_19):
shlr16 r0
rts
LOCAL(lshrsi_28):
LOCAL(lshrsi_27):
shlr2 r0 shlr2 r0
LOCAL(lshrsi3_25): LOCAL(lshrsi_26):
LOCAL(lshrsi_25):
shlr16 r0 shlr16 r0
rts
shlr8 r0 shlr8 r0
LOCAL(lshrsi_22):
LOCAL(lshrsi_14):
shll2 r0
rts rts
shlr r0 shlr8 r0
LOCAL(lshrsi3_0): LOCAL(lshrsi_23):
LOCAL(lshrsi_15):
shll r0
rts rts
nop shlr8 r0
LOCAL(lshrsi_29):
shll r0
LOCAL(lshrsi_30):
shll2 r0
rts
shlr16 r0
ENDFUNC(GLOBAL(lshrsi3)) ENDFUNC(GLOBAL(lshrsi3))
ENDFUNC(GLOBAL(lshrsi3_r0))
#endif #endif
#ifdef L_movmem #ifdef L_movmem
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment