Commit aadb5b43 by Oleg Endo

re PR target/54089 ([SH] Refactor shift patterns)

	PR target/54089
	* config/sh/sh.h (SH_DYNAMIC_SHIFT_COST): Set always to 1 if
	dynamic shifts are available.
	(SHIFT_COUNT_TRUNCATED): Always define to 0.  Correct comment.
	* config/sh/sh.c (ashl_lshr_seq, ext_ashl_lshr_seq): Add comments.
	* config/sh/predicates.md (shift_count_operand): Allow
	arith_reg_operand even if TARGET_DYNSHIFT is false.
	* config/sh/sh.md (ashlsi3, lshrsi3): Expand library call patterns
	if needed.
	(ashlsi3_d_call, lshrsi3_d_call): New insns.

	PR target/54089
	* config/sh/lib1funcs.S (ashlsi3): Reimplement as ashlsi3_r0.
	(lshrsi3): Reimplement as lshrsi3_r0.

	PR target/54089
	* gcc.target/sh/pr54089-3.c: New.

From-SVN: r191161
parent d5d66749
2012-09-10 Oleg Endo <olegendo@gcc.gnu.org>
PR target/54089
* config/sh/sh.h (SH_DYNAMIC_SHIFT_COST): Set always to 1 if
dynamic shifts are available.
(SHIFT_COUNT_TRUNCATED): Always define to 0. Correct comment.
* config/sh/sh.c (ashl_lshr_seq, ext_ashl_lshr_seq): Add comments.
* config/sh/predicates.md (shift_count_operand): Allow
arith_reg_operand even if TARGET_DYNSHIFT is false.
* config/sh/sh.md (ashlsi3, lshrsi3): Expand library call patterns
if needed.
(ashlsi3_d_call, lshrsi3_d_call): New insns.
2012-09-10 Richard Sandiford <rdsandiford@googlemail.com> 2012-09-10 Richard Sandiford <rdsandiford@googlemail.com>
* ira.c (setup_pressure_classes): Handle synonymous classes. * ira.c (setup_pressure_classes): Handle synonymous classes.
2012-09-10 Marc Glisse <marc.glisse@inria.fr> 2012-09-10 Marc Glisse <marc.glisse@inria.fr>
* tree-ssa-forwprop.c (simplify_bitfield_ref): New function. * tree-ssa-forwprop.c (simplify_bitfield_ref): New function.
(ssa_forward_propagate_and_combine): Call it. (ssa_forward_propagate_and_combine): Call it.
2012-09-10 Steve Ellcey <sellcey@mips.com> 2012-09-10 Steve Ellcey <sellcey@mips.com>
......
...@@ -791,9 +791,8 @@ ...@@ -791,9 +791,8 @@
/* Allow T_REG as shift count for dynamic shifts, although it is not /* Allow T_REG as shift count for dynamic shifts, although it is not
really possible. It will then be copied to a general purpose reg. */ really possible. It will then be copied to a general purpose reg. */
if (! TARGET_SHMEDIA) if (! TARGET_SHMEDIA)
return const_int_operand (op, mode) return const_int_operand (op, mode) || arith_reg_operand (op, mode)
|| (TARGET_DYNSHIFT && (arith_reg_operand (op, mode) || (TARGET_DYNSHIFT && t_reg_operand (op, mode));
|| t_reg_operand (op, mode)));
return (CONSTANT_P (op) return (CONSTANT_P (op)
? (CONST_INT_P (op) ? (CONST_INT_P (op)
......
...@@ -2871,35 +2871,35 @@ struct ashl_lshr_sequence ...@@ -2871,35 +2871,35 @@ struct ashl_lshr_sequence
static const struct ashl_lshr_sequence ashl_lshr_seq[32] = static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
{ {
{ 0, { 0 }, 0 }, { 0, { 0 }, 0 }, // 0
{ 1, { 1 }, LSHR_CLOBBERS_T }, { 1, { 1 }, LSHR_CLOBBERS_T },
{ 1, { 2 }, 0 }, { 1, { 2 }, 0 },
{ 2, { 2, 1 }, LSHR_CLOBBERS_T }, { 2, { 2, 1 }, LSHR_CLOBBERS_T },
{ 2, { 2, 2 }, 0 }, { 2, { 2, 2 }, 0 }, // 4
{ 3, { 2, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 2, 2, 2 }, 0 }, { 3, { 2, 2, 2 }, 0 },
{ 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T }, { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
{ 1, { 8 }, 0 }, { 1, { 8 }, 0 }, // 8
{ 2, { 8, 1 }, LSHR_CLOBBERS_T }, { 2, { 8, 1 }, LSHR_CLOBBERS_T },
{ 2, { 8, 2 }, 0 }, { 2, { 8, 2 }, 0 },
{ 3, { 8, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 8, 2, 2 }, 0 }, { 3, { 8, 2, 2 }, 0 }, // 12
{ 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T }, { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 8, -2, 8 }, 0 }, { 3, { 8, -2, 8 }, 0 },
{ 3, { 8, -1, 8 }, ASHL_CLOBBERS_T }, { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
{ 1, { 16 }, 0 }, { 1, { 16 }, 0 }, // 16
{ 2, { 16, 1 }, LSHR_CLOBBERS_T }, { 2, { 16, 1 }, LSHR_CLOBBERS_T },
{ 2, { 16, 2 }, 0 }, { 2, { 16, 2 }, 0 },
{ 3, { 16, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 16, 2, 2 }, 0 }, { 3, { 16, 2, 2 }, 0 }, // 20
{ 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T }, { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 16, -2, 8 }, 0 }, { 3, { 16, -2, 8 }, 0 },
{ 3, { 16, -1, 8 }, ASHL_CLOBBERS_T }, { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
{ 2, { 16, 8 }, 0 }, { 2, { 16, 8 }, 0 }, // 24
{ 3, { 16, 1, 8 }, LSHR_CLOBBERS_T }, { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
{ 3, { 16, 8, 2 }, 0 }, { 3, { 16, 8, 2 }, 0 },
{ 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T }, { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
{ 4, { 16, 8, 2, 2 }, 0 }, { 4, { 16, 8, 2, 2 }, 0 }, // 28
{ 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T }, { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
{ 3, { 16, -2, 16 }, 0 }, { 3, { 16, -2, 16 }, 0 },
...@@ -2915,35 +2915,35 @@ static const struct ashl_lshr_sequence ashl_lshr_seq[32] = ...@@ -2915,35 +2915,35 @@ static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
kind of sign or zero extension. */ kind of sign or zero extension. */
static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] = static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
{ {
{ 0, { 0 }, 0 }, { 0, { 0 }, 0 }, // 0
{ 1, { 1 }, LSHR_CLOBBERS_T }, { 1, { 1 }, LSHR_CLOBBERS_T },
{ 1, { 2 }, 0 }, { 1, { 2 }, 0 },
{ 2, { 2, 1 }, LSHR_CLOBBERS_T }, { 2, { 2, 1 }, LSHR_CLOBBERS_T },
{ 2, { 2, 2 }, 0 }, { 2, { 2, 2 }, 0 }, // 4
{ 3, { 2, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
{ 2, { 8, -2 }, 0 }, { 2, { 8, -2 }, 0 },
{ 2, { 8, -1 }, ASHL_CLOBBERS_T }, { 2, { 8, -1 }, ASHL_CLOBBERS_T },
{ 1, { 8 }, 0 }, { 1, { 8 }, 0 }, // 8
{ 2, { 8, 1 }, LSHR_CLOBBERS_T }, { 2, { 8, 1 }, LSHR_CLOBBERS_T },
{ 2, { 8, 2 }, 0 }, { 2, { 8, 2 }, 0 },
{ 3, { 8, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 8, 2, 2 }, 0 }, { 3, { 8, 2, 2 }, 0 }, // 12
{ 3, { 16, -2, -1 }, ASHL_CLOBBERS_T }, { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
{ 2, { 16, -2 }, 0 }, { 2, { 16, -2 }, 0 },
{ 2, { 16, -1 }, ASHL_CLOBBERS_T }, { 2, { 16, -1 }, ASHL_CLOBBERS_T },
{ 1, { 16 }, 0 }, { 1, { 16 }, 0 }, // 16
{ 2, { 16, 1 }, LSHR_CLOBBERS_T }, { 2, { 16, 1 }, LSHR_CLOBBERS_T },
{ 2, { 16, 2 }, 0 }, { 2, { 16, 2 }, 0 },
{ 3, { 16, 1, 2 }, LSHR_CLOBBERS_T }, { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 16, 2, 2 }, 0 }, { 3, { 16, 2, 2 }, 0 }, // 20
{ 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T }, { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
{ 3, { 16, -2, 8 }, 0 }, { 3, { 16, -2, 8 }, 0 },
{ 3, { 16, -1, 8 }, ASHL_CLOBBERS_T }, { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
{ 2, { 16, 8 }, 0 }, { 2, { 16, 8 }, 0 }, // 24
{ 3, { 16, 1, 8 }, LSHR_CLOBBERS_T }, { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
{ 3, { 16, 8, 2 }, 0 }, { 3, { 16, 8, 2 }, 0 },
{ 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T }, { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
{ 4, { 16, 8, 2, 2 }, 0 }, { 4, { 16, 8, 2, 2 }, 0 }, // 28
{ 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T }, { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
{ 3, { 16, -2, 16 }, 0 }, { 3, { 16, -2, 16 }, 0 },
{ 3, { 16, -1, 16 }, ASHL_CLOBBERS_T } { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
......
...@@ -1932,19 +1932,27 @@ struct sh_args { ...@@ -1932,19 +1932,27 @@ struct sh_args {
like shad and shld. */ like shad and shld. */
#define TARGET_DYNSHIFT (TARGET_SH3 || TARGET_SH2A) #define TARGET_DYNSHIFT (TARGET_SH3 || TARGET_SH2A)
#define SH_DYNAMIC_SHIFT_COST \ /* The cost of using the dynamic shift insns (shad, shld) is the same
(TARGET_HARD_SH4 ? 1 : TARGET_DYNSHIFT ? (optimize_size ? 1 : 2) : 20) if they are available. If they are not available a library function will
be emitted instead, which is more expensive. */
/* Immediate shift counts are truncated by the output routines (or was it #define SH_DYNAMIC_SHIFT_COST (TARGET_DYNSHIFT ? 1 : 20)
the assembler?). Shift counts in a register are truncated by SH. Note
that the native compiler puts too large (> 32) immediate shift counts /* Defining SHIFT_COUNT_TRUNCATED tells the combine pass that code like
into a register and shifts by the register, letting the SH decide what (X << (Y % 32)) for register X, Y is equivalent to (X << Y).
to do instead of doing that itself. */ This is not generally true when hardware dynamic shifts (shad, shld) are
/* ??? The library routines in lib1funcs.S truncate the shift count. used, because they check the sign bit _before_ the modulo op. The sign
However, the SH3 has hardware shifts that do not truncate exactly as gcc bit determines whether it is a left shift or a right shift:
expects - the sign bit is significant - so it appears that we need to if (Y >= 0)
leave this zero for correct SH3 code. */ return X << (Y & 31);
#define SHIFT_COUNT_TRUNCATED (! TARGET_SH3 && ! TARGET_SH2A) else
return X >> ((-Y) & 31);
The dynamic shift library routines in lib1funcs.S do not use the sign bit
like the hardware dynamic shifts and truncate the shift count to 31.
We define SHIFT_COUNT_TRUNCATED to 0 and express the implied shift count
truncation in the library function call patterns, as this gives slightly
more compact code. */
#define SHIFT_COUNT_TRUNCATED (0)
/* CANONICALIZE_COMPARISON macro for the combine pass. */ /* CANONICALIZE_COMPARISON macro for the combine pass. */
#define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \ #define CANONICALIZE_COMPARISON(CODE, OP0, OP1) \
......
...@@ -4023,6 +4023,17 @@ label: ...@@ -4023,6 +4023,17 @@ label:
operands[2])); operands[2]));
DONE; DONE;
} }
/* Expand a library call for the dynamic shift. */
if (!CONST_INT_P (operands[2]) && !TARGET_DYNSHIFT)
{
emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
rtx funcaddr = gen_reg_rtx (Pmode);
function_symbol (funcaddr, "__ashlsi3_r0", SFUNC_STATIC);
emit_insn (gen_ashlsi3_d_call (operands[0], operands[2], funcaddr));
DONE;
}
}) })
(define_insn "ashlsi3_k" (define_insn "ashlsi3_k"
...@@ -4067,6 +4078,23 @@ label: ...@@ -4067,6 +4078,23 @@ label:
} }
[(set_attr "type" "dyn_shift")]) [(set_attr "type" "dyn_shift")])
;; If dynamic shifts are not available use a library function.
;; By specifying the pattern we reduce the number of call clobbered regs.
;; In order to make combine understand the truncation of the shift amount
;; operand we have to allow it to use pseudo regs for the shift operands.
;; Library-call form of a variable SImode left shift.  The pattern is only
;; enabled when TARGET_SH1 is set and TARGET_DYNSHIFT is not (see the insn
;; condition below).  The value to be shifted is read from the hard register
;; R4_REG; the shifted result is written to operand 0.
(define_insn "ashlsi3_d_call"
;; Operand 0 is the result and operand 1 is the shift count.  Both use the
;; "z" constraint (presumably r0, matching the "_r0" suffix of the library
;; routine name -- confirm against the SH constraint definitions).
[(set (match_operand:SI 0 "arith_reg_dest" "=z")
(ashift:SI (reg:SI R4_REG)
;; The explicit (and ... 31) states in RTL that only the low five bits of the
;; shift count are significant, so a masking operation in the source can be
;; matched as part of this insn.
(and:SI (match_operand:SI 1 "arith_reg_operand" "z")
(const_int 31))))
;; Operand 2 is the register holding the address that the "jsr" below jumps to.
(use (match_operand:SI 2 "arith_reg_operand" "r"))
;; Only T_REG and PR_REG are declared as clobbered by the call.
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))]
"TARGET_SH1 && !TARGET_DYNSHIFT"
;; Indirect call through operand 2.  NOTE(review): "%#" presumably emits the
;; delay-slot filler when the slot is left empty -- confirm in the SH
;; print_operand implementation.
"jsr @%2%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn_and_split "ashlsi3_n" (define_insn_and_split "ashlsi3_n"
[(set (match_operand:SI 0 "arith_reg_dest" "=r") [(set (match_operand:SI 0 "arith_reg_dest" "=r")
(ashift:SI (match_operand:SI 1 "arith_reg_operand" "0") (ashift:SI (match_operand:SI 1 "arith_reg_operand" "0")
...@@ -4512,6 +4540,16 @@ label: ...@@ -4512,6 +4540,16 @@ label:
operands[2])); operands[2]));
DONE; DONE;
} }
/* Expand a library call for the dynamic shift. */
if (!CONST_INT_P (operands[2]) && !TARGET_DYNSHIFT)
{
emit_move_insn (gen_rtx_REG (SImode, R4_REG), operands[1]);
rtx funcaddr = gen_reg_rtx (Pmode);
function_symbol (funcaddr, "__lshrsi3_r0", SFUNC_STATIC);
emit_insn (gen_lshrsi3_d_call (operands[0], operands[2], funcaddr));
DONE;
}
}) })
(define_insn "lshrsi3_k" (define_insn "lshrsi3_k"
...@@ -4556,6 +4594,23 @@ label: ...@@ -4556,6 +4594,23 @@ label:
} }
[(set_attr "type" "dyn_shift")]) [(set_attr "type" "dyn_shift")])
;; If dynamic shifts are not available use a library function.
;; By specifying the pattern we reduce the number of call clobbered regs.
;; In order to make combine understand the truncation of the shift amount
;; operand we have to allow it to use pseudo regs for the shift operands.
;; Library-call form of a variable SImode logical right shift.  The pattern is
;; only enabled when TARGET_SH1 is set and TARGET_DYNSHIFT is not (see the
;; insn condition below).  The value to be shifted is read from the hard
;; register R4_REG; the shifted result is written to operand 0.
(define_insn "lshrsi3_d_call"
;; Operand 0 is the result and operand 1 is the shift count.  Both use the
;; "z" constraint (presumably r0, matching the "_r0" suffix of the library
;; routine name -- confirm against the SH constraint definitions).
[(set (match_operand:SI 0 "arith_reg_dest" "=z")
(lshiftrt:SI (reg:SI R4_REG)
;; The explicit (and ... 31) states in RTL that only the low five bits of the
;; shift count are significant, so a masking operation in the source can be
;; matched as part of this insn.
(and:SI (match_operand:SI 1 "arith_reg_operand" "z")
(const_int 31))))
;; Operand 2 is the register holding the address that the "jsr" below jumps to.
(use (match_operand:SI 2 "arith_reg_operand" "r"))
;; Only T_REG and PR_REG are declared as clobbered by the call.
(clobber (reg:SI T_REG))
(clobber (reg:SI PR_REG))]
"TARGET_SH1 && !TARGET_DYNSHIFT"
;; Indirect call through operand 2.  NOTE(review): "%#" presumably emits the
;; delay-slot filler when the slot is left empty -- confirm in the SH
;; print_operand implementation.
"jsr @%2%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn_and_split "lshrsi3_n" (define_insn_and_split "lshrsi3_n"
[(set (match_operand:SI 0 "arith_reg_dest" "=r") [(set (match_operand:SI 0 "arith_reg_dest" "=r")
(lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0") (lshiftrt:SI (match_operand:SI 1 "arith_reg_operand" "0")
......
2012-09-10 Oleg Endo <olegendo@gcc.gnu.org>
PR target/54089
* gcc.target/sh/pr54089-3.c: New.
2012-09-10 Marc Glisse <marc.glisse@inria.fr> 2012-09-10 Marc Glisse <marc.glisse@inria.fr>
* gcc.dg/tree-ssa/forwprop-21.c: New testcase. * gcc.dg/tree-ssa/forwprop-21.c: New testcase.
2012-09-10 Aldy Hernandez <aldyh@redhat.com> 2012-09-10 Aldy Hernandez <aldyh@redhat.com>
......
/* The dynamic shift library functions truncate the shift count to 5 bits.
Verify that this is taken into account and no extra shift count
truncations are generated before the library call. */
/* { dg-do compile { target "sh*-*-*" } } */
/* { dg-options "-O1" } */
/* { dg-skip-if "" { "sh*-*-*" } { "*" } { "-m1*" "-m2" "-m2e*" } } */
/* { dg-final { scan-assembler-not "and" } } */
/* { dg-final { scan-assembler-not "31" } } */
/* Left shift by a loop-variant count that is explicitly masked with 31.
   Returns the sum of d[i] + b[i] + (e << (i & 31)) for i in [0, c).
   Parameter 'a' is not used.  The source form is the test input for the
   scan-assembler checks at the top of this file, so keep it as written.  */
int
test00 (unsigned int a, int* b, int c, int* d, unsigned int e)
{
int s = 0;
int i;
for (i = 0; i < c; ++i)
/* 'e' is unsigned, so the whole right-hand sum is computed as unsigned.  */
s += d[i] + b[i] + (e << (i & 31));
return s;
}
/* Right shift by a loop-variant count that is explicitly masked with 31.
   Returns the sum of d[i] + b[i] + (e >> (i & 31)) for i in [0, c).
   Parameter 'a' is not used.  The source form is the test input for the
   scan-assembler checks at the top of this file, so keep it as written.  */
int
test01 (unsigned int a, int* b, int c, int* d, unsigned int e)
{
int s = 0;
int i;
for (i = 0; i < c; ++i)
/* 'e' is unsigned, so this is a logical (zero-filling) right shift.  */
s += d[i] + b[i] + (e >> (i & 31));
return s;
}
/* Straight-line left shift: returns 'b' shifted left by the low five bits
   of 'a'.  The unsigned result is converted to the int return type.  */
int
test03 (unsigned int a, unsigned int b)
{
return b << (a & 31);
}
/* Straight-line right shift: returns 'a' shifted right by the low five bits
   of 'b'.  'a' is unsigned, so the shift is logical (zero-filling).  */
unsigned int
test04 (unsigned int a, int b)
{
return a >> (b & 31);
}
2012-09-10 Oleg Endo <olegendo@gcc.gnu.org>
PR target/54089
* config/sh/lib1funcs.S (ashlsi3): Reimplement as ashlsi3_r0.
(lshrsi3): Reimplement as lshrsi3_r0.
2012-09-10 Andreas Schwab <schwab@linux-m68k.org> 2012-09-10 Andreas Schwab <schwab@linux-m68k.org>
PR target/46191 PR target/46191
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment