Commit c6b0465b authored by John Carr, committed by John Carr

sparc.c, [...]: New trampoline code.

Mon Jul 13 16:15:10 1998  John Carr  <jfc@mit.edu>
	* sparc.c, sparc.h, sparc.md: New trampoline code.
	Allow integer operand 1 to V8+ DImode shift instructions.
	Fix bugs in V8+ wide multiply patterns.
	In 32 bit mode, split DImode register moves and logical instructions.
	Write V9 branch prediction flag.
	Use V9 conditional move more often for scc.
	* print-rtl.c (print_rtx): Use HOST_PTR_PRINTF to print pointer.

From-SVN: r21108
parent e69fd6e5
ChangeLog:
Mon Jul 13 16:15:10 1998 John Carr <jfc@mit.edu>
* sparc.c, sparc.h, sparc.md: New trampoline code.
Allow integer operand 1 to V8+ DImode shift instructions.
Fix bugs in V8+ wide multiply patterns.
In 32 bit mode, split DImode register moves and logical instructions.
Write V9 branch prediction flag.
Use V9 conditional move more often for scc.
Mon Jul 13 15:10:09 1998 Philippe De Muyter <phdm@macqel.be>
* invoke.texi(-fno-builtin): Explain that the names of built-in
......
sparc.c:
@@ -2480,7 +2480,7 @@ output_move_quad (operands)
int fix = offset - i * 4;
/* Back up to the appropriate place. */
temp[1] = gen_rtx_CONST_INT (VOIDmode, -fix);
temp[1] = GEN_INT (-fix);
if (addreg0)
{
temp[0] = addreg0;
@@ -2495,7 +2495,7 @@ output_move_quad (operands)
wordpart[i]);
/* Don't modify the register that's the destination of the
move. */
temp[0] = gen_rtx_CONST_INT (VOIDmode, -(offset - fix));
temp[0] = GEN_INT (-(offset - fix));
if (addreg0 && REGNO (addreg0) != REGNO (wordpart[i][0]))
{
temp[1] = addreg0;
@@ -2512,7 +2512,7 @@ output_move_quad (operands)
}
if (offset)
{
temp[1] = gen_rtx_CONST_INT (VOIDmode, -offset);
temp[1] = GEN_INT (-offset);
/* Undo the adds we just did. */
if (addreg0)
{
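These hunks swap explicit gen_rtx_CONST_INT (VOIDmode, ...) calls for the GEN_INT macro. For reference, a sketch of the macro as rtl.h of this era defines it (paraphrased here, not part of this diff):

/* From gcc/rtl.h (approximately): GEN_INT is shorthand for building
   a VOIDmode CONST_INT rtx, so the replaced lines are equivalent:
     temp[1] = gen_rtx_CONST_INT (VOIDmode, -fix);
     temp[1] = GEN_INT (-fix);  */
#define GEN_INT(N)  gen_rtx_CONST_INT (VOIDmode, (N))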
@@ -2712,6 +2712,17 @@ output_scc_insn (operands, insn)
rtx label = 0, next = insn;
int need_label = 0;
/* This code used to be called with final_sequence nonzero (for fpcc
delay slots), but that is no longer allowed. */
if (final_sequence)
abort ();
/* On UltraSPARC a conditional move blocks until 3 cycles after prior loads
complete. It might be beneficial here to use branches if any recent
instructions were loads. */
if (TARGET_V9 && REGNO (operands[1]) == SPARC_ICC_REG)
return "mov 0,%0\n\tmov%C2 %x1,1,%0";
/* Try doing a jump optimization which jump.c can't do for us
because we did not expose that setcc works by using branches.
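The new early return materializes the scc result with a V9 conditional move instead of branches. A minimal C model of the two-instruction sequence (illustrative only, not commit code):

#include <stdio.h>

/* Model of "mov 0,%0 ; mov%C2 %x1,1,%0": initialize the result to 0,
   then conditionally overwrite it with 1.  No branch is executed. */
static int scc_via_condmove (int cond)
{
  int dest = 0;   /* mov 0,%0 */
  if (cond)
    dest = 1;     /* mov%C2 %x1,1,%0  (conditional move, not a branch) */
  return dest;
}

int main (void)
{
  printf ("%d %d\n", scc_via_condmove (0), scc_via_condmove (5)); /* 0 1 */
  return 0;
}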
@@ -2724,21 +2735,12 @@ output_scc_insn (operands, insn)
if (GET_CODE (next) == CODE_LABEL)
label = next;
next = NEXT_INSN (next);
if (next == 0)
break;
}
while (GET_CODE (next) == NOTE || GET_CODE (next) == CODE_LABEL);
while (next && (GET_CODE (next) == NOTE || GET_CODE (next) == CODE_LABEL));
/* If we are in a sequence, and the following insn is a sequence also,
then just following the current insn's next field will take us to the
first insn of the next sequence, which is the wrong place. We don't
want to optimize with a branch that has had its delay slot filled.
Avoid this by verifying that NEXT_INSN (PREV_INSN (next)) == next
which fails only if NEXT is such a branch. */
if (next && GET_CODE (next) == JUMP_INSN && simplejump_p (next)
&& (! final_sequence || NEXT_INSN (PREV_INSN (next)) == next))
if (next && GET_CODE (next) == JUMP_INSN && simplejump_p (next))
label = JUMP_LABEL (next);
/* If not optimizing, jump label fields are not set. To be safe, always
check here whether label is still zero. */
if (label == 0)
@@ -2752,35 +2754,8 @@ output_scc_insn (operands, insn)
/* operands[3] is an unused slot. */
operands[3] = label;
/* If we are in a delay slot, assume it is the delay slot of an fpcc
insn since our type isn't allowed anywhere else. */
/* ??? Fpcc instructions no longer have delay slots, so this code is
probably obsolete. */
/* The fastest way to emit code for this is an annulled branch followed
by two move insns. This will take two cycles if the branch is taken,
and three cycles if the branch is not taken.
However, if we are in the delay slot of another branch, this won't work,
because we can't put a branch in the delay slot of another branch.
The above sequence would effectively take 3 or 4 cycles respectively
since a nop would have to be inserted between the two branches.
In this case, we want to emit a move, annulled branch, and then the
second move. This sequence always takes 3 cycles, and hence is faster
when we are in a branch delay slot. */
if (final_sequence)
{
strcpy (string, "mov 0,%0\n\t");
strcat (string, output_cbranch (operands[2], 3, 0, 1, 0));
strcat (string, "\n\tmov 1,%0");
}
else
{
strcpy (string, output_cbranch (operands[2], 3, 0, 1, 0));
strcat (string, "\n\tmov 1,%0\n\tmov 0,%0");
}
strcpy (string, output_cbranch (operands[2], 3, 0, 1, 0));
strcat (string, "\n\tmov 1,%0\n\tmov 0,%0");
if (need_label)
strcat (string, "\n%l3:");
@@ -4380,13 +4355,16 @@ sparc_builtin_saveregs (arglist)
ANNUL is non-zero if we should generate an annulling branch.
NOOP is non-zero if we have to follow this branch by a noop. */
NOOP is non-zero if we have to follow this branch by a noop.
INSN, if set, is the insn. */
char *
output_cbranch (op, label, reversed, annul, noop)
output_cbranch (op, label, reversed, annul, noop, insn)
rtx op;
int label;
int reversed, annul, noop;
rtx insn;
{
static char string[20];
enum rtx_code code = GET_CODE (op);
@@ -4506,8 +4484,6 @@ output_cbranch (op, label, reversed, annul, noop)
if (annul)
strcat (string, ",a");
/* ??? If v9, optional prediction bit ",pt" or ",pf" goes here. */
if (! TARGET_V9)
{
labeloff = 3;
@@ -4515,6 +4491,11 @@ output_cbranch (op, label, reversed, annul, noop)
}
else
{
rtx note;
if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
strcat (string, INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
labeloff = 9;
if (mode == CCFPmode || mode == CCFPEmode)
{
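Above, the new V9 branch consults a REG_BR_PRED note on the insn to pick the ",pt" (predict taken) or ",pn" (predict not taken) suffix. A standalone C model of the suffix selection (the ATTR_FLAG_likely value below is a placeholder, not the real generated constant):

#include <stdio.h>
#include <string.h>

#define ATTR_FLAG_likely 0x1   /* assumption: the 'likely' prediction bit */

/* Model: a branch predicted likely gets ",pt", otherwise ",pn". */
static void add_prediction (char *buf, int have_note, long flags)
{
  if (have_note)
    strcat (buf, (flags & ATTR_FLAG_likely) ? ",pt" : ",pn");
}

int main (void)
{
  char b1[32] = "bne,a", b2[32] = "be";
  add_prediction (b1, 1, ATTR_FLAG_likely);  /* -> "bne,a,pt" */
  add_prediction (b2, 1, 0);                 /* -> "be,pn"    */
  printf ("%s %s\n", b1, b2);
  return 0;
}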
@@ -5372,34 +5353,49 @@ void
sparc_initialize_trampoline (tramp, fnaddr, cxt)
rtx tramp, fnaddr, cxt;
{
rtx high_cxt = expand_shift (RSHIFT_EXPR, SImode, cxt,
size_int (10), 0, 1);
rtx high_fn = expand_shift (RSHIFT_EXPR, SImode, fnaddr,
size_int (10), 0, 1);
rtx low_cxt = expand_and (cxt, GEN_INT (0x3ff), 0);
rtx low_fn = expand_and (fnaddr, GEN_INT (0x3ff), 0);
rtx g1_sethi = gen_rtx_HIGH (SImode, GEN_INT (0x03000000));
rtx g2_sethi = gen_rtx_HIGH (SImode, GEN_INT (0x05000000));
rtx g1_ori = gen_rtx_HIGH (SImode, GEN_INT (0x82106000));
rtx g2_ori = gen_rtx_HIGH (SImode, GEN_INT (0x8410A000));
rtx tem = gen_reg_rtx (SImode);
emit_move_insn (tem, g1_sethi);
emit_insn (gen_iorsi3 (high_fn, high_fn, tem));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)), high_fn);
emit_move_insn (tem, g1_ori);
emit_insn (gen_iorsi3 (low_fn, low_fn, tem));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), low_fn);
emit_move_insn (tem, g2_sethi);
emit_insn (gen_iorsi3 (high_cxt, high_cxt, tem));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), high_cxt);
emit_move_insn (tem, g2_ori);
emit_insn (gen_iorsi3 (low_cxt, low_cxt, tem));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)), low_cxt);
/* SPARC 32 bit trampoline:
sethi %hi(fn),%g1
sethi %hi(static),%g2
jmp %g1+%lo(fn)
or %g2,%lo(static),%g2
SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
*/
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
expand_binop (SImode, ior_optab,
expand_shift (RSHIFT_EXPR, SImode, fnaddr,
size_int (10), 0, 1),
GEN_INT (0x03000000),
NULL_RTX, 1, OPTAB_DIRECT));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
expand_binop (SImode, ior_optab,
expand_shift (RSHIFT_EXPR, SImode, cxt,
size_int (10), 0, 1),
GEN_INT (0x05000000),
NULL_RTX, 1, OPTAB_DIRECT));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
expand_binop (SImode, ior_optab,
expand_and (fnaddr, GEN_INT (0x3ff), NULL_RTX),
GEN_INT (0x81c06000),
NULL_RTX, 1, OPTAB_DIRECT));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
expand_binop (SImode, ior_optab,
expand_and (cxt, GEN_INT (0x3ff), NULL_RTX),
GEN_INT (0x8410a000),
NULL_RTX, 1, OPTAB_DIRECT));
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
plus_constant (tramp, 8)))));
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
plus_constant (tramp, 16)))));
/* On UltraSPARC a flush flushes an entire cache line. The trampoline is
aligned on a 16 byte boundary so one flush clears it all. */
if (sparc_cpu != PROCESSOR_ULTRASPARC)
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
plus_constant (tramp, 8)))));
}
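The magic constants above are SPARC opcode templates with blank operand fields; the trampoline words are built by OR-ing address bits into them, exactly as the bit layouts in the comment describe. A host-side worked example with made-up addresses (illustrative, not commit code):

#include <stdio.h>
#include <stdint.h>

int main (void)
{
  uint32_t fn  = 0x00123456;   /* function address (example value) */
  uint32_t cxt = 0x00abcdef;   /* static chain address (example)   */

  /* %hi(x) is the top 22 bits (x >> 10); %lo(x) the low 10 bits. */
  uint32_t w0 = 0x03000000 | (fn  >> 10);    /* sethi %hi(fn),%g1         */
  uint32_t w1 = 0x05000000 | (cxt >> 10);    /* sethi %hi(static),%g2     */
  uint32_t w2 = 0x81c06000 | (fn  & 0x3ff);  /* jmp   %g1+%lo(fn)         */
  uint32_t w3 = 0x8410a000 | (cxt & 0x3ff);  /* or    %g2,%lo(static),%g2 */

  printf ("%08x %08x %08x %08x\n", w0, w1, w2, w3);
  return 0;
}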
/* The 64 bit version is simpler because it makes more sense to load the
@@ -5410,17 +5406,27 @@ void
sparc64_initialize_trampoline (tramp, fnaddr, cxt)
rtx tramp, fnaddr, cxt;
{
emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), cxt);
emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 32)), fnaddr);
/*
rd %pc,%g1
ldx [%g1+24],%g5
jmp %g5
ldx [%g1+16],%g5
+16 bytes data
*/
emit_move_insn (gen_rtx_MEM (SImode, tramp),
GEN_INT (0x83414000));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
GEN_INT (0xca586018));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
GEN_INT (0x81c14000));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
GEN_INT (0xca586010));
emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, tramp))));
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
plus_constant (tramp, 8)))));
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
plus_constant (tramp, 16)))));
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
plus_constant (tramp, 24)))));
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode,
plus_constant (tramp, 32)))));
if (sparc_cpu != PROCESSOR_ULTRASPARC)
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
}
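In the 64-bit case no address bits are patched into instructions: the four code words are fixed, and the function address and static chain are loaded from data placed right after them. A sketch of the assumed 32-byte layout, matching the offsets in the code above:

#include <stdint.h>

/* v9 trampoline layout (matches TRAMPOLINE_SIZE == 32 for TARGET_ARCH64
   in the sparc.h hunk below):
      0: rd  %pc,%g1       (0x83414000)
      4: ldx [%g1+24],%g5  (0xca586018)  -- load fnaddr
      8: jmp %g5
     12: ldx [%g1+16],%g5  (0xca586010)  -- delay slot: load static chain
     16: cxt    (8 bytes)
     24: fnaddr (8 bytes)  */
struct sparc64_trampoline
{
  uint32_t insn[4];
  uint64_t cxt;
  uint64_t fnaddr;
};

int main (void)
{
  /* Exit status 0 confirms the assumed 32-byte size. */
  return sizeof (struct sparc64_trampoline) != 32;
}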
/* Subroutines to support a flat (single) register window calling
@@ -6438,10 +6444,17 @@ sparc_v8plus_shift (operands, insn, opcode)
if (GET_CODE (operands[3]) == SCRATCH)
operands[3] = operands[0];
output_asm_insn ("sllx %H1,32,%3", operands);
if (sparc_check_64 (operands[1], insn) <= 0)
output_asm_insn ("srl %L1,0,%L1", operands);
output_asm_insn ("or %L1,%3,%3", operands);
if (GET_CODE (operands[1]) == CONST_INT)
{
output_asm_insn ("mov %1,%3", operands);
}
else
{
output_asm_insn ("sllx %H1,32,%3", operands);
if (sparc_check_64 (operands[1], insn) <= 0)
output_asm_insn ("srl %L1,0,%L1", operands);
output_asm_insn ("or %L1,%3,%3", operands);
}
strcpy(asm_code, opcode);
if (which_alternative != 2)
......
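sparc_v8plus_shift first assembles the 64-bit input in a single V9-visible register; the new CONST_INT case skips that assembly when operand 1 is already a small immediate. A C model of the assembly step (illustrative only):

#include <stdio.h>
#include <stdint.h>

/* Model of "sllx %H1,32,%3 ; srl %L1,0,%L1 ; or %L1,%3,%3": combine the
   two 32-bit halves of a V8+ register pair into one 64-bit value.  The
   srl-by-0 clears any stale upper bits of the low half first. */
static uint64_t combine_halves (uint32_t hi, uint32_t lo)
{
  uint64_t t = (uint64_t) hi << 32;  /* sllx %H1,32,%3               */
  t |= (uint64_t) lo;                /* srl %L1,0,%L1 ; or %L1,%3,%3 */
  return t;
}

int main (void)
{
  /* prints 180000000 */
  printf ("%llx\n", (unsigned long long) combine_halves (0x1, 0x80000000u));
  return 0;
}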
sparc.h:
@@ -2099,54 +2099,11 @@ extern union tree_node *current_function_decl;
#define EPILOGUE_USES(REGNO) \
(!TARGET_FLAT && REGNO == 31)
/* Output assembler code for a block containing the constant parts
of a trampoline, leaving space for the variable parts. */
/* On 32 bit sparcs, the trampoline contains five instructions:
sethi #TOP_OF_FUNCTION,%g1
or #BOTTOM_OF_FUNCTION,%g1,%g1
sethi #TOP_OF_STATIC,%g2
jmp g1
or #BOTTOM_OF_STATIC,%g2,%g2
On 64 bit sparcs, the trampoline contains 4 insns and two pseudo-immediate
constants (plus some padding):
rd %pc,%g1
ldx[%g1+20],%g5
ldx[%g1+28],%g1
jmp %g1
nop
nop
.xword context
.xword function */
/* ??? Stack is execute-protected in v9. */
#define TRAMPOLINE_TEMPLATE(FILE) \
do { \
if (TARGET_ARCH64) \
{ \
fprintf (FILE, "\trd %%pc,%%g1\n"); \
fprintf (FILE, "\tldx [%%g1+24],%%g5\n"); \
fprintf (FILE, "\tldx [%%g1+32],%%g1\n"); \
fprintf (FILE, "\tjmp %%g1\n"); \
fprintf (FILE, "\tnop\n"); \
fprintf (FILE, "\tnop\n"); \
/* -mmedlow shouldn't generate .xwords, so don't use them at all */ \
fprintf (FILE, "\t.word 0,0,0,0\n"); \
} \
else \
{ \
ASM_OUTPUT_INT (FILE, const0_rtx); \
ASM_OUTPUT_INT (FILE, const0_rtx); \
ASM_OUTPUT_INT (FILE, const0_rtx); \
ASM_OUTPUT_INT (FILE, GEN_INT (0x81C04000)); \
ASM_OUTPUT_INT (FILE, const0_rtx); \
} \
} while (0)
/* Length in units of the trampoline for entering a nested function. */
#define TRAMPOLINE_SIZE (TARGET_ARCH64 ? 40 : 20)
#define TRAMPOLINE_SIZE (TARGET_ARCH64 ? 32 : 16)
#define TRAMPOLINE_ALIGNMENT 128 /* 16 bytes */
/* Emit RTL insns to initialize the variable parts of a trampoline.
FNADDR is an RTX for the address of the function's pure code.
@@ -2155,12 +2112,10 @@ do { \
void sparc_initialize_trampoline ();
void sparc64_initialize_trampoline ();
#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
do { \
if (TARGET_ARCH64) \
sparc64_initialize_trampoline (TRAMP, FNADDR, CXT); \
else \
sparc_initialize_trampoline (TRAMP, FNADDR, CXT); \
} while (0)
sparc_initialize_trampoline (TRAMP, FNADDR, CXT)
/* Generate necessary RTL for __builtin_saveregs().
ARGLIST is the argument list; see expr.c. */
......
sparc.md:
@@ -1308,6 +1308,22 @@
[(set_attr "type" "multi")
(set_attr "length" "3")])
(define_split
[(set (match_operand:SI 0 "register_operand" "=r")
(match_operator:SI 2 "noov_compare_op"
[(match_operand 1 "icc_or_fcc_reg_operand" "")
(const_int 0)]))]
;; 32 bit LTU/GEU are better implemented using addx/subx
"TARGET_V9 && REGNO (operands[1]) == SPARC_ICC_REG
&& (GET_MODE (operands[1]) == CCXmode
|| (GET_CODE (operands[2]) != LTU && GET_CODE (operands[2]) != GEU))"
[(set (match_dup 0) (const_int 0))
(set (match_dup 0)
(if_then_else:SI (match_op_dup:SI 2 [(match_dup 1) (const_int 0)])
(const_int 1)
(match_dup 0)))]
"")
(define_insn "*scc_di"
[(set (match_operand:DI 0 "register_operand" "=r")
(match_operator:DI 2 "noov_compare_op"
@@ -1521,7 +1537,7 @@
{
return output_cbranch (operands[0], 1, 0,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
! final_sequence);
! final_sequence, insn);
}"
[(set_attr "type" "branch")])
@@ -1536,7 +1552,7 @@
{
return output_cbranch (operands[0], 1, 1,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
! final_sequence);
! final_sequence, insn);
}"
[(set_attr "type" "branch")])
@@ -1552,7 +1568,7 @@
{
return output_cbranch (operands[1], 2, 0,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
! final_sequence);
! final_sequence, insn);
}"
[(set_attr "type" "branch")])
@@ -1568,7 +1584,7 @@
{
return output_cbranch (operands[1], 2, 1,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
! final_sequence);
! final_sequence, insn);
}"
[(set_attr "type" "branch")])
@@ -1584,7 +1600,7 @@
{
return output_cbranch (operands[1], 2, 0,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
! final_sequence);
! final_sequence, insn);
}"
[(set_attr "type" "branch")])
@@ -1600,7 +1616,7 @@
{
return output_cbranch (operands[1], 2, 1,
final_sequence && INSN_ANNULLED_BRANCH_P (insn),
! final_sequence);
! final_sequence, insn);
}"
[(set_attr "type" "branch")])
@@ -2240,6 +2256,20 @@
operands[2] = copy_rtx (operands[0]);
PUT_MODE (operands[2], SImode);")
;; Split register to register moves.
(define_split
[(set (match_operand:DI 0 "register_operand" "=r")
(match_operand:DI 1 "arith_double_operand" "rIN"))]
"! TARGET_ARCH64 && GET_CODE (operands[1]) == REG && REGNO (operands[0]) < 32
&& (GET_CODE (operands[1]) != REG || REGNO (operands[1]) < 32)
&& ! reg_overlap_mentioned_p (operands[0], operands[1])"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
"operands[2] = gen_highpart (SImode, operands[0]);
operands[3] = gen_lowpart (SImode, operands[0]);
operands[4] = gen_highpart (SImode, operands[1]);
operands[5] = gen_lowpart (SImode, operands[1]);")
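In 32-bit mode a DImode register occupies a pair of SImode registers, so this split turns one 64-bit register move into two word moves; the reg_overlap check in the condition guarantees the pairs are disjoint. A C model (illustrative only):

#include <stdio.h>
#include <stdint.h>

/* Model of the DImode -> 2x SImode move split. */
static void move_di_as_two_si (uint32_t dst[2], const uint32_t src[2])
{
  dst[0] = src[0];  /* (set (match_dup 2) (match_dup 4)) -- high words */
  dst[1] = src[1];  /* (set (match_dup 3) (match_dup 5)) -- low words  */
}

int main (void)
{
  uint32_t src[2] = { 0x12345678u, 0x9abcdef0u }, dst[2];
  move_di_as_two_si (dst, src);
  printf ("%08x %08x\n", dst[0], dst[1]);
  return 0;
}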
(define_insn "*movdi_sp32"
[(set (match_operand:DI 0 "reg_or_nonsymb_mem_operand" "=r,T,U,Q,r,r,?f,?f,?Q")
(match_operand:DI 1 "general_operand" "r,U,T,r,Q,i,f,Q,f"))]
@@ -3217,7 +3247,7 @@
if (which_alternative == 1)
return \"st %1,%0\";
if (sparc_check_64 (operands[1], insn) > 0)
return \"nop\";
return final_sequence ? \"nop\" : \"\";
return \"srl %1,0,%0\";
"
[(set_attr "type" "shift,store")])
@@ -3639,7 +3669,7 @@ return \"srl %1,0,%0\";
gen_rtx_PLUS (DImode, operands[1],
operands[2])),
gen_rtx_CLOBBER (VOIDmode,
gen_rtx_REG (SImode, SPARC_ICC_REG)))));
gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
DONE;
}
}")
@@ -3648,7 +3678,7 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
(match_operand:DI 2 "arith_double_operand" "rHI")))
(clobber (reg:SI 100))]
(clobber (reg:CC 100))]
"! TARGET_ARCH64"
"*
{
@@ -3681,7 +3711,7 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
(match_operand:DI 2 "arith_double_operand" "rHI")))
(clobber (reg:SI 100))]
(clobber (reg:CC 100))]
"! TARGET_ARCH64 && reload_completed"
[(parallel [(set (reg:CC_NOOV 100)
(compare:CC_NOOV (plus:SI (match_dup 4)
@@ -3698,16 +3728,13 @@ return \"srl %1,0,%0\";
operands[5] = gen_lowpart (SImode, operands[2]);
operands[6] = gen_highpart (SImode, operands[0]);
operands[7] = gen_highpart (SImode, operands[1]);
if (GET_CODE (operands[2]) == CONST_INT)
operands[8] = INTVAL (operands[2]) < 0 ? constm1_rtx : const0_rtx;
else
operands[8] = gen_highpart (SImode, operands[2]);")
operands[8] = gen_highpart (SImode, operands[2]);")
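This split implements 64-bit addition on 32-bit SPARC as addcc (low words, sets the carry flag) followed by addx (high words, consumes the carry), which is why the pattern must clobber the condition-code register. A C model of the sequence:

#include <stdio.h>
#include <stdint.h>

/* Model of "addcc %L1,%L2,%L0 ; addx %H1,%H2,%H0". */
static void add_di_sp32 (uint32_t h1, uint32_t l1,
                         uint32_t h2, uint32_t l2,
                         uint32_t *h0, uint32_t *l0)
{
  *l0 = l1 + l2;               /* addcc: low-word add, sets carry     */
  uint32_t carry = *l0 < l1;   /* carry out of the low-word add       */
  *h0 = h1 + h2 + carry;       /* addx: high-word add absorbs carry   */
}

int main (void)
{
  uint32_t h, l;
  add_di_sp32 (0, 0xffffffffu, 0, 1, &h, &l);
  printf ("%08x %08x\n", h, l);   /* 00000001 00000000 */
  return 0;
}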
(define_split
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (match_operand:DI 1 "arith_double_operand" "r")
(match_operand:DI 2 "arith_double_operand" "rHI")))
(clobber (reg:SI 100))]
(clobber (reg:CC 100))]
"! TARGET_ARCH64 && reload_completed"
[(parallel [(set (reg:CC_NOOV 100)
(compare:CC_NOOV (minus:SI (match_dup 4)
@@ -3749,7 +3776,7 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
(match_operand:DI 2 "register_operand" "r")))
(clobber (reg:SI 100))]
(clobber (reg:CC 100))]
"! TARGET_ARCH64"
"addcc %L2,%1,%L0\;addx %H2,0,%H0"
[(set_attr "type" "multi")])
@@ -3823,7 +3850,7 @@ return \"srl %1,0,%0\";
gen_rtx_MINUS (DImode, operands[1],
operands[2])),
gen_rtx_CLOBBER (VOIDmode,
gen_rtx_REG (SImode, SPARC_ICC_REG)))));
gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
DONE;
}
}")
@@ -3832,7 +3859,7 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:DI 2 "arith_double_operand" "rHI")))
(clobber (reg:SI 100))]
(clobber (reg:CC 100))]
"! TARGET_ARCH64"
"*
{
@@ -3862,7 +3889,7 @@ return \"srl %1,0,%0\";
[(set (match_operand:DI 0 "register_operand" "=r")
(minus:DI (match_operand:DI 1 "register_operand" "r")
(zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))
(clobber (reg:SI 100))]
(clobber (reg:CC 100))]
"! TARGET_ARCH64"
"subcc %L1,%2,%L0\;addx %H1,0,%H0"
[(set_attr "type" "multi")])
@@ -4113,11 +4140,27 @@ return \"srl %1,0,%0\";
smul %1,%2,%4\;srlx %4,%3,%0"
[(set_attr "length" "2")])
;; The combiner changes TRUNCATE in the previous pattern to SUBREG.
(define_insn ""
[(set (match_operand:SI 0 "register_operand" "=h,r")
(subreg:SI
(lshiftrt:DI
(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
(sign_extend:DI (match_operand:SI 2 "register_operand" "r,r")))
(match_operand:SI 3 "const_int_operand" "i,i"))
1))
(clobber (match_scratch:SI 4 "=X,&h"))]
"TARGET_V8PLUS"
"@
smul %1,%2,%0\;srlx %0,%3,%0
smul %1,%2,%4\;srlx %4,%3,%0"
[(set_attr "length" "2")])
(define_insn "const_smulsi3_highpart_v8plus"
[(set (match_operand:SI 0 "register_operand" "=h,r")
(truncate:SI
(lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
(match_operand:SI 2 "register_operand" "r,r"))
(match_operand 2 "small_int" "i,i"))
(match_operand:SI 3 "const_int_operand" "i,i"))))
(clobber (match_scratch:SI 4 "=X,&h"))]
"TARGET_V8PLUS"
@@ -4510,6 +4553,23 @@ return \"srl %1,0,%0\";
operands[4] = GEN_INT (~INTVAL (operands[2]) & 0xffffffff);
}")
;; Split DImode logical operations requiring two instructions.
(define_split
[(set (match_operand:DI 0 "register_operand" "")
(match_operator:DI 1 "cc_arithop" ; AND, IOR, XOR
[(match_operand:DI 2 "register_operand" "")
(match_operand:DI 3 "arith_double_operand" "")]))]
"! TARGET_ARCH64 && reload_completed
&& GET_CODE (operands[0]) == REG && REGNO (operands[0]) < 32"
[(set (match_dup 4) (match_op_dup:SI 1 [(match_dup 6) (match_dup 8)]))
(set (match_dup 5) (match_op_dup:SI 1 [(match_dup 7) (match_dup 9)]))]
"operands[4] = gen_highpart (SImode, operands[0]);
operands[5] = gen_lowpart (SImode, operands[0]);
operands[6] = gen_highpart (SImode, operands[2]);
operands[7] = gen_lowpart (SImode, operands[2]);
operands[8] = gen_highpart (SImode, operands[3]);
operands[9] = gen_lowpart (SImode, operands[3]);")
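Bitwise AND/IOR/XOR carry nothing between words, so the same word-by-word split applies here with no condition-code clobber (unlike the add/sub splits above). A C model (illustrative only):

#include <stdio.h>
#include <stdint.h>

/* Model of the two-insn split for a DImode bitwise op: the operation
   distributes independently over the high and low word halves. */
static void ior_di_as_two_si (uint32_t dst[2],
                              const uint32_t a[2], const uint32_t b[2])
{
  dst[0] = a[0] | b[0];  /* high words */
  dst[1] = a[1] | b[1];  /* low words  */
}

int main (void)
{
  uint32_t a[2] = { 0xf0f0f0f0u, 0 }, b[2] = { 0, 0x0f0f0f0fu }, d[2];
  ior_di_as_two_si (d, a, b);
  printf ("%08x %08x\n", d[0], d[1]);
  return 0;
}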
(define_insn "*and_not_di_sp32"
[(set (match_operand:DI 0 "register_operand" "=r,b")
(and:DI (not:DI (match_operand:DI 1 "register_operand" "r,b"))
@@ -4899,7 +4959,7 @@ return \"srl %1,0,%0\";
gen_rtx_SET (VOIDmode, operand0,
gen_rtx_NEG (DImode, operand1)),
gen_rtx_CLOBBER (VOIDmode,
gen_rtx_REG (SImode, SPARC_ICC_REG)))));
gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
DONE;
}
}")
@@ -4907,7 +4967,7 @@ return \"srl %1,0,%0\";
(define_insn "*negdi2_sp32"
[(set (match_operand:DI 0 "register_operand" "=r")
(neg:DI (match_operand:DI 1 "register_operand" "r")))
(clobber (reg:SI 100))]
(clobber (reg:CC 100))]
"! TARGET_ARCH64"
"*
{
@@ -5331,13 +5391,28 @@ return \"srl %1,0,%0\";
(define_insn "ashldi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
(ashift:DI (match_operand:DI 1 "register_operand" "r,0,r")
(ashift:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
(match_operand:SI 2 "arith_operand" "rI,rI,rI")))
(clobber (match_scratch:SI 3 "=X,X,&h"))]
"TARGET_V8PLUS"
"*return sparc_v8plus_shift (operands, insn, \"sllx\");"
[(set_attr "length" "5,5,6")])
;; Optimize (1LL<<x)-1
(define_insn ""
[(set (match_operand:DI 0 "register_operand" "=h")
(plus:DI (ashift:DI (const_int 1)
(match_operand:SI 2 "arith_operand" "rI"))
(const_int -1)))]
"TARGET_V8PLUS"
"*
{
if (GET_CODE (operands[2]) == REG && REGNO (operands[2]) == REGNO (operands[0]))
return \"mov 1,%L0\;sllx %L0,%2,%L0\;sub %L0,1,%L0\;srlx %L0,32,%H0\";
return \"mov 1,%H0\;sllx %H0,%2,%L0\;sub %L0,1,%L0\;srlx %L0,32,%H0\";
}"
[(set_attr "length" "4")])
(define_insn "*cmp_cc_ashift_1"
[(set (reg:CC_NOOV 100)
(compare:CC_NOOV (ashift:SI (match_operand:SI 0 "register_operand" "r")
@@ -5402,7 +5477,7 @@ if (! TARGET_ARCH64)
(define_insn "ashrdi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
(ashiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
(ashiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
(match_operand:SI 2 "arith_operand" "rI,rI,rI")))
(clobber (match_scratch:SI 3 "=X,X,&h"))]
"TARGET_V8PLUS"
@@ -5454,7 +5529,7 @@ if (! TARGET_ARCH64)
(define_insn "lshrdi3_v8plus"
[(set (match_operand:DI 0 "register_operand" "=&h,&h,r")
(lshiftrt:DI (match_operand:DI 1 "register_operand" "r,0,r")
(lshiftrt:DI (match_operand:DI 1 "arith_operand" "rI,0,rI")
(match_operand:SI 2 "arith_operand" "rI,rI,rI")))
(clobber (match_scratch:SI 3 "=X,X,&h"))]
"TARGET_V8PLUS"
@@ -6605,18 +6680,6 @@ if (! TARGET_ARCH64)
"TARGET_ARCH64 && short_branch (INSN_UID (insn), INSN_UID (operands[2])) && in_same_eh_region (insn, operands[2]) && in_same_eh_region (insn, ins1)"
"call %a0,%1\;add %%o7,(%l2-.-4),%%o7")
;; Other miscellaneous peepholes.
;; (reg:SI 100) is created by the {add,neg,sub}di patterns.
(define_peephole
[(parallel [(set (match_operand:SI 0 "register_operand" "=r")
(minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ")
(reg:SI 100)))
(clobber (reg:CC 100))])
(set (reg:CC 100) (compare (match_dup 0) (const_int 0)))]
""
"subxcc %r1,0,%0")
;; After a nonlocal goto, we need to restore the PIC register, but only
;; if we need it. So do nothing much here, but we'll check for this in
;; finalize_pic.
......
print-rtl.c:
@@ -253,8 +253,7 @@ print_rtx (in_rtx)
case 't':
putc (' ', outfile);
fprintf (outfile, HOST_WIDE_INT_PRINT_HEX,
(HOST_WIDE_INT) XTREE (in_rtx, i));
fprintf (outfile, HOST_PTR_PRINTF, (char *) XTREE (in_rtx, i));
break;
case '*':
......
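Casting a pointer through HOST_WIDE_INT misprints it on hosts where pointers are wider than HOST_WIDE_INT; HOST_PTR_PRINTF supplies a pointer-sized format string instead. A sketch of the idea, assuming a typical "%p"-style definition (the real macro is chosen per host at configure time):

#include <stdio.h>

#ifndef HOST_PTR_PRINTF
#define HOST_PTR_PRINTF "%p"   /* assumption: common host definition */
#endif

int main (void)
{
  int x = 0;
  /* Print the pointer with a pointer-sized format instead of casting
     it to an integer type that may be too narrow to hold it. */
  printf (HOST_PTR_PRINTF, (void *) &x);
  putchar ('\n');
  return 0;
}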