Commit 225e4f43 by Jörn Rennecke, committed by Joern Rennecke

Add SH4 support:

	* config/sh/lib1funcs.asm (___movstr_i4_even, ___movstr_i4_odd): Define.
	(___movstrSI12_i4, ___sdivsi3_i4, ___udivsi3_i4): Define.
	* sh.c (reg_class_from_letter, regno_reg_class): Add DF_REGS.
	(fp_reg_names, assembler_dialect): New variables.
	(print_operand_address): Handle SUBREGs.
	(print_operand): Added 'o' case.
	Don't use adj_offsettable_operand on PRE_DEC / POST_INC.
	Name of FP registers depends on mode.
	(expand_block_move): Emit different code for SH4 hardware.
	(prepare_scc_operands): Use emit_sf_insn / emit_df_insn as appropriate.
	(from_compare): Likewise.
	(add_constant): New argument last_value.  Changed all callers.
	(find_barrier): Don't try HImode load for FPUL_REG.
	(machine_dependent_reorg): Likewise.
	(sfunc_uses_reg): A CLOBBER cannot be the address register use.
	(gen_far_branch): Emit a barrier after the new jump.
	(barrier_align): Don't trust instruction lengths before
	fixing up pcloads.
	(machine_dependent_reorg): Add support for FIRST_XD_REG .. LAST_XD_REG.
	Use auto-inc addressing for fp registers if doubles need to
	be loaded in two steps.
	Set sh_flag_remove_dead_before_cse.
	(push): Support for TARGET_FMOVD.  Use gen_push_fpul for fpul.
	(pop): Support for TARGET_FMOVD.  Use gen_pop_fpul for fpul.
	(calc_live_regs): Support for TARGET_FMOVD.  Don't save FPSCR.
	Support for FIRST_XD_REG .. LAST_XD_REG.
	(sh_expand_prologue): Support for FIRST_XD_REG .. LAST_XD_REG.
	(sh_expand_epilogue): Likewise.
	(sh_builtin_saveregs): Use DFmode moves for fp regs on SH4.
	(initial_elimination_offset): Take TARGET_ALIGN_DOUBLE into account.
	(arith_reg_operand): FPUL_REG is OK for SH4.
	(fp_arith_reg_operand, fp_extended_operand): New functions.
	(tertiary_reload_operand, fpscr_operand): Likewise.
	(commutative_float_operator, noncommutative_float_operator): Likewise.
	(binary_float_operator, get_fpscr_rtx, emit_sf_insn): Likewise.
	(emit_df_insn, expand_sf_unop, expand_sf_binop): Likewise.
	(expand_df_unop, expand_df_binop, expand_fp_branch): Likewise.
	(emit_fpscr_use, mark_use, remove_dead_before_cse): Likewise.
	* sh.h (CPP_SPEC): Add support for -m4, m4-single, m4-single-only.
	(CONDITIONAL_REGISTER_USAGE): Likewise.
	(HARD_SH4_BIT, FPU_SINGLE_BIT, SH4_BIT, FMOVD_BIT): Define.
	(TARGET_CACHE32, TARGET_SUPERSCALAR, TARGET_HARWARD): Define.
	(TARGET_HARD_SH4, TARGET_FPU_SINGLE, TARGET_SH4, TARGET_FMOVD): Define.
	(target_flag): Add -m4, m4-single, m4-single-only, -mfmovd.
	(OPTIMIZATION_OPTIONS): If optimizing, set flag_omit_frame_pointer
	to -1 and sh_flag_remove_dead_before_cse to 1.
	(ASSEMBLER_DIALECT): Define to assembler_dialect.
	(assembler_dialect, fp_reg_names): Declare.
	(OVERRIDE_OPTIONS): Add code for TARGET_SH4.
	Hide names of registers that are not accessible.
	(CACHE_LOG): Take TARGET_CACHE32 into account.
	(LOOP_ALIGN): Take TARGET_HARWARD into account.
	(FIRST_XD_REG, LAST_XD_REG, FPSCR_REG): Define.
	(FIRST_PSEUDO_REGISTER): Now 49.
	(FIXED_REGISTERS, CALL_USED_REGISTERS): Include values for registers.
	(HARD_REGNO_NREGS): Special treatment of FIRST_XD_REG .. LAST_XD_REG.
	(HARD_REGNO_MODE_OK): Update.
	(enum reg_class): Add DF_REGS and FPSCR_REGS.
	(REG_CLASS_NAMES, REG_CLASS_CONTENTS, REG_ALLOC_ORDER): Likewise.
	(SECONDARY_OUTPUT_RELOAD_CLASS, SECONDARY_INPUT_RELOAD_CLASS): Update.
	(CLASS_CANNOT_CHANGE_SIZE, DEBUG_REGISTER_NAMES): Define.
	(NPARM_REGS): Eight floating point parameter registers on SH4.
	(BASE_RETURN_VALUE_REG): SH4 also passes double values
	in floating point registers.
	(GET_SH_ARG_CLASS): Likewise.
	Complex float types are also returned in float registers.
	(BASE_ARG_REG): Complex float types are also passed in float registers.
	(FUNCTION_VALUE): Change mode like PROMOTE_MODE does.
	(LIBCALL_VALUE): Remove trailing semicolon.
	(ROUND_REG): Round when double precision value is passed in floating
	point register(s).
	(FUNCTION_ARG_ADVANCE): No change wanted for SH4 when things are
	passed on the stack.
	(FUNCTION_ARG): Little endian adjustment for SH4 SFmode.
	(FUNCTION_ARG_PARTIAL_NREGS): Zero for SH4.
	(TRAMPOLINE_ALIGNMENT): Take TARGET_HARWARD into account.
	(INITIALIZE_TRAMPOLINE): Emit ic_invalidate_line for TARGET_HARWARD.
	(MODE_DISP_OK_8): Not for SH4 DFmode.
	(GO_IF_LEGITIMATE_ADDRESS): No base reg + index reg for SH4 DFmode.
	Allow indexed addressing for PSImode after reload.
	(LEGITIMIZE_ADDRESS): Not for SH4 DFmode.
	(LEGITIMIZE_RELOAD_ADDRESS): Handle SH3E SFmode.
	Don't change SH4 DFmode nor PSImode RELOAD_FOR_INPUT_ADDRESS.
	(DOUBLE_TYPE_SIZE): 64 for SH4.
	(RTX_COSTS): Add PLUS case.
	Increase cost of ASHIFT, ASHIFTRT, LSHIFTRT case.
	(REGISTER_MOVE_COST): Add handling of R0_REGS, FPUL_REGS, T_REGS,
	MAC_REGS, PR_REGS, DF_REGS.
	(REGISTER_NAMES): Use fp_reg_names.
	(enum processor_type): Add PROCESSOR_SH4.
	(sh_flag_remove_dead_before_cse): Declare.
	(rtx_equal_function_value_matters, fpscr_rtx, get_fpscr_rtx): Declare.
	(PREDICATE_CODES): Add binary_float_operator,
	commutative_float_operator, fp_arith_reg_operand, fp_extended_operand,
	fpscr_operand, noncommutative_float_operator.
	(ADJUST_COST): Use different scale for TARGET_SUPERSCALAR.
	(SH_DYNAMIC_SHIFT_COST): Cheaper for SH4.
	* sh.md (attribute cpu): Add value sh4.
	(attributes fmovd, issues): Define.
	(attribute type): Add values dfp_arith, dfp_cmp, dfp_conv, dfdiv.
	(function units memory, int, mpy, fp): Make dependent on issue rate.
	(function units issue, single_issue, load_si, load): Define.
	(function units load_store, fdiv, gp_fpul): Define.
	(attribute hit_stack): Provide proper default.
	(use_sfunc_addr+1, udivsi3): Predicated on ! TARGET_SH4.
	(udivsi3_i4, udivsi3_i4_single, divsi3_i4, divsi3_i4_single): New insns.
	(udivsi3, divsi3): Emit special patterns for SH4 hardware.
	(mulsi3_call): Now uses match_operand for function address.
	(mulsi3): Also emit code for SH1 case.  Wrap result in REG_LIBCALL /
	REG_RETVAL notes.
	(push, pop, push_e, pop_e): Now define_expands.
	(push_fpul, push_4, pop_fpul, pop_4, ic_invalidate_line): New expanders.
	(movsi_ie): Added y/i alternative.
	(ic_invalidate_line_i, movdf_i4): New insns.
	(movdf_i4+[123], reload_outdf+[12345], movsi_y+[12]): New splitters.
	(reload_indf, reload_outdf, reload_outsf, reload_insi): New expanders.
	(movdf): Add special code for SH4.
	(movsf_ie, movsf_ie+1, reload_insf, calli): Make use of fpscr visible.
	(call_valuei, calli, call_value): Likewise.
	(movsf): Emit no-op move.
	(mov_nop, movsi_y): New insns.
	(blt, sge): generalize to handle DFmode.
	(return predicate): Call emit_fpscr_use and remove_dead_before_cse.
	(block_move_real, block_lump_real): Predicate on ! TARGET_HARD_SH4.
	(block_move_real_i4, block_lump_real_i4, fpu_switch): New insns.
	(fpu_switch0, fpu_switch1, movpsi): New expanders.
	(fpu_switch+[12], fix_truncsfsi2_i4_2+1): New splitters.
	(toggle_sz): New insn.
	(addsf3, subsf3, mulsf3, divsf3): Now define_expands.
	(addsf3_i, subsf3_i, mulsf3_i4, mulsf3_ie, divsf3_i): New insns.
	(macsf3): Make use of fpscr visible.  Disable for SH4.
	(floatsisf2): Make use of fpscr visible.
	(floatsisf2_i4): New insn.
	(floatsisf2_ie, fixsfsi, cmpgtsf_t, cmpeqsf_t): Disable for SH4.
	(ieee_ccmpeqsf_t): Likewise.
	(fix_truncsfsi2): Emit different code for SH4.
	(fix_truncsfsi2_i4, fix_truncsfsi2_i4_2, cmpgtsf_t_i4): New insns.
	(cmpeqsf_t_i4, ieee_ccmpeqsf_t_4): New insns.
	(negsf2, sqrtsf2, abssf2): Now expanders.
	(adddf3, subdf3i, muldf2, divdf3, floatsidf2): New expanders.
	(negsf2_i, sqrtsf2_i, abssf2_i, adddf3_i, subdf3_i): New insns.
	(muldf3_i, divdf3_i, floatsidf2_i, fix_truncdfsi2_i): New insns.
	(fix_truncdfsi2, cmpdf, negdf2, sqrtdf2, absdf2): New expanders.
	(fix_truncdfsi2_i4, cmpgtdf_t, cmpeqdf_t, ieee_ccmpeqdf_t): New insns.
	(fix_truncdfsi2_i4_2+1): New splitters.
	(negdf2_i, sqrtdf2_i, absdf2_i, extendsfdf2_i4): New insns.
	(extendsfdf2, truncdfsf2): New expanders.
	(truncdfsf2_i4): New insn.
	* t-sh (LIB1ASMFUNCS): Add _movstr_i4, _sdivsi3_i4, _udivsi3_i4.
	(MULTILIB_OPTIONS): Add m4-single-only/m4-single/m4.
	* float-sh.h: When testing for __SH3E__, also test for
	__SH4_SINGLE_ONLY__ .
	* va-sh.h (__va_freg): Define to float.
	(__va_greg, __fa_freg, __gnuc_va_list, va_start):
        Define for __SH4_SINGLE_ONLY__ like for __SH3E__ .
        (__PASS_AS_FLOAT, __TARGET_SH4_P): Likewise.
	(__PASS_AS_FLOAT): Use different definition for __SH4__ and
	 __SH4_SINGLE__.
	(TARGET_SH4_P): Define.
	(va_arg): Use it.
	* sh.md (movdf_k, movsf_i): Tweak the condition so that
	init_expr_once is satisfied about the existence of load / store insns.
	* sh.md (movsi_i, movsi_ie, movsi_i_lowpart, movsf_i, movsf_ie):
        change m constraint in source operand to mr / mf .
	* va-sh.h (__va_arg_sh1): Use __asm instead of asm.
	* (__VA_REEF): Define.
	(__va_arg_sh1): Use it.
	* va-sh.h (va_start, va_arg, va_copy): Add parentheses.

From-SVN: r23777
parent 57cfc5dd
Mon Nov 23 16:46:46 1998 Jörn Rennecke <amylaar@cygnus.co.uk>
Add SH4 support:
* config/sh/lib1funcs.asm (___movstr_i4_even, ___movstr_i4_odd): Define.
(___movstrSI12_i4, ___sdivsi3_i4, ___udivsi3_i4): Define.
* sh.c (reg_class_from_letter, regno_reg_class): Add DF_REGS.
(fp_reg_names, assembler_dialect): New variables.
(print_operand_address): Handle SUBREGs.
(print_operand): Added 'o' case.
Don't use adj_offsettable_operand on PRE_DEC / POST_INC.
Name of FP registers depends on mode.
(expand_block_move): Emit different code for SH4 hardware.
(prepare_scc_operands): Use emit_sf_insn / emit_df_insn as appropriate.
(from_compare): Likewise.
(add_constant): New argument last_value. Changed all callers.
(find_barrier): Don't try HImode load for FPUL_REG.
(machine_dependent_reorg): Likewise.
(sfunc_uses_reg): A CLOBBER cannot be the address register use.
(gen_far_branch): Emit a barrier after the new jump.
(barrier_align): Don't trust instruction lengths before
fixing up pcloads.
(machine_dependent_reorg): Add support for FIRST_XD_REG .. LAST_XD_REG.
Use auto-inc addressing for fp registers if doubles need to
be loaded in two steps.
Set sh_flag_remove_dead_before_cse.
(push): Support for TARGET_FMOVD. Use gen_push_fpul for fpul.
(pop): Support for TARGET_FMOVD. Use gen_pop_fpul for fpul.
(calc_live_regs): Support for TARGET_FMOVD. Don't save FPSCR.
Support for FIRST_XD_REG .. LAST_XD_REG.
(sh_expand_prologue): Support for FIRST_XD_REG .. LAST_XD_REG.
(sh_expand_epilogue): Likewise.
(sh_builtin_saveregs): Use DFmode moves for fp regs on SH4.
(initial_elimination_offset): Take TARGET_ALIGN_DOUBLE into account.
(arith_reg_operand): FPUL_REG is OK for SH4.
(fp_arith_reg_operand, fp_extended_operand): New functions.
(tertiary_reload_operand, fpscr_operand): Likewise.
(commutative_float_operator, noncommutative_float_operator): Likewise.
(binary_float_operator, get_fpscr_rtx, emit_sf_insn): Likewise.
(emit_df_insn, expand_sf_unop, expand_sf_binop): Likewise.
(expand_df_unop, expand_df_binop, expand_fp_branch): Likewise.
(emit_fpscr_use, mark_use, remove_dead_before_cse): Likewise.
* sh.h (CPP_SPEC): Add support for -m4, m4-single, m4-single-only.
(CONDITIONAL_REGISTER_USAGE): Likewise.
(HARD_SH4_BIT, FPU_SINGLE_BIT, SH4_BIT, FMOVD_BIT): Define.
(TARGET_CACHE32, TARGET_SUPERSCALAR, TARGET_HARWARD): Define.
(TARGET_HARD_SH4, TARGET_FPU_SINGLE, TARGET_SH4, TARGET_FMOVD): Define.
(target_flag): Add -m4, m4-single, m4-single-only, -mfmovd.
(OPTIMIZATION_OPTIONS): If optimizing, set flag_omit_frame_pointer
to -1 and sh_flag_remove_dead_before_cse to 1.
(ASSEMBLER_DIALECT): Define to assembler_dialect.
(assembler_dialect, fp_reg_names): Declare.
(OVERRIDE_OPTIONS): Add code for TARGET_SH4.
Hide names of registers that are not accessible.
(CACHE_LOG): Take TARGET_CACHE32 into account.
(LOOP_ALIGN): Take TARGET_HARWARD into account.
(FIRST_XD_REG, LAST_XD_REG, FPSCR_REG): Define.
(FIRST_PSEUDO_REGISTER): Now 49.
(FIXED_REGISTERS, CALL_USED_REGISTERS): Include values for registers.
(HARD_REGNO_NREGS): Special treatment of FIRST_XD_REG .. LAST_XD_REG.
(HARD_REGNO_MODE_OK): Update.
(enum reg_class): Add DF_REGS and FPSCR_REGS.
(REG_CLASS_NAMES, REG_CLASS_CONTENTS, REG_ALLOC_ORDER): Likewise.
(SECONDARY_OUTPUT_RELOAD_CLASS, SECONDARY_INPUT_RELOAD_CLASS): Update.
(CLASS_CANNOT_CHANGE_SIZE, DEBUG_REGISTER_NAMES): Define.
(NPARM_REGS): Eight floating point parameter registers on SH4.
(BASE_RETURN_VALUE_REG): SH4 also passes double values
in floating point registers.
(GET_SH_ARG_CLASS): Likewise.
Complex float types are also returned in float registers.
(BASE_ARG_REG): Complex float types are also passed in float registers.
(FUNCTION_VALUE): Change mode like PROMOTE_MODE does.
(LIBCALL_VALUE): Remove trailing semicolon.
(ROUND_REG): Round when double precision value is passed in floating
point register(s).
(FUNCTION_ARG_ADVANCE): No change wanted for SH4 when things are
passed on the stack.
(FUNCTION_ARG): Little endian adjustment for SH4 SFmode.
(FUNCTION_ARG_PARTIAL_NREGS): Zero for SH4.
(TRAMPOLINE_ALIGNMENT): Take TARGET_HARWARD into account.
(INITIALIZE_TRAMPOLINE): Emit ic_invalidate_line for TARGET_HARWARD.
(MODE_DISP_OK_8): Not for SH4 DFmode.
(GO_IF_LEGITIMATE_ADDRESS): No base reg + index reg for SH4 DFmode.
Allow indexed addressing for PSImode after reload.
(LEGITIMIZE_ADDRESS): Not for SH4 DFmode.
(LEGITIMIZE_RELOAD_ADDRESS): Handle SH3E SFmode.
Don't change SH4 DFmode nor PSImode RELOAD_FOR_INPUT_ADDRESS.
(DOUBLE_TYPE_SIZE): 64 for SH4.
(RTX_COSTS): Add PLUS case.
Increase cost of ASHIFT, ASHIFTRT, LSHIFTRT case.
(REGISTER_MOVE_COST): Add handling of R0_REGS, FPUL_REGS, T_REGS,
MAC_REGS, PR_REGS, DF_REGS.
(REGISTER_NAMES): Use fp_reg_names.
(enum processor_type): Add PROCESSOR_SH4.
(sh_flag_remove_dead_before_cse): Declare.
(rtx_equal_function_value_matters, fpscr_rtx, get_fpscr_rtx): Declare.
(PREDICATE_CODES): Add binary_float_operator,
commutative_float_operator, fp_arith_reg_operand, fp_extended_operand,
fpscr_operand, noncommutative_float_operator.
(ADJUST_COST): Use different scale for TARGET_SUPERSCALAR.
(SH_DYNAMIC_SHIFT_COST): Cheaper for SH4.
* sh.md (attribute cpu): Add value sh4.
(attributes fmovd, issues): Define.
(attribute type): Add values dfp_arith, dfp_cmp, dfp_conv, dfdiv.
(function units memory, int, mpy, fp): Make dependent on issue rate.
(function units issue, single_issue, load_si, load): Define.
(function units load_store, fdiv, gp_fpul): Define.
(attribute hit_stack): Provide proper default.
(use_sfunc_addr+1, udivsi3): Predicated on ! TARGET_SH4.
(udivsi3_i4, udivsi3_i4_single, divsi3_i4, divsi3_i4_single): New insns.
(udivsi3, divsi3): Emit special patterns for SH4 hardware.
(mulsi3_call): Now uses match_operand for function address.
(mulsi3): Also emit code for SH1 case. Wrap result in REG_LIBCALL /
REG_RETVAL notes.
(push, pop, push_e, pop_e): Now define_expands.
(push_fpul, push_4, pop_fpul, pop_4, ic_invalidate_line): New expanders.
(movsi_ie): Added y/i alternative.
(ic_invalidate_line_i, movdf_i4): New insns.
(movdf_i4+[123], reload_outdf+[12345], movsi_y+[12]): New splitters.
(reload_indf, reload_outdf, reload_outsf, reload_insi): New expanders.
(movdf): Add special code for SH4.
(movsf_ie, movsf_ie+1, reload_insf, calli): Make use of fpscr visible.
(call_valuei, calli, call_value): Likewise.
(movsf): Emit no-op move.
(mov_nop, movsi_y): New insns.
(blt, sge): generalize to handle DFmode.
(return predicate): Call emit_fpscr_use and remove_dead_before_cse.
(block_move_real, block_lump_real): Predicate on ! TARGET_HARD_SH4.
(block_move_real_i4, block_lump_real_i4, fpu_switch): New insns.
(fpu_switch0, fpu_switch1, movpsi): New expanders.
(fpu_switch+[12], fix_truncsfsi2_i4_2+1): New splitters.
(toggle_sz): New insn.
(addsf3, subsf3, mulsf3, divsf3): Now define_expands.
(addsf3_i, subsf3_i, mulsf3_i4, mulsf3_ie, divsf3_i): New insns.
(macsf3): Make use of fpscr visible. Disable for SH4.
(floatsisf2): Make use of fpscr visible.
(floatsisf2_i4): New insn.
(floatsisf2_ie, fixsfsi, cmpgtsf_t, cmpeqsf_t): Disable for SH4.
(ieee_ccmpeqsf_t): Likewise.
(fix_truncsfsi2): Emit different code for SH4.
(fix_truncsfsi2_i4, fix_truncsfsi2_i4_2, cmpgtsf_t_i4): New insns.
(cmpeqsf_t_i4, ieee_ccmpeqsf_t_4): New insns.
(negsf2, sqrtsf2, abssf2): Now expanders.
(adddf3, subdf3i, muldf2, divdf3, floatsidf2): New expanders.
(negsf2_i, sqrtsf2_i, abssf2_i, adddf3_i, subdf3_i): New insns.
(muldf3_i, divdf3_i, floatsidf2_i, fix_truncdfsi2_i): New insns.
(fix_truncdfsi2, cmpdf, negdf2, sqrtdf2, absdf2): New expanders.
(fix_truncdfsi2_i4, cmpgtdf_t, cmpeqdf_t, ieee_ccmpeqdf_t): New insns.
(fix_truncdfsi2_i4_2+1): New splitters.
(negdf2_i, sqrtdf2_i, absdf2_i, extendsfdf2_i4): New insns.
(extendsfdf2, truncdfsf2): New expanders.
(truncdfsf2_i4): New insn.
* t-sh (LIB1ASMFUNCS): Add _movstr_i4, _sdivsi3_i4, _udivsi3_i4.
(MULTILIB_OPTIONS): Add m4-single-only/m4-single/m4.
* float-sh.h: When testing for __SH3E__, also test for
__SH4_SINGLE_ONLY__ .
* va-sh.h (__va_freg): Define to float.
(__va_greg, __fa_freg, __gnuc_va_list, va_start):
Define for __SH4_SINGLE_ONLY__ like for __SH3E__ .
(__PASS_AS_FLOAT, __TARGET_SH4_P): Likewise.
(__PASS_AS_FLOAT): Use different definition for __SH4__ and
__SH4_SINGLE__.
(TARGET_SH4_P): Define.
(va_arg): Use it.
* sh.md (movdf_k, movsf_i): Tweak the condition so that
init_expr_once is satisfied about the existence of load / store insns.
* sh.md (movsi_i, movsi_ie, movsi_i_lowpart, movsf_i, movsf_ie):
change m constraint in source operand to mr / mf .
* va-sh.h (__va_arg_sh1): Use __asm instead of asm.
* (__VA_REEF): Define.
(__va_arg_sh1): Use it.
* va-sh.h (va_start, va_arg, va_copy): Add parentheses.
Sun Nov 22 21:34:02 1998 Jeffrey A Law (law@cygnus.com)
* i386/dgux.c (struct option): Add new "description field".
......
......@@ -37,7 +37,7 @@
#undef FLT_MAX_10_EXP
#define FLT_MAX_10_EXP 38
#ifdef __SH3E__
#if defined (__SH3E__) || defined (__SH4_SINGLE_ONLY__)
/* Number of base-FLT_RADIX digits in the significand of a double */
#undef DBL_MANT_DIG
......
......@@ -770,6 +770,64 @@ ___movstr:
add #64,r4
#endif
#ifdef L_movstr_i4
#if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
.text
.global ___movstr_i4_even
.global ___movstr_i4_odd
.global ___movstrSI12_i4
.p2align 5
L_movstr_2mod4_end:
mov.l r0,@(16,r4)
rts
mov.l r1,@(20,r4)
.p2align 2
___movstr_i4_odd:
mov.l @r5+,r1
add #-4,r4
mov.l @r5+,r2
mov.l @r5+,r3
mov.l r1,@(4,r4)
mov.l r2,@(8,r4)
L_movstr_loop:
mov.l r3,@(12,r4)
dt r6
mov.l @r5+,r0
bt/s L_movstr_2mod4_end
mov.l @r5+,r1
add #16,r4
L_movstr_start_even:
mov.l @r5+,r2
mov.l @r5+,r3
mov.l r0,@r4
dt r6
mov.l r1,@(4,r4)
bf/s L_movstr_loop
mov.l r2,@(8,r4)
rts
mov.l r3,@(12,r4)
___movstr_i4_even:
mov.l @r5+,r0
bra L_movstr_start_even
mov.l @r5+,r1
.p2align 4
___movstrSI12_i4:
mov.l @r5,r0
mov.l @(4,r5),r1
mov.l @(8,r5),r2
mov.l r0,@r4
mov.l r1,@(4,r4)
rts
mov.l r2,@(8,r4)
#endif /* ! __SH4__ */
#endif
#ifdef L_mulsi3
......@@ -808,9 +866,47 @@ hiset: sts macl,r0 ! r0 = bb*dd
#endif
#ifdef L_sdivsi3
#ifdef L_sdivsi3_i4
.title "SH DIVIDE"
!! 4 byte integer Divide code for the Hitachi SH
#ifdef __SH4__
!! args in r4 and r5, result in fpul, clobber dr0, dr2
.global ___sdivsi3_i4
___sdivsi3_i4:
lds r4,fpul
float fpul,dr0
lds r5,fpul
float fpul,dr2
fdiv dr2,dr0
rts
ftrc dr0,fpul
#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
!! args in r4 and r5, result in fpul, clobber r2, dr0, dr2
.global ___sdivsi3_i4
___sdivsi3_i4:
sts.l fpscr,@-r15
mov #8,r2
swap.w r2,r2
lds r2,fpscr
lds r4,fpul
float fpul,dr0
lds r5,fpul
float fpul,dr2
fdiv dr2,dr0
ftrc dr0,fpul
rts
lds.l @r15+,fpscr
#endif /* ! __SH4__ */
#endif
#ifdef L_sdivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
sh3e code. */
#if ! defined(__SH4__) && ! defined (__SH4_SINGLE__)
!!
!! Steve Chamberlain
!! sac@cygnus.com
......@@ -904,11 +1000,109 @@ ___sdivsi3:
div0: rts
mov #0,r0
#endif /* ! __SH4__ */
#endif
#ifdef L_udivsi3
#ifdef L_udivsi3_i4
.title "SH DIVIDE"
!! 4 byte integer Divide code for the Hitachi SH
#ifdef __SH4__
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
.global ___udivsi3_i4
___udivsi3_i4:
mov #1,r1
cmp/hi r1,r5
bf trivial
rotr r1
xor r1,r4
lds r4,fpul
mova L1,r0
#ifdef FMOVD_WORKS
fmov.d @r0+,dr4
#else
#ifdef __LITTLE_ENDIAN__
fmov.s @r0+,fr5
fmov.s @r0,fr4
#else
fmov.s @r0+,fr4
fmov.s @r0,fr5
#endif
#endif
float fpul,dr0
xor r1,r5
lds r5,fpul
float fpul,dr2
fadd dr4,dr0
fadd dr4,dr2
fdiv dr2,dr0
rts
ftrc dr0,fpul
trivial:
rts
lds r4,fpul
.align 2
L1:
.double 2147483648
#elif defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
!! args in r4 and r5, result in fpul, clobber r0, r1, r4, r5, dr0, dr2, dr4
.global ___udivsi3_i4
___udivsi3_i4:
mov #1,r1
cmp/hi r1,r5
bf trivial
sts.l fpscr,@-r15
mova L1,r0
lds.l @r0+,fpscr
rotr r1
xor r1,r4
lds r4,fpul
#ifdef FMOVD_WORKS
fmov.d @r0+,dr4
#else
#ifdef __LITTLE_ENDIAN__
fmov.s @r0+,fr5
fmov.s @r0,fr4
#else
fmov.s @r0+,fr4
fmov.s @r0,fr5
#endif
#endif
float fpul,dr0
xor r1,r5
lds r5,fpul
float fpul,dr2
fadd dr4,dr0
fadd dr4,dr2
fdiv dr2,dr0
ftrc dr0,fpul
rts
lds.l @r15+,fpscr
trivial:
rts
lds r4,fpul
.align 2
L1:
#ifdef __LITTLE_ENDIAN__
.long 0x80000
#else
.long 0x180000
#endif
.double 2147483648
#endif /* ! __SH4__ */
#endif
#ifdef L_udivsi3
/* __SH4_SINGLE_ONLY__ keeps this part for link compatibility with
sh3e code. */
#if ! defined(__SH4__) && ! defined (__SH4_SINGLE__)
!!
!! Steve Chamberlain
!! sac@cygnus.com
......@@ -966,22 +1160,40 @@ vshortway:
ret: rts
mov r4,r0
#endif /* __SH4__ */
#endif
#ifdef L_set_fpscr
#if defined (__SH3E__)
#if defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || defined(__SH4_SINGLE_ONLY__)
.global ___set_fpscr
___set_fpscr:
lds r4,fpscr
mov.l ___set_fpscr_L1,r1
swap.w r4,r0
or #24,r0
#ifndef FMOVD_WORKS
xor #16,r0
#endif
#if defined(__SH4__)
swap.w r0,r3
mov.l r3,@(4,r1)
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
swap.w r0,r2
mov.l r2,@r1
#endif
#ifndef FMOVD_WORKS
xor #8,r0
#else
xor #24,r0
#endif
#if defined(__SH4__)
swap.w r0,r2
rts
mov.l r2,@r1
#else /* defined(__SH3E__) || defined(__SH4_SINGLE*__) */
swap.w r0,r3
rts
mov.l r3,@(4,r1)
#endif
.align 2
___set_fpscr_L1:
.long ___fpscr_values
......@@ -990,5 +1202,5 @@ ___set_fpscr_L1:
#else
.comm ___fpscr_values,8
#endif /* ELF */
#endif /* SH3E */
#endif /* SH3E / SH4 */
#endif /* L_set_fpscr */
/* Output routines for GCC for Hitachi Super-H.
Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
Copyright (C) 1993-1998 Free Software Foundation, Inc.
This file is part of GNU CC.
......@@ -103,6 +103,17 @@ int regno_reg_class[FIRST_PSEUDO_REGISTER] =
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
DF_REGS, DF_REGS, DF_REGS, DF_REGS,
DF_REGS, DF_REGS, DF_REGS, DF_REGS,
FPSCR_REGS,
};
char fp_reg_names[][5] =
{
"fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7",
"fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15",
"fpul",
"xd0","xd2","xd4", "xd6", "xd8", "xd10", "xd12", "xd14",
};
/* Provide reg_class from a letter such as appears in the machine
......@@ -110,7 +121,7 @@ int regno_reg_class[FIRST_PSEUDO_REGISTER] =
enum reg_class reg_class_from_letter[] =
{
/* a */ NO_REGS, /* b */ NO_REGS, /* c */ NO_REGS, /* d */ NO_REGS,
/* a */ ALL_REGS, /* b */ NO_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
/* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS,
/* i */ NO_REGS, /* j */ NO_REGS, /* k */ NO_REGS, /* l */ PR_REGS,
/* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS,
......@@ -119,6 +130,12 @@ enum reg_class reg_class_from_letter[] =
/* y */ FPUL_REGS, /* z */ R0_REGS
};
int assembler_dialect;
rtx get_fpscr_rtx ();
void emit_sf_insn ();
void emit_df_insn ();
static void split_branches PROTO ((rtx));
/* Print the operand address in x to the stream. */
......@@ -131,7 +148,8 @@ print_operand_address (stream, x)
switch (GET_CODE (x))
{
case REG:
fprintf (stream, "@%s", reg_names[REGNO (x)]);
case SUBREG:
fprintf (stream, "@%s", reg_names[true_regnum (x)]);
break;
case PLUS:
......@@ -143,13 +161,19 @@ print_operand_address (stream, x)
{
case CONST_INT:
fprintf (stream, "@(%d,%s)", INTVAL (index),
reg_names[REGNO (base)]);
reg_names[true_regnum (base)]);
break;
case REG:
fprintf (stream, "@(r0,%s)",
reg_names[MAX (REGNO (base), REGNO (index))]);
break;
case SUBREG:
{
int base_num = true_regnum (base);
int index_num = true_regnum (index);
fprintf (stream, "@(r0,%s)",
reg_names[MAX (base_num, index_num)]);
break;
}
default:
debug_rtx (x);
......@@ -159,11 +183,11 @@ print_operand_address (stream, x)
break;
case PRE_DEC:
fprintf (stream, "@-%s", reg_names[REGNO (XEXP (x, 0))]);
fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
break;
case POST_INC:
fprintf (stream, "@%s+", reg_names[REGNO (XEXP (x, 0))]);
fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
break;
default:
......@@ -182,7 +206,8 @@ print_operand_address (stream, x)
'O' print a constant without the #
'R' print the LSW of a dp value - changes if in little endian
'S' print the MSW of a dp value - changes if in little endian
'T' print the next word of a dp value - same as 'R' in big endian mode. */
'T' print the next word of a dp value - same as 'R' in big endian mode.
'o' output an operator. */
void
print_operand (stream, x, code)
......@@ -230,16 +255,31 @@ print_operand (stream, x, code)
fputs (reg_names[REGNO (x) + 1], (stream));
break;
case MEM:
print_operand_address (stream,
XEXP (adj_offsettable_operand (x, 4), 0));
if (GET_CODE (XEXP (x, 0)) != PRE_DEC
&& GET_CODE (XEXP (x, 0)) != POST_INC)
x = adj_offsettable_operand (x, 4);
print_operand_address (stream, XEXP (x, 0));
break;
}
break;
case 'o':
switch (GET_CODE (x))
{
case PLUS: fputs ("add", stream); break;
case MINUS: fputs ("sub", stream); break;
case MULT: fputs ("mul", stream); break;
case DIV: fputs ("div", stream); break;
}
break;
default:
switch (GET_CODE (x))
{
case REG:
fputs (reg_names[REGNO (x)], (stream));
if (REGNO (x) >= FIRST_FP_REG && REGNO (x) <= LAST_FP_REG
&& GET_MODE_SIZE (GET_MODE (x)) > 4)
fprintf ((stream), "d%s", reg_names[REGNO (x)]+1);
else
fputs (reg_names[REGNO (x)], (stream));
break;
case MEM:
output_address (XEXP (x, 0));
......@@ -273,6 +313,55 @@ expand_block_move (operands)
if (! constp || align < 4 || (bytes % 4 != 0))
return 0;
if (TARGET_HARD_SH4)
{
if (bytes < 12)
return 0;
else if (bytes == 12)
{
tree entry_name;
rtx func_addr_rtx;
rtx r4 = gen_rtx (REG, SImode, 4);
rtx r5 = gen_rtx (REG, SImode, 5);
entry_name = get_identifier ("__movstrSI12_i4");
func_addr_rtx
= copy_to_mode_reg (Pmode,
gen_rtx_SYMBOL_REF (Pmode,
IDENTIFIER_POINTER (entry_name)));
emit_insn (gen_move_insn (r4, XEXP (operands[0], 0)));
emit_insn (gen_move_insn (r5, XEXP (operands[1], 0)));
emit_insn (gen_block_move_real_i4 (func_addr_rtx));
return 1;
}
else if (! TARGET_SMALLCODE)
{
tree entry_name;
rtx func_addr_rtx;
int dwords;
rtx r4 = gen_rtx (REG, SImode, 4);
rtx r5 = gen_rtx (REG, SImode, 5);
rtx r6 = gen_rtx (REG, SImode, 6);
entry_name = get_identifier (bytes & 4
? "__movstr_i4_odd"
: "__movstr_i4_even");
func_addr_rtx
= copy_to_mode_reg (Pmode,
gen_rtx_SYMBOL_REF (Pmode,
IDENTIFIER_POINTER (entry_name)));
emit_insn (gen_move_insn (r4, XEXP (operands[0], 0)));
emit_insn (gen_move_insn (r5, XEXP (operands[1], 0)));
dwords = bytes >> 3;
emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
return 1;
}
else
return 0;
}
if (bytes < 64)
{
char entry[30];
......@@ -405,9 +494,17 @@ prepare_scc_operands (code)
|| TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)
sh_compare_op1 = force_reg (mode, sh_compare_op1);
emit_insn (gen_rtx (SET, VOIDmode, t_reg,
gen_rtx (code, SImode, sh_compare_op0,
sh_compare_op1)));
if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
(mode == SFmode ? emit_sf_insn : emit_df_insn)
(gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
gen_rtx (SET, VOIDmode, t_reg,
gen_rtx (code, SImode,
sh_compare_op0, sh_compare_op1)),
gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
else
emit_insn (gen_rtx (SET, VOIDmode, t_reg,
gen_rtx (code, SImode, sh_compare_op0,
sh_compare_op1)));
return t_reg;
}
......@@ -443,7 +540,15 @@ from_compare (operands, code)
insn = gen_rtx (SET, VOIDmode,
gen_rtx (REG, SImode, 18),
gen_rtx (code, SImode, sh_compare_op0, sh_compare_op1));
emit_insn (insn);
if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
{
insn = gen_rtx (PARALLEL, VOIDmode,
gen_rtvec (2, insn,
gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
(mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
}
else
emit_insn (insn);
}
/* Functions to output assembly code. */
......@@ -1722,7 +1827,8 @@ static int pool_size;
/* Add a constant to the pool and return its label. */
static rtx
add_constant (x, mode)
add_constant (x, mode, last_value)
rtx last_value;
rtx x;
enum machine_mode mode;
{
......@@ -1741,13 +1847,27 @@ add_constant (x, mode)
continue;
}
if (rtx_equal_p (x, pool_vector[i].value))
return pool_vector[i].label;
{
lab = 0;
if (! last_value
|| ! i
|| ! rtx_equal_p (last_value, pool_vector[i-1].value))
{
lab = pool_vector[i].label;
if (! lab)
pool_vector[i].label = lab = gen_label_rtx ();
}
return lab;
}
}
}
/* Need a new one. */
pool_vector[pool_size].value = x;
lab = gen_label_rtx ();
if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
lab = 0;
else
lab = gen_label_rtx ();
pool_vector[pool_size].mode = mode;
pool_vector[pool_size].label = lab;
pool_size++;
......@@ -1965,7 +2085,8 @@ find_barrier (num_mova, mova, from)
/* We must explicitly check the mode, because sometimes the
front end will generate code to load unsigned constants into
HImode targets without properly sign extending them. */
if (mode == HImode || (mode == SImode && hi_const (src)))
if (mode == HImode
|| (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
{
found_hi += 2;
/* We put the short constants before the long constants, so
......@@ -2130,7 +2251,7 @@ sfunc_uses_reg (insn)
for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
{
part = XVECEXP (pattern, 0, i);
if (part == reg_part)
if (part == reg_part || GET_CODE (part) == CLOBBER)
continue;
if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
&& GET_CODE (SET_DEST (part)) == REG)
......@@ -2470,6 +2591,13 @@ gen_far_branch (bp)
}
else
jump = emit_jump_insn_after (gen_return (), insn);
/* Emit a barrier so that reorg knows that any following instructions
are not reachable via a fall-through path.
But don't do this when not optimizing, since we wouldn't supress the
alignment for the barrier then, and could end up with out-of-range
pc-relative loads. */
if (optimize)
emit_barrier_after (jump);
emit_label_after (bp->near_label, insn);
JUMP_LABEL (jump) = bp->far_label;
if (! invert_jump (insn, label))
......@@ -2556,36 +2684,42 @@ barrier_align (barrier_or_label)
if (! TARGET_SH3 || ! optimize)
return CACHE_LOG;
/* Check if there is an immediately preceding branch to the insn beyond
the barrier. We must weight the cost of discarding useful information
from the current cache line when executing this branch and there is
an alignment, against that of fetching unneeded insn in front of the
branch target when there is no alignment. */
/* PREV is presumed to be the JUMP_INSN for the barrier under
investigation. Skip to the insn before it. */
prev = prev_real_insn (prev);
for (slot = 2, credit = 1 << (CACHE_LOG - 2) + 2;
credit >= 0 && prev && GET_CODE (prev) == INSN;
prev = prev_real_insn (prev))
/* When fixing up pcloads, a constant table might be inserted just before
the basic block that ends with the barrier. Thus, we can't trust the
instruction lengths before that. */
if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
{
if (GET_CODE (PATTERN (prev)) == USE
|| GET_CODE (PATTERN (prev)) == CLOBBER)
continue;
if (GET_CODE (PATTERN (prev)) == SEQUENCE)
prev = XVECEXP (PATTERN (prev), 0, 1);
if (slot &&
get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
slot = 0;
credit -= get_attr_length (prev);
/* Check if there is an immediately preceding branch to the insn beyond
the barrier. We must weight the cost of discarding useful information
from the current cache line when executing this branch and there is
an alignment, against that of fetching unneeded insn in front of the
branch target when there is no alignment. */
/* PREV is presumed to be the JUMP_INSN for the barrier under
investigation. Skip to the insn before it. */
prev = prev_real_insn (prev);
for (slot = 2, credit = 1 << (CACHE_LOG - 2) + 2;
credit >= 0 && prev && GET_CODE (prev) == INSN;
prev = prev_real_insn (prev))
{
if (GET_CODE (PATTERN (prev)) == USE
|| GET_CODE (PATTERN (prev)) == CLOBBER)
continue;
if (GET_CODE (PATTERN (prev)) == SEQUENCE)
prev = XVECEXP (PATTERN (prev), 0, 1);
if (slot &&
get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
slot = 0;
credit -= get_attr_length (prev);
}
if (prev
&& GET_CODE (prev) == JUMP_INSN
&& JUMP_LABEL (prev)
&& next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label)
&& (credit - slot >= (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0)))
return 0;
}
if (prev
&& GET_CODE (prev) == JUMP_INSN
&& JUMP_LABEL (prev)
&& next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label)
&& (credit - slot >= (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0)))
return 0;
return CACHE_LOG;
}
......@@ -2914,7 +3048,8 @@ machine_dependent_reorg (first)
dst = SET_DEST (pat);
mode = GET_MODE (dst);
if (mode == SImode && hi_const (src))
if (mode == SImode && hi_const (src)
&& REGNO (dst) != FPUL_REG)
{
int offset = 0;
......@@ -2929,7 +3064,7 @@ machine_dependent_reorg (first)
if (GET_CODE (dst) == REG
&& ((REGNO (dst) >= FIRST_FP_REG
&& REGNO (dst) <= LAST_FP_REG)
&& REGNO (dst) <= LAST_XD_REG)
|| REGNO (dst) == FPUL_REG))
{
if (last_float
......@@ -2943,7 +3078,8 @@ machine_dependent_reorg (first)
last_float_move = scan;
last_float = src;
newsrc = gen_rtx (MEM, mode,
(REGNO (dst) == FPUL_REG
((TARGET_SH4 && ! TARGET_FMOVD
|| REGNO (dst) == FPUL_REG)
? r0_inc_rtx
: r0_rtx));
last_float_addr = &XEXP (newsrc, 0);
......@@ -2983,6 +3119,16 @@ machine_dependent_reorg (first)
emit_insn_before (gen_use_sfunc_addr (reg), insn);
}
}
#if 0
/* fpscr is not actually a user variable, but we pretend it is for the
sake of the previous optimization passes, since we want it handled like
one. However, we don't have eny debugging information for it, so turn
it into a non-user variable now. */
if (TARGET_SH4)
REG_USERVAR_P (get_fpscr_rtx ()) = 0;
#endif
if (optimize)
sh_flag_remove_dead_before_cse = 1;
mdep_reorg_phase = SH_AFTER_MDEP_REORG;
}
......@@ -3386,8 +3532,16 @@ push (rn)
int rn;
{
rtx x;
if ((rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
|| rn == FPUL_REG)
if (rn == FPUL_REG)
x = gen_push_fpul ();
else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
&& rn >= FIRST_FP_REG && rn <= LAST_XD_REG)
{
if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG)
return;
x = gen_push_4 (gen_rtx (REG, DFmode, rn));
}
else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
x = gen_push_e (gen_rtx (REG, SFmode, rn));
else
x = gen_push (gen_rtx (REG, SImode, rn));
......@@ -3404,8 +3558,16 @@ pop (rn)
int rn;
{
rtx x;
if ((rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
|| rn == FPUL_REG)
if (rn == FPUL_REG)
x = gen_pop_fpul ();
else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
&& rn >= FIRST_FP_REG && rn <= LAST_XD_REG)
{
if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG)
return;
x = gen_pop_4 (gen_rtx (REG, DFmode, rn));
}
else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
x = gen_pop_e (gen_rtx (REG, SFmode, rn));
else
x = gen_pop (gen_rtx (REG, SImode, rn));
......@@ -3453,6 +3615,16 @@ calc_live_regs (count_ptr, live_regs_mask2)
int count;
*live_regs_mask2 = 0;
/* If we can save a lot of saves by switching to double mode, do that. */
if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
if (regs_ever_live[reg] && regs_ever_live[reg+1]
&& (! call_used_regs[reg] || (pragma_interrupt && ! pragma_trapa))
&& ++count > 2)
{
target_flags &= ~FPU_SINGLE_BIT;
break;
}
for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
{
if ((pragma_interrupt && ! pragma_trapa)
......@@ -3463,7 +3635,7 @@ calc_live_regs (count_ptr, live_regs_mask2)
&& regs_ever_live[PR_REG]))
&& reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
&& reg != RETURN_ADDRESS_POINTER_REGNUM
&& reg != T_REG && reg != GBR_REG)
&& reg != T_REG && reg != GBR_REG && reg != FPSCR_REG)
: (/* Only push those regs which are used and need to be saved. */
regs_ever_live[reg] && ! call_used_regs[reg]))
{
......@@ -3472,6 +3644,24 @@ calc_live_regs (count_ptr, live_regs_mask2)
else
live_regs_mask |= 1 << reg;
count++;
if (TARGET_SH4 && TARGET_FMOVD && reg >= FIRST_FP_REG)
if (reg <= LAST_FP_REG)
{
if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
{
if (reg >= 32)
*live_regs_mask2 |= 1 << ((reg ^ 1) - 32);
else
live_regs_mask |= 1 << (reg ^ 1);
count++;
}
}
else if (reg <= LAST_XD_REG)
{
/* Must switch to double mode to access these registers. */
target_flags &= ~FPU_SINGLE_BIT;
count++;
}
}
}
......@@ -3487,6 +3677,7 @@ sh_expand_prologue ()
int live_regs_mask;
int d, i;
int live_regs_mask2;
int save_flags = target_flags;
int double_align = 0;
/* We have pretend args if we had an object sent partially in registers
......@@ -3524,11 +3715,19 @@ sh_expand_prologue ()
emit_insn (gen_sp_switch_1 ());
live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
/* ??? Maybe we could save some switching if we can move a mode switch
that already happens to be at the function start into the prologue. */
if (target_flags != save_flags)
emit_insn (gen_toggle_sz ());
push_regs (live_regs_mask, live_regs_mask2);
if (target_flags != save_flags)
emit_insn (gen_toggle_sz ());
if (TARGET_ALIGN_DOUBLE && d & 1)
double_align = 4;
target_flags = save_flags;
output_stack_adjust (-get_frame_size () - double_align,
stack_pointer_rtx, 3);
......@@ -3543,6 +3742,7 @@ sh_expand_epilogue ()
int d, i;
int live_regs_mask2;
int save_flags = target_flags;
int frame_size = get_frame_size ();
live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
......@@ -3573,7 +3773,8 @@ sh_expand_epilogue ()
/* Pop all the registers. */
live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
if (target_flags != save_flags)
emit_insn (gen_toggle_sz ());
if (live_regs_mask & (1 << PR_REG))
pop (PR_REG);
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
......@@ -3584,6 +3785,9 @@ sh_expand_epilogue ()
else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32))))
pop (j);
}
if (target_flags != save_flags)
emit_insn (gen_toggle_sz ());
target_flags = save_flags;
output_stack_adjust (extra_push + current_function_pretend_args_size,
stack_pointer_rtx, 7);
......@@ -3651,6 +3855,25 @@ sh_builtin_saveregs (arglist)
emit_move_insn (fpregs, XEXP (regbuf, 0));
emit_insn (gen_addsi3 (fpregs, fpregs,
GEN_INT (n_floatregs * UNITS_PER_WORD)));
if (TARGET_SH4)
{
for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
{
emit_insn (gen_addsi3 (fpregs, fpregs,
GEN_INT (-2 * UNITS_PER_WORD)));
emit_move_insn (gen_rtx (MEM, DFmode, fpregs),
gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
}
regno = first_floatreg;
if (regno & 1)
{
emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
emit_move_insn (gen_rtx (MEM, SFmode, fpregs),
gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
- (TARGET_LITTLE_ENDIAN != 0)));
}
}
else
for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
{
emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
......@@ -3677,6 +3900,8 @@ initial_elimination_offset (from, to)
int live_regs_mask, live_regs_mask2;
live_regs_mask = calc_live_regs (&regs_saved, &live_regs_mask2);
if (TARGET_ALIGN_DOUBLE && regs_saved & 1)
total_auto_space += 4;
target_flags = save_flags;
total_saved_regs_space = (regs_saved) * 4;
......@@ -3885,12 +4110,48 @@ arith_reg_operand (op, mode)
else
return 1;
return (regno != T_REG && regno != PR_REG && regno != FPUL_REG
return (regno != T_REG && regno != PR_REG
&& (regno != FPUL_REG || TARGET_SH4)
&& regno != MACH_REG && regno != MACL_REG);
}
return 0;
}
/* Predicate: nonzero if OP is a register (or subreg of a register) in MODE
   that is suitable as an operand of a floating point arithmetic insn,
   i.e. not one of the special integer registers r0-r15, T, PR, MACH, MACL.
   Pseudo registers and non-REG subreg bases are accepted.  */
int
fp_arith_reg_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  int regno;

  if (! register_operand (op, mode))
    return 0;

  if (GET_CODE (op) == REG)
    regno = REGNO (op);
  else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
    regno = REGNO (SUBREG_REG (op));
  else
    /* register_operand accepted it, but it carries no hard regno to
       screen; treat it as OK, like the original predicates do.  */
    return 1;

  return (regno > 15
	  && regno != T_REG && regno != PR_REG
	  && regno != MACH_REG && regno != MACL_REG);
}
/* Predicate: like fp_arith_reg_operand, but also look through a
   FLOAT_EXTEND of mode MODE and test the extended operand instead.  */
int
fp_extended_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_CODE (op) == FLOAT_EXTEND && GET_MODE (op) == mode)
    {
      rtx inner = XEXP (op, 0);

      return fp_arith_reg_operand (inner, GET_MODE (inner));
    }
  return fp_arith_reg_operand (op, mode);
}
/* Returns 1 if OP is a valid source operand for an arithmetic insn. */
int
......@@ -3991,6 +4252,73 @@ braf_label_ref_operand(op, mode)
if (GET_CODE (prev) != PLUS || XEXP (prev, 1) != op)
return 0;
}
/* Predicate: nonzero if OP needs a tertiary reload: any MEM, or a
   CONST_DOUBLE when compiling for SH4.  MODE is unused but required by
   the predicate calling convention.  */
int
tertiary_reload_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  switch (GET_CODE (op))
    {
    case MEM:
      return 1;
    case CONST_DOUBLE:
      return TARGET_SH4 != 0;
    default:
      return 0;
    }
}
/* Predicate: nonzero if OP is the FPSCR hard register in PSImode.
   Note: takes no mode argument, unlike the other predicates here.  */
int
fpscr_operand (op)
     rtx op;
{
  if (GET_CODE (op) != REG)
    return 0;
  if (REGNO (op) != FPSCR_REG)
    return 0;
  return GET_MODE (op) == PSImode;
}
/* Predicate: nonzero if OP is a commutative floating point operator
   (PLUS or MULT) of mode MODE.  */
int
commutative_float_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_MODE (op) != mode)
    return 0;
  switch (GET_CODE (op))
    {
    case PLUS:
    case MULT:
      return 1;
    /* Explicit default so the switch over enum rtx_code doesn't rely on
       implicit fallthrough (and doesn't trip -Wswitch).  */
    default:
      break;
    }
  return 0;
}
/* Predicate: nonzero if OP is a noncommutative floating point operator
   (MINUS or DIV) of mode MODE.  */
int
noncommutative_float_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_MODE (op) != mode)
    return 0;
  switch (GET_CODE (op))
    {
    case MINUS:
    case DIV:
      return 1;
    /* Explicit default so the switch over enum rtx_code doesn't rely on
       implicit fallthrough (and doesn't trip -Wswitch).  */
    default:
      break;
    }
  return 0;
}
/* Predicate: nonzero if OP is a binary floating point operator
   (PLUS, MINUS, MULT or DIV) of mode MODE.  */
int
binary_float_operator (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (GET_MODE (op) != mode)
    return 0;
  switch (GET_CODE (op))
    {
    case PLUS:
    case MINUS:
    case MULT:
    case DIV:
      return 1;
    /* Explicit default so the switch over enum rtx_code doesn't rely on
       implicit fallthrough (and doesn't trip -Wswitch).  */
    default:
      break;
    }
  return 0;
}
/* Return the destination address of a branch. */
......@@ -4102,3 +4430,304 @@ reg_unused_after (reg, insn)
}
return 1;
}
extern struct obstack permanent_obstack;
rtx
get_fpscr_rtx ()
{
static rtx fpscr_rtx;
if (! fpscr_rtx)
{
push_obstacks (&permanent_obstack, &permanent_obstack);
fpscr_rtx = gen_rtx (REG, PSImode, 48);
REG_USERVAR_P (fpscr_rtx) = 1;
pop_obstacks ();
mark_user_reg (fpscr_rtx);
}
if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
mark_user_reg (fpscr_rtx);
return fpscr_rtx;
}
/* Emit PAT as a single precision FP insn.  If the default FPU precision
   is double, bracket it with fpscr precision switches.  */
void
emit_sf_insn (pat)
     rtx pat;
{
  /* When generating reload insns, we must not create new registers.  FPSCR
     should already have the correct value, so do nothing to change it.  */
  int switch_precision = ! TARGET_FPU_SINGLE && ! reload_in_progress;

  if (switch_precision)
    emit_insn (gen_fpu_switch0 (gen_reg_rtx (SImode)));
  emit_insn (pat);
  if (switch_precision)
    emit_insn (gen_fpu_switch1 (gen_reg_rtx (SImode)));
}
/* Emit PAT as a double precision FP insn.  If the default FPU precision
   is single, bracket it with fpscr precision switches.  As in
   emit_sf_insn, no new registers may be created while reloading.  */
void
emit_df_insn (pat)
     rtx pat;
{
  int switch_precision = TARGET_FPU_SINGLE && ! reload_in_progress;

  if (switch_precision)
    emit_insn (gen_fpu_switch0 (gen_reg_rtx (SImode)));
  emit_insn (pat);
  if (switch_precision)
    emit_insn (gen_fpu_switch1 (gen_reg_rtx (SImode)));
}
void
expand_sf_unop (fun, operands)
rtx (*fun)();
rtx *operands;
{
emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
}
void
expand_sf_binop (fun, operands)
rtx (*fun)();
rtx *operands;
{
emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
get_fpscr_rtx ()));
}
void
expand_df_unop (fun, operands)
rtx (*fun)();
rtx *operands;
{
emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
}
void
expand_df_binop (fun, operands)
rtx (*fun)();
rtx *operands;
{
emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
get_fpscr_rtx ()));
}
void
expand_fp_branch (compare, branch)
rtx (*compare) (), (*branch) ();
{
(GET_MODE (sh_compare_op0) == SFmode ? emit_sf_insn : emit_df_insn)
((*compare) ());
emit_jump_insn ((*branch) ());
}
/* We don't want to make fpscr call-saved, because that would prevent
changing it, and it would also cost an extra instruction to save it.
We don't want it to be known as a global register either, because
that disables all flow analysis. But it has to be live at the function
return. Thus, we need to insert a USE at the end of the function. */
/* This should best be called at about the time FINALIZE_PIC is called,
but not dependent on flag_pic. Alas, there is no suitable hook there,
so this gets called from HAVE_RETURN. */
/* While rtl is being generated (rtx_equal_function_value_matters nonzero),
   emit a USE of fpscr and count it.  On a later call (after generation),
   delete all but the last of the USEs we emitted, since the crude way we
   emit them can leave extras behind, then reset the counter.
   Returns 0; the return value exists only because the HAVE_RETURN macro
   context this is called from wants an int expression.  */
int
emit_fpscr_use ()
{
  static int fpscr_uses = 0;

  if (rtx_equal_function_value_matters)
    {
      emit_insn (gen_rtx (USE, VOIDmode, get_fpscr_rtx ()));
      fpscr_uses++;
    }
  else
    {
      if (fpscr_uses > 1)
	{
	  /* Due to the crude way we emit the USEs, we might end up with
	     some extra ones.  Delete all but the last one.  */
	  rtx insn;

	  /* Find the last USE of fpscr, then step past it...  */
	  for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
	    if (GET_CODE (insn) == INSN
		&& GET_CODE (PATTERN (insn)) == USE
		&& GET_CODE (XEXP (PATTERN (insn), 0)) == REG
		&& REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
	      {
		insn = PREV_INSN (insn);
		break;
	      }
	  /* ... and turn every earlier fpscr USE into a deleted note.  */
	  for (; insn; insn = PREV_INSN (insn))
	    if (GET_CODE (insn) == INSN
		&& GET_CODE (PATTERN (insn)) == USE
		&& GET_CODE (XEXP (PATTERN (insn), 0)) == REG
		&& REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
	      {
		PUT_CODE (insn, NOTE);
		NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
		NOTE_SOURCE_FILE (insn) = 0;
	      }
	}
      fpscr_uses = 0;
    }
  /* The function is declared int; always return a value so callers that
     use the result don't read garbage.  */
  return 0;
}
/* ??? gcc does flow analysis strictly after common subexpression
elimination. As a result, common subexpression elimination fails
when there are some intervening statements setting the same register.
If we did nothing about this, this would hurt the precision switching
for SH4 badly. There is some cse after reload, but it is unable to
undo the extra register pressure from the unused instructions, and
it cannot remove auto-increment loads.
A C code example that shows this flow/cse weakness for (at least) SH
and sparc (as of gcc ss-970706) is this:
double
f(double a)
{
double d;
d = 0.1;
a += d;
d = 1.1;
d = 0.1;
a *= d;
return a;
}
So we add another pass before common subexpression elimination, to
remove assignments that are dead due to a following assignment in the
same basic block. */
int sh_flag_remove_dead_before_cse;
/* Helper for remove_dead_before_cse: recursively walk rtx X and clear the
   REG_SET_BLOCK entry of every register X uses, so a preceding SET of such
   a register is no longer considered dead.  The destination register of a
   SET is deliberately NOT marked (it is written, not read), and CLOBBERs
   are ignored entirely.  */
static void
mark_use (x, reg_set_block)
     rtx x, *reg_set_block;
{
  enum rtx_code code;

  if (! x)
    return;
  code = GET_CODE (x);
  switch (code)
    {
    case REG:
      {
	int regno = REGNO (x);
	/* A hard register in a multi-word mode occupies several entries;
	   clear them all.  Pseudos always occupy exactly one.  */
	int nregs = (regno < FIRST_PSEUDO_REGISTER
		     ? HARD_REGNO_NREGS (regno, GET_MODE (x))
		     : 1);
	do
	  {
	    reg_set_block[regno + nregs - 1] = 0;
	  }
	while (--nregs);
	break;
      }
    case SET:
      {
	rtx dest = SET_DEST (x);

	/* A store through a SUBREG counts as a write to the underlying
	   register, so look through it before the REG test below.  */
	if (GET_CODE (dest) == SUBREG)
	  dest = SUBREG_REG (dest);
	/* Only a non-register destination (e.g. a MEM, whose address is
	   read) constitutes a use.  */
	if (GET_CODE (dest) != REG)
	  mark_use (dest, reg_set_block);
	mark_use (SET_SRC (x), reg_set_block);
	break;
      }
    case CLOBBER:
      break;
    default:
      {
	/* Generic case: recurse into every rtx sub-expression and every
	   element of every rtx vector.  */
	char *fmt = GET_RTX_FORMAT (code);
	int i, j;

	for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
	  {
	    if (fmt[i] == 'e')
	      mark_use (XEXP (x, i), reg_set_block);
	    else if (fmt[i] == 'E')
	      for (j = XVECLEN (x, i) - 1; j >= 0; j--)
		mark_use (XVECEXP (x, i, j), reg_set_block);
	  }
	break;
      }
    }
}
/* Remove assignments that are dead due to a following assignment in the
   same basic block, before cse runs (see the comment above for why flow
   analysis alone cannot do this).  Scans the insn stream backwards;
   reg_set_block[R] records the insn up to which register R is known dead
   (the end of the block for call-saved registers, the last call for
   call-used hard registers), and mark_use clears entries for registers
   actually read.  Libcall sequences are left intact so as not to confuse
   cse, loop and flow.
   This pass runs exactly once, right after rtl generation.
   Always returns 0.  */
int
remove_dead_before_cse ()
{
  rtx *reg_set_block, last, last_call, insn, set;
  int in_libcall = 0;

  /* This pass should run just once, after rtl generation.  */
  if (! sh_flag_remove_dead_before_cse
      || rtx_equal_function_value_matters
      || reload_completed)
    /* The function is declared int; the original bare `return;' here left
       the value indeterminate.  */
    return 0;

  sh_flag_remove_dead_before_cse = 0;

  reg_set_block = (rtx *) alloca (max_reg_num () * sizeof (rtx));
  bzero ((char *) reg_set_block, max_reg_num () * sizeof (rtx));
  last_call = last = get_last_insn ();
  for (insn = last; insn; insn = PREV_INSN (insn))
    {
      if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
	continue;
      if (GET_CODE (insn) == JUMP_INSN)
	{
	  /* Basic block boundary: reset both horizons.  */
	  last_call = last = insn;
	  continue;
	}
      set = single_set (insn);

      /* Don't delete parts of libcalls, since that would confuse cse, loop
	 and flow.  */
      if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
	in_libcall = 1;
      else if (in_libcall)
	{
	  if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
	    in_libcall = 0;
	}
      else if (set && GET_CODE (SET_DEST (set)) == REG)
	{
	  int regno = REGNO (SET_DEST (set));
	  rtx ref_insn = (regno < FIRST_PSEUDO_REGISTER && call_used_regs[regno]
			  ? last_call
			  : last);
	  /* The register is dead here if it was already known dead at
	     ref_insn and hasn't been used since.  Only delete single-word
	     hard-reg sets, and only const calls among CALL_INSNs.  */
	  if (reg_set_block[regno] == ref_insn
	      && (regno >= FIRST_PSEUDO_REGISTER
		  || HARD_REGNO_NREGS (regno, GET_MODE (SET_DEST (set))) == 1)
	      && (GET_CODE (insn) != CALL_INSN || CONST_CALL_P (insn)))
	    {
	      PUT_CODE (insn, NOTE);
	      NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
	      NOTE_SOURCE_FILE (insn) = 0;
	      continue;
	    }
	  else
	    reg_set_block[REGNO (SET_DEST (set))] = ref_insn;
	}
      if (GET_CODE (insn) == CALL_INSN)
	{
	  last_call = insn;
	  mark_use (CALL_INSN_FUNCTION_USAGE (insn), reg_set_block);
	}
      mark_use (PATTERN (insn), reg_set_block);
    }
  return 0;
}
/* Definitions of target machine for GNU compiler for Hitachi Super-H.
Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
Copyright (C) 1993-1998 Free Software Foundation, Inc.
Contributed by Steve Chamberlain (sac@cygnus.com).
Improved by Jim Wilson (wilson@cygnus.com).
......@@ -43,7 +43,10 @@ extern int code_for_indirect_jump_scratch;
%{m2:-D__sh2__} \
%{m3:-D__sh3__} \
%{m3e:-D__SH3E__} \
%{!m1:%{!m2:%{!m3:%{!m3e:-D__sh1__}}}}"
%{m4-single-only:-D__SH4_SINGLE_ONLY__} \
%{m4-single:-D__SH4_SINGLE__} \
%{m4:-D__SH4__} \
%{!m1:%{!m2:%{!m3:%{!m3e:%{!m4:%{!m4-single:%{!m4-single-only:-D__sh1__}}}}}}}"
#define CPP_PREDEFINES "-D__sh__ -Acpu(sh) -Amachine(sh)"
......@@ -54,19 +57,28 @@ extern int code_for_indirect_jump_scratch;
/* We can not debug without a frame pointer. */
/* #define CAN_DEBUG_WITHOUT_FP */
#define CONDITIONAL_REGISTER_USAGE \
if (! TARGET_SH3E) \
{ \
int regno; \
for (regno = FIRST_FP_REG; regno <= LAST_FP_REG; regno++) \
fixed_regs[regno] = call_used_regs[regno] = 1; \
fixed_regs[FPUL_REG] = call_used_regs[FPUL_REG] = 1; \
} \
/* Hitachi saves and restores mac registers on call. */ \
if (TARGET_HITACHI) \
{ \
call_used_regs[MACH_REG] = 0; \
call_used_regs[MACL_REG] = 0; \
#define CONDITIONAL_REGISTER_USAGE \
if (! TARGET_SH4 || ! TARGET_FMOVD) \
{ \
int regno; \
for (regno = FIRST_XD_REG; regno <= LAST_XD_REG; regno++) \
fixed_regs[regno] = call_used_regs[regno] = 1; \
if (! TARGET_SH4) \
{ \
if (! TARGET_SH3E) \
{ \
int regno; \
for (regno = FIRST_FP_REG; regno <= LAST_FP_REG; regno++) \
fixed_regs[regno] = call_used_regs[regno] = 1; \
fixed_regs[FPUL_REG] = call_used_regs[FPUL_REG] = 1; \
} \
} \
} \
/* Hitachi saves and restores mac registers on call. */ \
if (TARGET_HITACHI) \
{ \
call_used_regs[MACH_REG] = 0; \
call_used_regs[MACL_REG] = 0; \
}
/* ??? Need to write documentation for all SH options and add it to the
......@@ -81,6 +93,10 @@ extern int target_flags;
#define SH2_BIT (1<<9)
#define SH3_BIT (1<<10)
#define SH3E_BIT (1<<11)
#define HARD_SH4_BIT (1<<5)
#define FPU_SINGLE_BIT (1<<7)
#define SH4_BIT (1<<12)
#define FMOVD_BIT (1<<4)
#define SPACE_BIT (1<<13)
#define BIGTABLE_BIT (1<<14)
#define RELAX_BIT (1<<15)
......@@ -107,6 +123,27 @@ extern int target_flags;
/* Nonzero if we should generate code using type 3E insns. */
#define TARGET_SH3E (target_flags & SH3E_BIT)
/* Nonzero if the cache line size is 32. */
#define TARGET_CACHE32 (target_flags & HARD_SH4_BIT)
/* Nonzero if we schedule for a superscalar implementation. */
#define TARGET_SUPERSCALAR (target_flags & HARD_SH4_BIT)
/* Nonzero if the target has separate instruction and data caches. */
#define TARGET_HARWARD (target_flags & HARD_SH4_BIT)
/* Nonzero if compiling for SH4 hardware (to be used for insn costs etc.) */
#define TARGET_HARD_SH4 (target_flags & HARD_SH4_BIT)
/* Nonzero if the default precision of the FPU is single */
#define TARGET_FPU_SINGLE (target_flags & FPU_SINGLE_BIT)
/* Nonzero if we should generate code using type 4 insns. */
#define TARGET_SH4 (target_flags & SH4_BIT)
/* Nonzero if we should generate fmovd. */
#define TARGET_FMOVD (target_flags & FMOVD_BIT)
/* Nonzero if we respect NANs. */
#define TARGET_IEEE (target_flags & IEEE_BIT)
......@@ -137,10 +174,14 @@ extern int target_flags;
{ {"1", SH1_BIT}, \
{"2", SH2_BIT}, \
{"3", SH3_BIT|SH2_BIT}, \
{"3e", SH3E_BIT|SH3_BIT|SH2_BIT}, \
{"3e", SH3E_BIT|SH3_BIT|SH2_BIT|FPU_SINGLE_BIT}, \
{"4-single-only", SH3E_BIT|SH3_BIT|SH2_BIT|SH3E_BIT|HARD_SH4_BIT|FPU_SINGLE_BIT}, \
{"4-single", SH4_BIT|SH3E_BIT|SH3_BIT|SH2_BIT|HARD_SH4_BIT|FPU_SINGLE_BIT},\
{"4", SH4_BIT|SH3E_BIT|SH3_BIT|SH2_BIT|HARD_SH4_BIT}, \
{"b", -LITTLE_ENDIAN_BIT}, \
{"bigtable", BIGTABLE_BIT}, \
{"dalign", DALIGN_BIT}, \
{"fmovd", FMOVD_BIT}, \
{"hitachi", HITACHI_BIT}, \
{"ieee", IEEE_BIT}, \
{"isize", ISIZE_BIT}, \
......@@ -160,26 +201,58 @@ extern int target_flags;
#define OPTIMIZATION_OPTIONS(LEVEL,SIZE) \
do { \
if (LEVEL) \
flag_omit_frame_pointer = -1; \
if (LEVEL) \
sh_flag_remove_dead_before_cse = 1; \
if (SIZE) \
target_flags |= SPACE_BIT; \
} while (0)
#define ASSEMBLER_DIALECT 0 /* will allow to distinguish b[tf].s and b[tf]/s . */
#define OVERRIDE_OPTIONS \
do { \
sh_cpu = CPU_SH1; \
if (TARGET_SH2) \
sh_cpu = CPU_SH2; \
if (TARGET_SH3) \
sh_cpu = CPU_SH3; \
if (TARGET_SH3E) \
sh_cpu = CPU_SH3E; \
\
/* Never run scheduling before reload, since that can \
break global alloc, and generates slower code anyway due \
to the pressure on R0. */ \
flag_schedule_insns = 0; \
sh_addr_diff_vec_mode = TARGET_BIGTABLE ? SImode : HImode; \
#define ASSEMBLER_DIALECT assembler_dialect
extern int assembler_dialect;
#define OVERRIDE_OPTIONS \
do { \
sh_cpu = CPU_SH1; \
assembler_dialect = 0; \
if (TARGET_SH2) \
sh_cpu = CPU_SH2; \
if (TARGET_SH3) \
sh_cpu = CPU_SH3; \
if (TARGET_SH3E) \
sh_cpu = CPU_SH3E; \
if (TARGET_SH4) \
{ \
assembler_dialect = 1; \
sh_cpu = CPU_SH4; \
} \
if (! TARGET_SH4 || ! TARGET_FMOVD) \
{ \
/* Prevent usage of explicit register names for variables \
for registers not present / not addressable in the \
target architecture. */ \
int regno; \
for (regno = (TARGET_SH3E) ? 17 : 0; \
regno <= 24; regno++) \
fp_reg_names[regno][0] = 0; \
} \
if (flag_omit_frame_pointer < 0) \
/* The debugging information is sufficient, \
but gdb doesn't implement this yet */ \
if (0) \
flag_omit_frame_pointer \
= (PREFERRED_DEBUGGING_TYPE == DWARF_DEBUG \
|| PREFERRED_DEBUGGING_TYPE == DWARF2_DEBUG); \
else \
flag_omit_frame_pointer = 0; \
\
/* Never run scheduling before reload, since that can \
break global alloc, and generates slower code anyway due \
to the pressure on R0. */ \
flag_schedule_insns = 0; \
sh_addr_diff_vec_mode = TARGET_BIGTABLE ? SImode : HImode; \
} while (0)
/* Target machine storage layout. */
......@@ -233,7 +306,7 @@ do { \
/* The log (base 2) of the cache line size, in bytes. Processors prior to
SH3 have no actual cache, but they fetch code in chunks of 4 bytes. */
#define CACHE_LOG (TARGET_SH3 ? 4 : 2)
#define CACHE_LOG (TARGET_CACHE32 ? 5 : TARGET_SH3 ? 4 : 2)
/* Allocation boundary (in *bits*) for the code of a function.
32 bit alignment is faster, because instructions are always fetched as a
......@@ -279,7 +352,7 @@ do { \
barrier_align (LABEL_AFTER_BARRIER)
#define LOOP_ALIGN(A_LABEL) \
((! optimize || TARGET_SMALLCODE) ? 0 : 2)
((! optimize || TARGET_HARWARD || TARGET_SMALLCODE) ? 0 : 2)
#define LABEL_ALIGN(A_LABEL) \
( \
......@@ -341,8 +414,11 @@ do { \
#define RAP_REG 23
#define FIRST_FP_REG 24
#define LAST_FP_REG 39
#define FIRST_XD_REG 40
#define LAST_XD_REG 47
#define FPSCR_REG 48
#define FIRST_PSEUDO_REGISTER 40
#define FIRST_PSEUDO_REGISTER 49
/* 1 for registers that have pervasive standard uses
and are not available for the register allocator.
......@@ -361,6 +437,9 @@ do { \
0, 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, \
0, 0, 0, 0, \
1, \
}
/* 1 for registers not available across function calls.
......@@ -381,6 +460,9 @@ do { \
1, 1, 1, 1, \
1, 1, 1, 1, \
0, 0, 0, 0, \
1, 1, 1, 1, \
1, 1, 0, 0, \
1, \
}
/* Return number of consecutive hard regs needed starting at reg REGNO
......@@ -388,20 +470,39 @@ do { \
This is ordinarily the length in words of a value of mode MODE
but can be less for certain modes in special long registers.
On the SH regs are UNITS_PER_WORD bits wide. */
On the SH all but the XD regs are UNITS_PER_WORD bits wide. */
#define HARD_REGNO_NREGS(REGNO, MODE) \
(((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
((REGNO) >= FIRST_XD_REG && (REGNO) <= LAST_XD_REG \
? (GET_MODE_SIZE (MODE) / (2 * UNITS_PER_WORD)) \
: ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)) \
/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.
We can allow any mode in any general register. The special registers
only allow SImode. Don't allow any mode in the PR. */
/* We cannot hold DCmode values in the XD registers because alter_reg
handles subregs of them incorrectly. We could work around this by
spacing the XD registers like the DR registers, but this would require
additional memory in every compilation to hold larger register vectors.
We could hold SFmode / SCmode values in XD registers, but that
would require a tertiary reload when reloading from / to memory,
and a secondary reload to reload from / to general regs; that
seems to be a losing proposition. */
#define HARD_REGNO_MODE_OK(REGNO, MODE) \
(SPECIAL_REG (REGNO) ? (MODE) == SImode \
: (REGNO) == FPUL_REG ? (MODE) == SImode || (MODE) == SFmode \
: (REGNO) >= FIRST_FP_REG && (REGNO) <= LAST_FP_REG ? (MODE) == SFmode \
: (REGNO) >= FIRST_FP_REG && (REGNO) <= LAST_FP_REG && (MODE) == SFmode \
? 1 \
: (REGNO) >= FIRST_FP_REG && (REGNO) <= LAST_FP_REG \
? ((MODE) == SFmode \
|| (TARGET_SH3E && (MODE) == SCmode) \
|| (((TARGET_SH4 && (MODE) == DFmode) || (MODE) == DCmode) \
&& (((REGNO) - FIRST_FP_REG) & 1) == 0)) \
: (REGNO) >= FIRST_XD_REG && (REGNO) <= LAST_XD_REG \
? (MODE) == DFmode \
: (REGNO) == PR_REG ? 0 \
: (REGNO) == FPSCR_REG ? (MODE) == PSImode \
: 1)
/* Value is 1 if it is a good idea to tie two pseudo registers
......@@ -541,6 +642,8 @@ enum reg_class
GENERAL_REGS,
FP0_REGS,
FP_REGS,
DF_REGS,
FPSCR_REGS,
GENERAL_FP_REGS,
ALL_REGS,
LIM_REG_CLASSES
......@@ -560,6 +663,8 @@ enum reg_class
"GENERAL_REGS", \
"FP0_REGS", \
"FP_REGS", \
"DF_REGS", \
"FPSCR_REGS", \
"GENERAL_FP_REGS", \
"ALL_REGS", \
}
......@@ -579,8 +684,10 @@ enum reg_class
{ 0x0081FFFF, 0x00000000 }, /* GENERAL_REGS */ \
{ 0x01000000, 0x00000000 }, /* FP0_REGS */ \
{ 0xFF000000, 0x000000FF }, /* FP_REGS */ \
{ 0xFF81FFFF, 0x000000FF }, /* GENERAL_FP_REGS */ \
{ 0xFFFFFFFF, 0x000000FF }, /* ALL_REGS */ \
{ 0xFF000000, 0x0000FFFF }, /* DF_REGS */ \
{ 0x00000000, 0x00010000 }, /* FPSCR_REGS */ \
{ 0xFF81FFFF, 0x0000FFFF }, /* GENERAL_FP_REGS */ \
{ 0xFFFFFFFF, 0x0001FFFF }, /* ALL_REGS */ \
}
/* The same information, inverted:
......@@ -603,6 +710,7 @@ extern int regno_reg_class[];
spilled or used otherwise, we better have the FP_REGS allocated first. */
#define REG_ALLOC_ORDER \
{ 25,26,27,28,29,30,31,24,32,33,34,35,36,37,38,39, \
40,41,42,43,44,45,46,47,48, \
1,2,3,7,6,5,4,0,8,9,10,11,12,13,14, \
22,15,16,17,18,19,20,21,23 }
......@@ -657,7 +765,8 @@ extern enum reg_class reg_class_from_letter[];
#define PREFERRED_RELOAD_CLASS(X, CLASS) (CLASS)
#define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS,MODE,X) \
((((((CLASS) == FP_REGS || (CLASS) == FP0_REGS) \
((((((CLASS) == FP_REGS || (CLASS) == FP0_REGS \
|| (CLASS) == DF_REGS) \
&& (GET_CODE (X) == REG && REGNO (X) <= AP_REG)) \
|| (((CLASS) == GENERAL_REGS || (CLASS) == R0_REGS) \
&& GET_CODE (X) == REG \
......@@ -666,7 +775,7 @@ extern enum reg_class reg_class_from_letter[];
? FPUL_REGS \
: ((CLASS) == FPUL_REGS \
&& (GET_CODE (X) == MEM \
|| GET_CODE (X) == REG && REGNO (X) >= FIRST_PSEUDO_REGISTER))\
|| (GET_CODE (X) == REG && REGNO (X) >= FIRST_PSEUDO_REGISTER)))\
? GENERAL_REGS \
: (((CLASS) == MAC_REGS || (CLASS) == PR_REGS) \
&& GET_CODE (X) == REG && REGNO (X) > 15 \
......@@ -674,10 +783,19 @@ extern enum reg_class reg_class_from_letter[];
? GENERAL_REGS : NO_REGS)
#define SECONDARY_INPUT_RELOAD_CLASS(CLASS,MODE,X) \
((((CLASS) == FP_REGS || (CLASS) == FP0_REGS) \
((((CLASS) == FP_REGS || (CLASS) == FP0_REGS || (CLASS) == DF_REGS) \
&& immediate_operand ((X), (MODE)) \
&& ! (fp_zero_operand (X) || fp_one_operand (X))) \
? R0_REGS : SECONDARY_OUTPUT_RELOAD_CLASS((CLASS),(MODE),(X)))
&& ! ((fp_zero_operand (X) || fp_one_operand (X)) && (MODE) == SFmode))\
? R0_REGS \
: CLASS == FPUL_REGS && immediate_operand ((X), (MODE)) \
? (GET_CODE (X) == CONST_INT && CONST_OK_FOR_I (INTVAL (X)) \
? GENERAL_REGS \
: R0_REGS) \
: (CLASS == FPSCR_REGS \
&& ((GET_CODE (X) == REG && REGNO (X) >= FIRST_PSEUDO_REGISTER) \
|| GET_CODE (X) == MEM && GET_CODE (XEXP ((X), 0)) == PLUS)) \
? GENERAL_REGS \
: SECONDARY_OUTPUT_RELOAD_CLASS((CLASS),(MODE),(X)))
/* Return the maximum number of consecutive registers
needed to represent mode MODE in a register of class CLASS.
......@@ -685,6 +803,11 @@ extern enum reg_class reg_class_from_letter[];
On SH this is the size of MODE in words. */
#define CLASS_MAX_NREGS(CLASS, MODE) \
((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
/* If defined, gives a class of registers that cannot be used as the
operand of a SUBREG that changes the size of the object. */
#define CLASS_CANNOT_CHANGE_SIZE DF_REGS
/* Stack layout; function entry, exit and calling. */
......@@ -694,6 +817,9 @@ extern enum reg_class reg_class_from_letter[];
#define NPARM_REGS(MODE) \
(TARGET_SH3E && (MODE) == SFmode \
? 8 \
: TARGET_SH4 && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
|| GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \
? 8 \
: 4)
#define FIRST_PARM_REG 4
......@@ -752,25 +878,48 @@ extern enum reg_class reg_class_from_letter[];
#define BASE_RETURN_VALUE_REG(MODE) \
((TARGET_SH3E && ((MODE) == SFmode)) \
? FIRST_FP_RET_REG \
: TARGET_SH3E && (MODE) == SCmode \
? FIRST_FP_RET_REG \
: (TARGET_SH4 \
&& ((MODE) == DFmode || (MODE) == SFmode \
|| (MODE) == DCmode || (MODE) == SCmode )) \
? FIRST_FP_RET_REG \
: FIRST_RET_REG)
#define BASE_ARG_REG(MODE) \
((TARGET_SH3E && ((MODE) == SFmode)) \
? FIRST_FP_PARM_REG \
: TARGET_SH4 && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
|| GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT)\
? FIRST_FP_PARM_REG \
: FIRST_PARM_REG)
/* Define how to find the value returned by a function.
VALTYPE is the data type of the value (as a tree).
If the precise function being called is known, FUNC is its FUNCTION_DECL;
otherwise, FUNC is 0. */
#define FUNCTION_VALUE(VALTYPE, FUNC) \
LIBCALL_VALUE (TYPE_MODE (VALTYPE))
otherwise, FUNC is 0.
For the SH, this is like LIBCALL_VALUE, except that we must change the
mode like PROMOTE_MODE does.
??? PROMOTE_MODE is ignored for non-scalar types. The set of types
tested here has to be kept in sync with the one in explow.c:promote_mode. */
#define FUNCTION_VALUE(VALTYPE, FUNC) \
gen_rtx (REG, \
((GET_MODE_CLASS (TYPE_MODE (VALTYPE)) == MODE_INT \
&& GET_MODE_SIZE (TYPE_MODE (VALTYPE)) < UNITS_PER_WORD \
&& (TREE_CODE (VALTYPE) == INTEGER_TYPE \
|| TREE_CODE (VALTYPE) == ENUMERAL_TYPE \
|| TREE_CODE (VALTYPE) == BOOLEAN_TYPE \
|| TREE_CODE (VALTYPE) == CHAR_TYPE \
|| TREE_CODE (VALTYPE) == REAL_TYPE \
|| TREE_CODE (VALTYPE) == OFFSET_TYPE)) \
? SImode : TYPE_MODE (VALTYPE)), \
BASE_RETURN_VALUE_REG (TYPE_MODE (VALTYPE)))
/* Define how to find the value returned by a library function
assuming the value has mode MODE. */
#define LIBCALL_VALUE(MODE) \
gen_rtx (REG, (MODE), BASE_RETURN_VALUE_REG (MODE));
gen_rtx (REG, (MODE), BASE_RETURN_VALUE_REG (MODE))
/* 1 if N is a possible register number for a function value. */
#define FUNCTION_VALUE_REGNO_P(REGNO) \
......@@ -801,7 +950,11 @@ struct sh_args {
#define CUMULATIVE_ARGS struct sh_args
#define GET_SH_ARG_CLASS(MODE) \
((TARGET_SH3E && ((MODE) == SFmode)) ? SH_ARG_FLOAT : SH_ARG_INT)
((TARGET_SH3E && (MODE) == SFmode) \
? SH_ARG_FLOAT \
: TARGET_SH4 && (GET_MODE_CLASS (MODE) == MODE_FLOAT \
|| GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \
? SH_ARG_FLOAT : SH_ARG_INT)
#define ROUND_ADVANCE(SIZE) \
(((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
......@@ -813,7 +966,9 @@ struct sh_args {
round doubles to even regs when asked to explicitly. */
#define ROUND_REG(CUM, MODE) \
((TARGET_ALIGN_DOUBLE \
(((TARGET_ALIGN_DOUBLE \
|| (TARGET_SH4 && ((MODE) == DFmode || (MODE) == DCmode) \
&& (CUM).arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (MODE)))\
&& GET_MODE_UNIT_SIZE ((MODE)) > UNITS_PER_WORD) \
? ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] \
+ ((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] & 1)) \
......@@ -838,11 +993,12 @@ struct sh_args {
available.) */
#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \
((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] = \
(ROUND_REG ((CUM), (MODE)) \
+ ((MODE) != BLKmode \
? ROUND_ADVANCE (GET_MODE_SIZE (MODE)) \
: ROUND_ADVANCE (int_size_in_bytes (TYPE)))))
if (! TARGET_SH4 || PASS_IN_REG_P ((CUM), (MODE), (TYPE))) \
((CUM).arg_count[(int) GET_SH_ARG_CLASS (MODE)] \
= (ROUND_REG ((CUM), (MODE)) \
+ ((MODE) == BLKmode \
? ROUND_ADVANCE (int_size_in_bytes (TYPE)) \
: ROUND_ADVANCE (GET_MODE_SIZE (MODE)))))
/* Return boolean indicating arg of mode MODE will be passed in a reg.
This macro is only used in this file. */
......@@ -883,7 +1039,9 @@ extern int current_function_varargs;
((PASS_IN_REG_P ((CUM), (MODE), (TYPE)) \
&& ((NAMED) || TARGET_SH3E || ! current_function_varargs)) \
? gen_rtx (REG, (MODE), \
(BASE_ARG_REG (MODE) + ROUND_REG ((CUM), (MODE)))) \
((BASE_ARG_REG (MODE) + ROUND_REG ((CUM), (MODE))) \
^ ((MODE) == SFmode && TARGET_SH4 \
&& TARGET_LITTLE_ENDIAN != 0))) \
: 0)
/* For an arg passed partly in registers and partly in memory,
......@@ -894,8 +1052,9 @@ extern int current_function_varargs;
#define FUNCTION_ARG_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) \
((PASS_IN_REG_P ((CUM), (MODE), (TYPE)) \
&& ! TARGET_SH4 \
&& (ROUND_REG ((CUM), (MODE)) \
+ (MODE != BLKmode \
+ ((MODE) != BLKmode \
? ROUND_ADVANCE (GET_MODE_SIZE (MODE)) \
: ROUND_ADVANCE (int_size_in_bytes (TYPE))) \
- NPARM_REGS (MODE) > 0)) \
......@@ -955,7 +1114,7 @@ extern int current_function_anonymous_args;
/* Alignment required for a trampoline in bits . */
#define TRAMPOLINE_ALIGNMENT \
((CACHE_LOG < 3 || TARGET_SMALLCODE) ? 32 : 64) \
((CACHE_LOG < 3 || TARGET_SMALLCODE && ! TARGET_HARWARD) ? 32 : 64)
/* Emit RTL insns to initialize the variable parts of a trampoline.
FNADDR is an RTX for the address of the function's pure code.
......@@ -971,6 +1130,8 @@ extern int current_function_anonymous_args;
(CXT)); \
emit_move_insn (gen_rtx (MEM, SImode, plus_constant ((TRAMP), 12)), \
(FNADDR)); \
if (TARGET_HARWARD) \
emit_insn (gen_ic_invalidate_line (TRAMP)); \
}
/* A C expression whose value is RTL representing the value of the return
......@@ -1086,7 +1247,10 @@ extern struct rtx_def *sh_builtin_saveregs ();
#define MODE_DISP_OK_4(X,MODE) \
(GET_MODE_SIZE (MODE) == 4 && (unsigned) INTVAL (X) < 64 \
&& ! (INTVAL (X) & 3) && ! (TARGET_SH3E && (MODE) == SFmode))
#define MODE_DISP_OK_8(X,MODE) ((GET_MODE_SIZE(MODE)==8) && ((unsigned)INTVAL(X)<60) && (!(INTVAL(X) &3)))
#define MODE_DISP_OK_8(X,MODE) \
((GET_MODE_SIZE(MODE)==8) && ((unsigned)INTVAL(X)<60) \
&& ! (INTVAL(X) & 3) && ! (TARGET_SH4 && (MODE) == DFmode))
#define BASE_REGISTER_RTX_P(X) \
((GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X)) \
......@@ -1141,13 +1305,15 @@ extern struct rtx_def *sh_builtin_saveregs ();
else if ((GET_CODE (X) == POST_INC || GET_CODE (X) == PRE_DEC) \
&& BASE_REGISTER_RTX_P (XEXP ((X), 0))) \
goto LABEL; \
else if (GET_CODE (X) == PLUS && MODE != PSImode) \
else if (GET_CODE (X) == PLUS \
&& ((MODE) != PSImode || reload_completed)) \
{ \
rtx xop0 = XEXP ((X), 0); \
rtx xop1 = XEXP ((X), 1); \
if (GET_MODE_SIZE (MODE) <= 8 && BASE_REGISTER_RTX_P (xop0)) \
GO_IF_LEGITIMATE_INDEX ((MODE), xop1, LABEL); \
if (GET_MODE_SIZE (MODE) <= 4) \
if (GET_MODE_SIZE (MODE) <= 4 \
|| TARGET_SH4 && TARGET_FMOVD && MODE == DFmode) \
{ \
if (BASE_REGISTER_RTX_P (xop1) && INDEX_REGISTER_RTX_P (xop0))\
goto LABEL; \
......@@ -1181,6 +1347,7 @@ extern struct rtx_def *sh_builtin_saveregs ();
|| GET_MODE_SIZE (MODE) == 8) \
&& GET_CODE (XEXP ((X), 1)) == CONST_INT \
&& BASE_REGISTER_RTX_P (XEXP ((X), 0)) \
&& ! (TARGET_SH4 && (MODE) == DFmode) \
&& ! (TARGET_SH3E && (MODE) == SFmode)) \
{ \
rtx index_rtx = XEXP ((X), 1); \
......@@ -1228,12 +1395,21 @@ extern struct rtx_def *sh_builtin_saveregs ();
&& (GET_MODE_SIZE (MODE) == 4 || GET_MODE_SIZE (MODE) == 8) \
&& GET_CODE (XEXP (X, 1)) == CONST_INT \
&& BASE_REGISTER_RTX_P (XEXP (X, 0)) \
&& ! (TARGET_SH3E && MODE == SFmode)) \
&& ! (TARGET_SH4 && (MODE) == DFmode) \
&& ! ((MODE) == PSImode && (TYPE) == RELOAD_FOR_INPUT_ADDRESS)) \
{ \
rtx index_rtx = XEXP (X, 1); \
HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base; \
rtx sum; \
\
if (TARGET_SH3E && MODE == SFmode) \
{ \
X = copy_rtx (X); \
push_reload (index_rtx, NULL_RTX, &XEXP (X, 1), NULL_PTR, \
INDEX_REG_CLASS, Pmode, VOIDmode, 0, 0, (OPNUM), \
(TYPE)); \
goto WIN; \
} \
/* Instead of offset_base 128..131 use 124..127, so that \
simple add suffices. */ \
if (offset > 127) \
......@@ -1315,7 +1491,7 @@ extern struct rtx_def *sh_builtin_saveregs ();
/* Since the SH3e has only `float' support, it is desirable to make all
floating point types equivalent to `float'. */
#define DOUBLE_TYPE_SIZE (TARGET_SH3E ? 32 : 64)
#define DOUBLE_TYPE_SIZE ((TARGET_SH3E && ! TARGET_SH4) ? 32 : 64)
/* 'char' is signed by default. */
#define DEFAULT_SIGNED_CHAR 1
......@@ -1407,6 +1583,11 @@ extern struct rtx_def *sh_builtin_saveregs ();
return 10;
#define RTX_COSTS(X, CODE, OUTER_CODE) \
case PLUS: \
return (COSTS_N_INSNS (1) \
+ rtx_cost (XEXP ((X), 0), PLUS) \
+ (rtx_equal_p (XEXP ((X), 0), XEXP ((X), 1))\
? 0 : rtx_cost (XEXP ((X), 1), PLUS)));\
case AND: \
return COSTS_N_INSNS (andcosts (X)); \
case MULT: \
......@@ -1414,7 +1595,13 @@ extern struct rtx_def *sh_builtin_saveregs ();
case ASHIFT: \
case ASHIFTRT: \
case LSHIFTRT: \
return COSTS_N_INSNS (shiftcosts (X)) ; \
/* Add one extra unit for the matching constraint. \
Otherwise loop strength reduction would think that\
a shift with different sourc and destination is \
as cheap as adding a constant to a register. */ \
return (COSTS_N_INSNS (shiftcosts (X)) \
+ rtx_cost (XEXP ((X), 0), (CODE)) \
+ 1); \
case DIV: \
case UDIV: \
case MOD: \
......@@ -1462,11 +1649,29 @@ extern struct rtx_def *sh_builtin_saveregs ();
/* Compute extra cost of moving data between one register class
and another. */
/* Regclass always uses 2 for moves in the same register class;
If SECONDARY*_RELOAD_CLASS says something about the src/dst pair,
it uses this information. Hence, the general register <-> floating point
register information here is not used for SFmode. */
#define REGISTER_MOVE_COST(SRCCLASS, DSTCLASS) \
((DSTCLASS) == PR_REG ? 10 \
: (((DSTCLASS) == FP_REGS && (SRCCLASS) == GENERAL_REGS) \
|| ((DSTCLASS) == GENERAL_REGS && (SRCCLASS) == FP_REGS)) ? 4 \
: 1)
((((DSTCLASS) == T_REGS) || ((DSTCLASS) == PR_REG)) ? 10 \
: ((((DSTCLASS) == FP0_REGS || (DSTCLASS) == FP_REGS || (DSTCLASS) == DF_REGS) \
&& ((SRCCLASS) == GENERAL_REGS || (SRCCLASS) == R0_REGS)) \
|| (((DSTCLASS) == GENERAL_REGS || (DSTCLASS) == R0_REGS) \
&& ((SRCCLASS) == FP0_REGS || (SRCCLASS) == FP_REGS \
|| (SRCCLASS) == DF_REGS))) \
? TARGET_FMOVD ? 8 : 12 \
: (((DSTCLASS) == FPUL_REGS \
&& ((SRCCLASS) == GENERAL_REGS || (SRCCLASS) == R0_REGS)) \
|| (SRCCLASS == FPUL_REGS \
&& ((DSTCLASS) == GENERAL_REGS || (DSTCLASS) == R0_REGS))) \
? 5 \
: (((DSTCLASS) == FPUL_REGS \
&& ((SRCCLASS) == PR_REGS || (SRCCLASS) == MAC_REGS)) \
|| ((SRCCLASS) == FPUL_REGS \
&& ((DSTCLASS) == PR_REGS || (DSTCLASS) == MAC_REGS))) \
? 7 \
: 2)
/* ??? Perhaps make MEMORY_MOVE_COST depend on compiler option? This
would be so that people would slow memory systems could generate
......@@ -1573,13 +1778,32 @@ dtors_section() \
the Real framepointer; it can also be used as a normal general register.
Note that the name `fp' is horribly misleading since `fp' is in fact only
the argument-and-return-context pointer. */
extern char fp_reg_names[][5];
#define REGISTER_NAMES \
{ \
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
"ap", "pr", "t", "gbr", "mach","macl", fp_reg_names[16], "rap", \
fp_reg_names[0], fp_reg_names[1] , fp_reg_names[2], fp_reg_names[3], \
fp_reg_names[4], fp_reg_names[5], fp_reg_names[6], fp_reg_names[7], \
fp_reg_names[8], fp_reg_names[9], fp_reg_names[10], fp_reg_names[11], \
fp_reg_names[12], fp_reg_names[13], fp_reg_names[14], fp_reg_names[15], \
fp_reg_names[17], fp_reg_names[18], fp_reg_names[19], fp_reg_names[20], \
fp_reg_names[21], fp_reg_names[22], fp_reg_names[23], fp_reg_names[24], \
"fpscr", \
}
#define DEBUG_REGISTER_NAMES \
{ \
"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
"ap", "pr", "t", "gbr", "mach","macl", "fpul","rap", \
"fr0","fr1","fr2", "fr3", "fr4", "fr5", "fr6", "fr7", \
"fr8","fr9","fr10","fr11","fr12","fr13","fr14","fr15",\
"xd0","xd2","xd4", "xd6", "xd8", "xd10","xd12","xd14", \
"fpscr", \
}
/* DBX register number for a given compiler register number. */
......@@ -1773,7 +1997,8 @@ enum processor_type {
PROCESSOR_SH1,
PROCESSOR_SH2,
PROCESSOR_SH3,
PROCESSOR_SH3E
PROCESSOR_SH3E,
PROCESSOR_SH4
};
#define sh_cpu_attr ((enum attr_cpu)sh_cpu)
......@@ -1837,6 +2062,11 @@ extern int sh_valid_machine_decl_attribute ();
#define VALID_MACHINE_DECL_ATTRIBUTE(DECL, ATTRIBUTES, IDENTIFIER, ARGS) \
sh_valid_machine_decl_attribute (DECL, ATTRIBUTES, IDENTIFIER, ARGS)
extern int sh_flag_remove_dead_before_cse;
extern int rtx_equal_function_value_matters;
extern struct rtx_def *fpscr_rtx;
extern struct rtx_def *get_fpscr_rtx ();
#define MOVE_RATIO (TARGET_SMALLCODE ? 2 : 16)
......@@ -1860,10 +2090,16 @@ sh_valid_machine_decl_attribute (DECL, ATTRIBUTES, IDENTIFIER, ARGS)
{"arith_operand", {SUBREG, REG, CONST_INT}}, \
{"arith_reg_operand", {SUBREG, REG}}, \
{"arith_reg_or_0_operand", {SUBREG, REG, CONST_INT}}, \
{"binary_float_operator", {PLUS, MULT}}, \
{"braf_label_ref_operand", {LABEL_REF}}, \
{"commutative_float_operator", {PLUS, MULT}}, \
{"fp_arith_reg_operand", {SUBREG, REG}}, \
{"fp_extended_operand", {SUBREG, REG, FLOAT_EXTEND}}, \
{"fpscr_operand", {REG}}, \
{"general_movsrc_operand", {SUBREG, REG, CONST_INT, MEM}}, \
{"general_movdst_operand", {SUBREG, REG, CONST_INT, MEM}}, \
{"logical_operand", {SUBREG, REG, CONST_INT}}, \
{"noncommutative_float_operator", {MINUS, DIV}}, \
{"register_operand", {SUBREG, REG}},
/* Define this macro if it is advisable to hold scalars in registers
......@@ -1929,7 +2165,7 @@ do { \
using their arguments pretty quickly. \
Assume a four cycle delay before they are needed. */ \
if (! reg_set_p (reg, dep_insn)) \
cost -= 4; \
cost -= TARGET_SUPERSCALAR ? 40 : 4; \
} \
/* Adjust load_si / pcload_si type insns latency. Use the known \
nominal latency and form of the insn to speed up the check. */ \
......@@ -1939,9 +2175,14 @@ do { \
it's actually a move insn. */ \
&& general_movsrc_operand (SET_SRC (PATTERN (dep_insn)), SImode))\
cost = 2; \
else if (cost == 30 \
&& GET_CODE (PATTERN (dep_insn)) == SET \
&& GET_MODE (SET_SRC (PATTERN (dep_insn))) == SImode) \
cost = 20; \
} while (0) \
/* For the sake of libgcc2.c, indicate target supports atexit. */
#define HAVE_ATEXIT
#define SH_DYNAMIC_SHIFT_COST (TARGET_SH3 ? (TARGET_SMALLCODE ? 1 : 2) : 20)
#define SH_DYNAMIC_SHIFT_COST \
(TARGET_HARD_SH4 ? 1 : TARGET_SH3 ? (TARGET_SMALLCODE ? 1 : 2) : 20)
......@@ -70,13 +70,20 @@
;; Target CPU.
(define_attr "cpu"
"sh1,sh2,sh3,sh3e"
"sh1,sh2,sh3,sh3e,sh4"
(const (symbol_ref "sh_cpu_attr")))
(define_attr "endian" "big,little"
(const (if_then_else (symbol_ref "TARGET_LITTLE_ENDIAN")
(const_string "little") (const_string "big"))))
(define_attr "fmovd" "yes,no"
(const (if_then_else (symbol_ref "TARGET_FMOVD")
(const_string "yes") (const_string "no"))))
;; issues/clock
(define_attr "issues" "1,2"
(const (if_then_else (symbol_ref "TARGET_SUPERSCALAR") (const_string "2") (const_string "1"))))
;; cbranch conditional branch instructions
;; jump unconditional jumps
;; arith ordinary arithmetic
......@@ -101,10 +108,12 @@
;; fp floating point
;; fdiv floating point divide (or square root)
;; gp_fpul move between general purpose register and fpul
;; dfp_arith, dfp_cmp,dfp_conv
;; dfdiv double precision floating point divide (or square root)
;; nil no-op move, will be deleted.
(define_attr "type"
"cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,other,load,load_si,store,move,fmove,smpy,dmpy,return,pload,pstore,pcload,pcload_si,rte,sfunc,call,fp,fdiv,gp_fpul,nil"
"cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,other,load,load_si,store,move,fmove,smpy,dmpy,return,pload,pstore,pcload,pcload_si,rte,sfunc,call,fp,fdiv,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,nil"
(const_string "other"))
; If a conditional branch destination is within -252..258 bytes away
......@@ -252,34 +261,216 @@
;; We only do this for SImode loads of general registers, to make the work
;; for ADJUST_COST easier.
(define_function_unit "memory" 1 0
(eq_attr "type" "load_si,pcload_si")
(and (eq_attr "issues" "1")
(eq_attr "type" "load_si,pcload_si"))
3 2)
(define_function_unit "memory" 1 0
(eq_attr "type" "load,pcload,pload,store,pstore")
(and (eq_attr "issues" "1")
(eq_attr "type" "load,pcload,pload,store,pstore"))
2 2)
(define_function_unit "int" 1 0
(eq_attr "type" "arith3,arith3b") 3 3)
(and (eq_attr "issues" "1") (eq_attr "type" "arith3,arith3b")) 3 3)
(define_function_unit "int" 1 0
(eq_attr "type" "dyn_shift") 2 2)
(and (eq_attr "issues" "1") (eq_attr "type" "dyn_shift")) 2 2)
(define_function_unit "int" 1 0
(eq_attr "type" "arith,arith3b,dyn_shift") 2 2)
(and (eq_attr "issues" "1") (eq_attr "type" "!arith3,arith3b,dyn_shift")) 1 1)
;; ??? These are approximations.
(define_function_unit "mpy" 1 0 (eq_attr "type" "smpy") 2 2)
(define_function_unit "mpy" 1 0 (eq_attr "type" "dmpy") 3 3)
(define_function_unit "mpy" 1 0
(and (eq_attr "issues" "1") (eq_attr "type" "smpy")) 2 2)
(define_function_unit "mpy" 1 0
(and (eq_attr "issues" "1") (eq_attr "type" "dmpy")) 3 3)
(define_function_unit "fp" 1 0
(and (eq_attr "issues" "1") (eq_attr "type" "fp,fmove")) 2 1)
(define_function_unit "fp" 1 0
(and (eq_attr "issues" "1") (eq_attr "type" "fdiv")) 13 12)
;; SH4 scheduling
;; The SH4 is a dual-issue implementation, thus we have to multiply all
;; costs by at least two.
;; There will be single increments of the modeled that don't correspond
;; to the actual target ;; whenever two insns to be issued depend one a
;; single resource, and the scheduler picks to be the first one.
;; If we multiplied the costs just by two, just two of these single
;; increments would amount to an actual cycle. By picking a larger
;; factor, we can ameliorate the effect; However, we then have to make sure
;; that only two insns are modeled as issued per actual cycle.
;; Moreover, we need a way to specify the latency of insns that don't
;; use an actual function unit.
;; We use an 'issue' function unit to do that, and a cost factor of 10.
(define_function_unit "issue" 2 0
(and (eq_attr "issues" "2") (eq_attr "type" "!nil,arith3"))
10 10)
(define_function_unit "issue" 2 0
(and (eq_attr "issues" "2") (eq_attr "type" "arith3"))
30 30)
;; There is no point in providing exact scheduling information about branches,
;; because they are at the starts / ends of basic blocks anyways.
;; Some insns cannot be issued before/after another insn in the same cycle,
;; irrespective of the type of the other insn.
;; default is dual-issue, but can't be paired with an insn that
;; uses multiple function units.
(define_function_unit "single_issue" 1 0
(and (eq_attr "issues" "2")
(eq_attr "type" "!smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul,call,sfunc,arith3,arith3b"))
1 10
[(eq_attr "type" "smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul")])
(define_function_unit "single_issue" 1 0
(and (eq_attr "issues" "2")
(eq_attr "type" "smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul"))
10 10
[(const_int 1)])
;; arith3 insns are always pairable at the start, but not inecessarily at
;; the end; however, there doesn;t seem to be a way to express that.
(define_function_unit "single_issue" 1 0
(and (eq_attr "issues" "2")
(eq_attr "type" "arith3"))
30 20
[(const_int 1)])
;; arith3b insn are pairable at the end and have latency that prevents pairing
;; with the following branch, but we don't want this latency be respected;
;; When the following branch is immediately adjacent, we can redirect the
;; internal branch, which is likly to be a larger win.
(define_function_unit "single_issue" 1 0
(and (eq_attr "issues" "2")
(eq_attr "type" "arith3b"))
20 20
[(const_int 1)])
;; calls introduce a longisch delay that is likely to flush the pipelines.
(define_function_unit "single_issue" 1 0
(and (eq_attr "issues" "2")
(eq_attr "type" "call,sfunc"))
160 160
[(eq_attr "type" "!call") (eq_attr "type" "call")])
;; Load and store instructions have no alignment peculiarities for the SH4,
;; but they use the load-store unit, which they share with the fmove type
;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg) .
;; Loads have a latency of two.
;; However, call insns can only paired with a preceding insn, and have
;; a delay slot, so that we want two more insns to be scheduled between the
;; load of the function address and the call. This is equivalent to a
;; latency of three.
;; We cannot use a conflict list for this, because we need to distinguish
;; between the actual call address and the function arguments.
;; ADJUST_COST can only properly handle reductions of the cost, so we
;; use a latency of three here, which gets multiplied by 10 to yield 30.
;; We only do this for SImode loads of general registers, to make the work
;; for ADJUST_COST easier.
(define_function_unit "fp" 1 0 (eq_attr "type" "fp,fmove") 2 1)
(define_function_unit "fp" 1 0 (eq_attr "type" "fdiv") 13 12)
;; When specifying different latencies for different insns using the
;; the same function unit, genattrtab.c assumes a 'FIFO constraint'
;; so that the blockage is at least READY-COST (E) + 1 - READY-COST (C)
;; for an executing insn E and a candidate insn C.
;; Therefore, we define three different function units for load_store:
;; load_store, load and load_si.
(define_function_unit "load_si" 1 0
(and (eq_attr "issues" "2")
(eq_attr "type" "load_si,pcload_si")) 30 10)
(define_function_unit "load" 1 0
(and (eq_attr "issues" "2")
(eq_attr "type" "load,pcload,pload")) 20 10)
(define_function_unit "load_store" 1 0
(and (eq_attr "issues" "2")
(eq_attr "type" "load_si,pcload_si,load,pcload,pload,store,pstore,fmove"))
10 10)
(define_function_unit "int" 1 0
(and (eq_attr "issues" "2") (eq_attr "type" "arith,dyn_shift")) 10 10)
;; Again, we have to pretend a lower latency for the "int" unit to avoid a
;; spurious FIFO constraint; the multiply instructions use the "int"
;; unit actually only for two cycles.
(define_function_unit "int" 1 0
(and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 20 20)
;; We use a fictous "mpy" unit to express the actual latency.
(define_function_unit "mpy" 1 0
(and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 40 20)
;; Again, we have to pretend a lower latency for the "int" unit to avoid a
;; spurious FIFO constraint.
(define_function_unit "int" 1 0
(and (eq_attr "issues" "2") (eq_attr "type" "gp_fpul")) 10 10)
;; We use a fictous "gp_fpul" unit to express the actual latency.
(define_function_unit "gp_fpul" 1 0
(and (eq_attr "issues" "2") (eq_attr "type" "gp_fpul")) 20 10)
;; ??? multiply uses the floating point unit, but with a two cycle delay.
;; Thus, a simple single-precision fp operation could finish if issued in
;; the very next cycle, but stalls when issued two or three cycles later.
;; Similarily, a divide / sqrt can work without stalls if issued in
;; the very next cycle, while it would have to block if issued two or
;; three cycles later.
;; There is no way to model this with gcc's function units. This problem is
;; actually mentioned in md.texi. Tackling this problem requires first that
;; it is possible to speak about the target in an open discussion.
;;
;; However, simple double-precision operations always conflict.
(define_function_unit "fp" 1 0
(and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 40 40
[(eq_attr "type" "dfp_cmp,dfp_conv,dfp_arith")])
;; The "fp" unit is for pipeline stages F1 and F2.
(define_function_unit "fp" 1 0
(and (eq_attr "issues" "2") (eq_attr "type" "fp")) 30 10)
;; Again, we have to pretend a lower latency for the "fp" unit to avoid a
;; spurious FIFO constraint; the bulk of the fdiv type insns executes in
;; the F3 stage.
(define_function_unit "fp" 1 0
(and (eq_attr "issues" "2") (eq_attr "type" "fdiv")) 30 10)
;; The "fdiv" function unit models the aggregate effect of the F1, F2 and F3
;; pipeline stages on the pipelining of fdiv/fsqrt insns.
;; We also use it to give the actual latency here.
;; fsqrt is actually one cycle faster than fdiv (and the value used here),
;; but that will hardly matter in practice for scheduling.
(define_function_unit "fdiv" 1 0
(and (eq_attr "issues" "2") (eq_attr "type" "fdiv")) 120 100)
;; There is again a late use of the "fp" unit by [d]fdiv type insns
;; that we can't express.
(define_function_unit "fp" 1 0
(and (eq_attr "issues" "2") (eq_attr "type" "dfp_cmp,dfp_conv")) 40 20)
(define_function_unit "fp" 1 0
(and (eq_attr "issues" "2") (eq_attr "type" "dfp_arith")) 80 60)
(define_function_unit "fp" 1 0
(and (eq_attr "issues" "2") (eq_attr "type" "dfdiv")) 230 10)
(define_function_unit "fdiv" 1 0
(and (eq_attr "issues" "2") (eq_attr "type" "dfdiv")) 230 210)
; Definitions for filling branch delay slots.
(define_attr "needs_delay_slot" "yes,no" (const_string "no"))
(define_attr "hit_stack" "yes,no" (const_string "no"))
;; ??? This should be (nil) instead of (const_int 0)
(define_attr "hit_stack" "yes,no"
(cond [(eq (symbol_ref "find_regno_note (insn, REG_INC, 15)") (const_int 0))
(const_string "no")]
(const_string "yes")))
(define_attr "interrupt_function" "no,yes"
(const (symbol_ref "pragma_interrupt")))
......@@ -668,7 +859,42 @@
(clobber (reg:SI 17))
(clobber (reg:SI 4))
(use (match_operand:SI 1 "arith_reg_operand" "r"))]
""
"! TARGET_SH4"
"jsr @%1%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "udivsi3_i4"
[(set (match_operand:SI 0 "register_operand" "=y")
(udiv:SI (reg:SI 4) (reg:SI 5)))
(clobber (reg:SI 17))
(clobber (reg:DF 24))
(clobber (reg:DF 26))
(clobber (reg:DF 28))
(clobber (reg:SI 0))
(clobber (reg:SI 1))
(clobber (reg:SI 4))
(clobber (reg:SI 5))
(use (reg:PSI 48))
(use (match_operand:SI 1 "arith_reg_operand" "r"))]
"TARGET_SH4 && ! TARGET_FPU_SINGLE"
"jsr @%1%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "udivsi3_i4_single"
[(set (match_operand:SI 0 "register_operand" "=y")
(udiv:SI (reg:SI 4) (reg:SI 5)))
(clobber (reg:SI 17))
(clobber (reg:DF 24))
(clobber (reg:DF 26))
(clobber (reg:DF 28))
(clobber (reg:SI 0))
(clobber (reg:SI 1))
(clobber (reg:SI 4))
(clobber (reg:SI 5))
(use (match_operand:SI 1 "arith_reg_operand" "r"))]
"TARGET_HARD_SH4 && TARGET_FPU_SINGLE"
"jsr @%1%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
......@@ -685,7 +911,22 @@
(clobber (reg:SI 4))
(use (match_dup 3))])]
""
"operands[3] = gen_reg_rtx(SImode);")
"
{
operands[3] = gen_reg_rtx(SImode);
if (TARGET_HARD_SH4)
{
emit_move_insn (gen_rtx (REG, SImode, 4), operands[1]);
emit_move_insn (gen_rtx (REG, SImode, 5), operands[2]);
emit_move_insn (operands[3],
gen_rtx_SYMBOL_REF (SImode, \"__udivsi3_i4\"));
if (TARGET_FPU_SINGLE)
emit_insn (gen_udivsi3_i4_single (operands[0], operands[3]));
else
emit_insn (gen_udivsi3_i4 (operands[0], operands[3]));
DONE;
}
}")
(define_insn ""
[(set (match_operand:SI 0 "register_operand" "=z")
......@@ -696,7 +937,33 @@
(clobber (reg:SI 2))
(clobber (reg:SI 3))
(use (match_operand:SI 1 "arith_reg_operand" "r"))]
""
"! TARGET_SH4"
"jsr @%1%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "divsi3_i4"
[(set (match_operand:SI 0 "register_operand" "=y")
(div:SI (reg:SI 4) (reg:SI 5)))
(clobber (reg:SI 17))
(clobber (reg:DF 24))
(clobber (reg:DF 26))
(use (reg:PSI 48))
(use (match_operand:SI 1 "arith_reg_operand" "r"))]
"TARGET_SH4 && ! TARGET_FPU_SINGLE"
"jsr @%1%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "divsi3_i4_single"
[(set (match_operand:SI 0 "register_operand" "=y")
(div:SI (reg:SI 4) (reg:SI 5)))
(clobber (reg:SI 17))
(clobber (reg:DF 24))
(clobber (reg:DF 26))
(clobber (reg:SI 2))
(use (match_operand:SI 1 "arith_reg_operand" "r"))]
"TARGET_HARD_SH4 && TARGET_FPU_SINGLE"
"jsr @%1%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
......@@ -715,7 +982,22 @@
(clobber (reg:SI 3))
(use (match_dup 3))])]
""
"operands[3] = gen_reg_rtx(SImode);")
"
{
operands[3] = gen_reg_rtx(SImode);
if (TARGET_HARD_SH4)
{
emit_move_insn (gen_rtx (REG, SImode, 4), operands[1]);
emit_move_insn (gen_rtx (REG, SImode, 5), operands[2]);
emit_move_insn (operands[3],
gen_rtx_SYMBOL_REF (SImode, \"__sdivsi3_i4\"));
if (TARGET_FPU_SINGLE)
emit_insn (gen_divsi3_i4_single (operands[0], operands[3]));
else
emit_insn (gen_divsi3_i4 (operands[0], operands[3]));
DONE;
}
}")
;; -------------------------------------------------------------------------
;; Multiplication instructions
......@@ -782,7 +1064,6 @@
(define_expand "mulsi3_call"
[(set (reg:SI 4) (match_operand:SI 1 "general_operand" ""))
(set (reg:SI 5) (match_operand:SI 2 "general_operand" ""))
(set (match_dup 3) (symbol_ref:SI "__mulsi3"))
(parallel[(set (match_operand:SI 0 "register_operand" "")
(mult:SI (reg:SI 4)
(reg:SI 5)))
......@@ -792,9 +1073,9 @@
(clobber (reg:SI 3))
(clobber (reg:SI 2))
(clobber (reg:SI 1))
(use (match_dup 3))])]
(use (match_operand:SI 3 "register_operand" ""))])]
""
"operands[3] = gen_reg_rtx(SImode);")
"")
(define_insn "mul_l"
[(set (reg:SI 21)
......@@ -813,13 +1094,29 @@
""
"
{
rtx first, last;
if (!TARGET_SH2)
{
FAIL;
/* ??? Does this give worse or better code? */
emit_insn (gen_mulsi3_call (operands[0], operands[1], operands[2]));
DONE;
/* The address must be set outside the libcall,
since it goes into a pseudo. */
rtx addr = force_reg (SImode, gen_rtx_SYMBOL_REF (SImode, \"__mulsi3\"));
rtx insns = gen_mulsi3_call (operands[0], operands[1], operands[2], addr);
first = XVECEXP (insns, 0, 0);
last = XVECEXP (insns, 0, XVECLEN (insns, 0) - 1);
emit_insn (insns);
}
else
{
rtx macl = gen_rtx_REG (SImode, MACL_REG);
first = emit_insn (gen_mul_l (operands[1], operands[2]));
last = emit_insn (gen_movsi_i ((operands[0]), macl));
}
/* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop
invariant code motion can move it. */
REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first));
REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last));
DONE;
}")
(define_insn "mulsidi3_i"
......@@ -1767,50 +2064,65 @@
;; define push and pop so it is easy for sh.c
(define_insn "push"
(define_expand "push"
[(set (mem:SI (pre_dec:SI (reg:SI 15)))
(match_operand:SI 0 "register_operand" "r,l,x"))]
""
"@
mov.l %0,@-r15
sts.l %0,@-r15
sts.l %0,@-r15"
[(set_attr "type" "store,pstore,store")
(set_attr "hit_stack" "yes")])
"")
(define_insn "pop"
(define_expand "pop"
[(set (match_operand:SI 0 "register_operand" "=r,l,x")
(mem:SI (post_inc:SI (reg:SI 15))))]
""
"@
mov.l @r15+,%0
lds.l @r15+,%0
lds.l @r15+,%0"
[(set_attr "type" "load,pload,load")
(set_attr "hit_stack" "yes")])
"")
(define_expand "push_e"
[(parallel [(set (mem:SF (pre_dec:SI (reg:SI 15)))
(match_operand:SF 0 "" ""))
(use (reg:PSI 48))
(clobber (scratch:SI))])]
""
"")
(define_insn "push_e"
[(set (mem:SF (pre_dec:SI (reg:SI 15)))
(match_operand:SF 0 "register_operand" "r,f,y"))]
(define_insn "push_fpul"
[(set (mem:SF (pre_dec:SI (reg:SI 15))) (reg:SF 22))]
"TARGET_SH3E"
"@
mov.l %0,@-r15
fmov.s %0,@-r15
sts.l %0,@-r15"
"sts.l fpul,@-r15"
[(set_attr "type" "store")
(set_attr "hit_stack" "yes")])
(define_insn "pop_e"
[(set (match_operand:SF 0 "register_operand" "=r,f,y")
(mem:SF (post_inc:SI (reg:SI 15))))]
;; DFmode pushes for sh4 require a lot of what is defined for movdf_i4,
;; so use that.
(define_expand "push_4"
[(parallel [(set (mem:DF (pre_dec:SI (reg:SI 15))) (match_operand:DF 0 "" ""))
(use (reg:PSI 48))
(clobber (scratch:SI))])]
""
"")
(define_expand "pop_e"
[(parallel [(set (match_operand:SF 0 "" "")
(mem:SF (post_inc:SI (reg:SI 15))))
(use (reg:PSI 48))
(clobber (scratch:SI))])]
""
"")
(define_insn "pop_fpul"
[(set (reg:SF 22) (mem:SF (post_inc:SI (reg:SI 15))))]
"TARGET_SH3E"
"@
mov.l @r15+,%0
fmov.s @r15+,%0
lds.l @r15+,%0"
"lds.l @r15+,fpul"
[(set_attr "type" "load")
(set_attr "hit_stack" "yes")])
(define_expand "pop_4"
[(parallel [(set (match_operand:DF 0 "" "")
(mem:DF (post_inc:SI (reg:SI 15))))
(use (reg:PSI 48))
(clobber (scratch:SI))])]
""
"")
;; These two patterns can happen as the result of optimization, when
;; comparisons get simplified to a move of zero or 1 into the T reg.
;; They don't disappear completely, because the T reg is a fixed hard reg.
......@@ -1829,7 +2141,7 @@
;; of a pseudo-reg into the T reg
(define_insn "movsi_i"
[(set (match_operand:SI 0 "general_movdst_operand" "=t,r,r,r,r,r,m,<,<,xl,x,l,r")
(match_operand:SI 1 "general_movsrc_operand" "r,Q,rI,m,xl,t,r,x,l,r,>,>,i"))]
(match_operand:SI 1 "general_movsrc_operand" "r,Q,rI,mr,xl,t,r,x,l,r,>,>,i"))]
"
! TARGET_SH3E
&& (register_operand (operands[0], SImode)
......@@ -1856,8 +2168,8 @@
;; ??? This allows moves from macl to fpul to be recognized, but these moves
;; will require a reload.
(define_insn "movsi_ie"
[(set (match_operand:SI 0 "general_movdst_operand" "=r,r,t,r,r,r,m,<,<,xl,x,l,r,y,r,y")
(match_operand:SI 1 "general_movsrc_operand" "Q,rI,r,m,xl,t,r,x,l,r,>,>,i,r,y,y"))]
[(set (match_operand:SI 0 "general_movdst_operand" "=r,r,t,r,r,r,m,<,<,xl,x,l,y,r,y,r,y")
(match_operand:SI 1 "general_movsrc_operand" "Q,rI,r,mr,xl,t,r,x,l,r,>,>,>,i,r,y,y"))]
"TARGET_SH3E
&& (register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode))"
......@@ -1874,16 +2186,17 @@
lds %1,%0
lds.l %1,%0
lds.l %1,%0
lds.l %1,%0
fake %1,%0
lds %1,%0
sts %1,%0
! move optimized away"
[(set_attr "type" "pcload_si,move,*,load_si,move,move,store,store,pstore,move,load,pload,pcload_si,gp_fpul,gp_fpul,nil")
(set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")])
[(set_attr "type" "pcload_si,move,*,load_si,move,move,store,store,pstore,move,load,pload,load,pcload_si,gp_fpul,gp_fpul,nil")
(set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")])
(define_insn "movsi_i_lowpart"
[(set (strict_low_part (match_operand:SI 0 "general_movdst_operand" "=r,r,r,r,r,m,r"))
(match_operand:SI 1 "general_movsrc_operand" "Q,rI,m,xl,t,r,i"))]
(match_operand:SI 1 "general_movsrc_operand" "Q,rI,mr,xl,t,r,i"))]
"register_operand (operands[0], SImode)
|| register_operand (operands[1], SImode)"
"@
......@@ -1901,6 +2214,30 @@
""
"{ if (prepare_move_operands (operands, SImode)) DONE; }")
(define_expand "ic_invalidate_line"
[(parallel [(unspec_volatile [(match_operand:SI 0 "register_operand" "+r")
(match_dup 1)] 12)
(clobber (scratch:SI))])]
"TARGET_HARD_SH4"
"
{
operands[0] = force_reg (Pmode, operands[0]);
operands[1] = force_reg (Pmode, GEN_INT (0xf0000008));
}")
;; The address %0 is assumed to be 4-aligned at least. Thus, by ORing
;; 0xf0000008, we get the low-oder bits *1*00 (binary), ;; which fits
;; the requirement *0*00 for associative address writes. The alignment of
;; %0 implies that its least significant bit is cleared,
;; thus we clear the V bit of a matching entry if there is one.
(define_insn "ic_invalidate_line_i"
[(unspec_volatile [(match_operand:SI 0 "register_operand" "r,r")
(match_operand:SI 1 "register_operand" "r,r")] 12)
(clobber (match_scratch:SI 2 "=&r,1"))]
"TARGET_HARD_SH4"
"ocbwb\\t@%0\;extu.w\\t%0,%2\;or\\t%r1,%r2\;mov.l\\t%0,@%2"
[(set_attr "length" "8")])
(define_insn "movqi_i"
[(set (match_operand:QI 0 "general_movdst_operand" "=r,r,m,r,r,l")
(match_operand:QI 1 "general_movsrc_operand" "ri,m,r,t,l,r"))]
......@@ -2014,12 +2351,330 @@
(define_insn "movdf_k"
[(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m")
(match_operand:DF 1 "general_movsrc_operand" "r,FQ,m,r"))]
"arith_reg_operand (operands[0], DFmode)
|| arith_reg_operand (operands[1], DFmode)"
"(! TARGET_SH4 || reload_completed
/* ??? We provide some insn so that direct_{load,store}[DFmode] get set */
|| GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 3
|| GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 3)
&& (arith_reg_operand (operands[0], DFmode)
|| arith_reg_operand (operands[1], DFmode))"
"* return output_movedouble (insn, operands, DFmode);"
[(set_attr "length" "4")
(set_attr "type" "move,pcload,load,store")])
;; All alternatives of movdf_i4 are split for ! TARGET_FMOVD.
;; However, the d/F/c/z alternative cannot be split directly; it is converted
;; with special code in machine_dependent_reorg into a load of the R0_REG and
;; the d/m/c/X alternative, which is split later into single-precision
;; instructions. And when not optimizing, no splits are done before fixing
;; up pcloads, so we need usable length information for that.
(define_insn "movdf_i4"
[(set (match_operand:DF 0 "general_movdst_operand" "=d,r,d,d,m,r,r,m,!??r,!???d")
(match_operand:DF 1 "general_movsrc_operand" "d,r,F,m,d,FQ,m,r,d,r"))
(use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c"))
(clobber (match_scratch:SI 3 "=X,X,&z,X,X,X,X,X,X,X"))]
"TARGET_SH4
&& (arith_reg_operand (operands[0], DFmode)
|| arith_reg_operand (operands[1], DFmode))"
"@
fmov %1,%0
#
#
fmov.d %1,%0
fmov.d %1,%0
#
#
#
#
#"
[(set_attr_alternative "length"
[(if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 4))
(const_int 4)
(if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6))
(if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 6))
(if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 6))
(const_int 4)
(const_int 8) (const_int 8) ;; these need only 8 bytes for @(r0,rn)
(const_int 8) (const_int 8)])
(set_attr "type" "fmove,move,pcload,load,store,pcload,load,store,load,load")])
;; Moving DFmode between fp/general registers through memory
;; (the top of the stack) is faster than moving through fpul even for
;; little endian. Because the type of an instruction is important for its
;; scheduling, it is beneficial to split these operations, rather than
;; emitting them in one single chunk, even if this will expose a stack
;; use that will prevent scheduling of other stack accesses beyond this
;; instruction.
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(match_operand:DF 1 "register_operand" ""))
(use (match_operand:PSI 2 "fpscr_operand" "c"))
(clobber (match_scratch:SI 3 "=X"))]
"TARGET_SH4 && reload_completed
&& (true_regnum (operands[0]) < 16) != (true_regnum (operands[1]) < 16)"
[(const_int 0)]
"
{
rtx insn, tos;
tos = gen_rtx (MEM, DFmode, gen_rtx (PRE_DEC, Pmode, stack_pointer_rtx));
insn = emit_insn (gen_movdf_i4 (tos, operands[1], operands[2]));
REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, stack_pointer_rtx, NULL_RTX);
tos = gen_rtx (MEM, DFmode, gen_rtx (POST_INC, Pmode, stack_pointer_rtx));
insn = emit_insn (gen_movdf_i4 (operands[0], tos, operands[2]));
REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, stack_pointer_rtx, NULL_RTX);
DONE;
}")
;; local-alloc sometimes allocates scratch registers even when not required,
;; so we must be prepared to handle these.
;; Remove the use and clobber from a movdf_i4 so that we can use movdf_k.
(define_split
[(set (match_operand:DF 0 "general_movdst_operand" "")
(match_operand:DF 1 "general_movsrc_operand" ""))
(use (match_operand:PSI 2 "fpscr_operand" "c"))
(clobber (match_scratch:SI 3 "X"))]
"TARGET_SH4
&& reload_completed
&& true_regnum (operands[0]) < 16
&& true_regnum (operands[1]) < 16"
[(set (match_dup 0) (match_dup 1))]
"
{
/* If this was a reg <-> mem operation with base + index reg addressing,
we have to handle this in a special way. */
rtx mem = operands[0];
int store_p = 1;
if (! memory_operand (mem, DFmode))
{
mem = operands[1];
store_p = 0;
}
if (GET_CODE (mem) == SUBREG && SUBREG_WORD (mem) == 0)
mem = SUBREG_REG (mem);
if (GET_CODE (mem) == MEM)
{
rtx addr = XEXP (mem, 0);
if (GET_CODE (addr) == PLUS
&& GET_CODE (XEXP (addr, 0)) == REG
&& GET_CODE (XEXP (addr, 1)) == REG)
{
int offset;
rtx reg0 = gen_rtx (REG, Pmode, 0);
rtx regop = operands[store_p], word0 ,word1;
if (GET_CODE (regop) == SUBREG)
regop = alter_subreg (regop);
if (REGNO (XEXP (addr, 0)) == REGNO (XEXP (addr, 1)))
offset = 2;
else
offset = 4;
mem = copy_rtx (mem);
PUT_MODE (mem, SImode);
word0 = gen_rtx(SUBREG, SImode, regop, 0);
emit_insn (store_p
? gen_movsi_ie (mem, word0) : gen_movsi_ie (word0, mem));
emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (offset)));
mem = copy_rtx (mem);
word1 = gen_rtx(SUBREG, SImode, regop, 1);
emit_insn (store_p
? gen_movsi_ie (mem, word1) : gen_movsi_ie (word1, mem));
emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (-offset)));
DONE;
}
}
}")
;; Split away the clobber of r0 after machine_dependent_reorg has fixed pcloads.
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(match_operand:DF 1 "memory_operand" ""))
(use (match_operand:PSI 2 "fpscr_operand" "c"))
(clobber (reg:SI 0))]
"TARGET_SH4 && reload_completed"
[(parallel [(set (match_dup 0) (match_dup 1))
(use (match_dup 2))
(clobber (scratch:SI))])]
"")
(define_expand "reload_indf"
[(parallel [(set (match_operand:DF 0 "register_operand" "=f")
(match_operand:DF 1 "immediate_operand" "FQ"))
(use (reg:PSI 48))
(clobber (match_operand:SI 2 "register_operand" "=&z"))])]
""
"")
(define_expand "reload_outdf"
[(parallel [(set (match_operand:DF 0 "register_operand" "=r,f")
(match_operand:DF 1 "register_operand" "af,r"))
(clobber (match_operand:SI 2 "register_operand" "=&y,y"))])]
""
"")
;; Simplify no-op moves.
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(match_operand:SF 1 "register_operand" ""))
(use (match_operand:PSI 2 "fpscr_operand" ""))
(clobber (match_scratch:SI 3 "X"))]
"TARGET_SH3E && reload_completed
&& true_regnum (operands[0]) == true_regnum (operands[1])"
[(set (match_dup 0) (match_dup 0))]
"")
;; fmovd substitute post-reload splits
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(match_operand:DF 1 "register_operand" ""))
(use (match_operand:PSI 2 "fpscr_operand" "c"))
(clobber (match_scratch:SI 3 "X"))]
"TARGET_SH4 && ! TARGET_FMOVD && reload_completed
&& true_regnum (operands[0]) >= FIRST_FP_REG
&& true_regnum (operands[1]) >= FIRST_FP_REG"
[(const_int 0)]
"
{
int dst = true_regnum (operands[0]), src = true_regnum (operands[1]);
emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, dst),
gen_rtx (REG, SFmode, src), operands[2]));
emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, dst + 1),
gen_rtx (REG, SFmode, src + 1), operands[2]));
DONE;
}")
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(mem:DF (match_operand:SI 1 "register_operand" "")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))
(clobber (match_scratch:SI 3 "X"))]
"TARGET_SH4 && ! TARGET_FMOVD && reload_completed
&& true_regnum (operands[0]) >= FIRST_FP_REG
&& find_regno_note (insn, REG_DEAD, true_regnum (operands[1]))"
[(const_int 0)]
"
{
int regno = true_regnum (operands[0]);
rtx insn;
rtx mem2 = gen_rtx (MEM, SFmode, gen_rtx (POST_INC, Pmode, operands[1]));
insn = emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode,
regno + !! TARGET_LITTLE_ENDIAN),
mem2, operands[2]));
REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[1], NULL_RTX);
insn = emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode,
regno + ! TARGET_LITTLE_ENDIAN),
gen_rtx (MEM, SFmode, operands[1]),
operands[2]));
DONE;
}")
(define_split
[(set (match_operand:DF 0 "register_operand" "")
(match_operand:DF 1 "memory_operand" ""))
(use (match_operand:PSI 2 "fpscr_operand" "c"))
(clobber (match_scratch:SI 3 "X"))]
"TARGET_SH4 && ! TARGET_FMOVD && reload_completed
&& true_regnum (operands[0]) >= FIRST_FP_REG"
[(const_int 0)]
"
{
int regno = true_regnum (operands[0]);
rtx addr, insn, adjust = NULL_RTX;
rtx mem2 = copy_rtx (operands[1]);
rtx reg0 = gen_rtx_REG (SFmode, regno + !! TARGET_LITTLE_ENDIAN);
rtx reg1 = gen_rtx_REG (SFmode, regno + ! TARGET_LITTLE_ENDIAN);
PUT_MODE (mem2, SFmode);
operands[1] = copy_rtx (mem2);
addr = XEXP (mem2, 0);
if (GET_CODE (addr) != POST_INC)
{
/* If we have to modify the stack pointer, the value that we have
read with post-increment might be modified by an interrupt,
so write it back. */
if (REGNO (addr) == STACK_POINTER_REGNUM)
adjust = gen_push_e (reg0);
else
adjust = gen_addsi3 (addr, addr, GEN_INT (-4));
XEXP (mem2, 0) = addr = gen_rtx_POST_INC (SImode, addr);
}
addr = XEXP (addr, 0);
insn = emit_insn (gen_movsf_ie (reg0, mem2, operands[2]));
REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, addr, NULL_RTX);
insn = emit_insn (gen_movsf_ie (reg1, operands[1], operands[2]));
if (adjust)
emit_insn (adjust);
else
REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, addr, NULL_RTX);
DONE;
}")
(define_split
[(set (match_operand:DF 0 "memory_operand" "")
(match_operand:DF 1 "register_operand" ""))
(use (match_operand:PSI 2 "fpscr_operand" "c"))
(clobber (match_scratch:SI 3 "X"))]
"TARGET_SH4 && ! TARGET_FMOVD && reload_completed
&& true_regnum (operands[1]) >= FIRST_FP_REG"
[(const_int 0)]
"
{
int regno = true_regnum (operands[1]);
rtx insn, addr, adjust = NULL_RTX;
operands[0] = copy_rtx (operands[0]);
PUT_MODE (operands[0], SFmode);
insn = emit_insn (gen_movsf_ie (operands[0],
gen_rtx (REG, SFmode,
regno + ! TARGET_LITTLE_ENDIAN),
operands[2]));
operands[0] = copy_rtx (operands[0]);
addr = XEXP (operands[0], 0);
if (GET_CODE (addr) != PRE_DEC)
{
adjust = gen_addsi3 (addr, addr, GEN_INT (4));
emit_insn_before (adjust, insn);
XEXP (operands[0], 0) = addr = gen_rtx (PRE_DEC, SImode, addr);
}
addr = XEXP (addr, 0);
if (! adjust)
REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, addr, NULL_RTX);
insn = emit_insn (gen_movsf_ie (operands[0],
gen_rtx (REG, SFmode,
regno + !! TARGET_LITTLE_ENDIAN),
operands[2]));
REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, addr, NULL_RTX);
DONE;
}")
;; The '&' for operand 2 is not really true, but push_secondary_reload
;; insists on it.
;; Operand 1 must accept FPUL_REGS in case fpul is reloaded to memory,
;; to avoid a bogus tertiary reload.
;; We need a tertiary reload when a floating point register is reloaded
;; to memory, so the predicate for operand 0 must accept this, while the
;; constraint of operand 1 must reject the secondary reload register.
;; Thus, the secondary reload register for this case has to be GENERAL_REGS,
;; too.
;; By having the predicate for operand 0 reject any register, we make
;; sure that the ordinary moves that just need an intermediate register
;; won't get a bogus tertiary reload.
;; We use tertiary_reload_operand instead of memory_operand here because
;; memory_operand rejects operands that are not directly addressible, e.g.:
;; (mem:SF (plus:SI (reg:SI 14 r14)
;; (const_int 132)))
(define_expand "reload_outsf"
[(parallel [(set (match_operand:SF 2 "register_operand" "=&r")
(match_operand:SF 1 "register_operand" "y"))
(clobber (scratch:SI))])
(parallel [(set (match_operand:SF 0 "tertiary_reload_operand" "=m")
(match_dup 2))
(clobber (scratch:SI))])]
""
"")
;; If the output is a register and the input is memory or a register, we have
;; to be careful and see which word needs to be loaded first.
......@@ -2129,14 +2784,26 @@
"
{
if (prepare_move_operands (operands, DFmode)) DONE;
if (TARGET_SH4)
{
emit_df_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ()));
/* We need something to tag possible REG_LIBCALL notes on to. */
if (TARGET_FPU_SINGLE && rtx_equal_function_value_matters
&& GET_CODE (operands[0]) == REG)
emit_insn (gen_mov_nop (operands[0]));
DONE;
}
}")
(define_insn "movsf_i"
[(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,r,m,l,r")
(match_operand:SF 1 "general_movsrc_operand" "r,I,FQ,m,r,r,l"))]
(match_operand:SF 1 "general_movsrc_operand" "r,I,FQ,mr,r,r,l"))]
"
! TARGET_SH3E
(! TARGET_SH3E
/* ??? We provide some insn so that direct_{load,store}[SFmode] get set */
|| GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 3
|| GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 3)
&& (arith_reg_operand (operands[0], SFmode)
|| arith_reg_operand (operands[1], SFmode))"
"@
......@@ -2156,8 +2823,9 @@
[(set (match_operand:SF 0 "general_movdst_operand"
"=f,r,f,f,fy,f,m,r,r,m,f,y,y,rf,r,y,y")
(match_operand:SF 1 "general_movsrc_operand"
"f,r,G,H,FQ,m,f,FQ,m,r,y,f,>,fr,y,r,y"))
(clobber (match_scratch:SI 2 "=X,X,X,X,&z,X,X,X,X,X,X,X,X,y,X,X,X"))]
"f,r,G,H,FQ,mf,f,FQ,mr,r,y,f,>,fr,y,r,y"))
(use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c"))
(clobber (match_scratch:SI 3 "=X,X,X,X,&z,X,X,X,X,X,X,X,X,y,X,X,X"))]
"TARGET_SH3E
&& (arith_reg_operand (operands[0], SFmode)
......@@ -2181,16 +2849,19 @@
lds %1,%0
! move optimized away"
[(set_attr "type" "fmove,move,fmove,fmove,pcload,load,store,pcload,load,store,fmove,fmove,load,*,gp_fpul,gp_fpul,nil")
(set_attr "length" "*,*,*,*,4,*,*,*,*,*,2,2,2,*,2,2,0")])
(set_attr "length" "*,*,*,*,4,*,*,*,*,*,2,2,2,4,2,2,0")])
(define_split
[(set (match_operand:SF 0 "register_operand" "")
(match_operand:SF 1 "register_operand" ""))
(use (match_operand:PSI 2 "fpscr_operand" "c"))
(clobber (reg:SI 22))]
""
[(parallel [(set (reg:SF 22) (match_dup 1))
(use (match_dup 2))
(clobber (scratch:SI))])
(parallel [(set (match_dup 0) (reg:SF 22))
(use (match_dup 2))
(clobber (scratch:SI))])]
"")
......@@ -2204,17 +2875,63 @@
DONE;
if (TARGET_SH3E)
{
emit_insn (gen_movsf_ie (operands[0], operands[1]));
emit_sf_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ()));
/* We need something to tag possible REG_LIBCALL notes on to. */
if (! TARGET_FPU_SINGLE && rtx_equal_function_value_matters
&& GET_CODE (operands[0]) == REG)
emit_insn (gen_mov_nop (operands[0]));
DONE;
}
}")
(define_insn "mov_nop"
[(set (match_operand 0 "register_operand" "") (match_dup 0))]
"TARGET_SH3E"
""
[(set_attr "length" "0")
(set_attr "type" "nil")])
(define_expand "reload_insf"
[(parallel [(set (match_operand:SF 0 "register_operand" "=f")
(match_operand:SF 1 "immediate_operand" "FQ"))
(use (reg:PSI 48))
(clobber (match_operand:SI 2 "register_operand" "=&z"))])]
""
"")
(define_expand "reload_insi"
[(parallel [(set (match_operand:SF 0 "register_operand" "=y")
(match_operand:SF 1 "immediate_operand" "FQ"))
(clobber (match_operand:SI 2 "register_operand" "=&z"))])]
""
"")
(define_insn "*movsi_y"
[(set (match_operand:SI 0 "register_operand" "=y,y")
(match_operand:SI 1 "immediate_operand" "Qi,I"))
(clobber (match_scratch:SI 3 "=&z,r"))]
"TARGET_SH3E
&& (reload_in_progress || reload_completed)"
"#"
[(set_attr "length" "4")
(set_attr "type" "pcload,move")])
(define_split
[(set (match_operand:SI 0 "register_operand" "y")
(match_operand:SI 1 "immediate_operand" "I"))
(clobber (match_operand:SI 2 "register_operand" "r"))]
""
[(set (match_dup 2) (match_dup 1))
(set (match_dup 0) (match_dup 2))]
"")
(define_split
[(set (match_operand:SI 0 "register_operand" "y")
(match_operand:SI 1 "memory_operand" ">"))
(clobber (reg:SI 0))]
""
[(set (match_dup 0) (match_dup 1))]
"")
;; ------------------------------------------------------------------------
;; Define the real conditional branch instructions.
......@@ -2289,7 +3006,7 @@
""
"
{
if (GET_MODE (sh_compare_op0) == SFmode)
if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
{
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
......@@ -2396,6 +3113,7 @@
(define_insn "calli"
[(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r"))
(match_operand 1 "" ""))
(use (reg:SI 48))
(clobber (reg:SI 17))]
""
"jsr @%0%#"
......@@ -2406,6 +3124,7 @@
[(set (match_operand 0 "" "=rf")
(call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r"))
(match_operand 2 "" "")))
(use (reg:SI 48))
(clobber (reg:SI 17))]
""
"jsr @%1%#"
......@@ -2415,6 +3134,7 @@
(define_expand "call"
[(parallel [(call (mem:SI (match_operand 0 "arith_reg_operand" ""))
(match_operand 1 "" ""))
(use (reg:SI 48))
(clobber (reg:SI 17))])]
""
"operands[0] = force_reg (SImode, XEXP (operands[0], 0));")
......@@ -2423,6 +3143,7 @@
[(parallel [(set (match_operand 0 "arith_reg_operand" "")
(call (mem:SI (match_operand 1 "arith_reg_operand" ""))
(match_operand 2 "" "")))
(use (reg:SI 48))
(clobber (reg:SI 17))])]
""
"operands[1] = force_reg (SImode, XEXP (operands[1], 0));")
......@@ -2656,9 +3377,16 @@
}"
[(set_attr "length" "4")])
;; ??? This is not the proper place to invoke another compiler pass;
;; Alas, there is no proper place to put it.
;; ??? This is also an odd place for the call to emit_fpscr_use. It
;; would be all right if it were for an define_expand for return, but
;; that doesn't mix with emitting a prologue.
(define_insn "return"
[(return)]
"reload_completed"
"emit_fpscr_use (),
remove_dead_before_cse (),
reload_completed"
"%@ %#"
[(set_attr "type" "return")
(set_attr "needs_delay_slot" "yes")])
......@@ -2726,19 +3454,15 @@
""
"
{
if (GET_MODE (sh_compare_op0) == SFmode)
if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT)
{
if (TARGET_IEEE)
{
rtx t_reg = gen_rtx (REG, SImode, T_REG);
rtx lab = gen_label_rtx ();
emit_insn (gen_rtx (SET, VOIDmode, t_reg,
gen_rtx (EQ, SImode, sh_compare_op0,
sh_compare_op1)));
prepare_scc_operands (EQ);
emit_jump_insn (gen_branch_true (lab));
emit_insn (gen_rtx (SET, VOIDmode, t_reg,
gen_rtx (GT, SImode, sh_compare_op0,
sh_compare_op1)));
prepare_scc_operands (GT);
emit_label (lab);
emit_insn (gen_movt (operands[0]));
}
......@@ -2963,7 +3687,7 @@
(use (match_operand:SI 0 "arith_reg_operand" "r"))
(clobber (reg:SI 17))
(clobber (reg:SI 0))])]
""
"! TARGET_HARD_SH4"
"jsr @%0%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
......@@ -2978,7 +3702,38 @@
(clobber (reg:SI 5))
(clobber (reg:SI 6))
(clobber (reg:SI 0))])]
""
"! TARGET_HARD_SH4"
"jsr @%0%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "block_move_real_i4"
[(parallel [(set (mem:BLK (reg:SI 4))
(mem:BLK (reg:SI 5)))
(use (match_operand:SI 0 "arith_reg_operand" "r"))
(clobber (reg:SI 17))
(clobber (reg:SI 0))
(clobber (reg:SI 1))
(clobber (reg:SI 2))])]
"TARGET_HARD_SH4"
"jsr @%0%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
(define_insn "block_lump_real_i4"
[(parallel [(set (mem:BLK (reg:SI 4))
(mem:BLK (reg:SI 5)))
(use (match_operand:SI 0 "arith_reg_operand" "r"))
(use (reg:SI 6))
(clobber (reg:SI 17))
(clobber (reg:SI 4))
(clobber (reg:SI 5))
(clobber (reg:SI 6))
(clobber (reg:SI 0))
(clobber (reg:SI 1))
(clobber (reg:SI 2))
(clobber (reg:SI 3))])]
"TARGET_HARD_SH4"
"jsr @%0%#"
[(set_attr "type" "sfunc")
(set_attr "needs_delay_slot" "yes")])
......@@ -2989,43 +3744,188 @@
;; ??? All patterns should have a type attribute.
(define_insn "addsf3"
(define_expand "fpu_switch0"
[(set (match_operand:SI 0 "" "") (symbol_ref "__fpscr_values"))
(set (match_dup 2) (match_dup 1))]
""
"
{
operands[1] = gen_rtx (MEM, PSImode, operands[0]);
RTX_UNCHANGING_P (operands[1]) = 1;
operands[2] = get_fpscr_rtx ();
}")
(define_expand "fpu_switch1"
[(set (match_operand:SI 0 "" "") (symbol_ref "__fpscr_values"))
(set (match_dup 1) (plus:SI (match_dup 0) (const_int 4)))
(set (match_dup 3) (match_dup 2))]
""
"
{
operands[1] = gen_reg_rtx (SImode);
operands[2] = gen_rtx (MEM, PSImode, operands[1]);
RTX_UNCHANGING_P (operands[2]) = 1;
operands[3] = get_fpscr_rtx ();
}")
(define_expand "movpsi"
[(set (match_operand:PSI 0 "register_operand" "")
(match_operand:PSI 1 "general_movsrc_operand" ""))]
""
"")
;; The c / m alternative is a fake to guide reload to load directly into
;; fpscr, since reload doesn't know how to use post-increment.
;; GO_IF_LEGITIMATE_ADDRESS guards about bogus addresses before reload,
;; SECONDARY_INPUT_RELOAD_CLASS does this during reload, and the insn's
;; predicate after reload.
;; The gp_fpul type for r/!c might look a bit odd, but it actually schedules
;; like a gpr <-> fpul move.
(define_insn "fpu_switch"
[(set (match_operand:PSI 0 "register_operand" "c,c,r,c,c,r,m,r")
(match_operand:PSI 1 "general_movsrc_operand" "c,>,m,m,r,r,r,!c"))]
"! reload_completed
|| true_regnum (operands[0]) != FPSCR_REG || GET_CODE (operands[1]) != MEM
|| GET_CODE (XEXP (operands[1], 0)) != PLUS"
"@
! precision stays the same
lds.l %1,fpscr
mov.l %1,%0
#
lds %1,fpscr
mov %1,%0
mov.l %1,%0
sts fpscr,%0"
[(set_attr "length" "0,2,2,4,2,2,2,2")
(set_attr "type" "dfp_conv,dfp_conv,load,dfp_conv,dfp_conv,move,store,gp_fpul")])
(define_split
[(set (reg:PSI 48) (mem:PSI (match_operand:SI 0 "register_operand" "r")))]
"find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))"
[(set (match_dup 0) (match_dup 0))]
"
{
rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (),
gen_rtx (MEM, PSImode,
gen_rtx (POST_INC, Pmode,
operands[0]))));
REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[0], NULL_RTX);
}")
(define_split
[(set (reg:PSI 48) (mem:PSI (match_operand:SI 0 "register_operand" "r")))]
""
[(set (match_dup 0) (plus:SI (match_dup 0) (const_int -4)))]
"
{
rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (),
gen_rtx (MEM, PSImode,
gen_rtx (POST_INC, Pmode,
operands[0]))));
REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[0], NULL_RTX);
}")
;; ??? This uses the fp unit, but has no type indicating that.
;; If we did that, this would either give a bogus latency or introduce
;; a bogus FIFO constraint.
;; Since this insn is currently only used for prologues/epilogues,
;; it is probably best to claim no function unit, which matches the
;; current setting.
(define_insn "toggle_sz"
[(set (reg:PSI 48) (xor:PSI (reg:PSI 48) (const_int 1048576)))]
"TARGET_SH4"
"fschg")
(define_expand "addsf3"
[(match_operand:SF 0 "arith_reg_operand" "")
(match_operand:SF 1 "arith_reg_operand" "")
(match_operand:SF 2 "arith_reg_operand" "")]
"TARGET_SH3E"
"{ expand_sf_binop (&gen_addsf3_i, operands); DONE; }")
(define_insn "addsf3_i"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(plus:SF (match_operand:SF 1 "arith_reg_operand" "%0")
(match_operand:SF 2 "arith_reg_operand" "f")))]
(match_operand:SF 2 "arith_reg_operand" "f")))
(use (match_operand:PSI 3 "fpscr_operand" "c"))]
"TARGET_SH3E"
"fadd %2,%0"
[(set_attr "type" "fp")])
(define_insn "subsf3"
(define_expand "subsf3"
[(match_operand:SF 0 "arith_reg_operand" "")
(match_operand:SF 1 "arith_reg_operand" "")
(match_operand:SF 2 "arith_reg_operand" "")]
"TARGET_SH3E"
"{ expand_sf_binop (&gen_subsf3_i, operands); DONE; }")
(define_insn "subsf3_i"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(minus:SF (match_operand:SF 1 "arith_reg_operand" "0")
(match_operand:SF 2 "arith_reg_operand" "f")))]
(match_operand:SF 2 "arith_reg_operand" "f")))
(use (match_operand:PSI 3 "fpscr_operand" "c"))]
"TARGET_SH3E"
"fsub %2,%0"
[(set_attr "type" "fp")])
(define_insn "mulsf3"
;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR
;; register in feeding fp instructions. Thus, we cannot generate fmac for
;; mixed-precision SH4 targets. To allow it to be still generated for the
;; SH3E, we use a separate insn for SH3E mulsf3.
(define_expand "mulsf3"
[(match_operand:SF 0 "arith_reg_operand" "")
(match_operand:SF 1 "arith_reg_operand" "")
(match_operand:SF 2 "arith_reg_operand" "")]
"TARGET_SH3E"
"
{
if (TARGET_SH4)
expand_sf_binop (&gen_mulsf3_i4, operands);
else
emit_insn (gen_mulsf3_ie (operands[0], operands[1], operands[2]));
DONE;
}")
(define_insn "mulsf3_i4"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(mult:SF (match_operand:SF 1 "arith_reg_operand" "%0")
(match_operand:SF 2 "arith_reg_operand" "f")))]
(match_operand:SF 2 "arith_reg_operand" "f")))
(use (match_operand:PSI 3 "fpscr_operand" "c"))]
"TARGET_SH3E"
"fmul %2,%0"
[(set_attr "type" "fp")])
(define_insn "mulsf3_ie"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(mult:SF (match_operand:SF 1 "arith_reg_operand" "%0")
(match_operand:SF 2 "arith_reg_operand" "f")))]
"TARGET_SH3E && ! TARGET_SH4"
"fmul %2,%0"
[(set_attr "type" "fp")])
(define_insn "*macsf3"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(plus:SF (mult:SF (match_operand:SF 1 "arith_reg_operand" "%w")
(match_operand:SF 2 "arith_reg_operand" "f"))
(match_operand:SF 3 "arith_reg_operand" "0")))]
"TARGET_SH3E"
(match_operand:SF 3 "arith_reg_operand" "0")))
(use (match_operand:PSI 4 "fpscr_operand" "c"))]
"TARGET_SH3E && ! TARGET_SH4"
"fmac fr0,%2,%0"
[(set_attr "type" "fp")])
(define_insn "divsf3"
(define_expand "divsf3"
[(match_operand:SF 0 "arith_reg_operand" "")
(match_operand:SF 1 "arith_reg_operand" "")
(match_operand:SF 2 "arith_reg_operand" "")]
"TARGET_SH3E"
"{ expand_sf_binop (&gen_divsf3_i, operands); DONE; }")
(define_insn "divsf3_i"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(div:SF (match_operand:SF 1 "arith_reg_operand" "0")
(match_operand:SF 2 "arith_reg_operand" "f")))]
(match_operand:SF 2 "arith_reg_operand" "f")))
(use (match_operand:PSI 3 "fpscr_operand" "c"))]
"TARGET_SH3E"
"fdiv %2,%0"
[(set_attr "type" "fdiv")])
......@@ -3033,15 +3933,34 @@
(define_expand "floatsisf2"
[(set (reg:SI 22)
(match_operand:SI 1 "arith_reg_operand" ""))
(set (match_operand:SF 0 "arith_reg_operand" "")
(float:SF (reg:SI 22)))]
(parallel [(set (match_operand:SF 0 "arith_reg_operand" "")
(float:SF (reg:SI 22)))
(use (match_dup 2))])]
"TARGET_SH3E"
"")
"
{
if (TARGET_SH4)
{
emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 22),
operands[1]));
emit_sf_insn (gen_floatsisf2_i4 (operands[0], get_fpscr_rtx ()));
DONE;
}
operands[2] = get_fpscr_rtx ();
}")
(define_insn "floatsisf2_i4"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(float:SF (reg:SI 22)))
(use (match_operand:PSI 1 "fpscr_operand" "c"))]
"TARGET_SH3E"
"float fpul,%0"
[(set_attr "type" "fp")])
(define_insn "*floatsisf2_ie"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(float:SF (reg:SI 22)))]
"TARGET_SH3E"
"TARGET_SH3E && ! TARGET_SH4"
"float fpul,%0"
[(set_attr "type" "fp")])
......@@ -3051,26 +3970,62 @@
(set (match_operand:SI 0 "arith_reg_operand" "=r")
(reg:SI 22))]
"TARGET_SH3E"
"")
"
{
if (TARGET_SH4)
{
emit_sf_insn (gen_fix_truncsfsi2_i4 (operands[1], get_fpscr_rtx ()));
emit_insn (gen_rtx (SET, VOIDmode, operands[0],
gen_rtx (REG, SImode, 22)));
DONE;
}
}")
(define_insn "fix_truncsfsi2_i4"
[(set (reg:SI 22)
(fix:SI (match_operand:SF 0 "arith_reg_operand" "f")))
(use (match_operand:PSI 1 "fpscr_operand" "c"))]
"TARGET_SH4"
"ftrc %0,fpul"
[(set_attr "type" "fp")])
(define_insn "fix_truncsfsi2_i4_2"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(fix:SI (match_operand:SF 1 "arith_reg_operand" "f")))
(use (reg:SI 48))
(clobber (reg:SI 22))]
"TARGET_SH4"
"#"
[(set_attr "length" "4")])
(define_split
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(fix:SI (match_operand:SF 1 "arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))
(clobber (reg:SI 22))]
"TARGET_SH4"
[(parallel [(set (reg:SI 22) (fix:SI (match_dup 1)))
(use (match_dup 2))])
(set (match_dup 0) (reg:SI 22))])
(define_insn "*fixsfsi"
[(set (reg:SI 22)
(fix:SI (match_operand:SF 0 "arith_reg_operand" "f")))]
"TARGET_SH3E"
"TARGET_SH3E && ! TARGET_SH4"
"ftrc %0,fpul"
[(set_attr "type" "fp")])
(define_insn "cmpgtsf_t"
[(set (reg:SI 18) (gt:SI (match_operand:SF 0 "arith_reg_operand" "f")
(match_operand:SF 1 "arith_reg_operand" "f")))]
"TARGET_SH3E"
"TARGET_SH3E && ! TARGET_SH4"
"fcmp/gt %1,%0"
[(set_attr "type" "fp")])
(define_insn "cmpeqsf_t"
[(set (reg:SI 18) (eq:SI (match_operand:SF 0 "arith_reg_operand" "f")
(match_operand:SF 1 "arith_reg_operand" "f")))]
"TARGET_SH3E"
"TARGET_SH3E && ! TARGET_SH4"
"fcmp/eq %1,%0"
[(set_attr "type" "fp")])
......@@ -3078,11 +4033,36 @@
[(set (reg:SI 18) (ior:SI (reg:SI 18)
(eq:SI (match_operand:SF 0 "arith_reg_operand" "f")
(match_operand:SF 1 "arith_reg_operand" "f"))))]
"TARGET_SH3E && TARGET_IEEE"
"TARGET_SH3E && TARGET_IEEE && ! TARGET_SH4"
"* return output_ieee_ccmpeq (insn, operands);"
[(set_attr "length" "4")])
(define_insn "cmpgtsf_t_i4"
[(set (reg:SI 18) (gt:SI (match_operand:SF 0 "arith_reg_operand" "f")
(match_operand:SF 1 "arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"TARGET_SH4"
"fcmp/gt %1,%0"
[(set_attr "type" "fp")])
(define_insn "cmpeqsf_t_i4"
[(set (reg:SI 18) (eq:SI (match_operand:SF 0 "arith_reg_operand" "f")
(match_operand:SF 1 "arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"TARGET_SH4"
"fcmp/eq %1,%0"
[(set_attr "type" "fp")])
(define_insn "*ieee_ccmpeqsf_t_4"
[(set (reg:SI 18) (ior:SI (reg:SI 18)
(eq:SI (match_operand:SF 0 "arith_reg_operand" "f")
(match_operand:SF 1 "arith_reg_operand" "f"))))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"TARGET_IEEE && TARGET_SH4"
"* return output_ieee_ccmpeq (insn, operands);"
[(set_attr "length" "4")])
(define_expand "cmpsf"
[(set (reg:SI 18) (compare (match_operand:SF 0 "arith_operand" "")
(match_operand:SF 1 "arith_operand" "")))]
......@@ -3094,25 +4074,285 @@
DONE;
}")
(define_insn "negsf2"
(define_expand "negsf2"
[(match_operand:SF 0 "arith_reg_operand" "")
(match_operand:SF 1 "arith_reg_operand" "")]
"TARGET_SH3E"
"{ expand_sf_unop (&gen_negsf2_i, operands); DONE; }")
(define_insn "negsf2_i"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(neg:SF (match_operand:SF 1 "arith_reg_operand" "0")))]
(neg:SF (match_operand:SF 1 "arith_reg_operand" "0")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"TARGET_SH3E"
"fneg %0"
[(set_attr "type" "fp")])
[(set_attr "type" "fmove")])
(define_insn "sqrtsf2"
(define_expand "sqrtsf2"
[(match_operand:SF 0 "arith_reg_operand" "")
(match_operand:SF 1 "arith_reg_operand" "")]
"TARGET_SH3E"
"{ expand_sf_unop (&gen_sqrtsf2_i, operands); DONE; }")
(define_insn "sqrtsf2_i"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(sqrt:SF (match_operand:SF 1 "arith_reg_operand" "0")))]
(sqrt:SF (match_operand:SF 1 "arith_reg_operand" "0")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"TARGET_SH3E"
"fsqrt %0"
[(set_attr "type" "fdiv")])
(define_insn "abssf2"
(define_expand "abssf2"
[(match_operand:SF 0 "arith_reg_operand" "")
(match_operand:SF 1 "arith_reg_operand" "")]
"TARGET_SH3E"
"{ expand_sf_unop (&gen_abssf2_i, operands); DONE; }")
(define_insn "abssf2_i"
[(set (match_operand:SF 0 "arith_reg_operand" "=f")
(abs:SF (match_operand:SF 1 "arith_reg_operand" "0")))]
(abs:SF (match_operand:SF 1 "arith_reg_operand" "0")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"TARGET_SH3E"
"fabs %0"
[(set_attr "type" "fmove")])
(define_expand "adddf3"
[(match_operand:DF 0 "arith_reg_operand" "")
(match_operand:DF 1 "arith_reg_operand" "")
(match_operand:DF 2 "arith_reg_operand" "")]
"TARGET_SH4"
"{ expand_df_binop (&gen_adddf3_i, operands); DONE; }")
(define_insn "adddf3_i"
[(set (match_operand:DF 0 "arith_reg_operand" "=f")
(plus:DF (match_operand:DF 1 "arith_reg_operand" "%0")
(match_operand:DF 2 "arith_reg_operand" "f")))
(use (match_operand:PSI 3 "fpscr_operand" "c"))]
"TARGET_SH4"
"fadd %2,%0"
[(set_attr "type" "dfp_arith")])
(define_expand "subdf3"
[(match_operand:DF 0 "arith_reg_operand" "")
(match_operand:DF 1 "arith_reg_operand" "")
(match_operand:DF 2 "arith_reg_operand" "")]
"TARGET_SH4"
"{ expand_df_binop (&gen_subdf3_i, operands); DONE; }")
(define_insn "subdf3_i"
[(set (match_operand:DF 0 "arith_reg_operand" "=f")
(minus:DF (match_operand:DF 1 "arith_reg_operand" "0")
(match_operand:DF 2 "arith_reg_operand" "f")))
(use (match_operand:PSI 3 "fpscr_operand" "c"))]
"TARGET_SH4"
"fsub %2,%0"
[(set_attr "type" "dfp_arith")])
(define_expand "muldf3"
[(match_operand:DF 0 "arith_reg_operand" "")
(match_operand:DF 1 "arith_reg_operand" "")
(match_operand:DF 2 "arith_reg_operand" "")]
"TARGET_SH4"
"{ expand_df_binop (&gen_muldf3_i, operands); DONE; }")
(define_insn "muldf3_i"
[(set (match_operand:DF 0 "arith_reg_operand" "=f")
(mult:DF (match_operand:DF 1 "arith_reg_operand" "%0")
(match_operand:DF 2 "arith_reg_operand" "f")))
(use (match_operand:PSI 3 "fpscr_operand" "c"))]
"TARGET_SH4"
"fmul %2,%0"
[(set_attr "type" "dfp_arith")])
(define_expand "divdf3"
[(match_operand:DF 0 "arith_reg_operand" "")
(match_operand:DF 1 "arith_reg_operand" "")
(match_operand:DF 2 "arith_reg_operand" "")]
"TARGET_SH4"
"{ expand_df_binop (&gen_divdf3_i, operands); DONE; }")
(define_insn "divdf3_i"
[(set (match_operand:DF 0 "arith_reg_operand" "=f")
(div:DF (match_operand:DF 1 "arith_reg_operand" "0")
(match_operand:DF 2 "arith_reg_operand" "f")))
(use (match_operand:PSI 3 "fpscr_operand" "c"))]
"TARGET_SH4"
"fdiv %2,%0"
[(set_attr "type" "dfdiv")])
(define_expand "floatsidf2"
[(match_operand:DF 0 "arith_reg_operand" "")
(match_operand:SI 1 "arith_reg_operand" "")]
"TARGET_SH4"
"
{
emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 22), operands[1]));
emit_df_insn (gen_floatsidf2_i (operands[0], get_fpscr_rtx ()));
DONE;
}")
(define_insn "floatsidf2_i"
[(set (match_operand:DF 0 "arith_reg_operand" "=f")
(float:DF (reg:SI 22)))
(use (match_operand:PSI 1 "fpscr_operand" "c"))]
"TARGET_SH4"
"float fpul,%0"
[(set_attr "type" "dfp_conv")])
(define_expand "fix_truncdfsi2"
[(match_operand:SI 0 "arith_reg_operand" "=r")
(match_operand:DF 1 "arith_reg_operand" "f")]
"TARGET_SH4"
"
{
emit_df_insn (gen_fix_truncdfsi2_i (operands[1], get_fpscr_rtx ()));
emit_insn (gen_rtx (SET, VOIDmode, operands[0], gen_rtx (REG, SImode, 22)));
DONE;
}")
(define_insn "fix_truncdfsi2_i"
[(set (reg:SI 22)
(fix:SI (match_operand:DF 0 "arith_reg_operand" "f")))
(use (match_operand:PSI 1 "fpscr_operand" "c"))]
"TARGET_SH4"
"ftrc %0,fpul"
[(set_attr "type" "dfp_conv")])
(define_insn "fix_truncdfsi2_i4"
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(fix:SI (match_operand:DF 1 "arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))
(clobber (reg:SI 22))]
"TARGET_SH4"
"#"
[(set_attr "length" "4")])
(define_split
[(set (match_operand:SI 0 "arith_reg_operand" "=r")
(fix:SI (match_operand:DF 1 "arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))
(clobber (reg:SI 22))]
"TARGET_SH4"
[(parallel [(set (reg:SI 22) (fix:SI (match_dup 1)))
(use (match_dup 2))])
(set (match_dup 0) (reg:SI 22))])
(define_insn "cmpgtdf_t"
[(set (reg:SI 18) (gt:SI (match_operand:DF 0 "arith_reg_operand" "f")
(match_operand:DF 1 "arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"TARGET_SH4"
"fcmp/gt %1,%0"
[(set_attr "type" "dfp_cmp")])
(define_insn "cmpeqdf_t"
[(set (reg:SI 18) (eq:SI (match_operand:DF 0 "arith_reg_operand" "f")
(match_operand:DF 1 "arith_reg_operand" "f")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"TARGET_SH4"
"fcmp/eq %1,%0"
[(set_attr "type" "dfp_cmp")])
(define_insn "*ieee_ccmpeqdf_t"
[(set (reg:SI 18) (ior:SI (reg:SI 18)
(eq:SI (match_operand:DF 0 "arith_reg_operand" "f")
(match_operand:DF 1 "arith_reg_operand" "f"))))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"TARGET_IEEE && TARGET_SH4"
"* return output_ieee_ccmpeq (insn, operands);"
[(set_attr "length" "4")])
(define_expand "cmpdf"
[(set (reg:SI 18) (compare (match_operand:DF 0 "arith_operand" "")
(match_operand:DF 1 "arith_operand" "")))]
"TARGET_SH4"
"
{
sh_compare_op0 = operands[0];
sh_compare_op1 = operands[1];
DONE;
}")
(define_expand "negdf2"
[(match_operand:DF 0 "arith_reg_operand" "")
(match_operand:DF 1 "arith_reg_operand" "")]
"TARGET_SH4"
"{ expand_df_unop (&gen_negdf2_i, operands); DONE; }")
(define_insn "negdf2_i"
[(set (match_operand:DF 0 "arith_reg_operand" "=f")
(neg:DF (match_operand:DF 1 "arith_reg_operand" "0")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"TARGET_SH4"
"fneg %0"
[(set_attr "type" "fmove")])
(define_expand "sqrtdf2"
[(match_operand:DF 0 "arith_reg_operand" "")
(match_operand:DF 1 "arith_reg_operand" "")]
"TARGET_SH4"
"{ expand_df_unop (&gen_sqrtdf2_i, operands); DONE; }")
(define_insn "sqrtdf2_i"
[(set (match_operand:DF 0 "arith_reg_operand" "=f")
(sqrt:DF (match_operand:DF 1 "arith_reg_operand" "0")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"TARGET_SH4"
"fsqrt %0"
[(set_attr "type" "dfdiv")])
(define_expand "absdf2"
[(match_operand:DF 0 "arith_reg_operand" "")
(match_operand:DF 1 "arith_reg_operand" "")]
"TARGET_SH4"
"{ expand_df_unop (&gen_absdf2_i, operands); DONE; }")
(define_insn "absdf2_i"
[(set (match_operand:DF 0 "arith_reg_operand" "=f")
(abs:DF (match_operand:DF 1 "arith_reg_operand" "0")))
(use (match_operand:PSI 2 "fpscr_operand" "c"))]
"TARGET_SH4"
"fabs %0"
[(set_attr "type" "fmove")])
(define_expand "extendsfdf2"
[(match_operand:DF 0 "arith_reg_operand" "")
(match_operand:SF 1 "arith_reg_operand" "")]
"TARGET_SH4"
"
{
emit_sf_insn (gen_movsf_ie (gen_rtx (REG, SFmode, 22), operands[1],
get_fpscr_rtx ()));
emit_df_insn (gen_extendsfdf2_i4 (operands[0], get_fpscr_rtx ()));
DONE;
}")
(define_insn "extendsfdf2_i4"
[(set (match_operand:DF 0 "arith_reg_operand" "=f")
(float_extend:DF (reg:SF 22)))
(use (match_operand:PSI 1 "fpscr_operand" "c"))]
"TARGET_SH4"
"fcnvsd fpul,%0"
[(set_attr "type" "fp")])
(define_expand "truncdfsf2"
[(match_operand:SF 0 "arith_reg_operand" "")
(match_operand:DF 1 "arith_reg_operand" "")]
"TARGET_SH4"
"
{
emit_df_insn (gen_truncdfsf2_i4 (operands[1], get_fpscr_rtx ()));
emit_sf_insn (gen_movsf_ie (operands[0], gen_rtx (REG, SFmode, 22),
get_fpscr_rtx ()));
DONE;
}")
(define_insn "truncdfsf2_i4"
[(set (reg:SF 22)
(float_truncate:SF (match_operand:DF 0 "arith_reg_operand" "f")))
(use (match_operand:PSI 1 "fpscr_operand" "c"))]
"TARGET_SH4"
"fcnvds %0,fpul"
[(set_attr "type" "fp")])
;; Bit field extract patterns. These give better code for packed bitfields,
......
CROSS_LIBGCC1 = libgcc1-asm.a
LIB1ASMSRC = sh/lib1funcs.asm
LIB1ASMFUNCS = _ashiftrt _ashiftrt_n _ashiftlt _lshiftrt _movstr \
_mulsi3 _sdivsi3 _udivsi3 _set_fpscr
_movstr_i4 _mulsi3 _sdivsi3 _sdivsi3_i4 _udivsi3 _udivsi3_i4 _set_fpscr
# These are really part of libgcc1, but this will cause them to be
# built correctly, so...
......@@ -21,7 +21,7 @@ fp-bit.c: $(srcdir)/config/fp-bit.c
echo '#endif' >> fp-bit.c
cat $(srcdir)/config/fp-bit.c >> fp-bit.c
MULTILIB_OPTIONS= ml m2/m3e
MULTILIB_OPTIONS= ml m2/m3e/m4-single-only/m4-single/m4
MULTILIB_DIRNAMES=
MULTILIB_MATCHES = m2=m3
......
......@@ -6,10 +6,10 @@
#ifndef __GNUC_VA_LIST
#define __GNUC_VA_LIST
#ifdef __SH3E__
#if defined (__SH3E__) || defined (__SH4_SINGLE__) || defined (__SH4__) || defined (__SH4_SINGLE_ONLY__)
typedef long __va_greg;
typedef double __va_freg;
typedef float __va_freg;
typedef struct {
__va_greg * __va_next_o; /* next available register */
......@@ -33,24 +33,24 @@ typedef void *__gnuc_va_list;
#ifdef _STDARG_H
#ifdef __SH3E__
#if defined (__SH3E__) || defined (__SH4_SINGLE__) || defined (__SH4__) || defined (__SH4_SINGLE_ONLY__)
#define va_start(AP, LASTARG) \
__extension__ \
({ \
AP.__va_next_fp = (__va_freg *) __builtin_saveregs (); \
AP.__va_next_fp_limit = (AP.__va_next_fp + \
(AP).__va_next_fp = (__va_freg *) __builtin_saveregs (); \
(AP).__va_next_fp_limit = ((AP).__va_next_fp + \
(__builtin_args_info (1) < 8 ? 8 - __builtin_args_info (1) : 0)); \
AP.__va_next_o = (__va_greg *) AP.__va_next_fp_limit; \
AP.__va_next_o_limit = (AP.__va_next_o + \
(AP).__va_next_o = (__va_greg *) (AP).__va_next_fp_limit; \
(AP).__va_next_o_limit = ((AP).__va_next_o + \
(__builtin_args_info (0) < 4 ? 4 - __builtin_args_info (0) : 0)); \
AP.__va_next_stack = (__va_greg *) __builtin_next_arg (LASTARG); \
(AP).__va_next_stack = (__va_greg *) __builtin_next_arg (LASTARG); \
})
#else /* ! SH3E */
#define va_start(AP, LASTARG) \
(AP = ((__gnuc_va_list) __builtin_next_arg (LASTARG)))
((AP) = ((__gnuc_va_list) __builtin_next_arg (LASTARG)))
#endif /* ! SH3E */
......@@ -59,24 +59,26 @@ __extension__ \
#define va_alist __builtin_va_alist
#define va_dcl int __builtin_va_alist;...
#ifdef __SH3E__
#if defined (__SH3E__) || defined (__SH4_SINGLE__) || defined (__SH4__) || defined (__SH4_SINGLE_ONLY__)
#define va_start(AP) \
__extension__ \
({ \
AP.__va_next_fp = (__va_freg *) __builtin_saveregs (); \
AP.__va_next_fp_limit = (AP.__va_next_fp + \
(AP).__va_next_fp = (__va_freg *) __builtin_saveregs (); \
(AP).__va_next_fp_limit = ((AP).__va_next_fp + \
(__builtin_args_info (1) < 8 ? 8 - __builtin_args_info (1) : 0)); \
AP.__va_next_o = (__va_greg *) AP.__va_next_fp_limit; \
AP.__va_next_o_limit = (AP.__va_next_o + \
(AP).__va_next_o = (__va_greg *) (AP).__va_next_fp_limit; \
(AP).__va_next_o_limit = ((AP).__va_next_o + \
(__builtin_args_info (0) < 4 ? 4 - __builtin_args_info (0) : 0)); \
AP.__va_next_stack = (__va_greg *) __builtin_next_arg (__builtin_va_alist) \
- (__builtin_args_info (0) >= 4 || __builtin_args_info (1) >= 8 ? 1 : 0); \
(AP).__va_next_stack \
= ((__va_greg *) __builtin_next_arg (__builtin_va_alist) \
- (__builtin_args_info (0) >= 4 || __builtin_args_info (1) >= 8 \
? 1 : 0)); \
})
#else /* ! SH3E */
#define va_start(AP) AP=(char *) &__builtin_va_alist
#define va_start(AP) ((AP) = (char *) &__builtin_va_alist)
#endif /* ! SH3E */
......@@ -136,53 +138,78 @@ enum __va_type_classes {
We want the MEM_IN_STRUCT_P bit set in the emitted RTL, therefore we
use unions even when it would otherwise be unnecessary. */
/* gcc has an extension that allows to use a casted lvalue as an lvalue,
But it doesn't work in C++ with -pedantic - even in the presence of
__extension__ . We work around this problem by using a reference type. */
#ifdef __cplusplus
#define __VA_REF &
#else
#define __VA_REF
#endif
#define __va_arg_sh1(AP, TYPE) __extension__ \
__extension__ \
({(sizeof (TYPE) == 1 \
? ({union {TYPE t; char c;} __t; \
asm("" \
: "=r" (__t.c) \
: "0" ((((union { int i, j; } *) (AP))++)->i)); \
__asm("" \
: "=r" (__t.c) \
: "0" ((((union { int i, j; } *__VA_REF) (AP))++)->i)); \
__t.t;}) \
: sizeof (TYPE) == 2 \
? ({union {TYPE t; short s;} __t; \
asm("" \
: "=r" (__t.s) \
: "0" ((((union { int i, j; } *) (AP))++)->i)); \
__asm("" \
: "=r" (__t.s) \
: "0" ((((union { int i, j; } *__VA_REF) (AP))++)->i)); \
__t.t;}) \
: sizeof (TYPE) >= 4 || __LITTLE_ENDIAN_P \
? (((union { TYPE t; int i;} *) (AP))++)->t \
: ((union {TYPE t;TYPE u;}*) ((char *)++(int *)(AP) - sizeof (TYPE)))->t);})
? (((union { TYPE t; int i;} *__VA_REF) (AP))++)->t \
: ((union {TYPE t;TYPE u;}*) ((char *)++(int *__VA_REF)(AP) - sizeof (TYPE)))->t);})
#ifdef __SH3E__
#if defined (__SH3E__) || defined (__SH4_SINGLE__) || defined (__SH4__) || defined (__SH4_SINGLE_ONLY__)
#define __PASS_AS_FLOAT(TYPE_CLASS,SIZE) \
(TYPE_CLASS == __real_type_class && SIZE == 4)
#define __TARGET_SH4_P 0
#if defined(__SH4__) || defined(__SH4_SINGLE__)
#undef __PASS_AS_FLOAT
#define __PASS_AS_FLOAT(TYPE_CLASS,SIZE) \
(TYPE_CLASS == __real_type_class && SIZE <= 8 \
|| TYPE_CLASS == __complex_type_class && SIZE <= 16)
#undef __TARGET_SH4_P
#define __TARGET_SH4_P 1
#endif
#define va_arg(pvar,TYPE) \
__extension__ \
({int __type = __builtin_classify_type (* (TYPE *) 0); \
void * __result_p; \
if (__PASS_AS_FLOAT (__type, sizeof(TYPE))) \
{ \
if (pvar.__va_next_fp < pvar.__va_next_fp_limit) \
if ((pvar).__va_next_fp < (pvar).__va_next_fp_limit) \
{ \
__result_p = &pvar.__va_next_fp; \
if (((__type == __real_type_class && sizeof (TYPE) > 4)\
|| sizeof (TYPE) > 8) \
&& (((int) (pvar).__va_next_fp ^ (int) (pvar).__va_next_fp_limit)\
& 4)) \
(pvar).__va_next_fp++; \
__result_p = &(pvar).__va_next_fp; \
} \
else \
__result_p = &pvar.__va_next_stack; \
__result_p = &(pvar).__va_next_stack; \
} \
else \
{ \
if (pvar.__va_next_o + ((sizeof (TYPE) + 3) / 4) \
<= pvar.__va_next_o_limit) \
__result_p = &pvar.__va_next_o; \
if ((pvar).__va_next_o + ((sizeof (TYPE) + 3) / 4) \
<= (pvar).__va_next_o_limit) \
__result_p = &(pvar).__va_next_o; \
else \
{ \
if (sizeof (TYPE) > 4) \
pvar.__va_next_o = pvar.__va_next_o_limit; \
if (! __TARGET_SH4_P) \
(pvar).__va_next_o = (pvar).__va_next_o_limit; \
\
__result_p = &pvar.__va_next_stack; \
__result_p = &(pvar).__va_next_stack; \
} \
} \
__va_arg_sh1(*(void **)__result_p, TYPE);})
......@@ -194,6 +221,6 @@ __extension__ \
#endif /* SH3E */
/* Copy __gnuc_va_list into another variable of this type. */
#define __va_copy(dest, src) (dest) = (src)
#define __va_copy(dest, src) ((dest) = (src))
#endif /* defined (_STDARG_H) || defined (_VARARGS_H) */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment