Commit 3fbee523 by Bernd Schmidt Committed by Bernd Schmidt

lib1funcs.asm (___umulsi3_highpart, [...]): Use a more efficient implementation.

	* config/bfin/lib1funcs.asm (___umulsi3_highpart, ___smulsi3_highpart):
	Use a more efficient implementation.
	* config/bfin/bfin.md (umulsi3_highpart, smulsi3_highpart): Emit
	inline sequences when not optimizing for size.

From-SVN: r123748
parent 9d3f9aa3
......@@ -17,6 +17,11 @@
(flag_macinit1hi): Tighten constraints.
(flag_mul_macv2hi_parts_acconly): New pattern.
* config/bfin/lib1funcs.asm (___umulsi3_highpart, ___smulsi3_highpart):
Use a more efficient implementation.
* config/bfin/bfin.md (umulsi3_highpart, smulsi3_highpart): Emit
inline sequences when not optimizing for size.
2007-02-11 Jie Zhang <jie.zhang@analog.com>
* config/bfin/bfin.opt (msim): New option.
(mcpu=): New option.
......
......@@ -1451,6 +1451,7 @@
[(set_attr "type" "mult")])
(define_expand "umulsi3_highpart"
[(parallel
[(set (match_operand:SI 0 "register_operand" "")
(truncate:SI
(lshiftrt:DI
......@@ -1458,19 +1459,49 @@
(match_operand:SI 1 "nonimmediate_operand" ""))
(zero_extend:DI
(match_operand:SI 2 "register_operand" "")))
(const_int 32))))]
(const_int 32))))
(clobber (reg:PDI REG_A0))
(clobber (reg:PDI REG_A1))])]
""
{
if (!optimize_size)
{
rtx a1reg = gen_rtx_REG (PDImode, REG_A1);
rtx a0reg = gen_rtx_REG (PDImode, REG_A0);
emit_insn (gen_flag_macinit1hi (a1reg,
gen_lowpart (HImode, operands[1]),
gen_lowpart (HImode, operands[2]),
GEN_INT (MACFLAG_FU)));
emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg,
gen_lowpart (V2HImode, operands[1]),
gen_lowpart (V2HImode, operands[2]),
const1_rtx, const1_rtx,
const1_rtx, const0_rtx, a1reg,
const0_rtx, GEN_INT (MACFLAG_FU),
GEN_INT (MACFLAG_FU)));
emit_insn (gen_flag_machi_parts_acconly (a1reg,
gen_lowpart (V2HImode, operands[2]),
gen_lowpart (V2HImode, operands[1]),
const1_rtx, const0_rtx,
a1reg, const0_rtx, GEN_INT (MACFLAG_FU)));
emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
emit_insn (gen_sum_of_accumulators (operands[0], a0reg, a0reg, a1reg));
}
else
{
rtx umulsi3_highpart_libfunc
= init_one_libfunc ("__umulsi3_highpart");
emit_library_call_value (umulsi3_highpart_libfunc,
operands[0], LCT_NORMAL, SImode,
2, operands[1], SImode, operands[2], SImode);
}
DONE;
})
(define_expand "smulsi3_highpart"
[(parallel
[(set (match_operand:SI 0 "register_operand" "")
(truncate:SI
(lshiftrt:DI
......@@ -1478,15 +1509,44 @@
(match_operand:SI 1 "nonimmediate_operand" ""))
(sign_extend:DI
(match_operand:SI 2 "register_operand" "")))
(const_int 32))))]
(const_int 32))))
(clobber (reg:PDI REG_A0))
(clobber (reg:PDI REG_A1))])]
""
{
if (!optimize_size)
{
rtx a1reg = gen_rtx_REG (PDImode, REG_A1);
rtx a0reg = gen_rtx_REG (PDImode, REG_A0);
emit_insn (gen_flag_macinit1hi (a1reg,
gen_lowpart (HImode, operands[1]),
gen_lowpart (HImode, operands[2]),
GEN_INT (MACFLAG_FU)));
emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg,
gen_lowpart (V2HImode, operands[1]),
gen_lowpart (V2HImode, operands[2]),
const1_rtx, const1_rtx,
const1_rtx, const0_rtx, a1reg,
const0_rtx, GEN_INT (MACFLAG_IS),
GEN_INT (MACFLAG_IS_M)));
emit_insn (gen_flag_machi_parts_acconly (a1reg,
gen_lowpart (V2HImode, operands[2]),
gen_lowpart (V2HImode, operands[1]),
const1_rtx, const0_rtx,
a1reg, const0_rtx, GEN_INT (MACFLAG_IS_M)));
emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (16)));
emit_insn (gen_sum_of_accumulators (operands[0], a0reg, a0reg, a1reg));
}
else
{
rtx smulsi3_highpart_libfunc
= init_one_libfunc ("__smulsi3_highpart");
emit_library_call_value (smulsi3_highpart_libfunc,
operands[0], LCT_NORMAL, SImode,
2, operands[1], SImode, operands[2], SImode);
}
DONE;
})
......
......@@ -123,17 +123,12 @@ ___umodsi3:
.type ___umulsi3_highpart, STT_FUNC;
/* ___umulsi3_highpart as rendered in this diff view.  Two bodies are
   listed back to back because the +/- markers were stripped.
   NOTE(review): judging by the commit message ("Use a more efficient
   implementation") and the matching inline expander in bfin.md, the
   R0-R3 sequence ending in "R0 = R1 + R2;" looks like the removed code
   and the A0/A1 sequence the added code -- confirm against the raw diff.
   Both sequences read their operands from R0 and R1 and leave the
   result in R0.  */
___umulsi3_highpart:
/* Register-only variant: 16x16 partial products are formed in R0-R3
   and summed with explicit handling of the ac0 flag.  */
R2 = R1.H * R0.H, R3 = R1.L * R0.H (FU);
R0 = R1.L * R0.L, R1 = R1.H * R0.L (FU);
R0 >>= 16;
/* Unsigned multiplication has the nice property that we can
ignore carry on this first addition. */
R0 = R0 + R3;
R0 = R0 + R1;
/* Capture ac0 from the addition above in R1 (presumably the carry
   out -- TODO confirm), then pack it with R0.h before the final add.  */
cc = ac0;
R1 = cc;
R1 = PACK(R1.l,R0.h);
R0 = R1 + R2;
/* Accumulator variant: low x low product goes into A1 and is shifted
   right by 16.  */
A1 = R1.L * R0.L (FU);
A1 = A1 >> 16;
/* High x high product goes into A0; both cross products are added
   into A1.  */
A0 = R1.H * R0.H, A1 += R1.L * R0.H (FU);
A1 += R0.L * R1.H (FU);
/* Shift A1 right by 16 again, then add it into A0 and copy the sum
   to R0.  */
A1 = A1 >> 16;
R0 = (A0 += A1);
RTS;
#endif
......@@ -143,24 +138,11 @@ ___umulsi3_highpart:
.type ___smulsi3_highpart, STT_FUNC;
/* ___smulsi3_highpart as rendered in this diff view.  Two bodies are
   listed back to back because the +/- markers were stripped.
   NOTE(review): judging by the commit message ("Use a more efficient
   implementation") and the matching inline expander in bfin.md, the
   R0-R3 sequence ending in "R0 = R0 + R1;" looks like the removed code
   and the A0/A1 sequence the added code -- confirm against the raw diff.
   Both sequences read their operands from R0 and R1 and leave the
   result in R0.  */
___smulsi3_highpart:
/* Register-only variant: partial products are formed in R0-R3; the
   low x low product uses (FU), the products involving a high half
   use (IS,M).  */
R2 = R1.L * R0.L (FU);
R3 = R1.H * R0.L (IS,M);
R0 = R0.H * R1.H, R1 = R0.H * R1.L (IS,M);
/* Two 16-bit additions; ac0 is read into cc after each one
   (presumably the carry out -- TODO confirm).  */
R1.L = R2.H + R1.L;
cc = ac0;
R2 = cc;
R1.L = R1.L + R3.L;
cc = ac0;
/* Shift R1 and R3 right by 16 with >>>, then add them together with
   both saved flag values and fold the total into R0.  */
R1 >>>= 16;
R3 >>>= 16;
R1 = R1 + R3;
R1 = R1 + R2;
R2 = cc;
R1 = R1 + R2;
R0 = R0 + R1;
/* Accumulator variant: low x low product goes into A1 with (FU) and
   is shifted right by 16 with >>.  */
A1 = R1.L * R0.L (FU);
A1 = A1 >> 16;
/* High x high product goes into A0; both cross products are added
   into A1 under (IS,M).  */
A0 = R0.H * R1.H, A1 += R0.H * R1.L (IS,M);
A1 += R1.H * R0.L (IS,M);
/* The second shift uses >>> rather than >> (the bfin.md expander
   uses ashrpdi3 at this step, i.e. an arithmetic shift); then add A1
   into A0 and copy the sum to R0.  */
A1 = A1 >>> 16;
R0 = (A0 += A1);
RTS;
#endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment