Commit e13d9d5a by Georg-Johann Lay, committed by Georg-Johann Lay

Adjust decimal point of signed accum mode to GCC default.

libgcc/
	Adjust decimal point of signed accum mode to GCC default.

	PR target/54222
	* config/avr/t-avr (LIB1ASMFUNCS): Add _fractsfsq _fractsfusq,
	_divqq_helper.
	* config/avr/lib1funcs-fixed.S (__fractqqsf, __fracthqsf)
	(__fractsasf, __fractsfha, __fractusqsf, __fractsfsa)
	(__mulha3, __mulsa3)
	(__divqq3, __divha3, __divsa3): Adjust to new position of
	decimal point of signed accum types. 
	
	(__mulusa3_round): New function.
	(__mulusa3): Use it.
	(__divqq_helper): New function.
	(__udivuqq3): Use it.

gcc/
	Adjust decimal point of signed accum mode to GCC default.

	PR target/54222
	* config/avr/avr-modes.def (HA, SA, DA): Remove mode adjustments.
	(TA): Move decimal point one bit to the right.
	* config/avr/avr.c (avr_out_fract): Rewrite.

From-SVN: r193721
parent 5eb4cb47
2012-11-22 Georg-Johann Lay <avr@gjlay.de>
Adjust decimal point of signed accum mode to GCC default.
PR target/54222
* config/avr/avr-modes.def (HA, SA, DA): Remove mode adjustments.
(TA): Move decimal point one bit to the right.
* config/avr/avr.c (avr_out_fract): Rewrite.
2012-11-21  Matthias Klose  <doko@ubuntu.com>
	* config/alpha/t-linux: New file; define MULTIARCH_DIRNAME.
FRACTIONAL_INT_MODE (PSI, 24, 3);
- /* On 8 bit machines it requires fewer instructions for fixed point
-    routines if the decimal place is on a byte boundary which is not
-    the default for signed accum types. */
- ADJUST_IBIT (HA, 7);
- ADJUST_FBIT (HA, 8);
- ADJUST_IBIT (SA, 15);
- ADJUST_FBIT (SA, 16);
- ADJUST_IBIT (DA, 31);
- ADJUST_FBIT (DA, 32);
/* Make TA and UTA 64 bits wide.
   128 bit wide modes would be insane on a 8-bit machine.
   This needs special treatment in avr.c and avr-lib.h. */
ADJUST_BYTESIZE (TA, 8);
ADJUST_ALIGNMENT (TA, 1);
- ADJUST_IBIT (TA, 15);
- ADJUST_FBIT (TA, 48);
+ ADJUST_IBIT (TA, 16);
+ ADJUST_FBIT (TA, 47);
ADJUST_BYTESIZE (UTA, 8);
ADJUST_ALIGNMENT (UTA, 1);
...
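For orientation only (not part of the commit): the signed accum layouts before and after this change, assuming that once the HA/SA/DA adjustments are removed, GCC's built-in accum layouts (where IBIT is one more than FBIT) take over.

/* Signed accum layouts on avr, sign bit counted separately (illustrative).
                     before (byte-aligned point)    after (GCC default)
   HA  (16 bits)      7 int + 8 fract                8 int + 7 fract
   SA  (32 bits)     15 int + 16 fract              16 int + 15 fract
   DA  (64 bits)     31 int + 32 fract              32 int + 31 fract
   TA  (64 bits)     15 int + 48 fract              16 int + 47 fract  */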
@@ -6974,6 +6974,332 @@ avr_out_addto_sp (rtx *op, int *plen)
}
/* Outputs instructions needed for fixed point type conversion.
This includes converting between any fixed point type, as well
as converting to any integer type. Conversion between integer
types is not supported.
Converting signed fractional types requires a bit shift if converting
to or from any unsigned fractional type because the decimal place is
shifted by 1 bit. When the destination is a signed fractional, the sign
is stored in either the carry or T bit. */
const char*
avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen)
{
size_t i;
rtx xop[6];
RTX_CODE shift = UNKNOWN;
bool sign_in_carry = false;
bool msb_in_carry = false;
bool lsb_in_carry = false;
const char *code_ashift = "lsl %0";
#define MAY_CLOBBER(RR) \
/* Shorthand used below. */ \
((sign_bytes \
&& IN_RANGE (RR, dest.regno_msb - sign_bytes + 1, dest.regno_msb)) \
|| (reg_unused_after (insn, all_regs_rtx[RR]) \
&& !IN_RANGE (RR, dest.regno, dest.regno_msb)))
struct
{
/* bytes : Length of operand in bytes.
ibyte : Length of integral part in bytes.
fbyte, fbit : Length of fractional part in bytes, bits. */
bool sbit;
unsigned fbit, bytes, ibyte, fbyte;
unsigned regno, regno_msb;
} dest, src, *val[2] = { &dest, &src };
if (plen)
*plen = 0;
/* Step 0: Determine information on source and destination operand we
====== will need in the remainder. */
for (i = 0; i < sizeof (val) / sizeof (*val); i++)
{
enum machine_mode mode;
xop[i] = operands[i];
mode = GET_MODE (xop[i]);
val[i]->bytes = GET_MODE_SIZE (mode);
val[i]->regno = REGNO (xop[i]);
val[i]->regno_msb = REGNO (xop[i]) + val[i]->bytes - 1;
if (SCALAR_INT_MODE_P (mode))
{
val[i]->sbit = intsigned;
val[i]->fbit = 0;
}
else if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
{
val[i]->sbit = SIGNED_SCALAR_FIXED_POINT_MODE_P (mode);
val[i]->fbit = GET_MODE_FBIT (mode);
}
else
fatal_insn ("unsupported fixed-point conversion", insn);
val[i]->fbyte = (1 + val[i]->fbit) / BITS_PER_UNIT;
val[i]->ibyte = val[i]->bytes - val[i]->fbyte;
}
// Byte offset of the decimal point taking into account different place
// of the decimal point in input and output and different register numbers
// of input and output.
int offset = dest.regno - src.regno + dest.fbyte - src.fbyte;
// Number of destination bytes that will come from sign / zero extension.
int sign_bytes = (dest.ibyte - src.ibyte) * (dest.ibyte > src.ibyte);
// Number of bytes at the low end to be filled with zeros.
int zero_bytes = (dest.fbyte - src.fbyte) * (dest.fbyte > src.fbyte);
// Do we have a 16-Bit register that is cleared?
rtx clrw = NULL_RTX;
bool sign_extend = src.sbit && sign_bytes;
if (0 == dest.fbit % 8 && 7 == src.fbit % 8)
shift = ASHIFT;
else if (7 == dest.fbit % 8 && 0 == src.fbit % 8)
shift = ASHIFTRT;
else if (dest.fbit % 8 == src.fbit % 8)
shift = UNKNOWN;
else
gcc_unreachable();
/* Step 1: Clear bytes at the low end and copy payload bits from source
====== to destination. */
int step = offset < 0 ? 1 : -1;
unsigned d0 = offset < 0 ? dest.regno : dest.regno_msb;
// We cleared at least that number of registers.
int clr_n = 0;
for (; d0 >= dest.regno && d0 <= dest.regno_msb; d0 += step)
{
// Next regno of destination is needed for MOVW
unsigned d1 = d0 + step;
// Current and next regno of source
unsigned s0 = d0 - offset;
unsigned s1 = s0 + step;
// Must current resp. next regno be CLRed? This applies to the low
// bytes of the destination that have no associated source bytes.
bool clr0 = s0 < src.regno;
bool clr1 = s1 < src.regno && d1 >= dest.regno;
// First gather what code to emit (if any) and additional step to
// apply if a MOVW is in use. xop[2] is destination rtx and xop[3]
// is the source rtx for the current loop iteration.
const char *code = NULL;
int stepw = 0;
if (clr0)
{
if (AVR_HAVE_MOVW && clr1 && clrw)
{
xop[2] = all_regs_rtx[d0 & ~1];
xop[3] = clrw;
code = "movw %2,%3";
stepw = step;
}
else
{
xop[2] = all_regs_rtx[d0];
code = "clr %2";
if (++clr_n >= 2
&& !clrw
&& d0 % 2 == (step > 0))
{
clrw = all_regs_rtx[d0 & ~1];
}
}
}
else if (offset && s0 <= src.regno_msb)
{
int movw = AVR_HAVE_MOVW && offset % 2 == 0
&& d0 % 2 == (offset > 0)
&& d1 <= dest.regno_msb && d1 >= dest.regno
&& s1 <= src.regno_msb && s1 >= src.regno;
xop[2] = all_regs_rtx[d0 & ~movw];
xop[3] = all_regs_rtx[s0 & ~movw];
code = movw ? "movw %2,%3" : "mov %2,%3";
stepw = step * movw;
}
if (code)
{
if (sign_extend && shift != ASHIFT && !sign_in_carry
&& (d0 == src.regno_msb || d0 + stepw == src.regno_msb))
{
/* We are going to override the sign bit. If we sign-extend,
store the sign in the Carry flag. This is not needed if
the destination will be ASHIFTed in the remainder because
the ASHIFT will set Carry without an extra instruction. */
avr_asm_len ("lsl %0", &all_regs_rtx[src.regno_msb], plen, 1);
sign_in_carry = true;
}
unsigned src_msb = dest.regno_msb - sign_bytes - offset + 1;
if (!sign_extend && shift == ASHIFTRT && !msb_in_carry
&& src.ibyte > dest.ibyte
&& (d0 == src_msb || d0 + stepw == src_msb))
{
/* We are going to override the MSB. If we shift right,
store the MSB in the Carry flag. This is only needed if
we don't sign-extend because with sign-extension the MSB
(the sign) will be produced by the sign extension. */
avr_asm_len ("lsr %0", &all_regs_rtx[src_msb], plen, 1);
msb_in_carry = true;
}
unsigned src_lsb = dest.regno - offset -1;
if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry
&& (d0 == src_lsb || d0 + stepw == src_lsb))
{
/* We are going to override the new LSB; store it into carry. */
avr_asm_len ("lsl %0", &all_regs_rtx[src_lsb], plen, 1);
code_ashift = "rol %0";
lsb_in_carry = true;
}
avr_asm_len (code, xop, plen, 1);
d0 += stepw;
}
}
/* Step 2: Shift destination left by 1 bit position. This might be needed
====== for signed input and unsigned output. */
if (shift == ASHIFT && src.fbyte > dest.fbyte && !lsb_in_carry)
{
unsigned s0 = dest.regno - offset -1;
if (MAY_CLOBBER (s0))
avr_asm_len ("lsl %0", &all_regs_rtx[s0], plen, 1);
else
avr_asm_len ("mov __tmp_reg__,%0" CR_TAB
"lsl __tmp_reg__", &all_regs_rtx[s0], plen, 2);
code_ashift = "rol %0";
lsb_in_carry = true;
}
if (shift == ASHIFT)
{
for (d0 = dest.regno + zero_bytes;
d0 <= dest.regno_msb - sign_bytes; d0++)
{
avr_asm_len (code_ashift, &all_regs_rtx[d0], plen, 1);
code_ashift = "rol %0";
}
lsb_in_carry = false;
sign_in_carry = true;
}
/* Step 4a: Store MSB in carry if we don't already have it or will produce
======= it in sign-extension below. */
if (!sign_extend && shift == ASHIFTRT && !msb_in_carry
&& src.ibyte > dest.ibyte)
{
unsigned s0 = dest.regno_msb - sign_bytes - offset + 1;
if (MAY_CLOBBER (s0))
avr_asm_len ("lsr %0", &all_regs_rtx[s0], plen, 1);
else
avr_asm_len ("mov __tmp_reg__,%0" CR_TAB
"lsr __tmp_reg__", &all_regs_rtx[s0], plen, 2);
msb_in_carry = true;
}
/* Step 3: Sign-extend or zero-extend the destination as needed.
====== */
if (sign_extend && !sign_in_carry)
{
unsigned s0 = src.regno_msb;
if (MAY_CLOBBER (s0))
avr_asm_len ("lsl %0", &all_regs_rtx[s0], plen, 1);
else
avr_asm_len ("mov __tmp_reg__,%0" CR_TAB
"lsl __tmp_reg__", &all_regs_rtx[s0], plen, 2);
sign_in_carry = true;
}
gcc_assert (sign_in_carry + msb_in_carry + lsb_in_carry <= 1);
unsigned copies = 0;
rtx movw = sign_extend ? NULL_RTX : clrw;
for (d0 = dest.regno_msb - sign_bytes + 1; d0 <= dest.regno_msb; d0++)
{
if (AVR_HAVE_MOVW && movw
&& d0 % 2 == 0 && d0 + 1 <= dest.regno_msb)
{
xop[2] = all_regs_rtx[d0];
xop[3] = movw;
avr_asm_len ("movw %2,%3", xop, plen, 1);
d0++;
}
else
{
avr_asm_len (sign_extend ? "sbc %0,%0" : "clr %0",
&all_regs_rtx[d0], plen, 1);
if (++copies >= 2 && !movw && d0 % 2 == 1)
movw = all_regs_rtx[d0-1];
}
} /* for */
/* Step 4: Right shift the destination. This might be needed for
====== conversions from unsigned to signed. */
if (shift == ASHIFTRT)
{
const char *code_ashiftrt = "lsr %0";
if (sign_extend || msb_in_carry)
code_ashiftrt = "ror %0";
if (src.sbit && src.ibyte == dest.ibyte)
code_ashiftrt = "asr %0";
for (d0 = dest.regno_msb - sign_bytes;
d0 >= dest.regno + zero_bytes - 1 && d0 >= dest.regno; d0--)
{
avr_asm_len (code_ashiftrt, &all_regs_rtx[d0], plen, 1);
code_ashiftrt = "ror %0";
}
}
#undef MAY_CLOBBER
return "";
}
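The shift logic above relies on a signed fractional type having one fewer fractional bit than its unsigned counterpart, so a conversion between the two is a single 1-bit shift. A minimal host-side illustration (mine, not part of the commit):

#include <stdio.h>

int main (void)
{
  /* 0.5 in the two 8-bit fractional formats: UQQ has 8 fractional bits,
     QQ has a sign bit plus 7 fractional bits.  */
  unsigned char uqq = 0x80;        /* 0.5 = 128/256 in UQQ */
  unsigned char qq  = uqq >> 1;    /* 0.5 =  64/128 in QQ  */
  printf ("UQQ 0x%02x  ->  QQ 0x%02x\n", uqq, qq);
  return 0;
}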
/* Create RTL split patterns for byte sized rotate expressions. This
   produces a series of move instructions and considers overlap situations.
   Overlapping non-HImode operands need a scratch register. */
@@ -7123,348 +7449,6 @@ avr_rotate_bytes (rtx operands[])
}
/* Outputs instructions needed for fixed point type conversion.
This includes converting between any fixed point type, as well
as converting to any integer type. Conversion between integer
types is not supported.
The number of instructions generated depends on the types
being converted and the registers assigned to them.
The number of instructions required to complete the conversion
is least if the registers for source and destination are overlapping
and are aligned at the decimal place as actual movement of data is
completely avoided. In some cases, the conversion may already be
complete without any instructions needed.
When converting to signed types from signed types, sign extension
is implemented.
Converting signed fractional types requires a bit shift if converting
to or from any unsigned fractional type because the decimal place is
shifted by 1 bit. When the destination is a signed fractional, the sign
is stored in either the carry or T bit. */
const char*
avr_out_fract (rtx insn, rtx operands[], bool intsigned, int *plen)
{
int i;
bool sbit[2];
/* ilen: Length of integral part (in bytes)
flen: Length of fractional part (in bytes)
tlen: Length of operand (in bytes)
blen: Length of operand (in bits) */
int ilen[2], flen[2], tlen[2], blen[2];
int rdest, rsource, offset;
int start, end, dir;
bool sign_in_T = false, sign_in_Carry = false, sign_done = false;
bool widening_sign_extend = false;
int clrword = -1, lastclr = 0, clr = 0;
rtx xop[6];
const int dest = 0;
const int src = 1;
xop[dest] = operands[dest];
xop[src] = operands[src];
if (plen)
*plen = 0;
/* Determine format (integer and fractional parts)
of types needing conversion. */
for (i = 0; i < 2; i++)
{
enum machine_mode mode = GET_MODE (xop[i]);
tlen[i] = GET_MODE_SIZE (mode);
blen[i] = GET_MODE_BITSIZE (mode);
if (SCALAR_INT_MODE_P (mode))
{
sbit[i] = intsigned;
ilen[i] = GET_MODE_SIZE (mode);
flen[i] = 0;
}
else if (ALL_SCALAR_FIXED_POINT_MODE_P (mode))
{
sbit[i] = SIGNED_SCALAR_FIXED_POINT_MODE_P (mode);
ilen[i] = (GET_MODE_IBIT (mode) + 1) / 8;
flen[i] = (GET_MODE_FBIT (mode) + 1) / 8;
}
else
fatal_insn ("unsupported fixed-point conversion", insn);
}
/* Perform sign extension if source and dest are both signed,
and there are more integer parts in dest than in source. */
widening_sign_extend = sbit[dest] && sbit[src] && ilen[dest] > ilen[src];
rdest = REGNO (xop[dest]);
rsource = REGNO (xop[src]);
offset = flen[src] - flen[dest];
/* Position of MSB resp. sign bit. */
xop[2] = GEN_INT (blen[dest] - 1);
xop[3] = GEN_INT (blen[src] - 1);
/* Store the sign bit if the destination is a signed fract and the source
has a sign in the integer part. */
if (sbit[dest] && ilen[dest] == 0 && sbit[src] && ilen[src] > 0)
{
/* To avoid using BST and BLD if the source and destination registers
overlap or the source is unused after, we can use LSL to store the
sign bit in carry since we don't need the integral part of the source.
Restoring the sign from carry saves one BLD instruction below. */
if (reg_unused_after (insn, xop[src])
|| (rdest < rsource + tlen[src]
&& rdest + tlen[dest] > rsource))
{
avr_asm_len ("lsl %T1%t3", xop, plen, 1);
sign_in_Carry = true;
}
else
{
avr_asm_len ("bst %T1%T3", xop, plen, 1);
sign_in_T = true;
}
}
/* Pick the correct direction to shift bytes. */
if (rdest < rsource + offset)
{
dir = 1;
start = 0;
end = tlen[dest];
}
else
{
dir = -1;
start = tlen[dest] - 1;
end = -1;
}
/* Perform conversion by moving registers into place, clearing
destination registers that do not overlap with any source. */
for (i = start; i != end; i += dir)
{
int destloc = rdest + i;
int sourceloc = rsource + i + offset;
/* Source register location is outside range of source register,
so clear this byte in the dest. */
if (sourceloc < rsource
|| sourceloc >= rsource + tlen[src])
{
if (AVR_HAVE_MOVW
&& i + dir != end
&& (sourceloc + dir < rsource
|| sourceloc + dir >= rsource + tlen[src])
&& ((dir == 1 && !(destloc % 2) && !(sourceloc % 2))
|| (dir == -1 && (destloc % 2) && (sourceloc % 2)))
&& clrword != -1)
{
/* Use already cleared word to clear two bytes at a time. */
int even_i = i & ~1;
int even_clrword = clrword & ~1;
xop[4] = GEN_INT (8 * even_i);
xop[5] = GEN_INT (8 * even_clrword);
avr_asm_len ("movw %T0%t4,%T0%t5", xop, plen, 1);
i += dir;
}
else
{
if (i == tlen[dest] - 1
&& widening_sign_extend
&& blen[src] - 1 - 8 * offset < 0)
{
/* The SBRC below that sign-extends would come
up with a negative bit number because the sign
bit is out of reach. Also avoid some early-clobber
situations because of premature CLR. */
if (reg_unused_after (insn, xop[src]))
avr_asm_len ("lsl %T1%t3" CR_TAB
"sbc %T0%t2,%T0%t2", xop, plen, 2);
else
avr_asm_len ("mov __tmp_reg__,%T1%t3" CR_TAB
"lsl __tmp_reg__" CR_TAB
"sbc %T0%t2,%T0%t2", xop, plen, 3);
sign_done = true;
continue;
}
/* Do not clear the register if it is going to get
sign extended with a MOV later. */
if (sbit[dest] && sbit[src]
&& i != tlen[dest] - 1
&& i >= flen[dest])
{
continue;
}
xop[4] = GEN_INT (8 * i);
avr_asm_len ("clr %T0%t4", xop, plen, 1);
/* If the last byte was cleared too, we have a cleared
word we can MOVW to clear two bytes at a time. */
if (lastclr)
clrword = i;
clr = 1;
}
}
else if (destloc == sourceloc)
{
/* Source byte is already in destination: Nothing needed. */
continue;
}
else
{
/* Registers do not line up and source register location
is within range: Perform move, shifting with MOV or MOVW. */
if (AVR_HAVE_MOVW
&& i + dir != end
&& sourceloc + dir >= rsource
&& sourceloc + dir < rsource + tlen[src]
&& ((dir == 1 && !(destloc % 2) && !(sourceloc % 2))
|| (dir == -1 && (destloc % 2) && (sourceloc % 2))))
{
int even_i = i & ~1;
int even_i_plus_offset = (i + offset) & ~1;
xop[4] = GEN_INT (8 * even_i);
xop[5] = GEN_INT (8 * even_i_plus_offset);
avr_asm_len ("movw %T0%t4,%T1%t5", xop, plen, 1);
i += dir;
}
else
{
xop[4] = GEN_INT (8 * i);
xop[5] = GEN_INT (8 * (i + offset));
avr_asm_len ("mov %T0%t4,%T1%t5", xop, plen, 1);
}
}
lastclr = clr;
clr = 0;
}
/* Perform sign extension if source and dest are both signed,
and there are more integer parts in dest than in source. */
if (widening_sign_extend)
{
if (!sign_done)
{
xop[4] = GEN_INT (blen[src] - 1 - 8 * offset);
/* Register was cleared above, so can become 0xff and extended.
Note: Instead of the CLR/SBRC/COM the sign extension could
be performed after the LSL below by means of a SBC if only
one byte has to be shifted left. */
avr_asm_len ("sbrc %T0%T4" CR_TAB
"com %T0%t2", xop, plen, 2);
}
/* Sign extend additional bytes by MOV and MOVW. */
start = tlen[dest] - 2;
end = flen[dest] + ilen[src] - 1;
for (i = start; i != end; i--)
{
if (AVR_HAVE_MOVW && i != start && i-1 != end)
{
i--;
xop[4] = GEN_INT (8 * i);
xop[5] = GEN_INT (8 * (tlen[dest] - 2));
avr_asm_len ("movw %T0%t4,%T0%t5", xop, plen, 1);
}
else
{
xop[4] = GEN_INT (8 * i);
xop[5] = GEN_INT (8 * (tlen[dest] - 1));
avr_asm_len ("mov %T0%t4,%T0%t5", xop, plen, 1);
}
}
}
/* If destination is a signed fract, and the source was not, a shift
by 1 bit is needed. Also restore sign from carry or T. */
if (sbit[dest] && !ilen[dest] && (!sbit[src] || ilen[src]))
{
/* We have flen[src] non-zero fractional bytes to shift.
Because of the right shift, handle one byte more so that the
LSB won't be lost. */
int nonzero = flen[src] + 1;
/* If the LSB is in the T flag and there are no fractional
bits, the high byte is zero and no shift needed. */
if (flen[src] == 0 && sign_in_T)
nonzero = 0;
start = flen[dest] - 1;
end = start - nonzero;
for (i = start; i > end && i >= 0; i--)
{
xop[4] = GEN_INT (8 * i);
if (i == start && !sign_in_Carry)
avr_asm_len ("lsr %T0%t4", xop, plen, 1);
else
avr_asm_len ("ror %T0%t4", xop, plen, 1);
}
if (sign_in_T)
{
avr_asm_len ("bld %T0%T2", xop, plen, 1);
}
}
else if (sbit[src] && !ilen[src] && (!sbit[dest] || ilen[dest]))
{
/* If source was a signed fract and dest was not, shift 1 bit
other way. */
start = flen[dest] - flen[src];
if (start < 0)
start = 0;
for (i = start; i < flen[dest]; i++)
{
xop[4] = GEN_INT (8 * i);
if (i == start)
avr_asm_len ("lsl %T0%t4", xop, plen, 1);
else
avr_asm_len ("rol %T0%t4", xop, plen, 1);
}
}
return "";
}
/* Modifies the length assigned to instruction INSN
   LEN is the initially computed length of the insn. */
...
2012-11-22 Georg-Johann Lay <avr@gjlay.de>
Adjust decimal point of signed accum mode to GCC default.
PR target/54222
* config/avr/t-avr (LIB1ASMFUNCS): Add _fractsfsq _fractsfusq,
_divqq_helper.
* config/avr/lib1funcs-fixed.S (__fractqqsf, __fracthqsf)
(__fractsasf, __fractsfha, __fractusqsf, __fractsfsa)
(__mulha3, __mulsa3)
(__divqq3, __divha3, __divsa3): Adjust to new position of
decimal point of signed accum types.
(__mulusa3_round): New function.
(__mulusa3): Use it.
(__divqq_helper): New function.
(__udivuqq3): Use it.
2012-11-20  Jakub Jelinek  <jakub@redhat.com>
	PR bootstrap/55370
...
@@ -43,8 +43,8 @@ DEFUN __fractqqsf
;; Move in place for SA -> SF conversion
clr r22
mov r23, r24
- lsl r23
;; Sign-extend
+ lsl r24
sbc r24, r24
mov r25, r24
XJMP __fractsasf
@@ -67,9 +67,8 @@ ENDF __fractuqqsf
DEFUN __fracthqsf
;; Move in place for SA -> SF conversion
wmov 22, 24
- lsl r22
- rol r23
;; Sign-extend
+ lsl r25
sbc r24, r24
mov r25, r24
XJMP __fractsasf
@@ -140,11 +139,13 @@ ENDF __fractusqsf
#if defined (L_fractsasf)
DEFUN __fractsasf
XCALL __floatsisf
- ;; Divide non-zero results by 2^16 to move the
+ ;; Divide non-zero results by 2^15 to move the
;; decimal point into place
- cpse r25, __zero_reg__
- subi r25, exp_hi (16)
- ret
+ tst r25
+ breq 0f
+ subi r24, exp_lo (15)
+ sbci r25, exp_hi (15)
+ 0: ret
ENDF __fractsasf
#endif /* L_fractsasf */
@@ -186,8 +187,9 @@ ENDF __fractsfuqq
#if defined (L_fractsfha)
DEFUN __fractsfha
- ;; Multiply with 2^24 to get a HA result in r25:r24
- subi r25, exp_hi (-24)
+ ;; Multiply with 2^{16+7} to get a HA result in r25:r24
+ subi r24, exp_lo (-23)
+ sbci r25, exp_hi (-23)
XJMP __fixsfsi
ENDF __fractsfha
#endif /* L_fractsfha */
@@ -201,8 +203,7 @@ ENDF __fractsfuha
#endif /* L_fractsfuha */
#if defined (L_fractsfhq)
- DEFUN __fractsfsq
- ENDF __fractsfsq
+ FALIAS __fractsfsq
DEFUN __fractsfhq
;; Multiply with 2^{16+15} to get a HQ result in r25:r24
@@ -214,8 +215,7 @@ ENDF __fractsfhq
#endif /* L_fractsfhq */
#if defined (L_fractsfuhq)
- DEFUN __fractsfusq
- ENDF __fractsfusq
+ FALIAS __fractsfusq
DEFUN __fractsfuhq
;; Multiply with 2^{16+16} to get a UHQ result in r25:r24
@@ -227,8 +227,9 @@ ENDF __fractsfuhq
#if defined (L_fractsfsa)
DEFUN __fractsfsa
- ;; Multiply with 2^16 to get a SA result in r25:r22
- subi r25, exp_hi (-16)
+ ;; Multiply with 2^15 to get a SA result in r25:r22
+ subi r24, exp_lo (-15)
+ sbci r25, exp_hi (-15)
XJMP __fixsfsi
ENDF __fractsfsa
#endif /* L_fractsfsa */
@@ -325,6 +326,9 @@ ENDF __muluhq3
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulha3
XCALL __mulhisi3
+ lsl r22
+ rol r23
+ rol r24
XJMP __muluha3_round
ENDF __mulha3
#endif /* L_mulha3 */
@@ -359,6 +363,9 @@ ENDF __muluha3_round
   Fixed Multiplication 16.16 x 16.16
*******************************************************/
+ ;; Bits outside the result (below LSB), used in the signed version
+ #define GUARD __tmp_reg__
#if defined (__AVR_HAVE_MUL__)
;; Multiplier
@@ -381,9 +388,16 @@ ENDF __muluha3_round
#if defined (L_mulusa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
+ ;;; Clobbers: __tmp_reg__
+ ;;; Rounding: -0.5 LSB < error <= 0.5 LSB
DEFUN __mulusa3
+ set
+ ;; Fallthru
+ ENDF __mulusa3
+ ;;; Round for last digit iff T = 1
+ ;;; Return guard bits in GUARD (__tmp_reg__).
+ ;;; Rounding, T = 0: -1.0 LSB < error <= 0 LSB
+ ;;; Rounding, T = 1: -0.5 LSB < error <= 0.5 LSB
+ DEFUN __mulusa3_round
;; Some of the MUL instructions have LSBs outside the result.
;; Don't ignore these LSBs in order to tame rounding error.
;; Use C2/C3 for these LSBs.
@@ -395,9 +409,12 @@ DEFUN __mulusa3
mul A1, B0 $ add C3, r0 $ adc C0, r1
mul A0, B1 $ add C3, r0 $ adc C0, r1 $ rol C1
- ;; Round
+ ;; Round if T = 1. Store guarding bits outside the result for rounding
+ ;; and left-shift by the signed version (function below).
+ brtc 0f
sbrc C3, 7
adiw C0, 1
+ 0: push C3
;; The following MULs don't have LSBs outside the result.
;; C2/C3 is the high part.
@@ -420,25 +437,42 @@ DEFUN __mulusa3
mul A2, B3 $ add C3, r0
mul A3, B2 $ add C3, r0
+ ;; Guard bits used in the signed version below.
+ pop GUARD
clr __zero_reg__
ret
- ENDF __mulusa3
+ ENDF __mulusa3_round
#endif /* L_mulusa3 */
#if defined (L_mulsa3)
;;; (C3:C0) = (A3:A0) * (B3:B0)
- ;;; Clobbers: __tmp_reg__
+ ;;; Clobbers: __tmp_reg__, T
;;; Rounding: -0.5 LSB <= error <= 0.5 LSB
DEFUN __mulsa3
- XCALL __mulusa3
+ clt
+ XCALL __mulusa3_round
+ ;; A posteriori sign extension of the operands
tst B3
brpl 1f
sub C2, A0
sbc C3, A1
1: sbrs A3, 7
- ret
+ rjmp 2f
sub C2, B0
sbc C3, B1
+ 2:
+ ;; Shift 1 bit left to adjust for 15 fractional bits
+ lsl GUARD
+ rol C0
+ rol C1
+ rol C2
+ rol C3
+ ;; Round last digit
+ lsl GUARD
+ adc C0, __zero_reg__
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
ret
ENDF __mulsa3
#endif /* L_mulsa3 */
@@ -492,27 +526,56 @@ ENDF __mulsa3
DEFUN __mulsa3
push B0
push B1
- bst B3, 7
- XCALL __mulusa3
- ;; A survived in 31:30:27:26
- rcall 1f
- pop AA1
- pop AA0
+ push B3
+ clt
+ XCALL __mulusa3_round
+ pop r30
+ ;; sign-extend B
+ bst r30, 7
+ brtc 1f
+ ;; A1, A0 survived in R27:R26
+ sub C2, AA0
+ sbc C3, AA1
+ 1:
+ pop AA1 ;; B1
+ pop AA0 ;; B0
+ ;; sign-extend A. A3 survived in R31
bst AA3, 7
- 1: brtc 9f
+ brtc 2f
;; 1-extend A/B
sub C2, AA0
sbc C3, AA1
- 9: ret
+ 2:
+ ;; Shift 1 bit left to adjust for 15 fractional bits
+ lsl GUARD
+ rol C0
+ rol C1
+ rol C2
+ rol C3
+ ;; Round last digit
+ lsl GUARD
+ adc C0, __zero_reg__
+ adc C1, __zero_reg__
+ adc C2, __zero_reg__
+ adc C3, __zero_reg__
+ ret
ENDF __mulsa3
#endif /* L_mulsa3 */
#if defined (L_mulusa3)
;;; (R25:R22) *= (R21:R18)
- ;;; Clobbers: ABI, called by optabs and __mulsua
+ ;;; Clobbers: ABI, called by optabs
;;; Rounding: -1 LSB <= error <= 1 LSB
- ;;; Does not clobber T and A[] survives in 26, 27, 30, 31
DEFUN __mulusa3
+ set
+ ;; Fallthru
+ ENDF __mulusa3
+ ;;; A[] survives in 26, 27, 30, 31
+ ;;; Also used by __mulsa3 with T = 0
+ ;;; Round if T = 1
+ ;;; Return Guard bits in GUARD (__tmp_reg__), used by signed version.
+ DEFUN __mulusa3_round
push CC2
push CC3
; clear result
@@ -560,21 +623,26 @@ DEFUN __mulusa3
sbci B0, 0
brne 5b
- ;; Move result into place and round
+ ;; Save guard bits and set carry for rounding
+ push B3
lsl B3
+ ;; Move result into place
wmov C2, CC2
wmov C0, CC0
clr __zero_reg__
+ brtc 6f
+ ;; Round iff T = 1
adc C0, __zero_reg__
adc C1, __zero_reg__
adc C2, __zero_reg__
adc C3, __zero_reg__
+ 6:
+ pop GUARD
;; Epilogue
pop CC3
pop CC2
ret
- ENDF __mulusa3
+ ENDF __mulusa3_round
#endif /* L_mulusa3 */
#undef A0
@@ -600,6 +668,8 @@ ENDF __mulusa3
#endif /* __AVR_HAVE_MUL__ */
+ #undef GUARD
/*******************************************************
   Fractional Division 8 / 8
*******************************************************/
@@ -607,30 +677,38 @@ ENDF __mulusa3
#define r_divd r25 /* dividend */
#define r_quo r24 /* quotient */
#define r_div r22 /* divisor */
+ #define r_sign __tmp_reg__
#if defined (L_divqq3)
DEFUN __divqq3
- mov r0, r_divd
- eor r0, r_div
+ mov r_sign, r_divd
+ eor r_sign, r_div
sbrc r_div, 7
neg r_div
sbrc r_divd, 7
neg r_divd
- cp r_divd, r_div
- breq __divqq3_minus1 ; if equal return -1
- XCALL __udivuqq3
+ XCALL __divqq_helper
lsr r_quo
- sbrc r0, 7 ; negate result if needed
+ sbrc r_sign, 7 ; negate result if needed
neg r_quo
ret
- __divqq3_minus1:
- ldi r_quo, 0x80
- ret
ENDF __divqq3
- #endif /* defined (L_divqq3) */
+ #endif /* L_divqq3 */
#if defined (L_udivuqq3)
DEFUN __udivuqq3
+ cp r_divd, r_div
+ brsh 0f
+ XJMP __divqq_helper
+ ;; Result is out of [0, 1) ==> Return 1 - eps.
+ 0: ldi r_quo, 0xff
+ ret
+ ENDF __udivuqq3
+ #endif /* L_udivuqq3 */
+ #if defined (L_divqq_helper)
+ DEFUN __divqq_helper
clr r_quo ; clear quotient
inc __zero_reg__ ; init loop counter, used per shift
__udivuqq3_loop:
@@ -649,12 +727,13 @@ __udivuqq3_cont:
com r_quo ; complement result
          ; because C flag was complemented in loop
ret
- ENDF __udivuqq3
- #endif /* defined (L_udivuqq3) */
+ ENDF __divqq_helper
+ #endif /* L_divqq_helper */
#undef r_divd
#undef r_quo
#undef r_div
+ #undef r_sign
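The lsr/ror sequences added to __divqq3 above and to __divha3/__divsa3 below compensate for the unsigned helpers returning a quotient with one more fractional bit than the signed result format keeps. A rough worked example on the host (my sketch, not the library code):

#include <assert.h>
#include <stdint.h>

int main (void)
{
  /* 0.25 / 0.5 = 0.5, computed as UQQ (8 fractional bits) and then
     adjusted to QQ (7 fractional bits) by one right shift.  */
  uint8_t a = 0x40;                                     /* 0.25 in UQQ */
  uint8_t b = 0x80;                                     /* 0.50 in UQQ */
  uint8_t q_uqq = (uint8_t) (((unsigned) a << 8) / b);  /* 0x80 = 0.5 in UQQ */
  uint8_t q_qq  = (uint8_t) (q_uqq >> 1);               /* 0x40 = 0.5 in QQ  */
  assert (q_uqq == 0x80 && q_qq == 0x40);
  return 0;
}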
/*******************************************************
@@ -746,6 +825,8 @@ DEFUN __divha3
NEG2 r_divdL
2:
XCALL __udivuha3
+ lsr r_quoH ; adjust to 7 fractional bits
+ ror r_quoL
sbrs r0, 7 ; negate result if needed
ret
NEG2 r_quoL
@@ -806,6 +887,10 @@ DEFUN __divsa3
NEG4 r_arg1L
2:
XCALL __udivusa3
+ lsr r_quoHH ; adjust to 15 fractional bits
+ ror r_quoHL
+ ror r_quoH
+ ror r_quoL
sbrs r0, 7 ; negate result if needed
ret
;; negate r_quoL
@@ -1024,8 +1109,8 @@ DEFUN __usadd_8
XCALL __adddi3
brcs 0f
ret
- ;; A[] = 0xffffffff
- 0: XJMP __sbc_8
+ 0: ;; A[] = 0xffffffff
+ XJMP __sbc_8
ENDF __usadd_8
#endif /* L_usadd_8 */
@@ -1038,8 +1123,8 @@ DEFUN __ussub_8
XCALL __subdi3
brcs 0f
ret
- ;; A[] = 0
- 0: XJMP __clr_8
+ 0: ;; A[] = 0
+ XJMP __clr_8
ENDF __ussub_8
#endif /* L_ussub_8 */
@@ -1049,9 +1134,9 @@ FALIAS __ssaddda3
FALIAS __ssadddq3
DEFUN __ssadd_8
- ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
XCALL __adddi3
brvc 0f
+ ;; A = (B >= 0) ? INT64_MAX : INT64_MIN
cpi B7, 0x80
XCALL __sbc_8
subi A7, 0x80
...
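The __mulusa3_round / __mulsa3 changes above keep the product bits that fall below the result's LSB (the GUARD byte) so the caller can round and, for the signed 15-fractional-bit accum, shift the result left by one bit before rounding. A host-side sketch of the rounding idea only (my illustration; the real AVR routine works byte-wise and drops some low partial products, hence its looser error bound):

#include <stdint.h>
#include <stdio.h>

/* Round-to-nearest 16.16 multiply using one guard bit, similar in spirit
   to __mulusa3 with T = 1.  Names and structure are illustrative only.  */
static uint32_t mul_16_16_round (uint32_t a, uint32_t b)
{
  uint64_t p = (uint64_t) a * b;               /* exact 32.32 product  */
  uint32_t r = (uint32_t) (p >> 16);           /* truncate to 16.16    */
  uint32_t guard = (uint32_t) (p >> 15) & 1u;  /* first discarded bit  */
  return r + guard;                            /* round to nearest     */
}

int main (void)
{
  /* 1.5 * 1.5 = 2.25:  0x00018000 * 0x00018000 -> 0x00024000  */
  printf ("0x%08lx\n", (unsigned long) mul_16_16_round (0x18000u, 0x18000u));
  return 0;
}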
@@ -64,12 +64,12 @@ LIB1ASMFUNCS += \
	\
	_fractsfqq _fractsfuqq \
	_fractsfhq _fractsfuhq _fractsfha _fractsfuha \
-	_fractsfsa _fractsfusa \
+	_fractsfsq _fractsfusq _fractsfsa _fractsfusa \
	_mulqq3 \
	_mulhq3 _muluhq3 \
	_mulha3 _muluha3 _muluha3_round \
	_mulsa3 _mulusa3 \
-	_divqq3 _udivuqq3 \
+	_divqq3 _udivuqq3 _divqq_helper \
	_divhq3 _udivuhq3 \
	_divha3 _udivuha3 \
	_divsa3 _udivusa3 \
...