Commit d38a64b4 by Joern Rennecke Committed by Joern Rennecke

config.host (arc*-*-elf*, [...]): New configurations.

2013-10-01  Joern Rennecke  <joern.rennecke@embecosm.com>
            Brendan Kehoe  <brendan@zen.org>
            Simon Cook  <simon.cook@embecosm.com>

        * config.host (arc*-*-elf*, arc*-*-linux-uclibc*): New configurations.
        * config/arc: New directory.
        * longlong.h [__arc__] (umul_ppmm): Remove.
        [__arc__] (__umulsidi3): Define.
        [__arc__ && __ARC_NORM__] (count_leading_zeros): Define.
        [__arc__ && __ARC_NORM__] (COUNT_LEADING_ZEROS_0): Likewise.

Co-Authored-By: Brendan Kehoe <brendan@zen.org>
Co-Authored-By: Simon Cook <simon.cook@embecosm.com>

From-SVN: r203073
parent 526b7aee
2013-10-01 Joern Rennecke <joern.rennecke@embecosm.com>
Brendan Kehoe <brendan@zen.org>
Simon Cook <simon.cook@embecosm.com>
* config.host (arc*-*-elf*, arc*-*-linux-uclibc*): New configurations.
* config/arc: New directory.
* longlong.h [__arc__] (umul_ppmm): Remove.
[__arc__] (__umulsidi3): Define.
[__arc__ && __ARC_NORM__] (count_leading_zeros): Define.
[__arc__ && __ARC_NORM__] (COUNT_LEADING_ZEROS_0): Likewise.
2013-09-17 Jacek Caban <jacek@codeweavers.com> 2013-09-17 Jacek Caban <jacek@codeweavers.com>
* config/i386/gthr-win32.c: CreateSemaphoreW instead of * config/i386/gthr-win32.c: CreateSemaphoreW instead of
......
...@@ -91,6 +91,9 @@ alpha*-*-*) ...@@ -91,6 +91,9 @@ alpha*-*-*)
am33_2.0-*-linux*) am33_2.0-*-linux*)
cpu_type=mn10300 cpu_type=mn10300
;; ;;
arc*-*-*)
cpu_type=arc
;;
arm*-*-*) arm*-*-*)
cpu_type=arm cpu_type=arm
;; ;;
...@@ -315,6 +318,14 @@ alpha*-dec-*vms*) ...@@ -315,6 +318,14 @@ alpha*-dec-*vms*)
extra_parts="$extra_parts vms-dwarf2.o vms-dwarf2eh.o" extra_parts="$extra_parts vms-dwarf2.o vms-dwarf2eh.o"
md_unwind_header=alpha/vms-unwind.h md_unwind_header=alpha/vms-unwind.h
;; ;;
arc*-*-elf*)
tmake_file="arc/t-arc-newlib arc/t-arc"
extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o libgmon.a crtg.o crtgend.o"
;;
arc*-*-linux-uclibc*)
tmake_file="${tmake_file} t-slibgcc-libgcc t-slibgcc-nolc-override arc/t-arc700-uClibc arc/t-arc"
extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o libgmon.a crtg.o crtgend.o"
;;
arm-wrs-vxworks) arm-wrs-vxworks)
tmake_file="$tmake_file arm/t-arm arm/t-vxworks t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" tmake_file="$tmake_file arm/t-arm arm/t-vxworks t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
extra_parts="$extra_parts crti.o crtn.o" extra_parts="$extra_parts crti.o crtn.o"
......
/* Assembler macros for the Synopsys DesignWare ARC CPU.
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* FUNC(X): emit a .type directive marking symbol X as a function.  */
#define FUNC(X) .type X,@function
/* ENDFUNC(X): emit a .size directive that sets the size of X to the
   distance from X to the current location; intended to be placed right
   after the last instruction of X.  */
#define ENDFUNC(X) .size X, .-X
/* Code to start and stop profiling for the Synopsys DesignWare ARC CPU.
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Fragment contributed to the .init section: call
   __monstartup with r0 = address of _init and r1 = address of _fini.
   NOTE(review): jl overwrites blink, so this relies on the surrounding
   _init code having saved blink beforehand - confirm the link order
   against the crti/crtn objects.  */
.section .init
.global _init
.global _fini
.global __monstartup
mov_s r0,_init
mov_s r1,_fini
jl __monstartup
/* Start-of-section labels for the three profiling data sections.  Each
   label is 4-byte aligned and global so that C code can walk the section
   contents from its first word.  The descriptor and forward sections are
   allocatable ("a"); the counters section is also writable ("aw").  */
.section .__arc_profile_desc, "a"
.global __arc_profile_desc_secstart
.balign 4
__arc_profile_desc_secstart:
.section .__arc_profile_forward, "a"
.global __arc_profile_forward_secstart
.balign 4
__arc_profile_forward_secstart:
.section .__arc_profile_counters, "aw"
.global __arc_profile_counters_secstart
.balign 4
__arc_profile_counters_secstart:
/* Fragment contributed to the .fini section: call _mcleanup.  */
.section .fini
.global _mcleanup
jl _mcleanup
/* Code to start and stop profiling for the Synopsys DesignWare ARC CPU.
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* End-of-section labels for the profiling descriptor and forward
   sections.  Each global label marks the current end of its section in
   this object.
   NOTE(review): for the labels to bound the whole section, this object
   presumably has to be linked after all objects that contribute to these
   sections - confirm against the link spec.  */
.section .__arc_profile_desc, "a"
.global __arc_profile_desc_secend
__arc_profile_desc_secend:
.section .__arc_profile_forward, "a"
.global __arc_profile_forward_secend
__arc_profile_forward_secend:
/* .fini/.init stack frame setup for the Synopsys DesignWare ARC CPU.
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
# This file contains the stack frame setup for contents of the .fini and
# .init sections.
/* Opening part of _init: define the global entry label in .init and push
   blink (the return address register) on the stack.  There is
   deliberately no return here; the function body continues in whatever
   is placed in .init after this fragment.
   NOTE(review): the reason for the zero word emitted just before the
   label is not evident from this file - confirm before changing it.  */
.section .init
.global _init
.word 0
_init:
push_s blink
/* Opening part of _fini in .fini; same shape as _init above.  */
.section .fini
.global _fini
.word 0
_fini:
push_s blink
/* Ensure .fini/.init return for the Synopsys DesignWare ARC CPU.
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
# This file just makes sure that the .fini and .init sections do in
# fact return. This file is the last thing linked into any executable.
/* Closing part of the .init code: pop the saved return address into
   blink and jump to it.  */
.section .init
pop_s blink
j_s [blink]
/* Closing part of the .fini code; same shape as for .init.  */
.section .fini
pop_s blink
j_s [blink]
/* Copyright (C) 2004, 2006, 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file. (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
/* Calculate division table for ARC700 integer division
Contributed by Joern Rennecke
joern.rennecke@arc.com */
#include <stdio.h>
#include <math.h>
/* Write the assembler source of the ARC700 division table to stdout.

   The output is a header comment, a ".balign 4" directive, the label
   ".Ldivtab:" and one ".long" entry for each divisor i from 256 down to 2.
   For a divisor that is not a power of two, i is first doubled until it
   is at least 128 and the entry is ceil (2^39 / i), which always has bit
   31 set.  For a power of two the entry is the unnormalized value
   2^32 / i.

   Returns 0.  */
int
main (void)
{
  int i, j;
  double q, r, err, max_err = -1;

  puts("/* This table has been generated by divtab-arc700.c. */");
  /* The continuation lines below must stay at column 0: any leading
     whitespace would become part of the emitted text.  */
  puts("\
/* 1/512 .. 1/256, normalized. There is a leading 1 in bit 31.\n\
For powers of two, we list unnormalized numbers instead. The values\n\
for powers of 2 are loaded, but not used. The value for 1 is actually\n\
the first instruction after .Lmuldiv. */\n\
.balign 4");
  puts (".Ldivtab:\n");
  for (i = 256; i >= 2; --i)
    {
      /* i is always positive here, so no absolute value is needed.  */
      j = i;
      if (j & (j-1))
        /* Not a power of two: normalize into [128, 256).  */
        while (j < 128)
          j += j;
      else
        /* Power of two.  */
        j *= 128;
      /* 4 * 2^30 * 128 == 2^39; done in double to avoid int overflow.  */
      q = 4.*(1<<30)*128/j;
      r = ceil (q);
      printf ("\t.long\t0x%X\n", (unsigned) r);
      /* Track the worst rounding error; only reported by the disabled
         diagnostic below.  */
      err = r - q;
      if (err > max_err)
        max_err = err;
    }
#if 0
  printf ("\t/* maximum error: %f */\n", max_err);
#endif
  /* Return from main instead of calling exit: exit is not declared by
     the headers this file includes.  */
  return 0;
}
/* Copyright (C) 2007-2013 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file selects the double-precision parts of fp-bit.c that are
still needed for some ARC hardware variants; it also renames functions
that duplicate asm-coded functionality so that their results can be
used to compare with the optimized versions for debugging. */
/* Build only the fp-bit.c parts whose L_* macro is defined below.
   NOTE(review): FINE_GRAINED_LIBRARIES is presumably the switch fp-bit.c
   tests for this - confirm against fp-bit.c.  */
#define FINE_GRAINED_LIBRARIES
/* Debug switch.  While this is nonzero and __ARC_NORM__ is defined, the
   "#elif ARC_DP_DEBUG" branches below are taken: the C implementations
   are still compiled, but most of them under a "_c" suffixed name.  */
#define ARC_DP_DEBUG 1
/* Pack/unpack/make helpers, the NaN constant and the sf->df conversion:
   needed without __ARC_NORM__, and also whenever debugging is enabled.  */
#if !defined (__ARC_NORM__) || ARC_DP_DEBUG
#define L_pack_df
#define L_unpack_df
#define L_make_df
#define L_thenan_df
#define L_sf_to_df
#endif
/* Addition / subtraction.  Debug branch: renamed to __adddf3_c and
   __subdf3_c.  */
#ifndef __ARC_NORM__
#define L_addsub_df
#elif ARC_DP_DEBUG
#define L_addsub_df
#define __adddf3 __adddf3_c
#define __subdf3 __subdf3_c
#endif
/* Multiplication / division.  Three cases:
   - no __ARC_NORM__: C versions under their normal names;
   - __ARC_NORM__ but none of __ARC700__, __ARC_MUL64__ and
     __ARC_MUL32BY16__: C versions under their normal names, with
     QUIET_NAN redefined;
   - otherwise, when debugging: C versions renamed with a "_c" suffix.  */
#ifndef __ARC_NORM__
#define L_mul_df
#define L_div_df
#elif (!defined (__ARC700__) && !defined (__ARC_MUL64__) \
&& !defined(__ARC_MUL32BY16__))
#define L_mul_df
#define L_div_df
#undef QUIET_NAN
#define QUIET_NAN 0xfffffffffffffLL
#elif ARC_DP_DEBUG
#define L_mul_df
#define __muldf3 __muldf3_c
#define L_div_df
#define __divdf3 __divdf3_c
#endif
/* Conversions between double and float / int / unsigned int.  Debug
   branch: each selected function is renamed with a "_c" suffix.  */
#ifndef __ARC_NORM__
#define L_df_to_sf
#define L_si_to_df
#define L_df_to_si
#define L_tf_to_usi /* need to define this instead of df_to_usi */
#define L_usi_to_df
#elif ARC_DP_DEBUG
#define L_df_to_sf
#define __truncdfsf2 __truncdfsf2_c
#define L_si_to_df
#define __floatsidf __floatsidf_c
#define L_df_to_si
#define __fixdfsi __fixdfsi_c
#define L_tf_to_usi
#define __fixunsdfsi __fixunsdfsi_c
#define L_usi_to_df
#define __floatunsidf __floatunsidf_c
#endif
/* Comparisons and negation.  Without __ARC_NORM__ the full set is built.
   The debug branch selects only fpcmp_parts, eq, gt, ge and unord; the
   four comparison functions are renamed with a "_c" suffix.  */
#ifndef __ARC_NORM__
#define L_fpcmp_parts_df
#define L_compare_df
#define L_eq_df
#define L_ne_df
#define L_gt_df
#define L_ge_df
#define L_lt_df
#define L_le_df
#define L_unord_df
#define L_negate_df
#elif ARC_DP_DEBUG
#define L_fpcmp_parts_df
#define L_eq_df
#define __eqdf2 __eqdf2_c
#define L_gt_df
#define __gtdf2 __gtdf2_c
#define L_ge_df
#define __gedf2 __gedf2_c
#define L_unord_df
#define __unorddf2 __unorddf2_c
#endif
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file selects the single-precision parts of fp-bit.c that are
still needed for some ARC hardware variants; it also renames functions
that duplicate asm-coded functionality so that their results can be
used to compare with the optimized versions for debugging. */
/* Debug switch.  While this is nonzero and __ARC_NORM__ is defined, the
   "#elif ARC_FP_DEBUG" branches below are taken: the C implementations
   are still compiled, but most of them under a "_c" suffixed name.  */
#define ARC_FP_DEBUG 1
/* Build only the fp-bit.c parts whose L_* macro is defined below.
   NOTE(review): FINE_GRAINED_LIBRARIES is presumably the switch fp-bit.c
   tests for this - confirm against fp-bit.c.  */
#define FINE_GRAINED_LIBRARIES
/* Pack/unpack/make helpers and the NaN constant: needed without
   __ARC_NORM__, and also whenever debugging is enabled.  */
#if !defined (__ARC_NORM__) || ARC_FP_DEBUG
#define L_pack_sf
#define L_unpack_sf
#define L_make_sf
#define L_thenan_sf
#endif
/* Arithmetic and conversions.  Without __ARC_NORM__ the C versions keep
   their normal names; in the debug branch every function is renamed with
   a "_c" suffix.  */
#ifndef __ARC_NORM__
#define L_addsub_sf
#define L_mul_sf
#define L_div_sf
#define L_sf_to_df
#define L_si_to_sf
#define L_sf_to_si
#define L_usi_to_sf
#elif ARC_FP_DEBUG
#define L_addsub_sf
#define __addsf3 __addsf3_c
#define __subsf3 __subsf3_c
#define L_mul_sf
#define __mulsf3 __mulsf3_c
#define L_div_sf
#define __divsf3 __divsf3_c
#define L_sf_to_df
#define __extendsfdf2 __extendsfdf2_c
#define L_si_to_sf
#define __floatsisf __floatsisf_c
#define L_sf_to_si
#define __fixsfsi __fixsfsi_c
#define L_usi_to_sf
#define __floatunsisf __floatunsisf_c
#endif
/* Comparisons and negation.  Without __ARC_NORM__ the full set is built.
   The debug branch selects only fpcmp_parts, eq, gt, ge and unord; the
   four comparison functions are renamed with a "_c" suffix.  */
#ifndef __ARC_NORM__
#define L_fpcmp_parts_sf
#define L_compare_sf
#define L_eq_sf
#define L_ne_sf
#define L_gt_sf
#define L_ge_sf
#define L_lt_sf
#define L_le_sf
#define L_unord_sf
#define L_negate_sf
#elif ARC_FP_DEBUG
#define L_fpcmp_parts_sf
#define L_eq_sf
#define __eqsf2 __eqsf2_c
#define L_gt_sf
#define __gtsf2 __gtsf2_c
#define L_ge_sf
#define __gesf2 __gesf2_c
#define L_unord_sf
#define __unordsf2 __unordsf2_c
#endif
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* File deliberately left blank. */
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Symbolic names for auxiliary register numbers, for use as the address
   operand of auxiliary register accesses such as
   "lr r12,[D_CACHE_BUILD]".
   NOTE(review): the per-register descriptions that are not spelled out
   below are not documented in this file - consult the ARC programmer's
   reference before relying on a particular meaning.  */
#define LP_START 0x02
#define LP_END 0x03
#define IDENTITY 0x04
#define STATUS32 0x0a
#define COUNT0 0x21 /* Timer 0 count */
#define CONTROL0 0x22 /* Timer 0 control */
#define LIMIT0 0x23 /* Timer 0 limit */
#define INT_VECTOR_BASE 0x25
#define D_CACHE_BUILD 0x72
#define DC_FLDL 0x4c
/* This file contains code to do profiling.
Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../asm.h"
#include "auxreg.h"
/* This file contains code to do profiling. */
/* __profile_timer_cycles: weak global absolute symbol with value 200.
   Being weak, another definition of the symbol can replace it at link
   time.
   NOTE(review): presumably the profiling timer period in cycles -
   confirm against the code that reads this symbol.  */
.weak __profile_timer_cycles
.global __profile_timer_cycles
.set __profile_timer_cycles, 200
.text
; For Arctangent-A5, if no data cache is present, a read of the
; cache build register returns the ID register. For ARC600 and
; later, the version field will be zero.
/* __dcache_linesz
   In:       nothing.
   Out:      r0 = 16 << (bits 16..19 of D_CACHE_BUILD) when a data cache
             is detected, 1 otherwise.
   Clobbers: r12.
   Detection: the low byte of D_CACHE_BUILD (the version field) is
   examined.  Zero means no cache.  A value of 0x20 or more means a cache
   is present.  For smaller nonzero values the whole register is compared
   with IDENTITY; equality means no cache (see the note above).  */
.global __dcache_linesz
.balign 4
__dcache_linesz:
lr r12,[D_CACHE_BUILD]
/* r0 = version field (low byte of the build register).  */
extb_s r0,r12
breq_s r0,0,.Lsz_nocache
brge r0,0x20,.Lsz_havecache
/* Ambiguous version: check whether the read merely echoed IDENTITY.  */
lr r0,[IDENTITY]
breq r12,r0,.Lsz_nocache
.Lsz_havecache:
/* Extract the 4-bit field at bits 16..19 and use it as a shift count.  */
lsr_s r12,r12,16
mov_s r0,16
bmsk_s r12,r12,3
asl_s r0,r0,r12
j_s [blink]
.Lsz_nocache:
/* No data cache: report 1 so that rounding to the line size is a no-op.  */
mov_s r0,1
j_s [blink]
/*-
* Copyright (c) 1983, 1992, 1993
* The Regents of the University of California. All rights reserved.
* Copyright (C) 2007-2012 Free Software Foundation, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if 0
#include <sys/param.h>
#include <sys/time.h>
#endif
#include <sys/gmon.h>
#include <sys/gmon_out.h>
#include <stddef.h>
#include <errno.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#if 0
#include <libc-internal.h>
#include <not-cancel.h>
#ifdef USE_IN_LIBIO
# include <wchar.h>
#endif
#endif
/* Calling-convention marker used on the static helpers below; it expands
   to nothing in this file.  */
#define internal_function
/* Declare ALIASID as a weak alias for FUN with the same type as FUN.
   The expansion already ends in a semicolon, so uses are written without
   one.  */
#define weak_alias(fun,aliasid) extern __typeof(fun) aliasid __attribute__ ((weak, alias (#fun)));
/* Fixed to 0 here, so the "!__libc_enable_secure" test in write_gmon is
   always true.  */
#define __libc_enable_secure 0
/* Head of basic-block list or NULL. */
struct __bb *__bb_head attribute_hidden;
/* Global profiling state; starts out with profiling off.  */
struct gmonparam _gmonparam attribute_hidden = { GMON_PROF_OFF };
/*
* See profil(2) where this is described:
*/
/* Scale passed to __profil by __moncontrol; computed in __monstartup.  */
static int s_scale;
/* s_scale value used when the histogram is at least as large as the
   profiled range.  */
#define SCALE_1_TO_1 0x10000L
/* Write S to standard error without its terminating NUL.  S must be a
   string literal or array, because its length is taken with sizeof.  */
#define ERR(s) write (STDERR_FILENO, s, sizeof (s) - 1)
void moncontrol (int mode);
void __moncontrol (int mode);
/* Writers for the individual record kinds of the output file; each
   appends to the already open descriptor FD.  */
static void write_hist (int fd) internal_function;
static void write_call_graph (int fd) internal_function;
static void write_bb_counts (int fd) internal_function;
/*
* Control profiling
* profiling is what mcount checks to see if
* all the data structures are ready.
*/
/* Start (MODE nonzero) or stop (MODE zero) PC sampling and record the new
   state in _gmonparam.  Does nothing once the state is GMON_PROF_ERROR,
   so an earlier failure is never overwritten.  */
void
__moncontrol (int mode)
{
  struct gmonparam *const gp = &_gmonparam;

  /* Don't change the state if we ran into an error.  */
  if (gp->state == GMON_PROF_ERROR)
    return;

  if (!mode)
    {
      /* Stop sampling.  */
      __profil (NULL, 0, 0, 0);
      gp->state = GMON_PROF_OFF;
      return;
    }

  /* Start sampling into the histogram buffer.  */
  __profil ((void *) gp->kcount, gp->kcountsize, gp->lowpc, s_scale);
  gp->state = GMON_PROF_ON;
}
weak_alias (__moncontrol, moncontrol)
/* Set up profiling for the text range LOWPC..HIGHPC and switch it on.

   Fills in the sizing fields of _gmonparam, allocates one zeroed buffer
   holding the arc table (tos) followed by the cache-line aligned
   histogram (kcount), computes s_scale and finally calls
   __moncontrol (1).  On allocation failure a message is written to
   stderr, the state becomes GMON_PROF_ERROR and profiling stays off.  */
void
__monstartup (u_long lowpc, u_long highpc)
{
register int o;
char *cp;
struct gmonparam *p = &_gmonparam;
int linesz;
/*
* round lowpc and highpc to multiples of the density we're using
* so the rest of the scaling (here and in gprof) stays in ints.
*/
p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
/* p->froms is only used for its element size in this function; no froms
   array is allocated below.  */
if (sizeof *p->froms % sizeof(HISTCOUNTER) != 0)
{
p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
p->textsize = p->highpc - p->lowpc;
p->kcountsize = ROUNDUP((p->textsize + HISTFRACTION - 1) / HISTFRACTION,
sizeof (*p->froms));
}
else
{
/* Avoid odd scales by rounding up highpc to get kcountsize rounded. */
p->textsize = ROUNDUP (highpc - p->lowpc,
HISTFRACTION * sizeof (*p->froms));
p->highpc = p->lowpc + p->textsize;
p->kcountsize = p->textsize / HISTFRACTION;
}
p->hashfraction = HASHFRACTION;
/* -1 means "no precomputed shift available".  */
p->log_hashfraction = -1;
/* The following test must be kept in sync with the corresponding
test in mcount.c. */
if ((HASHFRACTION & (HASHFRACTION - 1)) == 0) {
/* if HASHFRACTION is a power of two, mcount can use shifting
instead of integer division. Precompute shift amount. */
p->log_hashfraction = ffs(p->hashfraction * sizeof(*p->froms)) - 1;
}
/* Size the arc table as a percentage of the text size, clamped to
   [MINARCS, MAXARCS].  */
p->tolimit = p->textsize * ARCDENSITY / 100;
if (p->tolimit < MINARCS)
p->tolimit = MINARCS;
else if (p->tolimit > MAXARCS)
p->tolimit = MAXARCS;
p->tossize = p->tolimit * sizeof(struct tostruct);
/* p->kcount must not share cache lines with the adjacent data, because
we use uncached accesses while profiling. */
linesz = __dcache_linesz ();
/* One allocation: tos, then up to linesz - 1 bytes of alignment slack,
   then kcount padded to a whole number of cache lines.  */
cp = calloc (ROUNDUP (p->kcountsize, linesz) + p->tossize
+ (linesz - 1), 1);
if (! cp)
{
ERR("monstartup: out of memory\n");
p->tos = NULL;
p->state = GMON_PROF_ERROR;
/* In case we lose the error state due to a race,
prevent invalid writes also by clearing tolimit. */
p->tolimit = 0;
return;
}
/* tos sits at the base of the allocation, so p->tos is also the pointer
   that has to be passed to free later.  */
p->tos = (struct tostruct *)cp;
cp += p->tossize;
/* Align the histogram start up to a cache line boundary.  */
cp = (char *) ROUNDUP ((ptrdiff_t) cp, linesz);
p->kcount = (HISTCOUNTER *)cp;
/* cp is not read again after this adjustment.  */
cp += ROUNDUP (p->kcountsize, linesz);
p->tos[0].link = 0;
/* Derive the scale for __profil from the ratio of histogram size to
   profiled range.  */
o = p->highpc - p->lowpc;
if (p->kcountsize < (u_long) o)
{
#ifndef hp300
s_scale = ((float)p->kcountsize / o ) * SCALE_1_TO_1;
#else
/* avoid floating point operations */
int quot = o / p->kcountsize;
if (quot >= 0x10000)
s_scale = 1;
else if (quot >= 0x100)
s_scale = 0x10000 / quot;
else if (o >= 0x800000)
s_scale = 0x1000000 / (o / (p->kcountsize >> 8));
else
s_scale = 0x1000000 / ((o << 8) / p->kcountsize);
#endif
} else
s_scale = SCALE_1_TO_1;
__moncontrol(1);
}
weak_alias (__monstartup, monstartup)
/* Append the time-histogram record to FD: a one-byte GMON_TAG_TIME_HIST
   tag, the histogram header and the raw kcount buffer.  Nothing is
   written when there is no histogram, and the record is abandoned after
   the first short or failed write.  */
static void
internal_function
write_hist (int fd)
{
  struct arc_gmon_hist_hdr thdr __attribute__ ((aligned (__alignof__ (char *))));
  u_char tag = GMON_TAG_TIME_HIST;
  int r;

  if (_gmonparam.kcountsize <= 0)
    return;

  /* The header fields are character arrays; store the values through
     suitably typed pointers.  */
  *(char **) thdr.low_pc = (char *) _gmonparam.lowpc;
  *(char **) thdr.high_pc = (char *) _gmonparam.highpc;
  *(int32_t *) thdr.hist_size = (_gmonparam.kcountsize
                                 / sizeof (HISTCOUNTER));
  *(int32_t *) thdr.prof_rate = __profile_frequency ();
  strncpy (thdr.dimen, "seconds", sizeof (thdr.dimen));
  thdr.dimen_abbrev = 's';

  r = write (fd, &tag, sizeof tag);
  if (r == sizeof tag)
    {
      r = write (fd, &thdr, sizeof thdr);
      if (r == sizeof thdr)
        {
          /* Last piece of the record; nothing follows, so the outcome of
             this write does not change what happens next.  */
          r = write (fd, _gmonparam.kcount, _gmonparam.kcountsize);
          (void) r;
        }
    }
}
/* Append the call-graph arc records to FD.

   Records are batched in a local buffer of NARCS_PER_WRITE slots.  Every
   slot starts with a GMON_TAG_CG_ARC byte, filled in once up front, and
   is followed by the arc payload.  A full buffer is flushed with a
   single write; a partially filled one is flushed at the end.

   Two sources are walked:
   - the descriptor section together with the counters section;
   - the forward section, copied three words at a time.

   Results of write are not checked.  */
static void
internal_function
write_call_graph (int fd)
{
#define NARCS_PER_WRITE 64
#define BYTES_PER_ARC (1 + sizeof (struct gmon_cg_arc_record))
#define BYTES_PER_WRITE (BYTES_PER_ARC * NARCS_PER_WRITE)
ARCINDEX to_index;
u_long frompc, selfpc, count;
char buffer[BYTES_PER_WRITE], *p;
u_long *prof_desc = __arc_profile_desc_secstart;
u_long *prof_count = __arc_profile_counters_secstart;
u_long *prof_desc_end = __arc_profile_desc_secend;
u_long *prof_forward = __arc_profile_forward_secstart;
/* Pre-fill the tag byte of every slot.  */
for (p = buffer; p < buffer + BYTES_PER_WRITE; p += BYTES_PER_ARC)
*p = GMON_TAG_CG_ARC;
p = buffer;
/* The first descriptor word is a caller address with bit 0 masked off.
   NOTE(review): this word is read before any comparison with
   prof_desc_end, i.e. also when the descriptor section is empty.  */
frompc = *prof_desc++ & -2;
while (prof_desc < prof_desc_end)
{
selfpc = *prof_desc++;
/* A word with bit 0 set starts a new caller; the callee address is then
   the following word.  */
if (selfpc & 1)
{
frompc = selfpc & -2;
selfpc = *prof_desc++;
}
/* Exactly one counter word is consumed per callee descriptor.  */
count = *prof_count++;
if (selfpc)
{
/* Direct arc: COUNT is the call count for frompc -> selfpc.  */
struct arc
{
char *frompc;
char *selfpc;
int32_t count;
}
arc;
/* Arcs that were never taken are not written.  */
if (!count)
continue;
arc.frompc = (char *) frompc;
arc.selfpc = (char *) selfpc;
arc.count = count;
/* p + 1 skips the pre-filled tag byte of the slot.
   NOTE(review): the flush test below only fires if sizeof arc equals
   sizeof (struct gmon_cg_arc_record) - confirm for the target ABI.  */
memcpy (p + 1, &arc, sizeof arc);
p += 1 + sizeof arc;
if (p == buffer + BYTES_PER_WRITE)
{
write (fd, buffer, BYTES_PER_WRITE);
p = buffer;
}
}
else
{
/* Zero callee address: COUNT is the index of the first entry of a chain
   in _gmonparam.tos.  Follow the link fields until index 0 and emit one
   arc per entry.  */
for (to_index = count;
to_index != 0;
to_index = _gmonparam.tos[to_index].link)
{
struct arc
{
char *frompc;
char *selfpc;
int32_t count;
}
arc;
arc.frompc = (char *) frompc;
arc.selfpc = (char *) _gmonparam.tos[to_index].selfpc;
arc.count = _gmonparam.tos[to_index].count;
memcpy (p + 1, &arc, sizeof arc);
p += 1 + sizeof arc;
if (p == buffer + BYTES_PER_WRITE)
{
write (fd, buffer, BYTES_PER_WRITE);
p = buffer;
}
}
}
}
/* Forward section: each entry is three words that are copied unchanged
   into the payload of one slot.  */
while (prof_forward < __arc_profile_forward_secend)
{
/* ??? The 'call count' is actually supposed to be a fixed point
factor, with 16 bits each before and after the point.
It would be much nicer if we figured out the actual number
of calls to the caller, and multiplied that with the fixed point
factor to arrive at the estimated calls for the callee. */
memcpy (p + 1, prof_forward, 3 * sizeof *prof_forward);
prof_forward += 3;
p += 1 + 3 * sizeof *prof_forward;
if (p == buffer + BYTES_PER_WRITE)
{
write (fd, buffer, BYTES_PER_WRITE);
p = buffer;
}
}
/* Flush whatever is left in a partially filled buffer.  */
if (p != buffer)
write (fd, buffer, p - buffer);
}
/* Append the basic-block execution counts to FD.

   For every group on the __bb_head list this writes a one-byte
   GMON_TAG_BB_COUNT tag, the number of counts in the group, and then the
   (address, count) pairs of the group.  The pairs are staged in a small
   local array and written out in batches of up to eight.  Results of
   write are not checked.  */
static void
internal_function
write_bb_counts (int fd)
{
  struct { unsigned long address; long count; } batch[8];
  const size_t batch_cap = sizeof (batch) / sizeof (batch[0]);
  u_char tag = GMON_TAG_BB_COUNT;
  struct __bb *group;

  /* All basic blocks of one compilation unit form a single group.  */
  for (group = __bb_head; group != NULL; group = group->next)
    {
      size_t total = group->ncounts;
      size_t used = 0;
      size_t idx = 0;

      write (fd, &tag, 1);
      write (fd, &total, sizeof total);

      while (idx < total)
        {
          /* Staging array full: flush it before adding the next pair.  */
          if (used == batch_cap)
            {
              write (fd, batch, sizeof batch);
              used = 0;
            }
          batch[used].address = group->addresses[idx];
          batch[used].count = group->counts[idx];
          ++used;
          ++idx;
        }

      /* Flush the final, possibly partial, batch.  */
      if (used != 0)
        write (fd, batch, used * sizeof batch[0]);
    }
}
/* Create the profile output file and write all records to it.

   When the environment variable GMON_OUT_PREFIX is set, the file
   "<prefix>.<pid>" is tried first; if that cannot be opened, or the
   variable is unset, "gmon.out" is used.  If neither file can be opened,
   an error is reported with perror and nothing is written.  The file
   receives the gmon header followed by the histogram, call-graph and
   basic-block records.  */
static void
write_gmon (void)
{
#ifndef O_NOFOLLOW
# define O_NOFOLLOW 0
#endif
  const int open_flags = O_CREAT|O_TRUNC|O_WRONLY|O_NOFOLLOW;
  struct gmon_hdr ghdr __attribute__ ((aligned (__alignof__ (int))));
  const char *prefix = getenv ("GMON_OUT_PREFIX");
  int fd = -1;

  if (prefix != NULL && !__libc_enable_secure)
    {
      /* Room for the prefix, a '.', the decimal pid and the NUL.  */
      char path[strlen (prefix) + 20];

      snprintf (path, sizeof (path), "%s.%u", prefix, getpid ());
      fd = open (path, open_flags, 0666);
    }

  if (fd == -1)
    {
      /* No prefix, or the prefixed file could not be opened.  */
      fd = open ("gmon.out", open_flags, 0666);
      if (fd < 0)
        {
          perror ("_mcleanup: gmon.out");
          return;
        }
    }

  /* File header: magic cookie and version, everything else zero.  */
  memset (&ghdr, '\0', sizeof ghdr);
  memcpy (&ghdr.cookie[0], GMON_MAGIC, sizeof (ghdr.cookie));
  *(int32_t *) ghdr.version = GMON_VERSION;
  write (fd, &ghdr, sizeof ghdr);

  /* Records, in file order: PC histogram, call graph, basic blocks.  */
  write_hist (fd);
  write_call_graph (fd);
  write_bb_counts (fd);

  close (fd);
}
/* Write the profile data gathered so far without ending profiling.
   The state is forced to GMON_PROF_OFF while the file is written and put
   back afterwards; the file is only written if profiling was on.  */
void
__write_profiling (void)
{
  const int prior_state = _gmonparam.state;

  _gmonparam.state = GMON_PROF_OFF;
  if (prior_state == GMON_PROF_ON)
    {
      write_gmon ();
    }
  _gmonparam.state = prior_state;
}
#ifndef SHARED
/* The symbol is neither used inside the DSO nor exported from it, which
   would normally be a reason to drop it so that static and shared
   libraries offer the same API.  Profiling is special in static
   libraries anyway, so the alias is kept there; it is left out of the
   DSO build because some quality assurance tests would otherwise
   trigger.  */
weak_alias (__write_profiling, write_profiling)
#endif
/* Stop profiling, write the collected data (unless profiling ended up in
   the GMON_PROF_ERROR state) and release the tostruct table.  */
void
_mcleanup (void)
{
  __moncontrol (0);

  if (_gmonparam.state != GMON_PROF_ERROR)
    {
      write_gmon ();
    }

  /* Release the arc table.  free accepts a null pointer and then does
     nothing, so no explicit check is required.  */
  free (_gmonparam.tos);
}
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef MACHINE_GMON_H
#define MACHINE_GMON_H
/* We can't fake out our own <sys/types.h> header because the newlib / uclibc
headers in GCC_FOR_TARGET take precedence. */
/* sys/gmon.h uses __BEGIN_DECLS, __END_DECLS and __THROW around its
declarations; define them as empty here so that they expand to nothing. */
#define __BEGIN_DECLS
#define __END_DECLS
#define __THROW
/* Called from the assembly implementation of __profil, which skips its
data cache handling when bit 0 of the returned value is set. */
extern int __dcache_linesz (void);
/* Declarator for the function that records one call-graph arc.  COUNTP is
the name given to the pointer parameter and SELFPC the name given to the
callee address parameter; the function body follows the macro use. */
#define _MCOUNT_DECL(countp, selfpc) \
static inline void _mcount_internal (void *countp, u_long selfpc)
extern void _mcount (void);
extern void _mcount_call (void);
/* N.B.: the calling point might be a sibcall, thus blink does not necessarily
hold the caller's address. r8 doesn't hold the caller's address, either,
but rather a pointer to the counter data structure associated with the
caller.
This function must be compiled with optimization turned on in order to
enable a sibcall for the final call to selfpc; this is important when trying
to profile a program with deep tail-recursion that would get a stack
overflow otherwise. */
/* MCOUNT expands to the definition of _mcount_call: it picks up the counter
pointer from r8 and the callee address from r9, records the arc with
_mcount_internal, and then calls the callee through selfpc. */
#define MCOUNT \
void \
_mcount_call (void) \
{ \
register void *countp __asm("r8"); \
register u_long selfpc __asm("r9"); \
_mcount_internal (countp, selfpc); \
((void (*)(void)) selfpc) (); \
}
/* Implemented in assembly.  The arguments are, in order: the counter
buffer, its size, the pc offset, and the scale. */
extern int __profil (u_short *,size_t, size_t, u_int);
#endif /* MACHINE_GMON_H */
/*-
* Copyright (c) 1983, 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Copyright (C) 2007-2012 Free Software Foundation, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if !defined(lint) && !defined(KERNEL) && defined(LIBC_SCCS)
static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
#endif
#if 0
#include <unistd.h>
#include <sys/param.h>
#endif
#include <sys/gmon.h>
/* This file provides the machine-dependent definitions of the _MCOUNT_DECL
and MCOUNT macros. */
#include <machine-gmon.h>
#include <atomic.h>
/*
* mcount is called on entry to each function compiled with the profiling
* switch set. _mcount(), which is declared in a machine-dependent way
* with _MCOUNT_DECL, does the actual work and is either inlined into a
* C routine or called by an assembly stub. In any case, this magic is
* taken care of by the MCOUNT definition in <machine/profile.h>.
*
* _mcount updates data structures that represent traversals of the
* program's call graph edges. frompc and selfpc are the return
* address and function address that represents the given call graph edge.
*
* Note: the original BSD code used the same variable (frompcindex) for
* both frompcindex and frompc. Any reasonable, modern compiler will
* perform this optimization.
*/
/* Record one traversal of a call-graph arc.
   COUNT_PTR is used as a pointer to the ARCINDEX slot that heads the chain
   of tostruct records for the calling site; SELFPC is the address of the
   called function.  Nothing is recorded unless the profiling state is
   GMON_PROF_ON on entry; the state is GMON_PROF_BUSY while the tables are
   updated, and becomes GMON_PROF_ERROR when the tostruct table is full.  */
_MCOUNT_DECL(count_ptr, selfpc) /* _mcount; may be static, inline, etc */
{
register ARCINDEX *frompcindex;
register struct tostruct *top, *prevtop;
register struct gmonparam *p;
register ARCINDEX toindex;
/* Check for nested function trampoline. */
/* NOTE(review): bit 1 of selfpc is taken as the trampoline marker and the
   real callee address is read from offset 10 off that value; confirm
   against the ARC trampoline layout.  */
if (selfpc & 2)
selfpc = *(u_long *) (selfpc + 10);
p = &_gmonparam;
/*
* check that we are profiling
* and that we aren't recursively invoked.
*/
#if 0
if (catomic_compare_and_exchange_bool_acq (&p->state, GMON_PROF_BUSY,
GMON_PROF_ON))
return;
#elif defined (__ARC700__)
/* ??? This could temporarily lose the ERROR / OFF condition in a race,
but doing an actual compare_and_exchange would be too costly. It would
be better if we had a semaphore independent of the 'sticky' state, but
then we could run into ABI compatibility problems with the size of struct
gmonparam. */
{
u_long old_state;
/* Swap GMON_PROF_BUSY into p->state and fetch the previous state.  */
__asm ("ex %0,%1": "=r" (old_state), "+m" (p->state)
: "0" (GMON_PROF_BUSY));
if (old_state != GMON_PROF_ON)
{
switch (old_state)
{
case GMON_PROF_OFF:
/* Swap the OFF state back in and look at what was there meanwhile.  */
__asm ("ex %0,%1": "+r" (old_state), "+m" (p->state));
if (old_state == GMON_PROF_BUSY
/* Switching off while we say we are busy while profiling
was actually already switched off is all right. */
|| old_state == GMON_PROF_OFF)
break;
/* It is not clear if we should allow switching on
profiling at this point, and how to handle further races.
For now, record an error in this case. */
/* Fall through. */
default: /* We expect here only GMON_PROF_ERROR. */
p->state = GMON_PROF_ERROR;
break;
/* Already busy: the BUSY value we swapped in leaves the state unchanged.  */
case GMON_PROF_BUSY: break;
}
return;
}
}
#else /* ??? No semaphore primitives available. */
if (p->state != GMON_PROF_ON)
return;
p->state = GMON_PROF_BUSY;
#endif
frompcindex = count_ptr;
toindex = *frompcindex;
if (toindex == 0) {
/*
* first time traversing this arc
*/
/* tos[0].link counts the tostruct entries handed out so far; the
   incremented value is the index of the new entry.  */
toindex = ++p->tos[0].link;
if (toindex >= (ARCINDEX) p->tolimit)
/* halt further profiling */
goto overflow;
*frompcindex = toindex;
top = &p->tos[toindex];
top->selfpc = selfpc;
top->count = 1;
top->link = 0;
goto done;
}
top = &p->tos[toindex];
if (top->selfpc == selfpc) {
/*
* arc at front of chain; usual case.
*/
top->count++;
goto done;
}
/*
* have to go looking down chain for it.
* top points to what we are looking at,
* prevtop points to previous top.
* we know it is not at the head of the chain.
*/
for (; /* goto done */; ) {
if (top->link == 0) {
/*
* top is end of the chain and none of the chain
* had top->selfpc == selfpc.
* so we allocate a new tostruct
* and link it to the head of the chain.
*/
toindex = ++p->tos[0].link;
if (toindex >= (ARCINDEX) p->tolimit)
goto overflow;
top = &p->tos[toindex];
top->selfpc = selfpc;
top->count = 1;
top->link = *frompcindex;
*frompcindex = toindex;
goto done;
}
/*
* otherwise, check the next arc on the chain.
*/
prevtop = top;
top = &p->tos[top->link];
if (top->selfpc == selfpc) {
/*
* there it is.
* increment its count
* move it to the head of the chain.
*/
top->count++;
toindex = prevtop->link;
prevtop->link = top->link;
top->link = *frompcindex;
*frompcindex = toindex;
goto done;
}
}
done:
/* Update finished; allow the next arc to be recorded.  */
p->state = GMON_PROF_ON;
return;
overflow:
/* The tostruct table is exhausted; the error state stops further
   recording, since only GMON_PROF_ON lets an arc be recorded.  */
p->state = GMON_PROF_ERROR;
return;
}
/*
* Actual definition of the mcount entry point. The MCOUNT macro is defined
* in machine-gmon.h, which is included above and also by sys/gmon.h.
*/
MCOUNT
/* This file contains code to do profiling.
Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../asm.h"
/* This file contains code to do profiling. */
/* __profile_frequency_value is an absolute symbol with the value 1000.
   It is declared weak, presumably so that another definition can replace
   the default at link time (TODO confirm).  */
.weak __profile_frequency_value
.global __profile_frequency_value
.set __profile_frequency_value, 1000
.text
.balign 4
.global __profile_frequency
/* int __profile_frequency (void)
   Out: r0 = value of the symbol __profile_frequency_value.
   No stack use; only r0 is written.  */
FUNC(__profile_frequency)
__profile_frequency:
/* Load the symbol value itself (not a memory word at that address).  */
mov_s r0,__profile_frequency_value
/* Return through the address held in blink.  */
j_s [blink]
ENDFUNC(__profile_frequency)
/* Return frequency of ticks reported by profil. Generic version. */
/*-
* Copyright (c) 1983, 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Copyright (C) 2007-2012 Free Software Foundation, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/types.h>
#include <sys/time.h>
#if 0
#include <libc-internal.h>
#else
#include "sys/gmon.h"
#endif
/* Discover the tick frequency of the machine.  A one microsecond interval
   is handed to setitimer with a zero initial value, and the interval is
   then read back; presumably the system reports it rounded to the
   granularity it supports (TODO confirm for the target C library).
   Returns the number of ticks per second, or 0 (an impossible hertz) if
   the interval read back is below two microseconds.  */
int
__profile_frequency (void)
{
  struct itimerval probe;

  probe.it_value.tv_sec = 0;
  probe.it_value.tv_usec = 0;
  probe.it_interval.tv_sec = 0;
  probe.it_interval.tv_usec = 1;

  /* Install the probe setting, then fetch the timer setting back.  */
  setitimer (ITIMER_REAL, &probe, 0);
  setitimer (ITIMER_REAL, 0, &probe);

  return probe.it_interval.tv_usec < 2
	 ? 0
	 : 1000000 / probe.it_interval.tv_usec;
}
/* This file contains code to do profiling.
Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../asm.h"
#include "auxreg.h"
/* This file contains code to do profiling. */
/* __profile_timer_cycles is an absolute symbol with the value 200; __profil
   writes it to the LIMIT0 auxiliary register.  It is declared weak,
   presumably so that the default can be replaced at link time (TODO
   confirm).  */
.weak __profile_timer_cycles
.global __profile_timer_cycles
.set __profile_timer_cycles, 200
/* __profil_offset is a 4-byte zero-initialized variable.  __profil stores
   (buf - offset / 2) in it, and __profil_irq uses that value as the base
   address when locating the counter for the interrupted pc.  */
.section .bss
.global __profil_offset
.align 4
.type __profil_offset, @object
.size __profil_offset, 4
__profil_offset:
.zero 4
.text
.global __dcache_linesz
.global __profil
/* int __profil (u_short *buf, size_t bufsiz, size_t offset, u_int scale)
   In:  r0 = buf, r1 = bufsiz, r2 = offset, r3 = scale.
   Out: r0 = 0.
   With buf == 0, CONTROL0 is written with zero and the function returns.
   Otherwise the base for the interrupt handler is recorded in
   __profil_offset, the jump at local label 1 is copied to offset 24 from
   INT_VECTOR_BASE, and timer 0 is programmed with __profile_timer_cycles.
   Writes r0-r3, r8, r12, lp_count and the flags; pushes blink around the
   call to __dcache_linesz.
   NOTE(review): r1, r3 and r8 are used after the call to __dcache_linesz
   without being reloaded, so that function has to preserve them -
   confirm.  */
FUNC(__profil)
.Lstop_profiling:
/* Reached only with r0 == 0, so this writes zero to CONTROL0.  */
sr r0,[CONTROL0]
j_s [blink]
.balign 4
__profil:
.Lprofil:
breq_s r0,0,.Lstop_profiling
; r0: buf r1: bufsiz r2: offset r3: scale
bxor.f r3,r3,15; scale must be 0x8000, i.e. 1/2; generate 0.
push_s blink
lsr_s r2,r2,1
/* Keep buf in r8 across the call below.  */
mov_s r8,r0
flag.ne 1 ; halt if wrong scale
/* r0 = buf - offset / 2: the base address used by __profil_irq.  */
sub_s r0,r0,r2
st r0,[__profil_offset]
bl __dcache_linesz
pop_s blink
/* Skip the cache loop when bit 0 of the value returned by __dcache_linesz
   is set.  The next instruction is in the delay slot and reloads buf.  */
bbit1.d r0,0,nocache
mov_s r0,r8
#ifdef __ARC700__
/* Loop count = (bufsiz + 31) / 32; each iteration writes the current
   address to DC_FLDL and advances by 32 bytes.  Judging by the register
   name this flushes one data cache line per iteration (TODO confirm).  */
add_s r1,r1,31
lsr.f lp_count,r1,5
lpne 2f
sr r0,[DC_FLDL]
add_s r0,r0,32
#else /* !__ARC700__ */
# FIX ME: set up loop according to cache line size
/* r12 = bits 16..19 of D_CACHE_BUILD; the step is 16 << r12 bytes and the
   loop count is (((bufsiz - 1) >> 4) >> r12) + 1.  r0 starts at buf - 16
   and is advanced by the step before each write to DC_FLDL.  */
lr r12,[D_CACHE_BUILD]
sub_s r0,r0,16
sub_s r1,r1,1
lsr_s r12,r12,16
asr_s r1,r1,4
bmsk_s r12,r12,3
asr_s r1,r1,r12
add.f lp_count,r1,1
mov_s r1,16
asl_s r1,r1,r12
lpne 2f
add r0,r0,r1
sr r0,[DC_FLDL]
#endif /* __ARC700__ */
2: b_s .Lcounters_cleared
nocache:
.Lcounters_cleared:
/* r3 is zero here (a wrong scale halts above), so the two sr instructions
   below write zero to CONTROL0 and COUNT0.  */
lr r1,[INT_VECTOR_BASE] ; disable timer0 interrupts
sr r3,[CONTROL0]
sr r3,[COUNT0]
/* Copy the two words of the jump at local label 1 into the vector table
   at offset 24.  The "& 2" term presumably compensates for pcl being the
   pc rounded down to a multiple of 4 (TODO confirm).  */
0: ld_s r0,[pcl,1f-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF
0: ld_s r12,[pcl,1f+4-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF + 4
st_s r0,[r1,24]; timer0 uses vector3
st_s r12,[r1,24+4]; timer0 uses vector3
;sr 10000,[LIMIT0]
sr __profile_timer_cycles,[LIMIT0]
mov_s r12,3 ; enable timer interrupts; count only when not halted.
sr r12,[CONTROL0]
lr r12,[STATUS32]
bset_s r12,r12,1 ; allow level 1 interrupts
flag r12
/* Return 0.  */
mov_s r0,0
j_s [blink]
.balign 4
/* Template for the vector table entry; copied above, not executed here.  */
1: j __profil_irq
ENDFUNC(__profil)
/* Timer interrupt handler installed by __profil.
   In:  ilink1 = address to return to; __profil_offset = counter base.
   Increments the 16-bit counter at __profil_offset + (ilink1 / 4) * 2,
   unless the increment would set bit 16 (the counter then keeps its
   value).  r0, r1 and r2 are saved on the stack and restored before the
   return through ilink1.  */
FUNC(__profil_irq)
.balign 4 ; make final jump unaligned to avoid delay penalty
.balign 32,0,12 ; make sure the code spans no more than two cache lines
nop_s
__profil_irq:
push_s r0
ld r0,[__profil_offset]
push_s r1
/* r1 = interrupted pc / 4, the counter index.  */
lsr r1,ilink1,2
push_s r2
/* Stack now holds: [sp] = r2, [sp,4] = r1, [sp,8] = r0.  */
/* r2 = counter value; the index is scaled by the access size.  */
ldw.as.di r2,[r0,r1]
/* r0 = address of the same counter, base + index * 2.  */
add1 r0,r0,r1
/* Restore r1 from its stack slot.  */
ld_s r1,[sp,4]
add_s r2,r2,1
/* Do not store a value that no longer fits in 16 bits.  */
bbit1 r2,16,nostore
stw.di r2,[r0]
/* Restore r2 and drop the r2 and r1 slots (sp += 8).  */
nostore:ld.ab r2,[sp,8]
pop_s r0
j.f [ilink1]
ENDFUNC(__profil_irq)
; could save one cycle if the counters were allocated at link time and
; the contents of __profil_offset were pre-computed at link time, like this:
#if 0
; __profil_offset needs to be PROVIDEd as __profile_base-text/4
.global __profil_offset
.balign 4
__profil_irq:
push_s r0
lsr r0,ilink1,2
add1 r0,__profil_offset,r0
push_s r1
ldw.di r1,[r0]
add_s r1,r1,1
bbit1 r1,16,nostore
stw.di r1,[r0]
nostore:pop_s r1
pop_s r0
j [ilink1]
#endif /* 0 */
/*-
* Copyright (c) 1982, 1986, 1992, 1993
* The Regents of the University of California. All rights reserved.
* Copyright (C) 2007-2012 Free Software Foundation, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)gmon.h 8.2 (Berkeley) 1/4/94
*/
#ifndef _SYS_GMON_H
#define _SYS_GMON_H 1
#if 0
#include <features.h>
#include <sys/types.h>
#else
#include <sys/types.h>
#include "machine-gmon.h"
#define attribute_hidden __attribute__ ((visibility("hidden")))
#endif
#include <stdint.h>
/*
* See gmon_out.h for gmon.out format.
*/
/* structure emitted by "gcc -a". This must match struct bb in
gcc/libgcc2.c. It is OK for gcc to declare a longer structure as
long as the members below are present. */
/* One group of basic-block counts.  The groups form a singly linked list
   headed by __bb_head.  */
struct __bb
{
long zero_word;
const char *filename;
long *counts; /* counts[i] is written out paired with addresses[i] */
long ncounts; /* number of entries in counts[] and addresses[] */
struct __bb *next; /* next group; a null pointer ends the list */
const unsigned long *addresses; /* addresses[i] belongs to counts[i] */
};
extern struct __bb *__bb_head;
/*
* histogram counters are unsigned shorts (according to the kernel).
*/
#define HISTCOUNTER unsigned short
/*
* fraction of text space to allocate for histogram counters here, 1/2
*/
#define HISTFRACTION 2
/*
* Fraction of text space to allocate for from hash buckets.
* The value of HASHFRACTION is based on the minimum number of bytes
* of separation between two subroutine call points in the object code.
* Given MIN_SUBR_SEPARATION bytes of separation the value of
* HASHFRACTION is calculated as:
*
* HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1);
*
* For example, on the VAX, the shortest two call sequence is:
*
* calls $0,(r0)
* calls $0,(r0)
*
* which is separated by only three bytes, thus HASHFRACTION is
* calculated as:
*
* HASHFRACTION = 3 / (2 * 2 - 1) = 1
*
* Note that the division above rounds down, thus if MIN_SUBR_SEPARATION
* is less than three, this algorithm will not work!
*
* In practice, however, call instructions are rarely at a minimal
* distance. Hence, we will define HASHFRACTION to be 2 across all
* architectures. This saves a reasonable amount of space for
* profiling data structures without (in practice) sacrificing
* any granularity.
*/
#define HASHFRACTION 2
/*
* Percent of text space to allocate for tostructs.
* This is a heuristic; we will fail with a warning when profiling programs
* with a very large number of very small functions, but that's
* normally OK.
* 2 is probably still a good value for normal programs.
* Profiling a test case with 64000 small functions will work if
* you raise this value to 3 and link statically (which bloats the
* text size, thus raising the number of arcs expected by the heuristic).
*/
#define ARCDENSITY 3
/*
* Always allocate at least this many tostructs. This
* hides the inadequacy of the ARCDENSITY heuristic, at least
* for small programs.
*/
#define MINARCS 50
/*
* The type used to represent indices into gmonparam.tos[].
*/
#define ARCINDEX u_long
/*
* Maximum number of arcs we want to allow.
* Used to be max representable value of ARCINDEX minus 2, but now
* that ARCINDEX is a long, that's too large; we don't really want
* to allow a 48 gigabyte table.
* The old value of 1<<16 wasn't high enough in practice for large C++
* programs; will 1<<20 be adequate for long? FIXME
*/
#define MAXARCS (1 << 20)
/*
 * One recorded call-graph arc.  Entries live in gmonparam.tos[] and are
 * chained through their indices; entry 0 is not an arc, its link field
 * counts the entries handed out so far.
 */
struct tostruct {
u_long selfpc; /* address of the called function */
long count; /* number of times this arc was traversed */
ARCINDEX link; /* index of the next entry in the chain; 0 ends it */
};
/*
* a raw arc, with pointers to the calling site and
* the called site and a count.
*/
struct rawarc {
u_long raw_frompc;
u_long raw_selfpc;
long raw_count;
};
/*
* general rounding functions.
*/
#define ROUNDDOWN(x,y) (((x)/(y))*(y))
#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
/*
* The profiling data structures are housed in this structure.
*/
/* NOTE(review): only state, tos and tolimit are used by the code in this
   part of the source; the notes on the other members are inferred from
   their names and types and should be confirmed against the code that
   fills them in.  */
struct gmonparam {
long int state; /* one of the GMON_PROF_* values defined below */
u_short *kcount; /* presumably the PC histogram counters */
u_long kcountsize; /* presumably the size of kcount in bytes */
ARCINDEX *froms; /* presumably the per-call-site chain heads */
u_long fromssize; /* presumably the size of froms in bytes */
struct tostruct *tos; /* table of recorded arcs; tos[0].link counts them */
u_long tossize; /* presumably the size of tos in bytes */
long tolimit; /* first index into tos[] that must not be used */
u_long lowpc;
u_long highpc;
u_long textsize;
u_long hashfraction;
long log_hashfraction;
};
extern struct gmonparam _gmonparam;
/*
* Possible states of profiling.
*/
#define GMON_PROF_ON 0
#define GMON_PROF_BUSY 1
#define GMON_PROF_ERROR 2
#define GMON_PROF_OFF 3
/*
* Sysctl definitions for extracting profiling information from the kernel.
*/
#define GPROF_STATE 0 /* int: profiling enabling variable */
#define GPROF_COUNT 1 /* struct: profile tick count buffer */
#define GPROF_FROMS 2 /* struct: from location hash bucket */
#define GPROF_TOS 3 /* struct: destination/count structure */
#define GPROF_GMONPARAM 4 /* struct: profiling parameters (see above) */
__BEGIN_DECLS
/* Set up data structures and start profiling. */
extern void __monstartup (u_long __lowpc, u_long __highpc) __THROW;
extern void monstartup (u_long __lowpc, u_long __highpc) __THROW;
/* Clean up profiling and write out gmon.out. */
extern void _mcleanup (void) __THROW;
extern void __write_profiling (void);
extern int attribute_hidden __profile_frequency (void);
extern u_long __arc_profile_desc_secstart[], __arc_profile_desc_secend[];
extern u_long __arc_profile_forward_secstart[], __arc_profile_forward_secend[];
extern u_long __arc_profile_counters_secstart[];
__END_DECLS
#endif /* sys/gmon.h */
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Tag values; judging by the names they identify a histogram record, a
   call-graph arc record and a basic-block count record in the output
   file (TODO confirm against the code that writes the tags).  */
#define GMON_TAG_TIME_HIST 0
#define GMON_TAG_CG_ARC 1
#define GMON_TAG_BB_COUNT 2
/* Copied into gmon_hdr.cookie when the file header is written.  */
#define GMON_MAGIC "gmon"
/* Stored in gmon_hdr.version when the file header is written.  */
#define GMON_VERSION 1
/* Header of a histogram record.  All members are character arrays,
   presumably so that the layout contains no padding and does not depend
   on the alignment of wider types (TODO confirm).  */
struct arc_gmon_hist_hdr
{
char low_pc[4];
char high_pc[4];
char hist_size[4];
char prof_rate[4];
char dimen[15];
char dimen_abbrev;
};
/* One call-graph arc as stored in the file: caller address, callee
   address and traversal count, going by the member names.  */
struct gmon_cg_arc_record
{
char afrompc[4];
char selfpc[4];
char count[4];
};
/* File header: the magic cookie, the format version, and twelve further
   bytes that are written as zeros.  */
struct gmon_hdr
{
char cookie[4];
char version[4];
char c[12];
};
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-check wrappers.  Each wrapper calls a second
   implementation (__adddf3_c / __subdf3_c, presumably written in C) and
   then the assembly implementation on the same operands, and calls abort
   unless both results are identical in r0 and r1.  The defines at the end
   rename the assembly implementation that follows to __adddf3_asm /
   __subdf3_asm so that these wrappers take over the public names.
   In:  r0-r3 = the two operands.  Out: r0, r1 = result.  */
.global __adddf3
.balign 4
__adddf3:
/* Save the return address and all four operand words.  After the push in
   the delay slot below the stack holds:
   [sp] = r1, [sp,4] = r0, [sp,8] = r3, [sp,12] = r2, [sp,16] = blink.  */
push_s blink
push_s r2
push_s r3
push_s r0
bl.d __adddf3_c
push_s r1
/* Reload the second operand and park the first result in its slots.  */
ld_s r2,[sp,12]
ld_s r3,[sp,8]
st_s r0,[sp,12]
st_s r1,[sp,8]
/* Reload the first operand; the second pop runs in the delay slot.  */
pop_s r1
bl.d __adddf3_asm
pop_s r0
/* r2, r3 = result of the first call; r0, r1 = result of the second.  */
pop_s r3
pop_s r2
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink]
bl abort
/* Same scheme as above, for subtraction.  */
.global __subdf3
.balign 4
__subdf3:
push_s blink
push_s r2
push_s r3
push_s r0
bl.d __subdf3_c
push_s r1
ld_s r2,[sp,12]
ld_s r3,[sp,8]
st_s r0,[sp,12]
st_s r1,[sp,8]
pop_s r1
bl.d __subdf3_asm
pop_s r0
pop_s r3
pop_s r2
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink]
bl abort
#define __adddf3 __adddf3_asm
#define __subdf3 __subdf3_asm
#endif /* DEBUG */
/* N.B. This is optimized for ARC700.
ARC600 has very different scheduling / instruction selection criteria. */
/* inputs: DBL0, DBL1 (r0-r3)
output: DBL0 (r0, r1)
clobber: r2-r10, r12, flags
All NaN highword bits must be 1. NaN low word is random. */
.balign 4
.global __adddf3
.global __subdf3
.long 0x7ff00000 ; exponent mask
FUNC(__adddf3)
FUNC(__subdf3)
__subdf3:
bxor_l DBL1H,DBL1H,31
__adddf3:
ld r9,[pcl,-8]
bmsk r4,DBL0H,30
xor r10,DBL0H,DBL1H
and r6,DBL1H,r9
sub.f r12,r4,r6
asr_s r12,r12,20
blo .Ldbl1_gt
brhs r4,r9,.Linf_nan
brhs r12,32,.Large_shift
brne r12,0,.Lsmall_shift
brge r10,0,.Ladd_same_exp ; r12 == 0
/* After subtracting, we need to normalize; when shifting to place the
leading 1 into position for the implicit 1 and adding that to DBL0H,
we increment the exponent. Thus, we have to subtract one more than
the shift count from the exponent beforehand. Iff the exponent drops thus
below zero (before adding in the fraction with the leading one), we have
generated a denormal number. Denormal handling is basically reducing the
shift count so that we produce a zero exponent instead; however, this way
the shift count can become zero (if we started out with exponent 1).
Therefore, a simple min operation is not good enough, since we don't
want to handle a zero normalizing shift in the main path.
On the plus side, we don't need to check for denorm input, the result
of subtracting these looks just the same as denormals generated during
subtraction. */
bmsk r7,DBL1H,30
cmp r4,r7
cmp.eq DBL0L,DBL1L
blo .L_rsub_same_exp
sub.f DBL0L,DBL0L,DBL1L
bmsk r12,DBL0H,19
bic DBL1H,DBL0H,r12
sbc.f r4,r4,r7
beq_l .Large_cancel
norm DBL1L,r4
b.d .Lsub_done_same_exp
sub r12,DBL1L,9
.balign 4
.Linf_nan:
; If both inputs are inf, but with different signs, the result is NaN.
asr r12,r10,31
or_s DBL1H,DBL1H,r12
j_s.d [blink]
or.eq DBL0H,DBL0H,DBL1H
.balign 4
.L_rsub_same_exp:
rsub.f DBL0L,DBL0L,DBL1L
bmsk r12,DBL1H,19
bic_s DBL1H,DBL1H,r12
sbc.f r4,r7,r4
beq_l .Large_cancel
norm DBL1L,r4
sub r12,DBL1L,9
.Lsub_done_same_exp:
asl_s r12,r12,20
sub_s DBL1L,DBL1L,10
sub DBL0H,DBL1H,r12
xor.f 0,DBL0H,DBL1H
bmi .Ldenorm
.Lpast_denorm:
neg_s r12,DBL1L
lsr r7,DBL0L,r12
asl r12,r4,DBL1L
asl_s DBL0L,DBL0L,DBL1L
add_s r12,r12,r7
j_s.d [blink]
add_l DBL0H,DBL0H,r12
.balign 4
.Ladd_same_exp:
/* This is a special case because we can't test for need to shift
down by checking if bit 20 of DBL0H changes. OTOH, here we know
that we always need to shift down. */
; The implicit 1 of DBL0 is not shifted together with the
; fraction, thus effectively doubled, compensating for not setting
; implicit1 for DBL1
add_s r12,DBL0L,DBL1L
lsr.f 0,r12,2 ; round to even
breq r6,0,.Ldenorm_add
adc.f DBL0L,DBL0L,DBL1L
sub r7,DBL1H,DBL0H
sub1 r7,r7,r9 ; boost exponent by 2/2
rrc DBL0L,DBL0L
asr.f r7,r7 ; DBL1.fraction/2 - DBL0.fraction/2 ; exp++
add.cs.f DBL0L,DBL0L,0x80000000
add_l DBL0H,DBL0H,r7 ; DBL0.implicit1 not shifted for DBL1.implicit1
add.cs DBL0H,DBL0H,1
bic.f 0,r9,DBL0H ; check for overflow -> infinity.
jne_l [blink]
and DBL0H,DBL0H,0xfff00000
j_s.d [blink]
mov_s DBL0L,0
.balign 4
.Large_shift:
brhs r12,55,.Lret_dbl0
bmsk_s DBL1H,DBL1H,19
brne r6,0,.Lno_denorm_large_shift
brhi.d r12,33,.Lfixed_denorm_large_shift
sub_s r12,r12,1
breq r12,31, .Lfixed_denorm_small_shift
.Lshift32:
mov_s r12,DBL1L
mov_s DBL1L,DBL1H
brlt.d r10,0,.Lsub
mov_s DBL1H,0
b_s .Ladd
.Ldenorm_add:
cmp_s r12,DBL1L
mov_s DBL0L,r12
j_s.d [blink]
adc DBL0H,r4,DBL1H
.Lret_dbl0:
j_s [blink]
.balign 4
.Lsmall_shift:
breq.d r6,0,.Ldenorm_small_shift
bmsk_s DBL1H,DBL1H,19
bset_s DBL1H,DBL1H,20
.Lfixed_denorm_small_shift:
neg r8,r12
asl r4,DBL1H,r8
lsr_l DBL1H,DBL1H,r12
lsr r5,DBL1L,r12
asl r12,DBL1L,r8
brge.d r10,0,.Ladd
or DBL1L,r4,r5
/* subtract, abs(DBL0) > abs(DBL1) */
/* DBL0H, DBL0L: original values
DBL1H, DBL1L: fraction with explicit leading 1, shifted into place
r4: orig. DBL0H & 0x7fffffff
r6: orig. DBL1H & 0x7ff00000
r9: 0x7ff00000
r10: orig. DBL0H ^ DBL1H
r12: guard bits */
.balign 4
.Lsub:
neg.f r12,r12
mov_s r7,DBL1H
bmsk r5,DBL0H,19
sbc.f DBL0L,DBL0L,DBL1L
bic DBL1H,DBL0H,r5
bset r5,r5,20
sbc.f r4,r5,r7
beq_l .Large_cancel_sub
norm DBL1L,r4
bmsk r6,DBL1H,30
.Lsub_done:
sub_s DBL1L,DBL1L,9
breq DBL1L,1,.Lsub_done_noshift
asl r5,DBL1L,20
sub_s DBL1L,DBL1L,1
brlo r6,r5,.Ldenorm_sub
sub DBL0H,DBL1H,r5
.Lpast_denorm_sub:
neg_s DBL1H,DBL1L
lsr r6,r12,DBL1H
asl_s r12,r12,DBL1L
and r8,r6,1
add1.f 0,r8,r12
add.ne.f r12,r12,r12
asl r8,DBL0L,DBL1L
lsr r12,DBL0L,DBL1H
adc.f DBL0L,r8,r6
asl r5,r4,DBL1L
add_s DBL0H,DBL0H,r12
j_s.d [blink]
adc DBL0H,DBL0H,r5
.balign 4
.Lno_denorm_large_shift:
breq.d r12,32,.Lshift32
bset_l DBL1H,DBL1H,20
.Lfixed_denorm_large_shift:
neg r8,r12
asl r4,DBL1H,r8
lsr r5,DBL1L,r12
asl.f 0,DBL1L,r8
lsr DBL1L,DBL1H,r12
or r12,r4,r5
tst.eq r12,1
or.ne r12,r12,2
brlt.d r10,0,.Lsub
mov_s DBL1H,0
b_l .Ladd
; If a denorm is produced without shifting, we have an exact result -
; no need for rounding.
.balign 4
.Ldenorm_sub:
lsr DBL1L,r6,20
xor DBL0H,r6,DBL1H
brne.d DBL1L,1,.Lpast_denorm_sub
sub_s DBL1L,DBL1L,1
.Lsub_done_noshift:
add.f 0,r12,r12
btst.eq DBL0L,0
cmp.eq r12,r12
add.cs.f DBL0L,DBL0L,1
bclr r4,r4,20
j_s.d [blink]
adc DBL0H,DBL1H,r4
.balign 4
.Ldenorm_small_shift:
brne.d r12,1,.Lfixed_denorm_small_shift
sub_l r12,r12,1
brlt r10,0,.Lsub
.Ladd: ; bit 20 of DBL1H is clear and bit 0 of r12 does not matter
add.f DBL0L,DBL0L,DBL1L
add_s DBL1H,DBL1H,DBL0H
add.cs DBL1H,DBL1H,1
xor_l DBL0H,DBL0H,DBL1H
bbit0 DBL0H,20,.Lno_shiftdown
lsr.f DBL0H,DBL1H
and r4,DBL0L,2
bmsk DBL0H,DBL0H,18
sbc DBL0H,DBL1H,DBL0H
rrc.f DBL0L,DBL0L
or.f r12,r12,r4
cmp.eq r12,r12
add.cs.f DBL0L,DBL0L,1
bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
jne.d [blink] ; ... non-zero fraction
add.cs DBL0H,DBL0H,1
mov_s DBL0L,0
bmsk DBL1H,DBL0H,19
j_s.d [blink]
bic_s DBL0H,DBL0H,DBL1H
.Lno_shiftdown:
mov_s DBL0H,DBL1H
add.f 0,r12,r12
btst.eq DBL0L,0
cmp.eq r12,r12
add.cs.f DBL0L,DBL0L,1
j_s.d [blink]
add.cs DBL0H,DBL0H,1
.balign 4
.Ldenorm:
bmsk DBL0H,DBL1H,30
lsr r12,DBL0H,20
xor_s DBL0H,DBL0H,DBL1H
sub_l DBL1L,r12,1
bgt .Lpast_denorm
j_s.d [blink]
add_l DBL0H,DBL0H,r4
.balign 4
.Large_cancel:
;DBL0L: mantissa DBL1H: sign & exponent
norm.f DBL1L,DBL0L
bmsk DBL0H,DBL1H,30
add_s DBL1L,DBL1L,22
mov.mi DBL1L,21
add_s r12,DBL1L,1
asl_s r12,r12,20
beq_s .Lret0
brhs.d DBL0H,r12,.Lpast_denorm_large_cancel
sub DBL0H,DBL1H,r12
bmsk DBL0H,DBL1H,30
lsr r12,DBL0H,20
xor_s DBL0H,DBL0H,DBL1H
sub.f DBL1L,r12,1
jle [blink]
.Lpast_denorm_large_cancel:
rsub.f r7,DBL1L,32
lsr r7,DBL0L,r7
asl_s DBL0L,DBL0L,DBL1L
mov.ls r7,DBL0L
add_s DBL0H,DBL0H,r7
j_s.d [blink]
mov.ls DBL0L,0
.Lret0:
j_s.d [blink]
mov_l DBL0H,0
/* r4:DBL0L:r12 : unnormalized result fraction
DBL1H: result sign and exponent */
/* When seeing large cancellation, only the topmost guard bit might be set. */
.balign 4
.Large_cancel_sub:
norm.f DBL1L,DBL0L
bpnz.d 0f
bmsk DBL0H,DBL1H,30
mov r5,22<<20
bne.d 1f
mov_s DBL1L,21
bset r5,r5,5+20
add_s DBL1L,DBL1L,32
brne r12,0,1f
j_s.d [blink]
mov_l DBL0H,0
.balign 4
0: add r5,DBL1L,23
asl r5,r5,20
add_s DBL1L,DBL1L,22
1: brlo DBL0H,r5,.Ldenorm_large_cancel_sub
sub DBL0H,DBL1H,r5
.Lpast_denorm_large_cancel_sub:
rsub.f r7,DBL1L,32
lsr r12,r12,r7
lsr r7,DBL0L,r7
asl_s DBL0L,DBL0L,DBL1L
add.ge DBL0H,DBL0H,r7
add_s DBL0L,DBL0L,r12
add.lt DBL0H,DBL0H,DBL0L
mov.eq DBL0L,r12
j_s.d [blink]
mov.lt DBL0L,0
.balign 4
.Ldenorm_large_cancel_sub:
lsr r5,DBL0H,20
xor_s DBL0H,DBL0H,DBL1H
brgt.d r5,1,.Lpast_denorm_large_cancel_sub
sub DBL1L,r5,1
j_l [blink] ; denorm, no shift -> no rounding needed.
/* r4: DBL0H & 0x7fffffff
r6: DBL1H & 0x7ff00000
r9: 0x7ff00000
r10: sign difference
r12: shift count (negative) */
.balign 4
.Ldbl1_gt:
brhs r6,r9,.Lret_dbl1 ; inf or NaN
neg r8,r12
brhs r8,32,.Large_shift_dbl0
.Lsmall_shift_dbl0:
breq.d r6,0,.Ldenorm_small_shift_dbl0
bmsk_s DBL0H,DBL0H,19
bset_s DBL0H,DBL0H,20
.Lfixed_denorm_small_shift_dbl0:
asl r4,DBL0H,r12
lsr DBL0H,DBL0H,r8
lsr r5,DBL0L,r8
asl r12,DBL0L,r12
brge.d r10,0,.Ladd_dbl1_gt
or DBL0L,r4,r5
/* subtract, abs(DBL0) < abs(DBL1) */
/* DBL0H, DBL0L: fraction with explicit leading 1, shifted into place
DBL1H, DBL1L: original values
r6: orig. DBL1H & 0x7ff00000
r9: 0x7ff00000
r12: guard bits */
.balign 4
.Lrsub:
neg.f r12,r12
bmsk r7,DBL1H,19
mov_s r5,DBL0H
sbc.f DBL0L,DBL1L,DBL0L
bic DBL1H,DBL1H,r7
bset r7,r7,20
sbc.f r4,r7,r5
beq_l .Large_cancel_sub
norm DBL1L,r4
b_l .Lsub_done ; note: r6 is already set up.
.Lret_dbl1:
mov_s DBL0H,DBL1H
j_s.d [blink]
mov_l DBL0L,DBL1L
.balign 4
.Ldenorm_small_shift_dbl0:
sub.f r8,r8,1
bne.d .Lfixed_denorm_small_shift_dbl0
add_s r12,r12,1
brlt r10,0,.Lrsub
.Ladd_dbl1_gt: ; bit 20 of DBL0H is clear and bit 0 of r12 does not matter
add.f DBL0L,DBL0L,DBL1L
add_s DBL0H,DBL0H,DBL1H
add.cs DBL0H,DBL0H,1
xor DBL1H,DBL0H,DBL1H
bbit0 DBL1H,20,.Lno_shiftdown_dbl1_gt
lsr.f DBL1H,DBL0H
and r4,DBL0L,2
bmsk DBL1H,DBL1H,18
sbc DBL0H,DBL0H,DBL1H
rrc.f DBL0L,DBL0L
or.f r12,r12,r4
cmp.eq r12,r12
add.cs.f DBL0L,DBL0L,1
bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
jne.d [blink] ; ... non-zero fraction
add.cs DBL0H,DBL0H,1
mov_s DBL0L,0
bmsk DBL1H,DBL0H,19
j_s.d [blink]
bic_s DBL0H,DBL0H,DBL1H
.Lno_shiftdown_dbl1_gt:
add.f 0,r12,r12
btst.eq DBL0L,0
cmp.eq r12,r12
add.cs.f DBL0L,DBL0L,1
j_s.d [blink]
add.cs DBL0H,DBL0H,1
.balign 4
.Large_shift_dbl0:
brhs r8,55,.Lret_dbl1
bmsk_s DBL0H,DBL0H,19
brne r6,0,.Lno_denorm_large_shift_dbl0
add_s r12,r12,1
brne.d r8,33,.Lfixed_denorm_large_shift_dbl0
sub r8,r8,1
bset_s DBL0H,DBL0H,20
.Lshift32_dbl0:
mov_s r12,DBL0L
mov_s DBL0L,DBL0H
brlt.d r10,0,.Lrsub
mov_s DBL0H,0
b_s .Ladd_dbl1_gt
.balign 4
.Lno_denorm_large_shift_dbl0:
breq.d r8,32,.Lshift32_dbl0
bset_l DBL0H,DBL0H,20
.Lfixed_denorm_large_shift_dbl0:
asl r4,DBL0H,r12
lsr r5,DBL0L,r8
asl.f 0,DBL0L,r12
lsr DBL0L,DBL0H,r8
or r12,r4,r5
tst.eq r12,1
or.ne r12,r12,2
brlt.d r10,0,.Lrsub
mov_s DBL0H,0
b_l .Ladd_dbl1_gt
ENDFUNC(__adddf3)
ENDFUNC(__subdf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
#if 0 /* DEBUG */
/* Self-check harness, compiled out by the "#if 0" above.  Each wrapper
   first calls __addsf3_c / __subsf3_c (defined elsewhere; presumably a C
   reference implementation - confirm) and then the assembly routine,
   which the defines at the end of this block rename to *_asm, on the
   same two operands.  The wrapper returns normally when both results are
   identical and calls abort otherwise.  */
.global __addsf3
FUNC(__addsf3)
.balign 4
__addsf3:
push_s blink ; keep the return address across the two calls
push_s r1 ; save the second operand
bl.d __addsf3_c
push_s r0 ; delay slot: save the first operand
ld_s r1,[sp,4] ; reload the saved second operand
st_s r0,[sp,4] ; park the __addsf3_c result in that stack slot
bl.d __addsf3_asm
pop_s r0 ; delay slot: restore the first operand
pop_s r1 ; r1 = result of __addsf3_c
pop_s blink
cmp r0,r1
jeq_s [blink] ; both implementations agree
bl abort
ENDFUNC(__addsf3)
.global __subsf3
FUNC(__subsf3)
.balign 4
__subsf3:
push_s blink ; keep the return address across the two calls
push_s r1 ; save the second operand
bl.d __subsf3_c
push_s r0 ; delay slot: save the first operand
ld_s r1,[sp,4] ; reload the saved second operand
st_s r0,[sp,4] ; park the __subsf3_c result in that stack slot
bl.d __subsf3_asm
pop_s r0 ; delay slot: restore the first operand
pop_s r1 ; r1 = result of __subsf3_c
pop_s blink
cmp r0,r1
jeq_s [blink] ; both implementations agree
bl abort
ENDFUNC(__subsf3)
/* From here on the real implementations are assembled under the *_asm
   names so that the wrappers above own the public symbols.  */
#define __addsf3 __addsf3_asm
#define __subsf3 __subsf3_asm
#endif /* DEBUG */
/* N.B. This is optimized for ARC700.
ARC600 has very different scheduling / instruction selection criteria. */
/* inputs: r0, r1
output: r0
clobber: r1-r10, r12, flags */
.balign 4
.global __addsf3
.global __subsf3
FUNC(__addsf3)
FUNC(__subsf3)
.long 0x7f800000 ; exponent mask
; __subsf3 toggles bit 31 (the sign) of the second operand and falls
; through into __addsf3.
__subsf3:
bxor_l r1,r1,31
__addsf3:
ld r9,[pcl,-8] ; r9 = the 0x7f800000 word stored just above
bmsk r4,r0,30 ; r4 = r0 with the sign bit cleared
xor r10,r0,r1 ; bit 31 of r10 is set iff the operand signs differ
and r6,r1,r9 ; r6 = exponent field of r1
sub.f r12,r4,r6
asr_s r12,r12,23 ; r12 = exponent difference, used as shift count
blo .Ldbl1_gt ; r4 < r6 (unsigned): r1 has the larger exponent
brhs r4,r9,.Linf_nan ; exponent field of r0 is all ones
brne r12,0,.Lsmall_shift ; exponents differ
brge r10,0,.Ladd_same_exp ; r12 == 0
/* Equal exponents, different signs: subtract the magnitudes here.
After subtracting, we need to normalize; when shifting to place the
leading 1 into position for the implicit 1 and adding that to DBL0,
we increment the exponent. Thus, we have to subtract one more than
the shift count from the exponent beforehand. Iff the exponent drops thus
below zero (before adding in the fraction with the leading one), we have
generated a denormal number. Denormal handling is basically reducing the
shift count so that we produce a zero exponent instead; FWIW, this way
the shift count can become zero (if we started out with exponent 1).
On the plus side, we don't need to check for denorm input, the result
of subtracting these looks just the same as denormals generated during
subtraction. */
bmsk r7,r1,30 ; r7 = r1 with the sign bit cleared
breq r4,r7,.Lret0 ; equal magnitudes cancel to zero
sub.f r5,r4,r7 ; carry is set when r7 is the larger magnitude
lsr r12,r4,23 ; r12 = biased exponent of r0
neg.cs r5,r5 ; r5 = absolute value of the difference
norm r3,r5 ; normalization count of the difference
bmsk r2,r0,22 ; r2 = fraction field of r0
sub_s r3,r3,6
min r12,r12,r3 ; never take more from the exponent than it holds
bic r1,r0,r2 ; r1 = sign and exponent of r0
sub_s r3,r12,1 ; shift by one less than the exponent decrement
asl_s r12,r12,23
asl r2,r5,r3
sub_s r1,r1,r12
add_s r0,r1,r2 ; the leading one of r2 increments the exponent
j_s.d [blink]
bxor.cs r0,r0,31 ; delay slot: flip the sign if carry is (still) set
.balign 4
.Linf_nan:
; Reached from __addsf3 when r4 >= r9, i.e. the exponent field of r0 is
; all ones.
; If both inputs are inf, but with different signs, the result is NaN.
asr r12,r10,31 ; r12 = all ones if the signs differ, else 0
or_s r1,r1,r12 ; with differing signs r1 becomes all ones
j_s.d [blink]
or.eq r0,r0,r1
; NOTE(review): the or.eq above is conditional on the Z flag left by an
; earlier instruction, presumably the sub.f at function entry - confirm.
.balign 4
.Ladd_same_exp:
/* This is a special case because we can't test for need to shift
down by checking if bit 23 of DBL0 changes. OTOH, here we know
that we always need to shift down. */
; adding the two floating point numbers together makes the sign
; cancel out and appear as carry; the exponent is doubled, and the
; fraction also in need of shifting left by one. The two implicit
; ones of the sources make an implicit 1 of the result, again
; non-existent in a place shifted by one.
add.f r0,r0,r1 ; carry out = common sign of both operands
btst_s r0,1 ; Z clear if bit 1 of the raw sum is set
breq r6,0,.Ldenorm_add ; exponent field of r1 is zero
add.ne r0,r0,1 ; round to even.
rrc r0,r0 ; shift right by one, moving the carry into bit 31
bmsk r1,r9,23 ; r1 = 0x00800000, one unit of the exponent field
add r0,r0,r1 ; increment exponent
bic.f 0,r9,r0; check for overflow -> infinity.
jne_l [blink] ; exponent field not all ones: done
mov_s r0,r9 ; overflow: return infinity ...
j_s.d [blink]
bset.cs r0,r0,31 ; ... with the sign bit set if carry is set
.Ldenorm_add:
; Zero exponent field: return r4 + r1 (r4 is r0 without its sign bit).
j_s.d [blink]
add r0,r4,r1
.Lret_dbl0:
; Return r0 unchanged.
j_s [blink]
.balign 4
; Exponents differ and r0 has the larger one; r12 = positive shift count.
.Lsmall_shift:
brhi r12,25,.Lret_dbl0 ; shift count above 25: return r0 unchanged
breq.d r6,0,.Ldenorm_small_shift ; exponent field of r1 is zero
bmsk_s r1,r1,22 ; delay slot: keep only the fraction of r1
bset_s r1,r1,23 ; make the implicit leading one explicit
.Lfixed_denorm_small_shift:
neg r8,r12
asl r5,r1,r8 ; r5 = bits of r1 that the right shift below drops
; NOTE(review): relies on the shifter using the count modulo 32 - confirm
brge.d r10,0,.Ladd ; same signs: add
lsr_l r1,r1,r12 ; delay slot: align r1 with r0
/* subtract, abs(DBL0) > abs(DBL1) */
/* DBL0: original values
DBL1: fraction with explicit leading 1, shifted into place
r4: orig. DBL0 & 0x7fffffff
r6: orig. DBL1 & 0x7f800000
r9: 0x7f800000
r10: orig. DBL0H ^ DBL1H
r5 : guard bits */
.balign 4
.Lsub:
neg.f r12,r5 ; r12 = negated guard bits; borrow feeds the sbc below
bmsk r3,r0,22 ; r3 = fraction field of r0
bset r5,r3,23 ; r5 = fraction of r0 with explicit leading one
sbc.f r4,r5,r1 ; r4 = high part of the fraction difference
beq.d .Large_cancel_sub ; high part cancelled completely
bic r7,r0,r3 ; delay slot: r7 = sign and exponent of r0
norm r3,r4
bmsk r6,r7,30 ; r6 = r7 without the sign bit
.Lsub_done:
; In: r4:r12 = unnormalized result fraction, r3 = norm count of r4,
; r7 = result sign and exponent, r6 = r7 without the sign bit.
sub_s r3,r3,6
breq r3,1,.Lsub_done_noshift
asl r5,r3,23 ; exponent decrement, positioned in the exponent field
sub_l r3,r3,1 ; left shift count
brlo r6,r5,.Ldenorm_sub ; exponent too small: denormal result
sub r0,r7,r5
neg_s r1,r3
lsr.f r2,r12,r1 ; guard bits that move up into the fraction
asl_s r12,r12,r3 ; guard bits that remain below the fraction
btst_s r2,0
bmsk.eq.f r12,r12,30
asl r5,r4,r3
add_s r0,r0,r2
adc.ne r0,r0,0 ; rounding increment; NOTE(review): tie handling
; depends on the flag sequence above - confirm before changing
j_s.d [blink]
add_l r0,r0,r5 ; delay slot: add the shifted fraction
.Lret0:
; Return +0.
j_s.d [blink]
mov_l r0,0
.balign 4
; Entered from .Lsmall_shift when the exponent field of r1 is zero; r1
; already holds only its fraction bits.  The shift count is reduced by
; one and no leading one is set.
.Ldenorm_small_shift:
brne.d r12,1,.Lfixed_denorm_small_shift
sub_s r12,r12,1 ; delay slot: shift count minus one
; Shift count is now zero: nothing to shift, no guard bits.
brlt.d r10,0,.Lsub ; signs differ: subtract
mov_s r5,r12 ; zero r5, and align following code
.Ladd: ; Both bit 23 of DBL1 and bit 0 of r5 are clear.
; Addition with r0 holding the larger exponent; r1 = aligned fraction of
; the other operand, r5 = guard bits.
bmsk r2,r0,22
add_s r2,r2,r1 ; r2 = sum of both fractions
bbit0.d r2,23,.Lno_shiftdown ; no carry into bit 23
add_s r0,r0,r1 ; delay slot: r0 = raw sum
bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity
bmsk r1,r2,22
lsr.ne.f r2,r2,2; cc: even ; hi: might round down
lsr.ne r1,r1,1
rcmp.hi r5,1; hi : round down
bclr.hi r0,r0,0
j_l.d [blink]
sub_s r0,r0,r1
/* r4: DBL0H & 0x7fffffff
r6: DBL1H & 0x7f800000
r9: 0x7f800000
r10: sign difference
r12: shift count (negative) */
.balign 4
; Entered from __addsf3 when r4 < r6, i.e. the second operand (r1) has
; the larger exponent.  The first operand (r0) is reduced to its
; fraction and shifted right to line up with r1.
.Ldbl1_gt:
brhs r6,r9,.Lret_dbl1 ; inf or NaN
neg r8,r12 ; r8 = positive shift count
brhi r8,25,.Lret_dbl1 ; r0 is too small to affect the result
.Lsmall_shift_dbl0:
; The operand that may be denormal on this path is r0, the smaller one,
; so its own exponent field (r4 & r9) has to be tested.  r6 is the
; exponent field of r1 and is never zero here, because this path is only
; taken when r4 < r6; testing r6 made .Ldenorm_small_shift_dbl0
; unreachable, so a denormal r0 got a bogus implicit one and a shift
; count that was one too large.
; Clobbering the flags here is safe: every path from this point sets the
; flags again before reading them.
; NOTE(review): this is one 4-byte instruction more than before; short
; range branches that cross this point should still be in range, but
; confirm that when assembling.
tst r4,r9 ; Z set iff the exponent field of r0 is zero
beq.d .Ldenorm_small_shift_dbl0
bmsk_s r0,r0,22 ; delay slot: keep only the fraction of r0
bset_s r0,r0,23 ; normal number: make the implicit one explicit
.Lfixed_denorm_small_shift_dbl0:
asl r5,r0,r12 ; r5 = guard bits dropped by the right shift below
brge.d r10,0,.Ladd_dbl1_gt ; same signs: add
lsr r0,r0,r8 ; delay slot: align r0 with r1
/* subtract, abs(DBL0) < abs(DBL1) */
/* DBL0: fraction with explicit leading 1, shifted into place
DBL1: original value
r6: orig. DBL1 & 0x7f800000
r9: 0x7f800000
r5: guard bits */
.balign 4
.Lrsub:
neg.f r12,r5 ; r12 = negated guard bits; borrow feeds the sbc below
bmsk r5,r1,22 ; r5 = fraction field of r1
bic r7,r1,r5 ; r7 = sign and exponent of r1
bset r5,r5,23 ; fraction of r1 with explicit leading one
sbc.f r4,r5,r0 ; r4 = high part of the fraction difference
bne.d .Lsub_done ; note: r6 is already set up.
norm r3,r4 ; delay slot: norm count expected by .Lsub_done
/* Fall through */
/* r4:r12 : unnormalized result fraction
r7: result sign and exponent */
/* When seeing large cancellation, only the topmost guard bit might be set. */
.balign 4
.Large_cancel_sub:
breq_s r12,0,.Lret0 ; nothing left at all: return zero
sub r0,r7,24<<23 ; lower the exponent by 24
xor.f 0,r0,r7 ; test if exponent is negative
tst.pl r9,r0 ; test if exponent is zero
jpnz [blink] ; return if non-denormal result
; Denormal result: return the sign of r7 plus a single fraction bit
; whose position is derived from the original exponent.
bmsk r6,r7,30 ; r6 = exponent without the sign bit
lsr r3,r6,23 ; r3 = biased exponent
xor r0,r6,r7 ; r0 = sign bit only
sub_s r3,r3,24-22
j_s.d [blink]
bset r0,r0,r3
; If a denorm is produced, we have an exact result -
; no need for rounding.
.balign 4
; Entered from .Lsub_done with r6 = result exponent (sign cleared),
; r7 = result sign and exponent, r4:r12 = unnormalized fraction.
.Ldenorm_sub:
sub r3,r6,1
lsr.f r3,r3,23 ; r3 = biased exponent minus one = left shift count
xor r0,r6,r7 ; r0 = sign bit only, exponent field zero
neg_s r1,r3
asl.ne r4,r4,r3 ; shift only when the count is non-zero
lsr_s r12,r12,r1 ; guard bits that move up into the fraction
add_s r0,r0,r4
j_s.d [blink]
add.ne r0,r0,r12 ; delay slot: add them only if a shift happened
.balign 4
; Entered from .Lsub_done when no normalizing shift is needed; only
; rounding on the guard bits in r12 remains.
.Lsub_done_noshift:
add.f 0,r12,r12 ; carry = top guard bit, Z if the rest is zero
btst.eq r4,0 ; rest zero: Z now reflects bit 0 of the fraction
bclr r4,r4,23 ; drop the explicit leading one
add r0,r7,r4 ; combine with sign and exponent
j_s.d [blink]
adc.ne r0,r0,0 ; delay slot: add the carry unless Z is set
.balign 4
; Tail of .Ladd when the fraction sum did not carry into bit 23: round
; r0 using the guard bits in r5.
.Lno_shiftdown:
add.f 0,r5,r5 ; carry = top guard bit, Z if the rest is zero
btst.eq r0,0 ; rest zero: Z set iff bit 0 of r0 is clear
cmp.eq r5,r5 ; still Z: this compare leaves the carry clear
j_s.d [blink]
add.cs r0,r0,1 ; delay slot: round up if carry survived
.Lret_dbl1:
; Return the second operand unchanged.
j_s.d [blink]
mov_l r0,r1
.balign 4
; Counterpart of .Ldenorm_small_shift for the case where r1 has the
; larger exponent: reduce the shift count by one (r8 positive, r12
; negative) and do not set a leading one in r0.
.Ldenorm_small_shift_dbl0:
sub.f r8,r8,1
bne.d .Lfixed_denorm_small_shift_dbl0
add_s r12,r12,1 ; delay slot: keep r12 = -r8
; Shift count is now zero: nothing to shift, no guard bits.
brlt.d r10,0,.Lrsub ; signs differ: subtract
mov r5,0 ; delay slot: no guard bits
.Ladd_dbl1_gt: ; both bit 23 of DBL0 and bit 0 of r5 are clear.
; Mirror image of .Ladd with the roles of r0 and r1 exchanged.
bmsk r2,r1,22
add_s r2,r2,r0 ; r2 = sum of both fractions
bbit0.d r2,23,.Lno_shiftdown_dbl1_gt ; no carry into bit 23
add_s r0,r1,r0 ; delay slot: r0 = raw sum
bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity
bmsk r1,r2,22
lsr.ne.f r2,r2,2; cc: even ; hi: might round down
lsr.ne r1,r1,1
rcmp.hi r5,1; hi : round down
bclr.hi r0,r0,0
j_l.d [blink]
sub_s r0,r0,r1
.balign 4
; Same rounding sequence as .Lno_shiftdown.
.Lno_shiftdown_dbl1_gt:
add.f 0,r5,r5 ; carry = top guard bit, Z if the rest is zero
btst.eq r0,0 ; rest zero: Z set iff bit 0 of r0 is clear
cmp.eq r5,r5 ; still Z: this compare leaves the carry clear
j_s.d [blink]
add.cs r0,r0,1 ; delay slot: round up if carry survived
ENDFUNC(__addsf3)
ENDFUNC(__subsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Register names for the low and high 32-bit halves of the two double
   values DBL0 (held in r0/r1) and DBL1 (held in r2/r3).  Which register
   of each pair is the high word depends on the endianness.  */
#ifdef __LITTLE_ENDIAN__
#define DBL0L r0
#define DBL0H r1
#define DBL1L r2
#define DBL1H r3
#else
#define DBL0L r1
#define DBL0H r0
#define DBL1L r3
#define DBL1H r2
#endif
/* The *_l spellings used throughout the assembly sources expand to the
   plain mnemonics.  NOTE(review): presumably "_l" marks places where the
   long encoding is wanted deliberately, in contrast to the short "_s"
   forms - confirm.  */
#define add_l add
#define asr_l asr
#define j_l j
#define jne_l jne
#define jeq_l jeq
#define or_l or
#define mov_l mov
#define b_l b
#define beq_l beq
#define bne_l bne
#define brne_l brne
#define bset_l bset
#define sub_l sub
#define sub1_l sub1
#define lsr_l lsr
#define xor_l xor
#define bic_l bic
#define bmsk_l bmsk
#define bxor_l bxor
/* bcs_s is emitted as blo_s.  */
#define bcs_s blo_s
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/*
to calculate a := b/x as b*y, with y := 1/x:
- x is in the range [1..2)
- calculate 15..18 bit inverse y0 using a table of approximating polynomials.
Precision is higher for polynomials used to evaluate input with larger
value.
- Do one newton-raphson iteration step to double the precision,
then multiply this with the divisor
-> more time to decide if dividend is subnormal
- the worst error propagation is on the side of the value range
with the least initial defect, thus giving us about 30 bits precision.
The truncation error for the either is less than 1 + x/2 ulp.
A 31 bit inverse can be simply calculated by using x with implicit 1
and chaining the multiplies. For a 32 bit inverse, we multiply y0^2
with the bare fraction part of x, then add in y0^2 for the implicit
1 of x.
- If calculating a 31 bit inverse, the systematic error is less than
-1 ulp; likewise, for 32 bit, it is less than -2 ulp.
- If we calculate our seed with a 32 bit fraction, we can achieve a
tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we
only need to take the step to calculate the 2nd stage rest and
rounding adjust 1/32th of the time. However, if we use a 20 bit
fraction for the seed, the negative error can exceed -2 ulp/128, (2)
thus for a simple add / tst check, we need to do the 2nd stage
rest calculation/ rounding adjust 1/16th of the time.
(1): The inexactness of the 32 bit inverse contributes an error in the
range of (-1 .. +(1+x/2) ) ulp/128. Leaving out the low word of the
rest contributes an error < +1/x ulp/128 . In the interval [1,2),
x/2 + 1/x <= 1.5 .
(2): Unless proven otherwise. I have not actually looked for an
example where -2 ulp/128 is exceeded, and my calculations indicate
that the excess, if existent, is less than -1/512 ulp.
??? The algorithm is still based on the ARC700 optimized code.
Maybe we could make better use of 32x16 bit multiply, or 64 bit multiply
results.
*/
#include "../arc-ieee-754.h"
/* Names for the multiplier result registers, and two-instruction
   sequences that perform a 32x32 multiply whose result words are then
   read through mlo / mhi.  The backquote separates the two instructions
   of a sequence.  */
#define mlo acc2
#define mhi acc1
#define mul64(b,c) mullw 0,b,c` machlw 0,b,c
#define mulu64(b,c) mululw 0,b,c` machulw 0,b,c
/* N.B. fp-bit.c does double rounding on denormal numbers. */
#if 0 /* DEBUG */
/* Self-check harness, compiled out by the "#if 0" above.  It calls
   __divdf3_c (defined elsewhere; presumably a C reference implementation
   - confirm) and then the assembly routine, renamed to __divdf3_asm by
   the define at the end of this block, on the same operands.  It returns
   normally if both 64-bit results are identical or if the AND of the two
   high result words has all bits of 0x7ff80000 set; otherwise it calls
   abort.  */
.global __divdf3
FUNC(__divdf3)
.balign 4
__divdf3:
push_s blink
push_s r2 ; save the second operand (r2, r3) ...
push_s r3
push_s r0 ; ... and the first operand (r0, r1)
bl.d __divdf3_c
push_s r1 ; delay slot
ld_s r2,[sp,12] ; reload the second operand
ld_s r3,[sp,8]
st_s r0,[sp,12] ; park the __divdf3_c result in those slots
st_s r1,[sp,8]
pop_s r1 ; restore the first operand
bl.d __divdf3_asm
pop_s r0 ; delay slot
pop_s r3 ; r2, r3 = result of __divdf3_c
pop_s r2
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink] ; both implementations agree
and r12,DBL0H,DBL1H
bic.f 0,0x7ff80000,r12 ; both NaN -> OK
jeq_s [blink]
bl abort
ENDFUNC(__divdf3)
#define __divdf3 __divdf3_asm
#endif /* DEBUG */
FUNC(__divdf3)
.balign 4
/* Exponent mask of the high word of a double; __divdf3 loads it
   pc-relative into r9.  */
.L7ff00000:
.long 0x7ff00000
/* 64-entry table of 32-bit words.  __divdf3 indexes it with the top six
   fraction bits of the divisor to obtain the seed for the reciprocal.
   NOTE(review): the entries presumably pack the coefficients of the
   approximating polynomials mentioned in the comment at the top of this
   file - confirm before editing any value.  */
.Ldivtab:
.long 0xfc0fffe1
.long 0xf46ffdfb
.long 0xed1ffa54
.long 0xe61ff515
.long 0xdf7fee75
.long 0xd91fe680
.long 0xd2ffdd52
.long 0xcd1fd30c
.long 0xc77fc7cd
.long 0xc21fbbb6
.long 0xbcefaec0
.long 0xb7efa100
.long 0xb32f92bf
.long 0xae8f83b7
.long 0xaa2f7467
.long 0xa5ef6479
.long 0xa1cf53fa
.long 0x9ddf433e
.long 0x9a0f3216
.long 0x965f2091
.long 0x92df0f11
.long 0x8f6efd05
.long 0x8c1eeacc
.long 0x88eed876
.long 0x85dec615
.long 0x82eeb3b9
.long 0x800ea10b
.long 0x7d3e8e0f
.long 0x7a8e7b3f
.long 0x77ee6836
.long 0x756e5576
.long 0x72fe4293
.long 0x709e2f93
.long 0x6e4e1c7f
.long 0x6c0e095e
.long 0x69edf6c5
.long 0x67cde3a5
.long 0x65cdd125
.long 0x63cdbe25
.long 0x61ddab3f
.long 0x600d991f
.long 0x5e3d868c
.long 0x5c6d7384
.long 0x5abd615f
.long 0x590d4ecd
.long 0x576d3c83
.long 0x55dd2a89
.long 0x545d18e9
.long 0x52dd06e9
.long 0x516cf54e
.long 0x4ffce356
.long 0x4e9cd1ce
.long 0x4d3cbfec
.long 0x4becae86
.long 0x4aac9da4
.long 0x496c8c73
.long 0x483c7bd3
.long 0x470c6ae8
.long 0x45dc59af
.long 0x44bc4915
.long 0x43ac3924
.long 0x428c27fb
.long 0x418c187a
.long 0x407c07bd
__divdf3_support: /* This label makes debugger output saner. */
.balign 4
; Entered from __divdf3 when the exponent field of the divisor (r7) is
; zero.  r6 = exponent field of the dividend (set in the delay slot of
; the branch that got here), r9 = 0x7ff00000, r10 = address of .Ldivtab.
; The divisor fraction is normalized, the table lookup and first multiply
; are redone, and r7 is adjusted before rejoining the main path.
.Ldenorm_dbl1:
brge r6, \
0x43500000,.Linf_NaN ; large number / denorm -> Inf
bmsk.f r12,DBL1H,19 ; high fraction bits of the divisor
mov.eq r12,DBL1L ; all zero: normalize from the low word instead
mov.eq DBL1L,0
sub.eq r7,r7,32
norm.f r11,r12 ; flag for x/0 -> Inf check
beq_s .Linf_NaN ; divisor is zero
mov.mi r11,0
add.pl r11,r11,1
add_s r12,r12,r12
asl r8,r12,r11
rsub r12,r11,31
lsr r12,DBL1L,r12
tst_s DBL1H,DBL1H ; N = sign of the divisor, used by bxor.mi below
or r8,r8,r12 ; r8 = normalized upper fraction bits
lsr r4,r8,26 ; table index, as in __divdf3
lsr DBL1H,r8,12
ld.as r4,[r10,r4]
bxor.mi DBL1H,DBL1H,31 ; put the divisor sign back
sub r11,r11,11
asl DBL1L,DBL1L,r11
sub r11,r11,1
mulu64 (r4,r8)
sub r7,r7,r11
b.d .Lpast_denorm_dbl1
asl r7,r7,20 ; delay slot: move r7 into exponent field position
; Return Inf carrying the XOR of both operand signs.  If the low word of
; the dividend is zero and its high word without the sign bit is zero
; too, the result is turned into a NaN instead (0/0).
.Linf_NaN:
tst_s DBL0L,DBL0L ; 0/0 -> NaN
xor_s DBL1H,DBL1H,DBL0H ; bit 31 = result sign
bclr.eq.f DBL0H,DBL0H,31
bmsk DBL0H,DBL1H,30
xor_s DBL0H,DBL0H,DBL1H ; DBL0H = result sign bit only
sub.eq DBL0H,DBL0H,1 ; dividend was zero: set all lower bits
mov_s DBL0L,0
j_s.d [blink]
or DBL0H,DBL0H,r9 ; delay slot: exponent field all ones
.balign 4
; Return zero carrying the XOR of both operand signs.
.Lret0_2:
xor_s DBL1H,DBL1H,DBL0H
mov_s DBL0L,0
bmsk DBL0H,DBL1H,30
j_s.d [blink]
xor_s DBL0H,DBL0H,DBL1H ; delay slot: DBL0H = sign bit only
.balign 4
.global __divdf3
/* N.B. the spacing between divtab and the sub3 to get its address must
be a multiple of 8. */
; __divdf3: double precision division.  The dividend is in DBL0H:DBL0L,
; the divisor in DBL1H:DBL1L, and the result is returned in DBL0H:DBL0L.
__divdf3:
asl r8,DBL1H,12 ; shift sign and exponent out of the divisor high word
lsr r4,r8,26 ; r4 = top six fraction bits = index into .Ldivtab
sub3 r10,pcl,51;(.-.Ldivtab) >> 3
ld.as r9,[pcl,-104]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
ld.as r4,[r10,r4] ; r4 = table entry selected by the divisor
lsr r12,DBL1L,20
and.f r7,DBL1H,r9 ; r7 = divisor exponent field; Z if it is zero
or r8,r8,r12 ; r8 = upper 32 bits of the divisor fraction
mulu64 (r4,r8)
beq.d .Ldenorm_dbl1 ; divisor is zero or denormal
.Lpast_denorm_dbl1:
; The next instruction is both the delay slot of the branch above and
; the re-entry point used by .Ldenorm_dbl1.
and.f r6,DBL0H,r9 ; r6 = dividend exponent field; Z if it is zero
breq.d r7,r9,.Linf_nan_dbl1 ; divisor is Inf or NaN
asl r4,r4,12
sub r4,r4,mhi
mululw 0,r4,r4
machulw r5,r4,r4
bne.d .Lnormal_dbl0 ; dividend exponent field is non-zero
lsr r8,r8,1
.balign 4
; Fall-through from __divdf3 when the exponent field of the dividend is
; zero.  The dividend fraction is normalized into r12 / r10 / DBL0L and
; r6 is adjusted by the normalization shift before rejoining the main
; path at .Lpast_denorm_dbl0.
.Ldenorm_dbl0:
bmsk.f r12,DBL0H,19 ; high fraction bits of the dividend
; wb stall
mov.eq r12,DBL0L ; all zero: normalize from the low word instead
sub.eq r6,r6,32
norm.f r11,r12 ; flag for 0/x -> 0 check
brge r7, \
0x43500000, .Lret0_2 ; denorm/large number -> 0
beq_s .Lret0_2 ; dividend is zero
mov.mi r11,0
add.pl r11,r11,1
asl r12,r12,r11
sub r6,r6,r11
add.f 0,r6,31
lsr r10,DBL0L,r6
mov.mi r10,0
add r6,r6,11+32
neg.f r11,r6
asl DBL0L,DBL0L,r11
mov.pl DBL0L,0
sub r6,r6,32-1
b.d .Lpast_denorm_dbl0
asl r6,r6,20 ; delay slot: move r6 into exponent field position
.balign 4
.Linf_nan_dbl1: ; 0/Inf -> NaN Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN
; r6 = dividend exponent field, r9 = 0x7ff00000.
or.f 0,r6,DBL0L
cmp.ne r6,r9
not_s DBL0L,DBL1H
sub_s.ne DBL0L,DBL0L,DBL0L ; clear DBL0L when the flags say "ne"
tst_s DBL0H,DBL0H ; N = sign of the dividend
add_s DBL0H,DBL1H,DBL0L
j_s.d [blink]
bxor.mi DBL0H,DBL0H,31 ; delay slot: negative dividend flips the sign
.balign 4
; Main path of __divdf3.  On entry r4 / r5 hold the intermediate values
; of the reciprocal computation started at __divdf3, r6 and r7 the
; exponent fields of dividend and divisor, r8 the upper divisor fraction
; and r9 = 0x7ff00000.  The fixed-point annotations (uN.M) on the lines
; below are the original author's.
.Lnormal_dbl0:
breq.d r6,r9,.Linf_nan_dbl0 ; dividend is Inf or NaN
asl r12,DBL0H,11 ; delay slot: start aligning the dividend fraction
lsr r10,DBL0L,21
.Lpast_denorm_dbl0:
bset r8,r8,31 ; explicit leading one of the divisor
mulu64 (r5,r8)
add_s r12,r12,r10
bset r5,r12,31 ; explicit leading one of the dividend
cmp r5,r8 ; compare dividend and divisor fractions ...
cmp.eq DBL0L,DBL1L ; ... including the low words on a tie
lsr.cc r5,r5,1
sub r4,r4,mhi ; u1.31 inverse, about 30 bit
mululw 0,r5,r4
machulw r11,r5,r4 ; result fraction highpart
lsr r8,r8,2 ; u3.29
add r5,r6, /* wait for immediate */ \
0x3fe00000
mulu64 (r11,r8) ; u-28.31
asl_s DBL1L,DBL1L,9 ; u-29.23:9
sbc r6,r5,r7 ; r6 = tentative result exponent field
mov r12,mlo ; u-28.31
mulu64 (r11,DBL1L) ; mhi: u-28.23:9
add.cs DBL0L,DBL0L,DBL0L
asl_s DBL0L,DBL0L,6 ; u-26.25:7
asl r10,r11,23
sub_l DBL0L,DBL0L,r12
lsr r7,r11,9
sub r5,DBL0L,mhi ; rest msw ; u-26.31:0
mul64 (r5,r4) ; mhi: result fraction lowpart
xor.f 0,DBL0H,DBL1H ; N = result sign
and DBL0H,r6,r9
add_s DBL0H,DBL0H,r7
bclr r12,r9,20 ; 0x7fe00000
brhs.d r6,r12,.Linf_denorm ; exponent out of the normal range
bxor.mi DBL0H,DBL0H,31 ; delay slot: apply the result sign
add.f r12,mhi,0x11
asr r9,r12,5
sub.mi DBL0H,DBL0H,1
add.f DBL0L,r9,r10
tst r12,0x1c
jne.d [blink] ; rounding was unambiguous: done
add.cs DBL0H,DBL0H,1 ; delay slot: carry from the low word
/* work out exact rounding if we fall through here. */
/* We know that the exact result cannot be represented in double
precision. Find the mid-point between the two nearest
representable values, multiply with the divisor, and check if
the result is larger than the dividend. Since we want to know
only the sign bit, it is sufficient to calculate only the
highpart of the lower 64 bits. */
mulu64 (r11,DBL1L) ; rest before considering r12 in r5 : -mlo
sub.f DBL0L,DBL0L,1
asl r12,r9,2 ; u-22.30:2
sub.cs DBL0H,DBL0H,1
sub.f r12,r12,2
mov r10,mlo ; rest before considering r12 in r5 : -r10
mululw 0,r12,DBL1L
machulw r7,r12,DBL1L ; mhi: u-51.32
asl r5,r5,25 ; s-51.7:25
lsr r10,r10,7 ; u-51.30:2
mulu64 (r12,r8) ; mlo: u-51.31:1
sub r5,r5,r10
add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L
bset r7,r7,0 ; make sure that the result is not zero, and that
sub r5,r5,r7 ; a highpart zero appears negative
sub.f r5,r5,mlo ; rest msw
add.pl.f DBL0L,DBL0L,1
j_s.d [blink]
add.eq DBL0H,DBL0H,1
; Dividend is Inf or NaN: return DBL0 with its sign flipped if the
; divisor is negative.
.Linf_nan_dbl0:
tst_s DBL1H,DBL1H
j_s.d [blink]
bxor.mi DBL0H,DBL0H,31
.balign 4
; Entered from the main path when the tentative exponent field in r6 is
; not below 0x7fe00000 (unsigned): the result either overflows to Inf or
; is denormal / zero.
.Linf_denorm:
lsr r12,r6,28
brlo.d r12,0xc,.Linf ; top nibble below 0xc: overflow
.Ldenorm:
; The next instruction is also the delay slot of the branch above.
asr r6,r6,20 ; r6 = signed exponent value
neg r9,r6
mov_s DBL0H,0
brhs.d r9,54,.Lret0 ; too small for even a denormal: return zero
bxor.mi DBL0H,DBL0H,31 ; delay slot: sign bit, if N is set
; NOTE(review): the N flag used above is presumably still the result
; sign from "xor.f 0,DBL0H,DBL1H" on the main path - confirm.
add r12,mhi,1
and r12,r12,-4
rsub r7,r6,5
asr r10,r12,28
bmsk r4,r12,27
min r7,r7,31
asr DBL0L,r4,r7
add DBL1H,r11,r10
abs.f r10,r4
sub.mi r10,r10,1
add.f r7,r6,32-5
asl r4,r4,r7
mov.mi r4,r10
add.f r10,r6,23
rsub r7,r6,9
lsr r7,DBL1H,r7
asl r10,DBL1H,r10
or.pnz DBL0H,DBL0H,r7
or.mi r4,r4,r10
mov.mi r10,r7
add.f DBL0L,r10,DBL0L
add.cs.f DBL0H,DBL0H,1 ; carry clear after this point
bxor.f 0,r4,31
add.pnz.f DBL0L,DBL0L,1
add.cs.f DBL0H,DBL0H,1
jne_s [blink]
/* Calculation so far was not conclusive; calculate further rest. */
mulu64 (r11,DBL1L) ; rest before considering r12 in r5 : -mlo
asr.f r12,r12,3
asl r5,r5,25 ; s-51.7:25
mov r11,mlo ; rest before considering r12 in r5 : -r11
mulu64 (r12,r8) ; u-51.31:1
and r9,DBL0L,1 ; tie-breaker: round to even
lsr r11,r11,7 ; u-51.30:2
mov DBL1H,mlo ; u-51.31:1
mulu64 (r12,DBL1L) ; u-51.62:2
sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
add_s DBL1H,DBL1H,r11
sub DBL1H,DBL1H,r5 ; -rest msw
add_s DBL1H,DBL1H,mhi ; -rest msw
add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-(
tst_s DBL1H,DBL1H
cmp.eq mlo,r9
add.cs.f DBL0L,DBL0L,1
j_s.d [blink]
add.cs DBL0H,DBL0H,1
.Lret0:
/* return +- 0 */
; DBL0H already holds just the sign bit (set up at .Ldenorm).
j_s.d [blink]
mov_s DBL0L,0
.Linf:
; Return Inf: r9 (0x7ff00000) in the high word, sign applied if N is set.
mov_s DBL0H,r9
mov_s DBL0L,0
j_s.d [blink]
bxor.mi DBL0H,DBL0H,31
ENDFUNC(__divdf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/*
- calculate 15..18 bit inverse using a table of approximating polynomials.
precision is higher for polynomials used to evaluate input with larger
value.
- do one newton-raphson iteration step to double the precision,
then multiply this with the divisor
-> more time to decide if dividend is subnormal
- the worst error propagation is on the side of the value range
with the least initial defect, thus giving us about 30 bits precision.
*/
#include "../arc-ieee-754.h"
/* Names for the multiplier result registers, and two-instruction
   sequences that perform a 32x32 multiply whose result words are then
   read through mlo / mhi.  The backquote separates the two instructions
   of a sequence.  */
#define mlo acc2
#define mhi acc1
#define mul64(b,c) mullw 0,b,c` machlw 0,b,c
#define mulu64(b,c) mululw 0,b,c` machulw 0,b,c
#if 0 /* DEBUG */
/* Self-check harness, compiled out by the "#if 0" above.  It calls
   __divsf3_c (defined elsewhere; presumably a C reference implementation
   - confirm) and then the assembly routine, renamed to __divsf3_asm by
   the define at the end of this block, on the same operands, and
   branches to abort when the two results differ.  */
.global __divsf3
FUNC(__divsf3)
.balign 4
__divsf3:
push_s blink
push_s r1 ; save the second operand
bl.d __divsf3_c
push_s r0 ; delay slot: save the first operand
ld_s r1,[sp,4] ; reload the saved second operand
st_s r0,[sp,4] ; park the __divsf3_c result in that stack slot
bl.d __divsf3_asm
pop_s r0 ; delay slot: restore the first operand
pop_s r1 ; r1 = result of __divsf3_c
pop_s blink
cmp r0,r1
/* Both variants below abort on a mismatch; with "#if 1" the trailing
   "b abort" is only reached if neither of the two branches before it
   is taken.  */
#if 1
bne abort
jeq_s [blink]
b abort
#else
bne abort
j_s [blink]
#endif
ENDFUNC(__divsf3)
#define __divsf3 __divsf3_asm
#endif /* DEBUG */
FUNC(__divsf3)
.balign 4
/* 64-entry table of 32-bit words.  __divsf3 indexes it with bits 17..22
   of the divisor (the top six fraction bits) to obtain the seed for the
   reciprocal.  NOTE(review): the entries presumably pack the
   coefficients of the approximating polynomials mentioned in the comment
   at the top of this file - confirm before editing any value.  */
.Ldivtab:
.long 0xfc0ffff0
.long 0xf46ffefd
.long 0xed1ffd2a
.long 0xe627fa8e
.long 0xdf7ff73b
.long 0xd917f33b
.long 0xd2f7eea3
.long 0xcd1fe986
.long 0xc77fe3e7
.long 0xc21fdddb
.long 0xbcefd760
.long 0xb7f7d08c
.long 0xb32fc960
.long 0xae97c1ea
.long 0xaa27ba26
.long 0xa5e7b22e
.long 0xa1cfa9fe
.long 0x9ddfa1a0
.long 0x9a0f990c
.long 0x9667905d
.long 0x92df878a
.long 0x8f6f7e84
.long 0x8c27757e
.long 0x88f76c54
.long 0x85df630c
.long 0x82e759c5
.long 0x8007506d
.long 0x7d3f470a
.long 0x7a8f3da2
.long 0x77ef341e
.long 0x756f2abe
.long 0x72f7212d
.long 0x709717ad
.long 0x6e4f0e44
.long 0x6c1704d6
.long 0x69e6fb44
.long 0x67cef1d7
.long 0x65c6e872
.long 0x63cedf18
.long 0x61e6d5cd
.long 0x6006cc6d
.long 0x5e36c323
.long 0x5c76b9f3
.long 0x5abeb0b7
.long 0x5916a79b
.long 0x57769e77
.long 0x55de954d
.long 0x54568c4e
.long 0x52d6834d
.long 0x51667a7f
.long 0x4ffe71b5
.long 0x4e9e68f1
.long 0x4d466035
.long 0x4bf65784
.long 0x4aae4ede
.long 0x496e4646
.long 0x48363dbd
.long 0x47063547
.long 0x45de2ce5
.long 0x44be2498
.long 0x43a61c64
.long 0x4296144a
.long 0x41860c0e
.long 0x407e03ee
/* Exponent mask of a float; __divsf3 loads it pc-relative into r9.  */
.L7f800000:
.long 0x7f800000
.balign 4
.global __divsf3_support
__divsf3_support:
; Return Inf carrying the XOR of both operand signs.  If the dividend
; (r0) is zero apart from its sign, all lower bits are set as well so
; that the result is a NaN.  r9 = 0x7f800000.
.Linf_NaN:
bclr.f 0,r0,31 ; 0/0 -> NaN
xor_s r0,r0,r1 ; bit 31 = result sign
bmsk r1,r0,30
bic_s r0,r0,r1 ; r0 = result sign bit only
sub.eq r0,r0,1 ; dividend was zero: set all lower bits
j_s.d [blink]
or r0,r0,r9 ; delay slot: exponent field all ones
; Return zero carrying the XOR of both operand signs.
.Lret0:
xor_s r0,r0,r1
bmsk r1,r0,30
j_s.d [blink]
bic_s r0,r0,r1 ; delay slot: r0 = sign bit only
/* N.B. the spacing between divtab and the sub3 to get its address must
be a multiple of 8. */
; __divsf3: single precision division of r0 (dividend) by r1 (divisor);
; the result is returned in r0.
__divsf3:
ld.as r9,[pcl,-9]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
sub3 r3,pcl,37;(.-.Ldivtab) >> 3
lsr r2,r1,17
and.f r11,r1,r9 ; r11 = divisor exponent field; Z if it is zero
bmsk r5,r2,5 ; r5 = bits 17..22 of the divisor = table index
beq.d .Ldenorm_fp1 ; divisor is zero or denormal
asl r6,r1,8 ; delay slot: divisor fraction moved up by 8
and.f r2,r0,r9 ; r2 = dividend exponent field; Z if it is zero
ld.as r5,[r3,r5] ; r5 = table entry selected by the divisor
asl r4,r1,9 ; divisor fraction without the leading one
bset r6,r6,31 ; divisor fraction with explicit leading one
breq.d r11,r9,.Linf_nan_fp1 ; divisor is Inf or NaN
.Lpast_denorm_fp1:
; The next instruction is both the delay slot of the branch above and
; the re-entry point used by .Ldenorm_fp1.
mululw 0,r5,r4
machulw r8,r5,r4
breq.d r2,r9,.Linf_nan_fp0 ; dividend is Inf or NaN
asl r5,r5,13
sub r7,r5,r8
mululw 0,r7,r6
machulw r8,r7,r6
beq.d .Ldenorm_fp0 ; dividend is zero or denormal
asl r12,r0,8 ; delay slot: dividend fraction moved up by 8
mulu64 (r8,r7)
bset r3,r12,31 ; dividend fraction with explicit leading one
.Lpast_denorm_fp0:
cmp_s r3,r6 ; compare dividend and divisor fractions
lsr.cc r3,r3,1
add_s r2,r2, /* wait for immediate */ \
0x3f000000
sub r7,r7,mhi ; u1.31 inverse, about 30 bit
mulu64 (r3,r7)
sbc r2,r2,r11 ; r2 = tentative result exponent field
xor.f 0,r0,r1 ; N = result sign
and r0,r2,r9
bclr r3,r9,23 ; 0x7f000000
brhs.d r2,r3,.Linf_denorm ; exponent out of the normal range
bxor.mi r0,r0,31 ; delay slot: apply the result sign
.Lpast_denorm:
add r3,mhi,0x22 ; round to nearest or higher
tst r3,0x3c ; check if rounding was unsafe
lsr r3,r3,6
jne.d [blink] ; return if rounding was safe.
add_s r0,r0,r3 ; delay slot: merge the fraction into the result
/* work out exact rounding if we fall through here. */
/* We know that the exact result cannot be represented in single
precision. Find the mid-point between the two nearest
representable values, multiply with the divisor, and check if
the result is larger than the dividend. */
add_s r3,r3,r3
sub_s r3,r3,1
mulu64 (r3,r6)
asr.f 0,r0,1 ; for round-to-even in case this is a denorm
rsub r2,r9,25
asl_s r12,r12,r2
sub.f 0,r12,mlo
j_s.d [blink]
sub.mi r0,r0,1 ; delay slot: step back down if N is set
; Divisor (r1) is Inf or NaN.  r2 = dividend exponent field,
; r9 = 0x7f800000.
.Linf_nan_fp1:
lsr_s r0,r0,31
bmsk.f 0,r1,22 ; Z clear if the divisor fraction is non-zero
asl_s r0,r0,31 ; r0 = sign bit of the dividend only
bne_s 0f ; inf/inf -> nan
brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan
0: j_s.d [blink]
mov r0,-1 ; delay slot: all ones, a NaN
.Lsigned0:
.Linf_nan_fp0:
; Return r0 with its sign flipped if the divisor is negative.
tst_s r1,r1
j_s.d [blink]
bxor.mi r0,r0,31
.balign 4
.global __divsf3
/* For denormal results, it is possible that an exact result needs
rounding, and thus the round-to-even rule has to come into play. */
; Entered from the main path when the tentative exponent field in r2 is
; not below 0x7f000000 (unsigned).
.Linf_denorm:
brlo r2,0xc0000000,.Linf ; below 0xc0000000: overflow
.Ldenorm:
asr_s r2,r2,23 ; r2 = signed exponent value
bic r0,r0,r9 ; clear the exponent field of the result
neg r9,r2 ; r9 = right shift count for the fraction
brlo.d r9,25,.Lpast_denorm ; still representable as a denormal
lsr r3,mlo,r9 ; delay slot
/* Fall through: return +- 0 */
j_s [blink]
.Linf:
; Return Inf: exponent field all ones (r9 = 0x7f800000 on this path).
j_s.d [blink]
or r0,r0,r9
.balign 4
; Entered from __divsf3 when the exponent field of the divisor is zero.
; r6 = divisor shifted left by 8, r3 = address of .Ldivtab,
; r9 = 0x7f800000.  The divisor fraction is normalized, the table lookup
; is redone, and r2 is adjusted by the normalization shift.
.Ldenorm_fp1:
norm.f r12,r6 ; flag for x/0 -> Inf check
add r6,r6,r6
rsub r5,r12,16
ror r5,r1,r5
bmsk r5,r5,5 ; table index taken from the normalized fraction
bic.ne.f 0, \
0x60000000,r0 ; large number / denorm -> Inf
ld.as r5,[r3,r5]
asl r6,r6,r12 ; normalized divisor fraction
beq.d .Linf_NaN ; divisor is zero, or the dividend is too large
and.f r2,r0,r9 ; delay slot: r2 = dividend exponent field
add r4,r6,r6
asl_s r12,r12,23 ; normalization shift in exponent field position
bne.d .Lpast_denorm_fp1 ; dividend exponent field is non-zero
add_s r2,r2,r12 ; delay slot
; Dividend exponent field is zero (also entered from __divsf3): normalize
; the dividend fraction in r12 and adjust r11 by the shift.
.Ldenorm_fp0:
mulu64 (r8,r7)
bclr r12,r12,31
norm.f r3,r12 ; flag for 0/x -> 0 check
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
beq_s .Lret0 ; dividend is zero, or the divisor is too large
asl_s r12,r12,r3
asl_s r3,r3,23 ; normalization shift in exponent field position
add_s r12,r12,r12
add r11,r11,r3
b.d .Lpast_denorm_fp0
mov_s r3,r12 ; delay slot: r3 = normalized dividend fraction
ENDFUNC(__divsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Self-check harness, compiled out by the "#if 0" above.  It calls
   __muldf3_c (defined elsewhere; presumably a C reference implementation
   - confirm) and then the assembly routine, renamed to __muldf3_asm by
   the define at the end of this block, on the same operands.  It returns
   normally if both 64-bit results are identical and branches to abort
   otherwise.  */
.global __muldf3
.balign 4
__muldf3:
push_s blink
push_s r2 ; save the second operand (r2, r3) ...
push_s r3
push_s r0 ; ... and the first operand (r0, r1)
bl.d __muldf3_c
push_s r1 ; delay slot
ld_s r2,[sp,12] ; reload the second operand
ld_s r3,[sp,8]
st_s r0,[sp,12] ; park the __muldf3_c result in those slots
st_s r1,[sp,8]
pop_s r1 ; restore the first operand
bl.d __muldf3_asm
pop_s r0 ; delay slot
pop_s r3 ; r2, r3 = result of __muldf3_c
pop_s r2
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink] ; both implementations agree
b abort
#define __muldf3 __muldf3_asm
#endif /* DEBUG */
/* __muldf3: double-precision multiply built on the mululw / machulw /
   maclw multiply-accumulate instructions, with partial products also read
   back through acc2.
   The operands are named by the DBL0H/DBL0L and DBL1H/DBL1L macros from
   arc-ieee-754.h (not visible here; presumably the high and low words of
   the first and second argument -- confirm against that header).  The
   result is left in DBL0H:DBL0L and control returns through blink.
   r9 is loaded from .L7ff00000 through a hard-coded pcl-relative word
   offset (0x67); any change to the size of the code between __muldf3 and
   .L7ff00000 requires recomputing that offset.  */
__muldf3_support: /* This label makes debugger output saner. */
.balign 4
FUNC(__muldf3)
/* Reached from .Ldenorm_dbl1 when DBL1H is zero apart from its sign bit,
   so any significant bits of that operand are in DBL1L.  Shifts the
   fraction into place, adjusts the exponent held in DBL0H to compensate,
   then falls through into the main entry point.  */
.Ldenorm_2:
breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
norm.f r12,DBL1L
mov.mi r12,21
add.pl r12,r12,22
neg r11,r12
asl_s r12,r12,20
lsr.f DBL1H,DBL1L,r11
ror DBL1L,DBL1L,r11
sub_s DBL0H,DBL0H,r12
mov.eq DBL1H,DBL1L
sub_l DBL1L,DBL1L,DBL1H
/* Fall through. */
.global __muldf3
.balign 4
__muldf3:
mululw 0,DBL0L,DBL1L
machulw r4,DBL0L,DBL1L
; r9 = 0x7ff00000, the exponent field mask (hard-coded offset, see header)
ld.as r9,[pcl,0x67] ; ((.L7ff00000-.+2)/4)]
; r6 = low 20 bits of DBL0H with bit 20 set
bmsk r6,DBL0H,19
bset r6,r6,20
mov r8,acc2
mululw 0,r4,1
; r11 / r12 = exponent fields of the two operands; a zero field is
; handled out of line
and r11,DBL0H,r9
breq.d r11,0,.Ldenorm_dbl0
and r12,DBL1H,r9
breq.d r12,0,.Ldenorm_dbl1
maclw 0,r6,DBL1L
machulw 0,r6,DBL1L
; an all-ones exponent field in either operand goes to .Linf_nan
breq.d r11,r9,.Linf_nan
bmsk r10,DBL1H,19
breq.d r12,r9,.Linf_nan
bset r10,r10,20
maclw 0,r10,DBL0L
machulw r5,r10,DBL0L
add_s r12,r12,r11 ; add exponents
mov r4,acc2
mululw 0,r5,1
maclw 0,r6,r10
machulw r7,r6,r10 ; fraction product in r7:acc2:r4:r8
tst r8,r8
bclr r8,r9,30 ; 0x3ff00000
bset.ne r4,r4,0 ; put least significant word into sticky bit
bclr r6,r9,20 ; 0x7fe00000
lsr.f r10,r7,9
rsub.eq r8,r8,r9 ; 0x40000000
sub r12,r12,r8 ; subtract bias + implicit 1
; unsigned compare against 0x7fe00000: exponent outside the normal range
brhs.d r12,r6,.Linf_denorm
rsub r10,r10,12
/* Shift the fraction product by r10 bits, round, and combine it with the
   exponent in r12.  Bit 31 of the result is set when the xor of DBL0H and
   DBL1H is negative.  */
.Lshift_frac:
neg r8,r10
asl r6,r4,r10
lsr DBL0L,r4,r8
add.f 0,r6,r6
btst.eq DBL0L,0
cmp.eq r4,r4 ; round to nearest / round to even
asl r4,acc2,r10
lsr r5,acc2,r8
adc.f DBL0L,DBL0L,r4
xor.f 0,DBL0H,DBL1H
asl r7,r7,r10
add_s r12,r12,r5
adc DBL0H,r12,r7
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
/* N.B. This is optimized for ARC700.
ARC600 has very different scheduling / instruction selection criteria. */
/* If one number is denormal, subtract some from the exponent of the other
one (if the other exponent is too small, return 0), and normalize the
denormal. Then re-run the computation. */
/* Return a zero that keeps only bit 31 of DBL0H.  */
.Lret0_2:
lsr_s DBL0H,DBL0H,31
asl_s DBL0H,DBL0H,31
j_s.d [blink]
mov_s DBL0L,0
.balign 4
/* The first operand has a zero exponent field: swap the operands (r12 is
   the scratch register) so that the shared code at .Ldenorm_dbl1 always
   sees the zero-exponent operand in DBL1, and recompute r11 for the new
   DBL0H.  */
.Ldenorm_dbl0:
mov_s r12,DBL0L
mov_s DBL0L,DBL1L
mov_s DBL1L,r12
mov_s r12,DBL0H
mov_s DBL0H,DBL1H
mov_s DBL1H,r12
and r11,DBL0H,r9
.Ldenorm_dbl1:
brhs r11,r9,.Linf_nan
; other exponent field too small (unsigned 0x3ca00001 >= r11): return 0
brhs 0x3ca00001,r11,.Lret0
sub_s DBL0H,DBL0H,DBL1H
bmsk.f DBL1H,DBL1H,30
add_s DBL0H,DBL0H,DBL1H
; nothing left in DBL1H below the sign bit: continue at .Ldenorm_2
beq.d .Ldenorm_2
norm r12,DBL1H
sub_s r12,r12,10
asl r5,r12,20
asl_s DBL1H,DBL1H,r12
sub DBL0H,DBL0H,r5
neg r5,r12
lsr r6,DBL1L,r5
asl_s DBL1L,DBL1L,r12
; restart the multiplication with the normalized operand
b.d __muldf3
add_s DBL1H,DBL1H,r6
; Return zero; the three instructions below leave in DBL0H only bit 31 of
; (DBL0H xor DBL1H).
.Lret0: xor_s DBL0H,DBL0H,DBL1H
bclr DBL1H,DBL0H,31
xor_s DBL0H,DBL0H,DBL1H
j_s.d [blink]
mov_s DBL0L,0
.balign 4
/* At least one operand has an all-ones exponent field (infinity or NaN).
   Also reached from .Ldenorm_dbl1, i.e. possibly with the operands
   swapped.  */
.Linf_nan:
bclr r12,DBL1H,31
xor_s DBL1H,DBL1H,DBL0H
bclr_s DBL0H,DBL0H,31
max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
or.f 0,DBL0H,DBL0L
mov_s DBL0L,0
or.ne.f DBL1L,DBL1L,r12
not_s DBL0H,DBL0L ; inf * 0 -> NaN
mov.ne DBL0H,r8
tst_s DBL1H,DBL1H
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
/* We have checked for infinity / NaN input before, and transformed
denormalized inputs into normalized inputs. Thus, the worst case
exponent overflows are:
1 + 1 - 0x400 == 0xc02 : maximum underflow
0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
N.B. 0x7e and 0x7f are also values for overflow.
If (r12 <= -54), we have an underflow to zero. */
.balign 4
.Linf_denorm:
lsr r6,r12,28
; top nibble of r12 below 0xc: overflow, return infinity
brlo.d r6,0xc,.Linf
asr r6,r12,20
add.f r10,r10,r6
brgt.d r10,0,.Lshift_frac
mov_s r12,0
beq.d .Lround_frac
add r10,r10,32
; move the fraction product down by one whole word, folding the word that
; drops out into a sticky bit, until the remaining shift count is usable
.Lshift32_frac:
tst r4,r4
mov r4,acc2
bset.ne r4,r4,1
mululw 0,r7,1
brge.d r10,1,.Lshift_frac
mov r7,0
breq.d r10,0,.Lround_frac
add r10,r10,32
brgt r10,21,.Lshift32_frac
b_s .Lret0
; no further bit shift needed: round and return r7:acc2
.Lround_frac:
add.f 0,r4,r4
btst.eq acc2,0
mov_s DBL0L,acc2
mov_s DBL0H,r7
adc.eq.f DBL0L,DBL0L,0
j_s.d [blink]
adc.eq DBL0H,DBL0H,0
; Overflow: return r9 (0x7ff00000) in the high word with a zero low word;
; bit 31 is set when the xor of the two high words is negative.
.Linf: mov_s DBL0L,0
xor.f DBL1H,DBL1H,DBL0H
mov_s DBL0H,r9
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
ENDFUNC(__muldf3)
.balign 4
; exponent mask constant loaded into r9 at the top of __muldf3
.L7ff00000:
.long 0x7ff00000
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Self-checking wrapper; only built when this block is enabled by hand.
   It calls the reference routine __mulsf3_c and the assembly routine
   (renamed to __mulsf3_asm by the #define at the end of this block) with
   the same arguments, returns the assembly result when both agree or when
   both results are NaNs, and calls abort otherwise.  */
.global __mulsf3
FUNC(__mulsf3)
.balign 4
__mulsf3:
push_s blink
push_s r1
bl.d __mulsf3_c
push_s r0
; stack: [sp,0]=original r0, [sp,4]=original r1, [sp,8]=blink
ld_s r1,[sp,4]
; keep the reference result in the slot that held r1
st_s r0,[sp,4]
bl.d __mulsf3_asm
pop_s r0
; r0 = assembly result, r1 = reference result
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
; results differ: tolerate it only if both have an all-ones exponent field
; and both have a non-zero fraction (bits 0..22)
and r12,r0,r1
bic.f 0,0x7f800000,r12
bne 0f
bmsk.f 0,r0,22
bmsk.ne.f r1,r1,22
jne_s [blink] ; both NaN -> OK
0: bl abort
ENDFUNC(__mulsf3)
#define __mulsf3 __mulsf3_asm
#endif /* DEBUG */
/* __mulsf3: single-precision multiply built on the mululw / machulw
   instructions.
   Operands are taken from r0 and r1; the result is returned in r0 and
   control returns through blink.
   r9 is loaded from .L7f800000 (exponent mask) and r4 from .L7fffffff,
   both through hard-coded pcl-relative word offsets (80, 69, 28 and 11
   below).  Any change to the size of the code between those loads and
   the constants at the end of the function requires recomputing them.  */
.balign 4
.global __mulsf3
FUNC(__mulsf3)
__mulsf3:
ld.as r9,[pcl,80]; [pcl,((.L7f800000-.+2)/4)]
; r4 = fraction bits of r1; r2 = r0 with bit 23 set, shifted left by 8;
; r3 = r4 with bit 23 set
bmsk r4,r1,22
bset r2,r0,23
asl_s r2,r2,8
bset r3,r4,23
; r11 / r12 = exponent fields of r0 / r1; a zero field is handled out of
; line
and r11,r0,r9
breq.d r11,0,.Ldenorm_dbl0
and r12,r1,r9
breq.d r12,0,.Ldenorm_dbl1
; from here on r0 holds r0 xor r1, whose bit 31 is the sign of the product
xor_s r0,r0,r1
mululw 0,r2,r3
machulw r6,r2,r3
breq.d r11,r9,.Linf_nan_dbl0
ld.as r4,[pcl,69]; [pcl,((.L7fffffff-.+2)/4)]
; the delay slot of the next branch is the first instruction at
; .Lpast_denorm
breq.d r12,r9,.Linf_nan_dbl1
.Lpast_denorm:
asl.f 0,r6,8
mov r7,acc2
add.pl r6,r6,r6
bclr.pl r6,r6,23
add.pl.f r7,r7,r7
add.cs r6,r6,1
lsr.f 0,r6,1
add_s r12,r12,r11
adc.f 0,r7,r4
add_s r12,r12, \
-0x3f800000
adc.f r8,r6,r12
tst.pl r8,r9
; keep only bit 31 of r0 (r4 is 0x7fffffff)
bic r0,r0,r4
min r3,r8,r9
; common case: return sign + r3 when the pnz condition holds
jpnz.d [blink]
add.pnz r0,r0,r3
; infinity or denormal number
add.ne.f r3,r3,r3
asr_s r3,r3,23+1
bset r6,r6,23
bpnz.d .Linfinity
sub_s r3,r3,1
neg_s r2,r3
brhi.d r2,24,.Lret_r0 ; right shift shift > 24 -> return +-0
lsr r2,r6,r2
asl r9,r6,r3
lsr.f 0,r2,1
tst r7,r7
add_s r0,r0,r2
bset.ne r9,r9,0
adc.f 0,r9,r4
j_s.d [blink]
add.cs r0,r0,1
; add r9 (the exponent mask) to the sign already in r0
.Linfinity:
j_s.d [blink]
add_s r0,r0,r9
.Lret_r0: j_s [blink]
.balign 4
/* r0 has a zero exponent field.  r0 has NOT been xor-ed with r1 yet on
   this path: the xor happens in the delay slot of bhi.d below.  */
.Ldenorm_dbl0:
bclr_s r2,r2,31
norm.f r4,r2
add_s r2,r2,r2
asl r2,r2,r4
breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
asl r4,r4,23
mululw 0,r2,r3
machulw r6,r2,r3
sub.ne.f r12,r12,r4
ld.as r4,[pcl,28]; [pcl,((.L7fffffff-.+2)/4)]
bhi.d .Lpast_denorm
xor_s r0,r0,r1
; otherwise return r0 with everything but bit 31 cleared
bmsk r1,r0,30
j_s.d [blink]
bic_s r0,r0,r1
.balign 4
/* r0 has a zero exponent field and r1 an all-ones exponent field.
   NOTE(review): r0 still holds the unmodified first operand here, so for
   a non-zero r0 the value returned below takes bit 31 from r0 alone and
   appears to ignore the sign of r1 -- confirm.  */
.Ldenorm_dbl0_inf_nan_dbl1:
bmsk.f 0,r0,30
mov.eq r1,-1
.Linf_nan_dbl1:
xor_s r1,r1,r0
/* NOTE(review): when entered from the main path, r0 = r0 xor r1 and r1 is
   the second operand; the result is the first operand with its sign
   adjusted, and r1 is never tested for NaN.  Inf * NaN therefore appears
   to return an infinity -- confirm.  */
.Linf_nan_dbl0:
bclr_s r1,r1,31
j_s.d [blink]
xor_s r0,r0,r1
.balign 4
/* r1 has a zero exponent field; r0 already holds r0 xor r1.  */
.Ldenorm_dbl1:
breq.d r11,r9,.Linf_nan_dbl0_2
norm.f r3,r4
sub_s r3,r3,7
asl r4,r4,r3
mululw 0,r2,r4
machulw r6,r2,r4
sub_s r3,r3,1
asl_s r3,r3,23
sub.ne.f r11,r11,r3
ld.as r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)]
bhi.d .Lpast_denorm
bmsk r8,r0,30
; otherwise return r0 with everything but bit 31 cleared
j_s.d [blink]
bic r0,r0,r8
.balign 4
/* r0 had an all-ones exponent field and r1 a zero exponent field.  */
.Linf_nan_dbl0_2:
bclr_s r1,r1,31
xor_s r0,r0,r1
sub.eq r1,r1,1 ; inf/nan * 0 -> nan
bic.f 0,r9,r1
j_s.d [blink]
or.eq r0,r0,r1 ; r1 nan -> result nan
.balign 4
; constants read through the hard-coded pcl-relative loads above
.L7f800000:
.long 0x7f800000
.L7fffffff:
.long 0x7fffffff
ENDFUNC(__mulsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/*
to calculate a := b/x as b*y, with y := 1/x:
- x is in the range [1..2)
- calculate 15..18 bit inverse y0 using a table of approximating polynomials.
Precision is higher for polynomials used to evaluate input with larger
value.
- Do one newton-raphson iteration step to double the precision,
then multiply this with the divisor
-> more time to decide if dividend is subnormal
- the worst error propagation is on the side of the value range
with the least initial defect, thus giving us about 30 bits precision.
The truncation error for the either is less than 1 + x/2 ulp.
A 31 bit inverse can be simply calculated by using x with implicit 1
and chaining the multiplies. For a 32 bit inverse, we multiply y0^2
with the bare fraction part of x, then add in y0^2 for the implicit
1 of x.
- If calculating a 31 bit inverse, the systematic error is less than
-1 ulp; likewise, for 32 bit, it is less than -2 ulp.
- If we calculate our seed with a 32 bit fraction, we can achieve a
tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we
only need to take the step to calculate the 2nd stage rest and
rounding adjust 1/32th of the time. However, if we use a 20 bit
fraction for the seed, the negative error can exceed -2 ulp/128, (2)
thus for a simple add / tst check, we need to do the 2nd stage
rest calculation/ rounding adjust 1/16th of the time.
(1): The inexactness of the 32 bit inverse contributes an error in the
range of (-1 .. +(1+x/2) ) ulp/128. Leaving out the low word of the
rest contributes an error < +1/x ulp/128 . In the interval [1,2),
x/2 + 1/x <= 1.5 .
(2): Unless proven otherwise. I have not actually looked for an
example where -2 ulp/128 is exceeded, and my calculations indicate
that the excess, if existent, is less than -1/512 ulp.
??? The algorithm is still based on the ARC700 optimized code.
Maybe we could make better use of 64 bit multiply results and/or mmed .
*/
#include "../arc-ieee-754.h"
/* N.B. fp-bit.c does double rounding on denormal numbers. */
#if 0 /* DEBUG */
/* Self-checking wrapper; only built when this block is enabled by hand.
   It calls the reference routine __divdf3_c and the assembly routine
   (renamed to __divdf3_asm by the #define at the end of this block) with
   the same arguments, returns the assembly result when both agree or when
   both results are NaNs, and calls abort otherwise.
   Stack layout once all five pushes are done:
   [sp,0]=r1 [sp,4]=r0 [sp,8]=r3 [sp,12]=r2 [sp,16]=blink.  */
.global __divdf3
FUNC(__divdf3)
.balign 4
__divdf3:
push_s blink
push_s r2
push_s r3
push_s r0
bl.d __divdf3_c
push_s r1
; reload the second operand, then park the reference result in its slots
ld_s r2,[sp,12]
ld_s r3,[sp,8]
st_s r0,[sp,12]
st_s r1,[sp,8]
pop_s r1
bl.d __divdf3_asm
pop_s r0
; r0:r1 = assembly result, r2:r3 = reference result
pop_s r3
pop_s r2
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink]
and r12,DBL0H,DBL1H
bic.f 0,0x7ff80000,r12 ; both NaN -> OK
jeq_s [blink]
bl abort
ENDFUNC(__divdf3)
#define __divdf3 __divdf3_asm
#endif /* DEBUG */
/* Constant data for __divdf3, placed ahead of the code so that the entry
   point can reach it with negative pcl-relative offsets.
   .L7ff00000 is the exponent mask loaded into r9.
   .Ldivtab has 64 one-word entries.  __divdf3 indexes it with the top six
   fraction bits of the divisor, multiplies the loaded word by the divisor
   fraction and subtracts the high part of that product from the word
   shifted left by 12 to obtain the initial approximation of the inverse.
   The exact packing of the coefficients inside each word is not spelled
   out in this file.
   The distance from .Ldivtab to the sub3 in __divdf3 must stay a multiple
   of 8 (see the note at the entry point), so entries cannot be added or
   removed without adjusting the code.  */
FUNC(__divdf3)
.balign 4
.L7ff00000:
.long 0x7ff00000
.Ldivtab:
.long 0xfc0fffe1
.long 0xf46ffdfb
.long 0xed1ffa54
.long 0xe61ff515
.long 0xdf7fee75
.long 0xd91fe680
.long 0xd2ffdd52
.long 0xcd1fd30c
.long 0xc77fc7cd
.long 0xc21fbbb6
.long 0xbcefaec0
.long 0xb7efa100
.long 0xb32f92bf
.long 0xae8f83b7
.long 0xaa2f7467
.long 0xa5ef6479
.long 0xa1cf53fa
.long 0x9ddf433e
.long 0x9a0f3216
.long 0x965f2091
.long 0x92df0f11
.long 0x8f6efd05
.long 0x8c1eeacc
.long 0x88eed876
.long 0x85dec615
.long 0x82eeb3b9
.long 0x800ea10b
.long 0x7d3e8e0f
.long 0x7a8e7b3f
.long 0x77ee6836
.long 0x756e5576
.long 0x72fe4293
.long 0x709e2f93
.long 0x6e4e1c7f
.long 0x6c0e095e
.long 0x69edf6c5
.long 0x67cde3a5
.long 0x65cdd125
.long 0x63cdbe25
.long 0x61ddab3f
.long 0x600d991f
.long 0x5e3d868c
.long 0x5c6d7384
.long 0x5abd615f
.long 0x590d4ecd
.long 0x576d3c83
.long 0x55dd2a89
.long 0x545d18e9
.long 0x52dd06e9
.long 0x516cf54e
.long 0x4ffce356
.long 0x4e9cd1ce
.long 0x4d3cbfec
.long 0x4becae86
.long 0x4aac9da4
.long 0x496c8c73
.long 0x483c7bd3
.long 0x470c6ae8
.long 0x45dc59af
.long 0x44bc4915
.long 0x43ac3924
.long 0x428c27fb
.long 0x418c187a
.long 0x407c07bd
/* __divdf3: double-precision divide using mulu64 / mul64, whose results
   are read back through mlo and mhi.
   The dividend is named by DBL0H/DBL0L and the divisor by DBL1H/DBL1L,
   macros from arc-ieee-754.h (not visible here; presumably the high and
   low words of the first and second argument -- confirm against that
   header).  The quotient is left in DBL0H:DBL0L and control returns
   through blink.
   Register roles set up at the entry point: r9 = 0x7ff00000 (exponent
   mask), r10 = address of .Ldivtab, r7 = exponent field of the divisor,
   r6 = exponent field of the dividend.
   The table address and the mask are obtained with hard-coded
   pcl-relative offsets (61 and -124); changing the size of anything
   between the data and the entry point requires recomputing them.
   The out-of-line handlers come first; the entry point is further down.  */
__divdf3_support: /* This label makes debugger output saner. */
.balign 4
/* Divisor has a zero exponent field (zero or denormal).  The and.f that
   sets r6 runs in the delay slot of the branch that gets here.  */
.Ldenorm_dbl1:
brge r6, \
0x43500000,.Linf_NaN ; large number / denorm -> Inf
bmsk.f r12,DBL1H,19
mov.eq r12,DBL1L
mov.eq DBL1L,0
sub.eq r7,r7,32
norm.f r11,r12 ; flag for x/0 -> Inf check
beq_s .Linf_NaN
mov.mi r11,0
add.pl r11,r11,1
add_s r12,r12,r12
asl r8,r12,r11
rsub r12,r11,31
lsr r12,DBL1L,r12
tst_s DBL1H,DBL1H
or r8,r8,r12
; redo the table lookup and first multiply with the normalized fraction
lsr r4,r8,26
lsr DBL1H,r8,12
ld.as r4,[r10,r4]
bxor.mi DBL1H,DBL1H,31
sub r11,r11,11
asl DBL1L,DBL1L,r11
sub r11,r11,1
mulu64 r4,r8
sub r7,r7,r11
b.d .Lpast_denorm_dbl1
asl r7,r7,20
.balign 4
/* Dividend has a zero exponent field (zero or denormal).  */
.Ldenorm_dbl0:
bmsk.f r12,DBL0H,19
; wb stall
mov.eq r12,DBL0L
sub.eq r6,r6,32
norm.f r11,r12 ; flag for 0/x -> 0 check
brge r7, \
0x43500000, .Lret0_2 ; denorm/large number -> 0
beq_s .Lret0_2
mov.mi r11,0
add.pl r11,r11,1
asl r12,r12,r11
sub r6,r6,r11
add.f 0,r6,31
lsr r10,DBL0L,r6
mov.mi r10,0
add r6,r6,11+32
neg.f r11,r6
asl DBL0L,DBL0L,r11
mov.pl DBL0L,0
sub r6,r6,32-1
b.d .Lpast_denorm_dbl0
asl r6,r6,20
/* Build an infinity or NaN result: r9 is or-ed into the high word, so the
   exponent field of the result is all ones.  */
.Linf_NaN:
tst_s DBL0L,DBL0L ; 0/0 -> NaN
xor_s DBL1H,DBL1H,DBL0H
bclr.eq.f DBL0H,DBL0H,31
bmsk DBL0H,DBL1H,30
xor_s DBL0H,DBL0H,DBL1H
sub.eq DBL0H,DBL0H,1
mov_s DBL0L,0
j_s.d [blink]
or DBL0H,DBL0H,r9
.balign 4
/* Return a zero whose bit 31 is the xor of bit 31 of the two high words.  */
.Lret0_2:
xor_s DBL1H,DBL1H,DBL0H
mov_s DBL0L,0
bmsk DBL0H,DBL1H,30
j_s.d [blink]
xor_s DBL0H,DBL0H,DBL1H
.balign 4
.global __divdf3
/* N.B. the spacing between divtab and the sub3 to get its address must
be a multiple of 8. */
__divdf3:
; r8 = divisor fraction, left-aligned; r4 = its top six bits (table index)
asl r8,DBL1H,12
lsr r4,r8,26
sub3 r10,pcl,61; (.-.Ldivtab) >> 3
ld.as r9,[pcl,-124]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
ld.as r4,[r10,r4]
lsr r12,DBL1L,20
and.f r7,DBL1H,r9
or r8,r8,r12
mulu64 r4,r8
; the delay slot of the next branch is the and.f at .Lpast_denorm_dbl1
beq.d .Ldenorm_dbl1
.Lpast_denorm_dbl1:
and.f r6,DBL0H,r9
breq.d r7,r9,.Linf_nan_dbl1
asl r4,r4,12
sub r4,r4,mhi
mulu64 r4,r4
; flags are still those of the and.f above: zero exponent field in the
; dividend
beq.d .Ldenorm_dbl0
lsr r8,r8,1
breq.d r6,r9,.Linf_nan_dbl0
asl r12,DBL0H,11
lsr r10,DBL0L,21
.Lpast_denorm_dbl0:
bset r8,r8,31
mulu64 mhi,r8
add_s r12,r12,r10
bset r5,r12,31
cmp r5,r8
cmp.eq DBL0L,DBL1L
lsr.cc r5,r5,1
sub r4,r4,mhi ; u1.31 inverse, about 30 bit
mulu64 r5,r4 ; result fraction highpart
lsr r8,r8,2 ; u3.29
add r5,r6, /* wait for immediate */ \
0x3fe00000
mov r11,mhi ; result fraction highpart
mulu64 r11,r8 ; u-28.31
asl_s DBL1L,DBL1L,9 ; u-29.23:9
sbc r6,r5,r7
mov r12,mlo ; u-28.31
mulu64 r11,DBL1L ; mhi: u-28.23:9
add.cs DBL0L,DBL0L,DBL0L
asl_s DBL0L,DBL0L,6 ; u-26.25:7
asl r10,r11,23
sub_l DBL0L,DBL0L,r12
lsr r7,r11,9
sub r5,DBL0L,mhi ; rest msw ; u-26.31:0
mul64 r5,r4 ; mhi: result fraction lowpart
; N flag = sign of the quotient; consumed by the bxor.mi instructions
xor.f 0,DBL0H,DBL1H
and DBL0H,r6,r9
add_s DBL0H,DBL0H,r7
bclr r12,r9,20 ; 0x7fe00000
; unsigned compare: result exponent outside the normal range
brhs.d r6,r12,.Linf_denorm
bxor.mi DBL0H,DBL0H,31
add.f r12,mhi,0x11
asr r9,r12,5
sub.mi DBL0H,DBL0H,1
add.f DBL0L,r9,r10
tst r12,0x1c
; return straight away unless the bits tested above are all zero
jne.d [blink]
add.cs DBL0H,DBL0H,1
/* work out exact rounding if we fall through here. */
/* We know that the exact result cannot be represented in double
precision. Find the mid-point between the two nearest
representable values, multiply with the divisor, and check if
the result is larger than the dividend. Since we want to know
only the sign bit, it is sufficient to calculate only the
highpart of the lower 64 bits. */
mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo
sub.f DBL0L,DBL0L,1
asl r12,r9,2 ; u-22.30:2
sub.cs DBL0H,DBL0H,1
sub.f r12,r12,2
mov r10,mlo ; rest before considering r12 in r5 : -r10
mulu64 r12,DBL1L ; mhi: u-51.32
asl r5,r5,25 ; s-51.7:25
lsr r10,r10,7 ; u-51.30:2
mov r7,mhi ; u-51.32
mulu64 r12,r8 ; mlo: u-51.31:1
sub r5,r5,r10
add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L
bset r7,r7,0 ; make sure that the result is not zero, and that
sub r5,r5,r7 ; a highpart zero appears negative
sub.f r5,r5,mlo ; rest msw
add.pl.f DBL0L,DBL0L,1
j_s.d [blink]
add.eq DBL0H,DBL0H,1
.Linf_nan_dbl1: ; 0/Inf -> NaN Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN
or.f 0,r6,DBL0L
cmp.ne r6,r9
not_s DBL0L,DBL1H
sub_s.ne DBL0L,DBL0L,DBL0L
tst_s DBL0H,DBL0H
add_s DBL0H,DBL1H,DBL0L
j_s.d [blink]
bxor.mi DBL0H,DBL0H,31
; Dividend has an all-ones exponent field: return it with bit 31 toggled
; when the divisor high word is negative.
.Linf_nan_dbl0:
tst_s DBL1H,DBL1H
j_s.d [blink]
bxor.mi DBL0H,DBL0H,31
.balign 4
.Linf_denorm:
lsr r12,r6,28
; the delay slot of this branch is the asr at .Ldenorm
brlo.d r12,0xc,.Linf
.Ldenorm:
asr r6,r6,20
neg r9,r6
mov_s DBL0H,0
; shift count of 54 or more: the result is a signed zero
brhs.d r9,54,.Lret0
bxor.mi DBL0H,DBL0H,31
add r12,mhi,1
and r12,r12,-4
rsub r7,r6,5
asr r10,r12,28
bmsk r4,r12,27
min r7,r7,31
asr DBL0L,r4,r7
add DBL1H,r11,r10
abs.f r10,r4
sub.mi r10,r10,1
add.f r7,r6,32-5
asl r4,r4,r7
mov.mi r4,r10
add.f r10,r6,23
rsub r7,r6,9
lsr r7,DBL1H,r7
asl r10,DBL1H,r10
or.pnz DBL0H,DBL0H,r7
or.mi r4,r4,r10
mov.mi r10,r7
add.f DBL0L,r10,DBL0L
add.cs.f DBL0H,DBL0H,1 ; carry clear after this point
bxor.f 0,r4,31
add.pnz.f DBL0L,DBL0L,1
add.cs.f DBL0H,DBL0H,1
jne_s [blink]
/* Calculation so far was not conclusive; calculate further rest. */
mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo
asr.f r12,r12,3
asl r5,r5,25 ; s-51.7:25
mov r11,mlo ; rest before considering r12 in r5 : -r11
mulu64 r12,r8 ; u-51.31:1
and r9,DBL0L,1 ; tie-breaker: round to even
lsr r11,r11,7 ; u-51.30:2
mov DBL1H,mlo ; u-51.31:1
mulu64 r12,DBL1L ; u-51.62:2
sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
add_s DBL1H,DBL1H,r11
sub DBL1H,DBL1H,r5 ; -rest msw
add_s DBL1H,DBL1H,mhi ; -rest msw
add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-(
tst_s DBL1H,DBL1H
cmp.eq mlo,r9
add.cs.f DBL0L,DBL0L,1
j_s.d [blink]
add.cs DBL0H,DBL0H,1
.Lret0:
/* return +- 0 */
j_s.d [blink]
mov_s DBL0L,0
; Overflow: high word = r9 with bit 31 toggled when the N flag is set,
; low word = 0.
.Linf:
mov_s DBL0H,r9
mov_s DBL0L,0
j_s.d [blink]
bxor.mi DBL0H,DBL0H,31
ENDFUNC(__divdf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/*
- calculate 15..18 bit inverse using a table of approximating polynomials.
precision is higher for polynomials used to evaluate input with larger
value.
- do one newton-raphson iteration step to double the precision,
then multiply this with the divisor
-> more time to decide if dividend is subnormal
- the worst error propagation is on the side of the value range
with the least initial defect, thus giving us about 30 bits precision.
*/
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Self-checking wrapper; only built when this block is enabled by hand.
   It calls the reference routine __divsf3_c and the assembly routine
   (renamed to __divsf3_asm by the #define at the end of this block) with
   the same arguments and branches to abort when the two results are not
   bit-identical.  Unlike some of the other wrappers in this directory it
   makes no allowance for two differing NaN results.  */
.global __divsf3
FUNC(__divsf3)
.balign 4
__divsf3:
push_s blink
push_s r1
bl.d __divsf3_c
push_s r0
; stack: [sp,0]=original r0, [sp,4]=original r1, [sp,8]=blink
ld_s r1,[sp,4]
; keep the reference result in the slot that held r1
st_s r0,[sp,4]
bl.d __divsf3_asm
pop_s r0
; r0 = assembly result, r1 = reference result
pop_s r1
pop_s blink
cmp r0,r1
#if 1
; once bne has fallen through the results are equal, so jeq_s always
; returns and the trailing "b abort" is never reached
bne abort
jeq_s [blink]
b abort
#else
bne abort
j_s [blink]
#endif
ENDFUNC(__divsf3)
#define __divsf3 __divsf3_asm
#endif /* DEBUG */
/* Constant data for __divsf3, placed ahead of the code so that the entry
   point can reach it with pcl-relative offsets.
   .Ldivtab has 64 one-word entries.  __divsf3 indexes it with bits 17..22
   of the divisor (the top six fraction bits), multiplies the loaded word
   by the divisor fraction and subtracts the high part of that product
   from the word shifted left by 13 to obtain the initial approximation of
   the inverse.  The exact packing of the coefficients inside each word is
   not spelled out in this file.
   .L7f800000 is the exponent mask loaded into r9.
   The distance from .Ldivtab to the sub3 in __divsf3 must stay a multiple
   of 8 (see the note at the entry point), so entries cannot be added or
   removed without adjusting the code.  */
FUNC(__divsf3)
.balign 4
.Ldivtab:
.long 0xfc0ffff0
.long 0xf46ffefd
.long 0xed1ffd2a
.long 0xe627fa8e
.long 0xdf7ff73b
.long 0xd917f33b
.long 0xd2f7eea3
.long 0xcd1fe986
.long 0xc77fe3e7
.long 0xc21fdddb
.long 0xbcefd760
.long 0xb7f7d08c
.long 0xb32fc960
.long 0xae97c1ea
.long 0xaa27ba26
.long 0xa5e7b22e
.long 0xa1cfa9fe
.long 0x9ddfa1a0
.long 0x9a0f990c
.long 0x9667905d
.long 0x92df878a
.long 0x8f6f7e84
.long 0x8c27757e
.long 0x88f76c54
.long 0x85df630c
.long 0x82e759c5
.long 0x8007506d
.long 0x7d3f470a
.long 0x7a8f3da2
.long 0x77ef341e
.long 0x756f2abe
.long 0x72f7212d
.long 0x709717ad
.long 0x6e4f0e44
.long 0x6c1704d6
.long 0x69e6fb44
.long 0x67cef1d7
.long 0x65c6e872
.long 0x63cedf18
.long 0x61e6d5cd
.long 0x6006cc6d
.long 0x5e36c323
.long 0x5c76b9f3
.long 0x5abeb0b7
.long 0x5916a79b
.long 0x57769e77
.long 0x55de954d
.long 0x54568c4e
.long 0x52d6834d
.long 0x51667a7f
.long 0x4ffe71b5
.long 0x4e9e68f1
.long 0x4d466035
.long 0x4bf65784
.long 0x4aae4ede
.long 0x496e4646
.long 0x48363dbd
.long 0x47063547
.long 0x45de2ce5
.long 0x44be2498
.long 0x43a61c64
.long 0x4296144a
.long 0x41860c0e
.long 0x407e03ee
.L7f800000:
.long 0x7f800000
/* __divsf3: single-precision divide using mulu64, whose results are read
   back through mlo and mhi.
   The dividend is taken from r0 and the divisor from r1; the quotient is
   returned in r0 and control returns through blink.
   Register roles set up at the entry point: r3 = address of .Ldivtab,
   r9 = 0x7f800000 (exponent mask), r11 = exponent field of the divisor,
   r2 = exponent field of the dividend, r6 = divisor fraction left-aligned
   with bit 31 set.
   The table address and the mask are obtained with hard-coded
   pcl-relative offsets (37 and -13); changing the size of anything
   between the data and the entry point requires recomputing them.
   Two small result builders come first; the entry point follows them.  */
.balign 4
.global __divsf3_support
__divsf3_support:
/* Build an infinity or NaN result: r9 is or-ed into r0, so the exponent
   field of the result is all ones.  */
.Linf_NaN:
bclr.f 0,r0,31 ; 0/0 -> NaN
xor_s r0,r0,r1
bmsk r1,r0,30
bic_s r0,r0,r1
sub.eq r0,r0,1
j_s.d [blink]
or r0,r0,r9
; Return a zero whose bit 31 is the xor of bit 31 of r0 and r1.
.Lret0:
xor_s r0,r0,r1
bmsk r1,r0,30
j_s.d [blink]
bic_s r0,r0,r1
/* N.B. the spacing between divtab and the sub3 to get its address must
be a multiple of 8. */
__divsf3:
; r2 = bits 17..22 of the divisor, used as the table index
lsr r2,r1,17
sub3 r3,pcl,37 ; (.-.Ldivtab) >> 3
bmsk_s r2,r2,5
ld.as r5,[r3,r2]
asl r4,r1,9
ld.as r9,[pcl,-13]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
mulu64 r5,r4
and.f r11,r1,r9
asl r6,r1,8
bset r6,r6,31
; flags are those of the and.f above: zero exponent field in the divisor
beq.d .Ldenorm_fp1
asl r5,r5,13
breq.d r11,r9,.Linf_nan_fp1
and.f r2,r0,r9
sub r7,r5,mhi
mulu64 r7,r6
; flags are those of the second and.f: zero exponent field in the dividend
beq.d .Ldenorm_fp0
asl r12,r0,8
; the delay slot of the next branch is the mulu64 that follows it
breq.d r2,r9,.Linf_nan_fp0
mulu64 mhi,r7
.Lpast_denorm_fp1:
bset r3,r12,31
.Lpast_denorm_fp0:
cmp_s r3,r6
lsr.cc r3,r3,1
add_s r2,r2, /* wait for immediate */ \
0x3f000000
sub r7,r7,mhi ; u1.31 inverse, about 30 bit
mulu64 r3,r7
sbc r2,r2,r11
; N flag = sign of the quotient; consumed by bxor.mi below
xor.f 0,r0,r1
and r0,r2,r9
bclr r3,r9,23 ; 0x7f000000
; unsigned compare: result exponent outside the normal range
brhs.d r2,r3,.Linf_denorm
bxor.mi r0,r0,31
.Lpast_denorm:
add r3,mhi,0x22 ; round to nearest or higher
tst r3,0x3c ; check if rounding was unsafe
lsr r3,r3,6
jne.d [blink] ; return if rounding was safe.
add_s r0,r0,r3
/* work out exact rounding if we fall through here. */
/* We know that the exact result cannot be represented in single
precision. Find the mid-point between the two nearest
representable values, multiply with the divisor, and check if
the result is larger than the dividend. */
add_s r3,r3,r3
sub_s r3,r3,1
mulu64 r3,r6
asr.f 0,r0,1 ; for round-to-even in case this is a denorm
rsub r2,r9,25
asl_s r12,r12,r2
sub.f 0,r12,mlo
j_s.d [blink]
sub.mi r0,r0,1
/* Divisor has an all-ones exponent field.  r0 is first reduced to the
   sign bit of the dividend.  */
.Linf_nan_fp1:
lsr_s r0,r0,31
bmsk.f 0,r1,22
asl_s r0,r0,31
bne_s 0f ; inf/inf -> nan
brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan
0: j_s.d [blink]
mov r0,-1
; Return r0 with bit 31 toggled when the divisor is negative.
.Lsigned0:
.Linf_nan_fp0:
tst_s r1,r1
j_s.d [blink]
bxor.mi r0,r0,31
.balign 4
.global __divsf3
/* For denormal results, it is possible that an exact result needs
rounding, and thus the round-to-even rule has to come into play. */
.Linf_denorm:
brlo r2,0xc0000000,.Linf
.Ldenorm:
asr_s r2,r2,23
bic r0,r0,r9
neg r9,r2
; shift count below 25: shift mlo down and reuse the normal rounding code
brlo.d r9,25,.Lpast_denorm
lsr r3,mlo,r9
/* Fall through: return +- 0 */
j_s [blink]
; Overflow: or the exponent mask into the sign already in r0.
.Linf:
j_s.d [blink]
or r0,r0,r9
.balign 4
/* Divisor has a zero exponent field (zero or denormal): normalize its
   fraction, redo the table lookup and the first two multiplies, and
   adjust the dividend exponent in r2 by the shift amount.  */
.Ldenorm_fp1:
bclr r6,r6,31
norm.f r12,r6 ; flag for x/0 -> Inf check
add r6,r6,r6
rsub r5,r12,16
ror r5,r1,r5
asl r6,r6,r12
bmsk r5,r5,5
ld.as r5,[r3,r5]
add r4,r6,r6
; load latency
mulu64 r5,r4
bic.ne.f 0, \
0x60000000,r0 ; large number / denorm -> Inf
asl r5,r5,13
sub r7,r5,mhi
beq.d .Linf_NaN
mulu64 r7,r6
asl_s r12,r12,23
and.f r2,r0,r9
add_s r2,r2,r12
asl r12,r0,8
; the delay slot of this branch is the mulu64 on the .Ldenorm_fp0 line
bne.d .Lpast_denorm_fp1
.Ldenorm_fp0: mulu64 mhi,r7
; Dividend has a zero exponent field (zero or denormal).
bclr r12,r12,31
norm.f r3,r12 ; flag for 0/x -> 0 check
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
beq_s .Lret0
asl_s r12,r12,r3
asl_s r3,r3,23
add_s r12,r12,r12
add r11,r11,r3
b.d .Lpast_denorm_fp0
mov_s r3,r12
ENDFUNC(__divsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Self-checking wrapper; only built when this block is enabled by hand.
   It calls the reference routine __muldf3_c and the assembly routine
   (renamed to __muldf3_asm by the #define at the end of this block) with
   the same arguments, returns the assembly result when both agree or when
   both results are NaNs, and branches to abort otherwise.
   Stack layout once all five pushes are done:
   [sp,0]=r1 [sp,4]=r0 [sp,8]=r3 [sp,12]=r2 [sp,16]=blink.  */
.global __muldf3
.balign 4
__muldf3:
push_s blink
push_s r2
push_s r3
push_s r0
bl.d __muldf3_c
push_s r1
; reload the second operand, then park the reference result in its slots
ld_s r2,[sp,12]
ld_s r3,[sp,8]
st_s r0,[sp,12]
st_s r1,[sp,8]
pop_s r1
bl.d __muldf3_asm
pop_s r0
; r0:r1 = assembly result, r2:r3 = reference result
pop_s r3
pop_s r2
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink]
and r12,DBL0H,DBL1H
bic.f 0,0x7ff80000,r12 ; both NaN -> OK
jeq_s [blink]
b abort
#define __muldf3 __muldf3_asm
#endif /* DEBUG */
/* __muldf3: double-precision multiply built on mulu64, whose results are
   read back through mlo and mhi.
   The operands are named by the DBL0H/DBL0L and DBL1H/DBL1L macros from
   arc-ieee-754.h (not visible here; presumably the high and low words of
   the first and second argument -- confirm against that header).  The
   result is left in DBL0H:DBL0L and control returns through blink.
   r9 is loaded from .L7ff00000 through a hard-coded pcl-relative word
   offset (0x68); any change to the size of the code between __muldf3 and
   .L7ff00000 requires recomputing that offset.  */
__muldf3_support: /* This label makes debugger output saner. */
.balign 4
FUNC(__muldf3)
/* Reached from .Ldenorm_dbl1 when DBL1H is zero apart from its sign bit,
   so any significant bits of that operand are in DBL1L.  Shifts the
   fraction into place, adjusts the exponent held in DBL0H to compensate,
   then falls through into the main entry point.  */
.Ldenorm_2:
breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
norm.f r12,DBL1L
mov.mi r12,21
add.pl r12,r12,22
neg r11,r12
asl_s r12,r12,20
lsr.f DBL1H,DBL1L,r11
ror DBL1L,DBL1L,r11
sub_s DBL0H,DBL0H,r12
mov.eq DBL1H,DBL1L
sub_l DBL1L,DBL1L,DBL1H
/* Fall through. */
.global __muldf3
.balign 4
__muldf3:
mulu64 DBL0L,DBL1L
; r9 = 0x7ff00000, the exponent field mask (hard-coded offset, see header)
ld.as r9,[pcl,0x68] ; ((.L7ff00000-.+2)/4)]
; r6 = low 20 bits of DBL0H with bit 20 set
bmsk r6,DBL0H,19
bset r6,r6,20
; r11 / r12 = exponent fields of the two operands; a zero field is
; handled out of line
and r11,DBL0H,r9
breq.d r11,0,.Ldenorm_dbl0
and r12,DBL1H,r9
breq.d r12,0,.Ldenorm_dbl1
mov r8,mlo
mov r4,mhi
mulu64 r6,DBL1L
; an all-ones exponent field in either operand goes to .Linf_nan
breq.d r11,r9,.Linf_nan
bmsk r10,DBL1H,19
breq.d r12,r9,.Linf_nan
bset r10,r10,20
; accumulate the partial products into r5:r4 (and later r7)
add.f r4,r4,mlo
adc r5,mhi,0
mulu64 r10,DBL0L
add_s r12,r12,r11 ; add exponents
add.f r4,r4,mlo
adc r5,r5,mhi
mulu64 r6,r10
tst r8,r8
bclr r8,r9,30 ; 0x3ff00000
bset.ne r4,r4,0 ; put least significant word into sticky bit
bclr r6,r9,20 ; 0x7fe00000
add.f r5,r5,mlo
adc r7,mhi,0 ; fraction product in r7:r5:r4
lsr.f r10,r7,9
rsub.eq r8,r8,r9 ; 0x40000000
sub r12,r12,r8 ; subtract bias + implicit 1
; unsigned compare against 0x7fe00000: exponent outside the normal range
brhs.d r12,r6,.Linf_denorm
rsub r10,r10,12
/* Shift the fraction product by r10 bits, round, and combine it with the
   exponent in r12.  Bit 31 of the result is set when the xor of DBL0H and
   DBL1H is negative.  */
.Lshift_frac:
neg r8,r10
asl r6,r4,r10
lsr DBL0L,r4,r8
add.f 0,r6,r6
btst.eq DBL0L,0
cmp.eq r4,r4 ; round to nearest / round to even
asl r4,r5,r10
lsr r5,r5,r8
adc.f DBL0L,DBL0L,r4
xor.f 0,DBL0H,DBL1H
asl r7,r7,r10
add_s r12,r12,r5
adc DBL0H,r12,r7
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
/* N.B. This is optimized for ARC700.
ARC600 has very different scheduling / instruction selection criteria. */
/* If one number is denormal, subtract some from the exponent of the other
one (if the other exponent is too small, return 0), and normalize the
denormal. Then re-run the computation. */
/* Return a zero that keeps only bit 31 of DBL0H.  */
.Lret0_2:
lsr_s DBL0H,DBL0H,31
asl_s DBL0H,DBL0H,31
j_s.d [blink]
mov_s DBL0L,0
.balign 4
/* The first operand has a zero exponent field: swap the operands (r12 is
   the scratch register) so that the shared code at .Ldenorm_dbl1 always
   sees the zero-exponent operand in DBL1, and recompute r11 for the new
   DBL0H.  */
.Ldenorm_dbl0:
mov_s r12,DBL0L
mov_s DBL0L,DBL1L
mov_s DBL1L,r12
mov_s r12,DBL0H
mov_s DBL0H,DBL1H
mov_s DBL1H,r12
and r11,DBL0H,r9
.Ldenorm_dbl1:
brhs r11,r9,.Linf_nan
; other exponent field too small (unsigned 0x3ca00001 >= r11): return 0
brhs 0x3ca00001,r11,.Lret0
sub_s DBL0H,DBL0H,DBL1H
bmsk.f DBL1H,DBL1H,30
add_s DBL0H,DBL0H,DBL1H
; nothing left in DBL1H below the sign bit: continue at .Ldenorm_2
beq.d .Ldenorm_2
norm r12,DBL1H
sub_s r12,r12,10
asl r5,r12,20
asl_s DBL1H,DBL1H,r12
sub DBL0H,DBL0H,r5
neg r5,r12
lsr r6,DBL1L,r5
asl_s DBL1L,DBL1L,r12
; restart the multiplication with the normalized operand
b.d __muldf3
add_s DBL1H,DBL1H,r6
; Return zero; the three instructions below leave in DBL0H only bit 31 of
; (DBL0H xor DBL1H).
.Lret0: xor_s DBL0H,DBL0H,DBL1H
bclr DBL1H,DBL0H,31
xor_s DBL0H,DBL0H,DBL1H
j_s.d [blink]
mov_s DBL0L,0
.balign 4
/* At least one operand has an all-ones exponent field (infinity or NaN).
   Also reached from .Ldenorm_dbl1, i.e. possibly with the operands
   swapped.  */
.Linf_nan:
bclr r12,DBL1H,31
xor_s DBL1H,DBL1H,DBL0H
bclr_s DBL0H,DBL0H,31
max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
or.f 0,DBL0H,DBL0L
mov_s DBL0L,0
or.ne.f DBL1L,DBL1L,r12
not_s DBL0H,DBL0L ; inf * 0 -> NaN
mov.ne DBL0H,r8
tst_s DBL1H,DBL1H
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
/* We have checked for infinity / NaN input before, and transformed
denormalized inputs into normalized inputs. Thus, the worst case
exponent overflows are:
1 + 1 - 0x400 == 0xc02 : maximum underflow
0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
N.B. 0x7e and 0x7f are also values for overflow.
If (r12 <= -54), we have an underflow to zero. */
.balign 4
.Linf_denorm:
lsr r6,r12,28
; top nibble of r12 below 0xc: overflow, return infinity
brlo.d r6,0xc,.Linf
asr r6,r12,20
add.f r10,r10,r6
brgt.d r10,0,.Lshift_frac
mov_s r12,0
beq.d .Lround_frac
add r10,r10,32
; move the fraction product r7:r5:r4 down by one whole word, folding the
; word that drops out into a sticky bit, until the shift count is usable
.Lshift32_frac:
tst r4,r4
mov r4,r5
bset.ne r4,r4,1
mov r5,r7
brge.d r10,1,.Lshift_frac
mov r7,0
breq.d r10,0,.Lround_frac
add r10,r10,32
brgt r10,21,.Lshift32_frac
b_s .Lret0
; no further bit shift needed: round and return r7:r5
.Lround_frac:
add.f 0,r4,r4
btst.eq r5,0
mov_s DBL0L,r5
mov_s DBL0H,r7
adc.eq.f DBL0L,DBL0L,0
j_s.d [blink]
adc.eq DBL0H,DBL0H,0
; Overflow: return r9 (0x7ff00000) in the high word with a zero low word;
; bit 31 is set when the xor of the two high words is negative.
.Linf: mov_s DBL0L,0
xor.f DBL1H,DBL1H,DBL0H
mov_s DBL0H,r9
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
ENDFUNC(__muldf3)
.balign 4
; exponent mask constant loaded into r9 at the top of __muldf3
.L7ff00000:
.long 0x7ff00000
/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Self-checking wrapper; only built when this block is enabled by hand.
   It calls the reference routine __mulsf3_c and the assembly routine
   (renamed to __mulsf3_asm by the #define at the end of this block) with
   the same arguments, returns the assembly result when both agree or when
   both results are NaNs, and calls abort otherwise.  */
.global __mulsf3
FUNC(__mulsf3)
.balign 4
__mulsf3:
push_s blink
push_s r1
bl.d __mulsf3_c
push_s r0
; stack: [sp,0]=original r0, [sp,4]=original r1, [sp,8]=blink
ld_s r1,[sp,4]
; keep the reference result in the slot that held r1
st_s r0,[sp,4]
bl.d __mulsf3_asm
pop_s r0
; r0 = assembly result, r1 = reference result
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
; results differ: tolerate it only if both have an all-ones exponent field
; and both have a non-zero fraction (bits 0..22)
and r12,r0,r1
bic.f 0,0x7f800000,r12
bne 0f
bmsk.f 0,r0,22
bmsk.ne.f r1,r1,22
jne_s [blink] ; both NaN -> OK
0: bl abort
ENDFUNC(__mulsf3)
#define __mulsf3 __mulsf3_asm
#endif /* DEBUG */
/* __mulsf3: single-precision multiply built on mulu64, whose results are
   read back through mlo and mhi.
   Operands are taken from r0 and r1; the result is returned in r0 and
   control returns through blink.
   r9 is loaded from .L7f800000 (exponent mask) and r4 from .L7fffffff,
   both through hard-coded pcl-relative word offsets (80, 70, 29 and 11
   below).  Any change to the size of the code between those loads and
   the constants at the end of the function requires recomputing them.  */
.balign 4
.global __mulsf3
FUNC(__mulsf3)
__mulsf3:
ld.as r9,[pcl,80]; [pcl,((.L7f800000-.+2)/4)]
; r4 = fraction bits of r1; r2 = r0 with bit 23 set, shifted left by 8;
; r3 = r4 with bit 23 set
bmsk r4,r1,22
bset r2,r0,23
asl_s r2,r2,8
bset r3,r4,23
; start the multiply before the operands are classified; the denormal
; handlers issue their own mulu64 on the normalized values
mulu64 r2,r3
; r11 / r12 = exponent fields of r0 / r1; a zero field is handled out of
; line
and r11,r0,r9
breq.d r11,0,.Ldenorm_dbl0
and r12,r1,r9
breq.d r12,0,.Ldenorm_dbl1
; from here on r0 holds r0 xor r1, whose bit 31 is the sign of the product
xor_s r0,r0,r1
breq.d r11,r9,.Linf_nan_dbl0
ld.as r4,[pcl,70]; [pcl,((.L7fffffff-.+2)/4)]
; the delay slot of the next branch is the first instruction at
; .Lpast_denorm
breq.d r12,r9,.Linf_nan_dbl1
.Lpast_denorm:
asl.f 0,mhi,8
mov r6,mhi
mov r7,mlo
add.pl r6,r6,r6
bclr.pl r6,r6,23
add.pl.f r7,r7,r7
add.cs r6,r6,1
lsr.f 0,r6,1
add_s r12,r12,r11
adc.f 0,r7,r4
add_s r12,r12, \
-0x3f800000
adc.f r8,r6,r12
tst.pl r8,r9
; keep only bit 31 of r0 (r4 is 0x7fffffff)
bic r0,r0,r4
min r3,r8,r9
; common case: return sign + r3 when the pnz condition holds
jpnz.d [blink]
add.pnz r0,r0,r3
; infinity or denormal number
add.ne.f r3,r3,r3
asr_s r3,r3,23+1
bset r6,r6,23
bpnz.d .Linfinity
sub_s r3,r3,1
neg_s r2,r3
brhi.d r2,24,.Lret_r0 ; right shift shift > 24 -> return +-0
lsr r2,r6,r2
asl r9,r6,r3
lsr.f 0,r2,1
tst r7,r7
add_s r0,r0,r2
bset.ne r9,r9,0
adc.f 0,r9,r4
j_s.d [blink]
add.cs r0,r0,1
; add r9 (the exponent mask) to the sign already in r0
.Linfinity:
j_s.d [blink]
add_s r0,r0,r9
.Lret_r0: j_s [blink]
.balign 4
/* r0 has a zero exponent field.  r0 has NOT been xor-ed with r1 yet on
   this path: the xor happens either in the delay slot of bhi.d below or
   in .Ldenorm_dbl0_inf_nan_dbl1.  */
.Ldenorm_dbl0:
bclr_s r2,r2,31
norm.f r4,r2
add_s r2,r2,r2
asl r2,r2,r4
mulu64 r2,r3
breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
asl r4,r4,23
sub.ne.f r12,r12,r4
ld.as r4,[pcl,29]; [pcl,((.L7fffffff-.+2)/4)]
bhi.d .Lpast_denorm
xor_s r0,r0,r1
; otherwise return r0 with everything but bit 31 cleared
bmsk r1,r0,30
j_s.d [blink]
bic_s r0,r0,r1
.balign 4
/* r0 has a zero exponent field and r1 an all-ones exponent field.  A
   zero r0 (ignoring its sign) gives NaN; otherwise r0 is xor-ed with r1
   here so that the shared code below sees the same register state as on
   the main path.  */
.Ldenorm_dbl0_inf_nan_dbl1:
bmsk.f 0,r0,30
beq_s .Lretnan
xor_s r0,r0,r1
.Linf_nan_dbl1:
xor_s r1,r1,r0
/* r1 now holds the operand that is NOT being returned.  After its sign
   is cleared, an unsigned value above r9 (0x7f800000) means it is a NaN,
   in which case the result is forced to NaN as well.  */
.Linf_nan_dbl0:
bclr_s r1,r1,31
cmp_s r1,r9
jls.d [blink]
xor_s r0,r0,r1
; r1 NaN -> result NaN
.Lretnan:
j_s.d [blink]
mov r0,-1
.balign 4
/* r1 has a zero exponent field; r0 already holds r0 xor r1.  */
.Ldenorm_dbl1:
breq.d r11,r9,.Linf_nan_dbl0_2
norm.f r3,r4
sub_s r3,r3,7
asl r4,r4,r3
mulu64 r2,r4
sub_s r3,r3,1
asl_s r3,r3,23
sub.ne.f r11,r11,r3
ld.as r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)]
bhi.d .Lpast_denorm
bmsk r8,r0,30
; otherwise return r0 with everything but bit 31 cleared
j_s.d [blink]
bic r0,r0,r8
.balign 4
/* r0 had an all-ones exponent field and r1 a zero exponent field.  */
.Linf_nan_dbl0_2:
bclr_s r1,r1,31
xor_s r0,r0,r1
sub.eq r1,r1,1 ; inf/nan * 0 -> nan
bic.f 0,r9,r1
j_s.d [blink]
or.eq r0,r0,r1 ; r1 nan -> result nan
.balign 4
; constants read through the hard-coded pcl-relative loads above
.L7f800000:
.long 0x7f800000
.L7fffffff:
.long 0x7fffffff
ENDFUNC(__mulsf3)
/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-check harness.  When enabled, __divsf3 becomes a wrapper
   that calls __divsf3_c and __divsf3_asm with the same r0/r1 arguments and
   compares the two results.  It returns normally when the results are
   identical or when both have every exponent bit set; otherwise it calls
   abort.  The define at the end renames the implementation that follows
   to __divsf3_asm.  */
.global __divsf3
FUNC(__divsf3)
.balign 4
__divsf3:
push_s blink ; keep the return address across the two calls
push_s r1 ; save the second argument
bl.d __divsf3_c
push_s r0 ; delay slot: save the first argument
ld_s r1,[sp,4] ; reload the saved second argument
st_s r0,[sp,4] ; park the __divsf3_c result in that stack slot
bl.d __divsf3_asm
pop_s r0 ; delay slot: reload the saved first argument
pop_s r1 ; r1 = result of __divsf3_c
pop_s blink
cmp r0,r1
jeq_s [blink] ; identical results -> return
and r12,r0,r1
bic.f 0,0x7f800000,r12 ; both NaN -> OK
jeq_s [blink]
bl abort
ENDFUNC(__divsf3)
#define __divsf3 __divsf3_asm
#endif /* DEBUG */
.balign 4
; NOTE(review): this label sits in front of __divsf3 but is spelled
; __divdf3_support; presumably __divsf3_support was meant - confirm.
__divdf3_support: /* This label makes debugger output saner. */
FUNC(__divsf3)
/* Special-case handlers for __divsf3; they are entered from the
   classification code at the __divsf3 entry point with:
     r9  = 0x7f800000 (exponent mask)
     r2  = fraction bits of r0, r4 = r0 with the fraction bits cleared
     r3  = fraction bits of r1, r5 = r1 with the fraction bits cleared
     r10 = r4 ^ r5  */
; Dividend exponent field is zero.  Normalise the fraction in r2 and
; compensate by adding the shift count (moved to the exponent position)
; to r5.
.Ldenorm_fp0:
norm.f r12,r2 ; flag for 0/x -> 0 check
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
beq_s .Lret0_NaN
tst r1,r9 ; Z set if the divisor exponent field is zero as well
add_s r2,r2,r2
sub_s r12,r12,8
asl_s r2,r2,r12
asl_l r12,r12,23 ; move the shift count to the exponent position
bne.d .Lpast_denorm_fp0 ; divisor has a nonzero exponent: rejoin the main path
add r5,r5,r12 ; delay slot: fold the shift into the divisor exponent
/* r0 is subnormal, r1 is subnormal or 0. */
.balign 4
; Divisor exponent field is zero (also branched to directly from the
; entry code).  Normalise the fraction in r3 and add the shift count to r4.
.Ldenorm_fp1:
norm.f r12,r3 ; flag for x/0 -> Inf check
bic.ne.f 0,0x60000000,r0 ; large number/denorm -> Inf
beq_s .Linf
add_s r3,r3,r3
sub_s r12,r12,8
asl_s r3,r3,r12
asl_s r12,r12,23 ; move the shift count to the exponent position
b.d .Lpast_denorm_fp1
add r4,r4,r12 ; delay slot: fold the shift into the dividend exponent
; Return r10 with the exponent bits cleared; one is subtracted from that
; value when r1 is zero apart from its sign bit.
; NOTE(review): a NaN divisor with a zero or subnormal dividend also
; arrives here and appears to produce a signed zero - confirm.
.Lret0_NaN:
bclr.f 0,r1,31 ; 0/0 -> NaN
bic r0,r10,r9
j_s.d [blink]
sub.eq r0,r0,1
.balign 4
; Dividend exponent field is all ones (Inf or NaN).
; NOTE(review): when the divisor exponent is all ones too, r1 becomes
; (sign - 1) and the final xor inverts the exponent bits of r0, so
; Inf/Inf looks like it returns 0x807fffff rather than a NaN - confirm.
.Linf_nan_fp0:
bic.f 0,r9,r1 ; fp1 Inf -> result NaN
bic r1,r5,r9 ; fp1 sign
sub.eq r1,r1,1
j_s.d [blink]
xor_s r0,r0,r1
; Divisor exponent field is all ones (Inf or NaN), dividend is finite.
.Linf_nan_fp1:
bic r0,r4,r9 ; fp0 sign
bmsk.f 0,r1,22 ; x/inf -> 0, x/nan -> nan
xor.eq r1,r1,r9 ; divisor is Inf: keep only its sign bit
j_s.d [blink]
xor_s r0,r0,r1
.global __divsf3
.balign 4
.long 0x7f800000 ; exponent mask
/* __divsf3: single-precision divide of r0 by r1 built from two-instruction
   add1.f / sub.cc divide steps.  The result is returned in r0 through
   j [blink].  Register use set up below:
     r9  = 0x7f800000, loaded from the word placed just above the entry
     r2  = fraction bits 0..22 of r0, r4 = r0 with the fraction cleared
     r3  = fraction bits 0..22 of r1, r5 = r1 with the fraction cleared
     r10 = r4 ^ r5
     r11 = exponent field of the operand being classified  */
__divsf3:
ld r9,[pcl,-4]
bmsk r2,r0,22
xor r4,r0,r2
bmsk r3,r1,22
xor r5,r1,r3
and r11,r0,r9
breq.d r11,0,.Ldenorm_fp0 ; dividend exponent field is zero
xor r10,r4,r5 ; delay slot
breq r11,r9,.Linf_nan_fp0 ; dividend exponent field is all ones
bset_s r2,r2,23 ; insert the implicit one of the dividend
and r11,r1,r9
breq r11,0,.Ldenorm_fp1 ; divisor exponent field is zero
breq r11,r9,.Linf_nan_fp1 ; divisor exponent field is all ones
.Lpast_denorm_fp0:
bset_s r3,r3,23 ; insert the implicit one of the divisor
.Lpast_denorm_fp1:
cmp r2,r3 ; the .lo instructions below act when dividend fraction < divisor fraction
asl_s r2,r2,6+1
asl_s r3,r3,7
add.lo r2,r2,r2
bclr r8,r9,30 ; exponent bias
bclr.lo r8,r8,23 ; reduce exp by one if fraction is shifted
sub r4,r4,r5
add r4,r4,r8 ; r4 = tentative sign and exponent of the result
xor.f 0,r10,r4
bmi .Linf_denorm ; sign bit of r4 differs from that of r10
and.f r12,r4,r9
beq .Ldenorm ; exponent field of the result is zero
sub_s r2,r2,r3 ; discard implicit 1
rsub r3,r3,1 ; prime r3 for two-insn divide-step use
; Each divide step below is r2 = r3 + 2*r2 with r3 = 1 - divisor; when
; that addition produces no carry, r3 is subtracted again.  The .Ldiv_*
; labels are the entry points used by the .Ldenorm dispatch table.
.Ldiv_23bit:
.rep 6
add1.f r2,r3,r2
sub.cc r2,r2,r3
.endr
breq r12,r9,.Linf ; exponent field of the result is all ones
bmsk r0,r2,6 ; move the low seven bits of r2 into r0 ...
xor_s r2,r2,r0 ; ... and clear them in r2
.Ldiv_17bit:
.rep 7
add1.f r2,r3,r2
sub.cc r2,r2,r3
.endr
asl_s r0,r0,7
bmsk r1,r2,6
xor_s r2,r2,r1
or_s r0,r0,r1 ; append the next seven bits to r0
.Ldiv_10bit:
.rep 7
add1.f r2,r3,r2
sub.cc r2,r2,r3
.endr
asl_s r0,r0,7
bmsk r1,r2,6
xor_s r2,r2,r1
or_s r0,r0,r1 ; append the next seven bits to r0
.Ldiv_3bit:
.rep 3
add1.f r2,r3,r2
sub.cc r2,r2,r3
.endr
asl_s r0,r0,3
.Ldiv_0bit:
add1.f r1,r3,r2 ; one further step, result kept in r1 for rounding
sub.cc r1,r1,r3
bmsk_s r2,r2,2 ; last three bits produced by .Ldiv_3bit
tst r1,-0x7e ; 0xffffff82, test for rest or odd
bmsk_s r1,r1,0
add_s r0,r0,r2 ; assemble fraction
add_s r0,r0,r4 ; add in sign & exponent
j_s.d [blink]
add.ne r0,r0,r1 ; round to nearest / even
.balign 4
; Return r10 with all exponent bits set.
.Linf:
j_s.d [blink]
or r0,r10,r9
; Return r4 unchanged; this is the first entry of the .Ldenorm table.
.Lret_r4:
j_s.d [blink]
mov_s r0,r4
.balign 4
/* Overflow / underflow of the tentative exponent in r4.
   add.f doubles r4, which moves the top exponent bit into the sign
   position: a non-negative result is treated as overflow and returns
   through .Linf.  Otherwise r12 becomes the (negative) exponent, clamped
   to -24, and is used as the index into the denormal dispatch table.  */
.Linf_denorm:
add.f r12,r4,r4
asr_l r12,r12,24
bpl .Linf
max r12,r12,-24
/* Denormal result.  r12 is in -24..0 here (0 when reached from the
   "beq .Ldenorm" in the main path).  The halfword at .Ldenorm_tab + 2*r12
   is the distance from .Ldenorm_tab back to the entry point to use, so
   that smaller exponents execute fewer divide steps.  */
.Ldenorm:
rsub r3,r3,1 ; prime r3 for the two-insn divide steps
; NOTE(review): the literal 68 assumes a 4-byte encoding of this add;
; 68 looks too large for a u6 immediate - confirm the assembled offset.
add r1,pcl,68; .Ldenorm_tab-.
ldw.as r12,[r1,r12]
mov_s r0,0 ; no quotient bits collected yet
lsr_s r2,r2
sub_s r1,r1,r12 ; r1 = .Ldenorm_tab - table entry = target address
j_s.d [r1]
bic r4,r10,r9 ; delay slot: r4 = r10 with the exponent bits cleared
; Table entries, index -24 first.  A term -k*8 moves the entry point k
; divide steps (8 bytes per add1.f / sub.cc pair) past the named label.
.short .Ldenorm_tab-.Lret_r4
.short .Ldenorm_tab-.Ldiv_0bit
.short .Ldenorm_tab-.Ldiv_3bit-2*8
.short .Ldenorm_tab-.Ldiv_3bit-1*8
.short .Ldenorm_tab-.Ldiv_3bit
.short .Ldenorm_tab-.Ldiv_10bit-6*8
.short .Ldenorm_tab-.Ldiv_10bit-5*8
; The entry below used to be a second copy of -3*8, which made index -17
; run one divide step too many.
.short .Ldenorm_tab-.Ldiv_10bit-4*8
.short .Ldenorm_tab-.Ldiv_10bit-3*8
.short .Ldenorm_tab-.Ldiv_10bit-2*8
.short .Ldenorm_tab-.Ldiv_10bit-1*8
.short .Ldenorm_tab-.Ldiv_10bit
.short .Ldenorm_tab-.Ldiv_17bit-6*8
.short .Ldenorm_tab-.Ldiv_17bit-5*8
.short .Ldenorm_tab-.Ldiv_17bit-4*8
.short .Ldenorm_tab-.Ldiv_17bit-3*8
.short .Ldenorm_tab-.Ldiv_17bit-2*8
.short .Ldenorm_tab-.Ldiv_17bit-1*8
.short .Ldenorm_tab-.Ldiv_17bit
.short .Ldenorm_tab-.Ldiv_23bit-5*8
.short .Ldenorm_tab-.Ldiv_23bit-4*8
.short .Ldenorm_tab-.Ldiv_23bit-3*8
.short .Ldenorm_tab-.Ldiv_23bit-2*8
.short .Ldenorm_tab-.Ldiv_23bit-1*8
.Ldenorm_tab:
.short .Ldenorm_tab-.Ldiv_23bit
ENDFUNC(__divsf3)
/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-check harness.  When enabled, __mulsf3 becomes a wrapper
   that calls __mulsf3_c and __mulsf3_asm with the same r0/r1 arguments and
   compares the two results.  It returns normally when they are identical,
   or when both have every exponent bit set and both have a nonzero
   fraction; otherwise it calls abort.  The define at the end renames the
   implementation that follows to __mulsf3_asm.  */
.global __mulsf3
FUNC(__mulsf3)
.balign 4
__mulsf3:
push_s blink ; keep the return address across the two calls
push_s r1 ; save the second argument
bl.d __mulsf3_c
push_s r0 ; delay slot: save the first argument
ld_s r1,[sp,4] ; reload the saved second argument
st_s r0,[sp,4] ; park the __mulsf3_c result in that stack slot
bl.d __mulsf3_asm
pop_s r0 ; delay slot: reload the saved first argument
pop_s r1 ; r1 = result of __mulsf3_c
pop_s blink
cmp r0,r1
jeq_s [blink] ; identical results -> return
and r12,r0,r1
bic.f 0,0x7f800000,r12 ; Z set if both results have all exponent bits set
bne 0f
bmsk.f 0,r0,22 ; fraction of the first result
bmsk.ne.f r1,r1,22 ; fraction of the second result
jne_s [blink] ; both NaN -> OK
0: bl abort
ENDFUNC(__mulsf3)
#define __mulsf3 __mulsf3_asm
#endif /* DEBUG */
.balign 4
.global __mulsf3
/* __mulsf3: single-precision multiply of r0 by r1 using a 24-iteration
   shift-and-add loop; the result is returned in r0 through j [blink].
   Register use set up below:
     r9  = 0x7f800000 (per the load comment), r4 = 0x7fffffff later on
     r2  = fraction of r0, r3 = fraction of r1 with bit 23 set
     r11 = exponent field of r0, r12 = exponent field of r1
     r0  = r0 ^ r1 once both operands are classified
     r6:r7 = high:low word of the fraction product  */
FUNC(__mulsf3)
__mulsf3:
ld.as r9,[pcl,76]; [pcl,((.L7f800000-.+2)/4)]
bmsk r4,r1,22
bset r3,r4,23
bmsk r2,r0,22
and r11,r0,r9
breq.d r11,0,.Ldenorm_dbl0 ; first operand has a zero exponent field
and r12,r1,r9 ; delay slot
xor_s r0,r0,r1
breq.d r11,r9,.Linf_nan_dbl0 ; first operand exponent is all ones
bset_s r2,r2,23 ; delay slot: insert the implicit one
breq r12,0,.Ldenorm_dbl1 ; second operand has a zero exponent field
breq r12,r9,.Linf_nan_dbl1 ; second operand exponent is all ones
.Lpast_denorm:
mov r6,0
lsr.f r7,r2 ; lowest multiplier bit goes to carry
; We could do this a bit faster here with a 32 bit shift register and
; inserting the r2 factor / retrieving the low result a byte at a time,
; but that'd increase code size.
mov lp_count,24
.balign 4
lp 0f
add.cs r6,r6,r3 ; add the multiplicand when the current multiplier bit is set
lsr.f r6,r6 ; shift the product right, low bit of r6 to carry
rrc.f r7,r7 ; carry into r7, next multiplier bit to carry
0:
ld.as r4,[pcl,59]; [pcl,((.L7fffffff-.+2)/4)]
asl.f 0,r6,8 ; N set if bit 23 of r6 is set
add.pl r6,r6,r6 ; otherwise shift the product left by one ...
bclr.pl r6,r6,23
add.pl.f r7,r7,r7
add.cs r6,r6,1 ; ... carrying the top bit of r7 into r6
lsr.f 0,r6,1
add_s r12,r12,r11 ; sum of the two exponent fields
adc.f 0,r7,r4
add_s r12,r12, \
-0x3f800000
adc.f r8,r6,r12 ; r8 = fraction + exponent + rounding carry
tst.pl r8,r9
bic r0,r0,r4 ; keep only the sign bit of r0
min r3,r8,r9
jpnz.d [blink] ; positive and nonzero: normal result, return
add.pnz r0,r0,r3
; infinity or denormal number
add.ne.f r3,r3,r3
asr_s r3,r3,23+1
bset r6,r6,23
bpnz.d .Linfinity
sub_s r3,r3,1
neg_s r2,r3
brhi.d r2,24,.Lret_r0 ; right shift shift > 24 -> return +-0
lsr r2,r6,r2
asl r9,r6,r3 ; bits shifted out of the denormal fraction
lsr.f 0,r2,1
tst r7,r7
add_s r0,r0,r2
bset.ne r9,r9,0 ; record that the low product word was nonzero
adc.f 0,r9,r4
j_s.d [blink]
add.cs r0,r0,1 ; round up
.Linfinity:
j_s.d [blink]
add_s r0,r0,r9
.Lret_r0: j_s [blink]
.balign 4
/* Special-case handlers for __mulsf3, entered from the classification
   code at the entry point (r9 = 0x7f800000, r11/r12 = exponent fields of
   r0/r1, r2 and r4 = fractions of r0 and r1).  */
; First operand has a zero exponent field: normalise its fraction in r2
; and subtract the shift count (in exponent position) from r12.
.Ldenorm_dbl0:
asl_s r2,r2,8
norm.f r4,r2
lsr_s r2,r2,7
asl r2,r2,r4
breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
asl r4,r4,23 ; delay slot: move the shift count to the exponent position
sub.ne.f r12,r12,r4
bhi.d .Lpast_denorm
xor_s r0,r0,r1 ; delay slot: r0 = r0 ^ r1, as on the main path
bmsk r1,r0,30
j_s.d [blink]
bic_s r0,r0,r1 ; return only the sign bit of r0 ^ r1
.balign 4
; First operand zero or subnormal, second operand Inf or NaN.
.Ldenorm_dbl0_inf_nan_dbl1:
bmsk.f 0,r0,30 ; Z set if the first operand is zero apart from its sign
beq_s .Lretnan
xor_s r0,r0,r1
; Second operand has an all-ones exponent; r0 holds r0 ^ r1 here.
.Linf_nan_dbl1:
xor_s r1,r1,r0 ; recover the original first operand in r1
bclr_s r1,r1,31
j_s.d [blink]
xor_s r0,r0,r1
; First operand has an all-ones exponent; r0 holds r0 ^ r1 here.
.Linf_nan_dbl0:
sub_s r2,r1,1 ; inf/nan * 0 -> nan; inf * nan -> nan (use |r2| >= inf)
bic.f 0,r9,r2
xor_s r0,r0,r1
bclr_s r1,r1,31
xor_s r0,r0,r1
jne_s [blink]
.Lretnan:
j_s.d [blink]
mov r0,-1 ; all bits set
.balign 4
; Second operand has a zero exponent field: normalise its fraction in r4,
; subtract the shift count from r11 and hand the fraction over in r3.
.Ldenorm_dbl1:
norm.f r3,r4
sub_s r3,r3,7
asl r4,r4,r3
sub_s r3,r3,1
asl_s r3,r3,23 ; move the shift count to the exponent position
sub.ne.f r11,r11,r3
bhi.d .Lpast_denorm
mov_s r3,r4 ; delay slot
bmsk r3,r0,30
j_s.d [blink]
bic_s r0,r0,r3 ; return only the sign bit of r0
.balign 4
; Constants fetched with the pcl-relative loads in __mulsf3.
.L7f800000:
.long 0x7f800000
.L7fffffff:
.long 0x7fffffff
ENDFUNC(__mulsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/*
to calculate a := b/x as b*y, with y := 1/x:
- x is in the range [1..2)
- calculate 15..18 bit inverse y0 using a table of approximating polynoms.
Precision is higher for polynoms used to evaluate input with larger
value.
- Do one newton-raphson iteration step to double the precision,
then multiply this with the divisor
-> more time to decide if dividend is subnormal
- the worst error propagation is on the side of the value range
with the least initial defect, thus giving us about 30 bits precision.
The truncation error for the either is less than 1 + x/2 ulp.
A 31 bit inverse can be simply calculated by using x with implicit 1
and chaining the multiplies. For a 32 bit inverse, we multiply y0^2
with the bare fraction part of x, then add in y0^2 for the implicit
1 of x.
- If calculating a 31 bit inverse, the systematic error is less than
-1 ulp; likewise, for 32 bit, it is less than -2 ulp.
- If we calculate our seed with a 32 bit fraction, we can achieve a
tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we
only need to take the step to calculate the 2nd stage rest and
rounding adjust 1/32th of the time. However, if we use a 20 bit
fraction for the seed, the negative error can exceed -2 ulp/128, (2)
thus for a simple add / tst check, we need to do the 2nd stage
rest calculation/ rounding adjust 1/16th of the time.
(1): The inexactness of the 32 bit inverse contributes an error in the
range of (-1 .. +(1+x/2) ) ulp/128. Leaving out the low word of the
rest contributes an error < +1/x ulp/128 . In the interval [1,2),
x/2 + 1/x <= 1.5 .
(2): Unless proven otherwise. I have not actually looked for an
example where -2 ulp/128 is exceeded, and my calculations indicate
that the excess, if existent, is less than -1/512 ulp.
*/
#include "arc-ieee-754.h"
/* N.B. fp-bit.c does double rounding on denormal numbers. */
#if 0 /* DEBUG */
/* Disabled self-check harness.  When enabled, __divdf3 becomes a wrapper
   that calls __divdf3_c and __divdf3_asm with the same r0..r3 arguments
   and compares the two r0/r1 results.  It returns normally when they are
   identical or when the bic test on DBL0H & DBL1H yields zero; otherwise
   it calls abort.  The define at the end renames the implementation that
   follows to __divdf3_asm.  */
.global __divdf3
FUNC(__divdf3)
.balign 4
__divdf3:
push_s blink ; keep the return address across the two calls
push_s r2 ; save all four argument registers
push_s r3
push_s r0
bl.d __divdf3_c
push_s r1 ; delay slot
ld_s r2,[sp,12] ; reload the saved r2/r3 arguments
ld_s r3,[sp,8]
st_s r0,[sp,12] ; park the __divdf3_c result in those two slots
st_s r1,[sp,8]
pop_s r1 ; reload the saved r1 argument
bl.d __divdf3_asm
pop_s r0 ; delay slot: reload the saved r0 argument
pop_s r3 ; r3 = r1 result of __divdf3_c
pop_s r2 ; r2 = r0 result of __divdf3_c
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink] ; identical results -> return
and r12,DBL0H,DBL1H
bic.f 0,0x7ff80000,r12 ; both NaN -> OK
jeq_s [blink]
bl abort
ENDFUNC(__divdf3)
#define __divdf3 __divdf3_asm
#endif /* DEBUG */
FUNC(__divdf3)
__divdf3_support: /* This label makes debugger output saner. */
/* Special-case handlers for __divdf3.  DBL0H/DBL0L and DBL1H/DBL1L name
   the high/low words of the dividend and divisor (presumably defined in
   arc-ieee-754.h - confirm).  On entry from __divdf3:
     r9  = 0x7ff00000 (per the load comment at the entry point)
     r6  = DBL0H & r9, r7 = DBL1H & r9 (exponent fields)
     r10 = address of .Ldivtab  */
.balign 4
; Divisor exponent field is zero.  Normalise the divisor fraction, redo
; the table lookup and first multiply that the entry code performed, and
; correct r7 for the shift before rejoining at .Lpast_denorm_dbl1.
.Ldenorm_dbl1:
brge r6, \
0x43500000,.Linf_NaN ; large number / denorm -> Inf
bmsk.f r12,DBL1H,19
mov.eq r12,DBL1L ; high fraction bits are zero: normalise from the low word
mov.eq DBL1L,0
sub.eq r7,r7,32
norm.f r11,r12 ; flag for x/0 -> Inf check
beq_s .Linf_NaN
mov.mi r11,0
add.pl r11,r11,1
add_s r12,r12,r12
asl r8,r12,r11
rsub r12,r11,31
lsr r12,DBL1L,r12
tst_s DBL1H,DBL1H ; N = sign of the divisor, used by bxor.mi below
or r8,r8,r12
lsr r4,r8,26 ; table index = top six bits of r8
lsr DBL1H,r8,12
ld.as r4,[r10,r4]
bxor.mi DBL1H,DBL1H,31 ; put the divisor sign back
sub r11,r11,11
asl DBL1L,DBL1L,r11
sub r11,r11,1
mpyhu r5,r4,r8
sub r7,r7,r11
asl r4,r4,12
b.d .Lpast_denorm_dbl1
asl r7,r7,20 ; delay slot: move the exponent correction to its field position
; wb stall
.balign 4
; Dividend exponent field is zero.  Normalise the dividend into r12
; (high part) / r10 / DBL0L and correct r6 for the shift before rejoining
; at .Lpast_denorm_dbl0.
.Ldenorm_dbl0:
bmsk.f r12,DBL0H,19
; wb stall
mov.eq r12,DBL0L ; high fraction bits are zero: normalise from the low word
sub.eq r6,r6,32
norm.f r11,r12 ; flag for 0/x -> 0 check
brge r7, \
0x43500000, .Lret0_NaN ; denorm/large number -> 0
beq_s .Lret0_NaN
mov.mi r11,0
add.pl r11,r11,1
asl r12,r12,r11
sub r6,r6,r11
add.f 0,r6,31
lsr r10,DBL0L,r6
mov.mi r10,0
add r6,r6,11+32
neg.f r11,r6
asl DBL0L,DBL0L,r11
mov.pl DBL0L,0
sub r6,r6,32-1
b.d .Lpast_denorm_dbl0
asl r6,r6,20 ; delay slot: move the exponent correction to its field position
; Return r9 (all exponent bits) combined with the xor of the operand
; signs; one is subtracted first when the 0/0 test below succeeds.
.Linf_NaN:
tst_s DBL0L,DBL0L ; 0/0 -> NaN
xor_s DBL1H,DBL1H,DBL0H
bclr.eq.f DBL0H,DBL0H,31
bmsk DBL0H,DBL1H,30
xor_s DBL0H,DBL0H,DBL1H ; DBL0H = sign bit of DBL0H ^ DBL1H
sub.eq DBL0H,DBL0H,1
mov_s DBL0L,0
j_s.d [blink]
or DBL0H,DBL0H,r9
.balign 4
; Return a zero carrying the xor of the operand signs; one is subtracted
; from the high word when r12 is unsigned-greater than r9.
.Lret0_NaN:
xor_s DBL1H,DBL1H,DBL0H
cmp_s r12,r9
mov_s DBL0L,0
bmsk DBL0H,DBL1H,30
xor_s DBL0H,DBL0H,DBL1H ; DBL0H = sign bit of DBL0H ^ DBL1H
j_s.d [blink]
sub.hi DBL0H,DBL0H,1
.Linf_nan_dbl1: ; Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN
not_s DBL0L,DBL1H
cmp r6,r9 ; is the dividend exponent all ones as well?
sub_s.ne DBL0L,DBL0L,DBL0L ; if not, DBL0L = 0
tst_s DBL0H,DBL0H ; N = sign of the dividend
add_s DBL0H,DBL1H,DBL0L
j_s.d [blink]
bxor.mi DBL0H,DBL0H,31
; Dividend exponent is all ones, divisor is finite: return the dividend
; with its sign flipped when the divisor is negative.
.Linf_nan_dbl0:
tst_s DBL1H,DBL1H
j_s.d [blink]
bxor.mi DBL0H,DBL0H,31
.balign 4
.global __divdf3
/* N.B. the spacing between divtab and the add3 to get its address must
be a multiple of 8. */
/* __divdf3: double-precision divide of DBL0H:DBL0L by DBL1H:DBL1L; the
   result is returned in DBL0H:DBL0L through j [blink].  A seed for the
   reciprocal of the divisor is read from .Ldivtab (indexed by the top six
   fraction bits of the divisor), refined with multiplies, and multiplied
   with the dividend fraction.  Register use set up below:
     r8  = leading divisor fraction bits, r10 = .Ldivtab address
     r9  = 0x7ff00000, r7 / r6 = exponent fields of divisor / dividend
     r4  = reciprocal approximation, r11 = high part of the result  */
__divdf3:
asl r8,DBL1H,12
lsr r12,DBL1L,20
lsr r4,r8,26 ; table index = top six fraction bits of the divisor
add3 r10,pcl,59 ; (.Ldivtab-.) >> 3
ld.as r4,[r10,r4]
ld.as r9,[pcl,180]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
or r8,r8,r12 ; r8 = top 32 fraction bits of the divisor
mpyhu r5,r4,r8
and.f r7,DBL1H,r9
asl r4,r4,12 ; having the asl here is a concession to the XMAC pipeline.
beq.d .Ldenorm_dbl1 ; divisor exponent field is zero
and r6,DBL0H,r9 ; delay slot
.Lpast_denorm_dbl1: ; wb stall
sub r4,r4,r5 ; seed for the reciprocal
mpyhu r5,r4,r4
breq.d r6,0,.Ldenorm_dbl0 ; dividend exponent field is zero
lsr r8,r8,1 ; delay slot
asl r12,DBL0H,11
lsr r10,DBL0L,21
.Lpast_denorm_dbl0: ; wb stall
bset r8,r8,31 ; insert the implicit one of the divisor
mpyhu r11,r5,r8
add_s r12,r12,r10
bset r5,r12,31 ; dividend fraction with its implicit one
cmp r5,r8
cmp.eq DBL0L,DBL1L
; wb stall
lsr.cc r5,r5,1 ; executed when the dividend fraction is not below the divisor fraction
sub r4,r4,r11 ; u1.31 inverse, about 30 bit
mpyhu r11,r5,r4 ; result fraction highpart
breq r7,r9,.Linf_nan_dbl1 ; divisor exponent is all ones
lsr r8,r8,2 ; u3.29
add r5,r6, /* wait for immediate / XMAC wb stall */ \
0x3fe00000
; wb stall (not for XMAC)
breq r6,r9,.Linf_nan_dbl0 ; dividend exponent is all ones
mpyu r12,r11,r8 ; u-28.31
asl_s DBL1L,DBL1L,9 ; u-29.23:9
sbc r6,r5,r7 ; r6 = tentative result exponent
; resource conflict (not for XMAC)
mpyhu r5,r11,DBL1L ; u-28.23:9
add.cs DBL0L,DBL0L,DBL0L
asl_s DBL0L,DBL0L,6 ; u-26.25:7
asl r10,r11,23
sub_l DBL0L,DBL0L,r12
; wb stall (before 'and' for XMAC)
lsr r7,r11,9
sub r5,DBL0L,r5 ; rest msw ; u-26.31:0
mpyh r12,r5,r4 ; result fraction lowpart
xor.f 0,DBL0H,DBL1H ; N = sign of the result
and DBL0H,r6,r9
add_s DBL0H,DBL0H,r7 ; (XMAC wb stall)
bxor.mi DBL0H,DBL0H,31
brhs r6, /* wb stall / wait for immediate */ \
0x7fe00000,.Linf_denorm
add.f r12,r12,0x11
asr r9,r12,5
sub.mi DBL0H,DBL0H,1
add.f DBL0L,r9,r10
tst r12,0x1c
jne.d [blink] ; any of the tested bits set: return this result
add.cs DBL0H,DBL0H,1
/* work out exact rounding if we fall through here. */
/* We know that the exact result cannot be represented in double
precision. Find the mid-point between the two nearest
representable values, multiply with the divisor, and check if
the result is larger than the dividend. Since we want to know
only the sign bit, it is sufficient to calculate only the
highpart of the lower 64 bits. */
sub.f DBL0L,DBL0L,1
asl r12,r9,2 ; u-22.30:2
mpyu r10,r11,DBL1L ; rest before considering r12 in r5 : -r10
sub.cs DBL0H,DBL0H,1
sub.f r12,r12,2
; resource conflict (not for XMAC)
mpyhu r7,r12,DBL1L ; u-51.32
asl r5,r5,25 ; s-51.7:25
lsr r10,r10,7 ; u-51.30:2
; resource conflict (not for XMAC)
; resource conflict (not for XMAC)
mpyu r9,r12,r8 ; u-51.31:1
sub r5,r5,r10
add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L
bset r7,r7,0 ; make sure that the result is not zero, and that
; wb stall (one earlier for XMAC)
sub r5,r5,r7 ; a highpart zero appears negative
sub.f r5,r5,r9 ; rest msw
add.pl.f DBL0L,DBL0L,1
j_s.d [blink]
add.eq DBL0H,DBL0H,1
.balign 4
/* Out-of-range exponent paths of __divdf3.  Reached with r6 = tentative
   result exponent (unsigned >= 0x7fe00000), r11 / r12 = high / low part
   of the result fraction, r5 = rest msw, r8 and DBL1L = divisor fraction
   pieces, and N = sign of the result.  */
.Linf_denorm:
brlo r6,0xc0000000,.Linf ; below 0xc0000000 is treated as overflow
.Ldenorm:
asr r6,r6,20
neg r9,r6 ; r9 = right-shift count for the denormal result
mov_s DBL0H,0
brhs.d r9,54,.Lret0 ; shifted out completely: return +-0
bxor.mi DBL0H,DBL0H,31 ; delay slot: apply the result sign
add_l r12,r12,1
and r12,r12,-4
rsub r7,r6,5
asr r10,r12,28
bmsk r4,r12,27
asrs DBL0L,r4,r7
add DBL1H,r11,r10
add.f r7,r6,32-5
abss r10,r4
asl r4,r4,r7
mov.mi r4,r10
add.f r10,r6,23
rsub r7,r6,9
lsr r7,DBL1H,r7
asl r10,DBL1H,r10
or.pnz DBL0H,DBL0H,r7
or.mi r4,r4,r10
mov.mi r10,r7
add.f DBL0L,r10,DBL0L
add.cs.f DBL0H,DBL0H,1 ; carry clear after this point
bxor.f 0,r4,31
add.pnz.f DBL0L,DBL0L,1
add.cs.f DBL0H,DBL0H,1
jne_l [blink] ; rounding was conclusive: return
/* Calculation so far was not conclusive; calculate further rest. */
mpyu r11,r11,DBL1L ; rest before considering r12 in r5 : -r11
asr.f r12,r12,3
asl r5,r5,25 ; s-51.7:25
; resource conflict (not for XMAC)
mpyu DBL1H,r12,r8 ; u-51.31:1
and r9,DBL0L,1 ; tie-breaker: round to even
lsr r11,r11,7 ; u-51.30:2
; resource conflict (not for XMAC)
mpyhu r8,r12,DBL1L ; u-51.32
sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
add_s DBL1H,DBL1H,r11
; resource conflict (not for XMAC)
; resource conflict (not for XMAC)
mpyu r12,r12,DBL1L ; u-83.30:2
sub DBL1H,DBL1H,r5 ; -rest msw
add_s DBL1H,DBL1H,r8 ; -rest msw
add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-(
; wb stall (XMAC: Before add.f)
tst_s DBL1H,DBL1H
cmp.eq r12,r9
add.cs.f DBL0L,DBL0L,1
j_s.d [blink]
add.cs DBL0H,DBL0H,1
.Lret0:
/* return +- 0 */
j_s.d [blink]
mov_s DBL0L,0 ; DBL0H already holds the signed zero high word
; Return r9 in the high word with the sign applied, low word zero.
.Linf:
mov_s DBL0H,r9
mov_s DBL0L,0
j_s.d [blink]
bxor.mi DBL0H,DBL0H,31
.balign 4
/* Reciprocal seed table for __divdf3: 64 words, indexed by the top six
   fraction bits of the divisor (ld.as r4,[r10,r4] at the entry point).
   The code multiplies the whole word with the divisor fraction (mpyhu)
   and also shifts the word left by 12.
   NOTE(review): presumably the top 12 bits are the slope and the low 20
   bits the constant term of a linear approximation - confirm against the
   program that generated the table.  */
.Ldivtab:
.long 0xfc0fffe1
.long 0xf46ffdfb
.long 0xed1ffa54
.long 0xe61ff515
.long 0xdf7fee75
.long 0xd91fe680
.long 0xd2ffdd52
.long 0xcd1fd30c
.long 0xc77fc7cd
.long 0xc21fbbb6
.long 0xbcefaec0
.long 0xb7efa100
.long 0xb32f92bf
.long 0xae8f83b7
.long 0xaa2f7467
.long 0xa5ef6479
.long 0xa1cf53fa
.long 0x9ddf433e
.long 0x9a0f3216
.long 0x965f2091
.long 0x92df0f11
.long 0x8f6efd05
.long 0x8c1eeacc
.long 0x88eed876
.long 0x85dec615
.long 0x82eeb3b9
.long 0x800ea10b
.long 0x7d3e8e0f
.long 0x7a8e7b3f
.long 0x77ee6836
.long 0x756e5576
.long 0x72fe4293
.long 0x709e2f93
.long 0x6e4e1c7f
.long 0x6c0e095e
.long 0x69edf6c5
.long 0x67cde3a5
.long 0x65cdd125
.long 0x63cdbe25
.long 0x61ddab3f
.long 0x600d991f
.long 0x5e3d868c
.long 0x5c6d7384
.long 0x5abd615f
.long 0x590d4ecd
.long 0x576d3c83
.long 0x55dd2a89
.long 0x545d18e9
.long 0x52dd06e9
.long 0x516cf54e
.long 0x4ffce356
.long 0x4e9cd1ce
.long 0x4d3cbfec
.long 0x4becae86
.long 0x4aac9da4
.long 0x496c8c73
.long 0x483c7bd3
.long 0x470c6ae8
.long 0x45dc59af
.long 0x44bc4915
.long 0x43ac3924
.long 0x428c27fb
.long 0x418c187a
.long 0x407c07bd
; Exponent mask loaded into r9 at the __divdf3 entry point.
.L7ff00000:
.long 0x7ff00000
ENDFUNC(__divdf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/*
- calculate 15..18 bit inverse using a table of approximating polynoms.
precision is higher for polynoms used to evaluate input with larger
value.
- do one newton-raphson iteration step to double the precision,
then multiply this with the divisor
-> more time to decide if dividend is subnormal
- the worst error propagation is on the side of the value range
with the least initial defect, thus giving us about 30 bits precision.
*/
#include "arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-check harness.  When enabled, __divsf3 becomes a wrapper
   that calls __divsf3_c and __divsf3_asm with the same r0/r1 arguments and
   branches to abort unless the two results are bit-identical.  The define
   at the end renames the implementation that follows to __divsf3_asm.  */
.global __divsf3
FUNC(__divsf3)
.balign 4
__divsf3:
push_s blink ; keep the return address across the two calls
push_s r1 ; save the second argument
bl.d __divsf3_c
push_s r0 ; delay slot: save the first argument
ld_s r1,[sp,4] ; reload the saved second argument
st_s r0,[sp,4] ; park the __divsf3_c result in that stack slot
bl.d __divsf3_asm
pop_s r0 ; delay slot: reload the saved first argument
pop_s r1 ; r1 = result of __divsf3_c
pop_s blink
cmp r0,r1
#if 1
bne abort
jeq_s [blink]
b abort
#else
bne abort
j_s [blink]
#endif
ENDFUNC(__divsf3)
#define __divsf3 __divsf3_asm
#endif /* DEBUG */
FUNC(__divsf3)
.balign 4
; Exponent mask loaded into r9 at the __divsf3 entry point.
.L7f800000:
.long 0x7f800000
/* Reciprocal seed table for __divsf3: 64 words, indexed by bits 17..22 of
   the divisor (lsr / bmsk_s / ld.as at the entry point).  The code
   multiplies the whole word with the divisor fraction (mpyhu) and also
   shifts the word left by 13.
   NOTE(review): presumably each word packs the slope and the constant
   term of a linear approximation - confirm against the program that
   generated the table.  */
.Ldivtab:
.long 0xfc0ffff0
.long 0xf46ffefd
.long 0xed1ffd2a
.long 0xe627fa8e
.long 0xdf7ff73b
.long 0xd917f33b
.long 0xd2f7eea3
.long 0xcd1fe986
.long 0xc77fe3e7
.long 0xc21fdddb
.long 0xbcefd760
.long 0xb7f7d08c
.long 0xb32fc960
.long 0xae97c1ea
.long 0xaa27ba26
.long 0xa5e7b22e
.long 0xa1cfa9fe
.long 0x9ddfa1a0
.long 0x9a0f990c
.long 0x9667905d
.long 0x92df878a
.long 0x8f6f7e84
.long 0x8c27757e
.long 0x88f76c54
.long 0x85df630c
.long 0x82e759c5
.long 0x8007506d
.long 0x7d3f470a
.long 0x7a8f3da2
.long 0x77ef341e
.long 0x756f2abe
.long 0x72f7212d
.long 0x709717ad
.long 0x6e4f0e44
.long 0x6c1704d6
.long 0x69e6fb44
.long 0x67cef1d7
.long 0x65c6e872
.long 0x63cedf18
.long 0x61e6d5cd
.long 0x6006cc6d
.long 0x5e36c323
.long 0x5c76b9f3
.long 0x5abeb0b7
.long 0x5916a79b
.long 0x57769e77
.long 0x55de954d
.long 0x54568c4e
.long 0x52d6834d
.long 0x51667a7f
.long 0x4ffe71b5
.long 0x4e9e68f1
.long 0x4d466035
.long 0x4bf65784
.long 0x4aae4ede
.long 0x496e4646
.long 0x48363dbd
.long 0x47063547
.long 0x45de2ce5
.long 0x44be2498
.long 0x43a61c64
.long 0x4296144a
.long 0x41860c0e
.long 0x407e03ee
__divsf3_support: /* This label makes debugger output saner. */
/* Special-case handlers for the table-driven __divsf3.  On entry from the
   code at __divsf3: r9 = 0x7f800000, r3 = .Ldivtab address,
   r6 = divisor << 8 with bit 31 set, r11 = exponent field of the divisor,
   r2 / r12 = exponent field / left-aligned fraction of the dividend (once
   the entry code has computed them).  */
; Divisor exponent field is zero: normalise the divisor fraction and
; redo the table lookup and the seed refinement done at the entry point.
.Ldenorm_fp1:
bclr r6,r6,31
norm.f r12,r6 ; flag for x/0 -> Inf check
add r6,r6,r6
rsub r5,r12,16
ror r5,r1,r5
asl r6,r6,r12
bmsk r5,r5,5 ; six-bit table index
ld.as r5,[r3,r5]
add r4,r6,r6
; load latency
mpyhu r7,r5,r4
bic.ne.f 0, \
0x60000000,r0 ; large number / denorm -> Inf
beq_s .Linf_NaN
asl r5,r5,13
; wb stall
; slow track
sub r7,r5,r7
mpyhu r8,r7,r6
asl_s r12,r12,23 ; move the shift count to the exponent position
and.f r2,r0,r9
add r2,r2,r12
asl r12,r0,8
; wb stall
; The instruction at .Ldenorm_fp0 doubles as the delay slot of this branch.
bne.d .Lpast_denorm_fp1
; Dividend exponent field is zero: normalise the dividend fraction in r12
; and add the shift count (in exponent position) to r11.
.Ldenorm_fp0:
mpyhu r8,r8,r7
bclr r12,r12,31
norm.f r3,r12 ; flag for 0/x -> 0 check
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
beq_s .Lret0
asl_s r12,r12,r3
asl_s r3,r3,23 ; move the shift count to the exponent position
add_s r12,r12,r12
add r11,r11,r3
b.d .Lpast_denorm_fp0
mov_s r3,r12 ; delay slot
.balign 4
; Return r9 (all exponent bits) with the sign of r0 ^ r1; one is
; subtracted first when the 0/0 test below succeeds.
.Linf_NaN:
bclr.f 0,r0,31 ; 0/0 -> NaN
xor_s r0,r0,r1
bmsk r1,r0,30
bic_s r0,r0,r1 ; keep only the sign bit of r0 ^ r1
sub.eq r0,r0,1
j_s.d [blink]
or r0,r0,r9
; Return a zero with the sign of r0 ^ r1.
.Lret0:
xor_s r0,r0,r1
bmsk r1,r0,30
j_s.d [blink]
bic_s r0,r0,r1
; Divisor exponent is all ones (Inf or NaN).
.Linf_nan_fp1:
lsr_s r0,r0,31
bmsk.f 0,r1,22 ; Z set if the divisor fraction is zero (Inf)
asl_s r0,r0,31 ; r0 = sign bit of the dividend only
bne_s 0f ; inf/inf -> nan
brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan
0: j_s.d [blink]
mov r0,-1 ; all bits set
; Flip the sign of r0 when the divisor is negative and return.
.Lsigned0:
.Linf_nan_fp0:
tst_s r1,r1
j_s.d [blink]
bxor.mi r0,r0,31
.balign 4
.global __divsf3
/* N.B. the spacing between divtab and the sub3 to get its address must
be a multiple of 8. */
/* __divsf3: single-precision divide of r0 by r1; the result is returned
   in r0 through j [blink].  A seed for the reciprocal of the divisor is
   read from .Ldivtab, refined with multiplies into r7, and multiplied with
   the dividend fraction.  Register use set up below:
     r3  = .Ldivtab address, later the dividend / result fraction
     r9  = 0x7f800000, r11 / r2 = exponent fields of divisor / dividend
     r6  = divisor fraction left-aligned with its implicit one
     r12 = dividend << 8  */
__divsf3:
lsr r2,r1,17
sub3 r3,pcl,55;(.-.Ldivtab) >> 3
bmsk_s r2,r2,5 ; table index = bits 17..22 of the divisor
ld.as r5,[r3,r2]
asl r4,r1,9
ld.as r9,[pcl,-114]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
mpyhu r7,r5,r4
asl r6,r1,8
and.f r11,r1,r9
bset r6,r6,31
asl r5,r5,13
; wb stall
beq .Ldenorm_fp1 ; divisor exponent field is zero
sub r7,r5,r7 ; seed for the reciprocal
mpyhu r8,r7,r6
breq.d r11,r9,.Linf_nan_fp1 ; divisor exponent is all ones
and.f r2,r0,r9 ; delay slot
beq.d .Ldenorm_fp0 ; dividend exponent field is zero
asl r12,r0,8 ; delay slot
; wb stall
breq r2,r9,.Linf_nan_fp0 ; dividend exponent is all ones
mpyhu r8,r8,r7
.Lpast_denorm_fp1:
bset r3,r12,31 ; dividend fraction with its implicit one
.Lpast_denorm_fp0:
cmp_s r3,r6
lsr.cc r3,r3,1 ; executed when the dividend fraction is not below the divisor fraction
add_s r2,r2, /* wait for immediate */ \
/* wb stall */ \
0x3f000000
sub r7,r7,r8 ; u1.31 inverse, about 30 bit
mpyhu r3,r3,r7
sbc r2,r2,r11 ; r2 = tentative result exponent
xor.f 0,r0,r1 ; N = sign of the result
and r0,r2,r9
bxor.mi r0,r0,31
brhs r2, /* wb stall / wait for immediate */ \
0x7f000000,.Linf_denorm
.Lpast_denorm:
add_s r3,r3,0x22 ; round to nearest or higher
tst r3,0x3c ; check if rounding was unsafe
lsr r3,r3,6
jne.d [blink] ; return if rounding was safe.
add_s r0,r0,r3
/* work out exact rounding if we fall through here. */
/* We know that the exact result cannot be represented in single
precision. Find the mid-point between the two nearest
representable values, multiply with the divisor, and check if
the result is larger than the dividend. */
add_s r3,r3,r3
sub_s r3,r3,1
mpyu r3,r3,r6
asr.f 0,r0,1 ; for round-to-even in case this is a denorm
rsub r2,r9,25
asl_s r12,r12,r2
; wb stall
; slow track
sub.f 0,r12,r3
j_s.d [blink]
sub.mi r0,r0,1
/* For denormal results, it is possible that an exact result needs
rounding, and thus the round-to-even rule has to come into play. */
.Linf_denorm:
brlo r2,0xc0000000,.Linf ; below 0xc0000000 is treated as overflow
.Ldenorm:
asr_s r2,r2,23
bic r0,r0,r9 ; clear the exponent field of the result
neg r9,r2 ; r9 = right-shift count for the denormal fraction
brlo.d r9,25,.Lpast_denorm
lsr r3,r3,r9 ; delay slot
/* Fall through: return +- 0 */
j_s [blink]
; Return r0 with all exponent bits set (r9 still holds the exponent mask).
.Linf:
j_s.d [blink]
or r0,r0,r9
ENDFUNC(__divsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-check harness.  When enabled, __divsf3 becomes a wrapper
   that calls __divsf3_c and __divsf3_asm with the same r0/r1 arguments and
   branches to abort unless the two results are bit-identical.  The define
   at the end renames the implementation that follows to __divsf3_asm.  */
.global __divsf3
FUNC(__divsf3)
.balign 4
__divsf3:
push_s blink ; keep the return address across the two calls
push_s r1 ; save the second argument
bl.d __divsf3_c
push_s r0 ; delay slot: save the first argument
ld_s r1,[sp,4] ; reload the saved second argument
st_s r0,[sp,4] ; park the __divsf3_c result in that stack slot
bl.d __divsf3_asm
pop_s r0 ; delay slot: reload the saved first argument
pop_s r1 ; r1 = result of __divsf3_c
pop_s blink
cmp r0,r1
#if 1
bne abort
jeq_s [blink]
b abort
#else
bne abort
j_s [blink]
#endif
ENDFUNC(__divsf3)
#define __divsf3 __divsf3_asm
#endif /* DEBUG */
.balign 4
; NOTE(review): this label sits in front of __divsf3 but is spelled
; __divdf3_support; presumably __divsf3_support was meant - confirm.
__divdf3_support: /* This label makes debugger output saner. */
FUNC(__divsf3)
/* Special-case handlers for the divaw-based __divsf3; they are entered
   from the classification code at the entry point with:
     r9  = 0x7f800000 (exponent mask)
     r2  = fraction bits of r0, r4 = r0 with the fraction bits cleared
     r3  = fraction bits of r1, r5 = r1 with the fraction bits cleared
     r10 = r4 ^ r5  */
; Dividend exponent field is zero.  Normalise the fraction in r2 and
; compensate by adding the shift count (moved to the exponent position)
; to r5.
.Ldenorm_fp0:
norm.f r12,r2 ; flag for 0/x -> 0 check
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
beq_s .Lret0_NaN
tst r1,r9 ; Z set if the divisor exponent field is zero as well
add_s r2,r2,r2
sub_s r12,r12,8
asl_s r2,r2,r12
asl_l r12,r12,23 ; move the shift count to the exponent position
bne.d .Lpast_denorm_fp0 ; divisor has a nonzero exponent: rejoin the main path
add r5,r5,r12 ; delay slot: fold the shift into the divisor exponent
/* r0 is subnormal, r1 is subnormal or 0. */
.balign 4
; Divisor exponent field is zero (also branched to directly from the
; entry code).  Normalise the fraction in r3 and add the shift count to r4.
.Ldenorm_fp1:
norm.f r12,r3 ; flag for x/0 -> Inf check
bic.ne.f 0,0x60000000,r0 ; large number/denorm -> Inf
beq_s .Linf
add_s r3,r3,r3
sub_s r12,r12,8
asl_s r3,r3,r12
asl_s r12,r12,23 ; move the shift count to the exponent position
b.d .Lpast_denorm_fp1
add r4,r4,r12 ; delay slot: fold the shift into the dividend exponent
; Return r10 with the exponent bits cleared; one is subtracted from that
; value when r1 is zero apart from its sign bit.
.Lret0_NaN:
bclr.f 0,r1,31 ; 0/0 -> NaN
bic r0,r10,r9
j_s.d [blink]
sub.eq r0,r0,1
.global __divsf3
.balign 4
.long 0x7f800000 ; exponent mask
/* __divsf3: single-precision divide of r0 by r1 built from divaw
   instructions, one per divide step.  The result is returned in r0
   through j [blink].  Register use set up below:
     r9  = 0x7f800000, loaded from the word placed just above the entry
     r2  = fraction bits 0..22 of r0, r4 = r0 with the fraction cleared
     r3  = fraction bits 0..22 of r1, r5 = r1 with the fraction cleared
     r10 = r4 ^ r5
     r11 = exponent field of the operand being classified  */
__divsf3:
ld r9,[pcl,-4]
bmsk r2,r0,22
xor r4,r0,r2
bmsk r3,r1,22
xor r5,r1,r3
and r11,r0,r9
breq.d r11,0,.Ldenorm_fp0 ; dividend exponent field is zero
xor r10,r4,r5 ; delay slot
breq r11,r9,.Linf_nan_fp0 ; dividend exponent field is all ones
bset_s r2,r2,23 ; insert the implicit one of the dividend
and r11,r1,r9
breq r11,0,.Ldenorm_fp1 ; divisor exponent field is zero
breq r11,r9,.Linf_nan_fp1 ; divisor exponent field is all ones
.Lpast_denorm_fp0:
bset_s r3,r3,23 ; insert the implicit one of the divisor
.Lpast_denorm_fp1:
cmp r2,r3 ; the .lo instructions below act when dividend fraction < divisor fraction
asl_s r2,r2,6+1
asl_s r3,r3,7
add.lo r2,r2,r2
bclr r8,r9,30 ; exponent bias
bclr.lo r8,r8,23 ; reduce exp by one if fraction is shifted
sub r4,r4,r5
add r4,r4,r8 ; r4 = tentative sign and exponent of the result
xor.f 0,r10,r4
bmi .Linf_denorm ; sign bit of r4 differs from that of r10
and r12,r4,r9
breq r12,0,.Ldenorm ; exponent field of the result is zero
sub_s r2,r2,r3 ; discard implicit 1
; The .Ldiv_* labels are the entry points used by the .Ldenorm dispatch
; table; each divaw is one divide step on r2 with the divisor in r3.
.Ldiv_23bit:
.rep 6
divaw r2,r2,r3
.endr
breq r12,r9,.Linf ; exponent field of the result is all ones
bmsk r0,r2,6 ; move the low seven bits of r2 into r0 ...
xor_s r2,r2,r0 ; ... and clear them in r2
.Ldiv_17bit:
.rep 7
divaw r2,r2,r3
.endr
asl_s r0,r0,7
bmsk r1,r2,6
xor_s r2,r2,r1
or_s r0,r0,r1 ; append the next seven bits to r0
.Ldiv_10bit:
.rep 7
divaw r2,r2,r3
.endr
asl_s r0,r0,7
bmsk r1,r2,6
xor_s r2,r2,r1
or_s r0,r0,r1 ; append the next seven bits to r0
.Ldiv_3bit:
.rep 3
divaw r2,r2,r3
.endr
asl_s r0,r0,3
.Ldiv_0bit:
divaw r1,r2,r3 ; one further step, result kept in r1 for rounding
bmsk_s r2,r2,2 ; last three bits produced by .Ldiv_3bit
tst r1,-0x7e ; 0xffffff82, test for rest or odd
bmsk_s r1,r1,0
add_s r0,r0,r2 ; assemble fraction
add_s r0,r0,r4 ; add in sign & exponent
j_s.d [blink]
add.ne r0,r0,r1 ; round to nearest / even
.balign 4
; Dividend exponent field is all ones (Inf or NaN).
; NOTE(review): when the divisor exponent is all ones too, r1 becomes
; (sign - 1) and the final xor inverts the exponent bits of r0, so
; Inf/Inf looks like it returns 0x807fffff rather than a NaN - confirm.
.Linf_nan_fp0:
bic.f 0,r9,r1 ; fp1 Inf -> result NaN
bic r1,r5,r9 ; fp1 sign
sub.eq r1,r1,1
j_s.d [blink]
xor_s r0,r0,r1
; Divisor exponent field is all ones (Inf or NaN), dividend is finite.
.Linf_nan_fp1:
bic r0,r4,r9 ; fp0 sign
bmsk.f 0,r1,22 ; x/inf -> 0, x/nan -> nan
xor.eq r1,r1,r9 ; divisor is Inf: keep only its sign bit
j_s.d [blink]
xor_s r0,r0,r1
; Return r10 with all exponent bits set.
.Linf:
j_s.d [blink]
or r0,r10,r9
; Return r4 unchanged; this is the first entry of the table below.
.Lret_r4:
j_s.d [blink]
mov_s r0,r4
.balign 4
; Overflow / underflow of the tentative exponent in r4.  add.f doubles
; r4; a non-negative result returns through .Linf, otherwise r12 becomes
; the (negative) exponent, clamped to -24, and indexes the table below.
.Linf_denorm:
add.f r12,r4,r4
asr_l r12,r12,24
bpl .Linf
max r12,r12,-24
; Denormal result.  r12 is in -24..0 here.  The byte at .Ldenorm_tab + r12
; is the distance from .Ldenorm_tab back to the entry point to use, so
; that smaller exponents execute fewer divaw steps.
.Ldenorm:
add r1,pcl,42; .Ldenorm_tab-.
ldb_s r12,[r12,r1]
mov_s r0,0 ; no quotient bits collected yet
lsr_s r2,r2
sub_s r1,r1,r12 ; r1 = .Ldenorm_tab - table entry = target address
j_s.d [r1]
bic r4,r10,r9 ; delay slot: r4 = r10 with the exponent bits cleared
; Table entries, index -24 first.  A term -4*k moves the entry point k
; divaw instructions (4 bytes each) past the named label.
.byte .Ldenorm_tab-.Lret_r4
.byte .Ldenorm_tab-.Ldiv_0bit
.byte .Ldenorm_tab-.Ldiv_3bit-8
.byte .Ldenorm_tab-.Ldiv_3bit-4
.byte .Ldenorm_tab-.Ldiv_3bit
.byte .Ldenorm_tab-.Ldiv_10bit-24
.byte .Ldenorm_tab-.Ldiv_10bit-20
.byte .Ldenorm_tab-.Ldiv_10bit-16
.byte .Ldenorm_tab-.Ldiv_10bit-12
.byte .Ldenorm_tab-.Ldiv_10bit-8
.byte .Ldenorm_tab-.Ldiv_10bit-4
.byte .Ldenorm_tab-.Ldiv_10bit
.byte .Ldenorm_tab-.Ldiv_17bit-24
.byte .Ldenorm_tab-.Ldiv_17bit-20
.byte .Ldenorm_tab-.Ldiv_17bit-16
.byte .Ldenorm_tab-.Ldiv_17bit-12
.byte .Ldenorm_tab-.Ldiv_17bit-8
.byte .Ldenorm_tab-.Ldiv_17bit-4
.byte .Ldenorm_tab-.Ldiv_17bit
.byte .Ldenorm_tab-.Ldiv_23bit-20
.byte .Ldenorm_tab-.Ldiv_23bit-16
.byte .Ldenorm_tab-.Ldiv_23bit-12
.byte .Ldenorm_tab-.Ldiv_23bit-8
.byte .Ldenorm_tab-.Ldiv_23bit-4
.Ldenorm_tab:
.byte .Ldenorm_tab-.Ldiv_23bit
ENDFUNC(__divsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* We use a polynom similar to a Tchebycheff polynom to get an initial
seed, and then use a newton-raphson iteration step to get an
approximate result
If this result can't be rounded to the exact result with confidence, we
round to the value between the two closest representable values, and
test if the correctly rounded value is above or below this value.
Because of the Newton-raphson iteration step, an error in the seed at X
is amplified by X. Therefore, we don't want a Tchebycheff polynom
or a polynom that is close to optimal according to the maximum norm
on the error of the seed value; we want one that is close to optimal
according to the maximum norm on the error of the result, i.e. we
want the maxima of the polynom to increase linearly.
Given an interval [X0,X2) over which to approximate,
with X1 := (X0+X2)/2, D := X1-X0, F := 1/D, and S := D/X1 we have,
like for Tchebycheff polynoms:
P(0) := 1
but then we have:
P(1) := X + S*D
P(2) := 2 * X^2 + S*D * X - D^2
Then again:
P(n+1) := 2 * X * P(n) - D^2 * P (n-1)
*/
#include <stdio.h>

/* Most negative error returned by err () so far.  It starts at a large
   positive sentinel so that the first call always lowers it.  */
static long double merr = 42.;

/* Measure how well one Newton-Raphson step applied to the linear seed
   A0 + (X-1)*A1 approximates 1/X.

   Returns (approximation - 1/X) converted to double.  A diagnostic line
   is printed when the error is at or below -2^-30, and merr is lowered to
   the returned error whenever that error is smaller than merr.  */
double
err (long double a0, long double a1, long double x)
{
  long double seed = a0 + (x-1)*a1;
  /* One Newton-Raphson step for the reciprocal: y' = 2*y - x*y*y.  */
  long double approx = 2. * seed - seed * x * seed;
  /* Not named "true": that is a keyword in C23 and a macro in
     <stdbool.h>.  */
  long double exact = 1./x;
  long double diff = approx - exact;

  if (diff <= -1./65536./16384.)
    printf ("ERROR EXCEEDS 1 ULP %.15f %.15f %.15f\n",
	    (double)x, (double)approx, (double)exact);
  if (merr > diff)
    merr = diff;
  return diff;
}
#include <math.h>
#include <stdio.h>
#include <string.h>

/* Print the 64 ".long" entries of a reciprocal seed table, one for each
   interval of width 1/64 in [1,2).

   For each interval a polynomial approximation of 1/X around the interval
   midpoint is reduced to a linear one, whose slope is quantised to 12 bits
   (i1) and whose constant term is quantised to 20 bits (i0).  Both are
   packed into one 32-bit word: slope magnitude in the top 12 bits,
   constant term in the low 20 bits.  Always returns 0.  */
int
main (void)
{
  long double T[5]; /* Taylor polynom */
  long double P[5][5];
  int i, j;
  long double X0, X1, X2, S;
  long double inc = 1./64;
  long double D = inc*0.5;
  long i0, i1, i2, io;

  memset (P, 0, sizeof (P));
  P[0][0] = 1.;
  /* Leading coefficient of P(i) is 2^(i-1).  */
  for (i = 1; i < 5; i++)
    P[i][i] = 1 << (i-1);
  P[2][0] = -D*D;
  for (X0 = 1.; X0 < 2.; X0 += inc)
    {
      X1 = X0 + inc * 0.5;
      X2 = X0 + inc;
      S = D / X1;
      /* Taylor coefficients of 1/X around the midpoint X1.  */
      T[0] = 1./X1;
      for (i = 1; i < 5; i++)
	T[i] = T[i-1] * -T[0];
#if 0
      printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
(double)T[3], (double)T[4]);
#endif
      /* Fill in the interval-dependent coefficients of P(1)..P(4) using
	 P(n+1) := 2 * X * P(n) - D^2 * P(n-1).  */
      P[1][0] = S*D;
      P[2][1] = S*D;
      for (i = 3; i < 5; i++)
	{
	  P[i][0] = -D*D*P[i-2][0];
	  for (j = 1; j < i; j++)
	    P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
	}
#if 0
      printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
(double)P[3][3], (double)P[3][4]);
      printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
(double)P[4][3], (double)P[4][4]);
#endif
      /* Eliminate the degree 4, 3 and 2 terms by subtracting multiples
	 of P(4), P(3) and P(2), leaving a linear approximation in T[0]
	 and T[1].  */
      for (i = 4; i > 1; i--)
	{
	  long double a = T[i]/P[i][i];
	  for (j = 0; j < i; j++)
	    T[j] -= a * P[i][j];
	}
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
#if 0
      i2 = T[2]*1024;
      long double a = (T[2]-i/1024.)/P[2][2];
      for (j = 0; j < 2; j++)
	T[j] -= a * P[2][j];
#else
      i2 = 0;
#endif
      long double T0, Ti1;
      /* Quantise the slope to 12 bits (i1) and the constant term to 20
	 bits (i0); repeat so that each can adapt to the rounding of the
	 other.  */
      for (i = 0, i0 = 0; i < 4; i++)
	{
	  i1 = T[1]*4096. + i0 / (long double)(1 << 20) - 0.5;
	  i1 = - (-i1 & 0x0fff);
	  Ti1 = ((unsigned)(-i1 << 20) | i0) /-(long double)(1LL<<32LL);
	  T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
	  i0 = T0 * 1024 * 1024 + 0.5;
	  i0 &= 0xfffff;
	}
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
      io = (unsigned)(-i1 << 20) | i0;
      /* A0, A1, Xm0 and Xm1 are only consumed by the disabled diagnostics
	 below.  */
      long double A1 = (unsigned)io/-65536./65536.;
      long double A0 = (unsigned)(io << 12)/65536./65536.;
      long double Xm0 = 1./sqrt (-A1);
      long double Xm1 = 0.5+0.5*-A0/A1;
#if 0
      printf ("%f %f %f %f\n", (double)A0, (double)A1, (double) Ti1, (double)X0);
      printf ("%.12f %.12f %.12f\n",
err (A0, A1, X0), err (A0, A1, X1), err (A0, A1, X2));
      printf ("%.12f %.12f\n", (double)Xm0, (double)Xm1);
      printf ("%.12f %.12f\n", err (A0, A1, Xm0), err (A0, A1, Xm1));
#endif
      /* io is a long holding a 32-bit pattern; convert it so that the
	 argument matches the %x conversion.  */
      printf ("\t.long 0x%x\n", (unsigned) io);
    }
#if 0
  printf ("maximum error: %.15f %x %f\n", (double)merr, (unsigned)(long long)(-merr * 65536 * 65536), (double)log(-merr)/log(2));
#endif
  return 0;
}
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* We use a polynom similar to a Tchebycheff polynom to get an initial
seed, and then use a newton-raphson iteration step to get an
approximate result
If this result can't be rounded to the exact result with confidence, we
round to the value between the two closest representable values, and
test if the correctly rounded value is above or below this value.
Because of the Newton-raphson iteration step, an error in the seed at X
is amplified by X. Therefore, we don't want a Tchebycheff polynom
or a polynom that is close to optimal according to the maximum norm
on the error of the seed value; we want one that is close to optimal
according to the maximum norm on the error of the result, i.e. we
want the maxima of the polynom to increase linearly.
Given an interval [X0,X2) over which to approximate,
with X1 := (X0+X2)/2, D := X1-X0, F := 1/D, and S := D/X1 we have,
like for Tchebycheff polynoms:
P(0) := 1
but then we have:
P(1) := X + S*D
P(2) := 2 * X^2 + S*D * X - D^2
Then again:
P(n+1) := 2 * X * P(n) - D^2 * P (n-1)
*/
/* Print a table of 64 `.long' words, one for each interval of width 1/64
   covering [1,2).  Each word packs the quantised coefficients of a linear
   approximation to 1/x on that interval: a 13-bit slope magnitude in the
   upper bits and a 19-bit offset in the lower bits.

   The linear polynomial is obtained by taking the degree-4 Taylor
   expansion of 1/x around the interval midpoint X1 and repeatedly
   subtracting multiples of the polynomials P(4), P(3), P(2) described in
   the comment above to cancel the higher-order coefficients.

   Always returns 0.  */
int
main (void)
{
  long double T[5]; /* Taylor polynom */
  long double P[5][5]; /* P[n][k] is the coefficient of X^k in P(n).  */
  int i, j;
  long double X0, X1, X2, S;
  long double inc = 1./64;
  long double D = inc*0.5;
  long i0, i1, i2;

  /* Interval-independent part of P: leading coefficients 2^(n-1) and the
     constant term of P(2).  */
  memset (P, 0, sizeof (P));
  P[0][0] = 1.;
  for (i = 1; i < 5; i++)
    P[i][i] = 1 << (i-1);
  P[2][0] = -D*D;
  for (X0 = 1.; X0 < 2.; X0 += inc)
    {
      X1 = X0 + inc * 0.5;
      /* Upper end of the interval [X0,X2); X1 is its midpoint.  (The
         value is not used below; it is kept for debugging output.)  */
      X2 = X0 + inc;
      S = D / X1;
      /* Taylor coefficients of 1/x around X1: T[i] = (-1)^i / X1^(i+1).  */
      T[0] = 1./X1;
      for (i = 1; i < 5; i++)
        T[i] = T[i-1] * -T[0];
#if 0
      printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
              (double)T[3], (double)T[4]);
#endif
      /* Interval-dependent part of P, then the recurrence
         P(n+1) = 2 * X * P(n) - D^2 * P(n-1).  */
      P[1][0] = S*D;
      P[2][1] = S*D;
      for (i = 3; i < 5; i++)
        {
          P[i][0] = -D*D*P[i-2][0];
          for (j = 1; j < i; j++)
            P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
        }
#if 0
      printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
              (double)P[3][3], (double)P[3][4]);
      printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
              (double)P[4][3], (double)P[4][4]);
#endif
      /* Cancel the degree 4, 3 and 2 terms of T by subtracting the
         matching multiple of P(i); only T[0] and T[1] matter afterwards.  */
      for (i = 4; i > 1; i--)
        {
          long double a = T[i]/P[i][i];
          for (j = 0; j < i; j++)
            T[j] -= a * P[i][j];
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
#if 0
      /* Disabled experiment: keep a quantised quadratic term.  The
         residual must be taken against the quantised value i2 (the
         original text used the loop counter i here).  */
      i2 = T[2]*512;
      long double a = (T[2]-i2/512.)/P[2][2];
      for (j = 0; j < 2; j++)
        T[j] -= a * P[2][j];
#else
      i2 = 0;
#endif
      /* Quantise slope (13 bits, i1 holds it negated) and offset (19 bits,
         i0).  The offset bits also act as low-order slope bits in the
         packed word, so the two are re-derived a few times to let them
         settle.  T0 is the constant term re-expressed for a polynomial in
         (x-1) rather than (x-X1), corrected for the slope rounding.  */
      for (i = 0, i0 = 0; i < 4; i++)
        {
          long double T0, Ti1;

          i1 = T[1]*8192. + i0 / (long double)(1 << 19) - 0.5;
          i1 = - (-i1 & 0x1fff);
          Ti1 = ((unsigned)(-i1 << 19) | i0) /-(long double)(1LL<<32LL);
          T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
          i0 = T0 * 512 * 1024 + 0.5;
          i0 &= 0x7ffff;
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
      /* The packed value has type long; %x requires an unsigned int
         argument, so convert explicitly.  The value fits in 32 bits.  */
      printf ("\t.long 0x%x\n", (unsigned) ((-i1 << 19) | i0));
    }
  return 0;
}
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* inputs: DBL0, DBL1
output: z flag
clobber: r12, flags
For NaNs, bit 19.. bit 30 of the high word must be set. */
/* __eqdf2: double-precision equality test.  The result is delivered in
the z flag (set means equal), not in a register.
DBL0L/DBL0H/DBL1L/DBL1H are register-name macros; presumably they come
from arc-ieee-754.h, which is not visible here. */
/* Disabled self-check harness.  When enabled it saves r0-r11 and blink,
calls the reference routine __eqdf2_c and keeps its result in r11,
restores the argument registers, calls the assembly routine (renamed to
__eqdf2_asm by the #define below), and calls abort unless the z flag
agrees with "r11 == 0". */
#if 0 /* DEBUG */
.global __eqdf2
.balign 4
FUNC(__eqdf2)
__eqdf2:
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
bl.d __eqdf2_c` push_s r0
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
bl.d __eqdf2_asm` ld.ab r10,[sp,4]
pop_s blink
breq.d r11,0,0f
ld.ab r11,[sp,4]
jne_s [blink]
bl abort
0: jeq_s [blink]
bl abort
ENDFUNC(__eqdf2)
#define __eqdf2 __eqdf2_asm
#endif /* DEBUG */
.global __eqdf2
.balign 4
HIDDEN_FUNC(__eqdf2)
/* Good performance as long as the difference in high word is
well predictable (as seen from the branch predictor). */
__eqdf2:
/* Different high words: go to .Lhighdiff.  The bmsk in the delay slot
executes either way and leaves bits 0..20 of DBL0H in r12 for the NaN
test on the fall-through path. */
brne.d DBL0H,DBL1H,.Lhighdiff
bmsk r12,DBL0H,20
/* With DPFP_COMPAT, a non-zero low word additionally sets bit 21 of r12
before the NaN test below. */
#ifdef DPFP_COMPAT
or.f 0,DBL0L,DBL1L
bset.ne r12,r12,21
#endif /* DPFP_COMPAT */
/* High words are equal.  r12 + (DBL0H << 1) carries out only for a NaN
bit pattern; when there is no carry the low words are compared in the
return delay slot and z is set iff they match. */
add1.f r12,r12,DBL0H /* set c iff NaN; also, clear z if NaN. */
j_s.d [blink]
cmp.cc DBL0L,DBL1L
.balign 4
/* High words differ.  z is set only when both low words are zero and the
OR of the high words has nothing but (possibly) the sign bit set, i.e.
when the operands are +0 and -0. */
.Lhighdiff:
or r12,DBL0H,DBL1H
or.f 0,DBL0L,DBL1L
j_s.d [blink]
bmsk.eq.f r12,r12,30
ENDFUNC(__eqdf2)
/* ??? could we do better by speeding up some 'common' case of inequality? */
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* inputs: r0, r1
output: z flag
clobber: r12, flags
For NaNs, bit 22 .. bit 30 must be set. */
/* __eqsf2: single-precision equality test.  The result is delivered in
the z flag (set means equal), not in a register. */
/* Disabled self-check harness.  When enabled it saves r0-r11 and blink,
calls the reference routine __eqsf2_c and keeps its result in r11,
restores the argument registers, calls the assembly routine (renamed to
__eqsf2_asm by the #define below), and calls abort unless the z flag
agrees with "r11 == 0". */
#if 0 /* DEBUG */
.global __eqsf2
.balign 4
FUNC(__eqsf2)
__eqsf2:
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
bl.d __eqsf2_c` push_s r0
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
bl.d __eqsf2_asm` ld.ab r10,[sp,4]
pop_s blink
breq.d r11,0,0f
ld.ab r11,[sp,4]
jne_s [blink]
bl abort
0: jeq_s [blink]
bl abort
ENDFUNC(__eqsf2)
#define __eqsf2 __eqsf2_asm
#endif /* DEBUG */
/* Good performance as long as the binary difference is
well predictable (as seen from the branch predictor). */
.global __eqsf2
.balign 4
HIDDEN_FUNC(__eqsf2)
__eqsf2:
breq r0, r1,.Lno_bdiff
/* Bit patterns differ.  z is set only when the OR of both operands has
nothing but (possibly) the sign bit set, i.e. for +0 versus -0. */
or r12,r0,r1
j_s.d [blink]
bmsk.f 0,r12,30
/* Bit patterns are identical.  r12 holds bits 0..23 of r0; adding
(r0 << 1) carries out only for a NaN bit pattern.  Without carry, the
compare in the return delay slot sets z (the operands are identical). */
.Lno_bdiff:
bmsk r12,r0,23
add1.f r12,r12,r0 /* set c iff NaN; also, clear z if NaN. */
j_s.d [blink]
cmp.cc r0,r1
ENDFUNC(__eqsf2)
/* Copyright (C) 2006, 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harness.  When enabled it calls the reference
routine __extendsfdf2_c, stashes both result words on the stack, calls
the assembly routine (renamed to __extendsfdf2_asm by the #define below)
with the original argument, and calls abort unless both result words
match. */
#if 0 /* DEBUG */
.global __extendsfdf2
.balign 4
FUNC(__extendsfdf2)
__extendsfdf2:
push_s blink
bl.d __extendsfdf2_c
push_s r0
ld_s r2,[sp]
st_s r1,[sp]
push_s r0
bl.d __extendsfdf2_asm
mov_s r0,r2
pop_s r2
pop_s r3
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink]
bl abort
ENDFUNC(__extendsfdf2)
#define __extendsfdf2 __extendsfdf2_asm
#endif /* DEBUG */
/* Disabled, incomplete sketch of an alternative entry sequence (labelled
ARC600 by its author); it is not assembled. */
#if 0 /* ARC600 */
__extendsfdf2:
lsr r2,r0,23
tst r2,0xff
bic.ne.f r2,0xff
beq_s .Linf_nan_denorm_0
..
.Linf_nan_denorm:
bbit1 r0,30,.Linf_nan
#endif
/* __extendsfdf2: convert the single-precision value in r0 to double
precision, returned in DBL0L/DBL0H (register-name macros, presumably
from arc-ieee-754.h, which is not visible here).  Clobbers r3 and flags,
plus r2 in the big-endian variant.

add.f shifts the sign bit out into carry and sets z when the input is +0
or -0.  norm counts the redundant sign bits of the shifted value; a count
of 7 or more means all eight exponent bits are equal, i.e. the input is
zero, denormal, infinity or NaN, and the special-case path is taken.
Otherwise the exponent is rebiased by adding 0x38000000
((0x3ff - 0x7f) << 20) and the sign is put back with bxor.cs, which relies
on the carry from add.f still being intact at that point. */
.global __extendsfdf2
.balign 4
FUNC(__extendsfdf2)
__extendsfdf2:
add.f r1,r0,r0
norm r3,r1
#ifdef __LITTLE_ENDIAN__
lsr_s DBL0H,r1,4
brhs r3,7,.Linf_nan_denorm_0
asl_s DBL0L,r0,29
add_s DBL0H,DBL0H, \
0x38000000
#else
lsr r2,r1,4
brhs r3,7,.Linf_nan_denorm_0
asl_s DBL0L,r1,28
add DBL0H,r2, \
0x38000000
#endif
j_s.d [blink]
bxor.cs DBL0H,DBL0H,31
.balign 4
/* Special cases.  Flags are still those of the add.f at entry: z set
means the input was a zero, which is returned with its sign; n set means
the top exponent bit was set, i.e. infinity or NaN. */
.Linf_nan_denorm_0:
#ifdef __LITTLE_ENDIAN__
mov_s DBL0H,r0
jeq.d [blink]
mov.eq DBL0L,0
#else
jeq_s [blink]
#endif
bmi .Linf_nan
/* Denormal input: shift the fraction left by the norm count and derive
the double exponent from 0x380+6 minus that count, then assemble the two
result words and apply the sign from carry. */
asl_s r0,r0,r3
rsub r3,r3,0x380+6
#ifdef __LITTLE_ENDIAN__
asl_s r3,r3,20
lsr DBL0H,r0,9
asl_s DBL0L,r0,23
add_s DBL0H,DBL0H,r3
j_s.d [blink]
bxor.cs DBL0H,DBL0H,31
#else
asl DBL0L,r0,23
lsr_s DBL0H,r0,9
asl_s r3,r3,20
bxor.cs DBL0H,DBL0H,31
j_s.d [blink]
add_l DBL0H,DBL0H,r3
#endif
/* Infinity or NaN: OR the input with itself shifted right by 3, which
keeps the sign, fills the 11-bit exponent field with ones and carries the
upper fraction bits across; the low result word is cleared. */
.Linf_nan:
#ifdef __LITTLE_ENDIAN__
lsr DBL0H,r0,3
or_s DBL0H,DBL0H,r0
j_s.d [blink]
mov_l DBL0L,0
#else
lsr r3,r0,3
mov_s DBL0L,0
j_s.d [blink]
or_l DBL0H,r0,r3
#endif
ENDFUNC(__extendsfdf2)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harness.  When enabled it calls the reference
routine __fixdfsi_c, then the assembly routine (renamed to __fixdfsi_asm
by the #define below) with the same argument, and calls abort unless the
two results are equal. */
#if 0 /* DEBUG */
FUNC(__fixdfsi)
.global __fixdfsi
.balign 4
__fixdfsi:
push_s blink
push_s r0
bl.d __fixdfsi_c
push_s r1
mov_s r2,r0
pop_s r1
ld r0,[sp]
bl.d __fixdfsi_asm
st r2,[sp]
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
bl abort
ENDFUNC(__fixdfsi)
#define __fixdfsi __fixdfsi_asm
#endif /* DEBUG */
/* If the fraction has to be shifted left by a positive non-zero amount,
we have to combine bits from DBL0L and DBL0H. If we shift right,
or shift by zero, we only want to have the bits from DBL0H in r0. */
/* __fixdfsi: convert the double in DBL0L/DBL0H to a signed 32-bit integer
in r0.  Clobbers r1, r2, r3 and flags.  DBL0L/DBL0H are register-name
macros, presumably from arc-ieee-754.h (not visible here); the add.cc
below assumes they name r0 and r1 in some order - TODO confirm. */
.global __fixdfsi
FUNC(__fixdfsi)
.balign 4
__fixdfsi:
/* Top exponent bit clear means the magnitude is below 2: the result can
only be 0, 1 or -1. */
bbit0 DBL0H,30,.Lret0or1
/* r2 = sign-extended sign/exponent field minus 19; r3 = -r2.  Only the
low bits of r2/r3 are presumably used as shift counts.  DBL0H is reduced
to the upper fraction bits with the implicit one inserted at bit 20. */
asr r2,DBL0H,20
bmsk_s DBL0H,DBL0H,19
sub_s r2,r2,19; 0x3ff+20-0x400
neg_s r3,r2
/* Flags for the rest of the routine: carry is bit 10 of r3, which selects
between the shift-right (cs) and shift-left (cc) cases described above;
n is the sign of r3, which is the opposite of the input's sign. */
asr.f 0,r3,11
bset_s DBL0H,DBL0H,20
#ifdef __LITTLE_ENDIAN__
mov.cs DBL0L,DBL0H
asl DBL0H,DBL0H,r2
#else
asl.cc DBL0H,DBL0H,r2
lsr.cs DBL0H,DBL0H,r3
#endif
lsr_s DBL0L,DBL0L,r3
/* Shift-left case only: merge the contributions of both words. */
add.cc r0,r0,r1
j_s.d [blink]
/* n clear here corresponds to a negative input, so negate. */
neg.pl r0,r0
/* |x| < 2: adding 0x100000 bumps the exponent field by one, so bit 30
becomes set exactly when the exponent was 0x3ff (1 <= |x| < 2).  That bit
is moved to bit 0 to give 0 or 1, and the result is negated when the sum
was negative (sign bit set). */
.Lret0or1:
add.f r0,DBL0H,0x100000
lsr_s r0,r0,30
bmsk_s r0,r0,0
j_s.d [blink]
neg.mi r0,r0
ENDFUNC(__fixdfsi)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harness.  When enabled it calls the reference
routine __fixsfsi_c, then the assembly routine (renamed to __fixsfsi_asm
by the #define below) with the same argument, and calls abort unless the
two results are equal. */
#if 0 /* DEBUG */
.global __fixsfsi
FUNC(__fixsfsi)
.balign 4
__fixsfsi:
push_s blink
bl.d __fixsfsi_c
push_s r0
ld_s r1,[sp]
st_s r0,[sp]
bl.d __fixsfsi_asm
mov_s r0,r1
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
bl abort
ENDFUNC(__fixsfsi)
#define __fixsfsi __fixsfsi_asm
#endif /* DEBUG */
/* __fixsfsi: convert the single-precision value in r0 to a signed 32-bit
integer in r0.  Clobbers r2, r3 and flags. */
.global __fixsfsi
FUNC(__fixsfsi)
.balign 4
__fixsfsi:
/* Top exponent bit clear means the magnitude is below 2: the result can
only be 0, 1 or -1. */
bbit0 r0,30,.Lret0or1
/* r2 = sign and exponent (9 bits); r0 = 24-bit significand with the
implicit one inserted at bit 23. */
lsr r2,r0,23
bmsk_s r0,r0,22
bset_s r0,r0,23
sub_s r2,r2,22;0x7f+23-0x80
/* Shifting r2 left by 24 puts its bit 7 into n and its bit 8 (the input's
sign) into carry.  n set selects a left shift by r2, n clear a right
shift by -r2; presumably only the low bits of the count are used.
Carry set negates the result in the return delay slot. */
asl.f 0,r2,24
neg r3,r2
asl.mi r0,r0,r2
lsr.pl r0,r0,r3
j_s.d [blink]
neg.cs r0,r0
/* |x| < 2: adding 0x800000 bumps the exponent field by one, so bit 30
becomes set exactly when the exponent was 0x7f (1 <= |x| < 2).  That bit
is moved to bit 0 to give 0 or 1, and the result is negated when the sum
was negative (sign bit set). */
.Lret0or1:
add.f r0,r0,0x800000
lsr_s r0,r0,30
bmsk_s r0,r0,0
j_s.d [blink]
neg.mi r0,r0
ENDFUNC(__fixsfsi)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harness.  When enabled it calls the reference
routine __fixunsdfsi_c, then the assembly routine (renamed to
__fixunsdfsi_asm by the #define below) with the same argument, and calls
abort unless the two results are equal. */
#if 0 /* DEBUG */
FUNC(__fixunsdfsi)
.global __fixunsdfsi
.balign 4
__fixunsdfsi:
push_s blink
push_s r0
bl.d __fixunsdfsi_c
push_s r1
mov_s r2,r0
pop_s r1
ld r0,[sp]
bl.d __fixunsdfsi_asm
st r2,[sp]
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
bl abort
ENDFUNC(__fixunsdfsi)
#define __fixunsdfsi __fixunsdfsi_asm
#endif /* DEBUG */
/* __fixunsdfsi: convert the double in DBL0L/DBL0H to an unsigned 32-bit
integer in r0.  Clobbers r1, r2, r3 and flags.  DBL0L/DBL0H are
register-name macros, presumably from arc-ieee-754.h (not visible here);
the add.eq below assumes they name r0 and r1 in some order - TODO
confirm. */
.global __fixunsdfsi
FUNC(__fixunsdfsi)
.balign 4
__fixunsdfsi:
/* Top exponent bit clear means the magnitude is below 2: the result can
only be 0 or 1. */
bbit0 DBL0H,30,.Lret0or1
/* r2 = sign/exponent field minus 19, r3 = -r2; DBL0H is reduced to the
upper fraction bits with the implicit one inserted at bit 20. */
lsr r2,DBL0H,20
bmsk_s DBL0H,DBL0H,19
sub_s r2,r2,19; 0x3ff+20-0x400
neg_s r3,r2
/* z reflects bit 10 of r3: "ne" selects the variant that only uses the
high-word bits, "eq" the one that shifts left and merges both words. */
btst_s r3,10
bset_s DBL0H,DBL0H,20
#ifdef __LITTLE_ENDIAN__
mov.ne DBL0L,DBL0H
asl DBL0H,DBL0H,r2
#else
asl.eq DBL0H,DBL0H,r2
lsr.ne DBL0H,DBL0H,r3
#endif
lsr DBL0L,DBL0L,r3
j_s.d [blink]
add.eq r0,r0,r1
/* NOTE(review): nothing in this routine branches to .Lret0 and the code
above cannot fall into it, so this stub looks unused - confirm before
removing. */
.Lret0:
j_s.d [blink]
mov_l r0,0
/* |x| < 2: adding 0x100000 bumps the exponent field by one, so bit 30
becomes set exactly when the exponent was 0x3ff; that bit is returned as
the 0-or-1 result. */
.Lret0or1:
add_s DBL0H,DBL0H,0x100000
lsr_s DBL0H,DBL0H,30
j_s.d [blink]
bmsk_l r0,DBL0H,0
ENDFUNC(__fixunsdfsi)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harness.  When enabled it calls the reference
routine __floatsidf_c, stashes both result words on the stack, calls the
assembly routine (renamed to __floatsidf_asm by the #define below) with
the original argument, and calls abort unless both result words match. */
#if 0 /* DEBUG */
.global __floatsidf
.balign 4
FUNC(__floatsidf)
__floatsidf:
push_s blink
bl.d __floatsidf_c
push_s r0
ld_s r2,[sp]
st_s r1,[sp]
push_s r0
bl.d __floatsidf_asm
mov_s r0,r2
pop_s r2
pop_s r3
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink]
bl abort
ENDFUNC(__floatsidf)
#define __floatsidf __floatsidf_asm
#endif /* DEBUG */
/* __floatsidf: convert the signed 32-bit integer in r0 to a double in
DBL0L/DBL0H (register-name macros, presumably from arc-ieee-754.h, which
is not visible here).  Clobbers r2, r3, r12 and flags. */
.global __floatsidf
.balign 4
FUNC(__floatsidf)
__floatsidf:
/* r1 = |r0|.  A zero input returns at once; r0 and r1 are then both
zero, which is the double +0.0. */
abs.f r1,r0
jeq_s [blink]
/* norm of (|x| >> 1) gives the normalisation shift; r12 starts as the
negated exponent bias term. */
lsr r2,r1
mov r12,-0x41d ; -(0x3ff+31-1)
norm r2,r2
/* Carry here is the one left by abs.f; presumably it is set for a
negative input, in which case clearing bit 11 of r12 makes the final
subtraction also set the sign bit - TODO confirm against the ISA. */
bclr.cs r12,r12,11
/* r3 = 11 - shift: right-shift count for the high word; its flags choose
below whether the value fits in the high word alone.  r2 + 21 is the
left-shift count for the low word. */
rsub.f r3,r2,11
add_s r12,r2,r12
add_s r2,r2,21
#ifdef __LITTLE_ENDIAN__
asl DBL0L,r1,r2
lsr_s DBL0H,r1,r3
#else
lsr DBL0H,r1,r3
asl_s DBL0L,r1,r2
#endif
asl_s r12,r12,20
/* lo/ls test the flags of the rsub.f above: when the shift count did not
exceed 11 the left-shifted word is used as the high word and the low
word is cleared. */
mov.lo DBL0H,DBL0L
sub_s DBL0H,DBL0H,r12
j_s.d [blink]
mov.ls DBL0L,0
ENDFUNC(__floatsidf)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harnesses for __floatsisf and __floatunsisf.  When
enabled, each calls the reference routine (_c suffix), then the assembly
routine (renamed with an _asm suffix by the #defines below) with the same
argument, and calls abort unless the two results are equal. */
#if 0 /* DEBUG */
.global __floatsisf
FUNC(__floatsisf)
.balign 4
__floatsisf:
push_s blink
bl.d __floatsisf_c
push_s r0
ld_s r1,[sp]
st_s r0,[sp]
bl.d __floatsisf_asm
mov_s r0,r1
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
bl abort
ENDFUNC(__floatsisf)
.global __floatunsisf
FUNC(__floatunsisf)
.balign 4
__floatunsisf:
push_s blink
bl.d __floatunsisf_c
push_s r0
ld_s r1,[sp]
st_s r0,[sp]
bl.d __floatunsisf_asm
mov_s r0,r1
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
bl abort
ENDFUNC(__floatunsisf)
#define __floatsisf __floatsisf_asm
#define __floatunsisf __floatunsisf_asm
#endif /* DEBUG */
/* __floatunsisf / __floatsisf: convert the unsigned / signed 32-bit
integer in r0 to single precision, returned in r0.  Both entry points
share the code from local label 0 onwards.  Clobbers r1, r2, r3, r12 and
flags. */
.global __floatunsisf
.global __floatsisf
FUNC(__floatsisf)
FUNC(__floatunsisf)
.balign 4
/* Unsigned entry: prepare the normalisation count and exponent seed, then
join the shared code unless the input is zero, in which case r0 (already
zero) is returned as is. */
__floatunsisf:
lsr_s r2,r0
mov_l r12,0x9d ; 0x7f + 31 - 1
norm r2,r2
brne_l r0,0,0f
j_s [blink]
.balign 4
/* Signed entry: work on the magnitude; a zero input returns at once.
bset.cs uses the carry left by abs.f - presumably set for a negative
input - to set bit 8 of the exponent seed, which becomes bit 31 (the
sign) after the shift by 23 below. */
__floatsisf:
abs.f r0,r0
jeq_s [blink]
lsr_s r2,r0
mov_s r12,0x9d ; 0x7f + 31 - 1
norm r2,r2
bset.cs r12,r12,8
/* Shared tail.  r3 = 8 - norm count: if non-negative the magnitude is
shifted right by r3, otherwise left by -r3.  r1 collects the bits that a
right shift discards (rotated to the top of the word) so the final
add.pnz can round up; this looks like round-to-nearest with ties to
even - TODO confirm.  r12 - r2, shifted to bit 23, supplies the exponent
(and sign). */
0: rsub.f r3,r2,8
bmsk r1,r0,r3
ror r1,r1,r3
lsr.pl r0,r0,r3
neg_s r3,r3
asl.mi r0,r0,r3
sub_s r12,r12,r2
asl_s r12,r12,23
bxor.pl.f r1,r1,31
add_s r0,r0,r12
j_s.d [blink]
add.pnz r0,r0,1
ENDFUNC(__floatunsisf)
ENDFUNC(__floatsisf)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harness.  When enabled it calls the reference
routine __floatunsidf_c, stashes both result words on the stack, calls
the assembly routine (renamed to __floatunsidf_asm by the #define below)
with the original argument, and calls abort unless both result words
match. */
#if 0 /* DEBUG */
.global __floatunsidf
.balign 4
FUNC(__floatunsidf)
__floatunsidf:
push_s blink
bl.d __floatunsidf_c
push_s r0
ld_s r2,[sp]
st_s r1,[sp]
push_s r0
bl.d __floatunsidf_asm
mov_s r0,r2
pop_s r2
pop_s r3
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink]
bl abort
ENDFUNC(__floatunsidf)
#define __floatunsidf __floatunsidf_asm
#endif /* DEBUG */
/* __floatunsidf: convert the unsigned 32-bit integer in r0 to a double in
DBL0L/DBL0H (register-name macros, presumably from arc-ieee-754.h, which
is not visible here).  Clobbers r2, r3, r12 and flags. */
.global __floatunsidf
.balign 4
FUNC(__floatunsidf)
__floatunsidf:
/* r1 = r0 >> 1.  For a zero input both r0 and r1 are now zero, so the
routine can return through .Lret0 straight away. */
lsr_s r1,r0
breq_s r0,0,.Lret0
/* norm of (x >> 1) gives the normalisation shift; r12 is the negated
exponent bias term adjusted by that shift. */
norm r2,r1
mov r12,-0x41d ; -(0x3ff+31-1)
/* r3 = 11 - shift: right-shift count for the high word; its flags choose
below whether the value fits in the high word alone.  r2 + 21 is the
left-shift count for the low word. */
rsub.f r3,r2,11
add_s r12,r2,r12
add_s r2,r2,21
#ifdef __LITTLE_ENDIAN__
lsr DBL0H,r0,r3
asl_s DBL0L,r0,r2
#else
asl DBL0L,r0,r2
lsr_s DBL0H,r0,r3
#endif
asl_s r12,r12,20
/* lo/ls test the flags of the rsub.f above: when the shift count did not
exceed 11 the left-shifted word is used as the high word and the low
word is cleared. */
mov.lo DBL0H,DBL0L
sub_s DBL0H,DBL0H,r12
.Lret0: j_s.d [blink]
mov.ls DBL0L,0
ENDFUNC(__floatunsidf)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* inputs: DBL0, DBL1
output: c flags to be used for 'hs' condition
clobber: r12, flags */
/* For NaNs, bit 19.. bit 30 of the high word must be set. */
/* __gedf2: double-precision "greater than or equal" test.  The result is
delivered in the carry flag: carry clear ('hs') means DBL0 >= DBL1, carry
set means DBL0 < DBL1 or an operand is a NaN.
DBL0L/DBL0H/DBL1L/DBL1H are register-name macros; presumably they come
from arc-ieee-754.h, which is not visible here. */
/* Disabled self-check harness.  When enabled it saves r0-r11 and blink,
calls the reference routine __gedf2_c and keeps its result in r11,
restores the argument registers, calls the assembly routine (renamed to
__gedf2_asm by the #define below), and calls abort unless the 'hs'
condition agrees with "r11 >= 0". */
#if 0 /* DEBUG */
.global __gedf2
.balign 4
FUNC(__gedf2)
__gedf2:
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
bl.d __gedf2_c` push_s r0
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
bl.d __gedf2_asm` ld.ab r10,[sp,4]
pop_s blink
brge.d r11,0,0f
ld.ab r11,[sp,4]
jlo [blink]
bl abort
0: jhs [blink]
bl abort
ENDFUNC(__gedf2)
#define __gedf2 __gedf2_asm
#endif /* DEBUG */
.global __gedf2
.balign 4
HIDDEN_FUNC(__gedf2)
__gedf2:
/* n is set when either operand has its sign bit set; those cases are
handled at .Lneg.  The delay slot leaves bits 0..20 of the OR of both
high words in r12 for the NaN tests. */
or.f r12,DBL0H,DBL1H
bmi.d .Lneg
bmsk_s r12,r12,20
/* Both operands are non-negative here (the bmi.d above has already
diverted every case with a sign bit set, so no further sign test is
needed).  After the NaN tests the bit patterns compare like unsigned
64-bit integers: high words first, low words only if those are equal. */
add1.f 0,r12,DBL0H ; clear z; set c iff NaN
add1.cc.f r12,r12,DBL1H ; clear z; set c iff NaN
cmp.cc DBL0H,DBL1H
j_s.d [blink]
cmp.eq DBL0L,DBL1L
.balign 4
/* At least one operand is negative.  If DBL1H is zero go to .L0; the
delay slot NaN-checks DBL0, the operand that can still be a NaN on that
path.  Otherwise finish the NaN tests and compare with the operands
swapped, which gives the right order for sign-magnitude values. */
.Lneg: breq.d DBL1H,0,.L0
add1.f 0,r12,DBL0H
add1.cc.f r12,r12,DBL1H
cmp.cc DBL1H,DBL0H
j_s.d [blink]
cmp.eq DBL1L,DBL0L
.balign 4
/* DBL1H is zero and DBL0 is negative.  Unless DBL0H is exactly the high
word of -0, the swapped compare decides (and reports "less"). */
.L0:
bxor.f 0,DBL0H,31 ; check for high word of -0.
beq_s .Lcheck_0
cmp.cc DBL1H,DBL0H
j_s.d [blink]
cmp.eq DBL1L,DBL0L
.Lcheck_0:
; high words suggest DBL0 may be -0, DBL1 +0; check low words.
/* DBL1H is known to be zero, so each compare sets carry iff the low word
it is compared against is non-zero: 'hs' results only when both low
words are zero, i.e. for -0 >= +0. */
cmp_s DBL1H,DBL0L
j_s.d [blink]
cmp.cc DBL1H,DBL1L
ENDFUNC(__gedf2)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* inputs: r0, r1
output: c flag to be used for 'hs' condition
clobber: r12,flags */
/* For NaNs, bit 22.. bit 30 must be set. */
/* __gesf2: single-precision "greater than or equal" test.  The result is
delivered in the carry flag: carry clear ('hs') means r0 >= r1, carry set
means r0 < r1 or an operand is a NaN. */
/* Disabled self-check harness.  When enabled it saves r0-r11 and blink,
calls the reference routine __gesf2_c and keeps its result in r11,
restores the argument registers, calls the assembly routine (renamed to
__gesf2_asm by the #define below), and calls abort unless the 'hs'
condition agrees with "r11 >= 0". */
#if 0 /* DEBUG */
.global __gesf2
.balign 4
FUNC(__gesf2)
__gesf2:
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
bl.d __gesf2_c` push_s r0
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
bl.d __gesf2_asm` ld.ab r10,[sp,4]
pop_s blink
brge.d r11,0,0f
ld.ab r11,[sp,4]
jlo [blink]
bl abort
0: jhs [blink]
bl abort
ENDFUNC(__gesf2)
#define __gesf2 __gesf2_asm
#endif /* DEBUG */
.global __gesf2
.balign 4
HIDDEN_FUNC(__gesf2)
__gesf2:
/* n is set when either operand has its sign bit set; those cases are
handled at .Lneg.  The delay slot leaves bits 0..23 of (r0 | r1) in r12
for the NaN tests. */
or.f r12,r0,r1
bmi.d .Lneg
bmsk_s r12,r12,23
/* Both operands non-negative: after the NaN tests the bit patterns
compare like unsigned integers. */
add1.f 0,r12,r0 ; check for NaN
add1.cc.f r12,r12,r1
j_s.d [blink]
cmp.cc r0,r1
.balign 4
/* At least one operand is negative.  If r1 is zero go to .L0; the delay
slot NaN-checks r0, the operand that can still be a NaN on that path.
Otherwise finish the NaN tests and compare with the operands swapped,
which gives the right order for sign-magnitude values. */
.Lneg: breq.d r1,0,.L0
add1.f 0,r12,r0 ; check for NaN
add1.cc.f r12,r12,r1
j_s.d [blink]
cmp.cc r1,r0
.balign 4
/* r1 is +0 and r0 is negative.  z is set iff r0 is -0; in that case the
compare is skipped and carry stays clear (-0 >= +0).  The compare is also
skipped when the NaN test left carry set.  Otherwise 0 - r0 borrows and
reports "less". */
.L0: bxor.f 0,r0,31 ; check for -0
j_s.d [blink]
cmp.hi r1,r0
ENDFUNC(__gesf2)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* inputs: DBL0, DBL1
output: c,z flags to be used for 'hi' condition
clobber: r12, flags */
/* For NaNs, bit 19.. bit 30 of the high word must be set. */
/* __gtdf2: double-precision "greater than" test.  The result is delivered
in the flags: 'hi' (carry clear and z clear) means DBL0 > DBL1.
DBL0L/DBL0H/DBL1L/DBL1H are register-name macros; presumably they come
from arc-ieee-754.h, which is not visible here. */
/* Disabled self-check harness.  When enabled it saves r0-r11 and blink,
calls the reference routine __gtdf2_c and keeps its result in r11,
restores the argument registers, calls the assembly routine (renamed to
__gtdf2_asm by the #define below), and calls abort unless the 'hi'
condition agrees with "r11 > 0". */
#if 0 /* DEBUG */
.global __gtdf2
.balign 4
FUNC(__gtdf2)
__gtdf2:
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
bl.d __gtdf2_c` push_s r0
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
bl.d __gtdf2_asm` ld.ab r10,[sp,4]
pop_s blink
brgt.d r11,0,0f
ld.ab r11,[sp,4]
jls [blink]
bl abort
0: jhi [blink]
bl abort
ENDFUNC(__gtdf2)
#define __gtdf2 __gtdf2_asm
#endif /* DEBUG */
.global __gtdf2
.balign 4
HIDDEN_FUNC(__gtdf2)
__gtdf2:
/* n is set when either operand has its sign bit set; those cases are
handled at .Lneg.  The delay slot leaves bits 0..20 of the OR of both
high words in r12 for the NaN tests. */
or.f r12,DBL0H,DBL1H
bmi.d .Lneg
bmsk_s r12,r12,20
/* Both operands non-negative: after the NaN tests the bit patterns
compare like unsigned 64-bit integers, high words first and low words
only if those are equal. */
add1.f 0,r12,DBL0H ; clear z; set c iff NaN
add1.cc.f r12,r12,DBL1H ; clear z; set c iff NaN
; don't care: z may or may not be cleared if there is no NaN event
cmp.cc DBL0H,DBL1H
j_s.d [blink]
cmp.eq DBL0L,DBL1L
.balign 4
/* At least one operand is negative.  If DBL0H is zero go to .L0; the
delay slot NaN-checks DBL1, the operand that can still be a NaN on that
path.  Otherwise finish the NaN tests and compare with the operands
swapped, which gives the right order for sign-magnitude values. */
.Lneg: breq.d DBL0H,0,.L0
add1.f 0,r12,DBL1H
add1.cc.f r12,r12,DBL0H
cmp.cc DBL1H,DBL0H
j_s.d [blink]
cmp.eq DBL1L,DBL0L
.balign 4
/* DBL0H is zero and DBL1 is negative.  Unless DBL1H is exactly the high
word of -0, the swapped compare decides (and reports "greater" when no
NaN was detected). */
.L0:
bxor.f 0,DBL1H,31
beq_s .Lcheck_0
cmp.cc DBL1H,DBL0H
j_s.d [blink]
cmp.eq DBL1L,DBL0L
.balign 4
.Lcheck_0:
; high words suggest DBL0 may be +0, DBL1 -0; check low words.
/* z is set iff both low words are zero (+0 versus -0, not greater).
Carry is still the clear one left by the NaN test in the delay slot at
.Lneg, so any non-zero low word yields 'hi'. */
j_s.d [blink]
or.f 0,DBL0L,DBL1L
ENDFUNC(__gtdf2)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* inputs: r0, r1
output: c, z flags to be used for 'hi' condition
clobber: r12,flags */
/* For NaNs, bit 22.. bit 30 must be set. */
#if 0 /* DEBUG */
/* Disabled self-check wrapper.  It saves r11, blink and r0..r10, calls the
   reference routine __gtsf2_c (presumably a C implementation), keeps its
   result in r11, restores the registers and then calls the assembly
   routine, which the #define below renames to __gtsf2_asm.
   If the reference result is greater than zero the flags must satisfy hi,
   otherwise they must satisfy ls.  A mismatch ends in abort.  */
.global __gtsf2
.balign 4
FUNC(__gtsf2)
__gtsf2:
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
bl.d __gtsf2_c` push_s r0
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
bl.d __gtsf2_asm` ld.ab r10,[sp,4]
pop_s blink
brgt.d r11,0,0f
ld.ab r11,[sp,4]
jls [blink]
bl abort
0: jhi [blink]
bl abort
ENDFUNC(__gtsf2)
#define __gtsf2 __gtsf2_asm
#endif /* DEBUG */
/* __gtsf2: "greater than" comparison of the floats in r0 and r1.
   The result is delivered in the c and z flags, to be tested with the hi
   condition.  r12 and the flags are clobbered.
   When a sign bit is set, the operands are compared in swapped order.  */
.global __gtsf2
.balign 4
HIDDEN_FUNC(__gtsf2)
__gtsf2:
or.f r12,r0,r1 ; n flag is set if either operand has bit 31 set
bmi.d .Lneg
bmsk_s r12,r12,23 ; delay slot: keep bits 0..23 of r0|r1
add1.f 0,r12,r0 ; check for NaN
add1.cc.f r12,r12,r1
j_s.d [blink]
cmp.cc r0,r1 ; delay slot: only compare when no carry was flagged
.balign 4
/* At least one operand has its sign bit set.  r0 == 0 is handled at .L0.  */
.Lneg: breq.d r0,0,.L0
add1.f 0,r12,r0 ; check for NaN
add1.cc.f r12,r12,r1
j_s.d [blink]
cmp.cc r1,r0 ; delay slot: swapped operand order
.balign 4
/* r0 is zero here.  The bxor result is zero exactly when r1 is 0x80000000,
   and the conditional compare below only runs when hi holds after it.  */
.L0: bxor.f 0,r1,31 ; check for -0
j_s.d [blink]
cmp.hi r1,r0
ENDFUNC(__gtsf2)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* XMAC schedule: directly back-to-back multiplies stall; the third
instruction after a multiply stalls unless it is also a multiply. */
#include "arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-check wrapper.  It calls the reference routine __muldf3_c
   (presumably a C implementation), parks its result on the stack, reloads
   the original arguments and calls the assembly routine, which the #define
   below renames to __muldf3_asm.  Both result words are then compared and
   abort is reached on any difference.  */
.global __muldf3
.balign 4
__muldf3:
push_s blink
push_s r2
push_s r3
push_s r0
bl.d __muldf3_c
push_s r1
ld_s r2,[sp,12]
ld_s r3,[sp,8]
st_s r0,[sp,12]
st_s r1,[sp,8]
pop_s r1
bl.d __muldf3_asm
pop_s r0
pop_s r3
pop_s r2
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink]
b abort
#define __muldf3 __muldf3_asm
#endif /* DEBUG */
/* N.B. This is optimized for ARC700.
ARC600 has very different scheduling / instruction selection criteria. */
/* For the standard multiplier, instead of mpyu rx,DBL0L,DBL1L; tst rx,rx ,
we can do:
sub rx,DBL0L,1; bic rx,DBL0L,rx; lsr rx,rx; norm rx,rx; asl.f 0,DBL1L,rx */
/* __muldf3: double precision multiply, DBL0 = DBL0 * DBL1.
   The special-case paths (denormal, infinity / NaN, zero result) are placed
   in front of the entry point.  The entry point itself is the __muldf3
   label further down, and the 0x7ff00000 literal follows the function.
   Registers written here besides DBL0 / DBL1: r4-r12.  */
__muldf3_support: /* This label makes debugger output saner. */
/* If one number is denormal, subtract some from the exponent of the other
one (if the other exponent is too small, return 0), and normalize the
denormal. Then re-run the computation. */
.balign 4
FUNC(__muldf3)
/* DBL0 has a zero exponent field: swap DBL0 and DBL1 through r12, so that
   the common code below always finds the denormal in DBL1, and reload r11
   with the exponent field of the new DBL0.  */
.Ldenorm_dbl0:
mov_s r12,DBL0L
mov_s DBL0L,DBL1L
mov_s DBL1L,r12
mov_s r12,DBL0H
mov_s DBL0H,DBL1H
mov_s DBL1H,r12
and r11,DBL0H,r9
.Ldenorm_dbl1:
brhs r11,r9,.Linf_nan ; other operand has an all-ones exponent field
brhs 0x3ca00001,r11,.Lret0 ; other exponent too small: result is zero
/* The next three instructions subtract the sign bit of DBL1H from DBL0H
   and leave DBL1H without its sign bit.  */
sub_s DBL0H,DBL0H,DBL1H
bmsk_s DBL1H,DBL1H,30
add_s DBL0H,DBL0H,DBL1H
breq_s DBL1H,0,.Ldenorm_2 ; high fraction word is zero
/* Normalize the DBL1 fraction by the shift count in r12 and take the same
   count, scaled to the exponent position, off DBL0H.  */
norm r12,DBL1H
sub_s r12,r12,10
asl r5,r12,20
asl_s DBL1H,DBL1H,r12
sub DBL0H,DBL0H,r5
neg r5,r12
lsr r6,DBL1L,r5
asl_s DBL1L,DBL1L,r12
b.d __muldf3
add_s DBL1H,DBL1H,r6
.balign 4
/* At least one input has an all-ones exponent field.  */
.Linf_nan:
bclr r12,DBL1H,31
xor_s DBL1H,DBL1H,DBL0H
bclr_s DBL0H,DBL0H,31
max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
or.f 0,DBL0H,DBL0L
mov_s DBL0L,0
or.ne.f DBL1L,DBL1L,r12
not_s DBL0H,DBL0L ; inf * 0 -> NaN
mov.ne DBL0H,r8
tst_s DBL1H,DBL1H
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
/* Return zero.  After the three instructions below only bit 31 of
   DBL0H ^ DBL1H is left in DBL0H.  */
.Lret0: xor_s DBL0H,DBL0H,DBL1H
bclr DBL1H,DBL0H,31
xor_s DBL0H,DBL0H,DBL1H
j_s.d [blink]
mov_l DBL0L,0
.balign 4
/* Denormal whose high fraction word is zero: normalize from the low word
   and fall through into the main routine.  */
.Ldenorm_2:
breq_s DBL1L,0,.Lret0 ; 0 input -> 0 output
norm.f r12,DBL1L
mov.mi r12,21
add.pl r12,r12,22
neg r11,r12
asl_s r12,r12,20
lsr.f DBL1H,DBL1L,r11
ror DBL1L,DBL1L,r11
sub_s DBL0H,DBL0H,r12
mov.eq DBL1H,DBL1L
sub_s DBL1L,DBL1L,DBL1H
/* Fall through. */
.global __muldf3
.balign 4
__muldf3:
/* r9 = 0x7ff00000, loaded from .L7ff00000 behind the function.  The word
   offset is hard-coded, so it has to be kept in sync whenever the size of
   the code between here and the literal changes.  */
ld.as r9,[pcl,0x4b] ; ((.L7ff00000-.+2)/4)]
mpyhu r4,DBL0L,DBL1L
bmsk r6,DBL0H,19
bset r6,r6,20 ; r6 = DBL0 high fraction bits with bit 20 set
mpyu r7,r6,DBL1L
and r11,DBL0H,r9 ; r11 = exponent field of DBL0
breq r11,0,.Ldenorm_dbl0
mpyhu r8,r6,DBL1L
bmsk r10,DBL1H,19
bset r10,r10,20 ; r10 = DBL1 high fraction bits with bit 20 set
mpyhu r5,r10,DBL0L
add.f r4,r4,r7
and r12,DBL1H,r9 ; r12 = exponent field of DBL1
mpyhu r7,r6,r10
breq r12,0,.Ldenorm_dbl1
adc.f r5,r5,r8
mpyu r8,r10,DBL0L
breq r11,r9,.Linf_nan
breq r12,r9,.Linf_nan
mpyu r6,r6,r10
add.cs r7,r7,1
add.f r4,r4,r8
mpyu r10,DBL1L,DBL0L
bclr r8,r9,30 ; 0x3ff00000
adc.f r5,r5,r6
; XMAC write-back stall / std. mult stall is one cycle later
bclr r6,r9,20 ; 0x7fe00000
add.cs r7,r7,1 ; fraction product in r7:r5:r4
tst r10,r10
bset.ne r4,r4,0 ; put least significant word into sticky bit
lsr.f r10,r7,9
add_l r12,r12,r11 ; add exponents
rsub.eq r8,r8,r9 ; 0x40000000
sub r12,r12,r8 ; subtract bias + implicit 1
brhs.d r12,r6,.Linf_denorm
rsub r10,r10,12
/* Shift the product r7:r5:r4 left by r10 into place, round, and combine it
   with the exponent in r12.  The sign is taken from DBL0H ^ DBL1H.  */
.Lshift_frac:
neg r8,r10
asl r6,r4,r10
lsr DBL0L,r4,r8
add.f 0,r6,r6
btst.eq DBL0L,0
cmp.eq r4,r4 ; round to nearest / round to even
asl r4,r5,r10
lsr r5,r5,r8
adc.f DBL0L,DBL0L,r4
xor.f 0,DBL0H,DBL1H
asl r7,r7,r10
add_s r12,r12,r5
adc DBL0H,r12,r7
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
/* We have checked for infinity / NaN input before, and transformed
denormalized inputs into normalized inputs. Thus, the worst case
exponent overflows are:
1 + 1 - 0x400 == 0xc02 : maximum underflow
0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
N.B. 0x7e and 0x7f are also values for overflow.
If (r12 <= -54), we have an underflow to zero. */
.balign 4
.Linf_denorm:
brlo r12,0xc0000000,.Linf
asr r6,r12,20
mov_s r12,0
add.f r10,r10,r6
brgt r10,0,.Lshift_frac
beq_s .Lround_frac
add.f r10,r10,32
/* Move the product down by one whole word.  A nonzero word that is dropped
   is remembered by setting bit 1 of the new low word.  */
.Lshift32_frac:
tst r4,r4
mov r4,r5
bset.ne r4,r4,1
mov r5,r7
mov r7,0
brge r10,1,.Lshift_frac
breq r10,0,.Lround_frac
add.f r10,r10,32
brgt r10,21,.Lshift32_frac
b_s .Lret0
.Lround_frac:
add.f 0,r4,r4
btst.eq r5,0
mov_s DBL0L,r5
mov_s DBL0H,r7
adc.eq.f DBL0L,DBL0L,0
j_s.d [blink]
adc.eq DBL0H,DBL0H,0
/* Overflow: return r9 (0x7ff00000) with a zero low word and the sign of
   DBL0H ^ DBL1H.  */
.Linf: xor.f DBL1H,DBL1H,DBL0H
mov_s DBL0L,0
mov_s DBL0H,r9
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
ENDFUNC(__muldf3)
.balign 4
/* Exponent mask, loaded pc-relative into r9 at the __muldf3 entry.  */
.L7ff00000:
.long 0x7ff00000
/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* XMAC schedule: directly back-to-back multiplies stall; the third
instruction after a multiply stalls unless it is also a multiply. */
#include "arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-check wrapper.  It calls the reference routine __mulsf3_c
   (presumably a C implementation) and then the assembly routine, which the
   #define below renames to __mulsf3_asm, on the same arguments.  The two
   results must be bitwise equal, or both must be NaNs; anything else ends
   in abort.  */
.global __mulsf3
FUNC(__mulsf3)
.balign 4
__mulsf3:
push_s blink
push_s r1
bl.d __mulsf3_c
push_s r0
ld_s r1,[sp,4]
st_s r0,[sp,4]
bl.d __mulsf3_asm
pop_s r0
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
and r12,r0,r1
bic.f 0,0x7f800000,r12
bne 0f
bmsk.f 0,r0,22
bmsk.ne.f r1,r1,22
jne_s [blink] ; both NaN -> OK
0: bl abort
ENDFUNC(__mulsf3)
#define __mulsf3 __mulsf3_asm
#endif /* DEBUG */
/* __mulsf3: single precision multiply, r0 = r0 * r1.
   Registers written here besides r0 / r1: r2-r4, r6-r9, r11, r12.
   The two constants are loaded pc-relative from the literals behind the
   code.  Both word offsets are hard-coded, so they have to be kept in sync
   whenever the size of the code in between changes.  */
.balign 4
.global __mulsf3
FUNC(__mulsf3)
__mulsf3:
ld.as r9,[pcl,79]; [pcl,((.L7f800000-.+2)/4)]
bmsk r4,r1,22 ; r4 = fraction bits of r1
bset r2,r0,23
asl_s r2,r2,8 ; r2 = r0 fraction with bit 23 set, moved up by 8
bset r3,r4,23 ; r3 = r1 fraction with bit 23 set
mpyhu r6,r2,r3
and r11,r0,r9 ; r11 = exponent field of r0
breq r11,0,.Ldenorm_dbl0
mpyu r7,r2,r3
breq r11,r9,.Linf_nan_dbl0
and r12,r1,r9 ; r12 = exponent field of r1
asl.f 0,r6,8
breq r12,0,.Ldenorm_dbl1
.Lpast_denorm:
xor_s r0,r0,r1 ; bit 31 of r0 is now the sign of the product
.Lpast_denorm_dbl1:
add.pl r6,r6,r6
bclr.pl r6,r6,23
add.pl.f r7,r7,r7
ld.as r4,[pcl,64]; [pcl,((.L7fffffff-.+2)/4)]
add.cs r6,r6,1
lsr.f 0,r6,1
breq r12,r9,.Linf_nan_dbl1
add_s r12,r12,r11
adc.f 0,r7,r4
add_s r12,r12, \
-0x3f800000
adc.f r8,r6,r12
bic r0,r0,r4 ; keep only the sign bit in r0
tst.pl r8,r9
min r3,r8,r9
jpnz.d [blink]
add.pnz r0,r0,r3
; infinity or denormal number
add.ne.f r3,r3,r3
bpnz .Linfinity
asr_s r3,r3,23+1
bset r6,r6,23
sub_s r3,r3,1
neg_s r2,r3
brhi r2,24,.Lret_r0 ; right shift shift > 24 -> return +-0
lsr r2,r6,r2
asl r9,r6,r3
lsr.f 0,r2,1
tst r7,r7
add_s r0,r0,r2
bset.ne r9,r9,0
adc.f 0,r9,r4
j_s.d [blink]
add.cs r0,r0,1
.Linfinity:
j_s.d [blink]
add_s r0,r0,r9 ; delay slot: sign bit in r0 plus 0x7f800000 from r9
.Lret_r0: j_s [blink]
.balign 4
/* r0 has an all-ones exponent field.  */
.Linf_nan_dbl0:
sub_s r2,r1,1 ; inf/nan * 0 -> nan; inf * nan -> nan (use |r2| >= inf)
bic.f 0,r9,r2
xor_s r0,r0,r1
bclr_s r1,r1,31
xor_s r0,r0,r1
jne_s [blink]
.Lretnan:
j_s.d [blink]
mov r0,-1 ; delay slot: all bits set is the NaN that gets returned
/* r0 has a zero exponent field and r1 an all-ones one.  If r0 is zero apart
   from its sign bit, the result is NaN.  */
.Ldenorm_dbl0_inf_nan_dbl1:
bmsk.f 0,r0,30
beq_s .Lretnan
xor_s r0,r0,r1
/* Return r1 with bit 31 replaced by bit 31 of r0.  */
.Linf_nan_dbl1:
xor_s r1,r1,r0
bclr_s r1,r1,31
j_s.d [blink]
xor_s r0,r0,r1
.balign 4
/* r0 has a zero exponent field: normalize its fraction in r2, redo the
   multiplies and adjust the exponent of r1 in r12 by the shift count.  */
.Ldenorm_dbl0:
bclr_s r2,r2,31
norm.f r4,r2
and r12,r1,r9
add_s r2,r2,r2
asl r2,r2,r4
asl r4,r4,23
mpyhu r6,r2,r3
breq r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
sub.ne.f r12,r12,r4
mpyu r7,r2,r3
bhi.d .Lpast_denorm
asl.f 0,r6,8
/* Not continuing at .Lpast_denorm: return a zero that carries only the
   sign bit of r0 ^ r1.  */
xor_s r0,r0,r1
bmsk r1,r0,30
j_s.d [blink]
bic_l r0,r0,r1
.balign 4
/* r1 has a zero exponent field: normalize its fraction in r4, redo the
   multiplies and adjust the exponent of r0 in r11 by the shift count.  */
.Ldenorm_dbl1:
norm.f r3,r4
xor_s r0,r0,r1
sub_s r3,r3,7
asl r4,r4,r3
sub_s r3,r3,1
asl_s r3,r3,23
mpyhu r6,r2,r4
sub.ne.f r11,r11,r3
bmsk r8,r0,30
mpyu r7,r2,r4
bhi.d .Lpast_denorm_dbl1
asl.f 0,r6,8
j_s.d [blink]
bic r0,r0,r8 ; delay slot: keep only the sign bit
.balign 4
/* Literals loaded pc-relative into r9 and r4 above.  */
.L7f800000:
.long 0x7f800000
.L7fffffff:
.long 0x7fffffff
ENDFUNC(__mulsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* inputs: r0, r1
output: c flag
clobber: r12, flags
For NaNs, bit 19 .. bit 30 must be set. */
#if 0 /* DEBUG */
/* Disabled self-check wrapper.  It saves r11, blink and r0..r10, calls the
   reference routine __unorddf2_c (presumably a C implementation), keeps its
   result in r11, restores the registers and then calls the assembly
   routine, which the #define below renames to __orddf2_asm.
   A nonzero reference result requires the carry flag to be set, a zero
   result requires it to be clear.  A mismatch ends in abort.  */
.global __orddf2
.balign 4
FUNC(__orddf2)
__orddf2:
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
bl.d __unorddf2_c` push_s r0
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
bl.d __orddf2_asm` ld.ab r10,[sp,4]
pop_s blink
brne.d r11,0,0f
ld.ab r11,[sp,4]
jcc [blink]
bl abort
0: jcs [blink]
bl abort
ENDFUNC(__orddf2)
#define __orddf2 __orddf2_asm
#endif /* DEBUG */
/* __orddf2: test whether either of the doubles DBL0 / DBL1 is a NaN.
   The answer is returned in the carry flag, which is set when a NaN was
   seen.  Only the high words are examined.  r12 and the flags are
   clobbered.  The second test is skipped when the first one already set
   carry.  */
.global __orddf2
.balign 4
HIDDEN_FUNC(__orddf2)
__orddf2:
bmsk r12,DBL0H,20
add1.f r12,r12,DBL0H /* clear z; set c if NaN. */
bmsk r12,DBL1H,20
j_s.d [blink]
add1.cc.f r12,r12,DBL1H /* clear z; set c if NaN. */
ENDFUNC(__orddf2)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* inputs: r0, r1
output: c flag
clobber: r12, flags
For NaNs, bit 22 .. bit 30 must be set. */
#if 0 /* DEBUG */
/* Disabled self-check wrapper.  It saves r11, blink and r0..r10, calls the
   reference routine __unordsf2_c (presumably a C implementation), keeps its
   result in r11, restores the registers and then calls the assembly
   routine, which the #define below renames to __ordsf2_asm.
   A nonzero reference result requires the carry flag to be set, a zero
   result requires it to be clear.  A mismatch ends in abort.  */
.global __ordsf2
.balign 4
FUNC(__ordsf2)
__ordsf2:
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
bl.d __unordsf2_c` push_s r0
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
bl.d __ordsf2_asm` ld.ab r10,[sp,4]
pop_s blink
brne.d r11,0,0f
ld.ab r11,[sp,4]
jcc [blink]
bl abort
0: jcs [blink]
bl abort
ENDFUNC(__ordsf2)
#define __ordsf2 __ordsf2_asm
#endif /* DEBUG */
/* __ordsf2: test whether either of the floats in r0 / r1 is a NaN.
   The answer is returned in the carry flag, which is set when a NaN was
   seen.  r12 and the flags are clobbered.  The second test is skipped when
   the first one already set carry.  */
.global __ordsf2
.balign 4
HIDDEN_FUNC(__ordsf2)
__ordsf2:
bmsk r12,r0,23
add1.f r12,r12,r0 /* clear z; set c if NaN. */
bmsk r12,r1,23
j_s.d [blink]
add1.cc.f r12,r12,r1 /* clear z; set c if NaN. */
ENDFUNC(__ordsf2)
/* Copyright (C) 2006, 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-check wrapper.  It calls the reference routine
   __truncdfsf2_c (presumably a C implementation) and then the assembly
   routine, which the #define below renames to __truncdfsf2_asm, on the same
   argument.  The two results must be bitwise equal, or both must be NaNs;
   anything else ends in abort.  */
FUNC(__truncdfsf2)
.global __truncdfsf2
.balign 4
__truncdfsf2:
push_s blink
push_s r0
bl.d __truncdfsf2_c
push_s r1
mov_s r2,r0
pop_s r1
ld r0,[sp]
bl.d __truncdfsf2_asm
st r2,[sp]
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
and r12,r0,r1
bic.f 0,0x7f800000,r12
bne 0f
bmsk.f 0,r0,22
bmsk.ne.f r1,r1,22
jne_s [blink] ; both NaN -> OK
0: bl abort
ENDFUNC(__truncdfsf2)
#define __truncdfsf2 __truncdfsf2_asm
#endif /* DEBUG */
/* __truncdfsf2: convert the double in DBL0 to a float returned in r0.
   Registers written here besides DBL0 / r0: r2, r3, r12.
   r2 holds the sign and exponent of the input (bit 11 is the sign) and r12
   the exponent after subtracting 0x380.  */
.global __truncdfsf2
.balign 4
FUNC(__truncdfsf2)
__truncdfsf2:
lsr r2,DBL0H,20 ; r2 = sign and exponent field
asl_s DBL0H,DBL0H,12 ; high fraction bits moved to the top of the word
sub r12,r2,0x380
bclr.f r3,r12,11 ; r3 = adjusted exponent without the sign bit
brhs r3,0xff,.Lill_exp ; adjusted exponent is outside 0..0xfe
beq_l .Ldenorm0 ; adjusted exponent is exactly zero
asl_s r12,r12,23
tst DBL0L, \
0x2fffffff /* Check if msb guard bit wants rounding up. */
lsr_s DBL0L,DBL0L,28
lsr_s DBL0H,DBL0H,8
add.ne DBL0L,DBL0L,1
add_s DBL0H,DBL0H,DBL0L
lsr_s DBL0H,DBL0H
btst_s r2,11 ; test the input sign
add_s r0,DBL0H,r12
j_s.d [blink]
bxor.ne r0,r0,31 ; delay slot: toggle bit 31 when the input sign was set
.balign 4
/* Adjusted exponent out of range.  Bit 10 of r2 set means the input
   exponent was too large (.Linf_nan); otherwise the result is a single
   precision denormal or zero.  */
.Lill_exp:
bbit1 r2,10,.Linf_nan
bmsk_s r12,r12,9
rsub.f r12,r12,8+0x400-32 ; Go from 9 to 1 guard bit in MSW. */
bhs_s .Lzero
lsr r3,DBL0L,21
rrc DBL0H,DBL0H ; insert leading 1
asl.f 0,DBL0L,8 ; check lower 24 guard bits
add_s r3,DBL0H,r3
add.pnz r3,r3,1 ; assemble fraction with compressed guard bits.
lsr r0,r3,r12
neg_s r12,r12
btst_s r0,1
asl.eq.f r3,r3,r12
add.ne r0,r0,1
btst_s r2,11
lsr_s r0,r0
j_s.d [blink]
bxor.ne r0,r0,31
/* Result is zero: r0 gets only the sign bit, taken from bit 11 of r2.  */
.Lzero:
lsr_s r2,r2,11
j_s.d [blink]
asl r0,r2,31
/* Adjusted exponent is zero.  */
.Ldenorm0:
asl_s r12,r12,20
tst DBL0L, \
0x5fffffff /* Check if msb guard bit wants rounding up. */
lsr_s DBL0L,DBL0L,29
lsr_s DBL0H,DBL0H,9
add.ne DBL0L,DBL0L,1
bset_s DBL0H,DBL0H,23
add_s DBL0H,DBL0H,DBL0L
lsr_s DBL0H,DBL0H
j_s.d [blink]
add_l r0,DBL0H,r12
/* We would generally say that NaNs must have a non-zero high fraction part,
but to allow hardware double precision floating point to interoperate
with single precision software floating point, we make an exception here.
The cost is to replace a tst_s DBL0H with an or.f DBL0L,DBL0L,DBL0H .
As we start out unaligned, and there is an odd number of other short insns,
we have a choice of letting this cost us a misalign penalty or
4 more bytes (if we align the code). We choose the former here because
infinity / NaN is not expected to be prevalent in time-critical code. */
.Linf_nan:
or.f DBL0L,DBL0L,DBL0H ; nonzero iff any fraction bit is set
mov_s r0,1
add.ne r2,r2,1
tst r2,0x7ff
asl.ne r0,r0,23
btst_s r12,11
neg r0,r0
j_s.d [blink]
bxor.eq r0,r0,31
ENDFUNC(__truncdfsf2)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* inputs: DBL0, DBL1
output: z flag
clobber: r12, flags
For NaNs, bit 19.. bit 30 of the high word must be set. */
#if 0 /* DEBUG */
/* Disabled self-check wrapper.  It saves r11, blink and r0..r10, then calls
   the reference routines __eqdf2_c and __unorddf2_c (presumably C
   implementations) on the same arguments.  r11 ends up as the __eqdf2_c
   result, forced to zero when __unorddf2_c returned nonzero.  After the
   registers are restored the assembly routine is called, which the #define
   below renames to __uneqdf2_asm.
   r11 == 0 requires the z flag to be set, anything else requires it to be
   clear.  A mismatch ends in abort.  */
.global __uneqdf2
.balign 4
FUNC(__uneqdf2)
__uneqdf2:
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
bl.d __eqdf2_c` push_s r0
push_s r0` ld_s r0, [sp,4]` ld_s r1, [sp,8]` ld_s r2,[sp,12]
bl.d __unorddf2_c` ld_s r3,[sp,16]
ld.ab r11,[sp,4]` tst r0,r0` mov.ne r11,0
pop_s r0` pop_s r1` pop_s r2` pop_s r3
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
bl.d __uneqdf2_asm` ld.ab r10,[sp,4]
pop_s blink
breq.d r11,0,0f
ld.ab r11,[sp,4]
jne_s [blink]
bl abort
0: jeq_s [blink]
bl abort
ENDFUNC(__uneqdf2)
#define __uneqdf2 __uneqdf2_asm
#endif /* DEBUG */
/* __uneqdf2: "unordered or equal" test of the doubles DBL0 and DBL1.
   The z flag is set on return when the operands are bitwise equal, when
   both are zero apart from the sign bit, or when the high word of either
   one has all of bits 19..30 set.  r12 and the flags are clobbered.  */
.global __uneqdf2
.balign 4
HIDDEN_FUNC(__uneqdf2)
__uneqdf2:
cmp_s DBL0H,DBL1H
cmp.eq DBL0L,DBL1L
jeq_s [blink] ; bitwise identical
or r12,DBL0H,DBL1H
or.f 0,DBL0L,DBL1L
bclr.eq.f r12,r12,31 ; low words zero: test the high words without sign
jeq_s [blink] ; both operands are zero, signs ignored
mov_s r12, \
0x7ff80000
bic.f 0,r12,DBL0H ; z iff bits 19..30 of DBL0H are all set
j_s.d [blink]
bic.ne.f r12,r12,DBL1H ; delay slot: otherwise the same test on DBL1H
ENDFUNC(__uneqdf2)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* inputs: r0, r1
output: z flag
clobber: r12, flags
For NaNs, bit 22 .. bit 30 must be set. */
#if 0 /* DEBUG */
/* Disabled self-check wrapper.  It saves r11, blink and r0..r10, then calls
   the reference routines __eqsf2_c and __unordsf2_c (presumably C
   implementations) on the same arguments.  r11 ends up as the __eqsf2_c
   result, forced to zero when __unordsf2_c returned nonzero.  After the
   registers are restored the assembly routine is called, which the #define
   below renames to __uneqsf2_asm.
   r11 == 0 requires the z flag to be set, anything else requires it to be
   clear.  A mismatch ends in abort.  */
.global __uneqsf2
.balign 4
FUNC(__uneqsf2)
__uneqsf2:
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
bl.d __eqsf2_c` push_s r0
push_s r0` ld_s r0, [sp,4]
bl.d __unordsf2_c` ld_s r1,[sp,8]
ld.ab r11,[sp,4]` tst r0,r0` mov.ne r11,0
pop_s r0` pop_s r1` pop_s r2` pop_s r3
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
bl.d __uneqsf2_asm` ld.ab r10,[sp,4]
pop_s blink
breq.d r11,0,0f
ld.ab r11,[sp,4]
jne_s [blink]
bl abort
0: jeq_s [blink]
bl abort
ENDFUNC(__uneqsf2)
#define __uneqsf2 __uneqsf2_asm
#endif /* DEBUG */
/* __uneqsf2: "unordered or equal" test of the floats in r0 and r1.
   The z flag is set on return when either operand has all of bits 22..30
   set, when both are zero apart from the sign bit, or when they are
   bitwise equal.  Each test only runs while z is still clear.  r12 and the
   flags are clobbered.  */
.global __uneqsf2
.balign 4
HIDDEN_FUNC(__uneqsf2)
__uneqsf2:
mov_s r12, \
0x7fc00000
bic.f 0,r12,r0 ; z iff bits 22..30 of r0 are all set
bic.ne.f r12,r12,r1 ; otherwise the same test on r1
or r12,r0,r1
bmsk.ne.f r12,r12,30 ; z iff both operands are zero, signs ignored
j_s.d [blink]
cmp.ne r0,r1 ; delay slot: finally a plain bitwise comparison
ENDFUNC(__uneqsf2)
/* .init/.fini section handling + C++ global constructor/destructor handling.
This file is based on crtstuff.c, sol2-crti.asm, sol2-crtn.asm.
Copyright (C) 1995, 1997, 1998, 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Declare a pointer to void function type.  The .ctors / .dtors list
   entries below use this type, and the walkers call each entry with no
   arguments.  */
typedef void (*func_ptr) (void);
#ifdef CRT_INIT
/* NOTE: In order to be able to support SVR4 shared libraries, we arrange
to have one set of symbols { __CTOR_LIST__, __DTOR_LIST__, __CTOR_END__,
__DTOR_END__ } per root executable and also one set of these symbols
per shared library. So in any given whole process image, we may have
multiple definitions of each of these symbols. In order to prevent
these definitions from conflicting with one another, and in order to
ensure that the proper lists are used for the initialization/finalization
of each individual shared library (respectively), we give these symbols
only internal (i.e. `static') linkage, and we also make it a point to
refer to only the __CTOR_END__ symbol in crtfini.o and the __DTOR_LIST__
symbol in crtinit.o, where they are defined. */
/* Head words of the two lists, both holding -1.  __do_global_ctors walks
   backwards from __CTOR_END__ until it reads this -1, and
   __do_global_dtors starts at the word following __DTOR_LIST__.  */
static func_ptr __CTOR_LIST__[1] __attribute__ ((section (".ctors")))
= { (func_ptr) (-1) };
static func_ptr __DTOR_LIST__[1] __attribute__ ((section (".dtors")))
= { (func_ptr) (-1) };
/* Run all the global destructors on exit from the program. */
/* Some systems place the number of pointers in the first word of the
table. On SVR4 however, that word is -1. In all cases, the table is
null-terminated. On SVR4, we start from the beginning of the list and
invoke each per-compilation-unit destructor routine in order
until we find that null.
Note that this function MUST be static. There will be one of these
functions in each root executable and one in each shared library, but
although they all have the same code, each one is unique in that it
refers to one particular associated `__DTOR_LIST__' which belongs to the
same particular root executable or shared library file. */
/* Forward declaration that fixes the assembler name and places the function
   in .text, so the .fini code can reach it with a plain bl.  */
static void __do_global_dtors (void)
asm ("__do_global_dtors") __attribute__ ((section (".text")));

/* Call every destructor recorded after the __DTOR_LIST__ head word, in
   ascending address order, stopping at the terminating null entry.  */
static void
__do_global_dtors (void)
{
  func_ptr *entry = __DTOR_LIST__ + 1;

  while (*entry)
    {
      (*entry) ();
      entry++;
    }
}
/* .init section start.
This must appear at the start of the .init section.
It emits a zero word, the global label init and a prologue that stores
blink at [sp,4] and fp at [sp], copies sp to fp and lowers sp by 16.
The matching end of the .init section is emitted under CRT_FINI. */
asm ("\n\
.section .init\n\
.global init\n\
.word 0\n\
init:\n\
st blink,[sp,4]\n\
st fp,[sp]\n\
mov fp,sp\n\
sub sp,sp,16\n\
");
/* .fini section start.
This must appear at the start of the .fini section.
It emits a zero word, the global label fini and the same prologue as
init, followed by a call to __do_global_dtors.
The matching end of the .fini section is emitted under CRT_FINI. */
asm ("\n\
.section .fini\n\
.global fini\n\
.word 0\n\
fini:\n\
st blink,[sp,4]\n\
st fp,[sp]\n\
mov fp,sp\n\
sub sp,sp,16\n\
bl.nd __do_global_dtors\n\
");
#endif /* CRT_INIT */
#ifdef CRT_FINI
/* Put a word containing zero at the end of each of our two lists of function
addresses. Note that the words defined here go into the .ctors and .dtors
sections of the crtend.o file, and since that file is always linked in
last, these words naturally end up at the very ends of the two lists
contained in these two sections.
__do_global_ctors starts its backwards walk at the word just before
__CTOR_END__. */
static func_ptr __CTOR_END__[1] __attribute__ ((section (".ctors")))
= { (func_ptr) 0 };
static func_ptr __DTOR_END__[1] __attribute__ ((section (".dtors")))
= { (func_ptr) 0 };
/* Run all global constructors for the program.
Note that they are run in reverse order. */
/* Forward declaration that fixes the assembler name and places the function
   in .text, so the .init code can reach it with a plain bl.  */
static void __do_global_ctors (void)
asm ("__do_global_ctors") __attribute__ ((section (".text")));

/* Call every constructor recorded before __CTOR_END__, walking downwards in
   memory until the -1 head word of the list is reached.  */
static void
__do_global_ctors (void)
{
  func_ptr *entry = __CTOR_END__ - 1;

  while (*entry != (func_ptr) -1)
    {
      (*entry) ();
      entry--;
    }
}
/* .init section end.
This must live at the end of the .init section.
It calls __do_global_ctors, reloads blink from [fp,4] and returns through
blink. The load in the delay slot reads fp from [sp,16] and writes the
address back to sp, which undoes the "sub sp,sp,16" of the init prologue
emitted under CRT_INIT. */
asm ("\n\
.section .init\n\
bl.nd __do_global_ctors\n\
ld blink,[fp,4]\n\
j.d blink\n\
ld.a fp,[sp,16]\n\
");
/* .fini section end.
This must live at the end of the .fini section.
It reloads blink from [fp,4] and returns through blink. The load in the
delay slot reads fp from [sp,16] and writes the address back to sp, which
undoes the "sub sp,sp,16" of the fini prologue emitted under CRT_INIT. */
asm ("\n\
.section .fini\n\
ld blink,[fp,4]\n\
j.d blink\n\
ld.a fp,[sp,16]\n\
");
#endif /* CRT_FINI */
; libgcc1 routines for Synopsys DesignWare ARC cpu.
/* Copyright (C) 1995, 1997, 2007-2013 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* As a special exception, if you link this library with other files,
some of which are compiled with GCC, to produce an executable,
this library does not by itself cause the resulting executable
to be covered by the GNU General Public License.
This exception does not however invalidate any other reasons why
the executable file might be covered by the GNU General Public License. */
/* ANSI concatenation macros.  CONCAT1 goes through CONCAT2 so that its
   arguments are macro-expanded before they are pasted together.  */
#define CONCAT1(a, b) CONCAT2(a, b)
#define CONCAT2(a, b) a ## b
/* Use the right prefix for global labels. */
#define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
/* Unless WORKING_ASSEMBLER is defined, the _l mnemonic spellings used in
   the sources are mapped to the plain mnemonics.  */
#ifndef WORKING_ASSEMBLER
#define abs_l abs
#define asl_l asl
#define mov_l mov
#endif
/* FUNC marks a symbol as a function, HIDDEN_FUNC additionally gives it
   hidden visibility, and ENDFUNC emits an end label plus the .size
   directive.  ENDFUNC goes through ENDFUNC0, presumably so that X is
   macro-expanded before it is pasted into the .Lfe_ label.
   NOTE(review): .hidden and .size use X without SYM(), unlike .type, so
   they only name the same symbol when __USER_LABEL_PREFIX__ is empty.
   TODO confirm that this is intended.  */
#define FUNC(X) .type SYM(X),@function
#define HIDDEN_FUNC(X) FUNC(X)` .hidden X
#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
#define ENDFUNC(X) ENDFUNC0(X)
#ifdef L_mulsi3
/* __mulsi3: 32 bit multiply, r0 = r0 * r1.
   One of five implementations is selected by the preprocessor, depending
   on the multiplier / norm support of the target and on
   __OPTIMIZE_SIZE__.  The software variants also write r1 and r2 (the
   ARC601 one r3 as well), the norm variant lp_count.  */
.section .text
.align 4
.global SYM(__mulsi3)
SYM(__mulsi3):
/* This is the simple version.
while (a)
{
if (a & 1)
r += b;
a >>= 1;
b <<= 1;
}
*/
#if defined (__ARC_MUL64__)
/* mulu64 is available: the low result word is read back from mlo.  */
FUNC(__mulsi3)
mulu64 r0,r1
j_s.d [blink]
mov_s r0,mlo
ENDFUNC(__mulsi3)
#elif defined (__ARC700__)
/* ARC700: a single mpyu does the job.  */
HIDDEN_FUNC(__mulsi3)
mpyu r0,r0,r1
nop_s
j_s [blink]
ENDFUNC(__mulsi3)
#elif defined (__ARC_NORM__)
/* Shift-and-add in a zero-overhead loop.  The iteration count is derived
   from norm of the multiplier: 31 minus the norm result, or 32 when the
   multiplier has its top bit set.  */
FUNC(__mulsi3)
norm.f r2,r0
rsub lp_count,r2,31
mov.mi lp_count,32
mov_s r2,r0 ; r2 = multiplier, r0 becomes the accumulator
mov_s r0,0
lpnz @.Lend ; loop is aligned
lsr.f r2,r2
add.cs r0,r0,r1
add_s r1,r1,r1
.Lend: j_s [blink]
ENDFUNC(__mulsi3)
#elif !defined (__OPTIMIZE_SIZE__) && !defined(__ARC601__)
/* Up to 3.5 times faster than the simpler code below, but larger. */
FUNC(__mulsi3)
ror.f r2,r0,4
mov_s r0,0
add3.mi r0,r0,r1
asl.f r2,r2,2
add2.cs r0,r0,r1
jeq_s [blink]
.Loop:
add1.mi r0,r0,r1
asl.f r2,r2,2
add.cs r0,r0,r1
asl_s r1,r1,4
ror.f r2,r2,8
add3.mi r0,r0,r1
asl.f r2,r2,2
bne.d .Loop
add2.cs r0,r0,r1
j_s [blink]
ENDFUNC(__mulsi3)
#elif !defined (__OPTIMIZE_SIZE__) /* __ARC601__ */
/* Single-bit shifts only.  Bit 0 of the multiplier is handled before the
   loop, then each iteration consumes three more bits with add1 / add2 /
   add3 and scales r1 by 8.  */
FUNC(__mulsi3)
lsr.f r2,r0
mov_s r0,0
mov_s r3,0
add.cs r0,r0,r1
.Loop:
lsr.f r2,r2
add1.cs r0,r0,r1
lsr.f r2,r2
add2.cs r0,r0,r1
lsr.f r2,r2
add3.cs r0,r0,r1
bne.d .Loop
add3 r1,r3,r1 ; r3 is zero, so this is r1 = r1 << 3
j_s [blink]
ENDFUNC(__mulsi3)
#else
/********************************************************/
/* Size-optimized fallback: the plain loop from the comment at the top.  */
FUNC(__mulsi3)
mov_s r2,0 ; Accumulate result here.
.Lloop:
bbit0 r0,0,@.Ly
add_s r2,r2,r1 ; r += b
.Ly:
lsr_s r0,r0 ; a >>= 1
asl_s r1,r1 ; b <<= 1
brne_s r0,0,@.Lloop
.Ldone:
j_s.d [blink]
mov_s r0,r2
ENDFUNC(__mulsi3)
/********************************************************/
#endif
#endif /* L_mulsi3 */
#ifdef L_umulsidi3
/* __umulsidi3: unsigned 32 x 32 -> 64 bit multiply.
   The software variant takes its operands in r0 and r1.  The result is
   returned in DBL0L / DBL0H.  */
.section .text
.align 4
.global SYM(__umulsidi3)
SYM(__umulsidi3):
HIDDEN_FUNC(__umulsidi3)
/* We need ARC700 /ARC_MUL64 definitions of __umulsidi3 / __umulsi3_highpart
in case some code has been compiled without multiply support enabled,
but linked with the multiply-support enabled libraries.
For ARC601 (i.e. without a barrel shifter), we also use umulsidi3 as our
umulsi3_highpart implementation; the use of the latter label doesn't
actually benefit ARC601 platforms, but is useful when ARC601 code is linked
against other libraries. */
#if defined (__ARC700__) || defined (__ARC_MUL64__) || defined (__ARC601__)
.global SYM(__umulsi3_highpart)
SYM(__umulsi3_highpart):
HIDDEN_FUNC(__umulsi3_highpart)
#endif
/* This is the simple version.
while (a)
{
if (a & 1)
r += b;
a >>= 1;
b <<= 1;
}
*/
#include "ieee-754/arc-ieee-754.h"
#ifdef __ARC700__
/* The first product overwrites DBL0L, so that operand is copied to r12
   beforehand.  */
mov_s r12,DBL0L
mpyu DBL0L,r12,DBL0H
j_s.d [blink]
mpyhu DBL0H,r12,DBL0H
#elif defined (__ARC_MUL64__)
/* Likewise for __ARC_MUL64__ */
mulu64 r0,r1
mov_s DBL0L,mlo
j_s.d [blink]
mov_s DBL0H,mhi
#else /* !__ARC700__ && !__ARC_MUL64__ */
/* Although it might look tempting to extend this to handle muldi3,
using mulsi3 twice with 2.25 cycles per 32 bit add is faster
than one loop with 3 or four cycles per 32 bit add. */
/* Register use in the loop: r12:r1 is the 64 bit shifted copy of b,
   r2:r3 the 64 bit accumulator, r0 the remaining bits of a.  */
asl.f r12,0 ; Top part of b.
mov_s r2,0 ; Accumulate result here.
bbit1.d r0,0,@.Ladd
mov_s r3,0
.Llooptst:
rlc r12,r12
breq r0,0,@.Ldone ; while (a)
.Lloop:
asl.f r1,r1 ; b <<= 1
bbit0.d r0,1,@.Llooptst
lsr r0,r0 ; a >>= 1
rlc r12,r12
.Ladd:
add.f r3,r3,r1 ; r += b
brne.d r0,0,@.Lloop ; while (a);
adc r2,r2,r12
.Ldone:
mov_s DBL0L,r3
j_s.d [blink]
mov DBL0H,r2
#endif /* !__ARC700__ && !__ARC_MUL64__ */
ENDFUNC(__umulsidi3)
#if defined (__ARC700__) || defined (__ARC_MUL64__) || defined (__ARC601__)
ENDFUNC(__umulsi3_highpart)
#endif
#endif /* L_umulsidi3 */
#ifdef L_umulsi3_highpart
#include "ieee-754/arc-ieee-754.h"
/* For use without a barrel shifter, and for ARC700 / ARC_MUL64, the
mulsidi3 algorithms above look better, so for these, there is an
extra label up there. */
#if !defined (__ARC700__) && !defined (__ARC_MUL64__) && !defined (__ARC601__)
/* __umulsi3_highpart: upper 32 bits of the unsigned product r0 * r1,
   returned in DBL0H.  r2 is the accumulator and r3 counts down from 32.
   Each iteration shifts one bit out of r0, adds r1 to r2 when that bit was
   set, and rotates r2 right through carry.  The loop ends as soon as r0 is
   zero, and the final lsr shifts r2 by the count left in r3.  */
.global SYM(__umulsi3_highpart)
SYM(__umulsi3_highpart):
HIDDEN_FUNC(__umulsi3_highpart)
mov_s r2,0
mov_s r3,32
.Loop:
lsr.f r0,r0
add.cs.f r2,r2,r1
sub_s r3,r3,1
brne.d r0,0,.Loop
rrc r2,r2
j_s.d [blink]
/* Make the result register peephole-compatible with mulsidi3. */
lsr DBL0H,r2,r3
ENDFUNC(__umulsi3_highpart)
#endif /* !__ARC700__ && !__ARC_MUL64__ && !__ARC601__ */
#endif /* L_umulsi3_highpart */
#ifdef L_divmod_tools
; Utilities used by all routines.
.section .text
/*
unsigned long
udivmodsi4(int modwanted, unsigned long num, unsigned long den)
{
unsigned long bit = 1;
unsigned long res = 0;
while (den < num && bit && !(den & (1L<<31)))
{
den <<=1;
bit <<=1;
}
while (bit)
{
if (num >= den)
{
num -= den;
res |= bit;
}
bit >>=1;
den >>=1;
}
if (modwanted) return num;
return res;
}
*/
; The C code above is the reference algorithm only: the assembly routine has
; no modwanted argument and hands back quotient and remainder together.
; inputs: r0 = numerator, r1 = denominator
; outputs: r0 = quotient, r1 = remainder, r2/r3 trashed
; The ARC700 and !__OPTIMIZE_SIZE__ variants additionally clobber r4 and
; lp_count.
.balign 4
.global SYM(__udivmodsi4)
FUNC(__udivmodsi4)
SYM(__udivmodsi4):
#if defined (__ARC700__)
/* Normalize divisor and dividend, and then use the appropriate number of
divaw (the number of result bits, or one more) to produce the result.
There are some special conditions that need to be tested:
- We can only directly normalize unsigned numbers that fit in 31 bit. For
the divisor, we test early on that it is not 'negative'.
- divaw can't correctly process a dividend that is larger than the divisor.
We handle this by checking that the dividend prior to normalization is
not larger than the normalized divisor. As we already know by then
that the divisor fits 31 bit, this check also makes sure that the
dividend fits.
- ordinary normalization of the dividend could make it larger than the
normalized divisor, which again would be unsuitable for divaw.
Thus, we want to shift left the dividend by one less, except that we
want to leave it alone if it is already 31 bit. To this end, we
double the input to norm with adds.
- If the dividend has less bits than the divisor, that would leave us
with a negative number of divaw to execute. Although we could use a
conditional loop to avoid excess divaw, and then the quotient could
be extracted correctly as there'd be more than enough zero bits, the
remainder would be shifted left too far, requiring a conditional shift
right. The cost of that shift and the possible mispredict on the
conditional loop cost as much as putting in an early check for a zero
result. */
bmsk r3,r0,29
brne.d r3,r0,.Large_dividend
norm.f r2,r1 ; (delay slot)
brlo r0,r1,.Lret0
norm r3,r0
asl_s r1,r1,r2
sub_s r3,r3,1
asl_l r0,r0,r3 ; not short to keep loop aligned
sub lp_count,r2,r3
lp .Ldiv_end
divaw r0,r0,r1
.Ldiv_end:sub_s r3,r2,1
lsr r1,r0,r2 ; remainder sits above the quotient bits
j_s.d [blink]
bmsk r0,r0,r3 ; (delay slot) keep only the quotient bits
.balign 4
.Large_dividend:
bmi .Ltrivial
asl_s r1,r1,r2
mov_s r3,0
sub1.f r4,r0,r1
mov.lo r4,r0
mov.hs r3,2
cmp r4,r1
sub.hs r4,r4,r1
add.hs r3,r3,1
mov.f lp_count,r2
lpne .Ldiv_end2
divaw r4,r4,r1
.Ldiv_end2:asl r0,r3,r2
lsr r1,r4,r2
sub_s r2,r2,1
bmsk r4,r4,r2
j_s.d [blink]
or.ne r0,r0,r4
.Lret0:
; numerator < denominator: quotient 0, remainder = numerator
mov_s r1,r0
j_s.d [blink]
mov_l r0,0
.balign 4
.Ltrivial:
; denominator has bit 31 set: the quotient can only be 0 or 1
sub.f r1,r0,r1
mov.c r1,r0
mov_s r0,1
j_s.d [blink]
mov.c r0,0
#elif !defined (__OPTIMIZE_SIZE__)
#ifdef __ARC_NORM__
lsr_s r2,r0
brhs.d r1,r2,.Lret0_3
norm r2,r2
norm r3,r1
sub_s r3,r3,r2
asl_s r1,r1,r3
sub1.f 0,r0,r1
lsr.cs r1,r1,1
sbc r2,r3,0
sub1 r0,r0,r1
cmp_s r0,r1
mov.f lp_count,r2
#else /* ! __ARC_NORM__ */
lsr_s r2,r0
brhs.d r1,r2,.Lret0_3
mov lp_count,32
.Lloop1:
asl_s r1,r1 ; den <<= 1
brls.d r1,r2,@.Lloop1
sub lp_count,lp_count,1
sub_s r0,r0,r1
lsr_s r1,r1
cmp_s r0,r1
xor.f r2,lp_count,31
mov_s lp_count,r2
#endif /* !__ARC_NORM__ */
sub.cc r0,r0,r1
mov_s r3,3
sbc r3,r3,0
#ifndef __ARC601__
asl_s r3,r3,r2
rsub r1,r1,1
lpne @.Lloop2_end
add1.f r0,r1,r0
sub.cc r0,r0,r1
.Lloop2_end:
lsr r1,r0,r2
#else
rsub r1,r1,1
lpne @.Lloop2_end
asl_s r3,r3
add1.f r0,r1,r0
sub.cc r0,r0,r1
.Lloop2_end:
lsr_s r1,r0
lsr.f lp_count,r2
mov.cc r1,r0
lpnz 1f
lsr_s r1,r1
lsr_s r1,r1
1:
#endif
bmsk r0,r0,r2
bclr r0,r0,r2
j_s.d [blink]
or_s r0,r0,r3
.Lret0_3:
/* Reached when the denominator is at least half the numerator (see the
   brhs.d tests above).  NOTE: in the disabled variant below the label
   .Loop3_end lacks its trailing colon; add it before enabling that code. */
#if 0 /* Slightly shorter, but slower. */
lp .Loop3_end
brhi.d r1,r0,.Loop3_end
sub_s r0,r0,r1
.Loop3_end
add_s r1,r1,r0
j_s.d [blink]
rsub r0,lp_count,32-1
#else
mov_s r4,r1
sub.f r1,r0,r1
sbc r0,r0,r0
sub.cc.f r1,r1,r4
sbc r0,r0,0
sub.cc.f r1,r1,r4
sbc r0,r0,-3
j_s.d [blink]
add.cs r1,r1,r4
#endif
#else /* Arctangent-A5 */
; Direct transcription of the reference C loop shown at the top.
; NOTE(review): for a zero denominator the branch below reaches .Ldivmodend
; before r3 has been initialised, so the returned quotient is whatever r3
; held on entry.
breq_s r1,0,@.Ldivmodend
mov_s r2,1 ; bit = 1
mov_s r3,0 ; res = 0
.Lloop1:
brhs r1,r0,@.Lloop2
bbit1 r1,31,@.Lloop2
asl_s r1,r1 ; den <<= 1
b.d @.Lloop1
asl_s r2,r2 ; bit <<= 1
.Lloop2:
brlo r0,r1,@.Lshiftdown
sub_s r0,r0,r1 ; num -= den
or_s r3,r3,r2 ; res |= bit
.Lshiftdown:
lsr_s r2,r2 ; bit >>= 1
lsr_s r1,r1 ; den >>= 1
brne_s r2,0,@.Lloop2
.Ldivmodend:
mov_s r1,r0 ; r1 = mod
j.d [blink]
mov_s r0,r3 ; r0 = res
/******************************************************/
#endif
ENDFUNC(__udivmodsi4)
#endif /* L_divmod_tools */
#ifdef L_udivsi3
.section .text
.align 4
/* __udivsi3: unsigned 32 bit division.
   Plain branch (no link) to __udivmodsi4, so that routine returns straight
   to our caller; its quotient output is in r0 (its remainder in r1 is
   simply left behind). */
.global SYM(__udivsi3)
FUNC(__udivsi3)
SYM(__udivsi3):
b @SYM(__udivmodsi4)
ENDFUNC(__udivsi3)
#if 0 /* interferes with linux loader */
.section .__arc_profile_forward, "a"
.long SYM(__udivsi3)
.long SYM(__udivmodsi4)
.long 65536
#endif
#endif /* L_udivsi3 */
#ifdef L_divsi3
.section .text
.align 4
/* __divsi3: signed 32 bit division of r0 by r1, quotient returned in r0. */
.global SYM(__divsi3)
FUNC(__divsi3)
#ifndef __ARC700__
SYM(__divsi3):
/* A5 / ARC60? */
/* Divide the magnitudes with __udivmodsi4, then negate the quotient when
   the operand signs differ.  r6 and r7 are clobbered in addition to
   whatever __udivmodsi4 trashes. */
mov r7,blink ; keep the return address across the call
xor r6,r0,r1 ; bit 31 of r6 = sign of the quotient
abs_s r0,r0
bl.d @SYM(__udivmodsi4)
abs_s r1,r1 ; (delay slot)
tst r6,r6
j.d [r7]
neg.mi r0,r0 ; (delay slot) negate if the signs differed
#else /* __ARC700__ */
;; We can use the abs, norm, divaw and mpy instructions for ARC700
#define MULDIV
#ifdef MULDIV
/* This table has been generated by divtab-arc700.c. */
/* 1/512 .. 1/256, normalized. There is a leading 1 in bit 31.
For powers of two, we list unnormalized numbers instead. The values
for powers of 2 are loaded, but not used. The value for 1 is actually
the first instruction after .Lmuldiv. */
/* Layout: the entries are stored in order of decreasing divisor, from 256
   at .Ldivtab down to 2 in the last word, because __muldiv indexes the
   table backwards from its own location with the negated absolute divisor.
   Each power-of-two entry therefore starts a new sub-table, as marked
   below.  The table must stay immediately in front of __muldiv. */
.balign 4
.Ldivtab:
; divisors 256 down to 129
.long 0x1000000
.long 0x80808081
.long 0x81020409
.long 0x81848DA9
.long 0x82082083
.long 0x828CBFBF
.long 0x83126E98
.long 0x83993053
.long 0x84210843
.long 0x84A9F9C9
.long 0x85340854
.long 0x85BF3762
.long 0x864B8A7E
.long 0x86D90545
.long 0x8767AB60
.long 0x87F78088
.long 0x88888889
.long 0x891AC73B
.long 0x89AE408A
.long 0x8A42F871
.long 0x8AD8F2FC
.long 0x8B70344B
.long 0x8C08C08D
.long 0x8CA29C05
.long 0x8D3DCB09
.long 0x8DDA5203
.long 0x8E78356E
.long 0x8F1779DA
.long 0x8FB823EF
.long 0x905A3864
.long 0x90FDBC0A
.long 0x91A2B3C5
.long 0x92492493
.long 0x92F11385
.long 0x939A85C5
.long 0x94458095
.long 0x94F20950
.long 0x95A02569
.long 0x964FDA6D
.long 0x97012E03
.long 0x97B425EE
.long 0x9868C80A
.long 0x991F1A52
.long 0x99D722DB
.long 0x9A90E7DA
.long 0x9B4C6F9F
.long 0x9C09C09D
.long 0x9CC8E161
.long 0x9D89D89E
.long 0x9E4CAD24
.long 0x9F1165E8
.long 0x9FD809FE
.long 0xA0A0A0A1
.long 0xA16B312F
.long 0xA237C32C
.long 0xA3065E40
.long 0xA3D70A3E
.long 0xA4A9CF1E
.long 0xA57EB503
.long 0xA655C43A
.long 0xA72F053A
.long 0xA80A80A9
.long 0xA8E83F58
.long 0xA9C84A48
.long 0xAAAAAAAB
.long 0xAB8F69E3
.long 0xAC769185
.long 0xAD602B59
.long 0xAE4C415D
.long 0xAF3ADDC7
.long 0xB02C0B03
.long 0xB11FD3B9
.long 0xB21642C9
.long 0xB30F6353
.long 0xB40B40B5
.long 0xB509E68B
.long 0xB60B60B7
.long 0xB70FBB5B
.long 0xB81702E1
.long 0xB92143FB
.long 0xBA2E8BA3
.long 0xBB3EE722
.long 0xBC52640C
.long 0xBD691048
.long 0xBE82FA0C
.long 0xBFA02FE9
.long 0xC0C0C0C1
.long 0xC1E4BBD6
.long 0xC30C30C4
.long 0xC4372F86
.long 0xC565C87C
.long 0xC6980C6A
.long 0xC7CE0C7D
.long 0xC907DA4F
.long 0xCA4587E7
.long 0xCB8727C1
.long 0xCCCCCCCD
.long 0xCE168A78
.long 0xCF6474A9
.long 0xD0B69FCC
.long 0xD20D20D3
.long 0xD3680D37
.long 0xD4C77B04
.long 0xD62B80D7
.long 0xD79435E6
.long 0xD901B204
.long 0xDA740DA8
.long 0xDBEB61EF
.long 0xDD67C8A7
.long 0xDEE95C4D
.long 0xE070381D
.long 0xE1FC780F
.long 0xE38E38E4
.long 0xE525982B
.long 0xE6C2B449
.long 0xE865AC7C
.long 0xEA0EA0EB
.long 0xEBBDB2A6
.long 0xED7303B6
.long 0xEF2EB720
.long 0xF0F0F0F1
.long 0xF2B9D649
.long 0xF4898D60
.long 0xF6603D99
.long 0xF83E0F84
.long 0xFA232CF3
.long 0xFC0FC0FD
.long 0xFE03F810
; divisors 128 down to 65
.long 0x2000000
.long 0x81020409
.long 0x82082083
.long 0x83126E98
.long 0x84210843
.long 0x85340854
.long 0x864B8A7E
.long 0x8767AB60
.long 0x88888889
.long 0x89AE408A
.long 0x8AD8F2FC
.long 0x8C08C08D
.long 0x8D3DCB09
.long 0x8E78356E
.long 0x8FB823EF
.long 0x90FDBC0A
.long 0x92492493
.long 0x939A85C5
.long 0x94F20950
.long 0x964FDA6D
.long 0x97B425EE
.long 0x991F1A52
.long 0x9A90E7DA
.long 0x9C09C09D
.long 0x9D89D89E
.long 0x9F1165E8
.long 0xA0A0A0A1
.long 0xA237C32C
.long 0xA3D70A3E
.long 0xA57EB503
.long 0xA72F053A
.long 0xA8E83F58
.long 0xAAAAAAAB
.long 0xAC769185
.long 0xAE4C415D
.long 0xB02C0B03
.long 0xB21642C9
.long 0xB40B40B5
.long 0xB60B60B7
.long 0xB81702E1
.long 0xBA2E8BA3
.long 0xBC52640C
.long 0xBE82FA0C
.long 0xC0C0C0C1
.long 0xC30C30C4
.long 0xC565C87C
.long 0xC7CE0C7D
.long 0xCA4587E7
.long 0xCCCCCCCD
.long 0xCF6474A9
.long 0xD20D20D3
.long 0xD4C77B04
.long 0xD79435E6
.long 0xDA740DA8
.long 0xDD67C8A7
.long 0xE070381D
.long 0xE38E38E4
.long 0xE6C2B449
.long 0xEA0EA0EB
.long 0xED7303B6
.long 0xF0F0F0F1
.long 0xF4898D60
.long 0xF83E0F84
.long 0xFC0FC0FD
; divisors 64 down to 33
.long 0x4000000
.long 0x82082083
.long 0x84210843
.long 0x864B8A7E
.long 0x88888889
.long 0x8AD8F2FC
.long 0x8D3DCB09
.long 0x8FB823EF
.long 0x92492493
.long 0x94F20950
.long 0x97B425EE
.long 0x9A90E7DA
.long 0x9D89D89E
.long 0xA0A0A0A1
.long 0xA3D70A3E
.long 0xA72F053A
.long 0xAAAAAAAB
.long 0xAE4C415D
.long 0xB21642C9
.long 0xB60B60B7
.long 0xBA2E8BA3
.long 0xBE82FA0C
.long 0xC30C30C4
.long 0xC7CE0C7D
.long 0xCCCCCCCD
.long 0xD20D20D3
.long 0xD79435E6
.long 0xDD67C8A7
.long 0xE38E38E4
.long 0xEA0EA0EB
.long 0xF0F0F0F1
.long 0xF83E0F84
; divisors 32 down to 17
.long 0x8000000
.long 0x84210843
.long 0x88888889
.long 0x8D3DCB09
.long 0x92492493
.long 0x97B425EE
.long 0x9D89D89E
.long 0xA3D70A3E
.long 0xAAAAAAAB
.long 0xB21642C9
.long 0xBA2E8BA3
.long 0xC30C30C4
.long 0xCCCCCCCD
.long 0xD79435E6
.long 0xE38E38E4
.long 0xF0F0F0F1
; divisors 16 down to 9
.long 0x10000000
.long 0x88888889
.long 0x92492493
.long 0x9D89D89E
.long 0xAAAAAAAB
.long 0xBA2E8BA3
.long 0xCCCCCCCD
.long 0xE38E38E4
; divisors 8 down to 5
.long 0x20000000
.long 0x92492493
.long 0xAAAAAAAB
.long 0xCCCCCCCD
; divisors 4 and 3
.long 0x40000000
.long 0xAAAAAAAB
; divisor 2
.long 0x80000000
/* __muldiv: small-divisor path of __divsi3 (entered when norm (divisor)
   is at least 23).  Divides by multiplying with a reciprocal taken from
   the table that ends right in front of this label.
   On entry: r0 = dividend, r1 = divisor, r2 = abs (divisor).
   Result in r0; clobbers r3, r4, r5 and r12. */
__muldiv:
neg r4,r2 ; negated abs (divisor) = backwards table index
ld.as r5,[pcl,r4] ; fetch the reciprocal, scaled index relative to pcl
abs_s r12,r0 ; r12 = abs (dividend)
bic.f 0,r2,r4 ; r2 & ~r4 is zero when r2 is a power of two (or zero)
mpyhu.ne r12,r12,r5 ; otherwise: high word of abs (dividend) * reciprocal
norm r3,r2
xor.f 0,r0,r1 ; N flag = sign of the quotient
; write port allocation stall
rsub r3,r3,30 ; r3 = 30 - norm (abs (divisor)) = final shift count
lsr r0,r12,r3
j_s.d [blink]
neg.mi r0,r0 ; (delay slot) negate if the operand signs differed
.balign 4
/* ARC700 entry point of __divsi3 (MULDIV variant).
   Small divisors are handed to __muldiv; everything else is normalised
   and divided with a zero-overhead loop of divaw steps.
   Register roles: r12 = abs (dividend), r2 = abs (divisor), r3 / r4 = norm
   of divisor / dividend; after the loop setup r4 holds their difference.
   .Lonebit and .Lsbit handle the cases listed in their own comments. */
SYM(__divsi3):
norm r3,r1
abs_s r2,r1
brhs r3,23,__muldiv ; small divisor: use the reciprocal table
norm r4,r0
abs_l r12,r0
brhs r4,r3,.Lonebit ; dividend has no more significant bits than divisor
asl_s r2,r2,r3
asl r12,r12,r4
sub lp_count,r3,r4
sub.f r12,r12,r2
brge.d r12,r2,.Lsbit
sub r4,r3,r4 ; (delay slot)
add.lo r12,r12,r2
lp .Ldivend
.Ldivstart:divaw r12,r12,r2
.Ldivend:xor_s r1,r1,r0 ; bit 31 of r1 = sign of the quotient
sub r0,r4,1
bmsk r0,r12,r0
bset.hs r0,r0,r4
tst_s r1,r1
j_s.d [blink]
neg.mi r0,r0 ; (delay slot) negate if the operand signs differed
.Lonebit:
xor_s r1,r1,r0
asr_s r1,r1,31 ; r1 = 0 or -1 according to the quotient sign
sub1.f 0,r12,r2 ; special case: -2**(n+1) / 2**n
or r0,r1,1 ; r0 = +1 or -1
add.eq r0,r0,r0
cmp_s r12,r2
j_s.d [blink]
mov.lo r0,0 ; (delay slot) abs (dividend) < abs (divisor): quotient 0
.Lsbit:
; Need to handle special cases involving negative powers of two:
; r12,r2 are normalized dividend / divisor;
; divide anything by 0x80000000, or divide 0x80000000 by 0x40000000
add_s r12,r12,r2
xor_s r1,r1,r0
rsub r4,r4,-1
ror r0,r12,r4
tst_s r2,r2
bmsk r0,r0,r3
add.pl r0,r0,r0
tst_s r1,r1
j_s.d [blink]
neg.mi r0,r0 ; (delay slot) negate if the operand signs differed
#else /* !MULDIV */
/* This version requires that divaw works with a divisor of 0x80000000U */
/* Alternative ARC700 __divsi3 without the reciprocal table (used only when
   MULDIV is not defined).  Signed division of r0 by r1, quotient in r0.
   Register roles: r12 = abs (dividend), r2 = abs (divisor), r4 = norm of
   the dividend, r3 = norm of the negated abs (divisor).
   Clobbers r1, r2, r3, r4, r12 and lp_count.
   This variant needs its own entry label, because the one used by the
   MULDIV variant is excluded by the preprocessor here. */
SYM(__divsi3):
abs_s r2,r1
norm r4,r0
neg_s r3,r2
norm r3,r3
abs_s r12,r0
brhs r4,r3,.Lonebit ; quotient magnitude is at most 1
asl_s r2,r2,r3
asl r12,r12,r4
sub lp_count,r3,r4
cmp_s r12,r2
sub.hs r12,r12,r2
lp .Ldivend
.Ldivstart:divaw r12,r12,r2
.Ldivend:xor_s r1,r1,r0 ; bit 31 of r1 = sign of the quotient
sub_s r0,r3,1
bmsk r0,r12,r0
bset.hs r0,r0,r3
tst_s r1,r1
j_s.d [blink]
neg.mi r0,r0 ; (delay slot) negate if the operand signs differed
.Lonebit:
xor_s r1,r1,r0
asr_s r1,r1,31 ; r1 = 0 or -1 according to the quotient sign
cmp_s r12,r2
; A conditional or cannot write a destination that differs from its first
; source, so build +1 / -1 unconditionally (flags are left untouched) and
; clear it afterwards if abs (dividend) < abs (divisor).
or r0,r1,1
j_s.d [blink]
mov.lo r0,0 ; (delay slot)
#endif /* MULDIV */
#endif /* ifndef __ARC700__ */
ENDFUNC(__divsi3)
#endif /* L_divsi3 */
#ifdef L_umodsi3
.section .text
.align 4
/* __umodsi3: unsigned 32 bit remainder of r0 by r1, returned in r0.
   Calls __udivmodsi4 and moves its remainder output (r1) into r0.
   r7 is clobbered: it holds the return address across the call. */
.global SYM(__umodsi3)
FUNC(__umodsi3)
SYM(__umodsi3):
mov r7,blink ; blink is overwritten by the bl below
bl.nd @SYM(__udivmodsi4)
j.d [r7]
mov r0,r1 ; (delay slot) return the remainder
ENDFUNC(__umodsi3)
#if 0 /* interferes with linux loader */
.section .__arc_profile_forward, "a"
.long SYM(__umodsi3)
.long SYM(__udivmodsi4)
.long 65536
#endif
#endif /* L_umodsi3 */
#ifdef L_modsi3
.section .text
.align 4
/* __modsi3: signed 32 bit remainder of r0 by r1, returned in r0.
   The result takes the sign of the dividend. */
.global SYM (__modsi3)
FUNC(__modsi3)
SYM(__modsi3):
#ifndef __ARC700__
/* A5 / ARC60? */
/* Take the remainder of the magnitudes with __udivmodsi4 and negate it for
   a negative dividend.  r6 and r12 are clobbered in addition to whatever
   __udivmodsi4 trashes. */
mov_s r12,blink ; keep the return address across the call
mov_s r6,r0 ; keep the dividend for the sign test
abs_s r0,r0
bl.d @SYM(__udivmodsi4)
abs_s r1,r1 ; (delay slot)
tst r6,r6
neg_s r0,r1 ; assume a negative dividend ...
j_s.d [r12]
mov.pl r0,r1 ; (delay slot) ... and undo that for a non-negative one
#else /* __ARC700__ */
/* Inline divaw loop, same scheme as the ARC700 division code.
   r12 = abs (dividend), r2 = abs (divisor), r5 = negated abs (divisor). */
abs_s r2,r1
norm.f r4,r0
neg r5,r2
norm r3,r5
abs_l r12,r0
brhs r4,r3,.Lonebit
asl_s r2,r2,r3
asl r12,r12,r4
sub lp_count,r3,r4
cmp_s r12,r2
sub.hs r12,r12,r2
tst_s r0,r0 ; N flag = sign of the dividend, used by neg.mi below
lp .Ldivend
.Ldivstart:divaw r12,r12,r2
.Ldivend:
lsr r0,r12,r3
j_s.d [blink]
neg.mi r0,r0 ; (delay slot)
.balign 4
.Lonebit:neg.pl r5,r5
cmp_s r12,r2
j_s.d [blink]
sub.hs r0,r0,r5 ; (delay slot)
#endif /* __ARC700__ */
ENDFUNC(__modsi3)
#endif /* L_modsi3 */
#ifdef L_clzsi2
.section .text
.align 4
/* __clzsi2: count the leading zero bits of r0, result in r0.
   Three variants follow: one using the norm instruction, one for ARC601,
   and a generic one. */
.global SYM (__clzsi2)
SYM(__clzsi2):
#ifdef __ARC_NORM__
HIDDEN_FUNC(__clzsi2)
norm.f r0,r0
mov.n r0,0 ; bit 31 was set: no leading zeros
j_s.d [blink]
add.pl r0,r0,1 ; (delay slot) otherwise the result is norm + 1
ENDFUNC(__clzsi2)
#elif defined (__ARC601__)
FUNC(__clzsi2)
/* Shift the value left three bits at a time (add3 with a zero addend)
   until it reaches 2**29, then derive the count from the remaining loop
   counter.  Clobbers r1, r2 and lp_count. */
mov lp_count,10
mov_l r1,0
bset r2,r1,29 ; r2 = 1 << 29
lp .Loop_end
brhs r0,r2,.Loop_end
add3 r0,r1,r0 ; r0 <<= 3
.Loop_end:
asl.f 0,r0
sub2 r0,lp_count,lp_count
sub.cs.f r0,r0,1
add r0,r0,31
j_s.d [blink]
add.pl r0,r0,1 ; (delay slot)
ENDFUNC(__clzsi2)
#else
FUNC(__clzsi2)
/* Generic variant: scans the value three bits per iteration, keeping the
   running count in r1.  Clobbers r1. */
asl.f 0,r0,2
mov r1,-1
.Lcheck:
bbit1.d r0,31,.Ldone
asl.pl r0,r0,3
bcs.d .Ldone_1
add_s r1,r1,3
bpnz.d .Lcheck
asl.f 0,r0,2
mov_s r0,32
j_s.d [blink]
mov.ne r0,r1
.Ldone:
j_s.d [blink]
add_s r0,r1,1
.Ldone_1:
j_s.d [blink]
sub_s r0,r1,1
ENDFUNC(__clzsi2)
#endif
#endif /* L_clzsi2 */
.section .text
;;; MILLICODE THUNK LIB ;***************
;;; .macro push_regs from, to, offset
;;; st_s "\from", [sp, \offset]
;;; .if \to-\from
;;; push_regs "(\from+1)", \to, "(\offset+4)"
;;; .endif
;;; .endm
;;; push_regs 13, 18, 0
;;;
;;;; .macro sum from, to, three
;;;; .long \from
;;;; .long \three
;;;; .local regno
;;;; .set regno, \from+1
;;;; .set shift, 32
;;;; .set shift, shift - 1
;;;; # st_s %shift @3 lsl #shift
;;;; .if \to-\from
;;;; sum "(\from+1)", \to, "(\three)"
;;;; .endif
;;;; .endm
;;;;
;;;; SUM 0,5, 9
;;;;
; .altmacro
;; .macro push_regs from=0, to=3, offset
;; st_s r\from, [sp, \offset]
;; .if \to-\from
;; push_regs "\from+1 ",\to,"(\offset+4)"
;; .endif
;; .endm
;;
;; .macro expand_to_push from=13, to
;; ; .section .text
;; ; .align 4
;; ; .global st_
;; ; .type foo,
;; st_13_to_25:
;; ; push_regs \from, \to, 0
;; push_regs 0,3 ;
;; .endm
;;
;; expand_to_push 13,18
;;
;#endif
#ifdef L_millicodethunk_st
.section .text
.align 4
/* Millicode store thunks.
   __st_r13_to_rNN stores r13 .. rNN to the stack, register rK going to
   [sp + 4 * (K - 13)].  The entry points form one fall-through chain:
   entering at a given label stores that register and every lower one
   down to r13, so the order of the labels must not be changed. */
.global SYM(__st_r13_to_r15)
.global SYM(__st_r13_to_r16)
.global SYM(__st_r13_to_r17)
.global SYM(__st_r13_to_r18)
.global SYM(__st_r13_to_r19)
.global SYM(__st_r13_to_r20)
.global SYM(__st_r13_to_r21)
.global SYM(__st_r13_to_r22)
.global SYM(__st_r13_to_r23)
.global SYM(__st_r13_to_r24)
.global SYM(__st_r13_to_r25)
HIDDEN_FUNC(__st_r13_to_r15)
HIDDEN_FUNC(__st_r13_to_r16)
HIDDEN_FUNC(__st_r13_to_r17)
HIDDEN_FUNC(__st_r13_to_r18)
HIDDEN_FUNC(__st_r13_to_r19)
HIDDEN_FUNC(__st_r13_to_r20)
HIDDEN_FUNC(__st_r13_to_r21)
HIDDEN_FUNC(__st_r13_to_r22)
HIDDEN_FUNC(__st_r13_to_r23)
HIDDEN_FUNC(__st_r13_to_r24)
HIDDEN_FUNC(__st_r13_to_r25)
.align 4
SYM(__st_r13_to_r25):
st r25, [sp,48]
SYM(__st_r13_to_r24):
st r24, [sp,44]
SYM(__st_r13_to_r23):
st r23, [sp,40]
SYM(__st_r13_to_r22):
st r22, [sp,36]
SYM(__st_r13_to_r21):
st r21, [sp,32]
SYM(__st_r13_to_r20):
st r20, [sp,28]
SYM(__st_r13_to_r19):
st r19, [sp,24]
SYM(__st_r13_to_r18):
st r18, [sp,20]
SYM(__st_r13_to_r17):
st r17, [sp,16]
SYM(__st_r13_to_r16):
st r16, [sp,12]
SYM(__st_r13_to_r15):
#ifdef __ARC700__
st r15, [sp,8] ; minimum function size to avoid stall: 6 bytes.
#else
st_s r15, [sp,8]
#endif
st_s r14, [sp,4]
j_s.d [%blink]
st_s r13, [sp,0] ; (delay slot) last store executes during the return
ENDFUNC(__st_r13_to_r15)
ENDFUNC(__st_r13_to_r16)
ENDFUNC(__st_r13_to_r17)
ENDFUNC(__st_r13_to_r18)
ENDFUNC(__st_r13_to_r19)
ENDFUNC(__st_r13_to_r20)
ENDFUNC(__st_r13_to_r21)
ENDFUNC(__st_r13_to_r22)
ENDFUNC(__st_r13_to_r23)
ENDFUNC(__st_r13_to_r24)
ENDFUNC(__st_r13_to_r25)
#endif /* L_millicodethunk_st */
#ifdef L_millicodethunk_ld
.section .text
.align 4
; ==================================
; the loads
/* Millicode load thunks, the counterpart of the __st_r13_to_rNN stores.
   __ld_r13_to_rNN reloads r13 .. rNN from the stack, register rK coming
   from [sp + 4 * (K - 13)].  The entry points form one fall-through chain,
   so the order of the labels must not be changed.  sp is not modified. */
.global SYM(__ld_r13_to_r15)
.global SYM(__ld_r13_to_r16)
.global SYM(__ld_r13_to_r17)
.global SYM(__ld_r13_to_r18)
.global SYM(__ld_r13_to_r19)
.global SYM(__ld_r13_to_r20)
.global SYM(__ld_r13_to_r21)
.global SYM(__ld_r13_to_r22)
.global SYM(__ld_r13_to_r23)
.global SYM(__ld_r13_to_r24)
.global SYM(__ld_r13_to_r25)
HIDDEN_FUNC(__ld_r13_to_r15)
HIDDEN_FUNC(__ld_r13_to_r16)
HIDDEN_FUNC(__ld_r13_to_r17)
HIDDEN_FUNC(__ld_r13_to_r18)
HIDDEN_FUNC(__ld_r13_to_r19)
HIDDEN_FUNC(__ld_r13_to_r20)
HIDDEN_FUNC(__ld_r13_to_r21)
HIDDEN_FUNC(__ld_r13_to_r22)
HIDDEN_FUNC(__ld_r13_to_r23)
HIDDEN_FUNC(__ld_r13_to_r24)
HIDDEN_FUNC(__ld_r13_to_r25)
SYM(__ld_r13_to_r25):
ld r25, [sp,48]
SYM(__ld_r13_to_r24):
ld r24, [sp,44]
SYM(__ld_r13_to_r23):
ld r23, [sp,40]
SYM(__ld_r13_to_r22):
ld r22, [sp,36]
SYM(__ld_r13_to_r21):
ld r21, [sp,32]
SYM(__ld_r13_to_r20):
ld r20, [sp,28]
SYM(__ld_r13_to_r19):
ld r19, [sp,24]
SYM(__ld_r13_to_r18):
ld r18, [sp,20]
SYM(__ld_r13_to_r17):
ld r17, [sp,16]
SYM(__ld_r13_to_r16):
ld r16, [sp,12]
SYM(__ld_r13_to_r15):
#ifdef __ARC700__
ld r15, [sp,8] ; minimum function size to avoid stall: 6 bytes.
#else
ld_s r15, [sp,8]
#endif
ld_s r14, [sp,4]
j_s.d [%blink]
ld_s r13, [sp,0] ; (delay slot) last load executes during the return
ENDFUNC(__ld_r13_to_r15)
ENDFUNC(__ld_r13_to_r16)
ENDFUNC(__ld_r13_to_r17)
ENDFUNC(__ld_r13_to_r18)
ENDFUNC(__ld_r13_to_r19)
ENDFUNC(__ld_r13_to_r20)
ENDFUNC(__ld_r13_to_r21)
ENDFUNC(__ld_r13_to_r22)
ENDFUNC(__ld_r13_to_r23)
ENDFUNC(__ld_r13_to_r24)
ENDFUNC(__ld_r13_to_r25)
#endif /* L_millicodethunk_ld */
#ifdef L_millicodethunk_ret
/* Millicode restore-and-return thunks.
   __ld_r13_to_rNN_ret reloads r13 .. rNN from [sp + 4 * (K - 13)] through a
   fall-through chain of entry points, then reloads blink from [sp + r12],
   adds r12 + 4 to sp and returns through the reloaded blink.
   NOTE(review): r12 is read but never set here; presumably the caller
   passes the distance from sp to the saved blink slot in it - confirm
   against the code that emits calls to these thunks. */
.global SYM(__ld_r13_to_r14_ret)
.global SYM(__ld_r13_to_r15_ret)
.global SYM(__ld_r13_to_r16_ret)
.global SYM(__ld_r13_to_r17_ret)
.global SYM(__ld_r13_to_r18_ret)
.global SYM(__ld_r13_to_r19_ret)
.global SYM(__ld_r13_to_r20_ret)
.global SYM(__ld_r13_to_r21_ret)
.global SYM(__ld_r13_to_r22_ret)
.global SYM(__ld_r13_to_r23_ret)
.global SYM(__ld_r13_to_r24_ret)
.global SYM(__ld_r13_to_r25_ret)
HIDDEN_FUNC(__ld_r13_to_r14_ret)
HIDDEN_FUNC(__ld_r13_to_r15_ret)
HIDDEN_FUNC(__ld_r13_to_r16_ret)
HIDDEN_FUNC(__ld_r13_to_r17_ret)
HIDDEN_FUNC(__ld_r13_to_r18_ret)
HIDDEN_FUNC(__ld_r13_to_r19_ret)
HIDDEN_FUNC(__ld_r13_to_r20_ret)
HIDDEN_FUNC(__ld_r13_to_r21_ret)
HIDDEN_FUNC(__ld_r13_to_r22_ret)
HIDDEN_FUNC(__ld_r13_to_r23_ret)
HIDDEN_FUNC(__ld_r13_to_r24_ret)
HIDDEN_FUNC(__ld_r13_to_r25_ret)
.section .text
.align 4
SYM(__ld_r13_to_r25_ret):
ld r25, [sp,48]
SYM(__ld_r13_to_r24_ret):
ld r24, [sp,44]
SYM(__ld_r13_to_r23_ret):
ld r23, [sp,40]
SYM(__ld_r13_to_r22_ret):
ld r22, [sp,36]
SYM(__ld_r13_to_r21_ret):
ld r21, [sp,32]
SYM(__ld_r13_to_r20_ret):
ld r20, [sp,28]
SYM(__ld_r13_to_r19_ret):
ld r19, [sp,24]
SYM(__ld_r13_to_r18_ret):
ld r18, [sp,20]
SYM(__ld_r13_to_r17_ret):
ld r17, [sp,16]
SYM(__ld_r13_to_r16_ret):
ld r16, [sp,12]
SYM(__ld_r13_to_r15_ret):
ld r15, [sp,8]
SYM(__ld_r13_to_r14_ret):
ld blink,[sp,r12] ; return address saved at sp + r12
ld_s r14, [sp,4]
ld.ab r13, [sp,r12] ; load r13 from [sp], then sp += r12
j_s.d [%blink]
add_s sp,sp,4 ; (delay slot) step over the blink slot
ENDFUNC(__ld_r13_to_r14_ret)
ENDFUNC(__ld_r13_to_r15_ret)
ENDFUNC(__ld_r13_to_r16_ret)
ENDFUNC(__ld_r13_to_r17_ret)
ENDFUNC(__ld_r13_to_r18_ret)
ENDFUNC(__ld_r13_to_r19_ret)
ENDFUNC(__ld_r13_to_r20_ret)
ENDFUNC(__ld_r13_to_r21_ret)
ENDFUNC(__ld_r13_to_r22_ret)
ENDFUNC(__ld_r13_to_r23_ret)
ENDFUNC(__ld_r13_to_r24_ret)
ENDFUNC(__ld_r13_to_r25_ret)
#endif /* L_millicodethunk_ret */
#ifdef L_adddf3
#ifdef __ARC_NORM__
#include "ieee-754/adddf3.S"
#endif
#endif
#ifdef L_muldf3
#ifdef __ARC700__
#include "ieee-754/muldf3.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
#include "ieee-754/arc600-mul64/muldf3.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
#include "ieee-754/arc600-dsp/muldf3.S"
#endif
#endif
#ifdef L_addsf3
#ifdef __ARC_NORM__
#include "ieee-754/addsf3.S"
#endif
#endif
#ifdef L_mulsf3
#ifdef __ARC700__
#include "ieee-754/mulsf3.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
#include "ieee-754/arc600-mul64/mulsf3.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
#include "ieee-754/arc600-dsp/mulsf3.S"
#elif defined (__ARC_NORM__)
#include "ieee-754/arc600/mulsf3.S"
#endif
#endif
#ifdef L_divdf3
#ifdef __ARC700__
#include "ieee-754/divdf3.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
#include "ieee-754/arc600-mul64/divdf3.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
#include "ieee-754/arc600-dsp/divdf3.S"
#endif
#endif
#ifdef L_divsf3
#ifdef __ARC700__
#include "ieee-754/divsf3-stdmul.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
#include "ieee-754/arc600-mul64/divsf3.S"
#elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
#include "ieee-754/arc600-dsp/divsf3.S"
#elif defined (__ARC_NORM__)
#include "ieee-754/arc600/divsf3.S"
#endif
#endif
#ifdef L_extendsfdf2
#ifdef __ARC_NORM__
#include "ieee-754/extendsfdf2.S"
#endif
#endif
#ifdef L_truncdfsf2
#ifdef __ARC_NORM__
#include "ieee-754/truncdfsf2.S"
#endif
#endif
#ifdef L_floatsidf
#ifdef __ARC_NORM__
#include "ieee-754/floatsidf.S"
#endif
#endif
#ifdef L_floatsisf
#ifdef __ARC_NORM__
#include "ieee-754/floatsisf.S"
#endif
#endif
#ifdef L_floatunsidf
#ifdef __ARC_NORM__
#include "ieee-754/floatunsidf.S"
#endif
#endif
#ifdef L_fixdfsi
#ifdef __ARC_NORM__
#include "ieee-754/fixdfsi.S"
#endif
#endif
#ifdef L_fixsfsi
#ifdef __ARC_NORM__
#include "ieee-754/fixsfsi.S"
#endif
#endif
#ifdef L_fixunsdfsi
#ifdef __ARC_NORM__
#include "ieee-754/fixunsdfsi.S"
#endif
#endif
#ifdef L_eqdf2
#ifdef __ARC_NORM__
#include "ieee-754/eqdf2.S"
#endif
#endif
#ifdef L_eqsf2
#ifdef __ARC_NORM__
#include "ieee-754/eqsf2.S"
#endif
#endif
#ifdef L_gtdf2
#ifdef __ARC_NORM__
#include "ieee-754/gtdf2.S"
#endif
#endif
#ifdef L_gtsf2
#ifdef __ARC_NORM__
#include "ieee-754/gtsf2.S"
#endif
#endif
#ifdef L_gedf2
#ifdef __ARC_NORM__
#include "ieee-754/gedf2.S"
#endif
#endif
#ifdef L_gesf2
#ifdef __ARC_NORM__
#include "ieee-754/gesf2.S"
#endif
#endif
#ifdef L_uneqdf2
#ifdef __ARC_NORM__
#include "ieee-754/uneqdf2.S"
#endif
#endif
#ifdef L_uneqsf2
#ifdef __ARC_NORM__
#include "ieee-754/uneqsf2.S"
#endif
#endif
#ifdef L_orddf2
#ifdef __ARC_NORM__
#include "ieee-754/orddf2.S"
#endif
#endif
#ifdef L_ordsf2
#ifdef __ARC_NORM__
#include "ieee-754/ordsf2.S"
#endif
#endif
# Exclude libgcc.so symbols for the Synopsys DesignWare ARC CPU.
# Copyright (C) 2007-2012 Free Software Foundation, Inc.
# Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
# on behalf of Synopsys Inc.
# This file is part of GCC.
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
# Under Section 7 of GPL version 3, you are granted additional
# permissions described in the GCC Runtime Library Exception, version
# 3.1, as published by the Free Software Foundation.
# You should have received a copy of the GNU General Public License and
# a copy of the GCC Runtime Library Exception along with this program;
# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
# <http://www.gnu.org/licenses/>.
# Exclude various symbols which should not be visible in libgcc.so for ARC.
# Floating point comparisons use a special lightweight ABI which is not
# compatible with calls via a plt. Moreover, the code is so compact that
# it is better to include a separate copy in each dso.
# The list below names the single (sf) and double (df) precision variants
# of the eq, gt, ge, uneq and ord comparison helpers.
%exclude {
__eqsf2
__eqdf2
__gtsf2
__gtdf2
__gesf2
__gedf2
__uneqsf2
__uneqdf2
__ordsf2
__orddf2
}
# GCC Makefile fragment for Synopsys DesignWare ARC
# Copyright (C) 2007-2013 Free Software Foundation, Inc.
# Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
# on behalf of Synopsys Inc.
# This file is part of GCC.
# GCC is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 3, or (at your option) any later version.
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
# You should have received a copy of the GNU General Public License along
# with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
# Hand-written assembly implementations of the low-level libgcc routines.
CROSS_LIBGCC1 = libgcc1-asm.a
LIB1ASMSRC = arc/lib1funcs.S

# Routines taken from LIB1ASMSRC.  Each name presumably selects the
# matching "#ifdef L<name>" section of that file (e.g. _udivsi3 and
# L_udivsi3).  Grouped as: integer multiply; integer divide / modulo and
# clz; millicode thunks; floating-point arithmetic and conversions
# between float and double; floating-point comparisons; conversions
# between floating point and integer.
LIB1ASMFUNCS = \
  _mulsi3 _umulsidi3 _umulsi3_highpart \
  _udivsi3 _divsi3 _umodsi3 _modsi3 _divmod_tools _clzsi2 \
  _millicodethunk_st _millicodethunk_ld _millicodethunk_ret \
  _adddf3 _muldf3 _addsf3 _mulsf3 _divsf3 _divdf3 \
  _truncdfsf2 _extendsfdf2 \
  _eqdf2 _eqsf2 _gedf2 _gesf2 _gtdf2 _gtsf2 \
  _uneqdf2 _uneqsf2 _ordsf2 _orddf2 \
  _fixdfsi _fixsfsi _floatsidf _floatsisf _fixunsdfsi _floatunsidf

#LIBGCC2_CFLAGS = -g1 -O2 $(LIBGCC2_INCLUDES) $(GCC_CFLAGS)
# For floating-point emulation, we mostly use hand-coded assembly.
# We use fp-bit.c for debugging purposes, and some parts of it
# as a fallback for hardware configurations for which the hand-coded
# assembly support is incomplete, i.e., where there is no NORM and/or no
# supported multiply instruction. Using floating point on such a
# configuration is generally inadvisable, but we got to provide support
# somehow so that we can run the testsuites.
# fp-hack.h / dp-hack.h take care of selecting the parts that are needed,
# and (for debugging) of renaming functions so that they can be
# used in an asm wrapper.
# Generated C sources that are added to libgcc2.
LIB2ADD = fp-bit.c dp-bit.c

# Double-precision wrapper: a small preamble (bit-order define for targets
# that do not define __big_endian__, fp-bit.h, the ARC dp-hack.h) followed
# by the generic fp-bit.c with its own fp-bit.h include filtered out.
dp-bit.c: $(srcdir)/fp-bit.c
	echo '#ifndef __big_endian__' > $@
	echo '#define FLOAT_BIT_ORDER_MISMATCH' >> $@
	echo '#endif' >> $@
	echo '#include "fp-bit.h"' >> $@
	echo '#include "config/arc/dp-hack.h"' >> $@
	grep -v 'include.*fp-bit.h' $(srcdir)/fp-bit.c >> $@

# Single-precision wrapper: defines FLOAT, applies the same bit-order
# define, includes the ARC fp-hack.h and then appends the generic fp-bit.c
# unchanged.
fp-bit.c: $(srcdir)/fp-bit.c
	echo '#define FLOAT' > $@
	echo '#ifndef __big_endian__' >> $@
	echo '#define FLOAT_BIT_ORDER_MISMATCH' >> $@
	echo '#endif' >> $@
	echo '#include "config/arc/fp-hack.h"' >> $@
	cat $(srcdir)/fp-bit.c >> $@
# .init/.fini section routines
crtg.o: $(srcdir)/config/arc/crtg.S
	$(crt_compile) -c -x assembler-with-cpp $<

crtgend.o: $(srcdir)/config/arc/crtgend.S
	$(crt_compile) -c -x assembler-with-cpp $<

# Profiling support objects.  All of them are compiled with the gmon
# directory on the system include path.

# mcount.o is built with r0-r7 treated as call-saved and without a frame
# pointer.
mcount.o: $(srcdir)/config/arc/gmon/mcount.c
	$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -c $< \
	-fcall-saved-r0 -fcall-saved-r1 -fcall-saved-r2 -fcall-saved-r3 \
	-fcall-saved-r4 -fcall-saved-r5 -fcall-saved-r6 -fcall-saved-r7 \
	-fomit-frame-pointer

gmon.o: $(srcdir)/config/arc/gmon/gmon.c
	$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -mno-sdata -c $< \
	-fno-strict-aliasing \
	-Wno-extra # suppress inane warning about missing initializer.
# Adding initializers for the remaining elements of gmonparam would
# make the code more brittle.

prof-freq-stub.o: $(srcdir)/config/arc/gmon/prof-freq-stub.S
	$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -c $<

prof-freq.o: $(srcdir)/config/arc/gmon/prof-freq.c
	$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -c $<

dcache_linesz.o: $(srcdir)/config/arc/gmon/dcache_linesz.S
	$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -c $<

profil.o: $(srcdir)/config/arc/gmon/profil.S
	$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -c $<

profil-uclibc.o: $(srcdir)/config/arc/gmon/profil-uclibc.c
	$(gcc_compile) -isystem $(srcdir)/config/arc/gmon -c $<

# The profiling archive: the common objects plus the OS-dependent ones
# named by PROFILE_OSDEP, which is set by another Makefile fragment.
libgmon.a: mcount.o gmon.o dcache_linesz.o $(PROFILE_OSDEP)
	$(AR_CREATE_FOR_TARGET) $@ $^
# GCC Makefile fragment for the Synopsys DesignWare ARC CPU with newlib.
# Copyright (C) 2007-2012 Free Software Foundation, Inc.
# Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
# on behalf of Synopsys Inc.
# This file is part of GCC.
# GCC is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License as published by the Free Software
# Foundation; either version 3, or (at your option) any later version.
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
# You should have received a copy of the GNU General Public License along
# with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
# OS-dependent profiling objects used for the newlib configuration.
PROFILE_OSDEP = prof-freq-stub.o profil.o
# GCC Makefile fragment for the Synopsys DesignWare ARC700 CPU with uClibc.
# Copyright (C) 2007-2012 Free Software Foundation, Inc.
# Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
# on behalf of Synopsys Inc.
# This file is part of GCC.
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
# Under Section 7 of GPL version 3, you are granted additional
# permissions described in the GCC Runtime Library Exception, version
# 3.1, as published by the Free Software Foundation.
# You should have received a copy of the GNU General Public License and
# a copy of the GCC Runtime Library Exception along with this program;
# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
# <http://www.gnu.org/licenses/>.
# Build the crtstuff objects with -mno-sdata.
CRTSTUFF_T_CFLAGS += -mno-sdata
# Compile crtbeginS.o and crtendS.o with pic.
CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -mA7 -fPIC
# Compile libgcc2.a with pic.
TARGET_LIBGCC2_CFLAGS = -mA7 -fPIC
# OS-dependent profiling object used for the uClibc configuration.
PROFILE_OSDEP = prof-freq.o
# Override t-slibgcc-elf-ver to hide some lib1func
# routines which should not be called via PLT.
SHLIB_MAPFILES = libgcc-std.ver $(srcdir)/config/arc/libgcc-excl.ver
...@@ -188,16 +188,20 @@ extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype); ...@@ -188,16 +188,20 @@ extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
"rIJ" ((USItype) (bh)), \ "rIJ" ((USItype) (bh)), \
"r" ((USItype) (al)), \ "r" ((USItype) (al)), \
"rIJ" ((USItype) (bl))) "rIJ" ((USItype) (bl)))
/* Call libgcc routine. */
#define umul_ppmm(w1, w0, u, v) \ #define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v)
do { \ #ifdef __ARC_NORM__
DWunion __w; \ #define count_leading_zeros(count, x) \
__w.ll = __umulsidi3 (u, v); \ do \
w1 = __w.s.high; \ { \
w0 = __w.s.low; \ SItype c_; \
} while (0) \
#define __umulsidi3 __umulsidi3 __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
UDItype __umulsidi3 (USItype, USItype); (count) = c_ + 1; \
} \
while (0)
#define COUNT_LEADING_ZEROS_0 32
#endif
#endif #endif
#if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \ #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment