Commit d38a64b4 by Joern Rennecke Committed by Joern Rennecke

config.host (arc*-*-elf*, [...]): New configurations.

2013-10-01  Joern Rennecke  <joern.rennecke@embecosm.com>
            Brendan Kehoe  <brendan@zen.org>
            Simon Cook  <simon.cook@embecosm.com>

        * config.host (arc*-*-elf*, arc*-*-linux-uclibc*): New configurations.
        * config/arc: New directory.
        * longlong.h [__arc__] (umul_ppmm): Remove.
        [__arc__] (__umulsidi3): Define.
        [__arc__ && __ARC_NORM__] (count_leading_zeros): Define.
        [__arc__ && __ARC_NORM__] (COUNT_LEADING_ZEROS_0): Likewise.

Co-Authored-By: Brendan Kehoe <brendan@zen.org>
Co-Authored-By: Simon Cook <simon.cook@embecosm.com>

From-SVN: r203073
parent 526b7aee
2013-10-01 Joern Rennecke <joern.rennecke@embecosm.com>
Brendan Kehoe <brendan@zen.org>
Simon Cook <simon.cook@embecosm.com>
* config.host (arc*-*-elf*, arc*-*-linux-uclibc*): New configurations.
* config/arc: New directory.
* longlong.h [__arc__] (umul_ppmm): Remove.
[__arc__] (__umulsidi3): Define.
[__arc__ && __ARC_NORM__] (count_leading_zeros): Define.
[__arc__ && __ARC_NORM__] (COUNT_LEADING_ZEROS_0): Likewise.
2013-09-17 Jacek Caban <jacek@codeweavers.com> 2013-09-17 Jacek Caban <jacek@codeweavers.com>
* config/i386/gthr-win32.c: CreateSemaphoreW instead of * config/i386/gthr-win32.c: CreateSemaphoreW instead of
......
...@@ -91,6 +91,9 @@ alpha*-*-*) ...@@ -91,6 +91,9 @@ alpha*-*-*)
am33_2.0-*-linux*) am33_2.0-*-linux*)
cpu_type=mn10300 cpu_type=mn10300
;; ;;
arc*-*-*)
cpu_type=arc
;;
arm*-*-*) arm*-*-*)
cpu_type=arm cpu_type=arm
;; ;;
...@@ -315,6 +318,14 @@ alpha*-dec-*vms*) ...@@ -315,6 +318,14 @@ alpha*-dec-*vms*)
extra_parts="$extra_parts vms-dwarf2.o vms-dwarf2eh.o" extra_parts="$extra_parts vms-dwarf2.o vms-dwarf2eh.o"
md_unwind_header=alpha/vms-unwind.h md_unwind_header=alpha/vms-unwind.h
;; ;;
arc*-*-elf*)
tmake_file="arc/t-arc-newlib arc/t-arc"
extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o libgmon.a crtg.o crtgend.o"
;;
arc*-*-linux-uclibc*)
tmake_file="${tmake_file} t-slibgcc-libgcc t-slibgcc-nolc-override arc/t-arc700-uClibc arc/t-arc"
extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o libgmon.a crtg.o crtgend.o"
;;
arm-wrs-vxworks) arm-wrs-vxworks)
tmake_file="$tmake_file arm/t-arm arm/t-vxworks t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp" tmake_file="$tmake_file arm/t-arm arm/t-vxworks t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
extra_parts="$extra_parts crti.o crtn.o" extra_parts="$extra_parts crti.o crtn.o"
......
/* Assembler macros for the Synopsys DesignWare ARC CPU.
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* FUNC(X): emit a .type directive that marks symbol X as a function.
   Use it immediately before the label that defines X.  */
#define FUNC(X) .type X,@function
/* ENDFUNC(X): emit a .size directive that sets the size of X to the
   distance from X to the current location.  Use it right after the last
   instruction of X.  */
#define ENDFUNC(X) .size X, .-X
/* Code to start and stop profiling for the Synopsys DesignWare ARC CPU.
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Fragment added to .init: load the addresses of _init and _fini into
   r0 and r1, then call (jump-and-link to) __monstartup.  */
.section .init
.global _init
.global _fini
.global __monstartup
mov_s r0,_init
mov_s r1,_fini
jl __monstartup
/* Exported *_secstart labels for the three profiling sections, each
   placed on a 4-byte boundary.
   NOTE(review): for the labels to mark the real start of each section,
   this object presumably has to be linked ahead of every other
   contributor to these sections -- confirm against the link order.  */
.section .__arc_profile_desc, "a"
.global __arc_profile_desc_secstart
.balign 4
__arc_profile_desc_secstart:
.section .__arc_profile_forward, "a"
.global __arc_profile_forward_secstart
.balign 4
__arc_profile_forward_secstart:
/* Unlike the two sections above, the counters section is writable
   ("aw").  */
.section .__arc_profile_counters, "aw"
.global __arc_profile_counters_secstart
.balign 4
__arc_profile_counters_secstart:
/* Fragment added to .fini: call _mcleanup.  */
.section .fini
.global _mcleanup
jl _mcleanup
/* Code to start and stop profiling for the Synopsys DesignWare ARC CPU.
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Exported *_secend labels for the descriptor and forward profiling
   sections.  No alignment is requested here and no end label is defined
   for the counters section.
   NOTE(review): presumably this object is linked after every other
   contributor to these sections so the labels mark their ends --
   confirm against the link order.  */
.section .__arc_profile_desc, "a"
.global __arc_profile_desc_secend
__arc_profile_desc_secend:
.section .__arc_profile_forward, "a"
.global __arc_profile_forward_secend
__arc_profile_forward_secend:
/* .fini/.init stack frame setup for the Synopsys DesignWare ARC CPU.
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
# This file contains the stack frame setup for contents of the .fini and
# .init sections.
/* Opening of _init in .init: define the global label and push blink.
   The matching pop and return are not part of this fragment.  */
.section .init
.global _init
/* NOTE(review): the purpose of the zero word emitted ahead of the label
   is not evident from this file (possibly padding) -- confirm.  */
.word 0
_init:
push_s blink
/* Opening of _fini in .fini, laid out exactly like _init above.  */
.section .fini
.global _fini
.word 0
_fini:
push_s blink
/* Ensure .fini/.init return for the Synopsys DesignWare ARC CPU.
Copyright (C) 1994, 1995, 1997, 2004, 2007-2012
Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
# This file just makes sure that the .fini and .init sections do in
# fact return. This file is the last thing linked into any executable.
/* Tail of the .init code: pop blink from the stack and return by jumping
   to the address it holds.  */
.section .init
pop_s blink
j_s [blink]
/* Tail of the .fini code: same pop-and-return sequence.  */
.section .fini
pop_s blink
j_s [blink]
/* Copyright (C) 2004, 2006, 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file. (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
/* Calculate division table for ARC700 integer division
Contributed by Joern Rennecke
joern.rennecke@arc.com */
#include <stdio.h>
#include <math.h>
/* Print the reciprocal table for the ARC700 integer division code as
   assembler source on stdout.

   After a header comment, a ".balign 4" and the ".Ldivtab:" label, one
   ".long" line is written for every divisor i from 256 down to 2.  The
   value is ceil (2^39 / j), where j is derived from i as follows:
     - i not a power of two: j is i doubled until it is at least 128;
     - i a power of two:     j is i * 128 (an unnormalized entry).

   Returns 0.  The function returns from main instead of calling exit,
   because exit is not declared by the headers this file includes.  */
int
main (void)
{
  int i, j;
  double q, r, err, max_err = -1;

  puts("/* This table has been generated by divtab-arc700.c. */");
  puts("/* 1/512 .. 1/256, normalized. There is a leading 1 in bit 31.\n"
       "For powers of two, we list unnormalized numbers instead. The values\n"
       "for powers of 2 are loaded, but not used. The value for 1 is actually\n"
       "the first instruction after .Lmuldiv. */\n"
       ".balign 4");
  puts (".Ldivtab:\n");
  for (i = 256; i >= 2; --i)
    {
      /* i is always positive here, so it can be used directly.  */
      j = i;
      if (j & (j-1))
	{
	  /* Not a power of two: normalize into the range 128..255.  */
	  while (j < 128)
	    j += j;
	}
      else
	/* Power of two.  */
	j *= 128;
      /* 4. * (1<<30) * 128 is 2^39; the arithmetic is done in double.  */
      q = 4.*(1<<30)*128/j;
      r = ceil (q);
      printf ("\t.long\t0x%X\n", (unsigned) r);
      /* Track the largest rounding error; it is only reported by the
	 disabled block below.  */
      err = r - q;
      if (err > max_err)
	max_err = err;
    }
#if 0
  printf ("\t/* maximum error: %f */\n", max_err);
#endif
  return 0;
}
/* Copyright (C) 2007-2013 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file selects the double-precision parts of fp-bit.c that are
still needed for some ARC hardware variants; it also renames functions
that duplicate asm-coded functionality so that their results can be
used to compare with the optimized versions for debugging. */
/* NOTE(review): presumably makes fp-bit.c build only the parts whose L_*
   macro is defined below -- confirm against fp-bit.c.  */
#define FINE_GRAINED_LIBRARIES
/* Debug switch.  While this is nonzero, every "#elif ARC_DP_DEBUG" branch
   below is taken when __ARC_NORM__ is defined: the C routine is still
   selected, but its entry point is renamed with a _c suffix.  */
#define ARC_DP_DEBUG 1
/* Pack / unpack / make / NaN helpers and the sf_to_df part: selected when
   __ARC_NORM__ is undefined, or unconditionally in debug mode.  */
#if !defined (__ARC_NORM__) || ARC_DP_DEBUG
#define L_pack_df
#define L_unpack_df
#define L_make_df
#define L_thenan_df
#define L_sf_to_df
#endif
/* Addition and subtraction.  */
#ifndef __ARC_NORM__
#define L_addsub_df
#elif ARC_DP_DEBUG
#define L_addsub_df
#define __adddf3 __adddf3_c
#define __subdf3 __subdf3_c
#endif
/* Multiplication and division.  With __ARC_NORM__ defined but none of
   __ARC700__, __ARC_MUL64__ or __ARC_MUL32BY16__, the C versions keep
   their normal names and QUIET_NAN is overridden.  */
#ifndef __ARC_NORM__
#define L_mul_df
#define L_div_df
#elif (!defined (__ARC700__) && !defined (__ARC_MUL64__) \
&& !defined(__ARC_MUL32BY16__))
#define L_mul_df
#define L_div_df
#undef QUIET_NAN
#define QUIET_NAN 0xfffffffffffffLL
#elif ARC_DP_DEBUG
#define L_mul_df
#define __muldf3 __muldf3_c
#define L_div_df
#define __divdf3 __divdf3_c
#endif
/* Conversions to and from double.  */
#ifndef __ARC_NORM__
#define L_df_to_sf
#define L_si_to_df
#define L_df_to_si
#define L_tf_to_usi /* need to define this instead of df_to_usi */
#define L_usi_to_df
#elif ARC_DP_DEBUG
#define L_df_to_sf
#define __truncdfsf2 __truncdfsf2_c
#define L_si_to_df
#define __floatsidf __floatsidf_c
#define L_df_to_si
#define __fixdfsi __fixdfsi_c
#define L_tf_to_usi
#define __fixunsdfsi __fixunsdfsi_c
#define L_usi_to_df
#define __floatunsidf __floatunsidf_c
#endif
/* Comparisons and negation.  The debug branch selects only the eq, gt, ge
   and unord parts (renamed) plus L_fpcmp_parts_df; compare, ne, lt, le
   and negate are not selected there.  */
#ifndef __ARC_NORM__
#define L_fpcmp_parts_df
#define L_compare_df
#define L_eq_df
#define L_ne_df
#define L_gt_df
#define L_ge_df
#define L_lt_df
#define L_le_df
#define L_unord_df
#define L_negate_df
#elif ARC_DP_DEBUG
#define L_fpcmp_parts_df
#define L_eq_df
#define __eqdf2 __eqdf2_c
#define L_gt_df
#define __gtdf2 __gtdf2_c
#define L_ge_df
#define __gedf2 __gedf2_c
#define L_unord_df
#define __unorddf2 __unorddf2_c
#endif
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* This file selects the single-precision parts of fp-bit.c that are
still needed for some ARC hardware variants; it also renames functions
that duplicate asm-coded functionality so that their results can be
used to compare with the optimized versions for debugging. */
/* Debug switch.  While this is nonzero, every "#elif ARC_FP_DEBUG" branch
   below is taken when __ARC_NORM__ is defined: the C routine is still
   selected, but its entry point is renamed with a _c suffix.  */
#define ARC_FP_DEBUG 1
/* NOTE(review): presumably makes fp-bit.c build only the parts whose L_*
   macro is defined below -- confirm against fp-bit.c.  */
#define FINE_GRAINED_LIBRARIES
/* Pack / unpack / make / NaN helpers: selected when __ARC_NORM__ is
   undefined, or unconditionally in debug mode.  */
#if !defined (__ARC_NORM__) || ARC_FP_DEBUG
#define L_pack_sf
#define L_unpack_sf
#define L_make_sf
#define L_thenan_sf
#endif
/* Arithmetic and conversions.  */
#ifndef __ARC_NORM__
#define L_addsub_sf
#define L_mul_sf
#define L_div_sf
#define L_sf_to_df
#define L_si_to_sf
#define L_sf_to_si
#define L_usi_to_sf
#elif ARC_FP_DEBUG
#define L_addsub_sf
#define __addsf3 __addsf3_c
#define __subsf3 __subsf3_c
#define L_mul_sf
#define __mulsf3 __mulsf3_c
#define L_div_sf
#define __divsf3 __divsf3_c
#define L_sf_to_df
#define __extendsfdf2 __extendsfdf2_c
#define L_si_to_sf
#define __floatsisf __floatsisf_c
#define L_sf_to_si
#define __fixsfsi __fixsfsi_c
#define L_usi_to_sf
#define __floatunsisf __floatunsisf_c
#endif
/* Comparisons and negation.  The debug branch selects only the eq, gt, ge
   and unord parts (renamed) plus L_fpcmp_parts_sf; compare, ne, lt, le
   and negate are not selected there.  */
#ifndef __ARC_NORM__
#define L_fpcmp_parts_sf
#define L_compare_sf
#define L_eq_sf
#define L_ne_sf
#define L_gt_sf
#define L_ge_sf
#define L_lt_sf
#define L_le_sf
#define L_unord_sf
#define L_negate_sf
#elif ARC_FP_DEBUG
#define L_fpcmp_parts_sf
#define L_eq_sf
#define __eqsf2 __eqsf2_c
#define L_gt_sf
#define __gtsf2 __gtsf2_c
#define L_ge_sf
#define __gesf2 __gesf2_c
#define L_unord_sf
#define __unordsf2 __unordsf2_c
#endif
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* File deliberately left blank. */
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Symbolic names for numeric register constants.  D_CACHE_BUILD and
   IDENTITY are used as bracketed operands of the lr instruction in
   __dcache_linesz.
   NOTE(review): the others are presumably auxiliary register numbers
   used the same way -- confirm against the ARC documentation.  */
#define LP_START 0x02
#define LP_END 0x03
#define IDENTITY 0x04
#define STATUS32 0x0a
#define COUNT0 0x21 /* Timer 0 count */
#define CONTROL0 0x22 /* Timer 0 control */
#define LIMIT0 0x23 /* Timer 0 limit */
#define INT_VECTOR_BASE 0x25
#define D_CACHE_BUILD 0x72
#define DC_FLDL 0x4c
/* This file contains code to do profiling.
Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../asm.h"
#include "auxreg.h"
/* This file contains code to do profiling. */
; Weak, global absolute symbol with the value 200.
.weak __profile_timer_cycles
.global __profile_timer_cycles
.set __profile_timer_cycles, 200
.text
; For Arctangent-A5, if no data cache is present, a read of the
; cache build register returns the ID register. For ARC600 and
; later, the version field will be zero.
;
; __dcache_linesz: return in r0 the value 16 << (bits 19..16 of
; D_CACHE_BUILD), or 1 when no data cache is detected.
; Uses r12 as scratch and returns through blink.
.global __dcache_linesz
.balign 4
__dcache_linesz:
lr r12,[D_CACHE_BUILD]
; r0 = low byte of the build register (the version field).
extb_s r0,r12
; A version of zero means there is no data cache.
breq_s r0,0,.Lsz_nocache
; Versions of 0x20 and above are taken as a real cache without
; further checks.
brge r0,0x20,.Lsz_havecache
; Lower versions: if the build register reads the same as the
; IDENTITY register, treat it as the no-cache case described above.
lr r0,[IDENTITY]
breq r12,r0,.Lsz_nocache
.Lsz_havecache:
; r12 = (build >> 16) & 0xf, then r0 = 16 << r12.
lsr_s r12,r12,16
mov_s r0,16
bmsk_s r12,r12,3
asl_s r0,r0,r12
j_s [blink]
.Lsz_nocache:
; No data cache: report 1.
mov_s r0,1
j_s [blink]
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef MACHINE_GMON_H
#define MACHINE_GMON_H
/* We can't fake our own <sys/types.h> header because the newlib / uclibc
headers in GCC_FOR_TARGET take precedence. */
/* These three are defined as empty macros here.  */
#define __BEGIN_DECLS
#define __END_DECLS
#define __THROW
/* Declared here; no definition is visible in this header.  */
extern int __dcache_linesz (void);
/* _MCOUNT_DECL expands to the header of a static inline function named
   _mcount_internal taking (void *countp, u_long selfpc); the user of the
   macro supplies the function body.  */
#define _MCOUNT_DECL(countp, selfpc) \
static inline void _mcount_internal (void *countp, u_long selfpc)
extern void _mcount (void);
extern void _mcount_call (void);
/* N.B.: the calling point might be a sibcall, thus blink does not necessarily
hold the caller's address. r8 doesn't hold the caller's address, either,
but rather a pointer to the counter data structure associated with the
caller.
This function must be compiled with optimization turned on in order to
enable a sibcall for the final call to selfpc; this is important when trying
to profile a program with deep tail-recursion that would get a stack
overflow otherwise. */
/* MCOUNT defines _mcount_call: it takes countp from r8 and selfpc from r9,
   passes both to _mcount_internal and then calls the address held in
   selfpc.  */
#define MCOUNT \
void \
_mcount_call (void) \
{ \
register void *countp __asm("r8"); \
register u_long selfpc __asm("r9"); \
_mcount_internal (countp, selfpc); \
((void (*)(void)) selfpc) (); \
}
extern int __profil (u_short *,size_t, size_t, u_int);
#endif /* MACHINE_GMON_H */
/*-
* Copyright (c) 1983, 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Copyright (C) 2007-2012 Free Software Foundation, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#if !defined(lint) && !defined(KERNEL) && defined(LIBC_SCCS)
static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
#endif
#if 0
#include <unistd.h>
#include <sys/param.h>
#endif
#include <sys/gmon.h>
/* This file provides the machine-dependent definitions of the _MCOUNT_DECL
and MCOUNT macros. */
#include <machine-gmon.h>
#include <atomic.h>
/*
* mcount is called on entry to each function compiled with the profiling
* switch set. _mcount(), which is declared in a machine-dependent way
* with _MCOUNT_DECL, does the actual work and is either inlined into a
* C routine or called by an assembly stub. In any case, this magic is
* taken care of by the MCOUNT definition in <machine-gmon.h>.
*
* _mcount updates data structures that represent traversals of the
* program's call graph edges. frompc and selfpc are the return
* address and function address that represents the given call graph edge.
*
* Note: the original BSD code used the same variable (frompcindex) for
* both frompcindex and frompc. Any reasonable, modern compiler will
* perform this optimization.
*/
/*
 * Record one traversal of a call graph arc.
 *
 * count_ptr points to an ARCINDEX slot that heads a chain of tostruct
 * entries in _gmonparam.tos; selfpc identifies the function being
 * entered.  The chain is searched for an entry whose selfpc matches; its
 * count is incremented and it is moved to the head of the chain.  If no
 * entry matches, a new one is allocated by incrementing tos[0].link.
 *
 * Nothing is recorded unless the profiling state is GMON_PROF_ON on
 * entry.  The state is GMON_PROF_BUSY while the tables are updated and
 * is set back to GMON_PROF_ON on success, or to GMON_PROF_ERROR when the
 * new index reaches tolimit.
 */
_MCOUNT_DECL(count_ptr, selfpc) /* _mcount; may be static, inline, etc */
{
register ARCINDEX *frompcindex;
register struct tostruct *top, *prevtop;
register struct gmonparam *p;
register ARCINDEX toindex;
/* Check for nested function trampoline. */
/* When bit 1 of selfpc is set, the address to record is loaded from
   selfpc + 10 instead. */
if (selfpc & 2)
selfpc = *(u_long *) (selfpc + 10);
p = &_gmonparam;
/*
* check that we are profiling
* and that we aren't recursively invoked.
*/
#if 0
if (catomic_compare_and_exchange_bool_acq (&p->state, GMON_PROF_BUSY,
GMON_PROF_ON))
return;
#elif defined (__ARC700__)
/* ??? This could temporarily lose the ERROR / OFF condition in a race,
but doing an actual compare_and_exchange would be too costly. It would
be better if we had a semaphore independent of the 'sticky' state, but
then we could run into ABI compatibility problems with the size of struct
gmonparam. */
{
u_long old_state;
/* Exchange: store GMON_PROF_BUSY into p->state and fetch the previous
   state into old_state. */
__asm ("ex %0,%1": "=r" (old_state), "+m" (p->state)
: "0" (GMON_PROF_BUSY));
if (old_state != GMON_PROF_ON)
{
switch (old_state)
{
case GMON_PROF_OFF:
/* Exchange again: write GMON_PROF_OFF back and fetch what p->state
   held in the meantime. */
__asm ("ex %0,%1": "+r" (old_state), "+m" (p->state));
if (old_state == GMON_PROF_BUSY
/* Switching off while we say we are busy while profiling
was actually already switched off is all right. */
|| old_state == GMON_PROF_OFF)
break;
/* It is not clear if we should allow switching on
profiling at this point, and how to handle further races.
For now, record an error in this case. */
/* Fall through. */
default: /* We expect here only GMON_PROF_ERROR. */
p->state = GMON_PROF_ERROR;
break;
case GMON_PROF_BUSY: break;
}
return;
}
}
#else /* ??? No semaphore primitives available. */
if (p->state != GMON_PROF_ON)
return;
p->state = GMON_PROF_BUSY;
#endif
frompcindex = count_ptr;
toindex = *frompcindex;
if (toindex == 0) {
/*
* first time traversing this arc
*/
/* tos[0].link serves as the allocation counter for tos entries. */
toindex = ++p->tos[0].link;
if (toindex >= (ARCINDEX) p->tolimit)
/* halt further profiling */
goto overflow;
*frompcindex = toindex;
top = &p->tos[toindex];
top->selfpc = selfpc;
top->count = 1;
top->link = 0;
goto done;
}
top = &p->tos[toindex];
if (top->selfpc == selfpc) {
/*
* arc at front of chain; usual case.
*/
top->count++;
goto done;
}
/*
* have to go looking down chain for it.
* top points to what we are looking at,
* prevtop points to previous top.
* we know it is not at the head of the chain.
*/
for (; /* goto done */; ) {
if (top->link == 0) {
/*
* top is end of the chain and none of the chain
* had top->selfpc == selfpc.
* so we allocate a new tostruct
* and link it to the head of the chain.
*/
toindex = ++p->tos[0].link;
if (toindex >= (ARCINDEX) p->tolimit)
goto overflow;
top = &p->tos[toindex];
top->selfpc = selfpc;
top->count = 1;
top->link = *frompcindex;
*frompcindex = toindex;
goto done;
}
/*
* otherwise, check the next arc on the chain.
*/
prevtop = top;
top = &p->tos[top->link];
if (top->selfpc == selfpc) {
/*
* there it is.
* increment its count
* move it to the head of the chain.
*/
top->count++;
toindex = prevtop->link;
prevtop->link = top->link;
top->link = *frompcindex;
*frompcindex = toindex;
goto done;
}
}
done:
/* Normal exit: allow profiling again. */
p->state = GMON_PROF_ON;
return;
overflow:
/* The tos table is full: leave the state at ERROR. */
p->state = GMON_PROF_ERROR;
return;
}
/*
* Actual definition of the mcount entry point. MCOUNT is a macro provided
* by <machine-gmon.h>, which this file includes directly.
*/
MCOUNT
/* This file contains code to do profiling.
Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../asm.h"
/* This file contains code to do profiling. */
/* Weak, global absolute symbol with the value 1000.  */
.weak __profile_frequency_value
.global __profile_frequency_value
.set __profile_frequency_value, 1000
.text
.balign 4
.global __profile_frequency
FUNC(__profile_frequency)
/* __profile_frequency: return the value of the symbol
   __profile_frequency_value in r0.  Returns through blink.  */
__profile_frequency:
mov_s r0,__profile_frequency_value
j_s [blink]
ENDFUNC(__profile_frequency)
/* Return frequency of ticks reported by profil. Generic version. */
/*-
* Copyright (c) 1983, 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* Copyright (C) 2007-2012 Free Software Foundation, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/types.h>
#include <sys/time.h>
#if 0
#include <libc-internal.h>
#else
#include "sys/gmon.h"
#endif
int
__profile_frequency (void)
{
/*
 * Probe the granularity of ITIMER_REAL: ask for a 1 microsecond reload
 * interval on a disarmed timer (it_value == 0), then read back what the
 * system actually stored.  A read-back interval below 2 microseconds is
 * treated as "could not determine" and reported as 0, which is not a
 * possible frequency.
 */
struct itimerval probe;
probe.it_value.tv_sec = 0;
probe.it_value.tv_usec = 0;
probe.it_interval.tv_sec = 0;
probe.it_interval.tv_usec = 1;
/* First call installs the request, second call fetches the old (i.e. the
   just-installed, possibly rounded) setting back into PROBE.  */
setitimer(ITIMER_REAL, &probe, 0);
setitimer(ITIMER_REAL, 0, &probe);
if (probe.it_interval.tv_usec >= 2)
{
/* Ticks per second = microseconds per second / microseconds per tick.  */
return (1000000 / probe.it_interval.tv_usec);
}
return 0;
}
/* This file contains code to do profiling.
Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../asm.h"
#include "auxreg.h"
/* This file contains code to do profiling. */
/* Weak absolute symbol with default value 200; __profil writes it to the
   LIMIT0 auxiliary register.  Being weak, it can be overridden by another
   definition at link time.  */
.weak __profile_timer_cycles
.global __profile_timer_cycles
.set __profile_timer_cycles, 200
/* __profil_offset: one zero-initialised 4-byte word in .bss.  __profil
   stores (buf - (offset >> 1)) here and __profil_irq loads it as the base
   for indexing the counters.  */
.section .bss
.global __profil_offset
.align 4
.type __profil_offset, @object
.size __profil_offset, 4
__profil_offset:
.zero 4
/* __profil: start or stop timer-driven PC sampling.
   In:  r0 = buf, r1 = bufsiz, r2 = offset, r3 = scale (see comment below).
   Out: r0 = 0 on the start path.
   If buf is 0 the code branches to .Lstop_profiling, which writes that 0
   to CONTROL0 and returns.  Otherwise it records the counter base in
   __profil_offset, optionally flushes the buffer from the data cache,
   installs a jump to __profil_irq in the timer0 vector and starts timer0.
   Uses r8 and r12 as scratch and calls __dcache_linesz.  */
.text
.global __dcache_linesz
.global __profil
FUNC(__profil)
.Lstop_profiling:
sr r0,[CONTROL0]
j_s [blink]
.balign 4
__profil:
.Lprofil:
breq_s r0,0,.Lstop_profiling
; r0: buf r1: bufsiz r2: offset r3: scale
bxor.f r3,r3,15; scale must be 0x8000, i.e. 1/2; generate 0.
push_s blink
lsr_s r2,r2,1
/* Keep buf in r8 across the call below.  */
mov_s r8,r0
flag.ne 1 ; halt if wrong scale
/* r0 = buf - offset/2; this is the base __profil_irq indexes from.  */
sub_s r0,r0,r2
st r0,[__profil_offset]
bl __dcache_linesz
pop_s blink
/* Bit 0 of the __dcache_linesz result set: skip the flush loop.
   NOTE(review): presumably that encodes "no data cache" -- confirm against
   __dcache_linesz.  The delay slot reloads r0 with buf either way.  */
bbit1.d r0,0,nocache
mov_s r0,r8
#ifdef __ARC700__
/* lp_count = (bufsiz + 31) >> 5; each iteration writes the current address
   to DC_FLDL and advances by 32 bytes.  The loop is skipped when the count
   is zero.  */
add_s r1,r1,31
lsr.f lp_count,r1,5
lpne 2f
sr r0,[DC_FLDL]
add_s r0,r0,32
#else /* !__ARC700__ */
# FIX ME: set up loop according to cache line size
/* Bits 16..19 of D_CACHE_BUILD are used as a shift count N: the stride is
   16 << N and lp_count is (((bufsiz - 1) >> 4) >> N) + 1.
   NOTE(review): r0 is pre-decremented by a fixed 16 rather than by the
   stride, so for N > 0 the first address written is buf + stride - 16.  */
lr r12,[D_CACHE_BUILD]
sub_s r0,r0,16
sub_s r1,r1,1
lsr_s r12,r12,16
asr_s r1,r1,4
bmsk_s r12,r12,3
asr_s r1,r1,r12
add.f lp_count,r1,1
mov_s r1,16
asl_s r1,r1,r12
lpne 2f
add r0,r0,r1
sr r0,[DC_FLDL]
#endif /* __ARC700__ */
2: b_s .Lcounters_cleared
nocache:
.Lcounters_cleared:
/* r3 is still the zero produced by the bxor.f above (execution halts
   otherwise), so the two sr instructions clear CONTROL0 and COUNT0.
   r1 = interrupt vector base.  */
lr r1,[INT_VECTOR_BASE] ; disable timer0 interrupts
sr r3,[CONTROL0]
sr r3,[COUNT0]
/* Copy the two words at local label 1 (the "j __profil_irq" below) to
   vector base + 24 and + 28.  The "& 2" term presumably compensates for
   pcl being the pc rounded down to a multiple of 4 -- TODO confirm.  */
0: ld_s r0,[pcl,1f-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF
0: ld_s r12,[pcl,1f+4-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF + 4
st_s r0,[r1,24]; timer0 uses vector3
st_s r12,[r1,24+4]; timer0 uses vector3
;sr 10000,[LIMIT0]
sr __profile_timer_cycles,[LIMIT0]
mov_s r12,3 ; enable timer interrupts; count only when not halted.
sr r12,[CONTROL0]
lr r12,[STATUS32]
bset_s r12,r12,1 ; allow level 1 interrupts
flag r12
mov_s r0,0
j_s [blink]
.balign 4
/* Template instruction copied into the vector table above; it is not
   reached by fall-through.  */
1: j __profil_irq
ENDFUNC(__profil)
/* __profil_irq: timer0 interrupt handler installed by __profil.
   Increments the 16-bit counter at __profil_offset + 2 * (ilink1 >> 2),
   i.e. one counter per 4 bytes of interrupted code address.  The store is
   skipped when the increment carries into bit 16, so a counter stays at
   0xffff instead of wrapping.  r0-r2 are saved on the stack and restored;
   the handler returns with j.f [ilink1].  */
FUNC(__profil_irq)
.balign 4 ; make final jump unaligned to avoid delay penalty
.balign 32,0,12 ; make sure the code spans no more that two cache lines
nop_s
__profil_irq:
push_s r0
ld r0,[__profil_offset]
push_s r1
/* r1 = counter index derived from the interrupted address.  */
lsr r1,ilink1,2
push_s r2
/* Scaled (halfword) indexed load of the counter, with the .di suffix.  */
ldw.as.di r2,[r0,r1]
/* r0 = r0 + (r1 << 1): byte address of the same counter for the store.  */
add1 r0,r0,r1
/* r1 is finished with; reload its saved value from the stack now.  */
ld_s r1,[sp,4]
add_s r2,r2,1
bbit1 r2,16,nostore
stw.di r2,[r0]
/* Reload r2 and step sp past both the r2 and r1 slots, then pop r0.  */
nostore:ld.ab r2,[sp,8]
pop_s r0
j.f [ilink1]
ENDFUNC(__profil_irq)
; could save one cycle if the counters were allocated at link time and
; the contents of __profil_offset were pre-computed at link time, like this:
/* The block below is compiled out (#if 0).  In this sketch __profil_offset
   is used as an address constant in the add1 instead of being loaded from
   memory, and only r0 and r1 are saved.  */
#if 0
; __profil_offset needs to be PROVIDEd as __profile_base-text/4
.global __profil_offset
.balign 4
__profil_irq:
push_s r0
lsr r0,ilink1,2
add1 r0,__profil_offset,r0
push_s r1
ldw.di r1,[r0]
add_s r1,r1,1
bbit1 r1,16,nostore
stw.di r1,[r0]
nostore:pop_s r1
pop_s r0
j [ilink1]
#endif /* 0 */
/*-
* Copyright (c) 1982, 1986, 1992, 1993
* The Regents of the University of California. All rights reserved.
* Copyright (C) 2007-2012 Free Software Foundation, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)gmon.h 8.2 (Berkeley) 1/4/94
*/
#ifndef _SYS_GMON_H
#define _SYS_GMON_H 1
#if 0
#include <features.h>
#include <sys/types.h>
#else
#include <sys/types.h>
#include "machine-gmon.h"
#define attribute_hidden __attribute__ ((visibility("hidden")))
#endif
#include <stdint.h>
/*
* See gmon_out.h for gmon.out format.
*/
/* structure emitted by "gcc -a". This must match struct bb in
gcc/libgcc2.c. It is OK for gcc to declare a longer structure as
long as the members below are present. */
/* NOTE(review): the per-member notes below are inferred from the member
   names only; confirm against struct bb in libgcc2.c.  Member order and
   types must not be changed because of the layout requirement above.  */
struct __bb
{
long zero_word;
const char *filename; /* presumably the source file the counts belong to */
long *counts; /* presumably an array of ncounts counters */
long ncounts;
struct __bb *next; /* link to the next record in a singly linked list */
const unsigned long *addresses; /* presumably one address per counter */
};
/* Head of the list of struct __bb records; defined elsewhere.  */
extern struct __bb *__bb_head;
/*
* histogram counters are unsigned shorts (according to the kernel).
*/
#define HISTCOUNTER unsigned short
/*
* fraction of text space to allocate for histogram counters here, 1/2
*/
#define HISTFRACTION 2
/*
* Fraction of text space to allocate for from hash buckets.
* The value of HASHFRACTION is based on the minimum number of bytes
* of separation between two subroutine call points in the object code.
* Given MIN_SUBR_SEPARATION bytes of separation the value of
* HASHFRACTION is calculated as:
*
* HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1);
*
* For example, on the VAX, the shortest two call sequence is:
*
* calls $0,(r0)
* calls $0,(r0)
*
* which is separated by only three bytes, thus HASHFRACTION is
* calculated as:
*
* HASHFRACTION = 3 / (2 * 2 - 1) = 1
*
* Note that the division above rounds down, thus if MIN_SUBR_SEPARATION
* is less than three, this algorithm will not work!
*
* In practice, however, call instructions are rarely at a minimal
* distance. Hence, we will define HASHFRACTION to be 2 across all
* architectures. This saves a reasonable amount of space for
* profiling data structures without (in practice) sacrificing
* any granularity.
*/
#define HASHFRACTION 2
/*
* Percent of text space to allocate for tostructs.
* This is a heuristic; we will fail with a warning when profiling programs
* with a very large number of very small functions, but that's
* normally OK.
* 2 is probably still a good value for normal programs.
* Profiling a test case with 64000 small functions will work if
* you raise this value to 3 and link statically (which bloats the
* text size, thus raising the number of arcs expected by the heuristic).
*/
#define ARCDENSITY 3
/*
* Always allocate at least this many tostructs. This
* hides the inadequacy of the ARCDENSITY heuristic, at least
* for small programs.
*/
#define MINARCS 50
/*
* The type used to represent indices into gmonparam.tos[].
*/
#define ARCINDEX u_long
/*
* Maximum number of arcs we want to allow.
* Used to be max representable value of ARCINDEX minus 2, but now
* that ARCINDEX is a long, that's too large; we don't really want
* to allow a 48 gigabyte table.
* The old value of 1<<16 wasn't high enough in practice for large C++
* programs; will 1<<20 be adequate for long? FIXME
*/
#define MAXARCS (1 << 20)
/*
 * One entry of gmonparam.tos[].  `link' has type ARCINDEX, the type this
 * header uses for indices into tos[], so entries can be chained by index.
 * NOTE(review): selfpc / count are presumably the callee address and the
 * arc traversal count -- confirm against the mcount implementation.
 */
struct tostruct {
u_long selfpc;
long count;
ARCINDEX link;
};
/*
* a raw arc, with pointers to the calling site and
* the called site and a count.
*/
struct rawarc {
u_long raw_frompc; /* calling site */
u_long raw_selfpc; /* called site */
long raw_count; /* count for this arc */
};
/*
* general rounding functions.
*
* ROUNDDOWN(x,y) yields the largest multiple of y that is <= x, and
* ROUNDUP(x,y) the smallest multiple of y that is >= x, when x and y are
* non-negative integers (the result relies on truncating integer division).
* Both macros evaluate y more than once, so arguments must be free of
* side effects.
*/
#define ROUNDDOWN(x,y) (((x)/(y))*(y))
#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
/*
* The profiling data structures are housed in this structure.
*
* NOTE(review): the member notes below are inferred from the member names
* and from the macros in this header; confirm against the code that fills
* in _gmonparam.
*/
struct gmonparam {
long int state; /* presumably one of the GMON_PROF_* values defined below */
u_short *kcount; /* presumably the histogram counters (cf. HISTCOUNTER) */
u_long kcountsize; /* presumably the size of the kcount buffer */
ARCINDEX *froms; /* presumably the "from" hash buckets (cf. HASHFRACTION) */
u_long fromssize;
struct tostruct *tos; /* array indexed by ARCINDEX values */
u_long tossize;
long tolimit; /* presumably the number of usable tos[] entries */
u_long lowpc;
u_long highpc;
u_long textsize; /* presumably highpc - lowpc */
u_long hashfraction;
long log_hashfraction;
};
/* The single instance of the profiling state; defined elsewhere.  */
extern struct gmonparam _gmonparam;
/*
* Possible states of profiling.
*/
#define GMON_PROF_ON 0
#define GMON_PROF_BUSY 1
#define GMON_PROF_ERROR 2
#define GMON_PROF_OFF 3
/*
* Sysctl definitions for extracting profiling information from the kernel.
*/
#define GPROF_STATE 0 /* int: profiling enabling variable */
#define GPROF_COUNT 1 /* struct: profile tick count buffer */
#define GPROF_FROMS 2 /* struct: from location hash bucket */
#define GPROF_TOS 3 /* struct: destination/count structure */
#define GPROF_GMONPARAM 4 /* struct: profiling parameters (see above) */
__BEGIN_DECLS
/* Set up data structures and start profiling. */
extern void __monstartup (u_long __lowpc, u_long __highpc) __THROW;
extern void monstartup (u_long __lowpc, u_long __highpc) __THROW;
/* Clean up profiling and write out gmon.out. */
extern void _mcleanup (void) __THROW;
extern void __write_profiling (void);
/* Declared with hidden visibility (attribute_hidden is defined near the
   top of this header).  */
extern int attribute_hidden __profile_frequency (void);
/* NOTE(review): judging by the names, these mark the start / end of
   ARC-specific profiling sections; they are presumably provided by the
   linker script -- confirm.  */
extern u_long __arc_profile_desc_secstart[], __arc_profile_desc_secend[];
extern u_long __arc_profile_forward_secstart[], __arc_profile_forward_secend[];
extern u_long __arc_profile_counters_secstart[];
__END_DECLS
#endif /* sys/gmon.h */
/* Copyright (C) 2007-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#define GMON_TAG_TIME_HIST 0
#define GMON_TAG_CG_ARC 1
#define GMON_TAG_BB_COUNT 2
#define GMON_MAGIC "gmon"
#define GMON_VERSION 1
/* Histogram record header.  Every member is a char array (or a single
   char), so the structure is 4 + 4 + 4 + 4 + 15 + 1 = 32 bytes with no
   alignment requirement.  NOTE(review): presumably this follows a
   GMON_TAG_TIME_HIST tag in gmon.out -- confirm against the writer.  */
struct arc_gmon_hist_hdr
{
char low_pc[4];
char high_pc[4];
char hist_size[4];
char prof_rate[4];
char dimen[15];
char dimen_abbrev;
};
/* Call-graph arc record: three 4-byte char arrays, 12 bytes in total.
   NOTE(review): presumably this follows a GMON_TAG_CG_ARC tag in gmon.out
   -- confirm against the writer.  */
struct gmon_cg_arc_record
{
char afrompc[4];
char selfpc[4];
char count[4];
};
/* File header, 4 + 4 + 12 = 20 bytes.  NOTE(review): cookie and version
   presumably hold GMON_MAGIC and GMON_VERSION (defined above); the purpose
   of c[] is not visible here -- confirm against the writer.  */
struct gmon_hdr
{
char cookie[4];
char version[4];
char c[12];
};
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harness.  When enabled, __addsf3 / __subsf3 become
   wrappers that call both the *_c and the *_asm variant with the same
   arguments, return if the two results in r0 / r1 compare equal, and call
   abort otherwise.  The #defines at the end rename the real assembly
   entry points below to *_asm.  */
#if 0 /* DEBUG */
.global __addsf3
FUNC(__addsf3)
.balign 4
__addsf3:
push_s blink
push_s r1
bl.d __addsf3_c
push_s r0
ld_s r1,[sp,4]
st_s r0,[sp,4]
bl.d __addsf3_asm
pop_s r0
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
bl abort
ENDFUNC(__addsf3)
.global __subsf3
FUNC(__subsf3)
.balign 4
__subsf3:
push_s blink
push_s r1
bl.d __subsf3_c
push_s r0
ld_s r1,[sp,4]
st_s r0,[sp,4]
bl.d __subsf3_asm
pop_s r0
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
bl abort
ENDFUNC(__subsf3)
#define __addsf3 __addsf3_asm
#define __subsf3 __subsf3_asm
#endif /* DEBUG */
/* N.B. This is optimized for ARC700.
ARC600 has very different scheduling / instruction selection criteria. */
/* inputs: r0, r1
output: r0
clobber: r1-r10, r12, flags */
/* Single precision add and subtract.  __subsf3 toggles bit 31 of r1 and
   falls through into __addsf3.  The literal word emitted just before the
   entry points is loaded pc-relative into r9 as the exponent mask.
   Register set-up performed at entry:
     r4  = r0 with bit 31 cleared
     r6  = r1 & r9
     r10 = r0 ^ r1
     r12 = (r4 - r6) >> 23 (arithmetic)
   If r4 < r6 (unsigned) control goes to .Ldbl1_gt.  */
.balign 4
.global __addsf3
.global __subsf3
FUNC(__addsf3)
FUNC(__subsf3)
.long 0x7f800000 ; exponent mask
__subsf3:
bxor_l r1,r1,31
__addsf3:
ld r9,[pcl,-8]
bmsk r4,r0,30
xor r10,r0,r1
and r6,r1,r9
sub.f r12,r4,r6
asr_s r12,r12,23
blo .Ldbl1_gt
brhs r4,r9,.Linf_nan
brne r12,0,.Lsmall_shift
brge r10,0,.Ladd_same_exp ; r12 == 0
/* After subtracting, we need to normalize; when shifting to place the
leading 1 into position for the implicit 1 and adding that to DBL0,
we increment the exponent. Thus, we have to subtract one more than
the shift count from the exponent beforehand. Iff the exponent drops thus
below zero (before adding in the fraction with the leading one), we have
generated a denormal number. Denormal handling is basically reducing the
shift count so that we produce a zero exponent instead; FWIW, this way
the shift count can become zero (if we started out with exponent 1).
On the plus side, we don't need to check for denorm input, the result
of subtracting these looks just the same as denormals generated during
subtraction. */
bmsk r7,r1,30
breq r4,r7,.Lret0
sub.f r5,r4,r7
lsr r12,r4,23
neg.cs r5,r5
norm r3,r5
bmsk r2,r0,22
sub_s r3,r3,6
min r12,r12,r3
bic r1,r0,r2
sub_s r3,r12,1
asl_s r12,r12,23
asl r2,r5,r3
sub_s r1,r1,r12
add_s r0,r1,r2
j_s.d [blink]
bxor.cs r0,r0,31
.balign 4
.Linf_nan:
; If both inputs are inf, but with different signs, the result is NaN.
asr r12,r10,31
or_s r1,r1,r12
j_s.d [blink]
or.eq r0,r0,r1
.balign 4
.Ladd_same_exp:
/* This is a special case because we can't test for need to shift
down by checking if bit 23 of DBL0 changes. OTOH, here we know
that we always need to shift down. */
; adding the two floating point numbers together makes the sign
; cancel out and appear as carry; the exponent is doubled, and the
; fraction also in need of shifting left by one. The two implicit
; ones of the sources make an implicit 1 of the result, again
; non-existent in a place shifted by one.
add.f r0,r0,r1
btst_s r0,1
breq r6,0,.Ldenorm_add
add.ne r0,r0,1 ; round to even.
rrc r0,r0
bmsk r1,r9,23
add r0,r0,r1 ; increment exponent
bic.f 0,r9,r0; check for overflow -> infinity.
jne_l [blink]
mov_s r0,r9
j_s.d [blink]
bset.cs r0,r0,31
/* Reached from .Ladd_same_exp when r6 (exponent field of r1) is zero.  */
.Ldenorm_add:
j_s.d [blink]
add r0,r4,r1
/* Return r0 unchanged; reached when the shift count in r12 exceeds 25.  */
.Lret_dbl0:
j_s [blink]
.balign 4
.Lsmall_shift:
brhi r12,25,.Lret_dbl0
breq.d r6,0,.Ldenorm_small_shift
bmsk_s r1,r1,22
bset_s r1,r1,23
.Lfixed_denorm_small_shift:
neg r8,r12
asl r5,r1,r8
brge.d r10,0,.Ladd
lsr_l r1,r1,r12
/* subtract, abs(DBL0) > abs(DBL1) */
/* DBL0: original values
DBL1: fraction with explicit leading 1, shifted into place
r4: orig. DBL0 & 0x7fffffff
r6: orig. DBL1 & 0x7f800000
r9: 0x7f800000
r10: orig. DBL0H ^ DBL1H
r5 : guard bits */
.balign 4
.Lsub:
neg.f r12,r5
bmsk r3,r0,22
bset r5,r3,23
sbc.f r4,r5,r1
beq.d .Large_cancel_sub
bic r7,r0,r3
norm r3,r4
bmsk r6,r7,30
/* Shared tail; also entered from .Lrsub with r3, r4, r6, r7 and r12 set
   up the same way.  */
.Lsub_done:
sub_s r3,r3,6
breq r3,1,.Lsub_done_noshift
asl r5,r3,23
sub_l r3,r3,1
brlo r6,r5,.Ldenorm_sub
sub r0,r7,r5
neg_s r1,r3
lsr.f r2,r12,r1
asl_s r12,r12,r3
btst_s r2,0
bmsk.eq.f r12,r12,30
asl r5,r4,r3
add_s r0,r0,r2
adc.ne r0,r0,0
j_s.d [blink]
add_l r0,r0,r5
/* Return 0 in r0.  */
.Lret0:
j_s.d [blink]
mov_l r0,0
.balign 4
.Ldenorm_small_shift:
brne.d r12,1,.Lfixed_denorm_small_shift
sub_s r12,r12,1
brlt.d r10,0,.Lsub
mov_s r5,r12 ; zero r5, and align following code
.Ladd: ; Both bit 23 of DBL1 and bit 0 of r5 are clear.
bmsk r2,r0,22
add_s r2,r2,r1
bbit0.d r2,23,.Lno_shiftdown
add_s r0,r0,r1
bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity
bmsk r1,r2,22
lsr.ne.f r2,r2,2; cc: even ; hi: might round down
lsr.ne r1,r1,1
rcmp.hi r5,1; hi : round down
bclr.hi r0,r0,0
j_l.d [blink]
sub_s r0,r0,r1
/* r4: DBL0H & 0x7fffffff
r6: DBL1H & 0x7f800000
r9: 0x7f800000
r10: sign difference
r12: shift count (negative) */
.balign 4
.Ldbl1_gt:
brhs r6,r9,.Lret_dbl1 ; inf or NaN
neg r8,r12
brhi r8,25,.Lret_dbl1
.Lsmall_shift_dbl0:
breq.d r6,0,.Ldenorm_small_shift_dbl0
bmsk_s r0,r0,22
bset_s r0,r0,23
.Lfixed_denorm_small_shift_dbl0:
asl r5,r0,r12
brge.d r10,0,.Ladd_dbl1_gt
lsr r0,r0,r8
/* subtract, abs(DBL0) < abs(DBL1) */
/* DBL0: fraction with explicit leading 1, shifted into place
DBL1: original value
r6: orig. DBL1 & 0x7f800000
r9: 0x7f800000
r5: guard bits */
.balign 4
.Lrsub:
neg.f r12,r5
bmsk r5,r1,22
bic r7,r1,r5
bset r5,r5,23
sbc.f r4,r5,r0
bne.d .Lsub_done ; note: r6 is already set up.
norm r3,r4
/* Fall through */
/* r4:r12 : unnormalized result fraction
r7: result sign and exponent */
/* When seeing large cancellation, only the topmost guard bit might be set. */
.balign 4
.Large_cancel_sub:
breq_s r12,0,.Lret0
sub r0,r7,24<<23
xor.f 0,r0,r7 ; test if exponent is negative
tst.pl r9,r0 ; test if exponent is zero
jpnz [blink] ; return if non-denormal result
bmsk r6,r7,30
lsr r3,r6,23
xor r0,r6,r7
sub_s r3,r3,24-22
j_s.d [blink]
bset r0,r0,r3
; If a denorm is produced, we have an exact result -
; no need for rounding.
.balign 4
.Ldenorm_sub:
sub r3,r6,1
lsr.f r3,r3,23
xor r0,r6,r7
neg_s r1,r3
asl.ne r4,r4,r3
lsr_s r12,r12,r1
add_s r0,r0,r4
j_s.d [blink]
add.ne r0,r0,r12
.balign 4
/* Reached from .Lsub_done when the adjusted norm count in r3 equals 1.  */
.Lsub_done_noshift:
add.f 0,r12,r12
btst.eq r4,0
bclr r4,r4,23
add r0,r7,r4
j_s.d [blink]
adc.ne r0,r0,0
.balign 4
/* Reached from .Ladd when bit 23 of the fraction sum in r2 is clear.  */
.Lno_shiftdown:
add.f 0,r5,r5
btst.eq r0,0
cmp.eq r5,r5
j_s.d [blink]
add.cs r0,r0,1
/* Return r1 (the second operand) as the result.  */
.Lret_dbl1:
j_s.d [blink]
mov_l r0,r1
.balign 4
.Ldenorm_small_shift_dbl0:
sub.f r8,r8,1
bne.d .Lfixed_denorm_small_shift_dbl0
add_s r12,r12,1
brlt.d r10,0,.Lrsub
mov r5,0
.Ladd_dbl1_gt: ; both bit 23 of DBL0 and bit 0 of r5 are clear.
bmsk r2,r1,22
add_s r2,r2,r0
bbit0.d r2,23,.Lno_shiftdown_dbl1_gt
add_s r0,r1,r0
bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity
bmsk r1,r2,22
lsr.ne.f r2,r2,2; cc: even ; hi: might round down
lsr.ne r1,r1,1
rcmp.hi r5,1; hi : round down
bclr.hi r0,r0,0
j_l.d [blink]
sub_s r0,r0,r1
.balign 4
/* Same instruction sequence as .Lno_shiftdown, for the .Ladd_dbl1_gt path.  */
.Lno_shiftdown_dbl1_gt:
add.f 0,r5,r5
btst.eq r0,0
cmp.eq r5,r5
j_s.d [blink]
add.cs r0,r0,1
ENDFUNC(__addsf3)
ENDFUNC(__subsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* Register names for the two double-precision operands.  DBL0 occupies
   r0/r1 and DBL1 occupies r2/r3; which register of each pair is the low
   (L) and which the high (H) word depends on __LITTLE_ENDIAN__.  */
#ifdef __LITTLE_ENDIAN__
#define DBL0L r0
#define DBL0H r1
#define DBL1L r2
#define DBL1H r3
#else
#define DBL0L r1
#define DBL0H r0
#define DBL1L r3
#define DBL1H r2
#endif
/* Each mnemonic with an "_l" suffix expands to the same mnemonic without
   the suffix.  NOTE(review): presumably "_l" marks places where the long
   encoding is wanted for alignment / scheduling -- confirm.  */
#define add_l add
#define asr_l asr
#define j_l j
#define jne_l jne
#define jeq_l jeq
#define or_l or
#define mov_l mov
#define b_l b
#define beq_l beq
#define bne_l bne
#define brne_l brne
#define bset_l bset
#define sub_l sub
#define sub1_l sub1
#define lsr_l lsr
#define xor_l xor
#define bic_l bic
#define bmsk_l bmsk
#define bxor_l bxor
/* bcs_s is spelled as blo_s.  */
#define bcs_s blo_s
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/*
- calculate 15..18 bit inverse using a table of approximating polynomials.
Precision is higher for polynomials used to evaluate input with larger
value.
- do one newton-raphson iteration step to double the precision,
then multiply this with the divisor
-> more time to decide if dividend is subnormal
- the worst error propagation is on the side of the value range
with the least initial defect, thus giving us about 30 bits precision.
*/
#include "../arc-ieee-754.h"
/* mlo / mhi name the accumulator registers acc2 / acc1, from which this
   file reads multiply results.  mul64 / mulu64 each expand to a pair of
   multiply instructions on the same two operands, joined by a backquote
   and both with destination 0, so the result is only available through
   mlo / mhi.  */
#define mlo acc2
#define mhi acc1
#define mul64(b,c) mullw 0,b,c` machlw 0,b,c
#define mulu64(b,c) mululw 0,b,c` machulw 0,b,c
/* Disabled self-check harness.  When enabled, __divsf3 becomes a wrapper
   that calls __divsf3_c and __divsf3_asm with the same arguments and
   branches to abort if the two results differ; the #define at the end
   renames the real assembly entry point below to __divsf3_asm.  */
#if 0 /* DEBUG */
.global __divsf3
FUNC(__divsf3)
.balign 4
__divsf3:
push_s blink
push_s r1
bl.d __divsf3_c
push_s r0
ld_s r1,[sp,4]
st_s r0,[sp,4]
bl.d __divsf3_asm
pop_s r0
pop_s r1
pop_s blink
cmp r0,r1
#if 1
bne abort
jeq_s [blink]
b abort
#else
bne abort
j_s [blink]
#endif
ENDFUNC(__divsf3)
#define __divsf3 __divsf3_asm
#endif /* DEBUG */
/* __divsf3: single precision divide, r0 / r1 -> r0.
   Layout matters: the entry code addresses .L7f800000 and .Ldivtab with
   hard-coded pcl-relative offsets (see the ld.as / sub3 at __divsf3), so
   nothing may be inserted or resized between .Ldivtab and the entry point
   without recomputing them.
   .Ldivtab has 64 entries and is indexed with bits 17..22 of the divisor
   (lsr by 17, then a 6-bit mask).  */
FUNC(__divsf3)
.balign 4
.Ldivtab:
.long 0xfc0ffff0
.long 0xf46ffefd
.long 0xed1ffd2a
.long 0xe627fa8e
.long 0xdf7ff73b
.long 0xd917f33b
.long 0xd2f7eea3
.long 0xcd1fe986
.long 0xc77fe3e7
.long 0xc21fdddb
.long 0xbcefd760
.long 0xb7f7d08c
.long 0xb32fc960
.long 0xae97c1ea
.long 0xaa27ba26
.long 0xa5e7b22e
.long 0xa1cfa9fe
.long 0x9ddfa1a0
.long 0x9a0f990c
.long 0x9667905d
.long 0x92df878a
.long 0x8f6f7e84
.long 0x8c27757e
.long 0x88f76c54
.long 0x85df630c
.long 0x82e759c5
.long 0x8007506d
.long 0x7d3f470a
.long 0x7a8f3da2
.long 0x77ef341e
.long 0x756f2abe
.long 0x72f7212d
.long 0x709717ad
.long 0x6e4f0e44
.long 0x6c1704d6
.long 0x69e6fb44
.long 0x67cef1d7
.long 0x65c6e872
.long 0x63cedf18
.long 0x61e6d5cd
.long 0x6006cc6d
.long 0x5e36c323
.long 0x5c76b9f3
.long 0x5abeb0b7
.long 0x5916a79b
.long 0x57769e77
.long 0x55de954d
.long 0x54568c4e
.long 0x52d6834d
.long 0x51667a7f
.long 0x4ffe71b5
.long 0x4e9e68f1
.long 0x4d466035
.long 0x4bf65784
.long 0x4aae4ede
.long 0x496e4646
.long 0x48363dbd
.long 0x47063547
.long 0x45de2ce5
.long 0x44be2498
.long 0x43a61c64
.long 0x4296144a
.long 0x41860c0e
.long 0x407e03ee
.L7f800000:
.long 0x7f800000
.balign 4
.global __divsf3_support
__divsf3_support:
/* Builds the result from the sign of r0 ^ r1 and the bits in r9; the
   sub.eq turns a sign-only word into all-ones for the 0/0 case noted
   below.  */
.Linf_NaN:
bclr.f 0,r0,31 ; 0/0 -> NaN
xor_s r0,r0,r1
bmsk r1,r0,30
bic_s r0,r0,r1
sub.eq r0,r0,1
j_s.d [blink]
or r0,r0,r9
/* Return a zero that carries the sign of r0 ^ r1.  */
.Lret0:
xor_s r0,r0,r1
bmsk r1,r0,30
j_s.d [blink]
bic_s r0,r0,r1
/* N.B. the spacing between divtab and the sub3 to get its address must
be a multiple of 8. */
/* Entry: r9 = 0x7f800000, r3 = address of .Ldivtab, r11 = exponent field
   of r1, r2 = exponent field of r0 (set a few instructions later).  */
__divsf3:
ld.as r9,[pcl,-9]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
sub3 r3,pcl,37;(.-.Ldivtab) >> 3
lsr r2,r1,17
and.f r11,r1,r9
bmsk r5,r2,5
beq.d .Ldenorm_fp1
asl r6,r1,8
and.f r2,r0,r9
ld.as r5,[r3,r5]
asl r4,r1,9
bset r6,r6,31
breq.d r11,r9,.Linf_nan_fp1
.Lpast_denorm_fp1:
mululw 0,r5,r4
machulw r8,r5,r4
breq.d r2,r9,.Linf_nan_fp0
asl r5,r5,13
sub r7,r5,r8
mululw 0,r7,r6
machulw r8,r7,r6
beq.d .Ldenorm_fp0
asl r12,r0,8
mulu64 (r8,r7)
bset r3,r12,31
.Lpast_denorm_fp0:
cmp_s r3,r6
lsr.cc r3,r3,1
add_s r2,r2, /* wait for immediate */ \
0x3f000000
sub r7,r7,mhi ; u1.31 inverse, about 30 bit
mulu64 (r3,r7)
sbc r2,r2,r11
xor.f 0,r0,r1
and r0,r2,r9
bclr r3,r9,23 ; 0x7f000000
brhs.d r2,r3,.Linf_denorm
bxor.mi r0,r0,31
.Lpast_denorm:
add r3,mhi,0x22 ; round to nearest or higher
tst r3,0x3c ; check if rounding was unsafe
lsr r3,r3,6
jne.d [blink] ; return if rounding was safe.
add_s r0,r0,r3
/* work out exact rounding if we fall through here. */
/* We know that the exact result cannot be represented in single
precision. Find the mid-point between the two nearest
representable values, multiply with the divisor, and check if
the result is larger than the dividend. */
add_s r3,r3,r3
sub_s r3,r3,1
mulu64 (r3,r6)
asr.f 0,r0,1 ; for round-to-even in case this is a denorm
rsub r2,r9,25
asl_s r12,r12,r2
sub.f 0,r12,mlo
j_s.d [blink]
sub.mi r0,r0,1
/* Divisor has an all-ones exponent field (r11 == r9).  */
.Linf_nan_fp1:
lsr_s r0,r0,31
bmsk.f 0,r1,22
asl_s r0,r0,31
bne_s 0f ; inf/inf -> nan
brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan
0: j_s.d [blink]
mov r0,-1
/* Flip the sign of r0 if r1 is negative, then return.  */
.Lsigned0:
.Linf_nan_fp0:
tst_s r1,r1
j_s.d [blink]
bxor.mi r0,r0,31
.balign 4
.global __divsf3
/* For denormal results, it is possible that an exact result needs
rounding, and thus the round-to-even rule has to come into play. */
.Linf_denorm:
brlo r2,0xc0000000,.Linf
.Ldenorm:
asr_s r2,r2,23
bic r0,r0,r9
neg r9,r2
brlo.d r9,25,.Lpast_denorm
lsr r3,mlo,r9
/* Fall through: return +- 0 */
j_s [blink]
.Linf:
j_s.d [blink]
or r0,r0,r9
.balign 4
/* Divisor has a zero exponent field: normalise it (norm / asl), redo the
   table lookup, and either rejoin the main path at .Lpast_denorm_fp1 or
   take the .Linf_NaN exit.  */
.Ldenorm_fp1:
norm.f r12,r6 ; flag for x/0 -> Inf check
add r6,r6,r6
rsub r5,r12,16
ror r5,r1,r5
bmsk r5,r5,5
bic.ne.f 0, \
0x60000000,r0 ; large number / denorm -> Inf
ld.as r5,[r3,r5]
asl r6,r6,r12
beq.d .Linf_NaN
and.f r2,r0,r9
add r4,r6,r6
asl_s r12,r12,23
bne.d .Lpast_denorm_fp1
add_s r2,r2,r12
/* Dividend has a zero exponent field: normalise it and rejoin the main
   path at .Lpast_denorm_fp0, or return a signed zero via .Lret0.  */
.Ldenorm_fp0:
mulu64 (r8,r7)
bclr r12,r12,31
norm.f r3,r12 ; flag for 0/x -> 0 check
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
beq_s .Lret0
asl_s r12,r12,r3
asl_s r3,r3,23
add_s r12,r12,r12
add r11,r11,r3
b.d .Lpast_denorm_fp0
mov_s r3,r12
ENDFUNC(__divsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../arc-ieee-754.h"
/* Disabled self-check harness.  When enabled, __muldf3 becomes a wrapper
   that calls __muldf3_c and __muldf3_asm with the same arguments, returns
   if both result words (compared as r0 with r2 and r1 with r3) are equal,
   and branches to abort otherwise.  The #define at the end renames the
   real assembly entry point below to __muldf3_asm.  */
#if 0 /* DEBUG */
.global __muldf3
.balign 4
__muldf3:
push_s blink
push_s r2
push_s r3
push_s r0
bl.d __muldf3_c
push_s r1
ld_s r2,[sp,12]
ld_s r3,[sp,8]
st_s r0,[sp,12]
st_s r1,[sp,8]
pop_s r1
bl.d __muldf3_asm
pop_s r0
pop_s r3
pop_s r2
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink]
b abort
#define __muldf3 __muldf3_asm
#endif /* DEBUG */
/* __muldf3: double precision multiply, DBL0 * DBL1 -> DBL0 (see the
   DBL0L/DBL0H/DBL1L/DBL1H macros in arc-ieee-754.h for the register
   assignment).
   Layout matters: the entry code loads the constant at .L7ff00000 (placed
   after ENDFUNC) through a hard-coded pcl-relative word offset of 0x67, so
   nothing between __muldf3 and .L7ff00000 may be inserted or resized
   without recomputing it.
   Working registers set up at entry: r9 = 0x7ff00000, r11 = DBL0H & r9,
   r12 = DBL1H & r9.  A zero r11 / r12 diverts to the denormal handlers and
   a value equal to r9 diverts to .Linf_nan.  */
__muldf3_support: /* This label makes debugger output saner. */
.balign 4
FUNC(__muldf3)
/* Entered from .Ldenorm_dbl1 when DBL1H with bit 31 cleared is zero:
   returns a signed zero if DBL1L is zero as well, otherwise normalises
   DBL1 from its low word and falls through into __muldf3.  */
.Ldenorm_2:
breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
norm.f r12,DBL1L
mov.mi r12,21
add.pl r12,r12,22
neg r11,r12
asl_s r12,r12,20
lsr.f DBL1H,DBL1L,r11
ror DBL1L,DBL1L,r11
sub_s DBL0H,DBL0H,r12
mov.eq DBL1H,DBL1L
sub_l DBL1L,DBL1L,DBL1H
/* Fall through. */
.global __muldf3
.balign 4
__muldf3:
mululw 0,DBL0L,DBL1L
machulw r4,DBL0L,DBL1L
ld.as r9,[pcl,0x67] ; ((.L7ff00000-.+2)/4)]
bmsk r6,DBL0H,19
bset r6,r6,20
mov r8,acc2
mululw 0,r4,1
and r11,DBL0H,r9
breq.d r11,0,.Ldenorm_dbl0
and r12,DBL1H,r9
breq.d r12,0,.Ldenorm_dbl1
maclw 0,r6,DBL1L
machulw 0,r6,DBL1L
breq.d r11,r9,.Linf_nan
bmsk r10,DBL1H,19
breq.d r12,r9,.Linf_nan
bset r10,r10,20
maclw 0,r10,DBL0L
machulw r5,r10,DBL0L
add_s r12,r12,r11 ; add exponents
mov r4,acc2
mululw 0,r5,1
maclw 0,r6,r10
machulw r7,r6,r10 ; fraction product in r7:acc2:r4:r8
tst r8,r8
bclr r8,r9,30 ; 0x3ff00000
bset.ne r4,r4,0 ; put least significant word into sticky bit
bclr r6,r9,20 ; 0x7fe00000
lsr.f r10,r7,9
rsub.eq r8,r8,r9 ; 0x40000000
sub r12,r12,r8 ; subtract bias + implicit 1
brhs.d r12,r6,.Linf_denorm
rsub r10,r10,12
.Lshift_frac:
neg r8,r10
asl r6,r4,r10
lsr DBL0L,r4,r8
add.f 0,r6,r6
btst.eq DBL0L,0
cmp.eq r4,r4 ; round to nearest / round to even
asl r4,acc2,r10
lsr r5,acc2,r8
adc.f DBL0L,DBL0L,r4
xor.f 0,DBL0H,DBL1H
asl r7,r7,r10
add_s r12,r12,r5
adc DBL0H,r12,r7
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
/* N.B. This is optimized for ARC700.
ARC600 has very different scheduling / instruction selection criteria. */
/* If one number is denormal, subtract some from the exponent of the other
one (if the other exponent is too small, return 0), and normalize the
denormal. Then re-run the computation. */
/* Return zero, keeping only bit 31 of DBL0H.  */
.Lret0_2:
lsr_s DBL0H,DBL0H,31
asl_s DBL0H,DBL0H,31
j_s.d [blink]
mov_s DBL0L,0
.balign 4
/* DBL0 has a zero exponent field: swap the two operands (via r12) so the
   code at .Ldenorm_dbl1 handles it as DBL1.  */
.Ldenorm_dbl0:
mov_s r12,DBL0L
mov_s DBL0L,DBL1L
mov_s DBL1L,r12
mov_s r12,DBL0H
mov_s DBL0H,DBL1H
mov_s DBL1H,r12
and r11,DBL0H,r9
.Ldenorm_dbl1:
brhs r11,r9,.Linf_nan
brhs 0x3ca00001,r11,.Lret0
sub_s DBL0H,DBL0H,DBL1H
bmsk.f DBL1H,DBL1H,30
add_s DBL0H,DBL0H,DBL1H
beq.d .Ldenorm_2
norm r12,DBL1H
sub_s r12,r12,10
asl r5,r12,20
asl_s DBL1H,DBL1H,r12
sub DBL0H,DBL0H,r5
neg r5,r12
lsr r6,DBL1L,r5
asl_s DBL1L,DBL1L,r12
b.d __muldf3
add_s DBL1H,DBL1H,r6
/* Return zero; only bit 31 of DBL0H ^ DBL1H survives as the sign.  */
.Lret0: xor_s DBL0H,DBL0H,DBL1H
bclr DBL1H,DBL0H,31
xor_s DBL0H,DBL0H,DBL1H
j_s.d [blink]
mov_s DBL0L,0
.balign 4
.Linf_nan:
bclr r12,DBL1H,31
xor_s DBL1H,DBL1H,DBL0H
bclr_s DBL0H,DBL0H,31
max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
or.f 0,DBL0H,DBL0L
mov_s DBL0L,0
or.ne.f DBL1L,DBL1L,r12
not_s DBL0H,DBL0L ; inf * 0 -> NaN
mov.ne DBL0H,r8
tst_s DBL1H,DBL1H
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
/* We have checked for infinity / NaN input before, and transformed
denormalized inputs into normalized inputs. Thus, the worst case
exponent overflows are:
1 + 1 - 0x400 == 0xc02 : maximum underflow
0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
N.B. 0x7e and 0x7f are also values for overflow.
If (r12 <= -54), we have an underflow to zero. */
.balign 4
.Linf_denorm:
lsr r6,r12,28
brlo.d r6,0xc,.Linf
asr r6,r12,20
add.f r10,r10,r6
brgt.d r10,0,.Lshift_frac
mov_s r12,0
beq.d .Lround_frac
add r10,r10,32
/* Shift the fraction right by a whole word per pass; the word shifted out
   is folded into bit 1 of r4 when it is non-zero.  */
.Lshift32_frac:
tst r4,r4
mov r4,acc2
bset.ne r4,r4,1
mululw 0,r7,1
brge.d r10,1,.Lshift_frac
mov r7,0
breq.d r10,0,.Lround_frac
add r10,r10,32
brgt r10,21,.Lshift32_frac
b_s .Lret0
.Lround_frac:
add.f 0,r4,r4
btst.eq acc2,0
mov_s DBL0L,acc2
mov_s DBL0H,r7
adc.eq.f DBL0L,DBL0L,0
j_s.d [blink]
adc.eq DBL0H,DBL0H,0
/* Return r9 (0x7ff00000) with a zero low word; bit 31 is set when the
   operand signs differ.  */
.Linf: mov_s DBL0L,0
xor.f DBL1H,DBL1H,DBL0H
mov_s DBL0H,r9
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
ENDFUNC(__muldf3)
.balign 4
/* Literal loaded into r9 at function entry.  */
.L7ff00000:
.long 0x7ff00000
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-test shim.  When enabled it defines __mulsf3 as a
   wrapper that runs __mulsf3_c (presumably a C reference implementation;
   it is not defined in this file) and then __mulsf3_asm (the real
   routine, renamed by the #define that closes this block) on the same
   two operands.  It returns normally when both results are bit-identical
   or when both have all exponent bits set and a non-zero fraction;
   otherwise it calls abort.  */
.global __mulsf3
FUNC(__mulsf3)
.balign 4
__mulsf3:
push_s blink
push_s r1
/* The push of r0 is in the delay slot of the call, so both operands are
   on the stack before __mulsf3_c starts.  */
bl.d __mulsf3_c
push_s r0
/* Reload the second operand and park the __mulsf3_c result in the stack
   slot it came from.  */
ld_s r1,[sp,4]
st_s r0,[sp,4]
/* The delay-slot pop restores the first operand for the second call.  */
bl.d __mulsf3_asm
pop_s r0
/* Here r0 = __mulsf3_asm result and, after the pop, r1 = the saved
   __mulsf3_c result.  */
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
/* Results differ: tolerate that only if every exponent bit is set in
   both results ...  */
and r12,r0,r1
bic.f 0,0x7f800000,r12
bne 0f
/* ... and both fraction fields are non-zero.  */
bmsk.f 0,r0,22
bmsk.ne.f r1,r1,22
jne_s [blink] ; both NaN -> OK
0: bl abort
ENDFUNC(__mulsf3)
#define __mulsf3 __mulsf3_asm
#endif /* DEBUG */
.balign 4
.global __mulsf3
FUNC(__mulsf3)
/* __mulsf3, variant built on the mululw / machulw instruction pair.
   In:   r0, r1 = the two operands as raw single-precision bit patterns.
   Out:  r0 = product bit pattern; every exit is a jump through blink.
   Register roles set up below:
     r9  = 0x7f800000 (exponent mask), r4 = 0x7fffffff once loaded,
     r11 = exponent field of r0, r12 = exponent field of r1,
     r2  = significand of r0 with bit 23 set, shifted left by 8,
     r3  = significand of r1 with bit 23 set,
     r6 / r7 = high / low part of the significand product.
   Also written: r1, r8.
   The ld.as displacements are hand-computed word distances from pcl to
   the constants after the code (see the trailing comment on each load);
   they have to be recomputed if any instruction in between changes
   size.  */
__mulsf3:
ld.as r9,[pcl,80]; [pcl,((.L7f800000-.+2)/4)]
bmsk r4,r1,22
bset r2,r0,23
asl_s r2,r2,8
bset r3,r4,23
and r11,r0,r9
/* A zero exponent field means zero or subnormal; those operands are
   handled out of line.  The instruction after each breq.d is its delay
   slot and executes whether or not the branch is taken.  */
breq.d r11,0,.Ldenorm_dbl0
and r12,r1,r9
breq.d r12,0,.Ldenorm_dbl1
xor_s r0,r0,r1
/* Multiply the significands.  The high part is written to r6; the low
   part is read from acc2 further down.
   NOTE(review): exact mululw / machulw semantics come from the ARC700
   multiply extension and should be confirmed against the ISA manual.  */
mululw 0,r2,r3
machulw r6,r2,r3
/* An all-ones exponent field means Inf or NaN.  */
breq.d r11,r9,.Linf_nan_dbl0
ld.as r4,[pcl,69]; [pcl,((.L7fffffff-.+2)/4)]
breq.d r12,r9,.Linf_nan_dbl1
/* The asl.f below is the delay slot of the breq.d above and also the
   re-entry point used by the subnormal paths.  */
.Lpast_denorm:
/* N is set from bit 23 of r6.  When that bit is clear the .pl
   instructions shift r6:r7 left by one and clear bit 23 of r6.  */
asl.f 0,r6,8
mov r7,acc2
add.pl r6,r6,r6
bclr.pl r6,r6,23
add.pl.f r7,r7,r7
add.cs r6,r6,1
/* Carry = least significant bit of r6; it takes part in the rounding
   addition on r7 below.  */
lsr.f 0,r6,1
add_s r12,r12,r11
adc.f 0,r7,r4
add_s r12,r12, \
-0x3f800000
/* r8 = r6 + (sum of exponent fields - 0x3f800000) + carry from the
   rounding addition.  */
adc.f r8,r6,r12
tst.pl r8,r9
/* Keep only bit 31 of r0, which holds the xor of the operand signs.  */
bic r0,r0,r4
min r3,r8,r9
/* Fast exit: taken when the flags are positive and non-zero; the
   delay-slot add merges r3 into the sign under the same condition.  */
jpnz.d [blink]
add.pnz r0,r0,r3
; infinity or denormal number
add.ne.f r3,r3,r3
asr_s r3,r3,23+1
bset r6,r6,23
bpnz.d .Linfinity
sub_s r3,r3,1
/* Subnormal result: r2 = right-shift amount for the significand.  */
neg_s r2,r3
brhi.d r2,24,.Lret_r0 ; right shift > 24 -> return +-0
lsr r2,r6,r2
/* r9 collects the bits shifted out of r6; bit 0 is forced on when the
   low product word r7 is non-zero.  */
asl r9,r6,r3
lsr.f 0,r2,1
tst r7,r7
add_s r0,r0,r2
bset.ne r9,r9,0
adc.f 0,r9,r4
j_s.d [blink]
add.cs r0,r0,1
/* Overflow: r0 holds only the sign here and r9 is still 0x7f800000.  */
.Linfinity:
j_s.d [blink]
add_s r0,r0,r9
.Lret_r0: j_s [blink]
.balign 4
/* Exponent field of r0 is zero.  Bit 31 of r2 (set at entry) is cleared
   again and r2 is normalised using the count from norm.f; that count,
   shifted into exponent position, is subtracted from r12.  */
.Ldenorm_dbl0:
bclr_s r2,r2,31
norm.f r4,r2
add_s r2,r2,r2
asl r2,r2,r4
breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
asl r4,r4,23
mululw 0,r2,r3
machulw r6,r2,r3
/* Z is still the result of norm.f, i.e. set when r2 was zero.  */
sub.ne.f r12,r12,r4
ld.as r4,[pcl,28]; [pcl,((.L7fffffff-.+2)/4)]
bhi.d .Lpast_denorm
xor_s r0,r0,r1
/* Not taken: return a zero carrying only the sign bit of r0 ^ r1.  */
bmsk r1,r0,30
j_s.d [blink]
bic_s r0,r0,r1
.balign 4
/* r0 has a zero exponent field and r1 has an all-ones exponent field.
   If r0 is +-0, r1 is replaced by -1 before the shared exit code.  */
.Ldenorm_dbl0_inf_nan_dbl1:
bmsk.f 0,r0,30
mov.eq r1,-1
.Linf_nan_dbl1:
xor_s r1,r1,r0
.Linf_nan_dbl0:
bclr_s r1,r1,31
j_s.d [blink]
xor_s r0,r0,r1
.balign 4
/* Exponent field of r1 is zero; r4 holds its fraction bits.  Mirror of
   .Ldenorm_dbl0, with the exponent correction applied to r11.  */
.Ldenorm_dbl1:
breq.d r11,r9,.Linf_nan_dbl0_2
norm.f r3,r4
sub_s r3,r3,7
asl r4,r4,r3
mululw 0,r2,r4
machulw r6,r2,r4
sub_s r3,r3,1
asl_s r3,r3,23
sub.ne.f r11,r11,r3
ld.as r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)]
bhi.d .Lpast_denorm
bmsk r8,r0,30
/* Not taken: return a zero carrying only the sign bit.  */
j_s.d [blink]
bic r0,r0,r8
.balign 4
.Linf_nan_dbl0_2:
bclr_s r1,r1,31
xor_s r0,r0,r1
sub.eq r1,r1,1 ; inf/nan * 0 -> nan
bic.f 0,r9,r1
j_s.d [blink]
or.eq r0,r0,r1 ; r1 nan -> result nan
.balign 4
/* Literal pool addressed by the pcl-relative loads above.  */
.L7f800000:
.long 0x7f800000
.L7fffffff:
.long 0x7fffffff
ENDFUNC(__mulsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/*
- calculate 15..18 bit inverse using a table of approximating polynomials.
precision is higher for polynomials used to evaluate input with larger
value.
- do one newton-raphson iteration step to double the precision,
then multiply this with the divisor
-> more time to decide if dividend is subnormal
- the worst error propagation is on the side of the value range
with the least initial defect, thus giving us about 30 bits precision.
*/
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-test shim.  When enabled it defines __divsf3 as a
   wrapper that runs __divsf3_c (presumably a C reference implementation;
   it is not defined in this file) and then __divsf3_asm (the real
   routine, renamed by the #define that closes this block) on the same
   operands, and branches to abort unless the two results are
   bit-identical.  */
.global __divsf3
FUNC(__divsf3)
.balign 4
__divsf3:
push_s blink
push_s r1
/* The push of r0 is in the delay slot, so both operands are saved
   before __divsf3_c runs.  */
bl.d __divsf3_c
push_s r0
/* Reload the divisor and park the __divsf3_c result in its slot.  */
ld_s r1,[sp,4]
st_s r0,[sp,4]
/* The delay-slot pop restores the dividend for the second call.  */
bl.d __divsf3_asm
pop_s r0
/* r0 = __divsf3_asm result, r1 = saved __divsf3_c result.  */
pop_s r1
pop_s blink
cmp r0,r1
#if 1
bne abort
jeq_s [blink]
/* Not reachable through the two conditional jumps above.  */
b abort
#else
bne abort
j_s [blink]
#endif
ENDFUNC(__divsf3)
#define __divsf3 __divsf3_asm
#endif /* DEBUG */
FUNC(__divsf3)
/* __divsf3, variant built on mulu64 (results are read from mhi / mlo).
   In:   r0 = dividend, r1 = divisor, as raw single-precision bit
         patterns.
   Out:  r0 = quotient bit pattern; every exit is a jump through blink.
   Method (see the comment that precedes the include in this file): look
   up an approximate reciprocal of the divisor in .Ldivtab, refine it,
   multiply by the dividend, then round; an exact check is done only when
   the quick rounding test is inconclusive.
   Register roles: r9 = 0x7f800000 (exponent mask), r11 = exponent field
   of the divisor, r2 = exponent field of the dividend and later the
   result exponent, r6 = divisor significand with bit 31 set, r12 / r3 =
   dividend significand, r7 = reciprocal estimate.  Also written: r1, r4,
   r5.
   The sub3 and ld.as displacements at the entry point are hand-computed
   distances back to .Ldivtab and .L7f800000; they have to be recomputed
   if anything between those labels and the entry point changes size.  */
.balign 4
/* 64 entries, indexed by bits 17..22 of the divisor (see the lsr /
   bmsk_s pair at the entry point).  */
.Ldivtab:
.long 0xfc0ffff0
.long 0xf46ffefd
.long 0xed1ffd2a
.long 0xe627fa8e
.long 0xdf7ff73b
.long 0xd917f33b
.long 0xd2f7eea3
.long 0xcd1fe986
.long 0xc77fe3e7
.long 0xc21fdddb
.long 0xbcefd760
.long 0xb7f7d08c
.long 0xb32fc960
.long 0xae97c1ea
.long 0xaa27ba26
.long 0xa5e7b22e
.long 0xa1cfa9fe
.long 0x9ddfa1a0
.long 0x9a0f990c
.long 0x9667905d
.long 0x92df878a
.long 0x8f6f7e84
.long 0x8c27757e
.long 0x88f76c54
.long 0x85df630c
.long 0x82e759c5
.long 0x8007506d
.long 0x7d3f470a
.long 0x7a8f3da2
.long 0x77ef341e
.long 0x756f2abe
.long 0x72f7212d
.long 0x709717ad
.long 0x6e4f0e44
.long 0x6c1704d6
.long 0x69e6fb44
.long 0x67cef1d7
.long 0x65c6e872
.long 0x63cedf18
.long 0x61e6d5cd
.long 0x6006cc6d
.long 0x5e36c323
.long 0x5c76b9f3
.long 0x5abeb0b7
.long 0x5916a79b
.long 0x57769e77
.long 0x55de954d
.long 0x54568c4e
.long 0x52d6834d
.long 0x51667a7f
.long 0x4ffe71b5
.long 0x4e9e68f1
.long 0x4d466035
.long 0x4bf65784
.long 0x4aae4ede
.long 0x496e4646
.long 0x48363dbd
.long 0x47063547
.long 0x45de2ce5
.long 0x44be2498
.long 0x43a61c64
.long 0x4296144a
.long 0x41860c0e
.long 0x407e03ee
.L7f800000:
.long 0x7f800000
.balign 4
.global __divsf3_support
__divsf3_support:
/* Reached from .Ldenorm_fp1 for a zero divisor or for a large dividend
   over a subnormal divisor.  The result is the sign of r0 ^ r1 with the
   exponent mask r9 or-ed in; when the dividend is +-0 the sub.eq turns
   the sign word into all ones or 0x7fffffff first.  */
.Linf_NaN:
bclr.f 0,r0,31 ; 0/0 -> NaN
xor_s r0,r0,r1
bmsk r1,r0,30
bic_s r0,r0,r1
sub.eq r0,r0,1
j_s.d [blink]
or r0,r0,r9
/* Return zero carrying only the sign bit of r0 ^ r1.  */
.Lret0:
xor_s r0,r0,r1
bmsk r1,r0,30
j_s.d [blink]
bic_s r0,r0,r1
/* N.B. the spacing between divtab and the sub3 to get its address must
be a multiple of 8. */
__divsf3:
/* r2 = table index taken from bits 17..22 of the divisor; r3 = address
   of .Ldivtab.  */
lsr r2,r1,17
sub3 r3,pcl,37 ; (.-.Ldivtab) >> 3
bmsk_s r2,r2,5
ld.as r5,[r3,r2]
asl r4,r1,9
ld.as r9,[pcl,-13]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
mulu64 r5,r4
and.f r11,r1,r9
asl r6,r1,8
bset r6,r6,31
/* Divisor exponent field zero: zero or subnormal divisor.  */
beq.d .Ldenorm_fp1
asl r5,r5,13
/* Divisor exponent field all ones: Inf or NaN divisor.  */
breq.d r11,r9,.Linf_nan_fp1
and.f r2,r0,r9
sub r7,r5,mhi
mulu64 r7,r6
/* Flags still come from the and.f on the dividend exponent field.  */
beq.d .Ldenorm_fp0
asl r12,r0,8
breq.d r2,r9,.Linf_nan_fp0
mulu64 mhi,r7
.Lpast_denorm_fp1:
bset r3,r12,31
.Lpast_denorm_fp0:
/* Halve the dividend significand when it is not below the divisor
   significand; the carry from this cmp is consumed by the sbc on the
   exponent below.  */
cmp_s r3,r6
lsr.cc r3,r3,1
add_s r2,r2, /* wait for immediate */ \
0x3f000000
sub r7,r7,mhi ; u1.31 inverse, about 30 bit
mulu64 r3,r7
sbc r2,r2,r11
/* N = sign of the quotient.  */
xor.f 0,r0,r1
and r0,r2,r9
bclr r3,r9,23 ; 0x7f000000
/* An unsigned exponent of 0x7f000000 or above means overflow or a
   wrapped (negative) exponent; sort those out at .Linf_denorm.  */
brhs.d r2,r3,.Linf_denorm
bxor.mi r0,r0,31
.Lpast_denorm:
add r3,mhi,0x22 ; round to nearest or higher
/* Entry point for the subnormal path, which arrives with the shifted
   and biased quotient already in r3.  */
.Lround_quot:
tst r3,0x3c ; check if rounding was unsafe
lsr r3,r3,6
jne.d [blink] ; return if rounding was safe.
add_s r0,r0,r3
/* work out exact rounding if we fall through here. */
/* We know that the exact result cannot be represented in single
precision. Find the mid-point between the two nearest
representable values, multiply with the divisor, and check if
the result is larger than the dividend. */
add_s r3,r3,r3
sub_s r3,r3,1
mulu64 r3,r6
asr.f 0,r0,1 ; for round-to-even in case this is a denorm
/* r9 is 0x7f800000 on the normal path and the subnormal shift count on
   the .Ldenorm path.  */
rsub r2,r9,25
asl_s r12,r12,r2
sub.f 0,r12,mlo
j_s.d [blink]
sub.mi r0,r0,1
/* Divisor is Inf or NaN.  r0 is reduced to the dividend sign; r2 still
   holds the dividend exponent field.  */
.Linf_nan_fp1:
lsr_s r0,r0,31
bmsk.f 0,r1,22
asl_s r0,r0,31
bne_s 0f ; inf/inf -> nan
brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan
0: j_s.d [blink]
mov r0,-1
/* Flip the sign bit of r0 when the divisor is negative.  */
.Lsigned0:
.Linf_nan_fp0:
tst_s r1,r1
j_s.d [blink]
bxor.mi r0,r0,31
.balign 4
.global __divsf3
/* For denormal results, it is possible that an exact result needs
rounding, and thus the round-to-even rule has to come into play. */
.Linf_denorm:
brlo r2,0xc0000000,.Linf
/* Subnormal result.  r9 becomes the right-shift count (the negated
   exponent).  The high word of the quotient product is shifted by that
   count and given the same 0x22 rounding bias as on the normal path
   before the shared rounding code at .Lround_quot is entered; the add is
   in the delay slot of the branch.  Previously the shifted value was
   taken from mlo and then overwritten by the add at .Lpast_denorm, so
   the shift was never applied.  */
.Ldenorm:
asr_s r2,r2,23
bic r0,r0,r9
neg r9,r2
lsr r3,mhi,r9
brlo.d r9,25,.Lround_quot
add r3,r3,0x22 ; round to nearest or higher
/* Fall through: return +- 0 */
j_s [blink]
/* Overflow: or the exponent mask into the sign held in r0.  */
.Linf:
j_s.d [blink]
or r0,r0,r9
.balign 4
/* Divisor exponent field is zero.  The divisor significand is
   normalised with norm.f, the table lookup and first refinement are
   redone on the normalised value, and the shift count (moved to exponent
   position) is added to the dividend exponent in r2.  */
.Ldenorm_fp1:
bclr r6,r6,31
norm.f r12,r6 ; flag for x/0 -> Inf check
add r6,r6,r6
rsub r5,r12,16
ror r5,r1,r5
asl r6,r6,r12
bmsk r5,r5,5
ld.as r5,[r3,r5]
add r4,r6,r6
; load latency
mulu64 r5,r4
bic.ne.f 0, \
0x60000000,r0 ; large number / denorm -> Inf
asl r5,r5,13
sub r7,r5,mhi
beq.d .Linf_NaN
mulu64 r7,r6
asl_s r12,r12,23
and.f r2,r0,r9
add_s r2,r2,r12
asl r12,r0,8
bne.d .Lpast_denorm_fp1
/* The mulu64 on the next line is both the delay slot of the bne.d above
   and the entry point for a dividend with a zero exponent field.  */
.Ldenorm_fp0: mulu64 mhi,r7
bclr r12,r12,31
norm.f r3,r12 ; flag for 0/x -> 0 check
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
beq_s .Lret0
/* Normalise the dividend significand and add the shift count, moved to
   exponent position, to the divisor exponent in r11.  */
asl_s r12,r12,r3
asl_s r3,r3,23
add_s r12,r12,r12
add r11,r11,r3
b.d .Lpast_denorm_fp0
mov_s r3,r12
ENDFUNC(__divsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-test shim.  When enabled it defines __muldf3 as a
   wrapper that runs __muldf3_c (presumably a C reference implementation;
   it is not defined in this file) and then __muldf3_asm (the real
   routine, renamed by the #define that closes this block) on the same
   operands in r0-r3.  It returns normally when both 64-bit results match
   or when the 0x7ff80000 bits are set in both DBL0H and DBL1H; otherwise
   it branches to abort.  */
.global __muldf3
.balign 4
__muldf3:
push_s blink
push_s r2
push_s r3
push_s r0
/* The push of r1 is in the delay slot, so r0-r3 are all saved before
   __muldf3_c runs.  */
bl.d __muldf3_c
push_s r1
/* Reload the second operand and park the __muldf3_c result in the two
   stack slots it came from.  */
ld_s r2,[sp,12]
ld_s r3,[sp,8]
st_s r0,[sp,12]
st_s r1,[sp,8]
pop_s r1
/* The delay-slot pop completes the restore of the first operand.  */
bl.d __muldf3_asm
pop_s r0
/* r0 / r1 = __muldf3_asm result, r2 / r3 = saved __muldf3_c result.  */
pop_s r3
pop_s r2
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink]
and r12,DBL0H,DBL1H
bic.f 0,0x7ff80000,r12 ; both NaN -> OK
jeq_s [blink]
b abort
#define __muldf3 __muldf3_asm
#endif /* DEBUG */
__muldf3_support: /* This label makes debugger output saner. */
.balign 4
FUNC(__muldf3)
/* __muldf3, variant built on mulu64 (results are read from mhi / mlo).
   In:   two double-precision operands as raw bit patterns in the
         register pairs named DBL0L / DBL0H and DBL1L / DBL1H.  Those
         names are not defined in this file; presumably they are macros
         from the included arc-ieee-754.h selecting the low and high
         word of each operand -- confirm there.
   Out:  product in DBL0L / DBL0H; every exit is a jump through blink.
   The significand product is built from four 32x32 partial products and
   collected in r7:r5:r4; r8 keeps the lowest word for the sticky bit.
   r9 = 0x7ff00000 (exponent mask), r11 / r12 = exponent fields.  Also
   written: r6, r10.
   The ld.as displacement at the entry point is a hand-computed word
   distance to .L7ff00000 and has to be recomputed if the code in between
   changes size.  */
/* Reached from .Ldenorm_dbl1 when the subnormal operand has no
   significant bits in its high word: normalise from the low word alone,
   adjust the exponent held in DBL0H, then fall into the main entry.  */
.Ldenorm_2:
breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
norm.f r12,DBL1L
mov.mi r12,21
add.pl r12,r12,22
neg r11,r12
asl_s r12,r12,20
lsr.f DBL1H,DBL1L,r11
ror DBL1L,DBL1L,r11
sub_s DBL0H,DBL0H,r12
mov.eq DBL1H,DBL1L
sub_l DBL1L,DBL1L,DBL1H
/* Fall through. */
.global __muldf3
.balign 4
__muldf3:
/* First partial product: low word times low word.  */
mulu64 DBL0L,DBL1L
ld.as r9,[pcl,0x68] ; ((.L7ff00000-.+2)/4)]
/* r6 = high significand bits of operand 0 with bit 20 set.  */
bmsk r6,DBL0H,19
bset r6,r6,20
and r11,DBL0H,r9
/* A zero exponent field means zero or subnormal.  The instruction after
   each breq.d is its delay slot.  */
breq.d r11,0,.Ldenorm_dbl0
and r12,DBL1H,r9
breq.d r12,0,.Ldenorm_dbl1
mov r8,mlo
mov r4,mhi
/* Second partial product: high word of operand 0 times low word of
   operand 1.  */
mulu64 r6,DBL1L
/* An all-ones exponent field means Inf or NaN.  */
breq.d r11,r9,.Linf_nan
bmsk r10,DBL1H,19
breq.d r12,r9,.Linf_nan
bset r10,r10,20
add.f r4,r4,mlo
adc r5,mhi,0
/* Third partial product: high word of operand 1 times low word of
   operand 0.  */
mulu64 r10,DBL0L
add_s r12,r12,r11 ; add exponents
add.f r4,r4,mlo
adc r5,r5,mhi
/* Fourth partial product: high word times high word.  */
mulu64 r6,r10
tst r8,r8
bclr r8,r9,30 ; 0x3ff00000
bset.ne r4,r4,0 ; put least significant word into sticky bit
bclr r6,r9,20 ; 0x7fe00000
add.f r5,r5,mlo
adc r7,mhi,0 ; fraction product in r7:r5:r4
lsr.f r10,r7,9
rsub.eq r8,r8,r9 ; 0x40000000
sub r12,r12,r8 ; subtract bias + implicit 1
/* An unsigned exponent of 0x7fe00000 or above is out of range.  */
brhs.d r12,r6,.Linf_denorm
rsub r10,r10,12
/* Shift r7:r5:r4 left by r10, round, and merge with the exponent held
   in r12.  */
.Lshift_frac:
neg r8,r10
asl r6,r4,r10
lsr DBL0L,r4,r8
add.f 0,r6,r6
btst.eq DBL0L,0
cmp.eq r4,r4 ; round to nearest / round to even
asl r4,r5,r10
lsr r5,r5,r8
adc.f DBL0L,DBL0L,r4
/* N = sign of the product.  */
xor.f 0,DBL0H,DBL1H
asl r7,r7,r10
add_s r12,r12,r5
adc DBL0H,r12,r7
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
/* N.B. This is optimized for ARC700.
ARC600 has very different scheduling / instruction selection criteria. */
/* If one number is denormal, subtract some from the exponent of the other
one (if the other exponent is too small, return 0), and normalize the
denormal. Then re-run the computation. */
/* Return zero carrying only the sign bit currently in DBL0H.  */
.Lret0_2:
lsr_s DBL0H,DBL0H,31
asl_s DBL0H,DBL0H,31
j_s.d [blink]
mov_s DBL0L,0
.balign 4
/* Operand 0 has a zero exponent field: swap the operands so that the
   code at .Ldenorm_dbl1 can handle it as operand 1, and recompute r11
   for the new operand 0.  */
.Ldenorm_dbl0:
mov_s r12,DBL0L
mov_s DBL0L,DBL1L
mov_s DBL1L,r12
mov_s r12,DBL0H
mov_s DBL0H,DBL1H
mov_s DBL1H,r12
and r11,DBL0H,r9
.Ldenorm_dbl1:
brhs r11,r9,.Linf_nan
brhs 0x3ca00001,r11,.Lret0
/* Fold the sign of operand 1 into DBL0H and strip it from DBL1H.  */
sub_s DBL0H,DBL0H,DBL1H
bmsk.f DBL1H,DBL1H,30
add_s DBL0H,DBL0H,DBL1H
beq.d .Ldenorm_2
norm r12,DBL1H
/* Normalise DBL1H:DBL1L by r12 bits, subtract r12 (moved to exponent
   position) from DBL0H, and restart the multiplication.  */
sub_s r12,r12,10
asl r5,r12,20
asl_s DBL1H,DBL1H,r12
sub DBL0H,DBL0H,r5
neg r5,r12
lsr r6,DBL1L,r5
asl_s DBL1L,DBL1L,r12
b.d __muldf3
add_s DBL1H,DBL1H,r6
/* Return zero carrying only the sign bit of DBL0H ^ DBL1H.  */
.Lret0: xor_s DBL0H,DBL0H,DBL1H
bclr DBL1H,DBL0H,31
xor_s DBL0H,DBL0H,DBL1H
j_s.d [blink]
mov_s DBL0L,0
.balign 4
.Linf_nan:
bclr r12,DBL1H,31
xor_s DBL1H,DBL1H,DBL0H
bclr_s DBL0H,DBL0H,31
max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
or.f 0,DBL0H,DBL0L
mov_s DBL0L,0
or.ne.f DBL1L,DBL1L,r12
not_s DBL0H,DBL0L ; inf * 0 -> NaN
mov.ne DBL0H,r8
tst_s DBL1H,DBL1H
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
/* We have checked for infinity / NaN input before, and transformed
denormalized inputs into normalized inputs. Thus, the worst case
exponent overflows are:
1 + 1 - 0x400 == 0xc02 : maximum underflow
0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
N.B. 0x7e and 0x7f are also values for overflow.
If (r12 <= -54), we have an underflow to zero. */
.balign 4
.Linf_denorm:
lsr r6,r12,28
brlo.d r6,0xc,.Linf
asr r6,r12,20
/* Subnormal result: fold the negative exponent into the shift count
   r10 and continue with a zero exponent.  */
add.f r10,r10,r6
brgt.d r10,0,.Lshift_frac
mov_s r12,0
beq.d .Lround_frac
add r10,r10,32
/* Move the fraction down by one whole word (r7 -> r5 -> r4), recording
   a non-zero dropped word in r4.  */
.Lshift32_frac:
tst r4,r4
mov r4,r5
bset.ne r4,r4,1
mov r5,r7
brge.d r10,1,.Lshift_frac
mov r7,0
breq.d r10,0,.Lround_frac
add r10,r10,32
brgt r10,21,.Lshift32_frac
b_s .Lret0
/* No residual bit shift: round r7:r5 using r4 as the discarded part.  */
.Lround_frac:
add.f 0,r4,r4
btst.eq r5,0
mov_s DBL0L,r5
mov_s DBL0H,r7
adc.eq.f DBL0L,DBL0L,0
j_s.d [blink]
adc.eq DBL0H,DBL0H,0
/* Overflow: return r9 (0x7ff00000) with a zero low word and the sign of
   DBL0H ^ DBL1H.  */
.Linf: mov_s DBL0L,0
xor.f DBL1H,DBL1H,DBL0H
mov_s DBL0H,r9
j_s.d [blink]
bset.mi DBL0H,DBL0H,31
ENDFUNC(__muldf3)
.balign 4
/* Exponent mask, loaded pcl-relative at the entry point.  */
.L7ff00000:
.long 0x7ff00000
/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-test shim.  When enabled it defines __mulsf3 as a
   wrapper that runs __mulsf3_c (presumably a C reference implementation;
   it is not defined in this file) and then __mulsf3_asm (the real
   routine, renamed by the #define that closes this block) on the same
   two operands.  It returns normally when both results are bit-identical
   or when both have all exponent bits set and a non-zero fraction;
   otherwise it calls abort.  */
.global __mulsf3
FUNC(__mulsf3)
.balign 4
__mulsf3:
push_s blink
push_s r1
/* The push of r0 is in the delay slot of the call, so both operands are
   on the stack before __mulsf3_c starts.  */
bl.d __mulsf3_c
push_s r0
/* Reload the second operand and park the __mulsf3_c result in the stack
   slot it came from.  */
ld_s r1,[sp,4]
st_s r0,[sp,4]
/* The delay-slot pop restores the first operand for the second call.  */
bl.d __mulsf3_asm
pop_s r0
/* Here r0 = __mulsf3_asm result and, after the pop, r1 = the saved
   __mulsf3_c result.  */
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
/* Results differ: tolerate that only if every exponent bit is set in
   both results ...  */
and r12,r0,r1
bic.f 0,0x7f800000,r12
bne 0f
/* ... and both fraction fields are non-zero.  */
bmsk.f 0,r0,22
bmsk.ne.f r1,r1,22
jne_s [blink] ; both NaN -> OK
0: bl abort
ENDFUNC(__mulsf3)
#define __mulsf3 __mulsf3_asm
#endif /* DEBUG */
.balign 4
.global __mulsf3
FUNC(__mulsf3)
/* __mulsf3, variant built on mulu64 (results are read from mhi / mlo).
   In:   r0, r1 = the two operands as raw single-precision bit patterns.
   Out:  r0 = product bit pattern; every exit is a jump through blink.
   Register roles set up below:
     r9  = 0x7f800000 (exponent mask), r4 = 0x7fffffff once loaded,
     r11 = exponent field of r0, r12 = exponent field of r1,
     r2  = significand of r0 with bit 23 set, shifted left by 8,
     r3  = significand of r1 with bit 23 set,
     r6 / r7 = copies of mhi / mlo after the significand multiply.
   Also written: r1, r8.
   The ld.as displacements are hand-computed word distances from pcl to
   the constants after the code (see the trailing comment on each load);
   they have to be recomputed if any instruction in between changes
   size.  */
__mulsf3:
ld.as r9,[pcl,80]; [pcl,((.L7f800000-.+2)/4)]
bmsk r4,r1,22
bset r2,r0,23
asl_s r2,r2,8
bset r3,r4,23
/* Start the significand multiply before the operand classification.  */
mulu64 r2,r3
and r11,r0,r9
/* A zero exponent field means zero or subnormal; those operands are
   handled out of line.  The instruction after each breq.d is its delay
   slot and executes whether or not the branch is taken.  */
breq.d r11,0,.Ldenorm_dbl0
and r12,r1,r9
breq.d r12,0,.Ldenorm_dbl1
xor_s r0,r0,r1
/* An all-ones exponent field means Inf or NaN.  */
breq.d r11,r9,.Linf_nan_dbl0
ld.as r4,[pcl,70]; [pcl,((.L7fffffff-.+2)/4)]
breq.d r12,r9,.Linf_nan_dbl1
/* The asl.f below is the delay slot of the breq.d above and also the
   re-entry point used by the subnormal paths.  */
.Lpast_denorm:
/* N is set from bit 23 of mhi.  When that bit is clear the .pl
   instructions shift r6:r7 left by one and clear bit 23 of r6.  */
asl.f 0,mhi,8
mov r6,mhi
mov r7,mlo
add.pl r6,r6,r6
bclr.pl r6,r6,23
add.pl.f r7,r7,r7
add.cs r6,r6,1
/* Carry = least significant bit of r6; it takes part in the rounding
   addition on r7 below.  */
lsr.f 0,r6,1
add_s r12,r12,r11
adc.f 0,r7,r4
add_s r12,r12, \
-0x3f800000
/* r8 = r6 + (sum of exponent fields - 0x3f800000) + carry from the
   rounding addition.  */
adc.f r8,r6,r12
tst.pl r8,r9
/* Keep only bit 31 of r0, which holds the xor of the operand signs.  */
bic r0,r0,r4
min r3,r8,r9
/* Fast exit: taken when the flags are positive and non-zero; the
   delay-slot add merges r3 into the sign under the same condition.  */
jpnz.d [blink]
add.pnz r0,r0,r3
; infinity or denormal number
add.ne.f r3,r3,r3
asr_s r3,r3,23+1
bset r6,r6,23
bpnz.d .Linfinity
sub_s r3,r3,1
/* Subnormal result: r2 = right-shift amount for the significand.  */
neg_s r2,r3
brhi.d r2,24,.Lret_r0 ; right shift > 24 -> return +-0
lsr r2,r6,r2
/* r9 collects the bits shifted out of r6; bit 0 is forced on when the
   low product word r7 is non-zero.  */
asl r9,r6,r3
lsr.f 0,r2,1
tst r7,r7
add_s r0,r0,r2
bset.ne r9,r9,0
adc.f 0,r9,r4
j_s.d [blink]
add.cs r0,r0,1
/* Overflow: r0 holds only the sign here and r9 is still 0x7f800000.  */
.Linfinity:
j_s.d [blink]
add_s r0,r0,r9
.Lret_r0: j_s [blink]
.balign 4
/* Exponent field of r0 is zero.  Bit 31 of r2 (set at entry) is cleared
   again, r2 is normalised using the count from norm.f, and the multiply
   is redone; the count, shifted into exponent position, is subtracted
   from r12.  */
.Ldenorm_dbl0:
bclr_s r2,r2,31
norm.f r4,r2
add_s r2,r2,r2
asl r2,r2,r4
mulu64 r2,r3
breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
asl r4,r4,23
/* Z is still the result of norm.f, i.e. set when r2 was zero.  */
sub.ne.f r12,r12,r4
ld.as r4,[pcl,29]; [pcl,((.L7fffffff-.+2)/4)]
bhi.d .Lpast_denorm
xor_s r0,r0,r1
/* Not taken: return a zero carrying only the sign bit of r0 ^ r1.  */
bmsk r1,r0,30
j_s.d [blink]
bic_s r0,r0,r1
.balign 4
/* r0 has a zero exponent field and r1 has an all-ones exponent field.
   A +-0 in r0 goes straight to the NaN return.  */
.Ldenorm_dbl0_inf_nan_dbl1:
bmsk.f 0,r0,30
beq_s .Lretnan
xor_s r0,r0,r1
.Linf_nan_dbl1:
xor_s r1,r1,r0
.Linf_nan_dbl0:
/* Return early unless r1 without its sign is above 0x7f800000.  */
bclr_s r1,r1,31
cmp_s r1,r9
jls.d [blink]
xor_s r0,r0,r1
; r1 NaN -> result NaN
/* NaN is returned as the all-ones bit pattern.  */
.Lretnan:
j_s.d [blink]
mov r0,-1
.balign 4
/* Exponent field of r1 is zero; r4 holds its fraction bits.  Mirror of
   .Ldenorm_dbl0, with the exponent correction applied to r11.  */
.Ldenorm_dbl1:
breq.d r11,r9,.Linf_nan_dbl0_2
norm.f r3,r4
sub_s r3,r3,7
asl r4,r4,r3
mulu64 r2,r4
sub_s r3,r3,1
asl_s r3,r3,23
sub.ne.f r11,r11,r3
ld.as r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)]
bhi.d .Lpast_denorm
bmsk r8,r0,30
/* Not taken: return a zero carrying only the sign bit.  */
j_s.d [blink]
bic r0,r0,r8
.balign 4
.Linf_nan_dbl0_2:
bclr_s r1,r1,31
xor_s r0,r0,r1
sub.eq r1,r1,1 ; inf/nan * 0 -> nan
bic.f 0,r9,r1
j_s.d [blink]
or.eq r0,r0,r1 ; r1 nan -> result nan
.balign 4
/* Literal pool addressed by the pcl-relative loads above.  */
.L7f800000:
.long 0x7f800000
.L7fffffff:
.long 0x7fffffff
ENDFUNC(__mulsf3)
/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-test shim.  When enabled it defines __divsf3 as a
   wrapper that runs __divsf3_c (presumably a C reference implementation;
   it is not defined in this file) and then __divsf3_asm (the real
   routine, renamed by the #define that closes this block) on the same
   operands.  It returns normally when both results are bit-identical or
   when every exponent bit is set in both results; otherwise it calls
   abort.  */
.global __divsf3
FUNC(__divsf3)
.balign 4
__divsf3:
push_s blink
push_s r1
/* The push of r0 is in the delay slot, so both operands are saved
   before __divsf3_c runs.  */
bl.d __divsf3_c
push_s r0
/* Reload the divisor and park the __divsf3_c result in its slot.  */
ld_s r1,[sp,4]
st_s r0,[sp,4]
/* The delay-slot pop restores the dividend for the second call.  */
bl.d __divsf3_asm
pop_s r0
/* r0 = __divsf3_asm result, r1 = saved __divsf3_c result.  */
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
and r12,r0,r1
bic.f 0,0x7f800000,r12 ; both NaN -> OK
jeq_s [blink]
bl abort
ENDFUNC(__divsf3)
#define __divsf3 __divsf3_asm
#endif /* DEBUG */
.balign 4
__divdf3_support: /* This label makes debugger output saner. */
FUNC(__divsf3)
/* __divsf3, variant without a hardware multiplier: the quotient is
   produced one bit at a time by the two-instruction divide step
   add1.f / sub.cc.
   In:   r0 = dividend, r1 = divisor, as raw single-precision bit
         patterns.
   Out:  r0 = quotient bit pattern; every exit is a jump through blink or
         through the computed jump in .Ldenorm.
   Register roles: r9 = 0x7f800000 (exponent mask), r2 / r3 = dividend /
   divisor significand, r4 / r5 = sign and exponent of dividend /
   divisor (r4 later holds sign and exponent of the result), r10 = r4 ^
   r5 at entry (bit 31 is the result sign), r11 / r12 = exponent
   scratch, r8 = exponent bias.  Also written: r1.
   NOTE(review): the __divdf3_support label above carries a "df" name in
   a single-precision routine; it is left unchanged because it is a
   symbol visible to debuggers.  */
/* Dividend exponent field is zero.  r2 holds its fraction bits.  */
.Ldenorm_fp0:
norm.f r12,r2 ; flag for 0/x -> 0 check
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
beq_s .Lret0_NaN
/* Normalise the dividend significand and apply the shift count, moved
   to exponent position, to the divisor exponent in r5.  Z from the tst
   tells whether the divisor exponent field is zero as well.  */
tst r1,r9
add_s r2,r2,r2
sub_s r12,r12,8
asl_s r2,r2,r12
asl_l r12,r12,23
bne.d .Lpast_denorm_fp0
add r5,r5,r12
/* r0 is subnormal, r1 is subnormal or 0. */
.balign 4
.Ldenorm_fp1:
norm.f r12,r3 ; flag for x/0 -> Inf check
bic.ne.f 0,0x60000000,r0 ; large number/denorm -> Inf
beq_s .Linf
/* Normalise the divisor significand and apply the shift count, moved to
   exponent position, to the dividend exponent in r4.  */
add_s r3,r3,r3
sub_s r12,r12,8
asl_s r3,r3,r12
asl_s r12,r12,23
b.d .Lpast_denorm_fp1
add r4,r4,r12
/* Return the result sign only, or that word minus one when the divisor
   is +-0.  */
.Lret0_NaN:
bclr.f 0,r1,31 ; 0/0 -> NaN
bic r0,r10,r9
j_s.d [blink]
sub.eq r0,r0,1
.balign 4
.Linf_nan_fp0:
bic.f 0,r9,r1 ; fp1 Inf -> result NaN
bic r1,r5,r9 ; fp1 sign
sub.eq r1,r1,1
j_s.d [blink]
xor_s r0,r0,r1
.Linf_nan_fp1:
bic r0,r4,r9 ; fp0 sign
bmsk.f 0,r1,22 ; x/inf -> 0, x/nan -> nan
xor.eq r1,r1,r9
j_s.d [blink]
xor_s r0,r0,r1
.global __divsf3
.balign 4
/* This word sits directly in front of the entry point so that the first
   instruction can load it with a fixed displacement of -4.  */
.long 0x7f800000 ; exponent mask
__divsf3:
ld r9,[pcl,-4]
/* Split each operand into fraction (r2, r3) and sign + exponent
   (r4, r5).  */
bmsk r2,r0,22
xor r4,r0,r2
bmsk r3,r1,22
xor r5,r1,r3
and r11,r0,r9
/* The xor in the delay slot executes on both paths.  */
breq.d r11,0,.Ldenorm_fp0
xor r10,r4,r5
breq r11,r9,.Linf_nan_fp0
bset_s r2,r2,23
and r11,r1,r9
breq r11,0,.Ldenorm_fp1
breq r11,r9,.Linf_nan_fp1
.Lpast_denorm_fp0:
bset_s r3,r3,23
.Lpast_denorm_fp1:
/* If the dividend significand is below the divisor significand, double
   it and lower the exponent bias by one to compensate.  */
cmp r2,r3
asl_s r2,r2,6+1
asl_s r3,r3,7
add.lo r2,r2,r2
bclr r8,r9,30 ; exponent bias
bclr.lo r8,r8,23 ; reduce exp by one if fraction is shifted
/* r4 = sign and exponent of the result.  */
sub r4,r4,r5
add r4,r4,r8
/* A sign bit that disagrees with r10 means the exponent arithmetic left
   the representable range.  */
xor.f 0,r10,r4
bmi .Linf_denorm
and.f r12,r4,r9
beq .Ldenorm
sub_s r2,r2,r3 ; discard implicit 1
rsub r3,r3,1 ; prime r3 for two-insn divide-step use
/* The quotient is generated in groups of 6, 7, 7, 3 and 1 divide steps.
   After each group the new quotient bits are moved from the low bits of
   r2 into r0.  The .Ldiv_* labels are also computed-jump targets for the
   subnormal path; see .Ldenorm_tab.  */
.Ldiv_23bit:
.rep 6
add1.f r2,r3,r2
sub.cc r2,r2,r3
.endr
breq r12,r9,.Linf
bmsk r0,r2,6
xor_s r2,r2,r0
.Ldiv_17bit:
.rep 7
add1.f r2,r3,r2
sub.cc r2,r2,r3
.endr
asl_s r0,r0,7
bmsk r1,r2,6
xor_s r2,r2,r1
or_s r0,r0,r1
.Ldiv_10bit:
.rep 7
add1.f r2,r3,r2
sub.cc r2,r2,r3
.endr
asl_s r0,r0,7
bmsk r1,r2,6
xor_s r2,r2,r1
or_s r0,r0,r1
.Ldiv_3bit:
.rep 3
add1.f r2,r3,r2
sub.cc r2,r2,r3
.endr
asl_s r0,r0,3
/* One extra step into r1 provides the rounding bit; the tst records
   whether anything is left over or the rounding bit is set.  */
.Ldiv_0bit:
add1.f r1,r3,r2
sub.cc r1,r1,r3
bmsk_s r2,r2,2
tst r1,-0x7e ; 0xffffff82, test for rest or odd
bmsk_s r1,r1,0
add_s r0,r0,r2 ; assemble fraction
add_s r0,r0,r4 ; add in sign & exponent
j_s.d [blink]
add.ne r0,r0,r1 ; round to nearest / even
.balign 4
/* Overflow: result sign from r10 with the exponent mask or-ed in.  */
.Linf:
j_s.d [blink]
or r0,r10,r9
.Lret_r4:
j_s.d [blink]
mov_s r0,r4
.balign 4
.Linf_denorm:
/* r12 = signed exponent of the result.  Non-negative here means
   overflow; negative values are clamped to -24.  */
add.f r12,r4,r4
asr_l r12,r12,24
bpl .Linf
max r12,r12,-24
/* Subnormal result.  r12 is in [-24, 0] and selects how many divide
   steps to run: r1 is set to the address of .Ldenorm_tab, the 16-bit
   entry at index r12 is loaded, and the jump goes to r1 minus that
   entry.  r0 starts at zero and r4 is reduced to the result sign.  */
.Ldenorm:
rsub r3,r3,1
add r1,pcl,68; .Ldenorm_tab-.
ldw.as r12,[r1,r12]
mov_s r0,0
lsr_s r2,r2
sub_s r1,r1,r12
j_s.d [r1]
bic r4,r10,r9
/* Jump table, listed from index -24 up to index 0 (the entry at
   .Ldenorm_tab itself).  Each entry is .Ldenorm_tab minus the target
   address.  An offset of k*8 past a .Ldiv_* label skips k of the divide
   steps of that group; the table relies on every two-instruction step
   occupying 8 bytes.  Successive entries must therefore differ by
   exactly one step.  The third .Ldiv_10bit entry used to repeat -3*8,
   which made index -17 run the same number of steps as index -16; it is
   -4*8 now, matching the 6,5,4,3,2,1,0 pattern of the .Ldiv_17bit
   group.  */
.short .Ldenorm_tab-.Lret_r4
.short .Ldenorm_tab-.Ldiv_0bit
.short .Ldenorm_tab-.Ldiv_3bit-2*8
.short .Ldenorm_tab-.Ldiv_3bit-1*8
.short .Ldenorm_tab-.Ldiv_3bit
.short .Ldenorm_tab-.Ldiv_10bit-6*8
.short .Ldenorm_tab-.Ldiv_10bit-5*8
.short .Ldenorm_tab-.Ldiv_10bit-4*8
.short .Ldenorm_tab-.Ldiv_10bit-3*8
.short .Ldenorm_tab-.Ldiv_10bit-2*8
.short .Ldenorm_tab-.Ldiv_10bit-1*8
.short .Ldenorm_tab-.Ldiv_10bit
.short .Ldenorm_tab-.Ldiv_17bit-6*8
.short .Ldenorm_tab-.Ldiv_17bit-5*8
.short .Ldenorm_tab-.Ldiv_17bit-4*8
.short .Ldenorm_tab-.Ldiv_17bit-3*8
.short .Ldenorm_tab-.Ldiv_17bit-2*8
.short .Ldenorm_tab-.Ldiv_17bit-1*8
.short .Ldenorm_tab-.Ldiv_17bit
.short .Ldenorm_tab-.Ldiv_23bit-5*8
.short .Ldenorm_tab-.Ldiv_23bit-4*8
.short .Ldenorm_tab-.Ldiv_23bit-3*8
.short .Ldenorm_tab-.Ldiv_23bit-2*8
.short .Ldenorm_tab-.Ldiv_23bit-1*8
.Ldenorm_tab:
.short .Ldenorm_tab-.Ldiv_23bit
ENDFUNC(__divsf3)
/* Copyright (C) 2008-2013 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "../arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-test shim.  When enabled it defines __mulsf3 as a
   wrapper that runs __mulsf3_c (presumably a C reference implementation;
   it is not defined in this file) and then __mulsf3_asm (the real
   routine, renamed by the #define that closes this block) on the same
   two operands.  It returns normally when both results are bit-identical
   or when both have all exponent bits set and a non-zero fraction;
   otherwise it calls abort.  */
.global __mulsf3
FUNC(__mulsf3)
.balign 4
__mulsf3:
push_s blink
push_s r1
/* The push of r0 is in the delay slot of the call, so both operands are
   on the stack before __mulsf3_c starts.  */
bl.d __mulsf3_c
push_s r0
/* Reload the second operand and park the __mulsf3_c result in the stack
   slot it came from.  */
ld_s r1,[sp,4]
st_s r0,[sp,4]
/* The delay-slot pop restores the first operand for the second call.  */
bl.d __mulsf3_asm
pop_s r0
/* Here r0 = __mulsf3_asm result and, after the pop, r1 = the saved
   __mulsf3_c result.  */
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
/* Results differ: tolerate that only if every exponent bit is set in
   both results ...  */
and r12,r0,r1
bic.f 0,0x7f800000,r12
bne 0f
/* ... and both fraction fields are non-zero.  */
bmsk.f 0,r0,22
bmsk.ne.f r1,r1,22
jne_s [blink] ; both NaN -> OK
0: bl abort
ENDFUNC(__mulsf3)
#define __mulsf3 __mulsf3_asm
#endif /* DEBUG */
.balign 4
.global __mulsf3
FUNC(__mulsf3)
/* __mulsf3, variant without a hardware multiplier: the significand
   product is formed by a 24-iteration shift-and-add loop.
   In:   r0, r1 = the two operands as raw single-precision bit patterns.
   Out:  r0 = product bit pattern; every exit is a jump through blink.
   Register roles set up below:
     r9  = 0x7f800000 (exponent mask), r4 = 0x7fffffff once loaded,
     r11 = exponent field of r0, r12 = exponent field of r1,
     r2 / r3 = significands of r0 / r1 with bit 23 set,
     r6 / r7 = high / low word of the significand product.
   Also written: r1, r8, lp_count.
   The ld.as displacements are hand-computed word distances from pcl to
   the constants after the code (see the trailing comment on each load);
   they have to be recomputed if any instruction in between changes
   size.  */
__mulsf3:
ld.as r9,[pcl,76]; [pcl,((.L7f800000-.+2)/4)]
bmsk r4,r1,22
bset r3,r4,23
bmsk r2,r0,22
and r11,r0,r9
/* A zero exponent field means zero or subnormal.  The instruction after
   each breq.d is its delay slot.  */
breq.d r11,0,.Ldenorm_dbl0
and r12,r1,r9
/* From here on bit 31 of r0 is the xor of the operand signs.  */
xor_s r0,r0,r1
breq.d r11,r9,.Linf_nan_dbl0
bset_s r2,r2,23
breq r12,0,.Ldenorm_dbl1
breq r12,r9,.Linf_nan_dbl1
.Lpast_denorm:
/* r6 is the accumulator; r7 starts as r2 shifted right by one, with the
   bit shifted out left in carry for the first iteration.  */
mov r6,0
lsr.f r7,r2
; We could do this a bit faster here with a 32 bit shift register and
; inserting the r2 factor / retrieving the low result a byte at a time,
; but that'd increase code size.
mov lp_count,24
.balign 4
/* Loop body: add r3 to the accumulator when the multiplier bit shifted
   out last was set, then shift r6:r7 right by one, which also exposes
   the next multiplier bit in carry.  */
lp 0f
add.cs r6,r6,r3
lsr.f r6,r6
rrc.f r7,r7
0:
ld.as r4,[pcl,59]; [pcl,((.L7fffffff-.+2)/4)]
/* N is set from bit 23 of r6.  When that bit is clear the .pl
   instructions shift r6:r7 left by one and clear bit 23 of r6.  */
asl.f 0,r6,8
add.pl r6,r6,r6
bclr.pl r6,r6,23
add.pl.f r7,r7,r7
add.cs r6,r6,1
/* Carry = least significant bit of r6; it takes part in the rounding
   addition on r7 below.  */
lsr.f 0,r6,1
add_s r12,r12,r11
adc.f 0,r7,r4
add_s r12,r12, \
-0x3f800000
/* r8 = r6 + (sum of exponent fields - 0x3f800000) + carry from the
   rounding addition.  */
adc.f r8,r6,r12
tst.pl r8,r9
/* Keep only the sign bit of r0.  */
bic r0,r0,r4
min r3,r8,r9
/* Fast exit: taken when the flags are positive and non-zero; the
   delay-slot add merges r3 into the sign under the same condition.  */
jpnz.d [blink]
add.pnz r0,r0,r3
; infinity or denormal number
add.ne.f r3,r3,r3
asr_s r3,r3,23+1
bset r6,r6,23
bpnz.d .Linfinity
sub_s r3,r3,1
/* Subnormal result: r2 = right-shift amount for the significand.  */
neg_s r2,r3
brhi.d r2,24,.Lret_r0 ; right shift > 24 -> return +-0
lsr r2,r6,r2
/* r9 collects the bits shifted out of r6; bit 0 is forced on when the
   low product word r7 is non-zero.  */
asl r9,r6,r3
lsr.f 0,r2,1
tst r7,r7
add_s r0,r0,r2
bset.ne r9,r9,0
adc.f 0,r9,r4
j_s.d [blink]
add.cs r0,r0,1
/* Overflow: r0 holds only the sign here and r9 is still 0x7f800000.  */
.Linfinity:
j_s.d [blink]
add_s r0,r0,r9
.Lret_r0: j_s [blink]
.balign 4
/* Exponent field of r0 is zero; r2 holds its fraction bits.  Normalise
   r2 using the count from norm.f and subtract that count, shifted into
   exponent position, from r12.  */
.Ldenorm_dbl0:
asl_s r2,r2,8
norm.f r4,r2
lsr_s r2,r2,7
asl r2,r2,r4
breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
asl r4,r4,23
/* Z is still the result of norm.f, i.e. set when r2 was zero.  */
sub.ne.f r12,r12,r4
bhi.d .Lpast_denorm
xor_s r0,r0,r1
/* Not taken: return a zero carrying only the sign bit of r0 ^ r1.  */
bmsk r1,r0,30
j_s.d [blink]
bic_s r0,r0,r1
.balign 4
/* r0 has a zero exponent field and r1 has an all-ones exponent field.
   A +-0 in r0 goes straight to the NaN return.  */
.Ldenorm_dbl0_inf_nan_dbl1:
bmsk.f 0,r0,30
beq_s .Lretnan
xor_s r0,r0,r1
.Linf_nan_dbl1:
xor_s r1,r1,r0
bclr_s r1,r1,31
j_s.d [blink]
xor_s r0,r0,r1
.Linf_nan_dbl0:
sub_s r2,r1,1 ; inf/nan * 0 -> nan; inf * nan -> nan (use |r2| >= inf)
bic.f 0,r9,r2
xor_s r0,r0,r1
bclr_s r1,r1,31
xor_s r0,r0,r1
jne_s [blink]
/* NaN is returned as the all-ones bit pattern.  */
.Lretnan:
j_s.d [blink]
mov r0,-1
.balign 4
/* Exponent field of r1 is zero; r4 holds its fraction bits.  The
   normalised significand is moved to r3 in the delay slot of the branch
   back to the main path, and the exponent correction goes to r11.  */
.Ldenorm_dbl1:
norm.f r3,r4
sub_s r3,r3,7
asl r4,r4,r3
sub_s r3,r3,1
asl_s r3,r3,23
sub.ne.f r11,r11,r3
bhi.d .Lpast_denorm
mov_s r3,r4
/* Not taken: return a zero carrying only the sign bit.  */
bmsk r3,r0,30
j_s.d [blink]
bic_s r0,r0,r3
.balign 4
/* Literal pool addressed by the pcl-relative loads above.  */
.L7f800000:
.long 0x7f800000
.L7fffffff:
.long 0x7fffffff
ENDFUNC(__mulsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/*
- calculate 15..18 bit inverse using a table of approximating polynomials.
precision is higher for polynomials used to evaluate input with larger
value.
- do one newton-raphson iteration step to double the precision,
then multiply this with the divisor
-> more time to decide if dividend is subnormal
- the worst error propagation is on the side of the value range
with the least initial defect, thus giving us about 30 bits precision.
*/
#include "arc-ieee-754.h"
#if 0 /* DEBUG */
/* Disabled self-test shim.  When enabled it defines __divsf3 as a
   wrapper that runs __divsf3_c (presumably a C reference implementation;
   it is not defined in this file) and then __divsf3_asm (the real
   routine, renamed by the #define that closes this block) on the same
   operands, and branches to abort unless the two results are
   bit-identical.  */
.global __divsf3
FUNC(__divsf3)
.balign 4
__divsf3:
push_s blink
push_s r1
/* The push of r0 is in the delay slot, so both operands are saved
   before __divsf3_c runs.  */
bl.d __divsf3_c
push_s r0
/* Reload the divisor and park the __divsf3_c result in its slot.  */
ld_s r1,[sp,4]
st_s r0,[sp,4]
/* The delay-slot pop restores the dividend for the second call.  */
bl.d __divsf3_asm
pop_s r0
/* r0 = __divsf3_asm result, r1 = saved __divsf3_c result.  */
pop_s r1
pop_s blink
cmp r0,r1
#if 1
bne abort
jeq_s [blink]
/* Not reachable through the two conditional jumps above.  */
b abort
#else
bne abort
j_s [blink]
#endif
ENDFUNC(__divsf3)
#define __divsf3 __divsf3_asm
#endif /* DEBUG */
FUNC(__divsf3)
/* __divsf3, variant built on mpyhu / mpyu.
   In:   r0 = dividend, r1 = divisor, as raw single-precision bit
         patterns.
   Out:  r0 = quotient bit pattern; every exit is a jump through blink.
   Method (see the comment that precedes the include in this file): look
   up an approximate reciprocal of the divisor in .Ldivtab, refine it,
   multiply by the dividend, then round; an exact check is done only when
   the quick rounding test is inconclusive.
   Register roles: r9 = 0x7f800000 (exponent mask), r11 = exponent field
   of the divisor, r2 = exponent field of the dividend and later the
   result exponent, r6 = divisor significand with bit 31 set, r12 / r3 =
   dividend significand (r3 later holds the quotient), r7 / r8 =
   reciprocal estimate and its correction.  Also written: r1, r4, r5.
   The sub3 and ld.as displacements at the entry point are hand-computed
   distances back to .Ldivtab and .L7f800000; they have to be recomputed
   if anything between those labels and the entry point changes size.  */
.balign 4
.L7f800000:
.long 0x7f800000
/* 64 entries, indexed by bits 17..22 of the divisor (see the lsr /
   bmsk_s pair at the entry point).  */
.Ldivtab:
.long 0xfc0ffff0
.long 0xf46ffefd
.long 0xed1ffd2a
.long 0xe627fa8e
.long 0xdf7ff73b
.long 0xd917f33b
.long 0xd2f7eea3
.long 0xcd1fe986
.long 0xc77fe3e7
.long 0xc21fdddb
.long 0xbcefd760
.long 0xb7f7d08c
.long 0xb32fc960
.long 0xae97c1ea
.long 0xaa27ba26
.long 0xa5e7b22e
.long 0xa1cfa9fe
.long 0x9ddfa1a0
.long 0x9a0f990c
.long 0x9667905d
.long 0x92df878a
.long 0x8f6f7e84
.long 0x8c27757e
.long 0x88f76c54
.long 0x85df630c
.long 0x82e759c5
.long 0x8007506d
.long 0x7d3f470a
.long 0x7a8f3da2
.long 0x77ef341e
.long 0x756f2abe
.long 0x72f7212d
.long 0x709717ad
.long 0x6e4f0e44
.long 0x6c1704d6
.long 0x69e6fb44
.long 0x67cef1d7
.long 0x65c6e872
.long 0x63cedf18
.long 0x61e6d5cd
.long 0x6006cc6d
.long 0x5e36c323
.long 0x5c76b9f3
.long 0x5abeb0b7
.long 0x5916a79b
.long 0x57769e77
.long 0x55de954d
.long 0x54568c4e
.long 0x52d6834d
.long 0x51667a7f
.long 0x4ffe71b5
.long 0x4e9e68f1
.long 0x4d466035
.long 0x4bf65784
.long 0x4aae4ede
.long 0x496e4646
.long 0x48363dbd
.long 0x47063547
.long 0x45de2ce5
.long 0x44be2498
.long 0x43a61c64
.long 0x4296144a
.long 0x41860c0e
.long 0x407e03ee
__divsf3_support: /* This label makes debugger output saner. */
/* Divisor exponent field is zero.  The divisor significand is
   normalised with norm.f, the table lookup and first refinement are
   redone on the normalised value, and the shift count (moved to exponent
   position) is added to the dividend exponent in r2.  */
.Ldenorm_fp1:
bclr r6,r6,31
norm.f r12,r6 ; flag for x/0 -> Inf check
add r6,r6,r6
rsub r5,r12,16
ror r5,r1,r5
asl r6,r6,r12
bmsk r5,r5,5
ld.as r5,[r3,r5]
add r4,r6,r6
; load latency
mpyhu r7,r5,r4
bic.ne.f 0, \
0x60000000,r0 ; large number / denorm -> Inf
beq_s .Linf_NaN
asl r5,r5,13
; wb stall
; slow track
sub r7,r5,r7
mpyhu r8,r7,r6
asl_s r12,r12,23
and.f r2,r0,r9
add r2,r2,r12
asl r12,r0,8
; wb stall
bne.d .Lpast_denorm_fp1
/* The mpyhu below is both the delay slot of the bne.d above and the
   first instruction of the path for a dividend with a zero exponent
   field.  */
.Ldenorm_fp0:
mpyhu r8,r8,r7
bclr r12,r12,31
norm.f r3,r12 ; flag for 0/x -> 0 check
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
beq_s .Lret0
/* Normalise the dividend significand and add the shift count, moved to
   exponent position, to the divisor exponent in r11.  */
asl_s r12,r12,r3
asl_s r3,r3,23
add_s r12,r12,r12
add r11,r11,r3
b.d .Lpast_denorm_fp0
mov_s r3,r12
.balign 4
/* Reached from .Ldenorm_fp1 for a zero divisor or for a large dividend
   over a subnormal divisor.  The result is the sign of r0 ^ r1 with the
   exponent mask r9 or-ed in; when the dividend is +-0 the sub.eq turns
   the sign word into all ones or 0x7fffffff first.  */
.Linf_NaN:
bclr.f 0,r0,31 ; 0/0 -> NaN
xor_s r0,r0,r1
bmsk r1,r0,30
bic_s r0,r0,r1
sub.eq r0,r0,1
j_s.d [blink]
or r0,r0,r9
/* Return zero carrying only the sign bit of r0 ^ r1.  */
.Lret0:
xor_s r0,r0,r1
bmsk r1,r0,30
j_s.d [blink]
bic_s r0,r0,r1
/* Divisor is Inf or NaN.  r0 is reduced to the dividend sign; r2 still
   holds the dividend exponent field.  */
.Linf_nan_fp1:
lsr_s r0,r0,31
bmsk.f 0,r1,22
asl_s r0,r0,31
bne_s 0f ; inf/inf -> nan
brne r2,r9,.Lsigned0 ; x/inf -> 0, but x/nan -> nan
0: j_s.d [blink]
mov r0,-1
/* Flip the sign bit of r0 when the divisor is negative.  */
.Lsigned0:
.Linf_nan_fp0:
tst_s r1,r1
j_s.d [blink]
bxor.mi r0,r0,31
.balign 4
.global __divsf3
/* N.B. the spacing between divtab and the sub3 to get its address must
be a multiple of 8. */
__divsf3:
/* r2 = table index taken from bits 17..22 of the divisor; r3 = address
   of .Ldivtab.  */
lsr r2,r1,17
sub3 r3,pcl,55;(.-.Ldivtab) >> 3
bmsk_s r2,r2,5
ld.as r5,[r3,r2]
asl r4,r1,9
ld.as r9,[pcl,-114]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
mpyhu r7,r5,r4
asl r6,r1,8
and.f r11,r1,r9
bset r6,r6,31
asl r5,r5,13
; wb stall
/* Divisor exponent field zero: zero or subnormal divisor.  */
beq .Ldenorm_fp1
sub r7,r5,r7
mpyhu r8,r7,r6
/* Divisor exponent field all ones: Inf or NaN divisor.  The and.f in
   the delay slot sets the flags tested by the beq.d that follows.  */
breq.d r11,r9,.Linf_nan_fp1
and.f r2,r0,r9
beq.d .Ldenorm_fp0
asl r12,r0,8
; wb stall
breq r2,r9,.Linf_nan_fp0
mpyhu r8,r8,r7
.Lpast_denorm_fp1:
bset r3,r12,31
.Lpast_denorm_fp0:
/* Halve the dividend significand when it is not below the divisor
   significand; the carry from this cmp is consumed by the sbc on the
   exponent below.  */
cmp_s r3,r6
lsr.cc r3,r3,1
add_s r2,r2, /* wait for immediate */ \
/* wb stall */ \
0x3f000000
sub r7,r7,r8 ; u1.31 inverse, about 30 bit
/* r3 = high word of dividend significand times reciprocal.  */
mpyhu r3,r3,r7
sbc r2,r2,r11
/* N = sign of the quotient.  */
xor.f 0,r0,r1
and r0,r2,r9
bxor.mi r0,r0,31
/* An unsigned exponent of 0x7f000000 or above means overflow or a
   wrapped (negative) exponent; sort those out at .Linf_denorm.  */
brhs r2, /* wb stall / wait for immediate */ \
0x7f000000,.Linf_denorm
.Lpast_denorm:
add_s r3,r3,0x22 ; round to nearest or higher
tst r3,0x3c ; check if rounding was unsafe
lsr r3,r3,6
jne.d [blink] ; return if rounding was safe.
add_s r0,r0,r3
/* work out exact rounding if we fall through here. */
/* We know that the exact result cannot be represented in single
precision. Find the mid-point between the two nearest
representable values, multiply with the divisor, and check if
the result is larger than the dividend. */
add_s r3,r3,r3
sub_s r3,r3,1
mpyu r3,r3,r6
asr.f 0,r0,1 ; for round-to-even in case this is a denorm
/* r9 is 0x7f800000 on the normal path and the subnormal shift count on
   the .Ldenorm path.  */
rsub r2,r9,25
asl_s r12,r12,r2
; wb stall
; slow track
sub.f 0,r12,r3
j_s.d [blink]
sub.mi r0,r0,1
/* For denormal results, it is possible that an exact result needs
rounding, and thus the round-to-even rule has to come into play. */
.Linf_denorm:
brlo r2,0xc0000000,.Linf
/* Subnormal result: r9 becomes the right-shift count (the negated
   exponent); the quotient in r3 is shifted by it in the delay slot
   before the shared rounding code is re-entered.  */
.Ldenorm:
asr_s r2,r2,23
bic r0,r0,r9
neg r9,r2
brlo.d r9,25,.Lpast_denorm
lsr r3,r3,r9
/* Fall through: return +- 0 */
j_s [blink]
/* Overflow: or the exponent mask into the sign held in r0.  */
.Linf:
j_s.d [blink]
or r0,r0,r9
ENDFUNC(__divsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harness (compiled out by "#if 0").
   When enabled, this __divsf3 wrapper calls __divsf3_c and then
   __divsf3_asm with the same r0/r1 arguments and branches to abort
   unless both calls leave the same value in r0.  The #define at the end
   renames the implementation that follows to __divsf3_asm.
   NOTE(review): __divsf3_c is not defined here; presumably it is a C
   reference implementation supplied by the test setup.  */
#if 0 /* DEBUG */
.global __divsf3
FUNC(__divsf3)
.balign 4
__divsf3:
/* Save the return address and both arguments across the first call.  */
push_s blink
push_s r1
bl.d __divsf3_c
push_s r0
/* Reload the saved r1 and reuse its stack slot for the result of the
   first call; r0 is restored in the delay slot of the second call.  */
ld_s r1,[sp,4]
st_s r0,[sp,4]
bl.d __divsf3_asm
pop_s r0
/* r1 = result of __divsf3_c, r0 = result of __divsf3_asm.  */
pop_s r1
pop_s blink
cmp r0,r1
#if 1
bne abort
jeq_s [blink]
b abort
#else
bne abort
j_s [blink]
#endif
ENDFUNC(__divsf3)
#define __divsf3 __divsf3_asm
#endif /* DEBUG */
/* __divsf3: single precision division built from divaw steps.
   inputs: r0 (called fp0 in the comments below; the dividend, see the
	   "0/x" and "x/0" notes), r1 (fp1, the divisor)
   output: r0
   Registers written in this block: r0-r5, r8-r12 and the flags.
   Register roles, as set up at the __divsf3 entry point:
     r9  = 0x7f800000 exponent mask (loaded from the literal placed
	   directly in front of the entry label)
     r2 / r3 = fraction fields of fp0 / fp1
     r4 / r5 = fp0 / fp1 with the fraction field cleared
     r10 = r4 ^ r5; its top bit supplies the sign of the result
   The special-case handlers are placed in front of the entry point.
   NOTE(review): the code depends on exact instruction sizes (see the
   pcl-relative constant and the byte table at .Ldenorm_tab); do not
   change instruction encodings without rechecking those offsets.  */
.balign 4
__divdf3_support: /* This label makes debugger output saner. */
FUNC(__divsf3)
/* Reached when the exponent field of fp0 is zero (fp0 is 0 or a
   denormal).  */
.Ldenorm_fp0:
norm.f r12,r2 ; flag for 0/x -> 0 check
bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
beq_s .Lret0_NaN
tst r1,r9
/* Normalize the fraction of fp0 and apply the matching exponent
   adjustment to r5.  */
add_s r2,r2,r2
sub_s r12,r12,8
asl_s r2,r2,r12
asl_l r12,r12,23
bne.d .Lpast_denorm_fp0
add r5,r5,r12
/* r0 is subnormal, r1 is subnormal or 0. */
/* NOTE(review): .Ldenorm_fp1 is also branched to from the main path
   whenever the exponent field of fp1 is zero, whatever fp0 is.  */
.balign 4
.Ldenorm_fp1:
norm.f r12,r3 ; flag for x/0 -> Inf check
bic.ne.f 0,0x60000000,r0 ; large number/denorm -> Inf
beq_s .Linf
/* Normalize the fraction of fp1 and apply the matching exponent
   adjustment to r4.  */
add_s r3,r3,r3
sub_s r12,r12,8
asl_s r3,r3,r12
asl_s r12,r12,23
b.d .Lpast_denorm_fp1
add r4,r4,r12
/* Return a signed zero, or a NaN when fp1 is zero as well.  */
.Lret0_NaN:
bclr.f 0,r1,31 ; 0/0 -> NaN
bic r0,r10,r9
j_s.d [blink]
sub.eq r0,r0,1
.global __divsf3
.balign 4
.long 0x7f800000 ; exponent mask
__divsf3:
/* Fetch the exponent mask stored just before this label.  */
ld r9,[pcl,-4]
/* Split both operands into fraction (r2, r3) and sign+exponent
   (r4, r5).  */
bmsk r2,r0,22
xor r4,r0,r2
bmsk r3,r1,22
xor r5,r1,r3
/* Dispatch on the exponent field of fp0: zero -> .Ldenorm_fp0,
   all ones -> .Linf_nan_fp0.  r10 is computed in the delay slot.  */
and r11,r0,r9
breq.d r11,0,.Ldenorm_fp0
xor r10,r4,r5
breq r11,r9,.Linf_nan_fp0
/* Insert bit 23 into the fraction of fp0 (the implicit 1; it is
   discarded again before the division steps).  */
bset_s r2,r2,23
/* Same dispatch for the exponent field of fp1.  */
and r11,r1,r9
breq r11,0,.Ldenorm_fp1
breq r11,r9,.Linf_nan_fp1
.Lpast_denorm_fp0:
bset_s r3,r3,23
.Lpast_denorm_fp1:
/* Compare the fractions; when the dividend fraction is the lower one
   it is doubled and the result exponent is reduced by one below.  */
cmp r2,r3
asl_s r2,r2,6+1
asl_s r3,r3,7
add.lo r2,r2,r2
bclr r8,r9,30 ; exponent bias
bclr.lo r8,r8,23 ; reduce exp by one if fraction is shifted
/* r4 = sign/exponent of fp0 minus that of fp1 plus the bias.  */
sub r4,r4,r5
add r4,r4,r8
/* If the top bit of r4 disagrees with the expected sign in r10, go to
   .Linf_denorm, which returns infinity or builds a denormal.  */
xor.f 0,r10,r4
bmi .Linf_denorm
/* A zero result exponent field takes the denormal path.  */
and r12,r4,r9
breq r12,0,.Ldenorm
sub_s r2,r2,r3 ; discard implicit 1
/* Unrolled division steps: 6 + 7 + 7 + 3 + 1 divaw instructions.
   Between the groups, the bits collected in the low part of r2 are
   moved into r0.  The .Ldiv_Nbit labels are also entry points for the
   denormal path, which jumps into the sequence through
   .Ldenorm_tab.  */
.Ldiv_23bit:
.rep 6
divaw r2,r2,r3
.endr
/* Result exponent field all ones: return infinity.  */
breq r12,r9,.Linf
bmsk r0,r2,6
xor_s r2,r2,r0
.Ldiv_17bit:
.rep 7
divaw r2,r2,r3
.endr
asl_s r0,r0,7
bmsk r1,r2,6
xor_s r2,r2,r1
or_s r0,r0,r1
.Ldiv_10bit:
.rep 7
divaw r2,r2,r3
.endr
asl_s r0,r0,7
bmsk r1,r2,6
xor_s r2,r2,r1
or_s r0,r0,r1
.Ldiv_3bit:
.rep 3
divaw r2,r2,r3
.endr
asl_s r0,r0,3
.Ldiv_0bit:
/* One more step into r1; it is used only for the rounding decision.  */
divaw r1,r2,r3
bmsk_s r2,r2,2
tst r1,-0x7e ; 0xffffff82, test for rest or odd
bmsk_s r1,r1,0
add_s r0,r0,r2 ; assemble fraction
add_s r0,r0,r4 ; add in sign & exponent
j_s.d [blink]
add.ne r0,r0,r1 ; round to nearest / even
.balign 4
/* The exponent field of fp0 is all ones (fp0 is Inf or NaN).  */
.Linf_nan_fp0:
bic.f 0,r9,r1 ; fp1 Inf -> result NaN
bic r1,r5,r9 ; fp1 sign
sub.eq r1,r1,1
j_s.d [blink]
xor_s r0,r0,r1
/* The exponent field of fp1 is all ones (fp1 is Inf or NaN).  */
.Linf_nan_fp1:
bic r0,r4,r9 ; fp0 sign
bmsk.f 0,r1,22 ; x/inf -> 0, x/nan -> nan
xor.eq r1,r1,r9
j_s.d [blink]
xor_s r0,r0,r1
/* Return infinity carrying the sign from r10.  */
.Linf:
j_s.d [blink]
or r0,r10,r9
/* Return r4 as the result; this is the first entry of the denormal
   jump table below.  */
.Lret_r4:
j_s.d [blink]
mov_s r0,r4
.balign 4
/* Result exponent out of range: return infinity, or fall through to
   .Ldenorm with an index in r12 clamped to no less than -24.  */
.Linf_denorm:
add.f r12,r4,r4
asr_l r12,r12,24
bpl .Linf
max r12,r12,-24
/* Denormal result: r12 (in -24..0) indexes the byte table that ends at
   .Ldenorm_tab.  Each byte is the distance from .Ldenorm_tab back to an
   entry point, so only as many division steps as are needed are
   executed.  r4 is replaced by the bare sign in the delay slot of the
   jump.
   NOTE(review): the constant 42 must equal .Ldenorm_tab-. for the
   instruction below; it depends on the encodings chosen here.  */
.Ldenorm:
add r1,pcl,42; .Ldenorm_tab-.
ldb_s r12,[r12,r1]
mov_s r0,0
lsr_s r2,r2
sub_s r1,r1,r12
j_s.d [r1]
bic r4,r10,r9
/* Table entries for index -24 up to 0.  An entry of the form
   "label-N" starts N bytes before the label, i.e. it executes
   additional division steps of the preceding .rep group.  */
.byte .Ldenorm_tab-.Lret_r4
.byte .Ldenorm_tab-.Ldiv_0bit
.byte .Ldenorm_tab-.Ldiv_3bit-8
.byte .Ldenorm_tab-.Ldiv_3bit-4
.byte .Ldenorm_tab-.Ldiv_3bit
.byte .Ldenorm_tab-.Ldiv_10bit-24
.byte .Ldenorm_tab-.Ldiv_10bit-20
.byte .Ldenorm_tab-.Ldiv_10bit-16
.byte .Ldenorm_tab-.Ldiv_10bit-12
.byte .Ldenorm_tab-.Ldiv_10bit-8
.byte .Ldenorm_tab-.Ldiv_10bit-4
.byte .Ldenorm_tab-.Ldiv_10bit
.byte .Ldenorm_tab-.Ldiv_17bit-24
.byte .Ldenorm_tab-.Ldiv_17bit-20
.byte .Ldenorm_tab-.Ldiv_17bit-16
.byte .Ldenorm_tab-.Ldiv_17bit-12
.byte .Ldenorm_tab-.Ldiv_17bit-8
.byte .Ldenorm_tab-.Ldiv_17bit-4
.byte .Ldenorm_tab-.Ldiv_17bit
.byte .Ldenorm_tab-.Ldiv_23bit-20
.byte .Ldenorm_tab-.Ldiv_23bit-16
.byte .Ldenorm_tab-.Ldiv_23bit-12
.byte .Ldenorm_tab-.Ldiv_23bit-8
.byte .Ldenorm_tab-.Ldiv_23bit-4
.Ldenorm_tab:
.byte .Ldenorm_tab-.Ldiv_23bit
ENDFUNC(__divsf3)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* We use a polynomial similar to a Chebyshev (Tchebycheff) polynomial to
get an initial seed, and then use a Newton-Raphson iteration step to get
an approximate result.
If this result can't be rounded to the exact result with confidence, we
round to the value between the two closest representable values, and
test if the correctly rounded value is above or below this value.
Because of the Newton-Raphson iteration step, an error in the seed at X
is amplified by X. Therefore, we don't want a Chebyshev polynomial
or a polynomial that is close to optimal according to the maximum norm
on the error of the seed value; we want one that is close to optimal
according to the maximum norm on the error of the result, i.e. we
want the maxima of the polynomial to increase linearly.
Given an interval [X0,X2) over which to approximate,
with X1 := (X0+X2)/2, D := X1-X0, F := 1/D, and S := D/X1 we have,
like for Tchebycheff polynoms:
P(0) := 1
but then we have:
P(1) := X + S*D
P(2) := 2 * X^2 + S*D * X - D^2
Then again:
P(n+1) := 2 * X * P(n) - D^2 * P (n-1)
*/
/* Most negative error returned by err () so far.  It starts at a large
   positive sentinel so that the first call always replaces it.  */
static long double merr = 42.;

/* Evaluate the reciprocal approximation for X and return its error.

   A0 and A1 are the constant and linear coefficients of the seed
   y0 = A0 + (X - 1) * A1.  One Newton-Raphson step, 2*y0 - y0*X*y0,
   gives the approximation of 1/X.  The return value is
   approximation - 1/X, converted to double.

   Side effects: prints a diagnostic line when the error is at or below
   -2^-30, and lowers the file-scope minimum MERR when this error is
   smaller than the current minimum.

   The locals avoid the identifier "true" (a keyword in C23 and a macro
   under <stdbool.h>) and do not shadow the function name.  */
double
err (long double a0, long double a1, long double x)
{
  long double y0 = a0 + (x-1)*a1;
  long double approx = 2. * y0 - y0 * x * y0;
  long double exact = 1./x;
  long double diff = approx - exact;

  if (diff <= -1./65536./16384.)
    printf ("ERROR EXCEEDS 1 ULP %.15f %.15f %.15f\n",
            (double)x, (double)approx, (double)exact);
  if (merr > diff)
    merr = diff;
  return diff;
}
/* Generate the reciprocal seed table for the double-precision divide.

   The interval [1,2) is split into 64 sub-intervals of width 1/64.
   For each one a linear seed polynomial is derived as described in the
   comment at the top of this file, and one line of the form
   "\t.long 0x<hex>" is printed.  Each word holds the magnitude of the
   linear coefficient in the upper 12 bits and the constant coefficient
   in the lower 20 bits.

   The packing is done in unsigned arithmetic and the value is passed to
   printf as unsigned, so the output does not depend on the width of
   long on the build host.

   Build note: this function uses printf, memset, sqrt and, in the
   disabled debug code, log.  */
int
main (void)
{
  long double T[5]; /* Taylor polynom */
  long double P[5][5];
  int i, j;
  long double X0, X1, X2, S;
  long double inc = 1./64;
  long double D = inc*0.5;
  long i0, i1, i2, io;

  /* Entries of P that do not depend on the interval.  */
  memset (P, 0, sizeof (P));
  P[0][0] = 1.;
  for (i = 1; i < 5; i++)
    P[i][i] = 1 << (i-1);
  P[2][0] = -D*D;
  for (X0 = 1.; X0 < 2.; X0 += inc)
    {
      /* [X0,X2) is the current interval, X1 its mid-point.  */
      X1 = X0 + inc * 0.5;
      X2 = X0 + inc;
      S = D / X1;
      /* Taylor coefficients of 1/X around X1.  */
      T[0] = 1./X1;
      for (i = 1; i < 5; i++)
        T[i] = T[i-1] * -T[0];
#if 0
      printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
(double)T[3], (double)T[4]);
#endif
      /* Interval-dependent entries of P, then the recurrence
         P(n+1) := 2 * X * P(n) - D^2 * P (n-1).  */
      P[1][0] = S*D;
      P[2][1] = S*D;
      for (i = 3; i < 5; i++)
        {
          P[i][0] = -D*D*P[i-2][0];
          for (j = 1; j < i; j++)
            P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
        }
#if 0
      printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
(double)P[3][3], (double)P[3][4]);
      printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
(double)P[4][3], (double)P[4][4]);
#endif
      /* Eliminate the degree 4, 3 and 2 terms of T by subtracting
         multiples of P[4], P[3] and P[2].  */
      for (i = 4; i > 1; i--)
        {
          long double a = T[i]/P[i][i];
          for (j = 0; j < i; j++)
            T[j] -= a * P[i][j];
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
#if 0
      i2 = T[2]*1024;
      long double a = (T[2]-i2/1024.)/P[2][2];
      for (j = 0; j < 2; j++)
        T[j] -= a * P[2][j];
#else
      i2 = 0;
#endif
      long double T0, Ti1;
      /* Quantize the coefficients.  The linear coefficient is rounded
         together with the bits of the constant that share its word, so
         both are re-derived over a few passes.  */
      for (i = 0, i0 = 0; i < 4; i++)
        {
          i1 = T[1]*4096. + i0 / (long double)(1 << 20) - 0.5;
          i1 = - (-i1 & 0x0fff);
          /* -i1 is in [0, 0xfff]; shift it as unsigned so that bit 31
             may be set without signed overflow.  */
          Ti1 = (((unsigned) (-i1) << 20) | i0) /-(long double)(1LL<<32LL);
          T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
          i0 = T0 * 1024 * 1024 + 0.5;
          i0 &= 0xfffff;
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
      io = ((unsigned) (-i1) << 20) | i0;
      /* A0, A1, Xm0 and Xm1 are only consumed by the disabled debug
         output below.  */
      long double A1 = (unsigned)io/-65536./65536.;
      long double A0 = ((unsigned) io << 12)/65536./65536.;
      long double Xm0 = 1./sqrt (-A1);
      long double Xm1 = 0.5+0.5*-A0/A1;
#if 0
      printf ("%f %f %f %f\n", (double)A0, (double)A1, (double) Ti1, (double)X0);
      printf ("%.12f %.12f %.12f\n",
err (A0, A1, X0), err (A0, A1, X1), err (A0, A1, X2));
      printf ("%.12f %.12f\n", (double)Xm0, (double)Xm1);
      printf ("%.12f %.12f\n", err (A0, A1, Xm0), err (A0, A1, Xm1));
#endif
      /* %x takes an unsigned int; io is a long.  */
      printf ("\t.long 0x%x\n", (unsigned) io);
    }
#if 0
  printf ("maximum error: %.15f %x %f\n", (double)merr, (unsigned)(long long)(-merr * 65536 * 65536), (double)log(-merr)/log(2));
#endif
  return 0;
}
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
/* We use a polynomial similar to a Chebyshev (Tchebycheff) polynomial to
get an initial seed, and then use a Newton-Raphson iteration step to get
an approximate result.
If this result can't be rounded to the exact result with confidence, we
round to the value between the two closest representable values, and
test if the correctly rounded value is above or below this value.
Because of the Newton-Raphson iteration step, an error in the seed at X
is amplified by X. Therefore, we don't want a Chebyshev polynomial
or a polynomial that is close to optimal according to the maximum norm
on the error of the seed value; we want one that is close to optimal
according to the maximum norm on the error of the result, i.e. we
want the maxima of the polynomial to increase linearly.
Given an interval [X0,X2) over which to approximate,
with X1 := (X0+X2)/2, D := X1-X0, F := 1/D, and S := D/X1 we have,
like for Tchebycheff polynoms:
P(0) := 1
but then we have:
P(1) := X + S*D
P(2) := 2 * X^2 + S*D * X - D^2
Then again:
P(n+1) := 2 * X * P(n) - D^2 * P (n-1)
*/
/* Generate the reciprocal seed table for the single-precision divide.

   The interval [1,2) is split into 64 sub-intervals of width 1/64.
   For each one a linear seed polynomial is derived as described in the
   comment at the top of this file, and one line of the form
   "\t.long 0x<hex>" is printed.  Each word holds the magnitude of the
   linear coefficient in the upper 13 bits and the constant coefficient
   in the lower 19 bits.

   The packing is done in unsigned arithmetic and the value is passed to
   printf as unsigned, so the output does not depend on the width of
   long on the build host.

   Build note: this function uses printf and memset.  */
int
main (void)
{
  long double T[5]; /* Taylor polynom */
  long double P[5][5];
  int i, j;
  long double X0, X1, X2, S;
  long double inc = 1./64;
  long double D = inc*0.5;
  long i0, i1, i2;

  /* Entries of P that do not depend on the interval.  */
  memset (P, 0, sizeof (P));
  P[0][0] = 1.;
  for (i = 1; i < 5; i++)
    P[i][i] = 1 << (i-1);
  P[2][0] = -D*D;
  for (X0 = 1.; X0 < 2.; X0 += inc)
    {
      /* [X0,X2) is the current interval, X1 its mid-point.  X2 is the
         upper end of the interval, i.e. X0 + inc; it is not used by
         the computation below.  */
      X1 = X0 + inc * 0.5;
      X2 = X0 + inc;
      S = D / X1;
      /* Taylor coefficients of 1/X around X1.  */
      T[0] = 1./X1;
      for (i = 1; i < 5; i++)
        T[i] = T[i-1] * -T[0];
#if 0
      printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
(double)T[3], (double)T[4]);
#endif
      /* Interval-dependent entries of P, then the recurrence
         P(n+1) := 2 * X * P(n) - D^2 * P (n-1).  */
      P[1][0] = S*D;
      P[2][1] = S*D;
      for (i = 3; i < 5; i++)
        {
          P[i][0] = -D*D*P[i-2][0];
          for (j = 1; j < i; j++)
            P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
        }
#if 0
      printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
(double)P[3][3], (double)P[3][4]);
      printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
(double)P[4][3], (double)P[4][4]);
#endif
      /* Eliminate the degree 4, 3 and 2 terms of T by subtracting
         multiples of P[4], P[3] and P[2].  */
      for (i = 4; i > 1; i--)
        {
          long double a = T[i]/P[i][i];
          for (j = 0; j < i; j++)
            T[j] -= a * P[i][j];
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
#if 0
      i2 = T[2]*512;
      long double a = (T[2]-i2/512.)/P[2][2];
      for (j = 0; j < 2; j++)
        T[j] -= a * P[2][j];
#else
      i2 = 0;
#endif
      /* Quantize the coefficients.  The linear coefficient is rounded
         together with the bits of the constant that share its word, so
         both are re-derived over a few passes.  */
      for (i = 0, i0 = 0; i < 4; i++)
        {
          long double T0, Ti1;

          i1 = T[1]*8192. + i0 / (long double)(1 << 19) - 0.5;
          i1 = - (-i1 & 0x1fff);
          /* -i1 is in [0, 0x1fff]; shift it as unsigned so that bit 31
             may be set without signed overflow.  */
          Ti1 = (((unsigned) (-i1) << 19) | i0) /-(long double)(1LL<<32LL);
          T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
          i0 = T0 * 512 * 1024 + 0.5;
          i0 &= 0x7ffff;
        }
#if 0
      printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
#endif
      /* %x takes an unsigned int; i0 and i1 are long.  */
      printf ("\t.long 0x%x\n", ((unsigned) (-i1) << 19) | (unsigned) i0);
    }
  return 0;
}
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* inputs: DBL0, DBL1
output: z flag
clobber: r12, flags
For NaNs, bit 19.. bit 30 of the high word must be set. */
/* Disabled self-check harness (compiled out by "#if 0").
   When enabled, this __eqdf2 wrapper saves r0-r11 and blink, calls
   __eqdf2_c, keeps its r0 result in r11, restores the argument
   registers and calls __eqdf2_asm.  It returns normally only when the
   Z flag agrees with the saved result (Z set if and only if that result
   was zero); otherwise it calls abort.  The #define at the end renames
   the implementation that follows to __eqdf2_asm.
   NOTE(review): __eqdf2_c is not defined here; presumably it is a C
   reference implementation supplied by the test setup.  */
#if 0 /* DEBUG */
.global __eqdf2
.balign 4
FUNC(__eqdf2)
__eqdf2:
/* Save r11, blink and r10 down to r0; the backquote separates several
   instructions on one line.  */
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
bl.d __eqdf2_c` push_s r0
/* r11 = result of the first call; then restore r0-r10.  */
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
bl.d __eqdf2_asm` ld.ab r10,[sp,4]
pop_s blink
/* Saved result zero: Z must be set.  Otherwise Z must be clear.  */
breq.d r11,0,0f
ld.ab r11,[sp,4]
jne_s [blink]
bl abort
0: jeq_s [blink]
bl abort
ENDFUNC(__eqdf2)
#define __eqdf2 __eqdf2_asm
#endif /* DEBUG */
/* __eqdf2: double precision equality test.
   inputs: DBL0, DBL1 (DBL0H/DBL0L and DBL1H/DBL1L are the high and low
	   words; the register mapping comes from arc-ieee-754.h)
   output: Z flag
   clobber: r12, flags  */
.global __eqdf2
.balign 4
HIDDEN_FUNC(__eqdf2)
/* Good performance as long as the difference in high word is
well predictable (as seen from the branch predictor). */
__eqdf2:
/* Different high words are handled at .Lhighdiff; r12 is prepared in
   the delay slot for the NaN check below.  */
brne.d DBL0H,DBL1H,.Lhighdiff
bmsk r12,DBL0H,20
#ifdef DPFP_COMPAT
/* With DPFP_COMPAT, non-zero low words also feed into the NaN check.  */
or.f 0,DBL0L,DBL1L
bset.ne r12,r12,21
#endif /* DPFP_COMPAT */
add1.f r12,r12,DBL0H /* set c iff NaN; also, clear z if NaN. */
/* The low words are compared only when carry is clear (not a NaN), so
   a NaN leaves Z clear.  */
j_s.d [blink]
cmp.cc DBL0L,DBL1L
.balign 4
/* High words differ.  Z is set only when both low words are zero and
   the OR of the high words has no bit set in bits 0..30.
   NOTE(review): presumably this is the +0 == -0 case.  */
.Lhighdiff:
or r12,DBL0H,DBL1H
or.f 0,DBL0L,DBL1L
j_s.d [blink]
bmsk.eq.f r12,r12,30
ENDFUNC(__eqdf2)
/* ??? could we do better by speeding up some 'common' case of inequality? */
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* inputs: r0, r1
output: z flag
clobber: r12, flags
For NaNs, bit 22 .. bit 30 must be set. */
/* Disabled self-check harness (compiled out by "#if 0").
   When enabled, this __eqsf2 wrapper saves r0-r11 and blink, calls
   __eqsf2_c, keeps its r0 result in r11, restores the argument
   registers and calls __eqsf2_asm.  It returns normally only when the
   Z flag agrees with the saved result (Z set if and only if that result
   was zero); otherwise it calls abort.  The #define at the end renames
   the implementation that follows to __eqsf2_asm.
   NOTE(review): __eqsf2_c is not defined here; presumably it is a C
   reference implementation supplied by the test setup.  */
#if 0 /* DEBUG */
.global __eqsf2
.balign 4
FUNC(__eqsf2)
__eqsf2:
/* Save r11, blink and r10 down to r0; the backquote separates several
   instructions on one line.  */
st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
bl.d __eqsf2_c` push_s r0
/* r11 = result of the first call; then restore r0-r10.  */
mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
bl.d __eqsf2_asm` ld.ab r10,[sp,4]
pop_s blink
/* Saved result zero: Z must be set.  Otherwise Z must be clear.  */
breq.d r11,0,0f
ld.ab r11,[sp,4]
jne_s [blink]
bl abort
0: jeq_s [blink]
bl abort
ENDFUNC(__eqsf2)
#define __eqsf2 __eqsf2_asm
#endif /* DEBUG */
/* Good performance as long as the binary difference is
well predictable (as seen from the branch predictor). */
/* __eqsf2: single precision equality test.
   inputs: r0, r1
   output: Z flag
   clobber: r12, flags  */
.global __eqsf2
.balign 4
HIDDEN_FUNC(__eqsf2)
__eqsf2:
breq r0, r1,.Lno_bdiff
/* Bit patterns differ.  Z is set only when the OR of both operands has
   no bit set in bits 0..30.
   NOTE(review): presumably this is the +0 == -0 case.  */
or r12,r0,r1
j_s.d [blink]
bmsk.f 0,r12,30
/* Bit patterns are identical: equal unless the value is a NaN.  */
.Lno_bdiff:
bmsk r12,r0,23
add1.f r12,r12,r0 /* set c iff NaN; also, clear z if NaN. */
/* The compare runs only when carry is clear (not a NaN) and then sets
   Z, because r0 and r1 are identical on this path.  */
j_s.d [blink]
cmp.cc r0,r1
ENDFUNC(__eqsf2)
/* Copyright (C) 2006, 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harness (compiled out by "#if 0").
   When enabled, this __extendsfdf2 wrapper calls __extendsfdf2_c and
   then __extendsfdf2_asm with the same r0 argument, and calls abort
   unless both calls return the same r0/r1 pair.  The #define at the end
   renames the implementation that follows to __extendsfdf2_asm.
   NOTE(review): __extendsfdf2_c is not defined here; presumably it is
   a C reference implementation supplied by the test setup.  */
#if 0 /* DEBUG */
.global __extendsfdf2
.balign 4
FUNC(__extendsfdf2)
__extendsfdf2:
push_s blink
bl.d __extendsfdf2_c
push_s r0
/* r2 = original argument; the two result words of the first call are
   kept on the stack.  */
ld_s r2,[sp]
st_s r1,[sp]
push_s r0
bl.d __extendsfdf2_asm
mov_s r0,r2
/* r2/r3 = result of the first call, r0/r1 = result of the second.  */
pop_s r2
pop_s r3
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink]
bl abort
ENDFUNC(__extendsfdf2)
#define __extendsfdf2 __extendsfdf2_asm
#endif /* DEBUG */
/* Disabled and incomplete sketch of an ARC600 variant of
   __extendsfdf2.  The ".." line is a placeholder for code that was
   never written, so this fragment cannot be enabled as it stands.  */
#if 0 /* ARC600 */
__extendsfdf2:
lsr r2,r0,23
tst r2,0xff
bic.ne.f r2,0xff
beq_s .Linf_nan_denorm_0
..
.Linf_nan_denorm:
bbit1 r0,30,.Linf_nan
#endif
/* __extendsfdf2: extend the single precision value in r0 to double
   precision.
   input: r0
   output: DBL0H (high word) and DBL0L (low word); the register mapping
	   comes from arc-ieee-754.h and depends on endianness
   Registers written in this block: r0-r3 and the flags.
   Both endian variants compute the same result; they differ only in
   the order in which registers are overwritten.  */
.global __extendsfdf2
.balign 4
FUNC(__extendsfdf2)
__extendsfdf2:
/* Double the input: this shifts the sign bit out and sets the flags
   used below (carry is consumed by the bxor.cs instructions, Z by the
   zero check at .Linf_nan_denorm_0).  */
add.f r1,r0,r0
norm r3,r1
/* Common case: move exponent and fraction into place and add
   0x38000000, which is 0x380 << 20 (0x380 = 1023 - 127).  A norm result
   of 7 or more is diverted to .Linf_nan_denorm_0.  */
#ifdef __LITTLE_ENDIAN__
lsr_s DBL0H,r1,4
brhs r3,7,.Linf_nan_denorm_0
asl_s DBL0L,r0,29
add_s DBL0H,DBL0H, \
0x38000000
#else
lsr r2,r1,4
brhs r3,7,.Linf_nan_denorm_0
asl_s DBL0L,r1,28
add DBL0H,r2, \
0x38000000
#endif
/* Toggle bit 31 of the high word if carry is set.  */
j_s.d [blink]
bxor.cs DBL0H,DBL0H,31
.balign 4
/* Zero, denormal, Inf or NaN input (per the label names).  A zero
   result of the doubling returns at once; a negative norm result goes
   to .Linf_nan; anything else is normalized as a denormal.  */
.Linf_nan_denorm_0:
#ifdef __LITTLE_ENDIAN__
mov_s DBL0H,r0
jeq.d [blink]
mov.eq DBL0L,0
#else
jeq_s [blink]
#endif
bmi .Linf_nan
/* Denormal input: shift the fraction up by the norm count and derive
   the exponent field from 0x380+6 minus that count.  */
asl_s r0,r0,r3
rsub r3,r3,0x380+6
#ifdef __LITTLE_ENDIAN__
asl_s r3,r3,20
lsr DBL0H,r0,9
asl_s DBL0L,r0,23
add_s DBL0H,DBL0H,r3
j_s.d [blink]
bxor.cs DBL0H,DBL0H,31
#else
asl DBL0L,r0,23
lsr_s DBL0H,r0,9
asl_s r3,r3,20
bxor.cs DBL0H,DBL0H,31
j_s.d [blink]
add_l DBL0H,DBL0H,r3
#endif
/* Inf or NaN input: high word = r0 | (r0 >> 3), low word = 0.  */
.Linf_nan:
#ifdef __LITTLE_ENDIAN__
lsr DBL0H,r0,3
or_s DBL0H,DBL0H,r0
j_s.d [blink]
mov_l DBL0L,0
#else
lsr r3,r0,3
mov_s DBL0L,0
j_s.d [blink]
or_l DBL0H,r0,r3
#endif
ENDFUNC(__extendsfdf2)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harness (compiled out by "#if 0").
   When enabled, this __fixdfsi wrapper calls __fixdfsi_c and then
   __fixdfsi_asm with the same r0/r1 argument pair, and calls abort
   unless both calls return the same value in r0.  The #define at the
   end renames the implementation that follows to __fixdfsi_asm.
   NOTE(review): __fixdfsi_c is not defined here; presumably it is a C
   reference implementation supplied by the test setup.  */
#if 0 /* DEBUG */
FUNC(__fixdfsi)
.global __fixdfsi
.balign 4
__fixdfsi:
push_s blink
push_s r0
bl.d __fixdfsi_c
push_s r1
/* r2 = result of the first call; restore r1 and r0, and park the
   first result in the stack slot that held r0.  */
mov_s r2,r0
pop_s r1
ld r0,[sp]
bl.d __fixdfsi_asm
st r2,[sp]
/* r1 = result of the first call, r0 = result of the second.  */
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
bl abort
ENDFUNC(__fixdfsi)
#define __fixdfsi __fixdfsi_asm
#endif /* DEBUG */
/* If the fraction has to be shifted left by a positive non-zero amount,
we have to combine bits from DBL0L and DBL0H. If we shift right,
or shift by zero, we only want to have the bits from DBL0H in r0. */
/* __fixdfsi: convert the double precision value in DBL0 to a signed
   integer.
   input: DBL0H (high word) and DBL0L (low word); the register mapping
	  comes from arc-ieee-754.h and depends on endianness
   output: r0
   Registers written in this block: r0-r3 and the flags.
   NOTE(review): no saturation or NaN handling is visible here; results
   for out-of-range inputs should be confirmed against the callers'
   expectations.  */
.global __fixdfsi
FUNC(__fixdfsi)
.balign 4
__fixdfsi:
/* Bit 30 of the high word clear: take the short path.  */
bbit0 DBL0H,30,.Lret0or1
/* r2 = left shift count (exponent field minus 19), r3 = -r2 = right
   shift count; DBL0H = fraction bits 0..19 with bit 20 set.  */
asr r2,DBL0H,20
bmsk_s DBL0H,DBL0H,19
sub_s r2,r2,19; 0x3ff+20-0x400
neg_s r3,r2
/* Sets carry from the shift count; carry set selects the "shift
   right, high word only" case described above the function.  */
asr.f 0,r3,11
bset_s DBL0H,DBL0H,20
#ifdef __LITTLE_ENDIAN__
mov.cs DBL0L,DBL0H
asl DBL0H,DBL0H,r2
#else
asl.cc DBL0H,DBL0H,r2
lsr.cs DBL0H,DBL0H,r3
#endif
lsr_s DBL0L,DBL0L,r3
/* Combine the two parts when shifting left, then negate if the N flag
   is clear.
   NOTE(review): N presumably still reflects the asr.f of the arithmetic
   shift of the high word, i.e. the inverse of the input sign.  */
add.cc r0,r0,r1
j_s.d [blink]
neg.pl r0,r0
/* Short path: the result is 0, 1 or -1.  Adding 0x100000 carries into
   bit 30 when the exponent field is all ones below that bit.  */
.Lret0or1:
add.f r0,DBL0H,0x100000
lsr_s r0,r0,30
bmsk_s r0,r0,0
j_s.d [blink]
neg.mi r0,r0
ENDFUNC(__fixdfsi)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harness (compiled out by "#if 0").
   When enabled, this __fixsfsi wrapper calls __fixsfsi_c and then
   __fixsfsi_asm with the same r0 argument, and calls abort unless both
   calls return the same value in r0.  The #define at the end renames
   the implementation that follows to __fixsfsi_asm.
   NOTE(review): __fixsfsi_c is not defined here; presumably it is a C
   reference implementation supplied by the test setup.  */
#if 0 /* DEBUG */
.global __fixsfsi
FUNC(__fixsfsi)
.balign 4
__fixsfsi:
push_s blink
bl.d __fixsfsi_c
push_s r0
/* r1 = original argument; its stack slot now holds the first result.  */
ld_s r1,[sp]
st_s r0,[sp]
bl.d __fixsfsi_asm
mov_s r0,r1
/* r1 = result of the first call, r0 = result of the second.  */
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
bl abort
ENDFUNC(__fixsfsi)
#define __fixsfsi __fixsfsi_asm
#endif /* DEBUG */
/* __fixsfsi: convert the single precision value in r0 to a signed
   integer.
   input: r0
   output: r0
   Registers written in this block: r0, r2, r3 and the flags.
   NOTE(review): no saturation or NaN handling is visible here; results
   for out-of-range inputs should be confirmed against the callers'
   expectations.  */
.global __fixsfsi
FUNC(__fixsfsi)
.balign 4
__fixsfsi:
/* Bit 30 clear: take the short path.  */
bbit0 r0,30,.Lret0or1
/* r2 = sign and exponent field; r0 = fraction bits 0..22 with bit 23
   set.  */
lsr r2,r0,23
bmsk_s r0,r0,22
bset_s r0,r0,23
sub_s r2,r2,22;0x7f+23-0x80
/* The flags from this shift select the shift direction (N) and the
   final negation (carry).
   NOTE(review): carry presumably receives the input sign bit, which
   sits in bit 8 of r2 before the shift by 24.  */
asl.f 0,r2,24
neg r3,r2
asl.mi r0,r0,r2
lsr.pl r0,r0,r3
j_s.d [blink]
neg.cs r0,r0
/* Short path: the result is 0, 1 or -1.  Adding 0x800000 carries into
   bit 30 when the exponent field is all ones below that bit.  */
.Lret0or1:
add.f r0,r0,0x800000
lsr_s r0,r0,30
bmsk_s r0,r0,0
j_s.d [blink]
neg.mi r0,r0
ENDFUNC(__fixsfsi)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harness (compiled out by "#if 0").
   When enabled, this __fixunsdfsi wrapper calls __fixunsdfsi_c and
   then __fixunsdfsi_asm with the same r0/r1 argument pair, and calls
   abort unless both calls return the same value in r0.  The #define at
   the end renames the implementation that follows to
   __fixunsdfsi_asm.
   NOTE(review): __fixunsdfsi_c is not defined here; presumably it is a
   C reference implementation supplied by the test setup.  */
#if 0 /* DEBUG */
FUNC(__fixunsdfsi)
.global __fixunsdfsi
.balign 4
__fixunsdfsi:
push_s blink
push_s r0
bl.d __fixunsdfsi_c
push_s r1
/* r2 = result of the first call; restore r1 and r0, and park the
   first result in the stack slot that held r0.  */
mov_s r2,r0
pop_s r1
ld r0,[sp]
bl.d __fixunsdfsi_asm
st r2,[sp]
/* r1 = result of the first call, r0 = result of the second.  */
pop_s r1
pop_s blink
cmp r0,r1
jeq_s [blink]
bl abort
ENDFUNC(__fixunsdfsi)
#define __fixunsdfsi __fixunsdfsi_asm
#endif /* DEBUG */
/* __fixunsdfsi: convert the double precision value in DBL0 to an
   unsigned integer.
   input: DBL0H (high word) and DBL0L (low word); the register mapping
	  comes from arc-ieee-754.h and depends on endianness
   output: r0
   Registers written in this block: r0-r3 and the flags.
   NOTE(review): the sign bit is not examined on the main path and no
   saturation or NaN handling is visible; behaviour for negative or
   out-of-range inputs should be confirmed against the callers'
   expectations.  */
.global __fixunsdfsi
FUNC(__fixunsdfsi)
.balign 4
__fixunsdfsi:
/* Bit 30 of the high word clear: take the short path.  */
bbit0 DBL0H,30,.Lret0or1
/* r2 = left shift count (exponent field minus 19), r3 = -r2 = right
   shift count; DBL0H = fraction bits 0..19 with bit 20 set.  */
lsr r2,DBL0H,20
bmsk_s DBL0H,DBL0H,19
sub_s r2,r2,19; 0x3ff+20-0x400
neg_s r3,r2
/* Z reflects bit 10 of the right shift count: Z set selects the left
   shift that combines both words, Z clear the right shift of the high
   word only.  */
btst_s r3,10
bset_s DBL0H,DBL0H,20
#ifdef __LITTLE_ENDIAN__
mov.ne DBL0L,DBL0H
asl DBL0H,DBL0H,r2
#else
asl.eq DBL0H,DBL0H,r2
lsr.ne DBL0H,DBL0H,r3
#endif
lsr DBL0L,DBL0L,r3
j_s.d [blink]
add.eq r0,r0,r1
/* NOTE(review): .Lret0 is not referenced anywhere in this function.  */
.Lret0:
j_s.d [blink]
mov_l r0,0
/* Short path: the result is 0 or 1.  Adding 0x100000 carries into bit
   30 when the exponent field is all ones below that bit.  */
.Lret0or1:
add_s DBL0H,DBL0H,0x100000
lsr_s DBL0H,DBL0H,30
j_s.d [blink]
bmsk_l r0,DBL0H,0
ENDFUNC(__fixunsdfsi)
/* Copyright (C) 2008-2012 Free Software Foundation, Inc.
Contributor: Joern Rennecke <joern.rennecke@embecosm.com>
on behalf of Synopsys Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "arc-ieee-754.h"
/* Disabled self-check harness (compiled out by "#if 0").
   When enabled, this __floatsidf wrapper calls __floatsidf_c and then
   __floatsidf_asm with the same r0 argument, and calls abort unless
   both calls return the same r0/r1 pair.  The #define at the end
   renames the implementation that follows to __floatsidf_asm.
   NOTE(review): __floatsidf_c is not defined here; presumably it is a
   C reference implementation supplied by the test setup.  */
#if 0 /* DEBUG */
.global __floatsidf
.balign 4
FUNC(__floatsidf)
__floatsidf:
push_s blink
bl.d __floatsidf_c
push_s r0
/* r2 = original argument; the two result words of the first call are
   kept on the stack.  */
ld_s r2,[sp]
st_s r1,[sp]
push_s r0
bl.d __floatsidf_asm
mov_s r0,r2
/* r2/r3 = result of the first call, r0/r1 = result of the second.  */
pop_s r2
pop_s r3
pop_s blink
cmp r0,r2
cmp.eq r1,r3
jeq_s [blink]
bl abort
ENDFUNC(__floatsidf)
#define __floatsidf __floatsidf_asm
#endif /* DEBUG */
/* __floatsidf: convert the signed integer in r0 to double precision.
   input: r0
   output: DBL0H (high word) and DBL0L (low word); the register mapping
	   comes from arc-ieee-754.h and depends on endianness
   Registers written in this block: r0-r3, r12 and the flags.  */
.global __floatsidf
.balign 4
FUNC(__floatsidf)
__floatsidf:
/* r1 = |r0|; a zero input returns immediately.
   NOTE(review): on that early return r0 and r1 are both zero, which
   presumably represents +0.0.  */
abs.f r1,r0
jeq_s [blink]
/* r2 = norm count of |r0| >> 1; r12 starts as the negated exponent
   constant.  */
lsr r2,r1
mov r12,-0x41d ; -(0x3ff+31-1)
norm r2,r2
/* If carry is set, clear bit 11 of r12.
   NOTE(review): carry presumably comes from abs.f and marks a negative
   input, so this folds the sign bit into the exponent term that is
   shifted by 20 and subtracted below.  */
bclr.cs r12,r12,11
/* r3 = 11 - norm count (right shift for the high word); the flags from
   this subtraction select the fix-ups at the end.  r2 becomes the left
   shift count for the low word.  */
rsub.f r3,r2,11
add_s r12,r2,r12
add_s r2,r2,21
#ifdef __LITTLE_ENDIAN__
asl DBL0L,r1,r2
lsr_s DBL0H,r1,r3
#else
lsr DBL0H,r1,r3
asl_s DBL0L,r1,r2
#endif
asl_s r12,r12,20
/* When all significant bits belong in the high word, take them from
   the left-shifted value and clear the low word.  */
mov.lo DBL0H,DBL0L
sub_s DBL0H,DBL0H,r12
j_s.d [blink]
mov.ls DBL0L,0
ENDFUNC(__floatsidf)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment