Commit 09fa8841 by Bob Wilson (committed by Bob Wilson)

longlong.h (__xtensa__): Add definitions for umul_ppmm...

	* longlong.h (__xtensa__): Add definitions for umul_ppmm, __umulsidi3,
	count_leading_zeros, and count_trailing_zeros.
	* config/xtensa/xtensa.c (TARGET_INIT_BUILTINS): Define.
	(TARGET_FOLD_BUILTIN): Define.
	(TARGET_EXPAND_BUILTIN): Define.
	(xtensa_init_builtins): New.
	(xtensa_fold_builtin): New.
	(xtensa_expand_builtin): New.
	(xtensa_rtx_costs): Add CTZ and CLZ.  Adjust costs for MULT.
	* config/xtensa/xtensa.h (TARGET_MUL32_HIGH): Define.
	(CLZ_DEFINED_VALUE_AT_ZERO): Define.
	(CTZ_DEFINED_VALUE_AT_ZERO): Define.
	* config/xtensa/xtensa.md (UNSPEC_NSAU): Remove.
	(any_extend): New code macro.
	(u, su): New code attributes.
	(<u>mulsidi3, <u>mulsi3_highpart, clzsi2, ctzsi2): New.
	(nsau): Remove; replaced by clzsi2.
	(ffssi2): Use clzsi2.
	* config/xtensa/t-xtensa (LIB1ASMFUNCS): Add _umulsidi3,
	_clzsi2, _ctzsi2, and _ffssi2.  Rename _nsau to _clz.
	* config/xtensa/lib1funcs.asm (__mulsi3): Support Mul32 option.
	(__umulsidi3, __clzsi2, __ctzsi2, __ffssi2): New.
	(__nsau_data): Guard with ifdef L_clz instead of L_nsau.

From-SVN: r118524
parent a548d7b7
2006-11-06 Bob Wilson <bob.wilson@acm.org>
* longlong.h (__xtensa__): Add definitions for umul_ppmm, __umulsidi3,
count_leading_zeros, and count_trailing_zeros.
* config/xtensa/xtensa.c (TARGET_INIT_BUILTINS): Define.
(TARGET_FOLD_BUILTIN): Define.
(TARGET_EXPAND_BUILTIN): Define.
(xtensa_init_builtins): New.
(xtensa_fold_builtin): New.
(xtensa_expand_builtin): New.
(xtensa_rtx_costs): Add CTZ and CLZ. Adjust costs for MULT.
* config/xtensa/xtensa.h (TARGET_MUL32_HIGH): Define.
(CLZ_DEFINED_VALUE_AT_ZERO): Define.
(CTZ_DEFINED_VALUE_AT_ZERO): Define.
* config/xtensa/xtensa.md (UNSPEC_NSAU): Remove.
(any_extend): New code macro.
(u, su): New code attributes.
(<u>mulsidi3, <u>mulsi3_highpart, clzsi2, ctzsi2): New.
(nsau): Remove; replaced by clzsi2.
(ffssi2): Use clzsi2.
* config/xtensa/t-xtensa (LIB1ASMFUNCS): Add _umulsidi3,
_clzsi2, _ctzsi2, and _ffssi2. Rename _nsau to _clz.
* config/xtensa/lib1funcs.asm (__mulsi3): Support Mul32 option.
(__umulsidi3, __clzsi2, __ctzsi2, __ffssi2): New.
(__nsau_data): Guard with ifdef L_clz instead of L_nsau.
2006-11-06  Vladimir Prus  <vladimir@codesourcery.com>

	* config/arm/t-strongarm-pe: (TARGET_LIBGCC2_CFLAGS): Do no
......
...@@ -99,7 +99,10 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA ...@@ -99,7 +99,10 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
__mulsi3: __mulsi3:
leaf_entry sp, 16 leaf_entry sp, 16
#if XCHAL_HAVE_MUL16 #if XCHAL_HAVE_MUL32
mull a2, a2, a3
#elif XCHAL_HAVE_MUL16
or a4, a2, a3 or a4, a2, a3
srai a4, a4, 16 srai a4, a4, 16
bnez a4, .LMUL16 bnez a4, .LMUL16
...@@ -124,7 +127,7 @@ __mulsi3: ...@@ -124,7 +127,7 @@ __mulsi3:
slli a5, a5, 16 slli a5, a5, 16
add a2, a4, a5 add a2, a4, a5
#else /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */ #else /* !MUL32 && !MUL16 && !MAC16 */
/* Multiply one bit at a time, but unroll the loop 4x to better /* Multiply one bit at a time, but unroll the loop 4x to better
exploit the addx instructions and avoid overhead. exploit the addx instructions and avoid overhead.
...@@ -188,7 +191,7 @@ __mulsi3: ...@@ -188,7 +191,7 @@ __mulsi3:
neg a3, a2 neg a3, a2
movltz a2, a3, a5 movltz a2, a3, a5
#endif /* !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MAC16 */ #endif /* !MUL32 && !MUL16 && !MAC16 */
leaf_return leaf_return
.size __mulsi3,.-__mulsi3 .size __mulsi3,.-__mulsi3
...@@ -196,6 +199,170 @@ __mulsi3: ...@@ -196,6 +199,170 @@ __mulsi3:
#endif /* L_mulsi3 */ #endif /* L_mulsi3 */
#ifdef L_umulsidi3

/* unsigned long long __umulsidi3 (unsigned x, unsigned y)

   Full 32 x 32 -> 64-bit unsigned multiply.  The two halves of the
   64-bit result are returned in a2/a3; which register holds the high
   word depends on endianness (see the wh/wl defines below).  The code
   picks the best strategy for the configured Xtensa multiply
   hardware: MUL32_HIGH, MUL32, MUL16, MAC16, or a pure software
   shift-and-add fallback.  */
.align 4
.global __umulsidi3
.type __umulsidi3,@function
__umulsidi3:
leaf_entry sp, 32
#if __XTENSA_CALL0_ABI__
/* CALL0 ABI: a12-a15 are callee-saved and the no-hardware path below
   clobbers them, so spill them to the stack here.
   NOTE(review): the matching reloads and "addi sp, sp, 32" before
   leaf_return are not visible in this excerpt -- confirm they exist
   in the full file.  */
addi sp, sp, -32
s32i a12, sp, 16
s32i a13, sp, 20
s32i a14, sp, 24
s32i a15, sp, 28
#endif
/* Map the high/low words of the result onto the return registers.  */
#ifdef __XTENSA_EB__
#define wh a2
#define wl a3
#else
#define wh a3
#define wl a2
#endif /* __XTENSA_EB__ */
/* This code is taken from the mulsf3 routine in ieee754-sf.S.
See more comments there. */
#if XCHAL_HAVE_MUL32_HIGH
/* Direct hardware support: MULL produces the low 32 bits and MULUH
   the high 32 bits of the unsigned product.  */
mull a6, a2, a3
muluh wh, a2, a3
mov wl, a6
#else /* ! MUL32_HIGH */
#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
/* a0 and a8 will be clobbered by calling the multiply function
but a8 is not used here and need not be saved. */
/* NOTE(review): no reload of a0 is visible before leaf_return in this
   excerpt -- confirm against the full file.  */
s32i a0, sp, 0
#endif
#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32
#define a2h a4
#define a3h a5
/* Get the high halves of the inputs into registers. */
srli a2h, a2, 16
srli a3h, a3, 16
#define a2l a2
#define a3l a3
#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
/* Clear the high halves of the inputs. This does not matter
for MUL16 because the high bits are ignored. */
extui a2, a2, 0, 16
extui a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */
/* do_mul(dst, x, xhalf, y, yhalf): 16 x 16 -> 32-bit product of the
   selected halves ("l" or "h") of x and y, using whatever multiply
   hardware is configured.  */
#if XCHAL_HAVE_MUL16
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
mul16u dst, xreg ## xhalf, yreg ## yhalf
#elif XCHAL_HAVE_MUL32
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
mull dst, xreg ## xhalf, yreg ## yhalf
#elif XCHAL_HAVE_MAC16
/* The preprocessor insists on inserting a space when concatenating after
a period in the definition of do_mul below. These macros are a workaround
using underscores instead of periods when doing the concatenation. */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
umul_aa_ ## xhalf ## yhalf xreg, yreg; \
rsr dst, ACCLO
#else /* no multiply hardware */
/* Software fallback: pass the 16-bit halves to .Lmul_mulsi3 below,
   which uses a custom ABI (arguments in a13/a14, result in a12).  */
#define set_arg_l(dst, src) \
extui dst, src, 0, 16
#define set_arg_h(dst, src) \
srli dst, src, 16
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
set_arg_ ## xhalf (a13, xreg); \
set_arg_ ## yhalf (a14, yreg); \
call0 .Lmul_mulsi3; \
mov dst, a12
#endif
/* Sum the two middle partial products, x_l*y_h + x_h*y_l.  */
/* Add pp1 and pp2 into a6 with carry-out in a9. */
do_mul(a6, a2, l, a3, h) /* pp 1 */
do_mul(a11, a2, h, a3, l) /* pp 2 */
movi a9, 0
add a6, a6, a11
bgeu a6, a11, 1f /* unsigned sum < addend => carry out */
addi a9, a9, 1
1:
/* Shift the high half of a9/a6 into position in a9. Note that
this value can be safely incremented without any carry-outs. */
ssai 16
src a9, a9, a6
/* Compute the low word into a6. */
do_mul(a11, a2, l, a3, l) /* pp 0 */
sll a6, a6
add a6, a6, a11
bgeu a6, a11, 1f
addi a9, a9, 1
1:
/* Compute the high word into wh. */
do_mul(wh, a2, h, a3, h) /* pp 3 */
add wh, wh, a9
mov wl, a6
#endif /* !MUL32_HIGH */
leaf_return

#if !XCHAL_HAVE_MUL16 && !XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MAC16
/* For Xtensa processors with no multiply hardware, this simplified
version of _mulsi3 is used for multiplying 16-bit chunks of
the floating-point mantissas. It uses a custom ABI: the inputs
are passed in a13 and a14, the result is returned in a12, and
a8 and a15 are clobbered. */
.align 4
.Lmul_mulsi3:
movi a12, 0
/* Shift-and-add multiply, unrolled 4 bits per iteration using the
   ADDX2/ADDX4/ADDX8 shifted-add instructions.  */
.Lmul_mult_loop:
add a15, a14, a12
extui a8, a13, 0, 1
movnez a12, a15, a8
do_addx2 a15, a14, a12, a15
extui a8, a13, 1, 1
movnez a12, a15, a8
do_addx4 a15, a14, a12, a15
extui a8, a13, 2, 1
movnez a12, a15, a8
do_addx8 a15, a14, a12, a15
extui a8, a13, 3, 1
movnez a12, a15, a8
srli a13, a13, 4
slli a14, a14, 4
bnez a13, .Lmul_mult_loop
ret
#endif /* !MUL16 && !MUL32 && !MAC16 */
.size __umulsidi3,.-__umulsidi3
#endif /* L_umulsidi3 */
/* Define a macro for the NSAU (unsigned normalize shift amount) /* Define a macro for the NSAU (unsigned normalize shift amount)
instruction, which computes the number of leading zero bits, instruction, which computes the number of leading zero bits,
to handle cases where it is not included in the Xtensa processor to handle cases where it is not included in the Xtensa processor
...@@ -225,7 +392,7 @@ __mulsi3: ...@@ -225,7 +392,7 @@ __mulsi3:
#endif /* !XCHAL_HAVE_NSA */ #endif /* !XCHAL_HAVE_NSA */
.endm .endm
#ifdef L_nsau #ifdef L_clz
.section .rodata .section .rodata
.align 4 .align 4
.global __nsau_data .global __nsau_data
...@@ -251,7 +418,54 @@ __nsau_data: ...@@ -251,7 +418,54 @@ __nsau_data:
#endif /* !XCHAL_HAVE_NSA */ #endif /* !XCHAL_HAVE_NSA */
.size __nsau_data,.-__nsau_data .size __nsau_data,.-__nsau_data
.hidden __nsau_data .hidden __nsau_data
#endif /* L_nsau */ #endif /* L_clz */
#ifdef L_clzsi2
/* int __clzsi2 (unsigned x)
   Count leading zeros.  The do_nsau macro (defined earlier in this
   file) uses the NSAU instruction when the configuration has it,
   otherwise a lookup-table sequence; the last two operands are
   scratch registers for the non-NSA case.  */
.align 4
.global __clzsi2
.type __clzsi2,@function
__clzsi2:
leaf_entry sp, 16
do_nsau a2, a2, a3, a4 /* a2 = number of leading zeros in a2 */
leaf_return
.size __clzsi2,.-__clzsi2
#endif /* L_clzsi2 */
#ifdef L_ctzsi2
/* int __ctzsi2 (unsigned x)
   Count trailing zeros.  Isolate the least-significant set bit with
   (x & -x); for a single set bit at index k, NSAU returns 31 - k, so
   31 - nsau recovers k.  For x == 0, NSAU returns 32 and the result
   is -1, matching CTZ_DEFINED_VALUE_AT_ZERO in xtensa.h.  */
.align 4
.global __ctzsi2
.type __ctzsi2,@function
__ctzsi2:
leaf_entry sp, 16
neg a3, a2 /* a3 = -x */
and a3, a3, a2 /* a3 = x & -x: lowest set bit only */
do_nsau a2, a3, a4, a5 /* a2 = 31 - bit index (32 if x == 0) */
neg a2, a2
addi a2, a2, 31 /* a2 = 31 - nsau = bit index */
leaf_return
.size __ctzsi2,.-__ctzsi2
#endif /* L_ctzsi2 */
#ifdef L_ffssi2
/* int __ffssi2 (unsigned x)
   Find-first-set: index of the least-significant one bit plus one,
   or 0 when x == 0.  Same sequence as __ctzsi2 but with a final
   offset of 32 instead of 31: 32 - nsau = bit index + 1, and for
   x == 0 NSAU returns 32, giving the required result of 0.  */
.align 4
.global __ffssi2
.type __ffssi2,@function
__ffssi2:
leaf_entry sp, 16
neg a3, a2 /* a3 = -x */
and a3, a3, a2 /* a3 = x & -x: lowest set bit only */
do_nsau a2, a3, a4, a5 /* a2 = 31 - bit index (32 if x == 0) */
neg a2, a2
addi a2, a2, 32 /* a2 = 32 - nsau = ffs(x) */
leaf_return
.size __ffssi2,.-__ffssi2
#endif /* L_ffssi2 */
#ifdef L_udivsi3 #ifdef L_udivsi3
......
LIB1ASMSRC = xtensa/lib1funcs.asm LIB1ASMSRC = xtensa/lib1funcs.asm
LIB1ASMFUNCS = _mulsi3 _nsau _divsi3 _modsi3 _udivsi3 _umodsi3 \ LIB1ASMFUNCS = _mulsi3 _divsi3 _modsi3 _udivsi3 _umodsi3 \
_umulsidi3 _clz _clzsi2 _ctzsi2 _ffssi2 \
_negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \ _negsf2 _addsubsf3 _mulsf3 _divsf3 _cmpsf2 _fixsfsi _fixsfdi \
_fixunssfsi _fixunssfdi _floatsisf _floatunsisf \ _fixunssfsi _fixunssfdi _floatsisf _floatunsisf \
_floatdisf _floatundisf \ _floatdisf _floatundisf \
......
...@@ -210,6 +210,9 @@ static bool xtensa_rtx_costs (rtx, int, int, int *); ...@@ -210,6 +210,9 @@ static bool xtensa_rtx_costs (rtx, int, int, int *);
static tree xtensa_build_builtin_va_list (void); static tree xtensa_build_builtin_va_list (void);
static bool xtensa_return_in_memory (tree, tree); static bool xtensa_return_in_memory (tree, tree);
static tree xtensa_gimplify_va_arg_expr (tree, tree, tree *, tree *); static tree xtensa_gimplify_va_arg_expr (tree, tree, tree *, tree *);
static void xtensa_init_builtins (void);
static tree xtensa_fold_builtin (tree, tree, bool);
static rtx xtensa_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] = static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] =
REG_ALLOC_ORDER; REG_ALLOC_ORDER;
...@@ -265,6 +268,13 @@ static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] = ...@@ -265,6 +268,13 @@ static const int reg_nonleaf_alloc_order[FIRST_PSEUDO_REGISTER] =
#undef TARGET_RETURN_IN_MSB #undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB xtensa_return_in_msb #define TARGET_RETURN_IN_MSB xtensa_return_in_msb
/* Hook the Xtensa machine-specific builtin machinery
   (xtensa_init_builtins / xtensa_fold_builtin / xtensa_expand_builtin)
   into the target vector.  */
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS xtensa_init_builtins
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN xtensa_fold_builtin
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN xtensa_expand_builtin
struct gcc_target targetm = TARGET_INITIALIZER; struct gcc_target targetm = TARGET_INITIALIZER;
...@@ -2322,6 +2332,74 @@ xtensa_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p, ...@@ -2322,6 +2332,74 @@ xtensa_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
} }
/* Builtins. */

/* Function codes for the Xtensa machine-specific builtins.
   XTENSA_BUILTIN_max is a sentinel counting the builtins, not a
   builtin itself.  */
enum xtensa_builtin
{
XTENSA_BUILTIN_UMULSIDI3,
XTENSA_BUILTIN_max
};
/* Register the Xtensa machine-specific builtin functions.  There is
   currently a single one, __builtin_umulsidi3, declared as
   "unsigned long long (unsigned, unsigned)" and backed by the
   library function __umulsidi3.  */
static void
xtensa_init_builtins (void)
{
  tree ftype = build_function_type_list (unsigned_intDI_type_node,
					 unsigned_intSI_type_node,
					 unsigned_intSI_type_node,
					 NULL_TREE);

  add_builtin_function ("__builtin_umulsidi3", ftype,
			XTENSA_BUILTIN_UMULSIDI3, BUILT_IN_MD,
			"__umulsidi3", NULL_TREE);
}
/* Implement TARGET_FOLD_BUILTIN.  Fold a call to an Xtensa builtin at
   tree level.  __builtin_umulsidi3 folds to a plain DImode
   multiplication of the widened operands either when both arguments
   are integer constants (the product is then computed at compile
   time) or when the configuration has the Mul32 high-part multiplier
   (the <u>mulsidi3 expander can then implement it inline).  Returns
   NULL_TREE when the call must be kept as-is.  */
static tree
xtensa_fold_builtin (tree fndecl, tree arglist, bool ignore ATTRIBUTE_UNUSED)
{
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case XTENSA_BUILTIN_UMULSIDI3:
      {
	tree arg0 = TREE_VALUE (arglist);
	tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));

	if (TARGET_MUL32_HIGH
	    || (TREE_CODE (arg0) == INTEGER_CST
		&& TREE_CODE (arg1) == INTEGER_CST))
	  return fold_build2 (MULT_EXPR, unsigned_intDI_type_node,
			      fold_convert (unsigned_intDI_type_node, arg0),
			      fold_convert (unsigned_intDI_type_node, arg1));
	return NULL;
      }

    default:
      internal_error ("bad builtin code");
      return NULL;
    }
}
/* Implement TARGET_EXPAND_BUILTIN.  Expand a call to an Xtensa
   builtin into RTL.  */
static rtx
xtensa_expand_builtin (tree exp, rtx target,
		       rtx subtarget ATTRIBUTE_UNUSED,
		       enum machine_mode mode ATTRIBUTE_UNUSED,
		       int ignore)
{
  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);

  switch (fcode)
    {
    case XTENSA_BUILTIN_UMULSIDI3:
      /* The umulsidi3 builtin is just a mechanism to avoid calling the real
	 __umulsidi3 function when the Xtensa configuration can directly
	 implement it.  If not, just call the function.  */
      return expand_call (exp, target, ignore);

    default:
      internal_error ("bad builtin code");
      return NULL_RTX;
    }
}
enum reg_class enum reg_class
xtensa_preferred_reload_class (rtx x, enum reg_class class, int isoutput) xtensa_preferred_reload_class (rtx x, enum reg_class class, int isoutput)
{ {
...@@ -2530,9 +2608,14 @@ xtensa_rtx_costs (rtx x, int code, int outer_code, int *total) ...@@ -2530,9 +2608,14 @@ xtensa_rtx_costs (rtx x, int code, int outer_code, int *total)
} }
case FFS: case FFS:
case CTZ:
*total = COSTS_N_INSNS (TARGET_NSA ? 5 : 50); *total = COSTS_N_INSNS (TARGET_NSA ? 5 : 50);
return true; return true;
case CLZ:
*total = COSTS_N_INSNS (TARGET_NSA ? 1 : 50);
return true;
case NOT: case NOT:
*total = COSTS_N_INSNS ((GET_MODE (x) == DImode) ? 3 : 2); *total = COSTS_N_INSNS ((GET_MODE (x) == DImode) ? 3 : 2);
return true; return true;
...@@ -2589,8 +2672,10 @@ xtensa_rtx_costs (rtx x, int code, int outer_code, int *total) ...@@ -2589,8 +2672,10 @@ xtensa_rtx_costs (rtx x, int code, int outer_code, int *total)
enum machine_mode xmode = GET_MODE (x); enum machine_mode xmode = GET_MODE (x);
if (xmode == SFmode) if (xmode == SFmode)
*total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 4 : 50); *total = COSTS_N_INSNS (TARGET_HARD_FLOAT ? 4 : 50);
else if (xmode == DFmode || xmode == DImode) else if (xmode == DFmode)
*total = COSTS_N_INSNS (50); *total = COSTS_N_INSNS (50);
else if (xmode == DImode)
*total = COSTS_N_INSNS (TARGET_MUL32_HIGH ? 10 : 50);
else if (TARGET_MUL32) else if (TARGET_MUL32)
*total = COSTS_N_INSNS (4); *total = COSTS_N_INSNS (4);
else if (TARGET_MAC16) else if (TARGET_MAC16)
......
...@@ -48,6 +48,7 @@ extern unsigned xtensa_current_frame_size; ...@@ -48,6 +48,7 @@ extern unsigned xtensa_current_frame_size;
#define TARGET_MAC16 XCHAL_HAVE_MAC16 #define TARGET_MAC16 XCHAL_HAVE_MAC16
#define TARGET_MUL16 XCHAL_HAVE_MUL16 #define TARGET_MUL16 XCHAL_HAVE_MUL16
#define TARGET_MUL32 XCHAL_HAVE_MUL32 #define TARGET_MUL32 XCHAL_HAVE_MUL32
#define TARGET_MUL32_HIGH XCHAL_HAVE_MUL32_HIGH
#define TARGET_DIV32 XCHAL_HAVE_DIV32 #define TARGET_DIV32 XCHAL_HAVE_DIV32
#define TARGET_NSA XCHAL_HAVE_NSA #define TARGET_NSA XCHAL_HAVE_NSA
#define TARGET_MINMAX XCHAL_HAVE_MINMAX #define TARGET_MINMAX XCHAL_HAVE_MINMAX
...@@ -1069,6 +1070,9 @@ typedef struct xtensa_args ...@@ -1069,6 +1070,9 @@ typedef struct xtensa_args
is done just by pretending it is already truncated. */ is done just by pretending it is already truncated. */
#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1 #define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
/* CLZ and CTZ have well-defined results for a zero operand on Xtensa:
   the clzsi2/ctzsi2 sequences are built on NSAU, which returns 32 for
   zero, so clz (0) == 32 and ctz (0) == 31 - 32 == -1.  */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = 32, 1)
#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) ((VALUE) = -1, 1)
/* Specify the machine mode that pointers have. /* Specify the machine mode that pointers have.
After generation of rtl, the compiler makes no further distinction After generation of rtl, the compiler makes no further distinction
between pointers and any other objects of this machine mode. */ between pointers and any other objects of this machine mode. */
......
;; GCC machine description for Tensilica's Xtensa architecture. ;; GCC machine description for Tensilica's Xtensa architecture.
;; Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. ;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006
;; Free Software Foundation, Inc.
;; Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica. ;; Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
;; This file is part of GCC. ;; This file is part of GCC.
...@@ -26,7 +27,6 @@ ...@@ -26,7 +27,6 @@
(A7_REG 7) (A7_REG 7)
(A8_REG 8) (A8_REG 8)
(UNSPEC_NSAU 1)
(UNSPEC_NOP 2) (UNSPEC_NOP 2)
(UNSPEC_PLT 3) (UNSPEC_PLT 3)
(UNSPEC_RET_ADDR 4) (UNSPEC_RET_ADDR 4)
...@@ -34,6 +34,17 @@ ...@@ -34,6 +34,17 @@
(UNSPECV_ENTRY 2) (UNSPECV_ENTRY 2)
]) ])
;; This code macro allows signed and unsigned widening multiplications
;; to use the same template.
(define_code_macro any_extend [sign_extend zero_extend])
;; <u> expands to an empty string when doing a signed operation and
;; "u" when doing an unsigned operation.
(define_code_attr u [(sign_extend "") (zero_extend "u")])
;; <su> is like <u>, but the signed form expands to "s" rather than "".
(define_code_attr su [(sign_extend "s") (zero_extend "u")])
;; Attributes. ;; Attributes.
...@@ -292,6 +303,32 @@ ...@@ -292,6 +303,32 @@
;; Multiplication. ;; Multiplication.
;; Widening 32x32 -> 64-bit multiply.  <u> expands to "" (signed) or
;; "u" (unsigned) via the any_extend code macro, so this one template
;; provides both mulsidi3 and umulsidi3.  The DImode result is built
;; from two SImode multiplies: mulsi3 for the low word and
;; <u>mulsi3_highpart for the high word, which is why the Mul32
;; high-part option is required.
(define_expand "<u>mulsidi3"
[(set (match_operand:DI 0 "register_operand")
(mult:DI (any_extend:DI (match_operand:SI 1 "register_operand"))
(any_extend:DI (match_operand:SI 2 "register_operand"))))]
"TARGET_MUL32_HIGH"
{
emit_insn (gen_mulsi3 (gen_lowpart (SImode, operands[0]),
operands[1], operands[2]));
emit_insn (gen_<u>mulsi3_highpart (gen_highpart (SImode, operands[0]),
operands[1], operands[2]));
DONE;
})
;; High 32 bits of a widening 32x32-bit multiply.  The <u>/<su> code
;; attributes come from the any_extend code macro, so the signed form
;; emits "mulsh" and the unsigned form "muluh" (Mul32 high option).
(define_insn "<u>mulsi3_highpart"
[(set (match_operand:SI 0 "register_operand" "=a")
(truncate:SI
(lshiftrt:DI
(mult:DI (any_extend:DI (match_operand:SI 1 "register_operand" "%r"))
(any_extend:DI (match_operand:SI 2 "register_operand" "r")))
(const_int 32))))]
"TARGET_MUL32_HIGH"
"mul<su>h\t%0, %1, %2"
[(set_attr "type" "mul32")
(set_attr "mode" "SI")
(set_attr "length" "3")])
(define_insn "mulsi3" (define_insn "mulsi3"
[(set (match_operand:SI 0 "register_operand" "=a") [(set (match_operand:SI 0 "register_operand" "=a")
(mult:SI (match_operand:SI 1 "register_operand" "%r") (mult:SI (match_operand:SI 1 "register_operand" "%r")
...@@ -541,7 +578,30 @@ ...@@ -541,7 +578,30 @@
(set_attr "length" "3")]) (set_attr "length" "3")])
;; Find first bit. ;; Count leading/trailing zeros and find first bit.
;; Count leading zeros with the NSAU (normalize shift amount,
;; unsigned) instruction; available only when the Xtensa configuration
;; includes the NSA option.
(define_insn "clzsi2"
[(set (match_operand:SI 0 "register_operand" "=a")
(clz:SI (match_operand:SI 1 "register_operand" "r")))]
"TARGET_NSA"
"nsau\t%0, %1"
[(set_attr "type" "arith")
(set_attr "mode" "SI")
(set_attr "length" "3")])
;; Count trailing zeros: isolate the lowest set bit with (x & -x),
;; count its leading zeros with clzsi2 (NSAU), and convert with
;; 31 - clz.  For a zero operand NSAU returns 32, yielding -1 as
;; declared by CTZ_DEFINED_VALUE_AT_ZERO.
(define_expand "ctzsi2"
[(set (match_operand:SI 0 "register_operand" "")
(ctz:SI (match_operand:SI 1 "register_operand" "")))]
"TARGET_NSA"
{
rtx temp = gen_reg_rtx (SImode);
emit_insn (gen_negsi2 (temp, operands[1]));
emit_insn (gen_andsi3 (temp, temp, operands[1]));
emit_insn (gen_clzsi2 (temp, temp));
emit_insn (gen_negsi2 (temp, temp));
emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (31)));
DONE;
})
(define_expand "ffssi2" (define_expand "ffssi2"
[(set (match_operand:SI 0 "register_operand" "") [(set (match_operand:SI 0 "register_operand" "")
...@@ -551,22 +611,12 @@ ...@@ -551,22 +611,12 @@
rtx temp = gen_reg_rtx (SImode); rtx temp = gen_reg_rtx (SImode);
emit_insn (gen_negsi2 (temp, operands[1])); emit_insn (gen_negsi2 (temp, operands[1]));
emit_insn (gen_andsi3 (temp, temp, operands[1])); emit_insn (gen_andsi3 (temp, temp, operands[1]));
emit_insn (gen_nsau (temp, temp)); emit_insn (gen_clzsi2 (temp, temp));
emit_insn (gen_negsi2 (temp, temp)); emit_insn (gen_negsi2 (temp, temp));
emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (32))); emit_insn (gen_addsi3 (operands[0], temp, GEN_INT (32)));
DONE; DONE;
}) })
;; There is no RTL operator corresponding to NSAU.
(define_insn "nsau"
[(set (match_operand:SI 0 "register_operand" "=a")
(unspec:SI [(match_operand:SI 1 "register_operand" "r")] UNSPEC_NSAU))]
"TARGET_NSA"
"nsau\t%0, %1"
[(set_attr "type" "arith")
(set_attr "mode" "SI")
(set_attr "length" "3")])
;; Negation and one's complement. ;; Negation and one's complement.
......
...@@ -1189,6 +1189,23 @@ UDItype __umulsidi3 (USItype, USItype); ...@@ -1189,6 +1189,23 @@ UDItype __umulsidi3 (USItype, USItype);
} while (0) } while (0)
#endif /* __vax__ */ #endif /* __vax__ */
#if defined (__xtensa__) && W_TYPE_SIZE == 32
/* This code is not Xtensa-configuration-specific, so rely on the compiler
to expand builtin functions depending on what configuration features
are available. This avoids library calls when the operation can be
performed in-line. */
/* 32x32 -> 64-bit unsigned multiply: high word into w1, low into w0.  */
#define umul_ppmm(w1, w0, u, v) \
do { \
DWunion __w; \
__w.ll = __builtin_umulsidi3 (u, v); \
w1 = __w.s.high; \
w0 = __w.s.low; \
} while (0)
#define __umulsidi3(u, v) __builtin_umulsidi3 (u, v)
/* The compiler expands these to NSAU-based sequences when the Xtensa
   configuration has the NSA option, and otherwise emits calls to the
   __clzsi2/__ctzsi2 routines in libgcc.  */
#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X))
#define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X))
#endif /* __xtensa__ */
#if defined (__z8000__) && W_TYPE_SIZE == 16 #if defined (__z8000__) && W_TYPE_SIZE == 16
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \ #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment