Commit 0ac198d3 by James Greenhalgh Committed by James Greenhalgh

[AArch64] Implement TARGET_GIMPLE_FOLD_BUILTIN for aarch64 backend.

gcc/
	* config/aarch64/aarch64-builtins.c
	(aarch64_gimple_fold_builtin): New.
	* config/aarch64/aarch64-protos.h (aarch64_gimple_fold_builtin): New.
	* config/aarch64/aarch64-simd-builtins.def (addv): New.
	* config/aarch64/aarch64-simd.md (addpv4sf): New.
	(addvv4sf): Update.
	* config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Define.

gcc/testsuite/
	* gcc.target/aarch64/vaddv-intrinsic.c: New.
	* gcc.target/aarch64/vaddv-intrinsic-compile.c: Likewise.
	* gcc.target/aarch64/vaddv-intrinsic.x: Likewise.



Co-Authored-By: Tejas Belagod <tejas.belagod@arm.com>

From-SVN: r198304
parent 58cff58c
2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
Tejas Belagod <tejas.belagod@arm.com>
* config/aarch64/aarch64-builtins.c
(aarch64_gimple_fold_builtin): New.
* config/aarch64/aarch64-protos.h (aarch64_gimple_fold_builtin): New.
* config/aarch64/aarch64-simd-builtins.def (addv): New.
* config/aarch64/aarch64-simd.md (addpv4sf): New.
(addvv4sf): Update.
* config/aarch64/aarch64.c (TARGET_GIMPLE_FOLD_BUILTIN): Define.
2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com> 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
* config/aarch64/aarch64.md * config/aarch64/aarch64.md
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "langhooks.h" #include "langhooks.h"
#include "diagnostic-core.h" #include "diagnostic-core.h"
#include "optabs.h" #include "optabs.h"
#include "gimple.h"
enum aarch64_simd_builtin_type_mode enum aarch64_simd_builtin_type_mode
{ {
...@@ -1254,6 +1255,54 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) ...@@ -1254,6 +1255,54 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
return NULL_TREE; return NULL_TREE;
} }
#undef VAR1
#define VAR1(T, N, MAP, A) \
case AARCH64_SIMD_BUILTIN_##N##A:
bool
aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
{
bool changed = false;
gimple stmt = gsi_stmt (*gsi);
tree call = gimple_call_fn (stmt);
tree fndecl;
gimple new_stmt = NULL;
if (call)
{
fndecl = gimple_call_fndecl (stmt);
if (fndecl)
{
int fcode = DECL_FUNCTION_CODE (fndecl);
int nargs = gimple_call_num_args (stmt);
tree *args = (nargs > 0
? gimple_call_arg_ptr (stmt, 0)
: &error_mark_node);
switch (fcode)
{
BUILTIN_VDQF (UNOP, addv, 0)
new_stmt = gimple_build_assign_with_ops (
REDUC_PLUS_EXPR,
gimple_call_lhs (stmt),
args[0],
NULL_TREE);
break;
default:
break;
}
}
}
if (new_stmt)
{
gsi_replace (gsi, new_stmt, true);
changed = true;
}
return changed;
}
#undef AARCH64_CHECK_BUILTIN_MODE #undef AARCH64_CHECK_BUILTIN_MODE
#undef AARCH64_FIND_FRINT_VARIANT #undef AARCH64_FIND_FRINT_VARIANT
#undef BUILTIN_DX #undef BUILTIN_DX
......
...@@ -140,6 +140,7 @@ bool aarch64_constant_address_p (rtx); ...@@ -140,6 +140,7 @@ bool aarch64_constant_address_p (rtx);
bool aarch64_float_const_zero_rtx_p (rtx); bool aarch64_float_const_zero_rtx_p (rtx);
bool aarch64_function_arg_regno_p (unsigned); bool aarch64_function_arg_regno_p (unsigned);
bool aarch64_gen_movmemqi (rtx *); bool aarch64_gen_movmemqi (rtx *);
bool aarch64_gimple_fold_builtin (gimple_stmt_iterator *);
bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx); bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx);
bool aarch64_is_long_call_p (rtx); bool aarch64_is_long_call_p (rtx);
bool aarch64_label_mentioned_p (rtx); bool aarch64_label_mentioned_p (rtx);
......
...@@ -238,6 +238,9 @@ ...@@ -238,6 +238,9 @@
BUILTIN_VDQF (BINOP, fmax, 0) BUILTIN_VDQF (BINOP, fmax, 0)
BUILTIN_VDQF (BINOP, fmin, 0) BUILTIN_VDQF (BINOP, fmin, 0)
/* Implemented by aarch64_addv<mode>. */
BUILTIN_VDQF (UNOP, addv, 0)
/* Implemented by <maxmin><mode>3. */ /* Implemented by <maxmin><mode>3. */
BUILTIN_VDQ_BHSI (BINOP, smax, 3) BUILTIN_VDQ_BHSI (BINOP, smax, 3)
BUILTIN_VDQ_BHSI (BINOP, smin, 3) BUILTIN_VDQ_BHSI (BINOP, smin, 3)
......
...@@ -1341,7 +1341,7 @@ ...@@ -1341,7 +1341,7 @@
;; FP 'across lanes' add. ;; FP 'across lanes' add.
(define_insn "aarch64_addvv4sf" (define_insn "aarch64_addpv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=w") [(set (match_operand:V4SF 0 "register_operand" "=w")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")] (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
UNSPEC_FADDV))] UNSPEC_FADDV))]
...@@ -1357,8 +1357,8 @@ ...@@ -1357,8 +1357,8 @@
"TARGET_SIMD" "TARGET_SIMD"
{ {
rtx tmp = gen_reg_rtx (V4SFmode); rtx tmp = gen_reg_rtx (V4SFmode);
emit_insn (gen_aarch64_addvv4sf (tmp, operands[1])); emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
emit_insn (gen_aarch64_addvv4sf (operands[0], tmp)); emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
DONE; DONE;
}) })
...@@ -1368,8 +1368,18 @@ ...@@ -1368,8 +1368,18 @@
"TARGET_SIMD" "TARGET_SIMD"
{ {
rtx tmp = gen_reg_rtx (V4SFmode); rtx tmp = gen_reg_rtx (V4SFmode);
emit_insn (gen_aarch64_addvv4sf (tmp, operands[1])); emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
emit_insn (gen_aarch64_addvv4sf (operands[0], tmp)); emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
DONE;
})
(define_expand "aarch64_addvv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
UNSPEC_FADDV))]
"TARGET_SIMD"
{
emit_insn (gen_reduc_splus_v4sf (operands[0], operands[1]));
DONE; DONE;
}) })
......
...@@ -7895,6 +7895,9 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode, ...@@ -7895,6 +7895,9 @@ aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
#undef TARGET_FRAME_POINTER_REQUIRED #undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required #define TARGET_FRAME_POINTER_REQUIRED aarch64_frame_pointer_required
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
#undef TARGET_GIMPLIFY_VA_ARG_EXPR #undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr #define TARGET_GIMPLIFY_VA_ARG_EXPR aarch64_gimplify_va_arg_expr
......
...@@ -19731,6 +19731,27 @@ vaddd_u64 (uint64x1_t __a, uint64x1_t __b) ...@@ -19731,6 +19731,27 @@ vaddd_u64 (uint64x1_t __a, uint64x1_t __b)
return __a + __b; return __a + __b;
} }
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vaddv_f32 (float32x2_t __a)
{
float32x2_t t = __builtin_aarch64_addvv2sf (__a);
return vget_lane_f32 (t, 0);
}
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vaddvq_f32 (float32x4_t __a)
{
float32x4_t t = __builtin_aarch64_addvv4sf (__a);
return vgetq_lane_f32 (t, 0);
}
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vaddvq_f64 (float64x2_t __a)
{
float64x2_t t = __builtin_aarch64_addvv2df (__a);
return vgetq_lane_f64 (t, 0);
}
/* vceq */ /* vceq */
__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
......
2013-04-25 James Greenhalgh <james.greenhalgh@arm.com>
Tejas Belagod <tejas.belagod@arm.com>
* gcc.target/aarch64/vaddv-intrinsic.c: New.
* gcc.target/aarch64/vaddv-intrinsic-compile.c: Likewise.
* gcc.target/aarch64/vaddv-intrinsic.x: Likewise.
2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com> 2013-04-25 Naveen H.S <Naveen.Hurugalawadi@caviumnetworks.com>
* gcc.target/aarch64/cmp.c: New. * gcc.target/aarch64/cmp.c: New.
......
/* { dg-do compile } */
/* { dg-options "-O3" } */
#include "arm_neon.h"
#include "vaddv-intrinsic.x"
/* { dg-final { scan-assembler "faddp\\ts\[0-9\]+"} } */
/* { dg-final { scan-assembler-times "faddp\\tv\[0-9\]+\.4s" 2} } */
/* { dg-final { scan-assembler "faddp\\td\[0-9\]+"} } */
/* { dg-do run } */
/* { dg-options "-O3" } */
#include "arm_neon.h"
extern void abort (void);
#include "vaddv-intrinsic.x"
int
main (void)
{
const float32_t pool_v2sf[] = {4.0f, 9.0f};
const float32_t pool_v4sf[] = {4.0f, 9.0f, 16.0f, 25.0f};
const float64_t pool_v2df[] = {4.0, 9.0};
if (test_vaddv_v2sf (pool_v2sf) != 13.0f)
abort ();
if (test_vaddv_v4sf (pool_v4sf) != 54.0f)
abort ();
if (test_vaddv_v2df (pool_v2df) != 13.0)
abort ();
return 0;
}
float32_t
test_vaddv_v2sf (const float32_t *pool)
{
float32x2_t val;
val = vld1_f32 (pool);
return vaddv_f32 (val);
}
float32_t
test_vaddv_v4sf (const float32_t *pool)
{
float32x4_t val;
val = vld1q_f32 (pool);
return vaddvq_f32 (val);
}
float64_t
test_vaddv_v2df (const float64_t *pool)
{
float64x2_t val;
val = vld1q_f64 (pool);
return vaddvq_f64 (val);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment