Commit e9e67af1 by Kyrylo Tkachov Committed by Kyrylo Tkachov

[2/2] Vectorise lroundf, lfloorf, lceilf using the new ARMv8-A vcvt* instructions.

	PR target/62275
	* config/arm/neon.md
	(neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode>
	<v_cmp_result>): New pattern.
	* config/arm/iterators.md (NEON_VCVT): New int iterator.
	* config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf,
	vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf,
	vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions.
	* config/arm/arm.c (arm_builtin_vectorized_function): Handle
	BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF.

	PR target/62275
	* gcc.target/arm/vect-lceilf_1.c: New test.
	* gcc.target/arm/vect-lfloorf_1.c: Likewise.
	* gcc.target/arm/vect-lroundf_1.c: Likewise.

From-SVN: r214826
parent ababd936
2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
PR target/62275
* config/arm/neon.md
(neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode>
<v_cmp_result>): New pattern.
* config/arm/iterators.md (NEON_VCVT): New int iterator.
* config/arm/arm_neon_builtins.def (vcvtav2sf, vcvtav4sf, vcvtauv2sf,
vcvtauv4sf, vcvtpv2sf, vcvtpv4sf, vcvtpuv2sf, vcvtpuv4sf, vcvtmv2sf,
vcvtmv4sf, vcvtmuv2sf, vcvtmuv4sf): New builtin definitions.
* config/arm/arm.c (arm_builtin_vectorized_function): Handle
BUILT_IN_LROUNDF, BUILT_IN_LFLOORF, BUILT_IN_LCEILF.
2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
PR target/62275
* config/arm/iterators.md (FIXUORS): New code iterator.
(VCVT): New int iterator.
(su_optab): New code attribute.
......@@ -29946,6 +29946,7 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
{
enum machine_mode in_mode, out_mode;
int in_n, out_n;
bool out_unsigned_p = TYPE_UNSIGNED (type_out);
if (TREE_CODE (type_out) != VECTOR_TYPE
|| TREE_CODE (type_in) != VECTOR_TYPE)
......@@ -29991,6 +29992,36 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
return ARM_FIND_VRINT_VARIANT (vrintz);
case BUILT_IN_ROUNDF:
return ARM_FIND_VRINT_VARIANT (vrinta);
#undef ARM_CHECK_BUILTIN_MODE_1
#define ARM_CHECK_BUILTIN_MODE_1(C) \
(out_mode == SImode && out_n == C \
&& in_mode == SFmode && in_n == C)
#define ARM_FIND_VCVT_VARIANT(N) \
(ARM_CHECK_BUILTIN_MODE (2) \
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \
: (ARM_CHECK_BUILTIN_MODE (4) \
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \
: NULL_TREE))
#define ARM_FIND_VCVTU_VARIANT(N) \
(ARM_CHECK_BUILTIN_MODE (2) \
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \
: (ARM_CHECK_BUILTIN_MODE (4) \
? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \
: NULL_TREE))
case BUILT_IN_LROUNDF:
return out_unsigned_p
? ARM_FIND_VCVTU_VARIANT (vcvta)
: ARM_FIND_VCVT_VARIANT (vcvta);
case BUILT_IN_LCEILF:
return out_unsigned_p
? ARM_FIND_VCVTU_VARIANT (vcvtp)
: ARM_FIND_VCVT_VARIANT (vcvtp);
case BUILT_IN_LFLOORF:
return out_unsigned_p
? ARM_FIND_VCVTU_VARIANT (vcvtm)
: ARM_FIND_VCVT_VARIANT (vcvtm);
#undef ARM_CHECK_BUILTIN_MODE
#define ARM_CHECK_BUILTIN_MODE(C, N) \
(out_mode == N##Imode && out_n == C \
......@@ -30021,9 +30052,12 @@ arm_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
}
return NULL_TREE;
}
#undef ARM_FIND_VCVT_VARIANT
#undef ARM_FIND_VCVTU_VARIANT
#undef ARM_CHECK_BUILTIN_MODE
#undef ARM_FIND_VRINT_VARIANT
/* The AAPCS sets the maximum alignment of a vector to 64 bits. */
static HOST_WIDE_INT
arm_vector_alignment (const_tree type)
......
......@@ -141,6 +141,18 @@ VAR2 (RINT, vrintp, v2sf, v4sf),
VAR2 (RINT, vrintm, v2sf, v4sf),
VAR2 (RINT, vrintz, v2sf, v4sf),
VAR2 (RINT, vrintx, v2sf, v4sf),
VAR1 (RINT, vcvtav2sf, v2si),
VAR1 (RINT, vcvtav4sf, v4si),
VAR1 (RINT, vcvtauv2sf, v2si),
VAR1 (RINT, vcvtauv4sf, v4si),
VAR1 (RINT, vcvtpv2sf, v2si),
VAR1 (RINT, vcvtpv4sf, v4si),
VAR1 (RINT, vcvtpuv2sf, v2si),
VAR1 (RINT, vcvtpuv4sf, v4si),
VAR1 (RINT, vcvtmv2sf, v2si),
VAR1 (RINT, vcvtmv4sf, v4si),
VAR1 (RINT, vcvtmuv2sf, v2si),
VAR1 (RINT, vcvtmuv4sf, v4si),
VAR1 (VTBL, vtbl1, v8qi),
VAR1 (VTBL, vtbl2, v8qi),
VAR1 (VTBL, vtbl3, v8qi),
......
......@@ -223,6 +223,8 @@
(define_int_iterator NEON_VRINT [UNSPEC_NVRINTP UNSPEC_NVRINTZ UNSPEC_NVRINTM
UNSPEC_NVRINTX UNSPEC_NVRINTA UNSPEC_NVRINTN])
;; Unspecs iterated over by the neon_vcvt<NEON_VCVT:nvrint_variant>...
;; float-to-integer conversion pattern.
(define_int_iterator NEON_VCVT [UNSPEC_NVRINTP UNSPEC_NVRINTM UNSPEC_NVRINTA])
(define_int_iterator CRC [UNSPEC_CRC32B UNSPEC_CRC32H UNSPEC_CRC32W
UNSPEC_CRC32CB UNSPEC_CRC32CH UNSPEC_CRC32CW])
......
......@@ -629,6 +629,17 @@
[(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
)
;; Vector float-to-integer conversion whose source is wrapped in one of
;; the NEON_VCVT unspecs.  The pattern is instantiated for every
;; combination of the NEON_VCVT unspec, the FIXUORS conversion code
;; (presumably the signed and unsigned fix variants -- confirm against
;; iterators.md) and the VCVTF source mode; the destination uses the
;; matching <V_cmp_result> mode.  It is only available when both
;; TARGET_NEON and TARGET_FPU_ARMV8 hold, emits a single
;; vcvt<nvrint_variant>.<su>32.f32 instruction and is marked as not
;; predicable.
(define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
(FIXUORS:<V_cmp_result> (unspec:VCVTF
[(match_operand:VCVTF 1 "register_operand" "w")]
NEON_VCVT)))]
"TARGET_NEON && TARGET_FPU_ARMV8"
"vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
[(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
(set_attr "predicable" "no")]
)
(define_insn "ior<mode>3"
[(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
......
2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
PR target/62275
* gcc.target/arm/vect-lceilf_1.c: New test.
* gcc.target/arm/vect-lfloorf_1.c: Likewise.
* gcc.target/arm/vect-lroundf_1.c: Likewise.
2014-09-02 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
PR target/62275
* gcc.target/arm/lceil-vcvt_1.c: New test.
* gcc.target/arm/lfloor-vcvt_1.c: Likewise.
* gcc.target/arm/lround-vcvt_1.c: Likewise.
......
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_neon_ok } */
/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
/* { dg-add-options arm_v8_neon } */
#define N 32
/* Apply __builtin_lceilf to each of the first N elements of INPUT and
   store the results in the corresponding elements of OUTPUT.  The
   dg-final directive in this file expects the vectorizer dump to report
   exactly one vectorized loop, so changes to the shape of the loop can
   change the outcome of the test.  */
void
foo (int *output, float *input)
{
int i = 0;
/* Vectorizable.  Each iteration reads input[i] and writes output[i].  */
for (i = 0; i < N; i++)
output[i] = __builtin_lceilf (input[i]);
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_neon_ok } */
/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
/* { dg-add-options arm_v8_neon } */
#define N 32
/* Apply __builtin_lfloorf to each of the first N elements of INPUT and
   store the results in the corresponding elements of OUTPUT.  The
   dg-final directive in this file expects the vectorizer dump to report
   exactly one vectorized loop, so changes to the shape of the loop can
   change the outcome of the test.  */
void
foo (int *output, float *input)
{
int i = 0;
/* Vectorizable.  Each iteration reads input[i] and writes output[i].  */
for (i = 0; i < N; i++)
output[i] = __builtin_lfloorf (input[i]);
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_neon_ok } */
/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-all" } */
/* { dg-add-options arm_v8_neon } */
#define N 32
/* Apply __builtin_lroundf to each of the first N elements of INPUT and
   store the results in the corresponding elements of OUTPUT.  The
   dg-final directive in this file expects the vectorizer dump to report
   exactly one vectorized loop, so changes to the shape of the loop can
   change the outcome of the test.  */
void
foo (int *output, float *input)
{
int i = 0;
/* Vectorizable.  Each iteration reads input[i] and writes output[i].  */
for (i = 0; i < N; i++)
output[i] = __builtin_lroundf (input[i]);
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment