Commit d403b8d4 by Matthew Wahab Committed by Matthew Wahab

[PATCH 8/17][ARM] Add VFP FP16 arithmetic instructions.

gcc/
2016-09-23  Matthew Wahab  <matthew.wahab@arm.com>

	* config/arm/iterators.md (Code iterators): Fix some white-space
	in the comments.
	(GLTE): New.
	(ABSNEG): New.
	(FCVT): Moved from vfp.md.
	(VCVT_HF_US_N): New.
	(VCVT_SI_US_N): New.
	(VCVT_HF_US): New.
	(VCVTH_US): New.
	(FP16_RND): New.
	(absneg_str): New.
	(FCVTI32typename): Moved from vfp.md.
	(sup): Add UNSPEC_VCVTA_S, UNSPEC_VCVTA_U, UNSPEC_VCVTM_S,
	UNSPEC_VCVTM_U, UNSPEC_VCVTN_S, UNSPEC_VCVTN_U, UNSPEC_VCVTP_S,
	UNSPEC_VCVTP_U, UNSPEC_VCVT_HF_S_N, UNSPEC_VCVT_HF_U_N,
	UNSPEC_VCVT_SI_S_N, UNSPEC_VCVT_SI_U_N,  UNSPEC_VCVTH_S_N,
	UNSPEC_VCVTH_U_N, UNSPEC_VCVTH_S and UNSPEC_VCVTH_U.
	(vcvth_op): New.
	(fp16_rnd_str): New.
	(fp16_rnd_insn): New.
	* config/arm/unspecs.md (UNSPEC_VCVT_HF_S_N): New.
	(UNSPEC_VCVT_HF_U_N): New.
	(UNSPEC_VCVT_SI_S_N): New.
	(UNSPEC_VCVT_SI_U_N): New.
	(UNSPEC_VCVTH_S): New.
	(UNSPEC_VCVTH_U): New.
	(UNSPEC_VCVTA_S): New.
	(UNSPEC_VCVTA_U): New.
	(UNSPEC_VCVTM_S): New.
	(UNSPEC_VCVTM_U): New.
	(UNSPEC_VCVTN_S): New.
	(UNSPEC_VCVTN_U): New.
	(UNSPEC_VCVTP_S): New.
	(UNSPEC_VCVTP_U): New.
	(UNSPEC_VCVTP_S): New.
	(UNSPEC_VCVTP_U): New.
	(UNSPEC_VRND): New.
	(UNSPEC_VRNDA): New.
	(UNSPEC_VRNDI): New.
	(UNSPEC_VRNDM): New.
	(UNSPEC_VRNDN): New.
	(UNSPEC_VRNDP): New.
	(UNSPEC_VRNDX): New.
	* config/arm/vfp.md (<absneg_str>hf2): New.
	(neon_vabshf): New.
	(neon_v<fp16_rnd_str>hf): New.
	(neon_vrndihf): New.
	(addhf3): New.
	(subhf3): New.
	(divhf3): New.
	(mulhf3): New.
	(*mulsf3neghf_vfp): New.
	(*negmulhf3_vfp): New.
	(*mulsf3addhf_vfp): New.
	(*mulhf3subhf_vfp): New.
	(*mulhf3neghfaddhf_vfp): New.
	(*mulhf3neghfsubhf_vfp): New.
	(fmahf4): New.
	(neon_vfmahf): New.
	(fmsubhf4_fp16): New.
	(neon_vfmshf): New.
	(*fnmsubhf4): New.
	(*fnmaddhf4): New.
	(neon_vsqrthf): New.
	(neon_vrsqrtshf): New.
	(FCVT): Move to iterators.md.
	(FCVTI32typename): Likewise.
	(neon_vcvth<sup>hf): New.
	(neon_vcvth<sup>si): New.
	(neon_vcvth<sup>_nhf_unspec): New.
	(neon_vcvth<sup>_nhf): New.
	(neon_vcvth<sup>_nsi_unspec): New.
	(neon_vcvth<sup>_nsi): New.
	(neon_vcvt<vcvth_op>h<sup>si): New.
	(neon_<fmaxmin_op>hf): New.

testsuite/
2016-09-23  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc.target/arm/armv8_2-fp16-arith-1.c: New.
	* gcc.target/arm/armv8_2-fp16-conv-1.c: New.

From-SVN: r240411
parent e2080e79
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
* config/arm/iterators.md (Code iterators): Fix some white-space
in the comments.
(GLTE): New.
(ABSNEG): New.
(FCVT): Moved from vfp.md.
(VCVT_HF_US_N): New.
(VCVT_SI_US_N): New.
(VCVT_HF_US): New.
(VCVTH_US): New.
(FP16_RND): New.
(absneg_str): New.
(FCVTI32typename): Moved from vfp.md.
(sup): Add UNSPEC_VCVTA_S, UNSPEC_VCVTA_U, UNSPEC_VCVTM_S,
UNSPEC_VCVTM_U, UNSPEC_VCVTN_S, UNSPEC_VCVTN_U, UNSPEC_VCVTP_S,
UNSPEC_VCVTP_U, UNSPEC_VCVT_HF_S_N, UNSPEC_VCVT_HF_U_N,
UNSPEC_VCVT_SI_S_N, UNSPEC_VCVT_SI_U_N, UNSPEC_VCVTH_S_N,
UNSPEC_VCVTH_U_N, UNSPEC_VCVTH_S and UNSPEC_VCVTH_U.
(vcvth_op): New.
(fp16_rnd_str): New.
(fp16_rnd_insn): New.
* config/arm/unspecs.md (UNSPEC_VCVT_HF_S_N): New.
(UNSPEC_VCVT_HF_U_N): New.
(UNSPEC_VCVT_SI_S_N): New.
(UNSPEC_VCVT_SI_U_N): New.
(UNSPEC_VCVTH_S): New.
(UNSPEC_VCVTH_U): New.
(UNSPEC_VCVTA_S): New.
(UNSPEC_VCVTA_U): New.
(UNSPEC_VCVTM_S): New.
(UNSPEC_VCVTM_U): New.
(UNSPEC_VCVTN_S): New.
(UNSPEC_VCVTN_U): New.
(UNSPEC_VCVTP_S): New.
(UNSPEC_VCVTP_U): New.
(UNSPEC_VCVTP_S): New.
(UNSPEC_VCVTP_U): New.
(UNSPEC_VRND): New.
(UNSPEC_VRNDA): New.
(UNSPEC_VRNDI): New.
(UNSPEC_VRNDM): New.
(UNSPEC_VRNDN): New.
(UNSPEC_VRNDP): New.
(UNSPEC_VRNDX): New.
* config/arm/vfp.md (<absneg_str>hf2): New.
(neon_vabshf): New.
(neon_v<fp16_rnd_str>hf): New.
(neon_vrndihf): New.
(addhf3): New.
(subhf3): New.
(divhf3): New.
(mulhf3): New.
(*mulsf3neghf_vfp): New.
(*negmulhf3_vfp): New.
(*mulsf3addhf_vfp): New.
(*mulhf3subhf_vfp): New.
(*mulhf3neghfaddhf_vfp): New.
(*mulhf3neghfsubhf_vfp): New.
(fmahf4): New.
(neon_vfmahf): New.
(fmsubhf4_fp16): New.
(neon_vfmshf): New.
(*fnmsubhf4): New.
(*fnmaddhf4): New.
(neon_vsqrthf): New.
(neon_vrsqrtshf): New.
(FCVT): Move to iterators.md.
(FCVTI32typename): Likewise.
(neon_vcvth<sup>hf): New.
(neon_vcvth<sup>si): New.
(neon_vcvth<sup>_nhf_unspec): New.
(neon_vcvth<sup>_nhf): New.
(neon_vcvth<sup>_nsi_unspec): New.
(neon_vcvth<sup>_nsi): New.
(neon_vcvt<vcvth_op>h<sup>si): New.
(neon_<fmaxmin_op>hf): New.
2016-09-23 Dominik Vogt <vogt@linux.vnet.ibm.com> 2016-09-23 Dominik Vogt <vogt@linux.vnet.ibm.com>
* config/s390/s390.md (bitoff, bitoff_plus): New mode attributes. * config/s390/s390.md (bitoff, bitoff_plus): New mode attributes.
......
...@@ -199,14 +199,17 @@ ...@@ -199,14 +199,17 @@
;; Code iterators ;; Code iterators
;;---------------------------------------------------------------------------- ;;----------------------------------------------------------------------------
;; A list of condition codes used in compare instructions where ;; A list of condition codes used in compare instructions where
;; the carry flag from the addition is used instead of doing the ;; the carry flag from the addition is used instead of doing the
;; compare a second time. ;; compare a second time.
(define_code_iterator LTUGEU [ltu geu]) (define_code_iterator LTUGEU [ltu geu])
;; The signed gt, ge comparisons ;; The signed gt, ge comparisons
(define_code_iterator GTGE [gt ge]) (define_code_iterator GTGE [gt ge])
;; The signed gt, ge, lt, le comparisons
(define_code_iterator GLTE [gt ge lt le])
;; The unsigned gt, ge comparisons ;; The unsigned gt, ge comparisons
(define_code_iterator GTUGEU [gtu geu]) (define_code_iterator GTUGEU [gtu geu])
...@@ -235,6 +238,12 @@ ...@@ -235,6 +238,12 @@
;; Binary operators whose second operand can be shifted. ;; Binary operators whose second operand can be shifted.
(define_code_iterator SHIFTABLE_OPS [plus minus ior xor and]) (define_code_iterator SHIFTABLE_OPS [plus minus ior xor and])
;; Operations on the sign of a number.
(define_code_iterator ABSNEG [abs neg])
;; Conversions.
(define_code_iterator FCVT [unsigned_float float])
;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows ;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows
;; a stack pointer operand. The minus operation is a candidate for an rsub ;; a stack pointer operand. The minus operation is a candidate for an rsub
;; and hence only plus is supported. ;; and hence only plus is supported.
...@@ -330,6 +339,22 @@ ...@@ -330,6 +339,22 @@
(define_int_iterator VCVT_US_N [UNSPEC_VCVT_S_N UNSPEC_VCVT_U_N]) (define_int_iterator VCVT_US_N [UNSPEC_VCVT_S_N UNSPEC_VCVT_U_N])
(define_int_iterator VCVT_HF_US_N [UNSPEC_VCVT_HF_S_N UNSPEC_VCVT_HF_U_N])
(define_int_iterator VCVT_SI_US_N [UNSPEC_VCVT_SI_S_N UNSPEC_VCVT_SI_U_N])
(define_int_iterator VCVT_HF_US [UNSPEC_VCVTA_S UNSPEC_VCVTA_U
UNSPEC_VCVTM_S UNSPEC_VCVTM_U
UNSPEC_VCVTN_S UNSPEC_VCVTN_U
UNSPEC_VCVTP_S UNSPEC_VCVTP_U])
(define_int_iterator VCVTH_US [UNSPEC_VCVTH_S UNSPEC_VCVTH_U])
;; Operators for FP16 instructions.
(define_int_iterator FP16_RND [UNSPEC_VRND UNSPEC_VRNDA
UNSPEC_VRNDM UNSPEC_VRNDN
UNSPEC_VRNDP UNSPEC_VRNDX])
(define_int_iterator VQMOVN [UNSPEC_VQMOVN_S UNSPEC_VQMOVN_U]) (define_int_iterator VQMOVN [UNSPEC_VQMOVN_S UNSPEC_VQMOVN_U])
(define_int_iterator VMOVL [UNSPEC_VMOVL_S UNSPEC_VMOVL_U]) (define_int_iterator VMOVL [UNSPEC_VMOVL_S UNSPEC_VMOVL_U])
...@@ -687,6 +712,12 @@ ...@@ -687,6 +712,12 @@
(define_code_attr shift [(ashiftrt "ashr") (lshiftrt "lshr")]) (define_code_attr shift [(ashiftrt "ashr") (lshiftrt "lshr")])
(define_code_attr shifttype [(ashiftrt "signed") (lshiftrt "unsigned")]) (define_code_attr shifttype [(ashiftrt "signed") (lshiftrt "unsigned")])
;; String representations of operations on the sign of a number.
(define_code_attr absneg_str [(abs "abs") (neg "neg")])
;; Conversions.
(define_code_attr FCVTI32typename [(unsigned_float "u32") (float "s32")])
;;---------------------------------------------------------------------------- ;;----------------------------------------------------------------------------
;; Int attributes ;; Int attributes
;;---------------------------------------------------------------------------- ;;----------------------------------------------------------------------------
...@@ -718,7 +749,13 @@ ...@@ -718,7 +749,13 @@
(UNSPEC_VPMAX "s") (UNSPEC_VPMAX_U "u") (UNSPEC_VPMAX "s") (UNSPEC_VPMAX_U "u")
(UNSPEC_VPMIN "s") (UNSPEC_VPMIN_U "u") (UNSPEC_VPMIN "s") (UNSPEC_VPMIN_U "u")
(UNSPEC_VCVT_S "s") (UNSPEC_VCVT_U "u") (UNSPEC_VCVT_S "s") (UNSPEC_VCVT_U "u")
(UNSPEC_VCVTA_S "s") (UNSPEC_VCVTA_U "u")
(UNSPEC_VCVTM_S "s") (UNSPEC_VCVTM_U "u")
(UNSPEC_VCVTN_S "s") (UNSPEC_VCVTN_U "u")
(UNSPEC_VCVTP_S "s") (UNSPEC_VCVTP_U "u")
(UNSPEC_VCVT_S_N "s") (UNSPEC_VCVT_U_N "u") (UNSPEC_VCVT_S_N "s") (UNSPEC_VCVT_U_N "u")
(UNSPEC_VCVT_HF_S_N "s") (UNSPEC_VCVT_HF_U_N "u")
(UNSPEC_VCVT_SI_S_N "s") (UNSPEC_VCVT_SI_U_N "u")
(UNSPEC_VQMOVN_S "s") (UNSPEC_VQMOVN_U "u") (UNSPEC_VQMOVN_S "s") (UNSPEC_VQMOVN_U "u")
(UNSPEC_VMOVL_S "s") (UNSPEC_VMOVL_U "u") (UNSPEC_VMOVL_S "s") (UNSPEC_VMOVL_U "u")
(UNSPEC_VSHL_S "s") (UNSPEC_VSHL_U "u") (UNSPEC_VSHL_S "s") (UNSPEC_VSHL_U "u")
...@@ -733,9 +770,25 @@ ...@@ -733,9 +770,25 @@
(UNSPEC_VSHLL_S_N "s") (UNSPEC_VSHLL_U_N "u") (UNSPEC_VSHLL_S_N "s") (UNSPEC_VSHLL_U_N "u")
(UNSPEC_VSRA_S_N "s") (UNSPEC_VSRA_U_N "u") (UNSPEC_VSRA_S_N "s") (UNSPEC_VSRA_U_N "u")
(UNSPEC_VRSRA_S_N "s") (UNSPEC_VRSRA_U_N "u") (UNSPEC_VRSRA_S_N "s") (UNSPEC_VRSRA_U_N "u")
(UNSPEC_VCVTH_S "s") (UNSPEC_VCVTH_U "u")
]) ])
(define_int_attr vcvth_op
[(UNSPEC_VCVTA_S "a") (UNSPEC_VCVTA_U "a")
(UNSPEC_VCVTM_S "m") (UNSPEC_VCVTM_U "m")
(UNSPEC_VCVTN_S "n") (UNSPEC_VCVTN_U "n")
(UNSPEC_VCVTP_S "p") (UNSPEC_VCVTP_U "p")])
(define_int_attr fp16_rnd_str
[(UNSPEC_VRND "rnd") (UNSPEC_VRNDA "rnda")
(UNSPEC_VRNDM "rndm") (UNSPEC_VRNDN "rndn")
(UNSPEC_VRNDP "rndp") (UNSPEC_VRNDX "rndx")])
(define_int_attr fp16_rnd_insn
[(UNSPEC_VRND "vrintz") (UNSPEC_VRNDA "vrinta")
(UNSPEC_VRNDM "vrintm") (UNSPEC_VRNDN "vrintn")
(UNSPEC_VRNDP "vrintp") (UNSPEC_VRNDX "vrintx")])
(define_int_attr cmp_op_unsp [(UNSPEC_VCEQ "eq") (UNSPEC_VCGT "gt") (define_int_attr cmp_op_unsp [(UNSPEC_VCEQ "eq") (UNSPEC_VCGT "gt")
(UNSPEC_VCGE "ge") (UNSPEC_VCLE "le") (UNSPEC_VCGE "ge") (UNSPEC_VCLE "le")
(UNSPEC_VCLT "lt") (UNSPEC_VCAGE "ge") (UNSPEC_VCLT "lt") (UNSPEC_VCAGE "ge")
......
...@@ -203,6 +203,20 @@ ...@@ -203,6 +203,20 @@
UNSPEC_VCVT_U UNSPEC_VCVT_U
UNSPEC_VCVT_S_N UNSPEC_VCVT_S_N
UNSPEC_VCVT_U_N UNSPEC_VCVT_U_N
UNSPEC_VCVT_HF_S_N
UNSPEC_VCVT_HF_U_N
UNSPEC_VCVT_SI_S_N
UNSPEC_VCVT_SI_U_N
UNSPEC_VCVTH_S
UNSPEC_VCVTH_U
UNSPEC_VCVTA_S
UNSPEC_VCVTA_U
UNSPEC_VCVTM_S
UNSPEC_VCVTM_U
UNSPEC_VCVTN_S
UNSPEC_VCVTN_U
UNSPEC_VCVTP_S
UNSPEC_VCVTP_U
UNSPEC_VEXT UNSPEC_VEXT
UNSPEC_VHADD_S UNSPEC_VHADD_S
UNSPEC_VHADD_U UNSPEC_VHADD_U
...@@ -365,5 +379,12 @@ ...@@ -365,5 +379,12 @@
UNSPEC_NVRINTN UNSPEC_NVRINTN
UNSPEC_VQRDMLAH UNSPEC_VQRDMLAH
UNSPEC_VQRDMLSH UNSPEC_VQRDMLSH
UNSPEC_VRND
UNSPEC_VRNDA
UNSPEC_VRNDI
UNSPEC_VRNDM
UNSPEC_VRNDN
UNSPEC_VRNDP
UNSPEC_VRNDX
]) ])
2016-09-23 Matthew Wahab <matthew.wahab@arm.com>
* gcc.target/arm/armv8_2-fp16-arith-1.c: New.
* gcc.target/arm/armv8_2-fp16-conv-1.c: New.
2016-09-23 Dominik Vogt <vogt@linux.vnet.ibm.com> 2016-09-23 Dominik Vogt <vogt@linux.vnet.ibm.com>
* gcc.target/s390/md/rXsbg_mode_sXl.c: Adapt expected assembly * gcc.target/s390/md/rXsbg_mode_sXl.c: Adapt expected assembly
......
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
/* { dg-options "-O2 -ffast-math" } */
/* { dg-add-options arm_v8_2a_fp16_scalar } */
/* Test instructions generated for half-precision arithmetic. */
/* Scalar half-precision type used by the test functions in this file.  */
typedef __fp16 float16_t;
/* Vector aliases; no test in the visible portion of this file refers to
   them.  */
typedef __simd64_float16_t float16x4_t;
typedef __simd128_float16_t float16x8_t;
/* Return the magnitude of A: -A when A compares less than zero, otherwise
   A itself.  Used as the OPERATOR argument of the TEST_UNOP "abs"
   instantiation in this file.
   NOTE(review): the vabs.f16 scan presumably relies on the compiler
   recognising this select expression as an absolute value; keep its
   shape unless the expected assembly is re-checked.  */
float16_t
fp16_abs (float16_t a)
{
return (a < 0) ? -a : a;
}
/* Define `TY test_<NAME>_<TY> (TY a)', which returns OPERATOR applied to
   A.  OPERATOR is expanded as `OPERATOR (a)', so it may be either a prefix
   operator token or the name of a one-argument function.  */
#define TEST_UNOP(NAME, OPERATOR, TY) \
TY test_##NAME##_##TY (TY a) \
{ \
return OPERATOR (a); \
}
/* Define `TY test_<NAME>_<TY> (TY a, TY b)', which returns `a OPERATOR b'.
   OPERATOR must be an infix binary operator token; the result is returned
   as TY.  */
#define TEST_BINOP(NAME, OPERATOR, TY) \
TY test_##NAME##_##TY (TY a, TY b) \
{ \
return a OPERATOR b; \
}
/* Define `RTY test_<NAME>_<TY> (TY a, TY b)', which returns `a OPERATOR b'.
   Same shape as TEST_BINOP except that the return type RTY is given
   separately from the operand type TY.  */
#define TEST_CMP(NAME, OPERATOR, RTY, TY) \
RTY test_##NAME##_##TY (TY a, TY b) \
{ \
return a OPERATOR b; \
}
/* Scalars.  Each line below defines one test_<name>_float16_t function;
   the dg-final directives in this file scan the assembly generated for
   them.  */
/* Operations on the sign.  */
TEST_UNOP (neg, -, float16_t)
TEST_UNOP (abs, fp16_abs, float16_t)
/* Arithmetic.  */
TEST_BINOP (add, +, float16_t)
TEST_BINOP (sub, -, float16_t)
TEST_BINOP (mult, *, float16_t)
TEST_BINOP (div, /, float16_t)
/* Comparisons; each returns int.  */
TEST_CMP (equal, ==, int, float16_t)
TEST_CMP (unequal, !=, int, float16_t)
TEST_CMP (lessthan, <, int, float16_t)
TEST_CMP (greaterthan, >, int, float16_t)
TEST_CMP (lessthanequal, <=, int, float16_t)
/* NOTE(review): "greaterthanqual" looks like a typo for "greaterthanequal";
   left unchanged because it determines the generated function name.  */
TEST_CMP (greaterthanqual, >=, int, float16_t)
/* { dg-final { scan-assembler-times {vneg\.f16\ts[0-9]+, s[0-9]+} 1 } } */
/* { dg-final { scan-assembler-times {vabs\.f16\ts[0-9]+, s[0-9]+} 2 } } */
/* { dg-final { scan-assembler-times {vadd\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
/* { dg-final { scan-assembler-times {vsub\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
/* { dg-final { scan-assembler-times {vmul\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
/* { dg-final { scan-assembler-times {vdiv\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } } */
/* { dg-final { scan-assembler-times {vcmp\.f32\ts[0-9]+, s[0-9]+} 2 } } */
/* { dg-final { scan-assembler-times {vcmpe\.f32\ts[0-9]+, s[0-9]+} 4 } } */
/* { dg-final { scan-assembler-not {vadd\.f32} } } */
/* { dg-final { scan-assembler-not {vsub\.f32} } } */
/* { dg-final { scan-assembler-not {vmul\.f32} } } */
/* { dg-final { scan-assembler-not {vdiv\.f32} } } */
/* { dg-final { scan-assembler-not {vcmp\.f16} } } */
/* { dg-final { scan-assembler-not {vcmpe\.f16} } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_2a_fp16_scalar_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_v8_2a_fp16_scalar } */
/* Test ARMv8.2 FP16 conversions. */
#include <arm_fp16.h>
/* Widen the half-precision value A to single precision.  */
float
f16_to_f32 (__fp16 a)
{
  float widened = a;

  return widened;
}
/* Load the half-precision value that A points to and widen it to single
   precision.  */
float
f16_to_pf32 (__fp16 *a)
{
  __fp16 loaded = a[0];

  return (float) loaded;
}
/* Convert the half-precision value A to a signed 16-bit integer.  */
short
f16_to_s16 (__fp16 a)
{
  short as_integer = (short) a;

  return as_integer;
}
/* Load the half-precision value that A points to and convert it to a
   signed 16-bit integer.  */
short
pf16_to_s16 (__fp16 *a)
{
  __fp16 loaded = a[0];

  return (short) loaded;
}
/* { dg-final { scan-assembler-times {vcvtb\.f32\.f16\ts[0-9]+, s[0-9]+} 4 } } */
__fp16
f32_to_f16 (float a)
{
return (__fp16)a;
}
/* Narrow the single-precision value A to half precision and store the
   result through X.  */
void
f32_to_pf16 (__fp16 *x, float a)
{
  __fp16 narrowed = (__fp16) a;

  x[0] = narrowed;
}
__fp16
s16_to_f16 (short a)
{
return (__fp16)a;
}
/* Convert the signed 16-bit integer A to half precision and store the
   result through X.  */
void
s16_to_pf16 (__fp16 *x, short a)
{
  __fp16 as_half = (__fp16) a;

  x[0] = as_half;
}
/* { dg-final { scan-assembler-times {vcvtb\.f16\.f32\ts[0-9]+, s[0-9]+} 4 } } */
/* Convert the signed 16-bit integer A to single precision.  */
float
s16_to_f32 (short a)
{
  float as_float = a;

  return as_float;
}
/* { dg-final { scan-assembler-times {vcvt\.f32\.s32\ts[0-9]+, s[0-9]+} 3 } } */
/* Convert the single-precision value A to a signed 16-bit integer.  */
short
f32_to_s16 (float a)
{
  short as_integer = (short) a;

  return as_integer;
}
/* { dg-final { scan-assembler-times {vcvt\.s32\.f32\ts[0-9]+, s[0-9]+} 3 } } */
/* Convert the single-precision value A to an unsigned 16-bit integer.  */
unsigned short
f32_to_u16 (float a)
{
  unsigned short as_integer = (unsigned short) a;

  return as_integer;
}
/* { dg-final { scan-assembler-times {vcvt\.u32\.f32\ts[0-9]+, s[0-9]+} 1 } } */
/* Convert the double-precision value A to a signed 16-bit integer.  */
short
f64_to_s16 (double a)
{
  short as_integer = (short) a;

  return as_integer;
}
/* { dg-final { scan-assembler-times {vcvt\.s32\.f64\ts[0-9]+, d[0-9]+} 1 } } */
/* Convert the double-precision value A to an unsigned 16-bit integer.  */
unsigned short
f64_to_u16 (double a)
{
  return (unsigned short) a;
}
/* The directive here used to repeat the signed vcvt.s32.f64 scan that
   follows f64_to_s16, so the unsigned conversion was never checked.  Scan
   for the unsigned form instead, mirroring the vcvt.u32.f32 check for
   f32_to_u16.  */
/* { dg-final { scan-assembler-times {vcvt\.u32\.f64\ts[0-9]+, d[0-9]+} 1 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment