Commit 4b644867 by Alan Lawrence Committed by Alan Lawrence

[ARM] Remaining intrinsics

	
	* config/arm/arm-builtins.c (VAR11, VAR12): New.
	* config/arm/arm_neon_builtins.def (vcombine, vld2_dup, vld3_dup,
	vld4_dup): Add v4hf variant.
	(vget_high, vget_low): Add v8hf variant.
	(vld1, vst1, vst1_lane, vld2, vld2_lane, vst2, vst2_lane, vld3,
	vld3_lane, vst3, vst3_lane, vld4, vld4_lane, vst4, vst4_lane): Add
	v4hf and v8hf variants.

	* config/arm/iterators.md (VD_LANE, VD_RE, VQ2, VQ_HS): New.
	(VDX): Add V4HF.
	(V_DOUBLE): Add case for V4HF.
	(VQX): Add V8HF.
	(V_HALF): Add case for V8HF.
	(VDQX): Add V4HF, V8HF.
	(V_elem, V_two_elem, V_three_elem, V_four_elem, V_cmp_result,
	V_uf_sclr, V_sz_elem, V_mode_nunits, q): Add cases for V4HF & V8HF.

	* config/arm/neon.md (vec_set<mode>internal, vec_extract<mode>,
	neon_vget_lane<mode>_sext_internal, neon_vget_lane<mode>_zext_internal,
	vec_load_lanesoi<mode>, neon_vld2<mode>, vec_store_lanesoi<mode>,
	neon_vst2<mode>, vec_load_lanesci<mode>, neon_vld3<mode>,
	neon_vld3qa<mode>, neon_vld3qb<mode>, vec_store_lanesci<mode>,
	neon_vst3<mode>, neon_vst3qa<mode>, neon_vst3qb<mode>,
	vec_load_lanesxi<mode>, neon_vld4<mode>, neon_vld4qa<mode>,
	neon_vld4qb<mode>, vec_store_lanesxi<mode>, neon_vst4<mode>,
	neon_vst4qa<mode>, neon_vst4qb<mode>): Change VQ iterator to VQ2.

	(neon_vcreate, neon_vreinterpretv8qi<mode>,
	neon_vreinterpretv4hi<mode>, neon_vreinterpretv2si<mode>,
	neon_vreinterpretv2sf<mode>, neon_vreinterpretdi<mode>):
	Change VDX to VD_RE.

	(neon_vld2_lane<mode>, neon_vst2_lane<mode>, neon_vld3_lane<mode>,
	neon_vst3_lane<mode>, neon_vld4_lane<mode>, neon_vst4_lane<mode>):
	Change VD iterator to VD_LANE, and VMQ iterator to VQ_HS.

	* config/arm/arm_neon.h (float16x4x2_t, float16x8x2_t, float16x4x3_t,
	float16x8x3_t, float16x4x4_t, float16x8x4_t, vcombine_f16,
	vget_high_f16, vget_low_f16, vld1_f16, vld1q_f16, vst1_f16, vst1q_f16,
	vst1_lane_f16, vst1q_lane_f16, vld2_f16, vld2q_f16, vld2_lane_f16,
	vld2q_lane_f16, vld2_dup_f16, vst2_f16, vst2q_f16, vst2_lane_f16,
	vst2q_lane_f16, vld3_f16, vld3q_f16, vld3_lane_f16, vld3q_lane_f16,
	vld3_dup_f16, vst3_f16, vst3q_f16, vst3_lane_f16, vst3q_lane_f16,
	vld4_f16, vld4q_f16, vld4_lane_f16, vld4q_lane_f16, vld4_dup_f16,
	vst4_f16, vst4q_f16, vst4_lane_f16, vst4q_lane_f16): New.

From-SVN: r227541
parent 7fe08e71
2015-09-08 Alan Lawrence <alan.lawrence@arm.com>
* config/arm/arm-builtins.c (VAR11, VAR12): New.
* config/arm/arm_neon_builtins.def (vcombine, vld2_dup, vld3_dup,
vld4_dup): Add v4hf variant.
(vget_high, vget_low): Add v8hf variant.
(vld1, vst1, vst1_lane, vld2, vld2_lane, vst2, vst2_lane, vld3,
vld3_lane, vst3, vst3_lane, vld4, vld4_lane, vst4, vst4_lane): Add
v4hf and v8hf variants.
* config/arm/iterators.md (VD_LANE, VD_RE, VQ2, VQ_HS): New.
(VDX): Add V4HF.
(V_DOUBLE): Add case for V4HF.
(VQX): Add V8HF.
(V_HALF): Add case for V8HF.
(VDQX): Add V4HF, V8HF.
(V_elem, V_two_elem, V_three_elem, V_four_elem, V_cmp_result,
V_uf_sclr, V_sz_elem, V_mode_nunits, q): Add cases for V4HF & V8HF.
* config/arm/neon.md (vec_set<mode>internal, vec_extract<mode>,
neon_vget_lane<mode>_sext_internal, neon_vget_lane<mode>_zext_internal,
vec_load_lanesoi<mode>, neon_vld2<mode>, vec_store_lanesoi<mode>,
neon_vst2<mode>, vec_load_lanesci<mode>, neon_vld3<mode>,
neon_vld3qa<mode>, neon_vld3qb<mode>, vec_store_lanesci<mode>,
neon_vst3<mode>, neon_vst3qa<mode>, neon_vst3qb<mode>,
vec_load_lanesxi<mode>, neon_vld4<mode>, neon_vld4qa<mode>,
neon_vld4qb<mode>, vec_store_lanesxi<mode>, neon_vst4<mode>,
neon_vst4qa<mode>, neon_vst4qb<mode>): Change VQ iterator to VQ2.
(neon_vcreate, neon_vreinterpretv8qi<mode>,
neon_vreinterpretv4hi<mode>, neon_vreinterpretv2si<mode>,
neon_vreinterpretv2sf<mode>, neon_vreinterpretdi<mode>):
Change VDX to VD_RE.
(neon_vld2_lane<mode>, neon_vst2_lane<mode>, neon_vld3_lane<mode>,
neon_vst3_lane<mode>, neon_vld4_lane<mode>, neon_vst4_lane<mode>):
Change VD iterator to VD_LANE, and VMQ iterator to VQ_HS.
* config/arm/arm_neon.h (float16x4x2_t, float16x8x2_t, float16x4x3_t,
float16x8x3_t, float16x4x4_t, float16x8x4_t, vcombine_f16,
vget_high_f16, vget_low_f16, vld1_f16, vld1q_f16, vst1_f16, vst1q_f16,
vst1_lane_f16, vst1q_lane_f16, vld2_f16, vld2q_f16, vld2_lane_f16,
vld2q_lane_f16, vld2_dup_f16, vst2_f16, vst2q_f16, vst2_lane_f16,
vst2q_lane_f16, vld3_f16, vld3q_f16, vld3_lane_f16, vld3q_lane_f16,
vld3_dup_f16, vst3_f16, vst3q_f16, vst3_lane_f16, vst3q_lane_f16,
vld4_f16, vld4q_f16, vld4_lane_f16, vld4q_lane_f16, vld4_dup_f16,
vst4_f16, vst4q_f16, vst4_lane_f16, vst4q_lane_f16): New.
2015-09-07 Alan Lawrence <alan.lawrence@arm.com>
* config/arm/arm_neon.h (vgetq_lane_f16, vsetq_lane_f16, vld1q_lane_f16,
......
......@@ -239,6 +239,12 @@ typedef struct {
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
VAR9 (T, N, A, B, C, D, E, F, G, H, I) \
VAR1 (T, N, J)
/* VAR11: eleven-variant form.  Expands VAR10 on the first ten variant
   arguments (A..J), then VAR1 on the eleventh (K), passing T and N through
   unchanged to both.  */
#define VAR11(T, N, A, B, C, D, E, F, G, H, I, J, K) \
VAR10 (T, N, A, B, C, D, E, F, G, H, I, J) \
VAR1 (T, N, K)
/* VAR12: twelve-variant form.  Expands VAR11 on the first eleven variant
   arguments (A..K), then VAR1 on the twelfth (L), passing T and N through
   unchanged to both.  */
#define VAR12(T, N, A, B, C, D, E, F, G, H, I, J, K, L) \
VAR11 (T, N, A, B, C, D, E, F, G, H, I, J, K) \
VAR1 (T, N, L)
/* The NEON builtin data can be found in arm_neon_builtins.def.
The mode entries in the following table correspond to the "key" type of the
......
......@@ -164,9 +164,9 @@ VAR10 (UNOP, vdup_n,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR10 (GETLANE, vdup_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di)
VAR5 (UNOP, vget_high, v16qi, v8hi, v4si, v4sf, v2di)
VAR5 (UNOP, vget_low, v16qi, v8hi, v4si, v4sf, v2di)
VAR6 (COMBINE, vcombine, v8qi, v4hi, v4hf, v2si, v2sf, di)
VAR6 (UNOP, vget_high, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR6 (UNOP, vget_low, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR3 (UNOP, vmovn, v8hi, v4si, v2di)
VAR3 (UNOP, vqmovns, v8hi, v4si, v2di)
VAR3 (UNOP, vqmovnu, v8hi, v4si, v2di)
......@@ -242,40 +242,40 @@ VAR6 (UNOP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di, ti)
VAR6 (UNOP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di, ti)
VAR6 (UNOP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di, ti)
VAR6 (UNOP, vreinterpretti, v16qi, v8hi, v4si, v4sf, v2di, ti)
VAR10 (LOAD1, vld1,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR12 (LOAD1, vld1,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR10 (LOAD1LANE, vld1_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR10 (LOAD1, vld1_dup,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR10 (STORE1, vst1,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR10 (STORE1LANE, vst1_lane,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
VAR9 (LOAD1, vld2,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
VAR7 (LOAD1LANE, vld2_lane,
v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
VAR5 (LOAD1, vld2_dup, v8qi, v4hi, v2si, v2sf, di)
VAR9 (STORE1, vst2,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
VAR7 (STORE1LANE, vst2_lane,
v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
VAR9 (LOAD1, vld3,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
VAR7 (LOAD1LANE, vld3_lane,
v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
VAR5 (LOAD1, vld3_dup, v8qi, v4hi, v2si, v2sf, di)
VAR9 (STORE1, vst3,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
VAR7 (STORE1LANE, vst3_lane,
v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
VAR9 (LOAD1, vld4,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
VAR7 (LOAD1LANE, vld4_lane,
v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
VAR5 (LOAD1, vld4_dup, v8qi, v4hi, v2si, v2sf, di)
VAR9 (STORE1, vst4,
v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf)
VAR7 (STORE1LANE, vst4_lane,
v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf)
VAR12 (STORE1, vst1,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR12 (STORE1LANE, vst1_lane,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf, v2di)
VAR11 (LOAD1, vld2,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
VAR9 (LOAD1LANE, vld2_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR6 (LOAD1, vld2_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
VAR11 (STORE1, vst2,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
VAR9 (STORE1LANE, vst2_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR11 (LOAD1, vld3,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
VAR9 (LOAD1LANE, vld3_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR6 (LOAD1, vld3_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
VAR11 (STORE1, vst3,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
VAR9 (STORE1LANE, vst3_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR11 (LOAD1, vld4,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
VAR9 (LOAD1LANE, vld4_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
VAR6 (LOAD1, vld4_dup, v8qi, v4hi, v4hf, v2si, v2sf, di)
VAR11 (STORE1, vst4,
v8qi, v4hi, v4hf, v2si, v2sf, di, v16qi, v8hi, v8hf, v4si, v4sf)
VAR9 (STORE1LANE, vst4_lane,
v8qi, v4hi, v4hf, v2si, v2sf, v8hi, v8hf, v4si, v4sf)
......@@ -65,20 +65,32 @@
;; Integer modes supported by Neon and IWMMXT, except V2DI
(define_mode_iterator VINTW [V2SI V4HI V8QI V4SI V8HI V16QI])
;; Double-width vector modes.
;; Double-width vector modes, on which we support arithmetic (no HF!)
(define_mode_iterator VD [V8QI V4HI V2SI V2SF])
;; Double-width vector modes plus 64-bit elements for vreinterpret + vcreate.
(define_mode_iterator VD_RE [V8QI V4HI V2SI V2SF DI])
;; Double-width vector modes plus 64-bit elements.
(define_mode_iterator VDX [V8QI V4HI V2SI V2SF DI])
(define_mode_iterator VDX [V8QI V4HI V4HF V2SI V2SF DI])
;; Double-width vector modes, with V4HF - for vldN_lane and vstN_lane.
(define_mode_iterator VD_LANE [V8QI V4HI V4HF V2SI V2SF])
;; Double-width vector modes without floating-point elements.
(define_mode_iterator VDI [V8QI V4HI V2SI])
;; Quad-width vector modes.
;; Quad-width vector modes supporting arithmetic (no HF!).
(define_mode_iterator VQ [V16QI V8HI V4SI V4SF])
;; Quad-width vector modes, including V8HF.
(define_mode_iterator VQ2 [V16QI V8HI V8HF V4SI V4SF])
;; Quad-width vector modes with 16- or 32-bit elements
(define_mode_iterator VQ_HS [V8HI V8HF V4SI V4SF])
;; Quad-width vector modes plus 64-bit elements.
(define_mode_iterator VQX [V16QI V8HI V4SI V4SF V2DI])
(define_mode_iterator VQX [V16QI V8HI V8HF V4SI V4SF V2DI])
;; Quad-width vector modes without floating-point elements.
(define_mode_iterator VQI [V16QI V8HI V4SI])
......@@ -111,7 +123,8 @@
(define_mode_iterator VDQI [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
;; Vector modes, including 64-bit integer elements.
(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI V2SF V4SF DI V2DI])
(define_mode_iterator VDQX [V8QI V16QI V4HI V8HI V2SI V4SI
V4HF V8HF V2SF V4SF DI V2DI])
;; Vector modes including 64-bit integer elements, but no floats.
(define_mode_iterator VDQIX [V8QI V16QI V4HI V8HI V2SI V4SI DI V2DI])
......@@ -366,7 +379,8 @@
;; Define element mode for each vector mode.
(define_mode_attr V_elem [(V8QI "QI") (V16QI "QI")
(V4HI "HI") (V8HI "HI")
(V4HI "HI") (V8HI "HI")
(V4HF "HF") (V8HF "HF")
(V2SI "SI") (V4SI "SI")
(V2SF "SF") (V4SF "SF")
(DI "DI") (V2DI "DI")])
......@@ -383,6 +397,7 @@
;; size for structure lane/dup loads and stores.
(define_mode_attr V_two_elem [(V8QI "HI") (V16QI "HI")
(V4HI "SI") (V8HI "SI")
(V4HF "SF") (V8HF "SF")
(V2SI "V2SI") (V4SI "V2SI")
(V2SF "V2SF") (V4SF "V2SF")
(DI "V2DI") (V2DI "V2DI")])
......@@ -390,6 +405,7 @@
;; Similar, for three elements.
(define_mode_attr V_three_elem [(V8QI "BLK") (V16QI "BLK")
(V4HI "BLK") (V8HI "BLK")
(V4HF "BLK") (V8HF "BLK")
(V2SI "BLK") (V4SI "BLK")
(V2SF "BLK") (V4SF "BLK")
(DI "EI") (V2DI "EI")])
......@@ -397,6 +413,7 @@
;; Similar, for four elements.
(define_mode_attr V_four_elem [(V8QI "SI") (V16QI "SI")
(V4HI "V4HI") (V8HI "V4HI")
(V4HF "V4HF") (V8HF "V4HF")
(V2SI "V4SI") (V4SI "V4SI")
(V2SF "V4SF") (V4SF "V4SF")
(DI "OI") (V2DI "OI")])
......@@ -421,7 +438,8 @@
;; Modes with half the number of equal-sized elements.
(define_mode_attr V_HALF [(V16QI "V8QI") (V8HI "V4HI")
(V4SI "V2SI") (V4SF "V2SF") (V2DF "DF")
(V8HF "V4HF") (V4SI "V2SI")
(V4SF "V2SF") (V2DF "DF")
(V2DI "DI")])
;; Same, but lower-case.
......@@ -431,8 +449,9 @@
;; Modes with twice the number of equal-sized elements.
(define_mode_attr V_DOUBLE [(V8QI "V16QI") (V4HI "V8HI")
(V2SI "V4SI") (V2SF "V4SF") (DF "V2DF")
(DI "V2DI")])
(V2SI "V4SI") (V4HF "V8HF")
(V2SF "V4SF") (DF "V2DF")
(DI "V2DI")])
;; Same, but lower-case.
(define_mode_attr V_double [(V8QI "v16qi") (V4HI "v8hi")
......@@ -454,8 +473,9 @@
;; Mode of result of comparison operations (and bit-select operand 1).
(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
(V4HI "V4HI") (V8HI "V8HI")
(V4HI "V4HI") (V8HI "V8HI")
(V2SI "V2SI") (V4SI "V4SI")
(V4HF "V4HI") (V8HF "V8HI")
(V2SF "V2SI") (V4SF "V4SI")
(DI "DI") (V2DI "V2DI")])
......@@ -492,12 +512,14 @@
(define_mode_attr V_uf_sclr [(V8QI "u8") (V16QI "u8")
(V4HI "u16") (V8HI "u16")
(V2SI "32") (V4SI "32")
(V4HF "u16") (V8HF "u16")
(V2SF "32") (V4SF "32")])
(define_mode_attr V_sz_elem [(V8QI "8") (V16QI "8")
(V4HI "16") (V8HI "16")
(V2SI "32") (V4SI "32")
(DI "64") (V2DI "64")
(V4HF "16") (V8HF "16")
(V2SF "32") (V4SF "32")])
(define_mode_attr V_elem_ch [(V8QI "b") (V16QI "b")
......@@ -564,6 +586,7 @@
(DI "true") (V2DI "false")])
(define_mode_attr V_mode_nunits [(V8QI "8") (V16QI "16")
(V4HF "4") (V8HF "8")
(V4HI "4") (V8HI "8")
(V2SI "2") (V4SI "4")
(V2SF "2") (V4SF "4")
......@@ -607,6 +630,7 @@
(define_mode_attr q [(V8QI "") (V16QI "_q")
(V4HI "") (V8HI "_q")
(V2SI "") (V4SI "_q")
(V4HF "") (V8HF "_q")
(V2SF "") (V4SF "_q")
(DI "") (V2DI "_q")
(DF "") (V2DF "_q")])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment