Commit 91bd4114 by James Greenhalgh, committed by James Greenhalgh

[AArch64] Rewrite the vdup_lane intrinsics in C

gcc/
	* config/aarch64/aarch64-simd-builtins.def
	(dup_lane_scalar): Remove.
	* config/aarch64/aarch64-simd.md
	(aarch64_simd_dup): Add 'w->w' alternative.
	(aarch64_dup_lane<mode>): Allow for VALL.
	(aarch64_dup_lane_scalar<mode>): Remove.
	(aarch64_dup_lane_<vswap_width_name><mode>): New.
	(aarch64_get_lane_signed<mode>): Add w->w alternative.
	(aarch64_get_lane_unsigned<mode>): Likewise.
	(aarch64_get_lane<mode>): Likewise.
	* config/aarch64/aarch64.c (aarch64_evpc_dup): New.
	(aarch64_expand_vec_perm_const_1): Use aarch64_evpc_dup.
	* config/aarch64/iterators.md (VSWAP_WIDTH): New.
	(VCON): Change container of V2SF.
	(vswap_width_name): Likewise.
	* config/aarch64/arm_neon.h
	(__aarch64_vdup_lane_any): New.
	(__aarch64_vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
	(vdup<q>_n_<psuf><8,16,32,64>): Convert to C implementation.
	(vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/scalar_intrinsics.c
	(vdup<bhsd>_lane<su><8,16,32,64>): Force values to SIMD registers.

From-SVN: r202180
parent d617d2d8
2013-09-02 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-simd-builtins.def
(dup_lane_scalar): Remove.
* config/aarch64/aarch64-simd.md
(aarch64_simd_dup): Add 'w->w' alternative.
(aarch64_dup_lane<mode>): Allow for VALL.
(aarch64_dup_lane_scalar<mode>): Remove.
(aarch64_dup_lane_<vswap_width_name><mode>): New.
(aarch64_get_lane_signed<mode>): Add w->w alternative.
(aarch64_get_lane_unsigned<mode>): Likewise.
(aarch64_get_lane<mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_evpc_dup): New.
(aarch64_expand_vec_perm_const_1): Use aarch64_evpc_dup.
* config/aarch64/iterators.md (VSWAP_WIDTH): New.
(VCON): Change container of V2SF.
(vswap_width_name): Likewise.
* config/aarch64/arm_neon.h
(__aarch64_vdup_lane_any): New.
(__aarch64_vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
(vdup<q>_n_<psuf><8,16,32,64>): Convert to C implementation.
(vdup<q>_lane<q>_<fpsu><8,16,32,64>): Likewise.
2013-09-02 Eric Botcazou <ebotcazou@adacore.com>
PR middle-end/56382
......
...@@ -336,32 +336,47 @@ ...@@ -336,32 +336,47 @@
}) })
(define_insn "aarch64_simd_dup<mode>" (define_insn "aarch64_simd_dup<mode>"
[(set (match_operand:VDQ 0 "register_operand" "=w") [(set (match_operand:VDQ 0 "register_operand" "=w, w")
(vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r")))] (vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r, w")))]
"TARGET_SIMD"
"@
dup\\t%0.<Vtype>, %<vw>1
dup\\t%0.<Vtype>, %1.<Vetype>[0]"
[(set_attr "simd_type" "simd_dupgp, simd_dup")
(set_attr "simd_mode" "<MODE>")]
)
(define_insn "aarch64_simd_dup<mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w")
(vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))]
"TARGET_SIMD" "TARGET_SIMD"
"dup\\t%0.<Vtype>, %<vw>1" "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
[(set_attr "simd_type" "simd_dupgp") [(set_attr "simd_type" "simd_dup")
(set_attr "simd_mode" "<MODE>")] (set_attr "simd_mode" "<MODE>")]
) )
(define_insn "aarch64_dup_lane<mode>" (define_insn "aarch64_dup_lane<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w") [(set (match_operand:VALL 0 "register_operand" "=w")
(vec_duplicate:VDQ_I (vec_duplicate:VALL
(vec_select:<VEL> (vec_select:<VEL>
(match_operand:<VCON> 1 "register_operand" "w") (match_operand:VALL 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")]) (parallel [(match_operand:SI 2 "immediate_operand" "i")])
)))] )))]
"TARGET_SIMD" "TARGET_SIMD"
"dup\\t%<v>0<Vmtype>, %1.<Vetype>[%2]" "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
[(set_attr "simd_type" "simd_dup") [(set_attr "simd_type" "simd_dup")
(set_attr "simd_mode" "<MODE>")] (set_attr "simd_mode" "<MODE>")]
) )
(define_insn "aarch64_simd_dup<mode>" (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
[(set (match_operand:VDQF 0 "register_operand" "=w") [(set (match_operand:VALL 0 "register_operand" "=w")
(vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))] (vec_duplicate:VALL
(vec_select:<VEL>
(match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
(parallel [(match_operand:SI 2 "immediate_operand" "i")])
)))]
"TARGET_SIMD" "TARGET_SIMD"
"dup\\t%0.<Vtype>, %1.<Vetype>[0]" "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
[(set_attr "simd_type" "simd_dup") [(set_attr "simd_type" "simd_dup")
(set_attr "simd_mode" "<MODE>")] (set_attr "simd_mode" "<MODE>")]
) )
......
...@@ -7932,6 +7932,55 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d) ...@@ -7932,6 +7932,55 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d)
} }
static bool static bool
/* Recognize a constant permutation in which every element of the result
   selects the same lane of the input vector, and expand it as a single
   DUP-lane instruction.  Returns true on success, false if the permute
   does not match or the mode is unsupported.  */
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
rtx (*gen) (rtx, rtx, rtx);
rtx out = d->target;
rtx in0;
enum machine_mode vmode = d->vmode;
unsigned int i, elt, nelt = d->nelt;
rtx lane;
/* TODO: This may not be big-endian safe. */
if (BYTES_BIG_ENDIAN)
return false;
/* All permute indices must be identical for this to be a DUP.  */
elt = d->perm[0];
for (i = 1; i < nelt; i++)
{
if (elt != d->perm[i])
return false;
}
/* The generic preparation in aarch64_expand_vec_perm_const_1
swaps the operand order and the permute indices if it finds
d->perm[0] to be in the second operand. Thus, we can always
use d->op0 and need not do any extra arithmetic to get the
correct lane number. */
in0 = d->op0;
/* Lane number is passed to the pattern as an immediate operand.  */
lane = GEN_INT (elt);
/* Select the mode-specific aarch64_dup_lane generator; any mode
   without a generator here cannot be expanded as a DUP.  */
switch (vmode)
{
case V16QImode: gen = gen_aarch64_dup_lanev16qi; break;
case V8QImode: gen = gen_aarch64_dup_lanev8qi; break;
case V8HImode: gen = gen_aarch64_dup_lanev8hi; break;
case V4HImode: gen = gen_aarch64_dup_lanev4hi; break;
case V4SImode: gen = gen_aarch64_dup_lanev4si; break;
case V2SImode: gen = gen_aarch64_dup_lanev2si; break;
case V2DImode: gen = gen_aarch64_dup_lanev2di; break;
case V4SFmode: gen = gen_aarch64_dup_lanev4sf; break;
case V2SFmode: gen = gen_aarch64_dup_lanev2sf; break;
case V2DFmode: gen = gen_aarch64_dup_lanev2df; break;
default:
return false;
}
emit_insn (gen (out, in0, lane));
return true;
}
static bool
aarch64_evpc_tbl (struct expand_vec_perm_d *d) aarch64_evpc_tbl (struct expand_vec_perm_d *d)
{ {
rtx rperm[MAX_VECT_LEN], sel; rtx rperm[MAX_VECT_LEN], sel;
...@@ -7988,6 +8037,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d) ...@@ -7988,6 +8037,8 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
return true; return true;
else if (aarch64_evpc_trn (d)) else if (aarch64_evpc_trn (d))
return true; return true;
else if (aarch64_evpc_dup (d))
return true;
return aarch64_evpc_tbl (d); return aarch64_evpc_tbl (d);
} }
return false; return false;
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -383,7 +383,7 @@ ...@@ -383,7 +383,7 @@
(V4HI "V8HI") (V8HI "V8HI") (V4HI "V8HI") (V8HI "V8HI")
(V2SI "V4SI") (V4SI "V4SI") (V2SI "V4SI") (V4SI "V4SI")
(DI "V2DI") (V2DI "V2DI") (DI "V2DI") (V2DI "V2DI")
(V2SF "V2SF") (V4SF "V4SF") (V2SF "V4SF") (V4SF "V4SF")
(V2DF "V2DF") (SI "V4SI") (V2DF "V2DF") (SI "V4SI")
(HI "V8HI") (QI "V16QI")]) (HI "V8HI") (QI "V16QI")])
...@@ -527,6 +527,20 @@ ...@@ -527,6 +527,20 @@
(define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")]) (define_mode_attr fcvt_target [(V2DF "v2di") (V4SF "v4si") (V2SF "v2si")])
(define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")]) (define_mode_attr FCVT_TARGET [(V2DF "V2DI") (V4SF "V4SI") (V2SF "V2SI")])
(define_mode_attr VSWAP_WIDTH [(V8QI "V16QI") (V16QI "V8QI")
(V4HI "V8HI") (V8HI "V4HI")
(V2SI "V4SI") (V4SI "V2SI")
(DI "V2DI") (V2DI "DI")
(V2SF "V4SF") (V4SF "V2SF")
(DF "V2DF") (V2DF "DF")])
(define_mode_attr vswap_width_name [(V8QI "to_128") (V16QI "to_64")
(V4HI "to_128") (V8HI "to_64")
(V2SI "to_128") (V4SI "to_64")
(DI "to_128") (V2DI "to_64")
(V2SF "to_128") (V4SF "to_64")
(DF "to_128") (V2DF "to_64")])
;; ------------------------------------------------------------------- ;; -------------------------------------------------------------------
;; Code Iterators ;; Code Iterators
;; ------------------------------------------------------------------- ;; -------------------------------------------------------------------
......
2013-09-02 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/scalar_intrinsics.c
(vdup<bhsd>_lane<su><8,16,32,64>): Force values to SIMD registers.
2013-09-02 Richard Biener <rguenther@suse.de> 2013-09-02 Richard Biener <rguenther@suse.de>
PR middle-end/57511 PR middle-end/57511
......
...@@ -198,13 +198,21 @@ test_vcltzd_s64 (int64x1_t a) ...@@ -198,13 +198,21 @@ test_vcltzd_s64 (int64x1_t a)
int8x1_t int8x1_t
test_vdupb_lane_s8 (int8x16_t a) test_vdupb_lane_s8 (int8x16_t a)
{ {
return vdupb_lane_s8 (a, 2); int8x1_t res;
force_simd (a);
res = vdupb_laneq_s8 (a, 2);
force_simd (res);
return res;
} }
uint8x1_t uint8x1_t
test_vdupb_lane_u8 (uint8x16_t a) test_vdupb_lane_u8 (uint8x16_t a)
{ {
return vdupb_lane_u8 (a, 2); uint8x1_t res;
force_simd (a);
res = vdupb_laneq_u8 (a, 2);
force_simd (res);
return res;
} }
/* { dg-final { scan-assembler-times "aarch64_get_lanev8hi" 2 } } */ /* { dg-final { scan-assembler-times "aarch64_get_lanev8hi" 2 } } */
...@@ -212,13 +220,21 @@ test_vdupb_lane_u8 (uint8x16_t a) ...@@ -212,13 +220,21 @@ test_vdupb_lane_u8 (uint8x16_t a)
int16x1_t int16x1_t
test_vduph_lane_s16 (int16x8_t a) test_vduph_lane_s16 (int16x8_t a)
{ {
return vduph_lane_s16 (a, 2); int16x1_t res;
force_simd (a);
res = vduph_laneq_s16 (a, 2);
force_simd (res);
return res;
} }
uint16x1_t uint16x1_t
test_vduph_lane_u16 (uint16x8_t a) test_vduph_lane_u16 (uint16x8_t a)
{ {
return vduph_lane_u16 (a, 2); uint16x1_t res;
force_simd (a);
res = vduph_laneq_u16 (a, 2);
force_simd (res);
return res;
} }
/* { dg-final { scan-assembler-times "aarch64_get_lanev4si" 2 } } */ /* { dg-final { scan-assembler-times "aarch64_get_lanev4si" 2 } } */
...@@ -226,13 +242,21 @@ test_vduph_lane_u16 (uint16x8_t a) ...@@ -226,13 +242,21 @@ test_vduph_lane_u16 (uint16x8_t a)
int32x1_t int32x1_t
test_vdups_lane_s32 (int32x4_t a) test_vdups_lane_s32 (int32x4_t a)
{ {
return vdups_lane_s32 (a, 2); int32x1_t res;
force_simd (a);
res = vdups_laneq_s32 (a, 2);
force_simd (res);
return res;
} }
uint32x1_t uint32x1_t
test_vdups_lane_u32 (uint32x4_t a) test_vdups_lane_u32 (uint32x4_t a)
{ {
return vdups_lane_u32 (a, 2); uint32x1_t res;
force_simd (a);
res = vdups_laneq_u32 (a, 2);
force_simd (res);
return res;
} }
/* { dg-final { scan-assembler-times "aarch64_get_lanev2di" 2 } } */ /* { dg-final { scan-assembler-times "aarch64_get_lanev2di" 2 } } */
...@@ -240,13 +264,21 @@ test_vdups_lane_u32 (uint32x4_t a) ...@@ -240,13 +264,21 @@ test_vdups_lane_u32 (uint32x4_t a)
int64x1_t int64x1_t
test_vdupd_lane_s64 (int64x2_t a) test_vdupd_lane_s64 (int64x2_t a)
{ {
return vdupd_lane_s64 (a, 1); int64x1_t res;
force_simd (a);
res = vdupd_laneq_s64 (a, 1);
force_simd (res);
return res;
} }
uint64x1_t uint64x1_t
test_vdupd_lane_u64 (uint64x2_t a) test_vdupd_lane_u64 (uint64x2_t a)
{ {
return vdupd_lane_u64 (a, 1); uint64x1_t res;
force_simd (a);
res = vdupd_laneq_u64 (a, 1);
force_simd (res);
return res;
} }
/* { dg-final { scan-assembler-times "\\tcmtst\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */ /* { dg-final { scan-assembler-times "\\tcmtst\\td\[0-9\]+, d\[0-9\]+, d\[0-9\]+" 2 } } */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment