Commit ae0533da by Alan Lawrence (committed by Alan Lawrence)

Detect EXT patterns to vec_perm_const, use for EXT intrinsics

        * config/aarch64/aarch64-builtins.c (aarch64_types_binopv_qualifiers,
        TYPES_BINOPV): New static data.
        * config/aarch64/aarch64-simd-builtins.def (im_lane_bound): New builtin.
        * config/aarch64/aarch64-simd.md (aarch64_ext, aarch64_im_lane_boundsi):
        New patterns.
        * config/aarch64/aarch64.c (aarch64_expand_vec_perm_const_1): Match
        patterns for EXT.
        (aarch64_evpc_ext): New function.

        * config/aarch64/iterators.md (UNSPEC_EXT): New enum element.

        * config/aarch64/arm_neon.h (vext_f32, vext_f64, vext_p8, vext_p16,
        vext_s8, vext_s16, vext_s32, vext_s64, vext_u8, vext_u16, vext_u32,
        vext_u64, vextq_f32, vextq_f64, vextq_p8, vextq_p16, vextq_s8,
        vextq_s16, vextq_s32, vextq_s64, vextq_u8, vextq_u16, vextq_u32,
        vextq_u64): Replace __asm with __builtin_shuffle and im_lane_boundsi.

From-SVN: r211058
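The core of this change is that aarch64_expand_vec_perm_const_1 now recognises permute masks which select a run of consecutive elements from the concatenation of the two input vectors, and emits a single EXT instruction for them. A minimal sketch of the user-visible effect, using GCC's generic vector extensions (the function name and the quoted assembly are illustrative, not taken from the patch):

/* A two-operand shuffle whose mask picks consecutive elements of {a, b}
   can now be matched by aarch64_evpc_ext and emitted as one EXT.  */
typedef unsigned int u32x4 __attribute__ ((vector_size (16)));

u32x4
ext_lane1 (u32x4 a, u32x4 b)
{
  /* Elements 1..4 of the concatenation a0 a1 a2 a3 b0 b1 b2 b3; on a
     little-endian target this is expected to assemble to something like
     "ext v0.16b, v0.16b, v1.16b, #4".  */
  return __builtin_shuffle (a, b, (u32x4) {1, 2, 3, 4});
}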
config/aarch64/aarch64-builtins.c
@@ -169,6 +169,10 @@ aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
#define TYPES_BINOP (aarch64_types_binop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_none, qualifier_none };
#define TYPES_BINOPV (aarch64_types_binopv_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
#define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
config/aarch64/aarch64-simd-builtins.def
@@ -410,3 +410,6 @@
  /* Implemented by aarch64_crypto_pmull<mode>.  */
  VAR1 (BINOPP, crypto_pmull, 0, di)
  VAR1 (BINOPP, crypto_pmull, 0, v2di)

  /* Meta-op to check lane bounds of immediate in aarch64_expand_builtin.  */
  VAR1 (BINOPV, im_lane_bound, 0, si)
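The new im_lane_bound entry uses the BINOPV (void result, two integer operands) shape added above; it exists only so that arm_neon.h can force a compile-time range check on the lane argument before handing the index to __builtin_shuffle. A hypothetical sketch of the style of definition the ChangeLog describes for the vext* intrinsics (little-endian only; the real arm_neon.h code also handles big-endian lane order, and __builtin_aarch64_im_lane_boundsi is the name the builtin framework is expected to generate from this VAR1 entry):

#include <arm_neon.h>

/* Hypothetical vextq_u32-style wrapper, assuming an aarch64 compiler with
   this patch applied.  */
static __inline__ uint32x4_t __attribute__ ((__always_inline__))
my_vextq_u32 (uint32x4_t __a, uint32x4_t __b, const int __c)
{
  /* Errors at compile time unless 0 <= __c < 4, rather than letting
     __builtin_shuffle wrap an out-of-range index silently.  */
  __builtin_aarch64_im_lane_boundsi (__c, 4);
  return __builtin_shuffle (__a, __b,
                            (uint32x4_t) {__c, __c + 1, __c + 2, __c + 3});
}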
config/aarch64/aarch64-simd.md
@@ -4167,6 +4167,35 @@
  [(set_attr "type" "neon_permute<q>")]
)

;; Note immediate (third) operand is lane index not byte index.
(define_insn "aarch64_ext<mode>"
  [(set (match_operand:VALL 0 "register_operand" "=w")
        (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
                      (match_operand:VALL 2 "register_operand" "w")
                      (match_operand:SI 3 "immediate_operand" "i")]
                     UNSPEC_EXT))]
  "TARGET_SIMD"
{
  operands[3] = GEN_INT (INTVAL (operands[3])
      * GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; This exists solely to check the arguments to the corresponding __builtin.
;; Used where we want an error for out-of-range indices which would otherwise
;; be silently wrapped (e.g. the mask to a __builtin_shuffle).
(define_expand "aarch64_im_lane_boundsi"
  [(match_operand:SI 0 "immediate_operand" "i")
   (match_operand:SI 1 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  aarch64_simd_lane_bounds (operands[0], 0, INTVAL (operands[1]));
  DONE;
}
)

(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:TI [(match_operand:OI 1 "register_operand" "w")
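As the comment on aarch64_ext<mode> notes, the pattern's immediate is a lane index, which the output template scales by the element size, so the same lane index yields different byte immediates for different element widths. An illustrative example (the type name and the expected assembly are mine, not part of the patch):

typedef unsigned short u16x8 __attribute__ ((vector_size (16)));

u16x8
ext_lane3_u16 (u16x8 a, u16x8 b)
{
  /* Lane index 3 on 2-byte elements: the insn prints a byte offset of
     3 * 2, i.e. something like "ext v0.16b, v0.16b, v1.16b, #6" on a
     little-endian target.  */
  return __builtin_shuffle (a, b, (u16x8) {3, 4, 5, 6, 7, 8, 9, 10});
}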
config/aarch64/aarch64.c
@@ -8990,6 +8990,70 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d)
  return true;
}

/* Recognize patterns for the EXT insn.  */

static bool
aarch64_evpc_ext (struct expand_vec_perm_d *d)
{
  unsigned int i, nelt = d->nelt;
  rtx (*gen) (rtx, rtx, rtx, rtx);
  rtx offset;

  unsigned int location = d->perm[0]; /* Always < nelt.  */

  /* Check if the extracted indices are increasing by one.  */
  for (i = 1; i < nelt; i++)
    {
      unsigned int required = location + i;
      if (d->one_vector_p)
        {
          /* We'll pass the same vector in twice, so allow indices to wrap.  */
          required &= (nelt - 1);
        }
      if (d->perm[i] != required)
        return false;
    }

  /* The mid-end handles masks that just return one of the input vectors.  */
  gcc_assert (location != 0);

  switch (d->vmode)
    {
    case V16QImode: gen = gen_aarch64_extv16qi; break;
    case V8QImode: gen = gen_aarch64_extv8qi; break;
    case V4HImode: gen = gen_aarch64_extv4hi; break;
    case V8HImode: gen = gen_aarch64_extv8hi; break;
    case V2SImode: gen = gen_aarch64_extv2si; break;
    case V4SImode: gen = gen_aarch64_extv4si; break;
    case V2SFmode: gen = gen_aarch64_extv2sf; break;
    case V4SFmode: gen = gen_aarch64_extv4sf; break;
    case V2DImode: gen = gen_aarch64_extv2di; break;
    case V2DFmode: gen = gen_aarch64_extv2df; break;
    default:
      return false;
    }

  /* Success! */
  if (d->testing_p)
    return true;

  if (BYTES_BIG_ENDIAN)
    {
      /* After setup, we want the high elements of the first vector (stored
         at the LSB end of the register), and the low elements of the second
         vector (stored at the MSB end of the register).  So swap.  */
      rtx temp = d->op0;
      d->op0 = d->op1;
      d->op1 = temp;
      /* location != 0 (above), so safe to assume (nelt - location) < nelt.  */
      location = nelt - location;
    }

  offset = GEN_INT (location);
  emit_insn (gen (d->target, d->op0, d->op1, offset));
  return true;
}

static bool
aarch64_evpc_dup (struct expand_vec_perm_d *d)
{
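When both operands of the permute are the same register (d->one_vector_p), aarch64_evpc_ext lets the indices wrap modulo nelt, so a rotation of a single vector is also matched. A small illustration (again using generic vector extensions; the quoted assembly is only the expected little-endian form, not output taken from the patch):

typedef unsigned int u32x4 __attribute__ ((vector_size (16)));

u32x4
rotate_by_one_lane (u32x4 a)
{
  /* perm = {1, 2, 3, 0}: location is 1 and the final index wraps from 4
     to 0, so this can become "ext v0.16b, v0.16b, v0.16b, #4".  */
  return __builtin_shuffle (a, (u32x4) {1, 2, 3, 0});
}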
@@ -9094,7 +9158,9 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
  if (TARGET_SIMD)
    {
      if (aarch64_evpc_ext (d))
        return true;
      else if (aarch64_evpc_zip (d))
        return true;
      else if (aarch64_evpc_uzp (d))
        return true;
config/aarch64/iterators.md
@@ -270,6 +270,7 @@
    UNSPEC_UZP2    ; Used in vector permute patterns.
    UNSPEC_TRN1    ; Used in vector permute patterns.
    UNSPEC_TRN2    ; Used in vector permute patterns.
    UNSPEC_EXT     ; Used in aarch64-simd.md.
    UNSPEC_AESE    ; Used in aarch64-simd.md.
    UNSPEC_AESD    ; Used in aarch64-simd.md.
    UNSPEC_AESMC   ; Used in aarch64-simd.md.