Commit 568421ba, authored and committed by Sameera Deshpande

Patch implementing the vld1_*_x3, vst1_*_x2 and vst1_*_x3 intrinsics for AArch64, for all element types.

From-SVN: r260989
parent 5328e74a
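
For reference, a minimal usage sketch of the new intrinsics (illustrative code written for this summary, not part of the patch):

#include <arm_neon.h>

/* Load 12 uint32 values as three q-registers with one intrinsic call,
   then store two of those registers back with a single x2 store.  Each
   call maps to one multi-register ld1/st1 instruction.  */
void
example (uint32_t *dst, const uint32_t *src)
{
  uint32x4x3_t v = vld1q_u32_x3 (src);           /* one ld1 x3 */
  uint32x4x2_t w = { { v.val[0], v.val[1] } };
  vst1q_u32_x2 (dst, w);                         /* one st1 x2 */
}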
2018-05-31 Sameera Deshpande <sameera.deshpande@linaro.org>
* config/aarch64/aarch64-simd-builtins.def (ld1x3): New.
(st1x2): Likewise.
(st1x3): Likewise.
* config/aarch64/aarch64-simd.md
(aarch64_ld1x3<VALLDIF:mode>): New pattern.
(aarch64_ld1_x3_<mode>): Likewise.
(aarch64_st1x2<VALLDIF:mode>): Likewise.
(aarch64_st1_x2_<mode>): Likewise.
(aarch64_st1x3<VALLDIF:mode>): Likewise.
(aarch64_st1_x3_<mode>): Likewise.
* config/aarch64/arm_neon.h (vld1_u8_x3): New function.
(vld1_s8_x3): Likewise.
(vld1_u16_x3): Likewise.
(vld1_s16_x3): Likewise.
(vld1_u32_x3): Likewise.
(vld1_s32_x3): Likewise.
(vld1_u64_x3): Likewise.
(vld1_s64_x3): Likewise.
(vld1_f16_x3): Likewise.
(vld1_f32_x3): Likewise.
(vld1_f64_x3): Likewise.
(vld1_p8_x3): Likewise.
(vld1_p16_x3): Likewise.
(vld1_p64_x3): Likewise.
(vld1q_u8_x3): Likewise.
(vld1q_s8_x3): Likewise.
(vld1q_u16_x3): Likewise.
(vld1q_s16_x3): Likewise.
(vld1q_u32_x3): Likewise.
(vld1q_s32_x3): Likewise.
(vld1q_u64_x3): Likewise.
(vld1q_s64_x3): Likewise.
(vld1q_f16_x3): Likewise.
(vld1q_f32_x3): Likewise.
(vld1q_f64_x3): Likewise.
(vld1q_p8_x3): Likewise.
(vld1q_p16_x3): Likewise.
(vld1q_p64_x3): Likewise.
(vst1_s64_x2): Likewise.
(vst1_u64_x2): Likewise.
(vst1_f64_x2): Likewise.
(vst1_s8_x2): Likewise.
(vst1_p8_x2): Likewise.
(vst1_s16_x2): Likewise.
(vst1_p16_x2): Likewise.
(vst1_s32_x2): Likewise.
(vst1_u8_x2): Likewise.
(vst1_u16_x2): Likewise.
(vst1_u32_x2): Likewise.
(vst1_f16_x2): Likewise.
(vst1_f32_x2): Likewise.
(vst1_p64_x2): Likewise.
(vst1q_s8_x2): Likewise.
(vst1q_p8_x2): Likewise.
(vst1q_s16_x2): Likewise.
(vst1q_p16_x2): Likewise.
(vst1q_s32_x2): Likewise.
(vst1q_s64_x2): Likewise.
(vst1q_u8_x2): Likewise.
(vst1q_u16_x2): Likewise.
(vst1q_u32_x2): Likewise.
(vst1q_u64_x2): Likewise.
(vst1q_f16_x2): Likewise.
(vst1q_f32_x2): Likewise.
(vst1q_f64_x2): Likewise.
(vst1q_p64_x2): Likewise.
(vst1_s64_x3): Likewise.
(vst1_u64_x3): Likewise.
(vst1_f64_x3): Likewise.
(vst1_s8_x3): Likewise.
(vst1_p8_x3): Likewise.
(vst1_s16_x3): Likewise.
(vst1_p16_x3): Likewise.
(vst1_s32_x3): Likewise.
(vst1_u8_x3): Likewise.
(vst1_u16_x3): Likewise.
(vst1_u32_x3): Likewise.
(vst1_f16_x3): Likewise.
(vst1_f32_x3): Likewise.
(vst1_p64_x3): Likewise.
(vst1q_s8_x3): Likewise.
(vst1q_p8_x3): Likewise.
(vst1q_s16_x3): Likewise.
(vst1q_p16_x3): Likewise.
(vst1q_s32_x3): Likewise.
(vst1q_s64_x3): Likewise.
(vst1q_u8_x3): Likewise.
(vst1q_u16_x3): Likewise.
(vst1q_u32_x3): Likewise.
(vst1q_u64_x3): Likewise.
(vst1q_f16_x3): Likewise.
(vst1q_f32_x3): Likewise.
(vst1q_f64_x3): Likewise.
(vst1q_p64_x3): Likewise.
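
For orientation, the user-visible signatures of a representative few of these wrappers, as specified by the Arm C Language Extensions (the full set covers every element type listed above):

uint8x8x3_t   vld1_u8_x3 (const uint8_t *ptr);
float32x4x3_t vld1q_f32_x3 (const float32_t *ptr);
void          vst1_s16_x2 (int16_t *ptr, int16x4x2_t val);
void          vst1q_u64_x3 (uint64_t *ptr, uint64x2x3_t val);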
2018-05-30 Jozef Lawrynowicz <jozef.l@mittosystems.com>
* config/msp430/msp430.c (msp430_output_labelref): Prepend
......
config/aarch64/aarch64-simd-builtins.def:
@@ -445,6 +445,15 @@
BUILTIN_VALL_F16 (STORE1, st1, 0)
VAR1 (STORE1P, st1, 0, v2di)
/* Implemented by aarch64_ld1x3<VALLDIF:mode>. */
BUILTIN_VALLDIF (LOADSTRUCT, ld1x3, 0)
/* Implemented by aarch64_st1x2<VALLDIF:mode>. */
BUILTIN_VALLDIF (STORESTRUCT, st1x2, 0)
/* Implemented by aarch64_st1x3<VALLDIF:mode>. */
BUILTIN_VALLDIF (STORESTRUCT, st1x3, 0)
/* Implemented by fma<mode>4. */
BUILTIN_VHSDF (TERNOP, fma, 4)
VAR1 (TERNOP, fma, 4, hf)
......
config/aarch64/aarch64-simd.md:
@@ -5056,6 +5056,70 @@
}
})
(define_expand "aarch64_ld1x3<VALLDIF:mode>"
[(match_operand:CI 0 "register_operand" "=w")
(match_operand:DI 1 "register_operand" "r")
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_SIMD"
{
rtx mem = gen_rtx_MEM (CImode, operands[1]);
emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
DONE;
})
(define_insn "aarch64_ld1_x3_<mode>"
[(set (match_operand:CI 0 "register_operand" "=w")
(unspec:CI
[(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
(unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
"TARGET_SIMD"
"ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
[(set_attr "type" "neon_load1_3reg<q>")]
)
(define_expand "aarch64_st1x2<VALLDIF:mode>"
[(match_operand:DI 0 "register_operand" "")
(match_operand:OI 1 "register_operand" "")
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_SIMD"
{
rtx mem = gen_rtx_MEM (OImode, operands[0]);
emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
DONE;
})
(define_insn "aarch64_st1_x2_<mode>"
[(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:OI
[(match_operand:OI 1 "register_operand" "w")
(unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
"TARGET_SIMD"
"st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
[(set_attr "type" "neon_store1_2reg<q>")]
)
(define_expand "aarch64_st1x3<VALLDIF:mode>"
[(match_operand:DI 0 "register_operand" "")
(match_operand:CI 1 "register_operand" "")
(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
"TARGET_SIMD"
{
rtx mem = gen_rtx_MEM (CImode, operands[0]);
emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
DONE;
})
(define_insn "aarch64_st1_x3_<mode>"
[(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
(unspec:CI
[(match_operand:CI 1 "register_operand" "w")
(unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
"TARGET_SIMD"
"st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
[(set_attr "type" "neon_store1_3reg<q>")]
)
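
With these patterns, one intrinsic call lowers to one multi-register ld1/st1. A hedged illustration (the user code and expected assembly below are editorial, derived from the templates above rather than taken from the patch):

#include <arm_neon.h>

/* Per the "ld1\t{%S0.<Vtype> - %U0.<Vtype>}, %1" template above, this
   should compile to a single "ld1 {v0.4s - v2.4s}, [x0]" rather than
   three separate loads.  */
int32x4x3_t
load_three (const int32_t *p)
{
  return vld1q_s32_x3 (p);
}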
(define_insn "*aarch64_mov<mode>"
[(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
......
2018-05-31 Sameera Deshpande <sameera.deshpande@linaro.org>
* gcc.target/aarch64/advsimd-intrinsics/vld1x3.c: New test for
vld1x3 intrinsics for aarch64.
* gcc.target/aarch64/advsimd-intrinsics/vst1x2.c: New test for
vst1x2 intrinsics for aarch64.
* gcc.target/aarch64/advsimd-intrinsics/vst1x3.c: New test for
vst1x3 intrinsics for aarch64.
2018-05-30 Jonathan Wakely <jwakely@redhat.com>
PR c++/77777
......
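
The three new tests follow. They share one structure: fill an array, move it through the intrinsic under test, and compare the result element by element. To run just these under DejaGnu, the usual invocation is along the lines of make check-gcc RUNTESTFLAGS="advsimd-intrinsics.exp=vld1x3.c" (illustrative; exact target and board flags vary by setup).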
gcc.target/aarch64/advsimd-intrinsics/vld1x3.c (new file):

/* We haven't implemented these intrinsics for arm yet.  */
/* { dg-xfail-if "" { arm*-*-* } } */
/* { dg-do run } */
/* { dg-options "-O3" } */
#include <arm_neon.h>
#include <stdio.h>
#include "arm-neon-ref.h"

extern void abort (void);
#define TESTMETH(BASE, ELTS, SUFFIX) \
  int __attribute__ ((noinline)) \
  test_vld##SUFFIX##_x3 () \
  { \
    BASE##_t data[ELTS * 3]; \
    BASE##_t temp[ELTS * 3]; \
    BASE##x##ELTS##x##3##_t vectors; \
    int i, j; \
    for (i = 0; i < ELTS * 3; i++) \
      data[i] = (BASE##_t) (3 * i); \
    asm volatile ("" : : : "memory"); \
    vectors = vld1##SUFFIX##_x3 (data); \
    vst1##SUFFIX (temp, vectors.val[0]); \
    vst1##SUFFIX (&temp[ELTS], vectors.val[1]); \
    vst1##SUFFIX (&temp[ELTS * 2], vectors.val[2]); \
    asm volatile ("" : : : "memory"); \
    for (j = 0; j < ELTS * 3; j++) \
      if (temp[j] != data[j]) \
        return 1; \
    return 0; \
  }
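
For illustration, TESTMETH (uint32, 2, _u32) expands to roughly the following (an editorial expansion of the macro above, not part of the test file):

int __attribute__ ((noinline))
test_vld_u32_x3 ()
{
  uint32_t data[2 * 3];
  uint32_t temp[2 * 3];
  uint32x2x3_t vectors;
  int i, j;
  for (i = 0; i < 2 * 3; i++)
    data[i] = (uint32_t) (3 * i);
  asm volatile ("" : : : "memory");     /* keep the stores to data[]   */
  vectors = vld1_u32_x3 (data);         /* the intrinsic under test    */
  vst1_u32 (temp, vectors.val[0]);      /* write back via plain stores */
  vst1_u32 (&temp[2], vectors.val[1]);
  vst1_u32 (&temp[2 * 2], vectors.val[2]);
  asm volatile ("" : : : "memory");
  for (j = 0; j < 2 * 3; j++)
    if (temp[j] != data[j])
      return 1;
  return 0;
}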
#define VARIANTS_1(VARIANT) \
VARIANT (uint8, 8, _u8) \
VARIANT (uint16, 4, _u16) \
VARIANT (uint32, 2, _u32) \
VARIANT (uint64, 1, _u64) \
VARIANT (int8, 8, _s8) \
VARIANT (int16, 4, _s16) \
VARIANT (int32, 2, _s32) \
VARIANT (int64, 1, _s64) \
VARIANT (poly8, 8, _p8) \
VARIANT (poly16, 4, _p16) \
VARIANT (float16, 4, _f16) \
VARIANT (float32, 2, _f32) \
VARIANT (uint8, 16, q_u8) \
VARIANT (uint16, 8, q_u16) \
VARIANT (uint32, 4, q_u32) \
VARIANT (uint64, 2, q_u64) \
VARIANT (int8, 16, q_s8) \
VARIANT (int16, 8, q_s16) \
VARIANT (int32, 4, q_s32) \
VARIANT (int64, 2, q_s64) \
VARIANT (poly8, 16, q_p8) \
VARIANT (poly16, 8, q_p16) \
VARIANT (float16, 8, q_f16) \
VARIANT (float32, 4, q_f32)
#ifdef __aarch64__
#define VARIANTS(VARIANT) VARIANTS_1(VARIANT) \
VARIANT (float64, 1, _f64) \
VARIANT (float64, 2, q_f64)
#else
#define VARIANTS(VARIANT) VARIANTS_1(VARIANT)
#endif
/* Tests of vld1_x3 and vld1q_x3. */
VARIANTS (TESTMETH)
/* Report and abort on failure; SUFFIX is stringized with #, since ##
   does not paste inside a string literal.  */
#define CHECKS(BASE, ELTS, SUFFIX) \
  if (test_vld##SUFFIX##_x3 () != 0) \
    { \
      fprintf (stderr, "test_vld" #SUFFIX "_x3 failed\n"); \
      abort (); \
    }
int
main (int argc, char **argv)
{
  VARIANTS (CHECKS)
  return 0;
}
gcc.target/aarch64/advsimd-intrinsics/vst1x2.c (new file):

/* We haven't implemented these intrinsics for arm yet.  */
/* { dg-xfail-if "" { arm*-*-* } } */
/* { dg-do run } */
/* { dg-options "-O3" } */
#include <arm_neon.h>
#include <stdio.h>
#include "arm-neon-ref.h"

extern void abort (void);
#define TESTMETH(BASE, ELTS, SUFFIX) \
  int __attribute__ ((noinline)) \
  test_vst1##SUFFIX##_x2 () \
  { \
    BASE##_t data[ELTS * 2]; \
    BASE##_t temp[ELTS * 2]; \
    BASE##x##ELTS##x##2##_t vectors; \
    int i, j; \
    for (i = 0; i < ELTS * 2; i++) \
      data[i] = (BASE##_t) (2 * i); \
    asm volatile ("" : : : "memory"); \
    vectors.val[0] = vld1##SUFFIX (data); \
    vectors.val[1] = vld1##SUFFIX (&data[ELTS]); \
    vst1##SUFFIX##_x2 (temp, vectors); \
    asm volatile ("" : : : "memory"); \
    for (j = 0; j < ELTS * 2; j++) \
      if (temp[j] != data[j]) \
        return 1; \
    return 0; \
  }
#define VARIANTS_1(VARIANT) \
VARIANT (uint8, 8, _u8) \
VARIANT (uint16, 4, _u16) \
VARIANT (uint32, 2, _u32) \
VARIANT (uint64, 1, _u64) \
VARIANT (int8, 8, _s8) \
VARIANT (int16, 4, _s16) \
VARIANT (int32, 2, _s32) \
VARIANT (int64, 1, _s64) \
VARIANT (poly8, 8, _p8) \
VARIANT (poly16, 4, _p16) \
VARIANT (float16, 4, _f16) \
VARIANT (float32, 2, _f32) \
VARIANT (uint8, 16, q_u8) \
VARIANT (uint16, 8, q_u16) \
VARIANT (uint32, 4, q_u32) \
VARIANT (uint64, 2, q_u64) \
VARIANT (int8, 16, q_s8) \
VARIANT (int16, 8, q_s16) \
VARIANT (int32, 4, q_s32) \
VARIANT (int64, 2, q_s64) \
VARIANT (poly8, 16, q_p8) \
VARIANT (poly16, 8, q_p16) \
VARIANT (float16, 8, q_f16) \
VARIANT (float32, 4, q_f32)
#ifdef __aarch64__
#define VARIANTS(VARIANT) VARIANTS_1(VARIANT) \
VARIANT (float64, 1, _f64) \
VARIANT (float64, 2, q_f64)
#else
#define VARIANTS(VARIANT) VARIANTS_1(VARIANT)
#endif
/* Tests of vst1_x2 and vst1q_x2. */
VARIANTS (TESTMETH)
/* Report and abort on failure; SUFFIX is stringized with #, since ##
   does not paste inside a string literal.  */
#define CHECKS(BASE, ELTS, SUFFIX) \
  if (test_vst1##SUFFIX##_x2 () != 0) \
    { \
      fprintf (stderr, "test_vst1" #SUFFIX "_x2 failed\n"); \
      abort (); \
    }
int
main (int argc, char **argv)
{
  VARIANTS (CHECKS)
  return 0;
}
gcc.target/aarch64/advsimd-intrinsics/vst1x3.c (new file):

/* We haven't implemented these intrinsics for arm yet.  */
/* { dg-xfail-if "" { arm*-*-* } } */
/* { dg-do run } */
/* { dg-options "-O3" } */
#include <arm_neon.h>
#include <stdio.h>
#include "arm-neon-ref.h"

extern void abort (void);
#define TESTMETH(BASE, ELTS, SUFFIX) \
  int __attribute__ ((noinline)) \
  test_vst1##SUFFIX##_x3 () \
  { \
    BASE##_t data[ELTS * 3]; \
    BASE##_t temp[ELTS * 3]; \
    BASE##x##ELTS##x##3##_t vectors; \
    int i, j; \
    for (i = 0; i < ELTS * 3; i++) \
      data[i] = (BASE##_t) (3 * i); \
    asm volatile ("" : : : "memory"); \
    vectors.val[0] = vld1##SUFFIX (data); \
    vectors.val[1] = vld1##SUFFIX (&data[ELTS]); \
    vectors.val[2] = vld1##SUFFIX (&data[ELTS * 2]); \
    vst1##SUFFIX##_x3 (temp, vectors); \
    asm volatile ("" : : : "memory"); \
    for (j = 0; j < ELTS * 3; j++) \
      if (temp[j] != data[j]) \
        return 1; \
    return 0; \
  }
#define VARIANTS_1(VARIANT) \
VARIANT (uint8, 8, _u8) \
VARIANT (uint16, 4, _u16) \
VARIANT (uint32, 2, _u32) \
VARIANT (uint64, 1, _u64) \
VARIANT (int8, 8, _s8) \
VARIANT (int16, 4, _s16) \
VARIANT (int32, 2, _s32) \
VARIANT (int64, 1, _s64) \
VARIANT (poly8, 8, _p8) \
VARIANT (poly16, 4, _p16) \
VARIANT (float16, 4, _f16) \
VARIANT (float32, 2, _f32) \
VARIANT (uint8, 16, q_u8) \
VARIANT (uint16, 8, q_u16) \
VARIANT (uint32, 4, q_u32) \
VARIANT (uint64, 2, q_u64) \
VARIANT (int8, 16, q_s8) \
VARIANT (int16, 8, q_s16) \
VARIANT (int32, 4, q_s32) \
VARIANT (int64, 2, q_s64) \
VARIANT (poly8, 16, q_p8) \
VARIANT (poly16, 8, q_p16) \
VARIANT (float16, 8, q_f16) \
VARIANT (float32, 4, q_f32)
#ifdef __aarch64__
#define VARIANTS(VARIANT) VARIANTS_1(VARIANT) \
VARIANT (float64, 1, _f64) \
VARIANT (float64, 2, q_f64)
#else
#define VARIANTS(VARIANT) VARIANTS_1(VARIANT)
#endif
/* Tests of vst1_x3 and vst1q_x3. */
VARIANTS (TESTMETH)
/* Report and abort on failure; SUFFIX is stringized with #, since ##
   does not paste inside a string literal.  */
#define CHECKS(BASE, ELTS, SUFFIX) \
  if (test_vst1##SUFFIX##_x3 () != 0) \
    { \
      fprintf (stderr, "test_vst1" #SUFFIX "_x3 failed\n"); \
      abort (); \
    }
int
main (int argc, char **argv)
{
  VARIANTS (CHECKS)
  return 0;
}