Commit 1dfcc3b5 by Srinath Parvathaneni Committed by Kyrylo Tkachov

[ARM][GCC][11x]: MVE ACLE vector interleaving store and deinterleaving load…

[ARM][GCC][11x]: MVE ACLE vector interleaving store and deinterleaving load intrinsics and also aliases to vstr and vldr intrinsics.

This patch supports following MVE ACLE intrinsics which are aliases of vstr and
vldr intrinsics.

vst1q_p_u8, vst1q_p_s8, vld1q_z_u8, vld1q_z_s8, vst1q_p_u16, vst1q_p_s16,
vld1q_z_u16, vld1q_z_s16, vst1q_p_u32, vst1q_p_s32, vld1q_z_u32, vld1q_z_s32,
vld1q_z_f16, vst1q_p_f16, vld1q_z_f32, vst1q_p_f32.

This patch also supports following MVE ACLE vector deinterleaving loads and vector
interleaving stores.

vst2q_s8, vst2q_u8, vld2q_s8, vld2q_u8, vld4q_s8, vld4q_u8, vst2q_s16, vst2q_u16,
vld2q_s16, vld2q_u16, vld4q_s16, vld4q_u16, vst2q_s32, vst2q_u32, vld2q_s32,
vld2q_u32, vld4q_s32, vld4q_u32, vld4q_f16, vld2q_f16, vst2q_f16, vld4q_f32,
vld2q_f32, vst2q_f32.

Please refer to M-profile Vector Extension (MVE) intrinsics [1]  for more details.
[1] https://developer.arm.com/architectures/instruction-sets/simd-isas/helium/mve-intrinsics

2020-03-20  Srinath Parvathaneni  <srinath.parvathaneni@arm.com>
            Andre Vieira  <andre.simoesdiasvieira@arm.com>
            Mihail Ionescu  <mihail.ionescu@arm.com>

	* config/arm/arm_mve.h (vst1q_p_u8): Define macro.
	(vst1q_p_s8): Likewise.
	(vst2q_s8): Likewise.
	(vst2q_u8): Likewise.
	(vld1q_z_u8): Likewise.
	(vld1q_z_s8): Likewise.
	(vld2q_s8): Likewise.
	(vld2q_u8): Likewise.
	(vld4q_s8): Likewise.
	(vld4q_u8): Likewise.
	(vst1q_p_u16): Likewise.
	(vst1q_p_s16): Likewise.
	(vst2q_s16): Likewise.
	(vst2q_u16): Likewise.
	(vld1q_z_u16): Likewise.
	(vld1q_z_s16): Likewise.
	(vld2q_s16): Likewise.
	(vld2q_u16): Likewise.
	(vld4q_s16): Likewise.
	(vld4q_u16): Likewise.
	(vst1q_p_u32): Likewise.
	(vst1q_p_s32): Likewise.
	(vst2q_s32): Likewise.
	(vst2q_u32): Likewise.
	(vld1q_z_u32): Likewise.
	(vld1q_z_s32): Likewise.
	(vld2q_s32): Likewise.
	(vld2q_u32): Likewise.
	(vld4q_s32): Likewise.
	(vld4q_u32): Likewise.
	(vld4q_f16): Likewise.
	(vld2q_f16): Likewise.
	(vld1q_z_f16): Likewise.
	(vst2q_f16): Likewise.
	(vst1q_p_f16): Likewise.
	(vld4q_f32): Likewise.
	(vld2q_f32): Likewise.
	(vld1q_z_f32): Likewise.
	(vst2q_f32): Likewise.
	(vst1q_p_f32): Likewise.
	(__arm_vst1q_p_u8): Define intrinsic.
	(__arm_vst1q_p_s8): Likewise.
	(__arm_vst2q_s8): Likewise.
	(__arm_vst2q_u8): Likewise.
	(__arm_vld1q_z_u8): Likewise.
	(__arm_vld1q_z_s8): Likewise.
	(__arm_vld2q_s8): Likewise.
	(__arm_vld2q_u8): Likewise.
	(__arm_vld4q_s8): Likewise.
	(__arm_vld4q_u8): Likewise.
	(__arm_vst1q_p_u16): Likewise.
	(__arm_vst1q_p_s16): Likewise.
	(__arm_vst2q_s16): Likewise.
	(__arm_vst2q_u16): Likewise.
	(__arm_vld1q_z_u16): Likewise.
	(__arm_vld1q_z_s16): Likewise.
	(__arm_vld2q_s16): Likewise.
	(__arm_vld2q_u16): Likewise.
	(__arm_vld4q_s16): Likewise.
	(__arm_vld4q_u16): Likewise.
	(__arm_vst1q_p_u32): Likewise.
	(__arm_vst1q_p_s32): Likewise.
	(__arm_vst2q_s32): Likewise.
	(__arm_vst2q_u32): Likewise.
	(__arm_vld1q_z_u32): Likewise.
	(__arm_vld1q_z_s32): Likewise.
	(__arm_vld2q_s32): Likewise.
	(__arm_vld2q_u32): Likewise.
	(__arm_vld4q_s32): Likewise.
	(__arm_vld4q_u32): Likewise.
	(__arm_vld4q_f16): Likewise.
	(__arm_vld2q_f16): Likewise.
	(__arm_vld1q_z_f16): Likewise.
	(__arm_vst2q_f16): Likewise.
	(__arm_vst1q_p_f16): Likewise.
	(__arm_vld4q_f32): Likewise.
	(__arm_vld2q_f32): Likewise.
	(__arm_vld1q_z_f32): Likewise.
	(__arm_vst2q_f32): Likewise.
	(__arm_vst1q_p_f32): Likewise.
	(vld1q_z): Define polymorphic variant.
	(vld2q): Likewise.
	(vld4q): Likewise.
	(vst1q_p): Likewise.
	(vst2q): Likewise.
	* config/arm/arm_mve_builtins.def (STORE1): Use builtin qualifier.
	(LOAD1): Likewise.
	* config/arm/mve.md (mve_vst2q<mode>): Define RTL pattern.
	(mve_vld2q<mode>): Likewise.
	(mve_vld4q<mode>): Likewise.

gcc/testsuite/ChangeLog:

2020-03-20  Srinath Parvathaneni  <srinath.parvathaneni@arm.com>
            Andre Vieira  <andre.simoesdiasvieira@arm.com>
            Mihail Ionescu  <mihail.ionescu@arm.com>

	* gcc.target/arm/mve/intrinsics/vld1q_z_f16.c: New test.
	* gcc.target/arm/mve/intrinsics/vld1q_z_f32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld1q_z_s16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld1q_z_s32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld1q_z_s8.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld1q_z_u16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld1q_z_u32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld1q_z_u8.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld2q_f16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld2q_f32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld2q_s16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld2q_s32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld2q_s8.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld2q_u16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld2q_u32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld2q_u8.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld4q_f16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld4q_f32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld4q_s16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld4q_s32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld4q_s8.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld4q_u16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld4q_u32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vld4q_u8.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst1q_p_f16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst1q_p_f32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst1q_p_s16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst1q_p_s32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst1q_p_s8.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst1q_p_u16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst1q_p_u32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst1q_p_u8.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst2q_f16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst2q_f32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst2q_s16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst2q_s32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst2q_s8.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst2q_u16.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst2q_u32.c: Likewise.
	* gcc.target/arm/mve/intrinsics/vst2q_u8.c: Likewise.
parent b5446d0c
......@@ -2,6 +2,101 @@
Andre Vieira <andre.simoesdiasvieira@arm.com>
Mihail Ionescu <mihail.ionescu@arm.com>
* config/arm/arm_mve.h (vst1q_p_u8): Define macro.
(vst1q_p_s8): Likewise.
(vst2q_s8): Likewise.
(vst2q_u8): Likewise.
(vld1q_z_u8): Likewise.
(vld1q_z_s8): Likewise.
(vld2q_s8): Likewise.
(vld2q_u8): Likewise.
(vld4q_s8): Likewise.
(vld4q_u8): Likewise.
(vst1q_p_u16): Likewise.
(vst1q_p_s16): Likewise.
(vst2q_s16): Likewise.
(vst2q_u16): Likewise.
(vld1q_z_u16): Likewise.
(vld1q_z_s16): Likewise.
(vld2q_s16): Likewise.
(vld2q_u16): Likewise.
(vld4q_s16): Likewise.
(vld4q_u16): Likewise.
(vst1q_p_u32): Likewise.
(vst1q_p_s32): Likewise.
(vst2q_s32): Likewise.
(vst2q_u32): Likewise.
(vld1q_z_u32): Likewise.
(vld1q_z_s32): Likewise.
(vld2q_s32): Likewise.
(vld2q_u32): Likewise.
(vld4q_s32): Likewise.
(vld4q_u32): Likewise.
(vld4q_f16): Likewise.
(vld2q_f16): Likewise.
(vld1q_z_f16): Likewise.
(vst2q_f16): Likewise.
(vst1q_p_f16): Likewise.
(vld4q_f32): Likewise.
(vld2q_f32): Likewise.
(vld1q_z_f32): Likewise.
(vst2q_f32): Likewise.
(vst1q_p_f32): Likewise.
(__arm_vst1q_p_u8): Define intrinsic.
(__arm_vst1q_p_s8): Likewise.
(__arm_vst2q_s8): Likewise.
(__arm_vst2q_u8): Likewise.
(__arm_vld1q_z_u8): Likewise.
(__arm_vld1q_z_s8): Likewise.
(__arm_vld2q_s8): Likewise.
(__arm_vld2q_u8): Likewise.
(__arm_vld4q_s8): Likewise.
(__arm_vld4q_u8): Likewise.
(__arm_vst1q_p_u16): Likewise.
(__arm_vst1q_p_s16): Likewise.
(__arm_vst2q_s16): Likewise.
(__arm_vst2q_u16): Likewise.
(__arm_vld1q_z_u16): Likewise.
(__arm_vld1q_z_s16): Likewise.
(__arm_vld2q_s16): Likewise.
(__arm_vld2q_u16): Likewise.
(__arm_vld4q_s16): Likewise.
(__arm_vld4q_u16): Likewise.
(__arm_vst1q_p_u32): Likewise.
(__arm_vst1q_p_s32): Likewise.
(__arm_vst2q_s32): Likewise.
(__arm_vst2q_u32): Likewise.
(__arm_vld1q_z_u32): Likewise.
(__arm_vld1q_z_s32): Likewise.
(__arm_vld2q_s32): Likewise.
(__arm_vld2q_u32): Likewise.
(__arm_vld4q_s32): Likewise.
(__arm_vld4q_u32): Likewise.
(__arm_vld4q_f16): Likewise.
(__arm_vld2q_f16): Likewise.
(__arm_vld1q_z_f16): Likewise.
(__arm_vst2q_f16): Likewise.
(__arm_vst1q_p_f16): Likewise.
(__arm_vld4q_f32): Likewise.
(__arm_vld2q_f32): Likewise.
(__arm_vld1q_z_f32): Likewise.
(__arm_vst2q_f32): Likewise.
(__arm_vst1q_p_f32): Likewise.
(vld1q_z): Define polymorphic variant.
(vld2q): Likewise.
(vld4q): Likewise.
(vst1q_p): Likewise.
(vst2q): Likewise.
* config/arm/arm_mve_builtins.def (STORE1): Use builtin qualifier.
(LOAD1): Likewise.
* config/arm/mve.md (mve_vst2q<mode>): Define RTL pattern.
(mve_vld2q<mode>): Likewise.
(mve_vld4q<mode>): Likewise.
2020-03-20 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
Andre Vieira <andre.simoesdiasvieira@arm.com>
Mihail Ionescu <mihail.ionescu@arm.com>
* config/arm/arm-builtins.c (ARM_BUILTIN_GET_FPSCR_NZCVQC): Define.
(ARM_BUILTIN_SET_FPSCR_NZCVQC): Likewise.
(arm_init_mve_builtins): Add "__builtin_arm_get_fpscr_nzcvqc" and
......
......@@ -873,3 +873,6 @@ VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vsbciq_m_s, v4si)
VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vsbciq_m_u, v4si)
VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vsbcq_m_s, v4si)
VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vsbcq_m_u, v4si)
VAR5 (STORE1, vst2q, v16qi, v8hi, v4si, v8hf, v4sf)
VAR5 (LOAD1, vld4q, v16qi, v8hi, v4si, v8hf, v4sf)
VAR5 (LOAD1, vld2q, v16qi, v8hi, v4si, v8hf, v4sf)
......@@ -214,7 +214,7 @@
VLDRDQGBWB_S VLDRDQGBWB_U VADCQ_U VADCQ_M_U VADCQ_S
VADCQ_M_S VSBCIQ_U VSBCIQ_S VSBCIQ_M_U VSBCIQ_M_S
VSBCQ_U VSBCQ_S VSBCQ_M_U VSBCQ_M_S VADCIQ_U VADCIQ_M_U
VADCIQ_S VADCIQ_M_S])
VADCIQ_S VADCIQ_M_S VLD2Q VLD4Q VST2Q])
(define_mode_attr MVE_CNVT [(V8HI "V8HF") (V4SI "V4SF") (V8HF "V8HI")
(V4SF "V4SI")])
......@@ -10797,3 +10797,91 @@
"vsbc.i32\t%q0, %q1, %q2"
[(set_attr "type" "mve_move")
(set_attr "length" "4")])
;;
;; [vst2q])
;;
(define_insn "mve_vst2q<mode>"
[(set (match_operand:OI 0 "neon_struct_operand" "=Um")
(unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
(unspec:MVE_VLD_ST [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
VST2Q))
]
"(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
|| (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
{
rtx ops[4];
int regno = REGNO (operands[1]);
ops[0] = gen_rtx_REG (TImode, regno);
ops[1] = gen_rtx_REG (TImode, regno + 4);
rtx reg = operands[0];
while (reg && !REG_P (reg))
reg = XEXP (reg, 0);
gcc_assert (REG_P (reg));
ops[2] = reg;
ops[3] = operands[0];
output_asm_insn ("vst20.<V_sz_elem>\t{%q0, %q1}, [%2]\n\t"
"vst21.<V_sz_elem>\t{%q0, %q1}, %3", ops);
return "";
}
[(set_attr "length" "8")])
;;
;; [vld2q])
;;
(define_insn "mve_vld2q<mode>"
[(set (match_operand:OI 0 "s_register_operand" "=w")
(unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
(unspec:MVE_VLD_ST [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
VLD2Q))
]
"(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
|| (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
{
rtx ops[4];
int regno = REGNO (operands[0]);
ops[0] = gen_rtx_REG (TImode, regno);
ops[1] = gen_rtx_REG (TImode, regno + 4);
rtx reg = operands[1];
while (reg && !REG_P (reg))
reg = XEXP (reg, 0);
gcc_assert (REG_P (reg));
ops[2] = reg;
ops[3] = operands[1];
output_asm_insn ("vld20.<V_sz_elem>\t{%q0, %q1}, [%2]\n\t"
"vld21.<V_sz_elem>\t{%q0, %q1}, %3", ops);
return "";
}
[(set_attr "length" "8")])
;;
;; [vld4q])
;;
(define_insn "mve_vld4q<mode>"
[(set (match_operand:XI 0 "s_register_operand" "=w")
(unspec:XI [(match_operand:XI 1 "neon_struct_operand" "Um")
(unspec:MVE_VLD_ST [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
VLD4Q))
]
"(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
|| (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
{
rtx ops[6];
int regno = REGNO (operands[0]);
ops[0] = gen_rtx_REG (TImode, regno);
ops[1] = gen_rtx_REG (TImode, regno+4);
ops[2] = gen_rtx_REG (TImode, regno+8);
ops[3] = gen_rtx_REG (TImode, regno + 12);
rtx reg = operands[1];
while (reg && !REG_P (reg))
reg = XEXP (reg, 0);
gcc_assert (REG_P (reg));
ops[4] = reg;
ops[5] = operands[1];
output_asm_insn ("vld40.<V_sz_elem>\t{%q0, %q1, %q2, %q3}, [%4]\n\t"
"vld41.<V_sz_elem>\t{%q0, %q1, %q2, %q3}, [%4]\n\t"
"vld42.<V_sz_elem>\t{%q0, %q1, %q2, %q3}, [%4]\n\t"
"vld43.<V_sz_elem>\t{%q0, %q1, %q2, %q3}, %5", ops);
return "";
}
[(set_attr "length" "16")])
2020-03-20 Srinath Parvathaneni <srinath.parvathaneni@arm.com>
Andre Vieira <andre.simoesdiasvieira@arm.com>
Mihail Ionescu <mihail.ionescu@arm.com>
* gcc.target/arm/mve/intrinsics/vld1q_z_f16.c: New test.
* gcc.target/arm/mve/intrinsics/vld1q_z_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld1q_z_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld1q_z_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld1q_z_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld1q_z_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld1q_z_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld1q_z_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld2q_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld2q_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld2q_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld2q_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld2q_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld2q_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld2q_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld2q_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld4q_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld4q_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld4q_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld4q_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld4q_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld4q_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld4q_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vld4q_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst1q_p_u8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_f16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_f32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_s16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_s32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_s8.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_u16.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_u32.c: Likewise.
* gcc.target/arm/mve/intrinsics/vst2q_u8.c: Likewise.
2020-03-20 Richard Sandiford <richard.sandiford@arm.com>
PR middle-end/94072
......
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_mve_fp } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
float16x8_t
foo (float16_t const * base, mve_pred16_t p)
{
return vld1q_z_f16 (base, p);
}
/* { dg-final { scan-assembler "vldrht.f16" } } */
float16x8_t
foo1 (float16_t const * base, mve_pred16_t p)
{
return vld1q_z (base, p);
}
/* { dg-final { scan-assembler "vldrht.f16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_mve_fp } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
float32x4_t
foo (float32_t const * base, mve_pred16_t p)
{
return vld1q_z_f32 (base, p);
}
/* { dg-final { scan-assembler "vldrwt.f32" } } */
float32x4_t
foo1 (float32_t const * base, mve_pred16_t p)
{
return vld1q_z (base, p);
}
/* { dg-final { scan-assembler "vldrwt.f32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
int16x8_t
foo (int16_t const * base, mve_pred16_t p)
{
return vld1q_z_s16 (base, p);
}
/* { dg-final { scan-assembler "vldrht.s16" } } */
int16x8_t
foo1 (int16_t const * base, mve_pred16_t p)
{
return vld1q_z (base, p);
}
/* { dg-final { scan-assembler "vldrht.s16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
int32x4_t
foo (int32_t const * base, mve_pred16_t p)
{
return vld1q_z_s32 (base, p);
}
/* { dg-final { scan-assembler "vldrwt.s32" } } */
int32x4_t
foo1 (int32_t const * base, mve_pred16_t p)
{
return vld1q_z (base, p);
}
/* { dg-final { scan-assembler "vldrwt.s32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
int8x16_t
foo (int8_t const * base, mve_pred16_t p)
{
return vld1q_z_s8 (base, p);
}
/* { dg-final { scan-assembler "vldrbt.s8" } } */
int8x16_t
foo1 (int8_t const * base, mve_pred16_t p)
{
return vld1q_z (base, p);
}
/* { dg-final { scan-assembler "vldrbt.s8" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
uint16x8_t
foo (uint16_t const * base, mve_pred16_t p)
{
return vld1q_z_u16 (base, p);
}
/* { dg-final { scan-assembler "vldrht.u16" } } */
uint16x8_t
foo1 (uint16_t const * base, mve_pred16_t p)
{
return vld1q_z (base, p);
}
/* { dg-final { scan-assembler "vldrht.u16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
uint32x4_t
foo (uint32_t const * base, mve_pred16_t p)
{
return vld1q_z_u32 (base, p);
}
/* { dg-final { scan-assembler "vldrwt.u32" } } */
uint32x4_t
foo1 (uint32_t const * base, mve_pred16_t p)
{
return vld1q_z (base, p);
}
/* { dg-final { scan-assembler "vldrwt.u32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
uint8x16_t
foo (uint8_t const * base, mve_pred16_t p)
{
return vld1q_z_u8 (base, p);
}
/* { dg-final { scan-assembler "vldrbt.u8" } } */
uint8x16_t
foo1 (uint8_t const * base, mve_pred16_t p)
{
return vld1q_z (base, p);
}
/* { dg-final { scan-assembler "vldrbt.u8" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_mve_fp } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
float16x8x2_t
foo (float16_t const * addr)
{
return vld2q_f16 (addr);
}
/* { dg-final { scan-assembler "vld20.16" } } */
/* { dg-final { scan-assembler "vld21.16" } } */
float16x8x2_t
foo1 (float16_t const * addr)
{
return vld2q (addr);
}
/* { dg-final { scan-assembler "vld20.16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_mve_fp } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
float32x4x2_t
foo (float32_t const * addr)
{
return vld2q_f32 (addr);
}
/* { dg-final { scan-assembler "vld20.32" } } */
/* { dg-final { scan-assembler "vld21.32" } } */
float32x4x2_t
foo1 (float32_t const * addr)
{
return vld2q (addr);
}
/* { dg-final { scan-assembler "vld20.32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
int16x8x2_t
foo (int16_t const * addr)
{
return vld2q_s16 (addr);
}
/* { dg-final { scan-assembler "vld20.16" } } */
/* { dg-final { scan-assembler "vld21.16" } } */
int16x8x2_t
foo1 (int16_t const * addr)
{
return vld2q (addr);
}
/* { dg-final { scan-assembler "vld20.16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
int32x4x2_t
foo (int32_t const * addr)
{
return vld2q_s32 (addr);
}
/* { dg-final { scan-assembler "vld20.32" } } */
/* { dg-final { scan-assembler "vld21.32" } } */
int32x4x2_t
foo1 (int32_t const * addr)
{
return vld2q (addr);
}
/* { dg-final { scan-assembler "vld20.32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
int8x16x2_t
foo (int8_t const * addr)
{
return vld2q_s8 (addr);
}
/* { dg-final { scan-assembler "vld20.8" } } */
/* { dg-final { scan-assembler "vld21.8" } } */
int8x16x2_t
foo1 (int8_t const * addr)
{
return vld2q (addr);
}
/* { dg-final { scan-assembler "vld20.8" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
uint16x8x2_t
foo (uint16_t const * addr)
{
return vld2q_u16 (addr);
}
/* { dg-final { scan-assembler "vld20.16" } } */
/* { dg-final { scan-assembler "vld21.16" } } */
uint16x8x2_t
foo1 (uint16_t const * addr)
{
return vld2q (addr);
}
/* { dg-final { scan-assembler "vld20.16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
uint32x4x2_t
foo (uint32_t const * addr)
{
return vld2q_u32 (addr);
}
/* { dg-final { scan-assembler "vld20.32" } } */
/* { dg-final { scan-assembler "vld21.32" } } */
uint32x4x2_t
foo1 (uint32_t const * addr)
{
return vld2q (addr);
}
/* { dg-final { scan-assembler "vld20.32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
uint8x16x2_t
foo (uint8_t const * addr)
{
return vld2q_u8 (addr);
}
/* { dg-final { scan-assembler "vld20.8" } } */
/* { dg-final { scan-assembler "vld21.8" } } */
uint8x16x2_t
foo1 (uint8_t const * addr)
{
return vld2q (addr);
}
/* { dg-final { scan-assembler "vld20.8" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_mve_fp } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
float16x8x4_t
foo (float16_t const * addr)
{
return vld4q_f16 (addr);
}
/* { dg-final { scan-assembler "vld40.16" } } */
/* { dg-final { scan-assembler "vld41.16" } } */
/* { dg-final { scan-assembler "vld42.16" } } */
/* { dg-final { scan-assembler "vld43.16" } } */
float16x8x4_t
foo1 (float16_t const * addr)
{
return vld4q (addr);
}
/* { dg-final { scan-assembler "vld40.16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_mve_fp } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
float32x4x4_t
foo (float32_t const * addr)
{
return vld4q_f32 (addr);
}
/* { dg-final { scan-assembler "vld40.32" } } */
/* { dg-final { scan-assembler "vld41.32" } } */
/* { dg-final { scan-assembler "vld42.32" } } */
/* { dg-final { scan-assembler "vld43.32" } } */
float32x4x4_t
foo1 (float32_t const * addr)
{
return vld4q (addr);
}
/* { dg-final { scan-assembler "vld40.32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
int16x8x4_t
foo (int16_t const * addr)
{
return vld4q_s16 (addr);
}
/* { dg-final { scan-assembler "vld40.16" } } */
/* { dg-final { scan-assembler "vld41.16" } } */
/* { dg-final { scan-assembler "vld42.16" } } */
/* { dg-final { scan-assembler "vld43.16" } } */
int16x8x4_t
foo1 (int16_t const * addr)
{
return vld4q (addr);
}
/* { dg-final { scan-assembler "vld40.16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
int32x4x4_t
foo (int32_t const * addr)
{
return vld4q_s32 (addr);
}
/* { dg-final { scan-assembler "vld40.32" } } */
/* { dg-final { scan-assembler "vld41.32" } } */
/* { dg-final { scan-assembler "vld42.32" } } */
/* { dg-final { scan-assembler "vld43.32" } } */
int32x4x4_t
foo1 (int32_t const * addr)
{
return vld4q (addr);
}
/* { dg-final { scan-assembler "vld40.32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
int8x16x4_t
foo (int8_t const * addr)
{
return vld4q_s8 (addr);
}
/* { dg-final { scan-assembler "vld40.8" } } */
/* { dg-final { scan-assembler "vld41.8" } } */
/* { dg-final { scan-assembler "vld42.8" } } */
/* { dg-final { scan-assembler "vld43.8" } } */
int8x16x4_t
foo1 (int8_t const * addr)
{
return vld4q (addr);
}
/* { dg-final { scan-assembler "vld40.8" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
uint16x8x4_t
foo (uint16_t const * addr)
{
return vld4q_u16 (addr);
}
/* { dg-final { scan-assembler "vld40.16" } } */
/* { dg-final { scan-assembler "vld41.16" } } */
/* { dg-final { scan-assembler "vld42.16" } } */
/* { dg-final { scan-assembler "vld43.16" } } */
uint16x8x4_t
foo1 (uint16_t const * addr)
{
return vld4q (addr);
}
/* { dg-final { scan-assembler "vld40.16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
uint32x4x4_t
foo (uint32_t const * addr)
{
return vld4q_u32 (addr);
}
/* { dg-final { scan-assembler "vld40.32" } } */
/* { dg-final { scan-assembler "vld41.32" } } */
/* { dg-final { scan-assembler "vld42.32" } } */
/* { dg-final { scan-assembler "vld43.32" } } */
uint32x4x4_t
foo1 (uint32_t const * addr)
{
return vld4q (addr);
}
/* { dg-final { scan-assembler "vld40.32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
uint8x16x4_t
foo (uint8_t const * addr)
{
return vld4q_u8 (addr);
}
/* { dg-final { scan-assembler "vld40.8" } } */
/* { dg-final { scan-assembler "vld41.8" } } */
/* { dg-final { scan-assembler "vld42.8" } } */
/* { dg-final { scan-assembler "vld43.8" } } */
uint8x16x4_t
foo1 (uint8_t const * addr)
{
return vld4q (addr);
}
/* { dg-final { scan-assembler "vld40.8" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_mve_fp } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (float16_t * addr, float16x8_t value, mve_pred16_t p)
{
vst1q_p_f16 (addr, value, p);
}
/* { dg-final { scan-assembler "vstrht.16" } } */
void
foo1 (float16_t * addr, float16x8_t value, mve_pred16_t p)
{
vst1q_p (addr, value, p);
}
/* { dg-final { scan-assembler "vstrht.16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_mve_fp } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (float32_t * addr, float32x4_t value, mve_pred16_t p)
{
vst1q_p_f32 (addr, value, p);
}
/* { dg-final { scan-assembler "vstrwt.32" } } */
void
foo1 (float32_t * addr, float32x4_t value, mve_pred16_t p)
{
vst1q_p (addr, value, p);
}
/* { dg-final { scan-assembler "vstrwt.32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (int16_t * addr, int16x8_t value, mve_pred16_t p)
{
vst1q_p_s16 (addr, value, p);
}
/* { dg-final { scan-assembler "vstrht.16" } } */
void
foo1 (int16_t * addr, int16x8_t value, mve_pred16_t p)
{
vst1q_p (addr, value, p);
}
/* { dg-final { scan-assembler "vstrht.16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (int32_t * addr, int32x4_t value, mve_pred16_t p)
{
vst1q_p_s32 (addr, value, p);
}
/* { dg-final { scan-assembler "vstrwt.32" } } */
void
foo1 (int32_t * addr, int32x4_t value, mve_pred16_t p)
{
vst1q_p (addr, value, p);
}
/* { dg-final { scan-assembler "vstrwt.32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (int8_t * addr, int8x16_t value, mve_pred16_t p)
{
vst1q_p_s8 (addr, value, p);
}
/* { dg-final { scan-assembler "vstrbt.8" } } */
void
foo1 (int8_t * addr, int8x16_t value, mve_pred16_t p)
{
vst1q_p (addr, value, p);
}
/* { dg-final { scan-assembler "vstrbt.8" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (uint16_t * addr, uint16x8_t value, mve_pred16_t p)
{
vst1q_p_u16 (addr, value, p);
}
/* { dg-final { scan-assembler "vstrht.16" } } */
void
foo1 (uint16_t * addr, uint16x8_t value, mve_pred16_t p)
{
vst1q_p (addr, value, p);
}
/* { dg-final { scan-assembler "vstrht.16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (uint32_t * addr, uint32x4_t value, mve_pred16_t p)
{
vst1q_p_u32 (addr, value, p);
}
/* { dg-final { scan-assembler "vstrwt.32" } } */
void
foo1 (uint32_t * addr, uint32x4_t value, mve_pred16_t p)
{
vst1q_p (addr, value, p);
}
/* { dg-final { scan-assembler "vstrwt.32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (uint8_t * addr, uint8x16_t value, mve_pred16_t p)
{
vst1q_p_u8 (addr, value, p);
}
/* { dg-final { scan-assembler "vstrbt.8" } } */
void
foo1 (uint8_t * addr, uint8x16_t value, mve_pred16_t p)
{
vst1q_p (addr, value, p);
}
/* { dg-final { scan-assembler "vstrbt.8" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_mve_fp } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (float16_t * addr, float16x8x2_t value)
{
vst2q_f16 (addr, value);
}
/* { dg-final { scan-assembler "vst20.16" } } */
/* { dg-final { scan-assembler "vst21.16" } } */
void
foo1 (float16_t * addr, float16x8x2_t value)
{
vst2q (addr, value);
}
/* { dg-final { scan-assembler "vst20.16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
/* { dg-add-options arm_v8_1m_mve_fp } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (float32_t * addr, float32x4x2_t value)
{
vst2q_f32 (addr, value);
}
/* { dg-final { scan-assembler "vst20.32" } } */
/* { dg-final { scan-assembler "vst21.32" } } */
void
foo1 (float32_t * addr, float32x4x2_t value)
{
vst2q (addr, value);
}
/* { dg-final { scan-assembler "vst20.32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (int16_t * addr, int16x8x2_t value)
{
vst2q_s16 (addr, value);
}
/* { dg-final { scan-assembler "vst20.16" } } */
/* { dg-final { scan-assembler "vst21.16" } } */
void
foo1 (int16_t * addr, int16x8x2_t value)
{
vst2q (addr, value);
}
/* { dg-final { scan-assembler "vst20.16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (int32_t * addr, int32x4x2_t value)
{
vst2q_s32 (addr, value);
}
/* { dg-final { scan-assembler "vst20.32" } } */
/* { dg-final { scan-assembler "vst21.32" } } */
void
foo1 (int32_t * addr, int32x4x2_t value)
{
vst2q (addr, value);
}
/* { dg-final { scan-assembler "vst20.32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (int8_t * addr, int8x16x2_t value)
{
vst2q_s8 (addr, value);
}
/* { dg-final { scan-assembler "vst20.8" } } */
/* { dg-final { scan-assembler "vst21.8" } } */
void
foo1 (int8_t * addr, int8x16x2_t value)
{
vst2q (addr, value);
}
/* { dg-final { scan-assembler "vst20.8" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (uint16_t * addr, uint16x8x2_t value)
{
vst2q_u16 (addr, value);
}
/* { dg-final { scan-assembler "vst20.16" } } */
/* { dg-final { scan-assembler "vst21.16" } } */
void
foo1 (uint16_t * addr, uint16x8x2_t value)
{
vst2q (addr, value);
}
/* { dg-final { scan-assembler "vst20.16" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (uint32_t * addr, uint32x4x2_t value)
{
vst2q_u32 (addr, value);
}
/* { dg-final { scan-assembler "vst20.32" } } */
/* { dg-final { scan-assembler "vst21.32" } } */
void
foo1 (uint32_t * addr, uint32x4x2_t value)
{
vst2q (addr, value);
}
/* { dg-final { scan-assembler "vst20.32" } } */
/* { dg-do compile } */
/* { dg-require-effective-target arm_v8_1m_mve_ok } */
/* { dg-add-options arm_v8_1m_mve } */
/* { dg-additional-options "-O2" } */
#include "arm_mve.h"
void
foo (uint8_t * addr, uint8x16x2_t value)
{
vst2q_u8 (addr, value);
}
/* { dg-final { scan-assembler "vst20.8" } } */
/* { dg-final { scan-assembler "vst21.8" } } */
void
foo1 (uint8_t * addr, uint8x16x2_t value)
{
vst2q (addr, value);
}
/* { dg-final { scan-assembler "vst20.8" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment