Commit 2f46a9cd by Christophe Lyon

Add new files missing from previous commit.

From-SVN: r216641
parent 16ad00b0
This directory contains executable tests for ARM/AArch64 Advanced SIMD
(Neon) intrinsics.
It is meant to cover execution cases of all the Advanced SIMD
intrinsics, but does not scan the generated assembler code.
The general framework is composed as follows:
- advsimd-intrinsics.exp: main dejagnu driver
- *.c: actual tests, generally one per intrinsinc family
- arm-neon-ref.h: contains macro definitions to save typing in actual
test files
- compute-ref-data.h: contains input vectors definitions
- *.inc: generic tests, shared by several families of intrinsics. For
instance, unary or binary operators
A typical .c test file starts with the following contents (look at
vld1.c and vaba.c for sample cases):
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
Then, definitions of expected results, based on common input values,
as defined in compute-ref-data.h.
For example:
VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 };
defines the expected results of an operator generating int16x4 values.
The common input values defined in compute-ref-data.h have been chosen
to avoid corner-case values for most operators, yet exposing negative
values for signed operators. For this reason, their range is also
limited. For instance, the initialization of buffer_int16x4 will be
{ -16, -15, -14, -13 }.
The initialization of floating-point values is done via hex notation,
to avoid potential rounding problems.
To test special values and corner cases, specific initialization
values should be used in dedicated tests, to ensure proper coverage.
An example of this is vshl.
When a variant of an intrinsic is not available, its expected result
should be defined to the value of CLEAN_PATTERN_8 as defined in
arm-neon-ref.h. For example:
VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
if the given intrinsic has no variant producing an int64x1 result,
like the vcmp family (eg. vclt).
This is because the helper function (check_results(), defined in
arm-neon-ref.h), iterates over all the possible variants, to save
typing in each individual test file. Alternatively, one can directly
call the CHECK/CHECK_FP macros to check only a few expected results
(see vabs.c for an example).
Then, define the TEST_MSG string, which will be used when reporting errors.
Next, define the function performing the actual tests, in general
relying on the helpers provided by arm-neon-ref.h, which means:
* declare necessary vectors of suitable types: using
DECL_VARIABLE_ALL_VARIANTS when all variants are supported, or the
relevant of subset calls to DECL_VARIABLE.
* call clean_results() to initialize the 'results' buffers.
* initialize the input vectors, using VLOAD, VDUP or VSET_LANE (vld*
tests do not need this step, since their actual purpose is to
initialize vectors).
* execute the intrinsic on relevant variants, for instance using
TEST_MACRO_ALL_VARIANTS_2_5.
* call check_results() to check that the results match the expected
values.
A template test file could be:
=================================================================
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9,
0xfa, 0xfb, 0xfc, 0xfd };
/* and as many others as necessary. */
#define TEST_MSG "VMYINTRINSIC"
void exec_myintrinsic (void)
{
/* my test: v4=vmyintrinsic(v1,v2,v3), then store the result. */
#define TEST_VMYINTR(Q, T1, T2, W, N) \
VECT_VAR(vector_res, T1, W, N) = \
vmyintr##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \
VECT_VAR(vector2, T1, W, N), \
VECT_VAR(vector3, T1, W, N)); \
vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
#define DECL_VMYINTR_VAR(VAR) \
DECL_VARIABLE(VAR, int, 8, 8);
/* And as many others as necessary. */
DECL_VMYINTR_VAR(vector1);
DECL_VMYINTR_VAR(vector2);
DECL_VMYINTR_VAR(vector3);
DECL_VMYINTR_VAR(vector_res);
clean_results ();
/* Initialize input "vector1" from "buffer". */
VLOAD(vector1, buffer, , int, s, 8, 8);
/* And as many others as necessary. */
/* Choose init value arbitrarily. */
VDUP(vector2, , int, s, 8, 8, 1);
/* And as many others as necessary. */
/* Choose init value arbitrarily. */
VDUP(vector3, , int, s, 8, 8, -5);
/* And as many others as necessary. */
/* Execute the tests. */
TEST_VMYINTR(, int, s, 8, 8);
/* And as many others as necessary. */
check_results (TEST_MSG, "");
}
int main (void)
{
exec_vmyintrinsic ();
return 0;
}
=================================================================
# Copyright (C) 2014 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
# GCC testsuite that uses the `dg.exp' driver.
# Exit immediately if this isn't an ARM or AArch64 target.
if {![istarget arm*-*-*]
&& ![istarget aarch64*-*-*]} then {
return
}
# Load support procs.
load_lib gcc-dg.exp
# Initialize `dg'.
load_lib c-torture.exp
load_lib target-supports.exp
load_lib torture-options.exp
dg-init
torture-init
set-torture-options $C_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS
# Make sure Neon flags are provided, if necessary.
set additional_flags [add_options_for_arm_neon ""]
# Main loop.
foreach src [lsort [glob -nocomplain $srcdir/$subdir/*.c]] {
# If we're only testing specific files and this isn't one of them, skip it.
if ![runtest_file_p $runtests $src] then {
continue
}
# runtest_file_p is already run above, and the code below can run
# runtest_file_p again, make sure everything for this test is
# performed if the above runtest_file_p decided this runtest
# instance should execute the test
gcc_parallel_test_enable 0
c-torture-execute $src $additional_flags
gcc-dg-runtest $src "" $additional_flags
gcc_parallel_test_enable 1
}
# All done.
torture-finish
dg-finish
/* This file contains input data static definitions, shared by most of
the tests. */
#include <arm_neon.h>
#include "arm-neon-ref.h"
/* Initialization helpers; 4 slices are needed for vld2, vld3 and
vld4. */
#define MY_INIT_TABLE(T,W,N) xNAME(INIT_TABLE,N)(T##W##_t)
#define MY_INIT_TABLE2(T,W,N) xNAME(INIT_TABLE2,N)(T##W##_t)
#define MY_INIT_TABLE3(T,W,N) xNAME(INIT_TABLE3,N)(T##W##_t)
#define MY_INIT_TABLE4(T,W,N) xNAME(INIT_TABLE4,N)(T##W##_t)
/* Initialized input buffers. */
#define VECT_VAR_DECL_INIT(V, T, W, N) \
VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TABLE(T,W,N) }
/* Specialized initializer with 4 entries, as used by vldX_dup and
vdup tests, which iterate 4 times on input buffers. */
#define VECT_VAR_DECL_INIT4(V, T, W, N) \
VECT_VAR_DECL(V,T,W,N) [] = { MY_INIT_TABLE(T,W,4) };
/* Initializers for arrays of vectors. */
#define VECT_ARRAY_INIT2(V, T, W, N) \
T##W##_t VECT_ARRAY_VAR(V,T,W,N,2)[] = \
{ MY_INIT_TABLE(T,W,N) \
MY_INIT_TABLE2(T,W,N) }
#define VECT_ARRAY_INIT3(V, T, W, N) \
T##W##_t VECT_ARRAY_VAR(V,T,W,N,3)[] = \
{ MY_INIT_TABLE(T,W,N) \
MY_INIT_TABLE2(T,W,N) \
MY_INIT_TABLE3(T,W,N) }
#define VECT_ARRAY_INIT4(V, T, W, N) \
T##W##_t VECT_ARRAY_VAR(V,T,W,N,4)[] = \
{ MY_INIT_TABLE(T,W,N) \
MY_INIT_TABLE2(T,W,N) \
MY_INIT_TABLE3(T,W,N) \
MY_INIT_TABLE4(T,W,N) }
/* Sample initialization vectors. */
#define INIT_TABLE_1(T) \
(T)-16,
#define INIT_TABLE2_1(T) \
(T)-15,
#define INIT_TABLE3_1(T) \
(T)-14,
#define INIT_TABLE4_1(T) \
(T)-13,
#define INIT_TABLE_2(T) \
(T)-16, (T)-15,
#define INIT_TABLE2_2(T) \
(T)-14, (T)-13,
#define INIT_TABLE3_2(T) \
(T)-12, (T)-11,
#define INIT_TABLE4_2(T) \
(T)-10, (T)-9,
/* Initializer for vld3_lane tests. */
#define INIT_TABLE_3(T) \
(T)-16, (T)-15, (T)-14,
#define INIT_TABLE_4(T) \
(T)-16, (T)-15, (T)-14, (T)-13,
#define INIT_TABLE2_4(T) \
(T)-12, (T)-11, (T)-10, (T)-9,
#define INIT_TABLE3_4(T) \
(T)-8, (T)-7, (T)-6, (T)-5,
#define INIT_TABLE4_4(T) \
(T)-4, (T)-3, (T)-2, (T)-1,
#define INIT_TABLE_8(T) \
(T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9,
#define INIT_TABLE2_8(T) \
(T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
#define INIT_TABLE3_8(T) \
(T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7,
#define INIT_TABLE4_8(T) \
(T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15,
#define INIT_TABLE_16(T) \
(T)-16, (T)-15, (T)-14, (T)-13, (T)-12, (T)-11, (T)-10, (T)-9, \
(T)-8, (T)-7, (T)-6, (T)-5, (T)-4, (T)-3, (T)-2, (T)-1,
#define INIT_TABLE2_16(T) \
(T)0, (T)1, (T)2, (T)3, (T)4, (T)5, (T)6, (T)7, \
(T)8, (T)9, (T)10, (T)11, (T)12, (T)13, (T)14, (T)15,
#define INIT_TABLE3_16(T) \
(T)16, (T)17, (T)18, (T)19, (T)20, (T)21, (T)22, (T)23, \
(T)24, (T)25, (T)26, (T)27, (T)28, (T)29, (T)30, (T)31,
#define INIT_TABLE4_16(T) \
(T)32, (T)33, (T)34, (T)35, (T)36, (T)37, (T)38, (T)39, \
(T)40, (T)41, (T)42, (T)43, (T)44, (T)45, (T)46, (T)47,
/* This one is used for padding between input buffers. */
#define PAD(V, T, W, N) char VECT_VAR(V,T,W,N)=42
/* Input buffers, one of each size. */
/* Insert some padding to try to exhibit out of bounds accesses. */
VECT_VAR_DECL_INIT(buffer, int, 8, 8);
PAD(buffer_pad, int, 8, 8);
VECT_VAR_DECL_INIT(buffer, int, 16, 4);
PAD(buffer_pad, int, 16, 4);
VECT_VAR_DECL_INIT(buffer, int, 32, 2);
PAD(buffer_pad, int, 32, 2);
VECT_VAR_DECL_INIT(buffer, int, 64, 1);
PAD(buffer_pad, int, 64, 1);
VECT_VAR_DECL_INIT(buffer, uint, 8, 8);
PAD(buffer_pad, uint, 8, 8);
VECT_VAR_DECL_INIT(buffer, poly, 8, 8);
PAD(buffer_pad, poly, 8, 8);
VECT_VAR_DECL_INIT(buffer, poly, 16, 4);
PAD(buffer_pad, poly, 16, 4);
VECT_VAR_DECL_INIT(buffer, uint, 16, 4);
PAD(buffer_pad, uint, 16, 4);
VECT_VAR_DECL_INIT(buffer, uint, 32, 2);
PAD(buffer_pad, uint, 32, 2);
VECT_VAR_DECL_INIT(buffer, uint, 64, 1);
PAD(buffer_pad, uint, 64, 1);
VECT_VAR_DECL_INIT(buffer, float, 32, 2);
PAD(buffer_pad, float, 32, 2);
VECT_VAR_DECL_INIT(buffer, int, 8, 16);
PAD(buffer_pad, int, 8, 16);
VECT_VAR_DECL_INIT(buffer, int, 16, 8);
PAD(buffer_pad, int, 16, 8);
VECT_VAR_DECL_INIT(buffer, int, 32, 4);
PAD(buffer_pad, int, 32, 4);
VECT_VAR_DECL_INIT(buffer, int, 64, 2);
PAD(buffer_pad, int, 64, 2);
VECT_VAR_DECL_INIT(buffer, uint, 8, 16);
PAD(buffer_pad, uint, 8, 16);
VECT_VAR_DECL_INIT(buffer, uint, 16, 8);
PAD(buffer_pad, uint, 16, 8);
VECT_VAR_DECL_INIT(buffer, uint, 32, 4);
PAD(buffer_pad, uint, 32, 4);
VECT_VAR_DECL_INIT(buffer, uint, 64, 2);
PAD(buffer_pad, uint, 64, 2);
VECT_VAR_DECL_INIT(buffer, poly, 8, 16);
PAD(buffer_pad, poly, 8, 16);
VECT_VAR_DECL_INIT(buffer, poly, 16, 8);
PAD(buffer_pad, poly, 16, 8);
VECT_VAR_DECL_INIT(buffer, float, 32, 4);
PAD(buffer_pad, float, 32, 4);
/* The tests for vld1_dup and vdup expect at least 4 entries in the
input buffer, so force 1- and 2-elements initializers to have 4
entries (using VECT_VAR_DECL_INIT4). */
VECT_VAR_DECL_INIT(buffer_dup, int, 8, 8);
VECT_VAR_DECL(buffer_dup_pad, int, 8, 8);
VECT_VAR_DECL_INIT(buffer_dup, int, 16, 4);
VECT_VAR_DECL(buffer_dup_pad, int, 16, 4);
VECT_VAR_DECL_INIT4(buffer_dup, int, 32, 2);
VECT_VAR_DECL(buffer_dup_pad, int, 32, 2);
VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 1);
VECT_VAR_DECL(buffer_dup_pad, int, 64, 1);
VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 8);
VECT_VAR_DECL(buffer_dup_pad, uint, 8, 8);
VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 4);
VECT_VAR_DECL(buffer_dup_pad, uint, 16, 4);
VECT_VAR_DECL_INIT4(buffer_dup, uint, 32, 2);
VECT_VAR_DECL(buffer_dup_pad, uint, 32, 2);
VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 1);
VECT_VAR_DECL(buffer_dup_pad, uint, 64, 1);
VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 8);
VECT_VAR_DECL(buffer_dup_pad, poly, 8, 8);
VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 4);
VECT_VAR_DECL(buffer_dup_pad, poly, 16, 4);
VECT_VAR_DECL_INIT4(buffer_dup, float, 32, 2);
VECT_VAR_DECL(buffer_dup_pad, float, 32, 2);
VECT_VAR_DECL_INIT(buffer_dup, int, 8, 16);
VECT_VAR_DECL(buffer_dup_pad, int, 8, 16);
VECT_VAR_DECL_INIT(buffer_dup, int, 16, 8);
VECT_VAR_DECL(buffer_dup_pad, int, 16, 8);
VECT_VAR_DECL_INIT(buffer_dup, int, 32, 4);
VECT_VAR_DECL(buffer_dup_pad, int, 32, 4);
VECT_VAR_DECL_INIT4(buffer_dup, int, 64, 2);
VECT_VAR_DECL(buffer_dup_pad, int, 64, 2);
VECT_VAR_DECL_INIT(buffer_dup, uint, 8, 16);
VECT_VAR_DECL(buffer_dup_pad, uint, 8, 16);
VECT_VAR_DECL_INIT(buffer_dup, uint, 16, 8);
VECT_VAR_DECL(buffer_dup_pad, uint, 16, 8);
VECT_VAR_DECL_INIT(buffer_dup, uint, 32, 4);
VECT_VAR_DECL(buffer_dup_pad, uint, 32, 4);
VECT_VAR_DECL_INIT4(buffer_dup, uint, 64, 2);
VECT_VAR_DECL(buffer_dup_pad, uint, 64, 2);
VECT_VAR_DECL_INIT(buffer_dup, poly, 8, 16);
VECT_VAR_DECL(buffer_dup_pad, poly, 8, 16);
VECT_VAR_DECL_INIT(buffer_dup, poly, 16, 8);
VECT_VAR_DECL(buffer_dup_pad, poly, 16, 8);
VECT_VAR_DECL_INIT(buffer_dup, float, 32, 4);
VECT_VAR_DECL(buffer_dup_pad, float, 32, 4);
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf6, 0xf7, 0xf8, 0xf9,
0xfa, 0xfb, 0xfc, 0xfd };
VECT_VAR_DECL(expected,int,16,4) [] = { 0x16, 0x17, 0x18, 0x19 };
VECT_VAR_DECL(expected,int,32,2) [] = { 0x20, 0x21 };
VECT_VAR_DECL(expected,int,64,1) [] = { 0x3333333333333333 };
VECT_VAR_DECL(expected,uint,8,8) [] = { 0x53, 0x54, 0x55, 0x56,
0x57, 0x58, 0x59, 0x5a };
VECT_VAR_DECL(expected,uint,16,4) [] = { 0x907, 0x908, 0x909, 0x90a };
VECT_VAR_DECL(expected,uint,32,2) [] = { 0xffffffe7, 0xffffffe8 };
VECT_VAR_DECL(expected,uint,64,1) [] = { 0x3333333333333333 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0x5e, 0x5f, 0x60, 0x61,
0x62, 0x63, 0x64, 0x65,
0x66, 0x67, 0x68, 0x69,
0x6a, 0x6b, 0x6c, 0x6d };
VECT_VAR_DECL(expected,int,16,8) [] = { 0xb9c, 0xb9d, 0xb9e, 0xb9f,
0xba0, 0xba1, 0xba2, 0xba3 };
VECT_VAR_DECL(expected,int,32,4) [] = { 0x26e0, 0x26e1, 0x26e2, 0x26e3 };
VECT_VAR_DECL(expected,int,64,2) [] = { 0x3333333333333333,
0x3333333333333333 };
VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff,
0x0, 0x1, 0x2, 0x3,
0x4, 0x5, 0x6, 0x7 };
VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff9, 0xfffa, 0xfffb, 0xfffc,
0xfffd, 0xfffe, 0xffff, 0x0 };
VECT_VAR_DECL(expected,uint,32,4) [] = { 0xc, 0xd, 0xe, 0xf };
VECT_VAR_DECL(expected,uint,64,2) [] = { 0x3333333333333333,
0x3333333333333333 };
VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
0x3333, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
0x33333333, 0x33333333 };
#define TEST_MSG "VABA/VABAQ"
void exec_vaba (void)
{
/* Basic test: v4=vaba(v1,v2,v3), then store the result. */
#define TEST_VABA(Q, T1, T2, W, N) \
VECT_VAR(vector_res, T1, W, N) = \
vaba##Q##_##T2##W(VECT_VAR(vector1, T1, W, N), \
VECT_VAR(vector2, T1, W, N), \
VECT_VAR(vector3, T1, W, N)); \
vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
#define DECL_VABA_VAR(VAR) \
DECL_VARIABLE(VAR, int, 8, 8); \
DECL_VARIABLE(VAR, int, 16, 4); \
DECL_VARIABLE(VAR, int, 32, 2); \
DECL_VARIABLE(VAR, uint, 8, 8); \
DECL_VARIABLE(VAR, uint, 16, 4); \
DECL_VARIABLE(VAR, uint, 32, 2); \
DECL_VARIABLE(VAR, int, 8, 16); \
DECL_VARIABLE(VAR, int, 16, 8); \
DECL_VARIABLE(VAR, int, 32, 4); \
DECL_VARIABLE(VAR, uint, 8, 16); \
DECL_VARIABLE(VAR, uint, 16, 8); \
DECL_VARIABLE(VAR, uint, 32, 4)
DECL_VABA_VAR(vector1);
DECL_VABA_VAR(vector2);
DECL_VABA_VAR(vector3);
DECL_VABA_VAR(vector_res);
clean_results ();
/* Initialize input "vector1" from "buffer". */
VLOAD(vector1, buffer, , int, s, 8, 8);
VLOAD(vector1, buffer, , int, s, 16, 4);
VLOAD(vector1, buffer, , int, s, 32, 2);
VLOAD(vector1, buffer, , uint, u, 8, 8);
VLOAD(vector1, buffer, , uint, u, 16, 4);
VLOAD(vector1, buffer, , uint, u, 32, 2);
VLOAD(vector1, buffer, q, int, s, 8, 16);
VLOAD(vector1, buffer, q, int, s, 16, 8);
VLOAD(vector1, buffer, q, int, s, 32, 4);
VLOAD(vector1, buffer, q, uint, u, 8, 16);
VLOAD(vector1, buffer, q, uint, u, 16, 8);
VLOAD(vector1, buffer, q, uint, u, 32, 4);
/* Choose init value arbitrarily. */
VDUP(vector2, , int, s, 8, 8, 1);
VDUP(vector2, , int, s, 16, 4, -13);
VDUP(vector2, , int, s, 32, 2, 8);
VDUP(vector2, , uint, u, 8, 8, 1);
VDUP(vector2, , uint, u, 16, 4, 13);
VDUP(vector2, , uint, u, 32, 2, 8);
VDUP(vector2, q, int, s, 8, 16, 10);
VDUP(vector2, q, int, s, 16, 8, -12);
VDUP(vector2, q, int, s, 32, 4, 32);
VDUP(vector2, q, uint, u, 8, 16, 10);
VDUP(vector2, q, uint, u, 16, 8, 12);
VDUP(vector2, q, uint, u, 32, 4, 32);
/* Choose init value arbitrarily. */
VDUP(vector3, , int, s, 8, 8, -5);
VDUP(vector3, , int, s, 16, 4, 25);
VDUP(vector3, , int, s, 32, 2, -40);
VDUP(vector3, , uint, u, 8, 8, 100);
VDUP(vector3, , uint, u, 16, 4, 2340);
VDUP(vector3, , uint, u, 32, 2, 0xffffffff);
VDUP(vector3, q, int, s, 8, 16, -100);
VDUP(vector3, q, int, s, 16, 8, -3000);
VDUP(vector3, q, int, s, 32, 4, 10000);
VDUP(vector3, q, uint, u, 8, 16, 2);
VDUP(vector3, q, uint, u, 16, 8, 3);
VDUP(vector3, q, uint, u, 32, 4, 4);
/* Execute the tests. */
TEST_VABA(, int, s, 8, 8);
TEST_VABA(, int, s, 16, 4);
TEST_VABA(, int, s, 32, 2);
TEST_VABA(, uint, u, 8, 8);
TEST_VABA(, uint, u, 16, 4);
TEST_VABA(, uint, u, 32, 2);
TEST_VABA(q, int, s, 8, 16);
TEST_VABA(q, int, s, 16, 8);
TEST_VABA(q, int, s, 32, 4);
TEST_VABA(q, uint, u, 8, 16);
TEST_VABA(q, uint, u, 16, 8);
TEST_VABA(q, uint, u, 32, 4);
CHECK_RESULTS (TEST_MSG, "");
}
int main (void)
{
exec_vaba ();
return 0;
}
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected,int,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected,int,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,uint,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected,uint,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffffff0, 0xfffffff1 };
VECT_VAR_DECL(expected,uint,64,1) [] = { 0xfffffffffffffff0 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0xc1800000, 0xc1700000 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,int,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
VECT_VAR_DECL(expected,int,32,4) [] = { 0xfffffff0, 0xfffffff1,
0xfffffff2, 0xfffffff3 };
VECT_VAR_DECL(expected,int,64,2) [] = { 0xfffffffffffffff0,
0xfffffffffffffff1 };
VECT_VAR_DECL(expected,uint,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,uint,16,8) [] = { 0xfff0, 0xfff1, 0xfff2,
0xfff3, 0xfff4, 0xfff5,
0xfff6, 0xfff7 };
VECT_VAR_DECL(expected,uint,32,4) [] = { 0xfffffff0, 0xfffffff1,
0xfffffff2, 0xfffffff3 };
VECT_VAR_DECL(expected,uint,64,2) [] = { 0xfffffffffffffff0,
0xfffffffffffffff1 };
VECT_VAR_DECL(expected,poly,8,16) [] = { 0xf0, 0xf1, 0xf2, 0xf3,
0xf4, 0xf5, 0xf6, 0xf7,
0xf8, 0xf9, 0xfa, 0xfb,
0xfc, 0xfd, 0xfe, 0xff };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0xfff0, 0xfff1, 0xfff2, 0xfff3,
0xfff4, 0xfff5, 0xfff6, 0xfff7 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0xc1800000, 0xc1700000,
0xc1600000, 0xc1500000 };
#define TEST_MSG "VLD1/VLD1Q"
void exec_vld1 (void)
{
/* Basic test vec=vld1(buffer); then store vec: vst1(result, vector). */
/* This test actually tests vdl1 and vst1 at the same time. */
#define TEST_VLD1(VAR, BUF, Q, T1, T2, W, N) \
VECT_VAR(VAR, T1, W, N) = vld1##Q##_##T2##W(VECT_VAR(BUF, T1, W, N)); \
vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(VAR, T1, W, N))
DECL_VARIABLE_ALL_VARIANTS(vector);
clean_results ();
TEST_MACRO_ALL_VARIANTS_2_5(TEST_VLD1, vector, buffer);
TEST_VLD1(vector, buffer, , float, f, 32, 2);
TEST_VLD1(vector, buffer, q, float, f, 32, 4);
CHECK_RESULTS (TEST_MSG, "");
}
int main (void)
{
exec_vld1 ();
return 0;
}
#include <arm_neon.h>
#include "arm-neon-ref.h"
#include "compute-ref-data.h"
/* Expected results. */
VECT_VAR_DECL(expected,int,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6,
0xe8, 0xea, 0xec, 0xee };
VECT_VAR_DECL(expected,int,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 };
VECT_VAR_DECL(expected,int,32,2) [] = { 0xfffff000, 0xfffff100 };
VECT_VAR_DECL(expected,int,64,1) [] = { 0xffffffffffffff80 };
VECT_VAR_DECL(expected,uint,8,8) [] = { 0xe0, 0xe2, 0xe4, 0xe6,
0xe8, 0xea, 0xec, 0xee };
VECT_VAR_DECL(expected,uint,16,4) [] = { 0xff80, 0xff88, 0xff90, 0xff98 };
VECT_VAR_DECL(expected,uint,32,2) [] = { 0xfffff000, 0xfffff100 };
VECT_VAR_DECL(expected,uint,64,1) [] = { 0xffffffffffffff80 };
VECT_VAR_DECL(expected,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,4) [] = { 0x3333, 0x3333,
0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
VECT_VAR_DECL(expected,int,8,16) [] = { 0x0, 0x20, 0x40, 0x60,
0x80, 0xa0, 0xc0, 0xe0,
0x0, 0x20, 0x40, 0x60,
0x80, 0xa0, 0xc0, 0xe0 };
VECT_VAR_DECL(expected,int,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000,
0x4000, 0x5000, 0x6000, 0x7000 };
VECT_VAR_DECL(expected,int,32,4) [] = { 0x0, 0x40000000,
0x80000000, 0xc0000000 };
VECT_VAR_DECL(expected,int,64,2) [] = { 0x0, 0x8000000000000000 };
VECT_VAR_DECL(expected,uint,8,16) [] = { 0x0, 0x20, 0x40, 0x60,
0x80, 0xa0, 0xc0, 0xe0,
0x0, 0x20, 0x40, 0x60,
0x80, 0xa0, 0xc0, 0xe0 };
VECT_VAR_DECL(expected,uint,16,8) [] = { 0x0, 0x1000, 0x2000, 0x3000,
0x4000, 0x5000, 0x6000, 0x7000 };
VECT_VAR_DECL(expected,uint,32,4) [] = { 0x0, 0x40000000,
0x80000000, 0xc0000000 };
VECT_VAR_DECL(expected,uint,64,2) [] = { 0x0, 0x8000000000000000 };
VECT_VAR_DECL(expected,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected,poly,16,8) [] = { 0x3333, 0x3333, 0x3333, 0x3333,
0x3333, 0x3333, 0x3333, 0x3333 };
VECT_VAR_DECL(expected,hfloat,32,4) [] = { 0x33333333, 0x33333333,
0x33333333, 0x33333333 };
/* Expected results with large shift amount. */
VECT_VAR_DECL(expected_large_shift,int,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,int,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,int,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,int,64,1) [] = { 0x0 };
VECT_VAR_DECL(expected_large_shift,uint,8,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,uint,16,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,uint,32,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,uint,64,1) [] = { 0x0 };
VECT_VAR_DECL(expected_large_shift,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected_large_shift,poly,16,4) [] = { 0x3333, 0x3333,
0x3333, 0x3333 };
VECT_VAR_DECL(expected_large_shift,hfloat,32,2) [] = { 0x33333333, 0x33333333 };
VECT_VAR_DECL(expected_large_shift,int,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,int,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,int,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,int,64,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,uint,8,16) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,uint,16,8) [] = { 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,uint,32,4) [] = { 0x0, 0x0, 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,uint,64,2) [] = { 0x0, 0x0 };
VECT_VAR_DECL(expected_large_shift,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected_large_shift,poly,16,8) [] = { 0x3333, 0x3333,
0x3333, 0x3333,
0x3333, 0x3333,
0x3333, 0x3333 };
VECT_VAR_DECL(expected_large_shift,hfloat,32,4) [] = { 0x33333333, 0x33333333,
0x33333333, 0x33333333 };
/* Expected results with negative shift amount. */
VECT_VAR_DECL(expected_negative_shift,int,8,8) [] = { 0xf8, 0xf8, 0xf9, 0xf9,
0xfa, 0xfa, 0xfb, 0xfb };
VECT_VAR_DECL(expected_negative_shift,int,16,4) [] = { 0xfff8, 0xfff8,
0xfff9, 0xfff9 };
VECT_VAR_DECL(expected_negative_shift,int,32,2) [] = { 0xfffffffc, 0xfffffffc };
VECT_VAR_DECL(expected_negative_shift,int,64,1) [] = { 0xffffffffffffffff };
VECT_VAR_DECL(expected_negative_shift,uint,8,8) [] = { 0x78, 0x78, 0x79, 0x79,
0x7a, 0x7a, 0x7b, 0x7b };
VECT_VAR_DECL(expected_negative_shift,uint,16,4) [] = { 0x7ff8, 0x7ff8,
0x7ff9, 0x7ff9 };
VECT_VAR_DECL(expected_negative_shift,uint,32,2) [] = { 0x3ffffffc,
0x3ffffffc };
VECT_VAR_DECL(expected_negative_shift,uint,64,1) [] = { 0xfffffffffffffff };
VECT_VAR_DECL(expected_negative_shift,poly,8,8) [] = { 0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected_negative_shift,poly,16,4) [] = { 0x3333, 0x3333,
0x3333, 0x3333 };
VECT_VAR_DECL(expected_negative_shift,hfloat,32,2) [] = { 0x33333333,
0x33333333 };
VECT_VAR_DECL(expected_negative_shift,int,8,16) [] = { 0xfc, 0xfc, 0xfc, 0xfc,
0xfd, 0xfd, 0xfd, 0xfd,
0xfe, 0xfe, 0xfe, 0xfe,
0xff, 0xff, 0xff, 0xff };
VECT_VAR_DECL(expected_negative_shift,int,16,8) [] = { 0xffff, 0xffff,
0xffff, 0xffff,
0xffff, 0xffff,
0xffff, 0xffff };
VECT_VAR_DECL(expected_negative_shift,int,32,4) [] = { 0xfffffffe, 0xfffffffe,
0xfffffffe, 0xfffffffe };
VECT_VAR_DECL(expected_negative_shift,int,64,2) [] = { 0xffffffffffffffff,
0xffffffffffffffff };
VECT_VAR_DECL(expected_negative_shift,uint,8,16) [] = { 0x3c, 0x3c, 0x3c, 0x3c,
0x3d, 0x3d, 0x3d, 0x3d,
0x3e, 0x3e, 0x3e, 0x3e,
0x3f, 0x3f, 0x3f, 0x3f };
VECT_VAR_DECL(expected_negative_shift,uint,16,8) [] = { 0x7ff, 0x7ff,
0x7ff, 0x7ff,
0x7ff, 0x7ff,
0x7ff, 0x7ff };
VECT_VAR_DECL(expected_negative_shift,uint,32,4) [] = { 0x1ffffffe, 0x1ffffffe,
0x1ffffffe, 0x1ffffffe };
VECT_VAR_DECL(expected_negative_shift,uint,64,2) [] = { 0x7ffffffffffffff,
0x7ffffffffffffff };
VECT_VAR_DECL(expected_negative_shift,poly,8,16) [] = { 0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33,
0x33, 0x33, 0x33, 0x33 };
VECT_VAR_DECL(expected_negative_shift,poly,16,8) [] = { 0x3333, 0x3333,
0x3333, 0x3333,
0x3333, 0x3333,
0x3333, 0x3333 };
VECT_VAR_DECL(expected_negative_shift,hfloat,32,4) [] = { 0x33333333,
0x33333333,
0x33333333,
0x33333333 };
#ifndef INSN_NAME
#define INSN_NAME vshl
#define TEST_MSG "VSHL/VSHLQ"
#endif
#define FNNAME1(NAME) exec_ ## NAME
#define FNNAME(NAME) FNNAME1(NAME)
void FNNAME (INSN_NAME) (void)
{
/* Basic test: v3=vshl(v1,v2), then store the result. */
#define TEST_VSHL(T3, Q, T1, T2, W, N) \
VECT_VAR(vector_res, T1, W, N) = \
vshl##Q##_##T2##W(VECT_VAR(vector, T1, W, N), \
VECT_VAR(vector_shift, T3, W, N)); \
vst1##Q##_##T2##W(VECT_VAR(result, T1, W, N), VECT_VAR(vector_res, T1, W, N))
DECL_VARIABLE_ALL_VARIANTS(vector);
DECL_VARIABLE_ALL_VARIANTS(vector_res);
DECL_VARIABLE_SIGNED_VARIANTS(vector_shift);
clean_results ();
/* Initialize input "vector" from "buffer". */
TEST_MACRO_ALL_VARIANTS_2_5(VLOAD, vector, buffer);
/* Choose init value arbitrarily, will be used as shift amount. */
VDUP(vector_shift, , int, s, 8, 8, 1);
VDUP(vector_shift, , int, s, 16, 4, 3);
VDUP(vector_shift, , int, s, 32, 2, 8);
VDUP(vector_shift, , int, s, 64, 1, 3);
VDUP(vector_shift, q, int, s, 8, 16, 5);
VDUP(vector_shift, q, int, s, 16, 8, 12);
VDUP(vector_shift, q, int, s, 32, 4, 30);
VDUP(vector_shift, q, int, s, 64, 2, 63);
/* Execute the tests. */
TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int);
CHECK_RESULTS (TEST_MSG, "");
/* Test large shift amount (larger or equal to the type width. */
VDUP(vector_shift, , int, s, 8, 8, 8);
VDUP(vector_shift, , int, s, 16, 4, 16);
VDUP(vector_shift, , int, s, 32, 2, 32);
VDUP(vector_shift, , int, s, 64, 1, 64);
VDUP(vector_shift, q, int, s, 8, 16, 8);
VDUP(vector_shift, q, int, s, 16, 8, 17);
VDUP(vector_shift, q, int, s, 32, 4, 33);
VDUP(vector_shift, q, int, s, 64, 2, 65);
/* Execute the tests. */
TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int);
CHECK_RESULTS_NAMED (TEST_MSG, expected_large_shift, "(large shift amount)");
/* Test negative shift amount. */
VDUP(vector_shift, , int, s, 8, 8, -1);
VDUP(vector_shift, , int, s, 16, 4, -1);
VDUP(vector_shift, , int, s, 32, 2, -2);
VDUP(vector_shift, , int, s, 64, 1, -4);
VDUP(vector_shift, q, int, s, 8, 16, -2);
VDUP(vector_shift, q, int, s, 16, 8, -5);
VDUP(vector_shift, q, int, s, 32, 4, -3);
VDUP(vector_shift, q, int, s, 64, 2, -5);
/* Execute the tests. */
TEST_MACRO_ALL_VARIANTS_1_5(TEST_VSHL, int);
CHECK_RESULTS_NAMED (TEST_MSG, expected_negative_shift, "(negative shift amount)");
}
int main (void)
{
FNNAME (INSN_NAME) ();
return 0;
}
Advanced SIMD intrinsics tests are located in gcc.target/aarch64.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment