Commit ddeabd3e by Alan Lawrence, committed by Alan Lawrence

[PATCH AArch64 1/2] Improve codegen of vector compares inc. tst instruction

gcc/:

	* config/aarch64/aarch64-builtins.c (aarch64_types_cmtst_qualifiers,
	TYPES_TST): Define.
	(aarch64_fold_builtin): Update pattern for cmtst.

	* config/aarch64/aarch64-protos.h (aarch64_const_vec_all_same_int_p):
	Declare.

	* config/aarch64/aarch64-simd-builtins.def (cmtst): Update qualifiers.

	* config/aarch64/aarch64-simd.md (aarch64_vcond_internal<mode><mode>):
	Switch operands, separate out more cases, refactor.

	(aarch64_cmtst<mode>): Rewrite pattern to match (plus ... -1).

	* config/aarch64/aarch64.c (aarch64_const_vec_all_same_int_p): Take single
	argument; rename old version to...
	(aarch64_const_vec_all_same_in_range_p): ...this.
	(aarch64_print_operand, aarch64_simd_shift_imm_p): Follow renaming.

	* config/aarch64/predicates.md (aarch64_simd_imm_minus_one): Define.

gcc/testsuite/:

	* gcc.target/aarch64/simd/int_comparisons.x: New file.
	* gcc.target/aarch64/simd/int_comparisons_1.c: New test.
	* gcc.target/aarch64/simd/int_comparisons_2.c: Ditto.

From-SVN: r214948
parent e625e715
2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
* config/aarch64/aarch64-builtins.c (aarch64_types_cmtst_qualifiers,
TYPES_TST): Define.
(aarch64_fold_builtin): Update pattern for cmtst.
* config/aarch64/aarch64-protos.h (aarch64_const_vec_all_same_int_p):
Declare.
* config/aarch64/aarch64-simd-builtins.def (cmtst): Update qualifiers.
* config/aarch64/aarch64-simd.md (aarch64_vcond_internal<mode><mode>):
Switch operands, separate out more cases, refactor.
(aarch64_cmtst<mode>): Rewrite pattern to match (plus ... -1).
* config/aarch64/aarch64.c (aarch64_const_vec_all_same_int_p): Take single
argument; rename old version to...
(aarch64_const_vec_all_same_in_range_p): ...this.
(aarch64_print_operand, aarch64_simd_shift_imm_p): Follow renaming.
* config/aarch64/predicates.md (aarch64_simd_imm_minus_one): Define.
2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
* config/aarch64/aarch64-builtins.c (enum aarch64_type_qualifiers):
Remove qualifier_const_pointer, update comment.
......@@ -144,6 +144,11 @@ aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_maybe_immediate };
#define TYPES_BINOP (aarch64_types_binop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_cmtst_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_none, qualifier_none,
qualifier_internal, qualifier_internal };
#define TYPES_TST (aarch64_types_cmtst_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopv_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_void, qualifier_none, qualifier_none };
#define TYPES_BINOPV (aarch64_types_binopv_qualifiers)
......@@ -1285,7 +1290,7 @@ aarch64_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *args,
BUILTIN_VALLDI (BINOP, cmeq, 0)
return fold_build2 (EQ_EXPR, type, args[0], args[1]);
break;
BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
BUILTIN_VSDQ_I_DI (TST, cmtst, 0)
{
tree and_node = fold_build2 (BIT_AND_EXPR, type, args[0], args[1]);
tree vec_zero_node = build_zero_cst (type);
......
......@@ -180,6 +180,7 @@ bool aarch64_cannot_change_mode_class (enum machine_mode,
enum reg_class);
enum aarch64_symbol_type
aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context);
bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
bool aarch64_constant_address_p (rtx);
bool aarch64_expand_movmem (rtx *);
bool aarch64_float_const_zero_rtx_p (rtx);
......
......@@ -246,7 +246,7 @@
/* Implemented by aarch64_cm<cmp><mode>. */
BUILTIN_VSDQ_I_DI (BINOP, cmgeu, 0)
BUILTIN_VSDQ_I_DI (BINOP, cmgtu, 0)
BUILTIN_VSDQ_I_DI (BINOP, cmtst, 0)
BUILTIN_VSDQ_I_DI (TST, cmtst, 0)
/* Implemented by reduc_<sur>plus_<mode>. */
BUILTIN_VALL (UNOP, reduc_splus_, 10)
......
......@@ -1912,58 +1912,94 @@
(match_operand:VDQ 2 "nonmemory_operand")))]
"TARGET_SIMD"
{
int inverse = 0, has_zero_imm_form = 0;
rtx op1 = operands[1];
rtx op2 = operands[2];
rtx mask = gen_reg_rtx (<MODE>mode);
enum rtx_code code = GET_CODE (operands[3]);
/* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
and desirable for other comparisons if it results in FOO ? -1 : 0
(this allows direct use of the comparison result without a bsl). */
if (code == NE
|| (code != EQ
&& op1 == CONST0_RTX (<V_cmp_result>mode)
&& op2 == CONSTM1_RTX (<V_cmp_result>mode)))
{
op1 = operands[2];
op2 = operands[1];
switch (code)
{
case LE: code = GT; break;
case LT: code = GE; break;
case GE: code = LT; break;
case GT: code = LE; break;
/* No case EQ. */
case NE: code = EQ; break;
case LTU: code = GEU; break;
case LEU: code = GTU; break;
case GTU: code = LEU; break;
case GEU: code = LTU; break;
default: gcc_unreachable ();
}
}
switch (GET_CODE (operands[3]))
/* Make sure we can handle the last operand. */
switch (code)
{
case NE:
/* Normalized to EQ above. */
gcc_unreachable ();
case LE:
case LT:
case NE:
inverse = 1;
/* Fall through. */
case GE:
case GT:
case EQ:
has_zero_imm_form = 1;
break;
case LEU:
case LTU:
inverse = 1;
break;
/* These instructions have a form taking an immediate zero. */
if (operands[5] == CONST0_RTX (<MODE>mode))
break;
/* Fall through, as may need to load into register. */
default:
if (!REG_P (operands[5]))
operands[5] = force_reg (<MODE>mode, operands[5]);
break;
}
if (!REG_P (operands[5])
&& (operands[5] != CONST0_RTX (<MODE>mode) || !has_zero_imm_form))
operands[5] = force_reg (<MODE>mode, operands[5]);
switch (GET_CODE (operands[3]))
switch (code)
{
case LT:
emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
break;
case GE:
emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
break;
case LE:
emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
break;
case GT:
emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
break;
case LTU:
emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
break;
case GEU:
emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
break;
case LEU:
emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
break;
case GTU:
emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
break;
case NE:
/* NE has been normalized to EQ above. */
case EQ:
emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
break;
......@@ -1972,12 +2008,6 @@
gcc_unreachable ();
}
if (inverse)
{
op1 = operands[2];
op2 = operands[1];
}
/* If we have (a = (b CMP c) ? -1 : 0);
Then we can simply move the generated mask. */
......@@ -3932,14 +3962,22 @@
;; cmtst
;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
;; we don't have any insns using ne, and aarch64_vcond_internal outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1.
(define_insn "aarch64_cmtst<mode>"
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
(neg:<V_cmp_result>
(ne:<V_cmp_result>
(plus:<V_cmp_result>
(eq:<V_cmp_result>
(and:VDQ
(match_operand:VDQ 1 "register_operand" "w")
(match_operand:VDQ 2 "register_operand" "w"))
(vec_duplicate:<V_cmp_result> (const_int 0)))))]
(match_operand:VDQ 3 "aarch64_simd_imm_zero"))
(match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
]
"TARGET_SIMD"
"cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
[(set_attr "type" "neon_tst<q>")]
......
......@@ -137,9 +137,6 @@ static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
HOST_WIDE_INT, HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
const unsigned char *sel);
static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
......@@ -3576,6 +3573,36 @@ aarch64_get_condition_code (rtx x)
}
}
/* Return true if X is an integer CONST_VECTOR all of whose elements
   are equal, with that common value lying in [MINVAL, MAXVAL]
   (inclusive).  */
bool
aarch64_const_vec_all_same_in_range_p (rtx x,
				       HOST_WIDE_INT minval,
				       HOST_WIDE_INT maxval)
{
  if (GET_CODE (x) != CONST_VECTOR
      || GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
    return false;

  /* The first element must be in range; every later element must
     match it.  */
  HOST_WIDE_INT elt0 = INTVAL (CONST_VECTOR_ELT (x, 0));
  if (elt0 < minval || elt0 > maxval)
    return false;

  int nelts = CONST_VECTOR_NUNITS (x);
  for (int idx = 1; idx < nelts; idx++)
    if (INTVAL (CONST_VECTOR_ELT (x, idx)) != elt0)
      return false;

  return true;
}
/* Return true if X is an integer CONST_VECTOR with every element equal
   to VAL.  Thin wrapper over aarch64_const_vec_all_same_in_range_p
   using the degenerate range [VAL, VAL].  */
bool
aarch64_const_vec_all_same_int_p (rtx x, HOST_WIDE_INT val)
{
return aarch64_const_vec_all_same_in_range_p (x, val, val);
}
static unsigned
bit_count (unsigned HOST_WIDE_INT value)
{
......@@ -3827,9 +3854,10 @@ aarch64_print_operand (FILE *f, rtx x, char code)
case CONST_VECTOR:
if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
{
gcc_assert (aarch64_const_vec_all_same_int_p (x,
HOST_WIDE_INT_MIN,
HOST_WIDE_INT_MAX));
gcc_assert (
aarch64_const_vec_all_same_in_range_p (x,
HOST_WIDE_INT_MIN,
HOST_WIDE_INT_MAX));
asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
}
else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
......@@ -7732,39 +7760,15 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
#undef CHECK
}
/* Return true if X is an integer CONST_VECTOR whose elements all hold
   the same value, and that value lies within [MINVAL, MAXVAL].
   NOTE(review): this is the pre-patch static copy that the commit
   deletes; it is renamed/hoisted to
   aarch64_const_vec_all_same_in_range_p earlier in the file.  */
static bool
aarch64_const_vec_all_same_int_p (rtx x,
HOST_WIDE_INT minval,
HOST_WIDE_INT maxval)
{
HOST_WIDE_INT firstval;
int count, i;
if (GET_CODE (x) != CONST_VECTOR
|| GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_INT)
return false;
/* All elements must equal the first, which must be in range.  */
firstval = INTVAL (CONST_VECTOR_ELT (x, 0));
if (firstval < minval || firstval > maxval)
return false;
count = CONST_VECTOR_NUNITS (x);
for (i = 1; i < count; i++)
if (INTVAL (CONST_VECTOR_ELT (x, i)) != firstval)
return false;
return true;
}
/* Check if immediate shift constants are within range: left shifts
   allow 0..bit_width-1, right shifts allow 1..bit_width.
   NOTE(review): the scraped diff interleaved the pre- and post-rename
   return statements, leaving two consecutive returns before `else'
   (invalid C); only the renamed _in_range_p calls are kept here.  */
bool
aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
{
  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
  if (left)
    return aarch64_const_vec_all_same_in_range_p (x, 0, bit_width - 1);
  else
    return aarch64_const_vec_all_same_in_range_p (x, 1, bit_width);
}
/* Return true if X is a uniform vector where all elements
......
......@@ -273,3 +273,9 @@
{
return aarch64_simd_imm_zero_p (op, mode);
})
;; True for a CONST_VECTOR whose elements are all integer -1, i.e. the
;; all-ones mask that AArch64 vector comparisons produce.
(define_special_predicate "aarch64_simd_imm_minus_one"
(match_code "const_vector")
{
return aarch64_const_vec_all_same_int_p (op, -1);
})
2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
* gcc.target/aarch64/simd/int_comparisons.x: New file.
* gcc.target/aarch64/simd/int_comparisons_1.c: New test.
* gcc.target/aarch64/simd/int_comparisons_2.c: Ditto.
2014-09-05 Alan Lawrence <alan.lawrence@arm.com>
* gcc.target/aarch64/simd/vrbit_1.c: New test.
2014-09-05 Richard Biener <rguenther@suse.de>
......
/* test_vcXXX wrappers for all the vcXXX (vector compare) and vtst intrinsics
in arm_neon.h (excluding the 64x1 variants as these generally produce scalar
not vector ops). */
#include "arm_neon.h"
/* Controls for operand placement: DONT_FORCE is a no-op; FORCE_SIMD
   round-trips the value through an inline-asm 'mov' so it must live in
   a SIMD register (used for the scalar d-register variants).  */
#define DONT_FORCE(X)
#define FORCE_SIMD(V1) asm volatile ("mov %d0, %1.d[0]" \
: "=w"(V1) \
: "w"(V1) \
: /* No clobbers */);
/* Wrapper function for a unary intrinsic v<OP><SUFFIX> (a).  */
#define OP1(SIZE, OP, BASETYPE, SUFFIX, FORCE) uint##SIZE##_t \
test_v##OP##SUFFIX (BASETYPE##SIZE##_t a) \
{ \
uint##SIZE##_t res; \
FORCE (a); \
res = v##OP##SUFFIX (a); \
FORCE (res); \
return res; \
}
/* Wrapper function for a binary intrinsic v<OP><SUFFIX> (a, b).  */
#define OP2(SIZE, OP, BASETYPE, SUFFIX, FORCE) uint##SIZE##_t \
test_v##OP##SUFFIX (BASETYPE##SIZE##_t a, BASETYPE##SIZE##_t b) \
{ \
uint##SIZE##_t res; \
FORCE (a); \
FORCE (b); \
res = v##OP##SUFFIX (a, b); \
FORCE (res); \
return res; \
}
/* Ops that exist for both signed and unsigned element types.  */
#define UNSIGNED_OPS(SIZE, BASETYPE, SUFFIX, FORCE) \
OP2 (SIZE, tst, BASETYPE, SUFFIX, FORCE) \
OP1 (SIZE, ceqz, BASETYPE, SUFFIX, FORCE) \
OP2 (SIZE, ceq, BASETYPE, SUFFIX, FORCE) \
OP2 (SIZE, cge, BASETYPE, SUFFIX, FORCE) \
OP2 (SIZE, cgt, BASETYPE, SUFFIX, FORCE) \
OP2 (SIZE, cle, BASETYPE, SUFFIX, FORCE) \
OP2 (SIZE, clt, BASETYPE, SUFFIX, FORCE)
/* Signed types additionally get the compare-against-zero forms.  */
#define ALL_OPS(SIZE, BASETYPE, SUFFIX, FORCE) \
OP1 (SIZE, cgez, BASETYPE, SUFFIX, FORCE) \
OP1 (SIZE, cgtz, BASETYPE, SUFFIX, FORCE) \
OP1 (SIZE, clez, BASETYPE, SUFFIX, FORCE) \
OP1 (SIZE, cltz, BASETYPE, SUFFIX, FORCE) \
UNSIGNED_OPS (SIZE, BASETYPE, SUFFIX, FORCE)
/* Instantiate wrappers for all signed vector/scalar types.  */
ALL_OPS (8x8, int, _s8, DONT_FORCE)
ALL_OPS (16x4, int, _s16, DONT_FORCE)
ALL_OPS (32x2, int, _s32, DONT_FORCE)
ALL_OPS (64x1, int, _s64, DONT_FORCE)
ALL_OPS (64, int, d_s64, FORCE_SIMD)
ALL_OPS (8x16, int, q_s8, DONT_FORCE)
ALL_OPS (16x8, int, q_s16, DONT_FORCE)
ALL_OPS (32x4, int, q_s32, DONT_FORCE)
ALL_OPS (64x2, int, q_s64, DONT_FORCE)
/* Instantiate wrappers for all unsigned vector/scalar types.  */
UNSIGNED_OPS (8x8, uint, _u8, DONT_FORCE)
UNSIGNED_OPS (16x4, uint, _u16, DONT_FORCE)
UNSIGNED_OPS (32x2, uint, _u32, DONT_FORCE)
UNSIGNED_OPS (64x1, uint, _u64, DONT_FORCE)
UNSIGNED_OPS (64, uint, d_u64, FORCE_SIMD)
UNSIGNED_OPS (8x16, uint, q_u8, DONT_FORCE)
UNSIGNED_OPS (16x8, uint, q_u16, DONT_FORCE)
UNSIGNED_OPS (32x4, uint, q_u32, DONT_FORCE)
UNSIGNED_OPS (64x2, uint, q_u64, DONT_FORCE)
/* { dg-do compile } */
/* { dg-options "-O3 -fno-inline" } */
/* Scan-assembler test, so, incorporate as little other code as possible. */
#include "arm_neon.h"
#include "int_comparisons.x"
/* Operations on all 18 integer types: (q?)_[su](8|16|32|64), d_[su]64.
(d?)_[us]64 generate regs of form 'd0' rather than e.g. 'v0.2d'. */
/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 14 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 4 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmeq\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmtst\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmtst\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
/* vcge + vcle both implemented with cmge (signed) or cmhs (unsigned). */
/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmhs\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmhs\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
/* vcgt + vclt both implemented with cmgt (signed) or cmhi (unsigned). */
/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmhi\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\]" 14 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmhi\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]+d\[0-9\]+" 4 } } */
/* Comparisons against immediate zero, on the 8 signed integer types only. */
/* { dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
/* For int64_t and int64x1_t, combine_simplify_rtx failure of
https://gcc.gnu.org/ml/gcc/2014-06/msg00253.html
prevents generation of cmge....#0, instead producing mvn + sshr. */
/* { #dg-final { scan-assembler-times "\[ \t\]cmge\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmgt\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmle\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmle\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?0" 2 } } */
/* { dg-final { scan-assembler-times "\[ \t\]cmlt\[ \t\]+v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*v\[0-9\]+\.\[0-9\]+\[bshd\],\[ \t\]*#?0" 7 } } */
/* For int64_t and int64x1_t, cmlt ... #0 and sshr ... #63 are equivalent,
so allow either. cmgez issue above results in extra 2 * sshr....63. */
/* { dg-final { scan-assembler-times "\[ \t\](?:cmlt|sshr)\[ \t\]+d\[0-9\]+,\[ \t\]*d\[0-9\]+,\[ \t\]*#?(?:0|63)" 4 } } */
// All should have been compiled into single insns without inverting result:
/* { dg-final { scan-assembler-not "not" } } */
/* { dg-do run } */
/* { dg-options "-O2 -fno-inline" } */
/* Stops the test_xxx methods being inlined into main, thus preventing constant
propagation. */
#include "int_comparisons.x"
extern void abort (void);
/* Abort unless the two lanes of res hold R0 and R1.  */
#define CHECK2(R0, R1) if (res[0] != R0 || res[1] != R1) abort ()
/* Run all two-lane comparison wrappers for one element type and check
   the lane-wise all-ones/all-zeros results.  */
#define TEST2(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \
BASETYPE##_t _a[2] = {2, 3}; \
BASETYPE##x2_t a = vld1##SUFFIX (_a); \
BASETYPE##_t _b[2] = {1, 3}; \
BASETYPE##x2_t b = vld1##SUFFIX (_b); \
RESTYPE res[2]; \
vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); CHECK2 (0, 0); \
vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (b, a)); CHECK2 (-1, 0); \
vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); CHECK2 (0, -1); \
vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (b, a)); CHECK2 (-1, -1); \
vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); CHECK2 (0, -1); \
vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); CHECK2 (-1, -1); \
vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (b, a)); CHECK2 (0, -1); \
vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); CHECK2 (-1, 0); \
vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (b, a)); CHECK2 (0, 0); \
vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); CHECK2 (0, -1); \
vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a + 1, b)); CHECK2 (-1, 0); \
}
/* Abort unless the four lanes of res hold R0..R3 (cast to T).  */
#define CHECK4(T, R0, R1, R2, R3) \
if (res[0] != (T)R0 || res[1] != (T)R1 \
|| res[2] != (T)R2 || res[3] != (T)R3) abort ()
/* Run all four-lane comparison wrappers for one element type.  */
#define TEST4(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \
BASETYPE##_t _a[4] = {1, 2, 3, 4}; \
BASETYPE##x4_t a = vld1##SUFFIX (_a); \
BASETYPE##_t _b[4] = {4, 2, 1, 3}; \
BASETYPE##x4_t b = vld1##SUFFIX (_b); \
RESTYPE res[4]; \
vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \
CHECK4 (RESTYPE, -1, 0, 0, 0); \
vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \
CHECK4 (RESTYPE, -1, -1, 0, 0); \
vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \
CHECK4 (RESTYPE, 0, -1, 0, 0); \
vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \
CHECK4 (RESTYPE, 0, -1, -1, -1); \
vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \
CHECK4 (RESTYPE, 0, 0, -1, -1); \
vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \
CHECK4 (RESTYPE, 0, -1, -1, 0); \
}
/* Abort unless the eight lanes of res hold R0..R7 (cast to T).  */
#define CHECK8(T, R0, R1, R2, R3, R4, R5, R6, R7) \
if (res[0] != (T)R0 || res[1] != (T)R1 || res[2] != (T)R2 || res[3] != (T)R3 \
|| res[4] != (T)R4 || res[5] != (T)R5 || res[6] != (T)R6 \
|| res[7] != (T)R7) abort ()
/* Run all eight-lane comparison wrappers for one element type.  */
#define TEST8(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \
BASETYPE##_t _a[8] = {1, 2, 3, 4, 5, 6, 7, 8}; \
BASETYPE##x8_t a = vld1##SUFFIX (_a); \
BASETYPE##_t _b[8] = {4, 2, 1, 3, 2, 6, 8, 9}; \
BASETYPE##x8_t b = vld1##SUFFIX (_b); \
RESTYPE res[8]; \
vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \
CHECK8 (RESTYPE, -1, 0, 0, 0, 0, 0, -1, -1); \
vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \
CHECK8 (RESTYPE, -1, -1, 0, 0, 0, -1, -1, -1); \
vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \
CHECK8 (RESTYPE, 0, -1, 0, 0, 0, -1, 0, 0); \
vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \
CHECK8 (RESTYPE, 0, -1, -1, -1, -1, -1, 0, 0); \
vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \
CHECK8 (RESTYPE, 0, 0, -1, -1, -1, 0, 0, 0); \
vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \
CHECK8 (RESTYPE, 0, -1, -1, 0, 0, -1, 0, -1); \
}
/* 16-way tests use same 8 values twice, so CHECK16 takes only eight
   expected lane values and checks each against two lanes.  */
#define CHECK16(T, R0, R1, R2, R3, R4, R5, R6, R7) \
if (res[0] != (T)R0 || res[1] != (T)R1 || res[2] != (T)R2 || res[3] != (T)R3 \
|| res[4] != (T)R4 || res[5] != (T)R5 || res[6] != (T)R6 \
|| res[7] != (T)R7 || res[8] != (T)R0 || res[9] != (T)R1 \
|| res[10] != (T)R2 || res[11] != (T)R3 || res[12] != (T)R4 \
|| res[13] != (T)R5 || res[14] != (T)R6 || res[15] != (T)R7) abort ()
/* Run all sixteen-lane comparison wrappers for one element type.  */
#define TEST16(BASETYPE, SUFFIX, RESTYPE, ST1_SUFFIX) { \
BASETYPE##_t _a[16] = {1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8}; \
BASETYPE##x16_t a = vld1##SUFFIX (_a); \
BASETYPE##_t _b[16] = {4, 2, 1, 3, 2, 6, 8, 9, 4, 2, 1, 3, 2, 6, 8, 9}; \
BASETYPE##x16_t b = vld1##SUFFIX (_b); \
RESTYPE res[16]; \
vst1##ST1_SUFFIX (res, test_vclt##SUFFIX (a, b)); \
CHECK16 (RESTYPE, -1, 0, 0, 0, 0, 0, -1, -1); \
vst1##ST1_SUFFIX (res, test_vcle##SUFFIX (a, b)); \
CHECK16 (RESTYPE, -1, -1, 0, 0, 0, -1, -1, -1); \
vst1##ST1_SUFFIX (res, test_vceq##SUFFIX (a, b)); \
CHECK16 (RESTYPE, 0, -1, 0, 0, 0, -1, 0, 0); \
vst1##ST1_SUFFIX (res, test_vcge##SUFFIX (a, b)); \
CHECK16 (RESTYPE, 0, -1, -1, -1, -1, -1, 0, 0); \
vst1##ST1_SUFFIX (res, test_vcgt##SUFFIX (a, b)); \
CHECK16 (RESTYPE, 0, 0, -1, -1, -1, 0, 0, 0); \
vst1##ST1_SUFFIX (res, test_vtst##SUFFIX (a, b)); \
CHECK16 (RESTYPE, 0, -1, -1, 0, 0, -1, 0, -1); \
}
/* Drive the 2/4/8/16-lane comparison tests over all the integer
   element types exercised by int_comparisons.x; each TESTn aborts on
   the first mismatching lane.  */
int
main (int argc, char **argv)
{
TEST2 (int32, _s32, uint32_t, _u32);
TEST2 (uint32, _u32, uint32_t, _u32);
TEST2 (int64, q_s64, uint64_t, q_u64);
TEST2 (uint64, q_u64, uint64_t, q_u64);
TEST4 (int16, _s16, uint16_t, _u16);
TEST4 (uint16, _u16, uint16_t, _u16);
TEST4 (int32, q_s32, uint32_t, q_u32);
TEST4 (uint32, q_u32, uint32_t, q_u32);
TEST8 (int8, _s8, uint8_t, _u8);
TEST8 (uint8, _u8, uint8_t, _u8);
TEST8 (int16, q_s16, uint16_t, q_u16);
TEST8 (uint16, q_u16, uint16_t, q_u16);
TEST16 (int8, q_s8, uint8_t, q_u8);
TEST16 (uint8, q_u8, uint8_t, q_u8);
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment