Commit d21052eb by Tamar Christina

Add missing AArch64 NEON intrinsics for Armv8.2-a to Armv8.4-a

This patch adds the missing NEON intrinsics for all 128-bit vector integer
modes of the three-way exclusive-OR (EOR3) and bit-clear-and-exclusive-OR
(BCAX) instructions, for Armv8.2-a to Armv8.4-a.
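
EOR3 computes a three-way exclusive OR; BCAX clears the bits of one source
with another and exclusive-ORs the result into a third.  A minimal usage
sketch (illustrative only, assuming a compiler with this patch and
-march=armv8.2-a+sha3):

  #include <arm_neon.h>

  uint32x4_t
  f_eor3 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
  {
    return veor3q_u32 (a, b, c);   /* a ^ b ^ c     -> one eor3 */
  }

  uint32x4_t
  f_bcax (uint32x4_t a, uint32x4_t b, uint32x4_t c)
  {
    return vbcaxq_u32 (a, b, c);   /* a ^ (b & ~c)  -> one bcax */
  }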

gcc/
2018-05-21  Tamar Christina  <tamar.christina@arm.com>

	* config/aarch64/aarch64-simd.md (aarch64_eor3qv8hi): Change to
	eor3q<mode>4.
	(aarch64_bcaxqv8hi): Change to bcaxq<mode>4.
	* config/aarch64/aarch64-simd-builtins.def (veor3q_u8, veor3q_u32,
	veor3q_u64, veor3q_s8, veor3q_s16, veor3q_s32, veor3q_s64, vbcaxq_u8,
	vbcaxq_u32, vbcaxq_u64, vbcaxq_s8, vbcaxq_s16, vbcaxq_s32,
	vbcaxq_s64): New.
	* config/aarch64/arm_neon.h: Likewise.
	* config/aarch64/iterators.md (VQ_I): New.

gcc/testsuite/
2018-05-21  Tamar Christina  <tamar.christina@arm.com>

	* gcc.target/aarch64/sha3.h (veor3q_u8, veor3q_u32,
	veor3q_u64, veor3q_s8, veor3q_s16, veor3q_s32, veor3q_s64, vbcaxq_u8,
	vbcaxq_u32, vbcaxq_u64, vbcaxq_s8, vbcaxq_s16, vbcaxq_s32,
	vbcaxq_s64): New.
	* gcc.target/aarch64/sha3_1.c: Likewise.
	* gcc.target/aarch64/sha3_2.c: Likewise.
	* gcc.target/aarch64/sha3_3.c: Likewise.

From-SVN: r260435

--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -599,14 +599,16 @@
   VAR1 (BINOPU, crypto_sha512su0q, 0, v2di)
   /* Implemented by aarch64_crypto_sha512su1qv2di.  */
   VAR1 (TERNOPU, crypto_sha512su1q, 0, v2di)
-  /* Implemented by aarch64_eor3qv8hi.  */
-  VAR1 (TERNOPU, eor3q, 0, v8hi)
+  /* Implemented by eor3q<mode>4.  */
+  BUILTIN_VQ_I (TERNOPU, eor3q, 4)
+  BUILTIN_VQ_I (TERNOP, eor3q, 4)
   /* Implemented by aarch64_rax1qv2di.  */
   VAR1 (BINOPU, rax1q, 0, v2di)
   /* Implemented by aarch64_xarqv2di.  */
   VAR1 (TERNOPUI, xarq, 0, v2di)
-  /* Implemented by aarch64_bcaxqv8hi.  */
-  VAR1 (TERNOPU, bcaxq, 0, v8hi)
+  /* Implemented by bcaxq<mode>4.  */
+  BUILTIN_VQ_I (TERNOPU, bcaxq, 4)
+  BUILTIN_VQ_I (TERNOP, bcaxq, 4)
   /* Implemented by aarch64_fml<f16mac1>l<f16quad>_low<mode>.  */
   VAR1 (TERNOP, fmlal_low, 0, v2sf)
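
The BUILTIN_VQ_I grouping used above is not hand-written: geniterators.sh
derives it from the new VQ_I iterator (see iterators.md below) into the
generated header aarch64-builtin-iterators.h.  Roughly, as a sketch of the
generated macro (not code from this patch):

  #define BUILTIN_VQ_I(T, N, MAP) \
    VAR4 (T, N, MAP, v16qi, v8hi, v4si, v2di)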

--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -5926,13 +5926,13 @@
 ;; sha3
 
-(define_insn "aarch64_eor3qv8hi"
-  [(set (match_operand:V8HI 0 "register_operand" "=w")
-	(xor:V8HI
-	 (xor:V8HI
-	  (match_operand:V8HI 2 "register_operand" "%w")
-	  (match_operand:V8HI 3 "register_operand" "w"))
-	 (match_operand:V8HI 1 "register_operand" "w")))]
+(define_insn "eor3q<mode>4"
+  [(set (match_operand:VQ_I 0 "register_operand" "=w")
+	(xor:VQ_I
+	 (xor:VQ_I
+	  (match_operand:VQ_I 2 "register_operand" "w")
+	  (match_operand:VQ_I 3 "register_operand" "w"))
+	 (match_operand:VQ_I 1 "register_operand" "w")))]
   "TARGET_SIMD && TARGET_SHA3"
   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
   [(set_attr "type" "crypto_sha3")]
@@ -5962,13 +5962,13 @@
   [(set_attr "type" "crypto_sha3")]
 )
 
-(define_insn "aarch64_bcaxqv8hi"
-  [(set (match_operand:V8HI 0 "register_operand" "=w")
-	(xor:V8HI
-	 (and:V8HI
-	  (not:V8HI (match_operand:V8HI 3 "register_operand" "w"))
-	  (match_operand:V8HI 2 "register_operand" "w"))
-	 (match_operand:V8HI 1 "register_operand" "w")))]
+(define_insn "bcaxq<mode>4"
+  [(set (match_operand:VQ_I 0 "register_operand" "=w")
+	(xor:VQ_I
+	 (and:VQ_I
+	  (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
+	  (match_operand:VQ_I 2 "register_operand" "w"))
+	 (match_operand:VQ_I 1 "register_operand" "w")))]
   "TARGET_SIMD && TARGET_SHA3"
   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
   [(set_attr "type" "crypto_sha3")]
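
Both templates express the operation with plain XOR/AND/NOT rtl rather than
an unspec, so combine is in principle free to form these instructions from
generic vector code as well.  A hedged example (not part of the patch;
whether the combination fires depends on combine's decisions):

  #include <arm_neon.h>

  /* With -march=armv8.2-a+sha3 this may fold into a single eor3.  */
  uint32x4_t
  xor3 (uint32x4_t a, uint32x4_t b, uint32x4_t c)
  {
    return a ^ b ^ c;
  }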

--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -32068,6 +32068,13 @@ vsha512su1q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
   return __builtin_aarch64_crypto_sha512su1qv2di_uuuu (__a, __b, __c);
 }
 
+__extension__ extern __inline uint8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
+{
+  return __builtin_aarch64_eor3qv16qi_uuuu (__a, __b, __c);
+}
+
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
@@ -32075,6 +32082,49 @@ veor3q_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
   return __builtin_aarch64_eor3qv8hi_uuuu (__a, __b, __c);
 }
 
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+{
+  return __builtin_aarch64_eor3qv4si_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
+{
+  return __builtin_aarch64_eor3qv2di_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
+{
+  return __builtin_aarch64_eor3qv16qi (__a, __b, __c);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+  return __builtin_aarch64_eor3qv8hi (__a, __b, __c);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+  return __builtin_aarch64_eor3qv4si (__a, __b, __c);
+}
+
+__extension__ extern __inline int64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+veor3q_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c)
+{
+  return __builtin_aarch64_eor3qv2di (__a, __b, __c);
+}
+
 __extension__ extern __inline uint64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vrax1q_u64 (uint64x2_t __a, uint64x2_t __b)
@@ -32089,12 +32139,63 @@ vxarq_u64 (uint64x2_t __a, uint64x2_t __b, const int imm6)
   return __builtin_aarch64_xarqv2di_uuus (__a, __b,imm6);
 }
 
+__extension__ extern __inline uint8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c)
+{
+  return __builtin_aarch64_bcaxqv16qi_uuuu (__a, __b, __c);
+}
+
 __extension__ extern __inline uint16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vbcaxq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c)
 {
   return __builtin_aarch64_bcaxqv8hi_uuuu (__a, __b, __c);
 }
 
+__extension__ extern __inline uint32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c)
+{
+  return __builtin_aarch64_bcaxqv4si_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline uint64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_u64 (uint64x2_t __a, uint64x2_t __b, uint64x2_t __c)
+{
+  return __builtin_aarch64_bcaxqv2di_uuuu (__a, __b, __c);
+}
+
+__extension__ extern __inline int8x16_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c)
+{
+  return __builtin_aarch64_bcaxqv16qi (__a, __b, __c);
+}
+
+__extension__ extern __inline int16x8_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c)
+{
+  return __builtin_aarch64_bcaxqv8hi (__a, __b, __c);
+}
+
+__extension__ extern __inline int32x4_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c)
+{
+  return __builtin_aarch64_bcaxqv4si (__a, __b, __c);
+}
+
+__extension__ extern __inline int64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vbcaxq_s64 (int64x2_t __a, int64x2_t __b, int64x2_t __c)
+{
+  return __builtin_aarch64_bcaxqv2di (__a, __b, __c);
+}
+
 #pragma GCC pop_options
 
 #pragma GCC push_options
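
These wrappers map directly onto the SHA-3/Keccak round primitives; a
hypothetical chi-step fragment (function name and operand choice are
illustrative, not from this patch):

  #include <arm_neon.h>

  /* Keccak chi on one 2x64-bit lane pair: out = a ^ (~b & c).
     vbcaxq_u64 (x, y, z) computes x ^ (y & ~z), hence the swap.  */
  static inline uint64x2_t
  keccak_chi (uint64x2_t a, uint64x2_t b, uint64x2_t c)
  {
    return vbcaxq_u64 (a, c, b);
  }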

--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -78,6 +78,9 @@
 ;; Quad vector modes.
 (define_mode_iterator VQ [V16QI V8HI V4SI V2DI V8HF V4SF V2DF])
 
+;; Quad integer vector modes.
+(define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI])
+
 ;; VQ without 2 element modes.
 (define_mode_iterator VQ_NO2E [V16QI V8HI V4SI V8HF V4SF])
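
Since <mode> in an insn name is replaced by the lower-cased mode name, this
one iterator turns eor3q<mode>4 into eor3qv16qi4, eor3qv8hi4, eor3qv4si4 and
eor3qv2di4 (and likewise for bcaxq<mode>4), which is where the
__builtin_aarch64_eor3q* and __builtin_aarch64_bcaxq* names used by
arm_neon.h come from.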
#include "arm_neon.h" #include "arm_neon.h"
uint16x8_t #define TEST_VEOR3(T, S) T \
test_veor3q_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) test_veor3q_ ## S (T a, T b, T c) \
{ { \
return veor3q_u16 (a, b, c); return veor3q_ ## S (a, b, c); \
} } \
#define TEST_VBCAX(T, S) T \
test_vbcaxq_ ## S (T a, T b, T c) \
{ \
return vbcaxq_ ## S (a, b, c); \
} \
TEST_VEOR3 (uint8x16_t, u8)
TEST_VEOR3 (uint16x8_t, u16)
TEST_VEOR3 (uint32x4_t, u32)
TEST_VEOR3 (uint64x2_t, u64)
TEST_VEOR3 (int8x16_t, s8)
TEST_VEOR3 (int16x8_t, s16)
TEST_VEOR3 (int32x4_t, s32)
TEST_VEOR3 (int64x2_t, s64)
uint64x2_t uint64x2_t
test_vrax1q_u64 (uint64x2_t a, uint64x2_t b) test_vrax1q_u64 (uint64x2_t a, uint64x2_t b)
...@@ -18,8 +34,12 @@ test_vxarq_u64 (uint64x2_t a, uint64x2_t b) ...@@ -18,8 +34,12 @@ test_vxarq_u64 (uint64x2_t a, uint64x2_t b)
return vxarq_u64 (a, b, 15); return vxarq_u64 (a, b, 15);
} }
uint16x8_t TEST_VBCAX (uint8x16_t, u8)
test_vbcaxq_u16 (uint16x8_t a, uint16x8_t b, uint16x8_t c) TEST_VBCAX (uint16x8_t, u16)
{ TEST_VBCAX (uint32x4_t, u32)
return vbcaxq_u16 (a, b, c); TEST_VBCAX (uint64x2_t, u64)
} TEST_VBCAX (int8x16_t, s8)
TEST_VBCAX (int16x8_t, s16)
TEST_VBCAX (int32x4_t, s32)
TEST_VBCAX (int64x2_t, s64)
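
Each instantiation expands back to exactly the kind of function the file
previously spelled out by hand; for example, TEST_VEOR3 (uint8x16_t, u8)
becomes:

  uint8x16_t
  test_veor3q_u8 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
  {
    return veor3q_u8 (a, b, c);
  }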

--- a/gcc/testsuite/gcc.target/aarch64/sha3_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_1.c
@@ -4,7 +4,7 @@
 #include "sha3.h"
 
-/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
 /* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
 /* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */
-/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */

--- a/gcc/testsuite/gcc.target/aarch64/sha3_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_2.c
@@ -3,7 +3,7 @@
 #include "sha3.h"
 
-/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
 /* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
 /* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */
-/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */

--- a/gcc/testsuite/gcc.target/aarch64/sha3_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sha3_3.c
@@ -3,7 +3,7 @@
 #include "sha3.h"
 
-/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "eor3\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
 /* { dg-final { scan-assembler-times "rax1\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 1 } } */
 /* { dg-final { scan-assembler-times "xar\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d, 15" 1 } } */
-/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 1 } } */
+/* { dg-final { scan-assembler-times "bcax\\tv\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b, v\[0-9\]+\.16b" 8 } } */
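
The expected counts rise from 1 to 8 because sha3.h now instantiates eight
element-type variants of each intrinsic and every variant emits one
instruction; the .16b operand form is unchanged throughout, since both
operations are bitwise and therefore element-width agnostic.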