Commit 3a0e583b by Jakub Jelinek

i386: Fix some -O0 avx2intrin.h and xopintrin.h intrinsic macros [PR94046]

As the testcases show, the macros we have for -O0 for intrinsics that require
constant argument(s) should first cast the argument to the type the -O1+
inline uses and afterwards to whatever type e.g. a builtin needs.
The PR reported one macro which violated this, so I've grepped for all
double-casts and filtered out the meaningful ones, i.e. those where the first
cast is to an __m{128,256,512}{,d,i} type and the second cast is to a same
sized __v* type with the same kind of element type (float, double, integral).
The 7 remaining macros were using mismatched casts, and I've double checked
them against the inline function types.

2020-03-05  Jakub Jelinek  <jakub@redhat.com>

	PR target/94046
	* config/i386/avx2intrin.h (_mm_mask_i32gather_ps): Fix first cast of
	SRC and MASK arguments to __m128 from __m128d.
	(_mm256_mask_i32gather_ps): Fix first cast of MASK argument to __m256
	from __m256d.
	(_mm_mask_i64gather_ps): Fix first cast of MASK argument to __m128
	from __m128d.
	* config/i386/xopintrin.h (_mm_permute2_pd): Fix first cast of C
	argument to __m128i from __m128d.
	(_mm256_permute2_pd): Fix first cast of C argument to __m256i from
	__m256d.
	(_mm_permute2_ps): Fix first cast of C argument to __m128i from __m128.
	(_mm256_permute2_ps): Fix first cast of C argument to __m256i from
	__m256.

	* g++.target/i386/pr94046-1.C: New test.
	* g++.target/i386/pr94046-2.C: New test.
parent 2d22ab64
2020-03-05 Jakub Jelinek <jakub@redhat.com>
PR target/94046
* config/i386/avx2intrin.h (_mm_mask_i32gather_ps): Fix first cast of
SRC and MASK arguments to __m128 from __m128d.
(_mm256_mask_i32gather_ps): Fix first cast of MASK argument to __m256
from __m256d.
(_mm_mask_i64gather_ps): Fix first cast of MASK argument to __m128
from __m128d.
* config/i386/xopintrin.h (_mm_permute2_pd): Fix first cast of C
argument to __m128i from __m128d.
(_mm256_permute2_pd): Fix first cast of C argument to __m256i from
__m256d.
(_mm_permute2_ps): Fix first cast of C argument to __m128i from __m128.
(_mm256_permute2_ps): Fix first cast of C argument to __m256i from
__m256.
2020-03-05 Delia Burduv <delia.burduv@arm.com> 2020-03-05 Delia Burduv <delia.burduv@arm.com>
* config/arm/arm_neon.h (vbfmmlaq_f32): New. * config/arm/arm_neon.h (vbfmmlaq_f32): New.
......
...@@ -1736,10 +1736,10 @@ _mm256_mask_i64gather_epi32 (__m128i __src, int const *__base, ...@@ -1736,10 +1736,10 @@ _mm256_mask_i64gather_epi32 (__m128i __src, int const *__base,
(int)SCALE) (int)SCALE)
#define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \ #define _mm_mask_i32gather_ps(SRC, BASE, INDEX, MASK, SCALE) \
(__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128d)SRC, \ (__m128) __builtin_ia32_gathersiv4sf ((__v4sf)(__m128)SRC, \
(float const *)BASE, \ (float const *)BASE, \
(__v4si)(__m128i)INDEX, \ (__v4si)(__m128i)INDEX, \
(__v4sf)(__m128d)MASK, \ (__v4sf)(__m128)MASK, \
(int)SCALE) (int)SCALE)
#define _mm256_i32gather_ps(BASE, INDEX, SCALE) \ #define _mm256_i32gather_ps(BASE, INDEX, SCALE) \
...@@ -1754,7 +1754,7 @@ _mm256_mask_i64gather_epi32 (__m128i __src, int const *__base, ...@@ -1754,7 +1754,7 @@ _mm256_mask_i64gather_epi32 (__m128i __src, int const *__base,
(__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)SRC, \ (__m256) __builtin_ia32_gathersiv8sf ((__v8sf)(__m256)SRC, \
(float const *)BASE, \ (float const *)BASE, \
(__v8si)(__m256i)INDEX, \ (__v8si)(__m256i)INDEX, \
(__v8sf)(__m256d)MASK, \ (__v8sf)(__m256)MASK, \
(int)SCALE) (int)SCALE)
#define _mm_i64gather_ps(BASE, INDEX, SCALE) \ #define _mm_i64gather_ps(BASE, INDEX, SCALE) \
...@@ -1769,7 +1769,7 @@ _mm256_mask_i64gather_epi32 (__m128i __src, int const *__base, ...@@ -1769,7 +1769,7 @@ _mm256_mask_i64gather_epi32 (__m128i __src, int const *__base,
(__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)SRC, \ (__m128) __builtin_ia32_gatherdiv4sf ((__v4sf)(__m128)SRC, \
(float const *)BASE, \ (float const *)BASE, \
(__v2di)(__m128i)INDEX, \ (__v2di)(__m128i)INDEX, \
(__v4sf)(__m128d)MASK, \ (__v4sf)(__m128)MASK, \
(int)SCALE) (int)SCALE)
#define _mm256_i64gather_ps(BASE, INDEX, SCALE) \ #define _mm256_i64gather_ps(BASE, INDEX, SCALE) \
......
...@@ -814,25 +814,25 @@ _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I) ...@@ -814,25 +814,25 @@ _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
#define _mm_permute2_pd(X, Y, C, I) \ #define _mm_permute2_pd(X, Y, C, I) \
((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \ ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), \ (__v2df)(__m128d)(Y), \
(__v2di)(__m128d)(C), \ (__v2di)(__m128i)(C), \
(int)(I))) (int)(I)))
#define _mm256_permute2_pd(X, Y, C, I) \ #define _mm256_permute2_pd(X, Y, C, I) \
((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \ ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \
(__v4df)(__m256d)(Y), \ (__v4df)(__m256d)(Y), \
(__v4di)(__m256d)(C), \ (__v4di)(__m256i)(C), \
(int)(I))) (int)(I)))
#define _mm_permute2_ps(X, Y, C, I) \ #define _mm_permute2_ps(X, Y, C, I) \
((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \ ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), \ (__v4sf)(__m128)(Y), \
(__v4si)(__m128)(C), \ (__v4si)(__m128i)(C), \
(int)(I))) (int)(I)))
#define _mm256_permute2_ps(X, Y, C, I) \ #define _mm256_permute2_ps(X, Y, C, I) \
((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \ ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \
(__v8sf)(__m256)(Y), \ (__v8sf)(__m256)(Y), \
(__v8si)(__m256)(C), \ (__v8si)(__m256i)(C), \
(int)(I))) (int)(I)))
#endif /* __OPTIMIZE__ */ #endif /* __OPTIMIZE__ */
......
2020-03-05 Jakub Jelinek <jakub@redhat.com>
PR target/94046
* g++.target/i386/pr94046-1.C: New test.
* g++.target/i386/pr94046-2.C: New test.
2020-03-05 Uroš Bizjak <ubizjak@gmail.com> 2020-03-05 Uroš Bizjak <ubizjak@gmail.com>
* g++.dg/asan/asan_test.C (dg-options): Add * g++.dg/asan/asan_test.C (dg-options): Add
......
// PR target/94046
// { dg-do compile }
// { dg-options "-O2 -mavx2 -mxop" }
#include <x86intrin.h>
// S (x) declares a struct named `x` whose only member is a conversion operator
// to the vector type `__x` (e.g. struct m128 converts to __m128).  The operator
// is only declared, never defined, which is sufficient for a compile-only test.
#define S(x) struct x { operator __##x (); };
// One such struct for each vector type used as a parameter type below.
S (m128)
S (m128d)
S (m128i)
S (m256)
S (m256d)
S (m256i)
// _mm_mask_i32gather_ps with class-type SRC, INDEX and MASK arguments that are
// only convertible to the vector types via their conversion operators.
// PR target/94046: the macro form used to cast SRC and MASK to __m128d first
// instead of __m128.
__m128
f1 (m128 src, float const *base, m128i idx, m128 mask)
{
return _mm_mask_i32gather_ps (src, base, idx, mask, 2);
}
// _mm256_mask_i32gather_ps with class-type SRC, INDEX and MASK arguments that
// are only convertible to the vector types via their conversion operators.
// PR target/94046: the macro form used to cast MASK to __m256d first instead
// of __m256.
__m256
f2 (m256 src, float const *base, m256i idx, m256 mask)
{
return _mm256_mask_i32gather_ps (src, base, idx, mask, 2);
}
// _mm_mask_i64gather_ps with class-type SRC, INDEX and MASK arguments that are
// only convertible to the vector types via their conversion operators.
// PR target/94046: the macro form used to cast MASK to __m128d first instead
// of __m128.
__m128
f3 (m128 src, float const *base, m128i idx, m128 mask)
{
return _mm_mask_i64gather_ps (src, base, idx, mask, 2);
}
// _mm_permute2_pd with class-type X, Y and C arguments that are only
// convertible to the vector types via their conversion operators.
// PR target/94046: the macro form used to cast C to __m128d first instead of
// __m128i.
__m128d
f4 (m128d x, m128d y, m128i c)
{
return _mm_permute2_pd (x, y, c, 3);
}
// _mm_permute2_ps with class-type X, Y and C arguments that are only
// convertible to the vector types via their conversion operators.
// PR target/94046: the macro form used to cast C to __m128 first instead of
// __m128i.
__m128
f5 (m128 x, m128 y, m128i c)
{
return _mm_permute2_ps (x, y, c, 3);
}
// _mm256_permute2_pd with class-type X, Y and C arguments that are only
// convertible to the vector types via their conversion operators.
// PR target/94046: the macro form used to cast C to __m256d first instead of
// __m256i.
__m256d
f6 (m256d x, m256d y, m256i c)
{
return _mm256_permute2_pd (x, y, c, 3);
}
// _mm256_permute2_ps with class-type X, Y and C arguments that are only
// convertible to the vector types via their conversion operators.
// PR target/94046: the macro form used to cast C to __m256 first instead of
// __m256i.
__m256
f7 (m256 x, m256 y, m256i c)
{
return _mm256_permute2_ps (x, y, c, 3);
}
// PR target/94046
// { dg-do compile }
// { dg-options "-O0 -mavx2 -mxop" }
#include "pr94046-1.C"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment