Commit df62b4af by Ilya Tocar Committed by Kirill Yukhin

avx512erintrin.h (_mm_rcp28_round_sd): Swap operands.

gcc/
	* config/i386/avx512erintrin.h (_mm_rcp28_round_sd): Swap operands.
	(_mm_rcp28_round_ss): Ditto.
	(_mm_rsqrt28_round_sd): Ditto.
	(_mm_rsqrt28_round_ss): Ditto.
	* config/i386/avx512fintrin.h (_mm_rcp14_sd): Ditto.
	(_mm_rcp14_ss): Ditto.
	(_mm_rsqrt14_sd): Ditto.
	(_mm_rsqrt14_ss): Ditto.
	* config/i386/sse.md (rsqrt14<mode>): Put nonimmediate operand as
	the first input operand, get rid of match_dup.
	(avx512er_exp2<mode><mask_name><round_saeonly_name>): Set type
	attribute to sse.
	(<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>):
	Ditto.
	(avx512er_vmrcp28<mode><round_saeonly_name>): Put nonimmediate
	operand as the first input operand, set type attribute.
	(<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>):
	Set type attribute.
	(avx512er_vmrsqrt28<mode><round_saeonly_name>): Put nonimmediate
	operand as the first input operand, set type attribute.


gcc/testsuite/
	* gcc.target/i386/avx512er-vrcp28sd-2.c: Distinguish src1 and src2.
	* gcc.target/i386/avx512er-vrcp28ss-2.c: Call correct intrinsic.
	* gcc.target/i386/avx512er-vrsqrt28sd-2.c: Distinguish src1 and src2.
	* gcc.target/i386/avx512er-vrsqrt28ss-2.c: Ditto.
	* gcc.target/i386/avx512f-vrcp14sd-2.c: Fix reference calculation.
	* gcc.target/i386/avx512f-vrcp14ss-2.c: Fix reference calculation.


Co-Authored-By: Kirill Yukhin <kirill.yukhin@intel.com>

From-SVN: r207932
parent a13cfd3e
2014-02-20 Ilya Tocar <ilya.tocar@intel.com>
Kirill Yukhin <kirill.yukhin@intel.com>
* config/i386/avx512erintrin.h (_mm_rcp28_round_sd): Swap operands.
(_mm_rcp28_round_ss): Ditto.
(_mm_rsqrt28_round_sd): Ditto.
(_mm_rsqrt28_round_ss): Ditto.
* config/i386/avx512fintrin.h (_mm_rcp14_sd): Ditto.
(_mm_rcp14_ss): Ditto.
(_mm_rsqrt14_sd): Ditto.
(_mm_rsqrt14_ss): Ditto.
* config/i386/sse.md (rsqrt14<mode>): Put nonimmediate operand as
the first input operand, get rid of match_dup.
(avx512er_exp2<mode><mask_name><round_saeonly_name>): Set type
attribute to sse.
(<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>):
Ditto.
(avx512er_vmrcp28<mode><round_saeonly_name>): Put nonimmediate
operand as the first input operand, set type attribute.
(<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>):
Set type attribute.
(avx512er_vmrsqrt28<mode><round_saeonly_name>): Put nonimmediate
operand as the first input operand, set type attribute.
2014-02-19 Bill Schmidt <wschmidt@linux.vnet.ibm.com> 2014-02-19 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
* config/rs6000/rs6000.c (vspltis_constant): Fix most significant * config/rs6000/rs6000.c (vspltis_constant): Fix most significant
......
...@@ -163,8 +163,8 @@ extern __inline __m128d ...@@ -163,8 +163,8 @@ extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R) _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
{ {
return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __A, return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
(__v2df) __B, (__v2df) __A,
__R); __R);
} }
...@@ -172,8 +172,8 @@ extern __inline __m128 ...@@ -172,8 +172,8 @@ extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R) _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
{ {
return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __A, return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
(__v4sf) __B, (__v4sf) __A,
__R); __R);
} }
...@@ -237,8 +237,8 @@ extern __inline __m128d ...@@ -237,8 +237,8 @@ extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R) _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
{ {
return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __A, return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
(__v2df) __B, (__v2df) __A,
__R); __R);
} }
...@@ -246,8 +246,8 @@ extern __inline __m128 ...@@ -246,8 +246,8 @@ extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R) _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
{ {
return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __A, return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
(__v4sf) __B, (__v4sf) __A,
__R); __R);
} }
...@@ -375,16 +375,16 @@ _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R) ...@@ -375,16 +375,16 @@ _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
_mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION) _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_sd(A, B) \ #define _mm_rcp28_sd(A, B) \
__builtin_ia32_rcp28sd_round(A, B, _MM_FROUND_CUR_DIRECTION) __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
#define _mm_rcp28_ss(A, B) \ #define _mm_rcp28_ss(A, B) \
__builtin_ia32_rcp28ss_round(A, B, _MM_FROUND_CUR_DIRECTION) __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_sd(A, B) \ #define _mm_rsqrt28_sd(A, B) \
__builtin_ia32_rsqrt28sd_round(A, B, _MM_FROUND_CUR_DIRECTION) __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
#define _mm_rsqrt28_ss(A, B) \ #define _mm_rsqrt28_ss(A, B) \
__builtin_ia32_rsqrt28ss_round(A, B, _MM_FROUND_CUR_DIRECTION) __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
#ifdef __DISABLE_AVX512ER__ #ifdef __DISABLE_AVX512ER__
#undef __DISABLE_AVX512ER__ #undef __DISABLE_AVX512ER__
......
...@@ -1470,16 +1470,16 @@ extern __inline __m128d ...@@ -1470,16 +1470,16 @@ extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_sd (__m128d __A, __m128d __B) _mm_rcp14_sd (__m128d __A, __m128d __B)
{ {
return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A, return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
(__v2df) __B); (__v2df) __A);
} }
extern __inline __m128 extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_ss (__m128 __A, __m128 __B) _mm_rcp14_ss (__m128 __A, __m128 __B)
{ {
return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A, return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
(__v4sf) __B); (__v4sf) __A);
} }
extern __inline __m512d extern __inline __m512d
...@@ -1544,16 +1544,16 @@ extern __inline __m128d ...@@ -1544,16 +1544,16 @@ extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_sd (__m128d __A, __m128d __B) _mm_rsqrt14_sd (__m128d __A, __m128d __B)
{ {
return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A, return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
(__v2df) __B); (__v2df) __A);
} }
extern __inline __m128 extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_ss (__m128 __A, __m128 __B) _mm_rsqrt14_ss (__m128 __A, __m128 __B)
{ {
return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A, return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
(__v4sf) __B); (__v4sf) __A);
} }
#ifdef __OPTIMIZE__ #ifdef __OPTIMIZE__
......
...@@ -1551,13 +1551,12 @@ ...@@ -1551,13 +1551,12 @@
[(set (match_operand:VF_128 0 "register_operand" "=v") [(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128 (vec_merge:VF_128
(unspec:VF_128 (unspec:VF_128
[(match_operand:VF_128 1 "register_operand" "v") [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
(match_operand:VF_128 2 "nonimmediate_operand" "vm")]
UNSPEC_RSQRT14) UNSPEC_RSQRT14)
(match_dup 1) (match_operand:VF_128 2 "register_operand" "v")
(const_int 1)))] (const_int 1)))]
"TARGET_AVX512F" "TARGET_AVX512F"
"vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}" "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "type" "sse") [(set_attr "type" "sse")
(set_attr "prefix" "evex") (set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")]) (set_attr "mode" "<MODE>")])
...@@ -12804,6 +12803,7 @@ ...@@ -12804,6 +12803,7 @@
"TARGET_AVX512ER" "TARGET_AVX512ER"
"vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "prefix" "evex") [(set_attr "prefix" "evex")
(set_attr "type" "sse")
(set_attr "mode" "<MODE>")]) (set_attr "mode" "<MODE>")])
(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>" (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
...@@ -12814,6 +12814,7 @@ ...@@ -12814,6 +12814,7 @@
"TARGET_AVX512ER" "TARGET_AVX512ER"
"vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "prefix" "evex") [(set_attr "prefix" "evex")
(set_attr "type" "sse")
(set_attr "mode" "<MODE>")]) (set_attr "mode" "<MODE>")])
(define_insn "avx512er_vmrcp28<mode><round_saeonly_name>" (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
...@@ -12825,9 +12826,10 @@ ...@@ -12825,9 +12826,10 @@
(match_operand:VF_128 2 "register_operand" "v") (match_operand:VF_128 2 "register_operand" "v")
(const_int 1)))] (const_int 1)))]
"TARGET_AVX512ER" "TARGET_AVX512ER"
"vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}" "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
[(set_attr "length_immediate" "1") [(set_attr "length_immediate" "1")
(set_attr "prefix" "evex") (set_attr "prefix" "evex")
(set_attr "type" "sse")
(set_attr "mode" "<MODE>")]) (set_attr "mode" "<MODE>")])
(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>" (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
...@@ -12838,6 +12840,7 @@ ...@@ -12838,6 +12840,7 @@
"TARGET_AVX512ER" "TARGET_AVX512ER"
"vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}" "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
[(set_attr "prefix" "evex") [(set_attr "prefix" "evex")
(set_attr "type" "sse")
(set_attr "mode" "<MODE>")]) (set_attr "mode" "<MODE>")])
(define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>" (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
...@@ -12849,8 +12852,9 @@ ...@@ -12849,8 +12852,9 @@
(match_operand:VF_128 2 "register_operand" "v") (match_operand:VF_128 2 "register_operand" "v")
(const_int 1)))] (const_int 1)))]
"TARGET_AVX512ER" "TARGET_AVX512ER"
"vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}" "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
[(set_attr "length_immediate" "1") [(set_attr "length_immediate" "1")
(set_attr "type" "sse")
(set_attr "prefix" "evex") (set_attr "prefix" "evex")
(set_attr "mode" "<MODE>")]) (set_attr "mode" "<MODE>")])
......
2014-02-20 Ilya Tocar <ilya.tocar@intel.com>
Kirill Yukhin <kirill.yukhin@intel.com>
* gcc.target/i386/avx512er-vrcp28sd-2.c: Distinguish src1 and src2.
* gcc.target/i386/avx512er-vrcp28ss-2.c: Call correct intrinsic.
* gcc.target/i386/avx512er-vrsqrt28sd-2.c: Distinguish src1 and src2.
* gcc.target/i386/avx512er-vrsqrt28ss-2.c: Ditto.
* gcc.target/i386/avx512f-vrcp14sd-2.c: Fix reference calculation.
* gcc.target/i386/avx512f-vrcp14ss-2.c: Fix reference calculation.
2014-02-19 Jakub Jelinek <jakub@redhat.com> 2014-02-19 Jakub Jelinek <jakub@redhat.com>
PR c/37743 PR c/37743
......
...@@ -10,19 +10,20 @@ ...@@ -10,19 +10,20 @@
void static void static
avx512er_test (void) avx512er_test (void)
{ {
union128d src, res; union128d src1, src2, res;
double res_ref[2]; double res_ref[2];
int i; int i;
for (i = 0; i < 2; i++) for (i = 0; i < 2; i++)
{ {
src.a[i] = 179.345 - 6.5645 * i; src1.a[i] = 179.345 - 6.5645 * i;
res_ref[i] = src.a[i]; src2.a[i] = 204179.345 + 6.5645 * i;
res_ref[i] = src1.a[i];
} }
res_ref[0] = 1.0 / src.a[0]; res_ref[0] = 1.0 / src2.a[0];
res.x = _mm_rcp28_round_sd (src.x, src.x, _MM_FROUND_NO_EXC); res.x = _mm_rcp28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
if (checkVd (res.a, res_ref, 2)) if (checkVd (res.a, res_ref, 2))
abort (); abort ();
......
...@@ -10,19 +10,20 @@ ...@@ -10,19 +10,20 @@
void static void static
avx512er_test (void) avx512er_test (void)
{ {
union128 src, res; union128 src1, src2, res;
float res_ref[4]; float res_ref[4];
int i; int i;
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
{ {
src.a[i] = 179.345 - 6.5645 * i; src1.a[i] = 179.345 - 6.5645 * i;
res_ref[i] = src.a[i]; src2.a[i] = 179345.006 + 6.5645 * i;
res_ref[i] = src1.a[i];
} }
res_ref[0] = 1.0 / src.a[0]; res_ref[0] = 1.0 / src2.a[0];
res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC); res.x = _mm_rcp28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
if (checkVf (res.a, res_ref, 4)) if (checkVf (res.a, res_ref, 4))
abort (); abort ();
......
...@@ -10,19 +10,20 @@ ...@@ -10,19 +10,20 @@
void static void static
avx512er_test (void) avx512er_test (void)
{ {
union128d src, res; union128d src1, src2, res;
double res_ref[2]; double res_ref[2];
int i; int i;
for (i = 0; i < 2; i++) for (i = 0; i < 2; i++)
{ {
src.a[i] = 179.345 - 6.5645 * i; src1.a[i] = 179.345 - 6.5645 * i;
res_ref[i] = src.a[i]; src2.a[i] = 45 - 6.5645 * i;
res_ref[i] = src1.a[i];
} }
res_ref[0] = 1.0 / sqrt (src.a[0]); res_ref[0] = 1.0 / sqrt (src2.a[0]);
res.x = _mm_rsqrt28_round_sd (src.x, src.x, _MM_FROUND_NO_EXC); res.x = _mm_rsqrt28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
if (checkVd (res.a, res_ref, 2)) if (checkVd (res.a, res_ref, 2))
abort (); abort ();
......
...@@ -10,19 +10,20 @@ ...@@ -10,19 +10,20 @@
void static void static
avx512er_test (void) avx512er_test (void)
{ {
union128 src, res; union128 src1, src2, res;
float res_ref[4]; float res_ref[4];
int i; int i;
for (i = 0; i < 4; i++) for (i = 0; i < 4; i++)
{ {
src.a[i] = 179.345 - 6.5645 * i; src1.a[i] = 179.345 - 6.5645 * i;
res_ref[i] = src.a[i]; src2.a[i] = 179221345 + 6.5645 * i;
res_ref[i] = src1.a[i];
} }
res_ref[0] = 1.0 / sqrt (src.a[0]); res_ref[0] = 1.0 / sqrt (src2.a[0]);
res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC); res.x = _mm_rsqrt28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
if (checkVf (res.a, res_ref, 4)) if (checkVf (res.a, res_ref, 4))
abort (); abort ();
......
...@@ -8,8 +8,8 @@ ...@@ -8,8 +8,8 @@
static void static void
compute_vrcp14sd (double *s1, double *s2, double *r) compute_vrcp14sd (double *s1, double *s2, double *r)
{ {
r[0] = 1.0 / s1[0]; r[0] = 1.0 / s2[0];
r[1] = s2[1]; r[1] = s1[1];
} }
static void static void
......
...@@ -8,10 +8,10 @@ ...@@ -8,10 +8,10 @@
static void static void
compute_vrcp14ss (float *s1, float *s2, float *r) compute_vrcp14ss (float *s1, float *s2, float *r)
{ {
r[0] = 1.0 / s1[0]; r[0] = 1.0 / s2[0];
r[1] = s2[1]; r[1] = s1[1];
r[2] = s2[2]; r[2] = s1[2];
r[3] = s2[3]; r[3] = s1[3];
} }
static void static void
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment