Commit 3b9bc511 by Ilya Verbin Committed by Ilya Verbin

[AVX-512ER] vrsqrt28ps auto generation

gcc/
	* config/i386/i386.c (ix86_emit_swsqrtsf): Emit vrsqrt28ps.
	* config/i386/sse.md (define_expand "rsqrtv16sf2"): New.
gcc/testsuite/
	* gcc.target/i386/avx512er-vrsqrt28ps-3.c: New test.
	* gcc.target/i386/avx512er-vrsqrt28ps-4.c: New test.
	* gcc.target/i386/avx512er-vrsqrt28ps-5.c: New test.
	* gcc.target/i386/avx512er-vrsqrt28ps-6.c: New test.

From-SVN: r237649
parent 21db1c78
2016-06-21 Ilya Verbin <ilya.verbin@intel.com>
* config/i386/i386.c (ix86_emit_swsqrtsf): Emit vrsqrt28ps.
* config/i386/sse.md (define_expand "rsqrtv16sf2"): New.
2016-06-21 Ilya Verbin <ilya.verbin@intel.com>
* config/i386/i386.c (ix86_emit_swdivsf): Emit vrcp28ps.
2016-06-21 H.J. Lu <hongjiu.lu@intel.com>
......
...@@ -48774,6 +48774,24 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip) ...@@ -48774,6 +48774,24 @@ void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode, bool recip)
e2 = gen_reg_rtx (mode); e2 = gen_reg_rtx (mode);
e3 = gen_reg_rtx (mode); e3 = gen_reg_rtx (mode);
if (TARGET_AVX512ER && mode == V16SFmode)
{
if (recip)
/* res = rsqrt28(a) estimate */
emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
UNSPEC_RSQRT28)));
else
{
/* x0 = rsqrt28(a) estimate */
emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
UNSPEC_RSQRT28)));
/* res = rcp28(x0) estimate */
emit_insn (gen_rtx_SET (res, gen_rtx_UNSPEC (mode, gen_rtvec (1, x0),
UNSPEC_RCP28)));
}
return;
}
real_from_integer (&r, VOIDmode, -3, SIGNED); real_from_integer (&r, VOIDmode, -3, SIGNED);
mthree = const_double_from_real_value (r, SFmode); mthree = const_double_from_real_value (r, SFmode);
...@@ -1559,6 +1559,17 @@ ...@@ -1559,6 +1559,17 @@
DONE; DONE;
}) })
;; Expander for the named pattern "rsqrtv16sf2" (V16SF operands, wrapped
;; in UNSPEC_RSQRT28).  It is enabled only when both TARGET_SSE_MATH and
;; TARGET_AVX512ER hold.  The preparation statements pass operands 0 and 1
;; to ix86_emit_swsqrtsf with mode V16SFmode and recip == true, then end
;; with DONE, so the instructions come from that helper.
(define_expand "rsqrtv16sf2"
[(set (match_operand:V16SF 0 "register_operand")
(unspec:V16SF
[(match_operand:V16SF 1 "vector_operand")]
UNSPEC_RSQRT28))]
"TARGET_SSE_MATH && TARGET_AVX512ER"
{
ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
DONE;
})
(define_insn "<sse>_rsqrt<mode>2" (define_insn "<sse>_rsqrt<mode>2"
[(set (match_operand:VF1_128_256 0 "register_operand" "=x") [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
(unspec:VF1_128_256 (unspec:VF1_128_256
......
2016-06-21 Ilya Verbin <ilya.verbin@intel.com>
* gcc.target/i386/avx512er-vrsqrt28ps-3.c: New test.
* gcc.target/i386/avx512er-vrsqrt28ps-4.c: New test.
* gcc.target/i386/avx512er-vrsqrt28ps-5.c: New test.
* gcc.target/i386/avx512er-vrsqrt28ps-6.c: New test.
2016-06-21 Ilya Verbin <ilya.verbin@intel.com>
* gcc.target/i386/avx512er-vrcp28ps-3.c: New test.
* gcc.target/i386/avx512er-vrcp28ps-4.c: New test.
......
/* { dg-do run } */
/* { dg-require-effective-target avx512er } */
/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */
#include <math.h>
#include "avx512er-check.h"
#define MAX 1000
#define EPS 0.00001
/* Reference reciprocal square root: store 1.0 / sqrtf (a[i]) into r[i]
   for each of the first MAX elements.  The function is marked noinline
   and optimize (1), so it is built separately from the -O2 options the
   rest of the file uses.  */
__attribute__ ((noinline, optimize (1)))
void static
compute_rsqrt_ref (float *a, float *r)
{
  int idx = 0;

  while (idx < MAX)
    {
      /* sqrtf yields a float; the division by the double constant 1.0
         is then narrowed back to float on the store.  */
      float root = sqrtf (a[idx]);

      r[idx] = 1.0 / root;
      idx++;
    }
}
/* Function under test: the same 1.0 / sqrtf (a[i]) loop as
   compute_rsqrt_ref over the first MAX elements, but without the
   optimize (1) attribute, so it is compiled with the file's dg-options.
   NOTE(review): presumably this is the loop expected to be vectorized
   into vrsqrt28ps; avx512er-vrsqrt28ps-4.c scans the generated assembly,
   so keep the loop in exactly this form.  */
__attribute__ ((noinline))
void static
compute_rsqrt_exp (float *a, float *r)
{
for (int i = 0; i < MAX; i++)
r[i] = 1.0 / sqrtf (a[i]);
}
/* Test driver: build MAX inputs, run the reference and the tested
   reciprocal-square-root routines on them, and abort if any element's
   relative error exceeds EPS.
   NOTE(review): presumably called by the harness in avx512er-check.h.  */
void static
avx512er_test (void)
{
  float input[MAX], expected[MAX], actual[MAX];
  int k;

  /* Descending ramp starting at 8765.987 with step 8.6756.  */
  for (k = 0; k < MAX; k++)
    input[k] = 8765.987 - 8.6756 * k;

  compute_rsqrt_ref (input, expected);
  compute_rsqrt_exp (input, actual);

  for (k = 0; k < MAX; k++)
    {
      float rel_err = (expected[k] - actual[k]) / expected[k];

      /* Take the magnitude: negate anything that is not strictly
         positive.  */
      if (!(rel_err > 0.0))
        rel_err = -rel_err;

      if (rel_err > EPS)
        abort ();
    }
}
/* { dg-do compile } */
/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */
#include "avx512er-vrsqrt28ps-3.c"
/* { dg-final { scan-assembler-times "vrsqrt28ps\[^\n\r\]*zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-not "vrcp28ps\[^\n\r\]*zmm\[0-9\]+(?:\n|\[ \\t\]+#)" } } */
/* { dg-do run } */
/* { dg-require-effective-target avx512er } */
/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */
#include <math.h>
#include "avx512er-check.h"
#define MAX 1000
#define EPS 0.00001
/* Reference square root: store sqrtf (a[i]) into r[i] for each of the
   first MAX elements.  The function is marked noinline and optimize (1),
   so it is built separately from the -O2 options the rest of the file
   uses.  */
__attribute__ ((noinline, optimize (1)))
void static
compute_sqrt_ref (float *a, float *r)
{
  int idx = 0;

  while (idx < MAX)
    {
      r[idx] = sqrtf (a[idx]);
      idx++;
    }
}
/* Function under test: the same sqrtf (a[i]) loop as compute_sqrt_ref
   over the first MAX elements, but without the optimize (1) attribute,
   so it is compiled with the file's dg-options.
   NOTE(review): presumably this is the loop expected to be vectorized
   into one vrsqrt28ps plus one vrcp28ps; avx512er-vrsqrt28ps-6.c scans
   the generated assembly, so keep the loop in exactly this form.  */
__attribute__ ((noinline))
void static
compute_sqrt_exp (float *a, float *r)
{
for (int i = 0; i < MAX; i++)
r[i] = sqrtf (a[i]);
}
/* Test driver: build MAX inputs, run the reference and the tested
   square-root routines on them, and abort if any element's relative
   error exceeds EPS.
   NOTE(review): presumably called by the harness in avx512er-check.h.  */
void static
avx512er_test (void)
{
  float input[MAX], expected[MAX], actual[MAX];
  int k;

  /* Descending ramp starting at 8765.987 with step 8.6756.  */
  for (k = 0; k < MAX; k++)
    input[k] = 8765.987 - 8.6756 * k;

  compute_sqrt_ref (input, expected);
  compute_sqrt_exp (input, actual);

  for (k = 0; k < MAX; k++)
    {
      float rel_err = (expected[k] - actual[k]) / expected[k];

      /* Take the magnitude: negate anything that is not strictly
         positive.  */
      if (!(rel_err > 0.0))
        rel_err = -rel_err;

      if (rel_err > EPS)
        abort ();
    }
}
/* { dg-do compile } */
/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512er" } */
#include "avx512er-vrsqrt28ps-5.c"
/* { dg-final { scan-assembler-times "vrsqrt28ps\[^\n\r\]*zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vrcp28ps\[^\n\r\]*zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment