Commit f8aa2b2c by Sebastian Peryt Committed by Kirill Yukhin

Scalar mask and round RTL templates

gcc/
	* config/i386/subst.md (mask_scalar, round_scalar,
	round_saeonly_scalar): New meta-templates.
	(mask_scalar_name, mask_scalar_operand3, round_scalar_name,
	round_scalar_mask_operand3, round_scalar_mask_op3,
	round_scalar_constraint, round_scalar_prefix, round_saeonly_scalar_name,
	round_saeonly_scalar_mask_operand3, round_saeonly_scalar_mask_op3,
	round_saeonly_scalar_constraint,
	round_saeonly_scalar_prefix): New subst attributes.
	* config/i386/sse.md
	(<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>): Renamed to ...
	<sse>_vm<plusminus_insn><mode>3<mask_scalar_name>
	<round_scalar_name> ... this.
	(<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>): Renamed to ...
	<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name>
	<round_scalar_name> ... this.
	(<sse>_vm<code><mode>3<mask_name><round_saeonly_name>): Renamed to ...
	<sse>_vm<code><mode>3<mask_scalar_name>
	<round_saeonly_scalar_name> ... this.
	(v<plusminus_mnemonic><ssescalarmodesuffix>
	\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
	%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
	v<plusminus_mnemonic><ssescalarmodesuffix>
	\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
	%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
	(v<multdiv_mnemonic><ssescalarmodesuffix>
	\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
	%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
	v<multdiv_mnemonic><ssescalarmodesuffix>
	\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
	%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
	(v<maxmin_float><ssescalarmodesuffix>
	\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|
	%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}): Changed to ...
	v<maxmin_float><ssescalarmodesuffix>
	\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
	%0<mask_scalar_operand3>, %1, %<iptr>2
	<round_saeonly_scalar_mask_op3>} ... this.

gcc/testsuite/
	* gcc.target/i386/avx512f-vaddsd-3.c: New test for mask 0 verification.
	* gcc.target/i386/avx512f-vaddss-3.c: Ditto.
	* gcc.target/i386/avx512f-vdivsd-3.c: Ditto.
	* gcc.target/i386/avx512f-vdivss-3.c: Ditto.
	* gcc.target/i386/avx512f-vmaxsd-3.c: Ditto.
	* gcc.target/i386/avx512f-vmaxss-3.c: Ditto.
	* gcc.target/i386/avx512f-vminsd-3.c: Ditto.
	* gcc.target/i386/avx512f-vminss-3.c: Ditto.
	* gcc.target/i386/avx512f-vmulsd-3.c: Ditto.
	* gcc.target/i386/avx512f-vmulss-3.c: Ditto.
	* gcc.target/i386/avx512f-vsubsd-3.c: Ditto.
	* gcc.target/i386/avx512f-vsubss-3.c: Ditto.

From-SVN: r250006
parent 75e2d19b
2017-07-05 Sebastian Peryt <sebastian.peryt@intel.com>
* config/i386/subst.md (mask_scalar, round_scalar,
round_saeonly_scalar): New meta-templates.
(mask_scalar_name, mask_scalar_operand3, round_scalar_name,
round_scalar_mask_operand3, round_scalar_mask_op3,
round_scalar_constraint, round_scalar_prefix, round_saeonly_scalar_name,
round_saeonly_scalar_mask_operand3, round_saeonly_scalar_mask_op3,
round_saeonly_scalar_constraint,
round_saeonly_scalar_prefix): New subst attribute.
* config/i386/sse.md
(<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>): Renamed to ...
<sse>_vm<plusminus_insn><mode>3<mask_scalar_name>
<round_scalar_name> ... this.
(<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>): Renamed to ...
<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name>
<round_scalar_name> ... this.
(<sse>_vm<code><mode>3<mask_name><round_saeonly_name>): Renamed to ...
<sse>_vm<code><mode>3<mask_scalar_name>
<round_saeonly_scalar_name> ... this.
(v<plusminus_mnemonic><ssescalarmodesuffix>
\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
v<plusminus_mnemonic><ssescalarmodesuffix>
\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
(v<multdiv_mnemonic><ssescalarmodesuffix>
\t{<round_mask_op3>%2, %1, %0<mask_operand3>|
%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}): Changed to ...
v<multdiv_mnemonic><ssescalarmodesuffix>
\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>} ... this.
(v<maxmin_float><ssescalarmodesuffix>
\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|
%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}): Changed to ...
v<maxmin_float><ssescalarmodesuffix>
\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|
%0<mask_scalar_operand3>, %1, %<iptr>2
<round_saeonly_scalar_mask_op3>} ... this.
2017-07-05 Richard Earnshaw <rearnsha@arm.com> 2017-07-05 Richard Earnshaw <rearnsha@arm.com>
* config/arm/arm.c (arm_fixed_condition_code_regs): New function. * config/arm/arm.c (arm_fixed_condition_code_regs): New function.
......
...@@ -1568,21 +1568,21 @@ ...@@ -1568,21 +1568,21 @@
(set_attr "prefix" "<mask_prefix3>") (set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")]) (set_attr "mode" "<MODE>")])
(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_name><round_name>" (define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v") [(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128 (vec_merge:VF_128
(plusminus:VF_128 (plusminus:VF_128
(match_operand:VF_128 1 "register_operand" "0,v") (match_operand:VF_128 1 "register_operand" "0,v")
(match_operand:VF_128 2 "vector_operand" "xBm,<round_constraint>")) (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
(match_dup 1) (match_dup 1)
(const_int 1)))] (const_int 1)))]
"TARGET_SSE" "TARGET_SSE"
"@ "@
<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}" v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
[(set_attr "isa" "noavx,avx") [(set_attr "isa" "noavx,avx")
(set_attr "type" "sseadd") (set_attr "type" "sseadd")
(set_attr "prefix" "<round_prefix>") (set_attr "prefix" "<round_scalar_prefix>")
(set_attr "mode" "<ssescalarmode>")]) (set_attr "mode" "<ssescalarmode>")])
(define_expand "mul<mode>3<mask_name><round_name>" (define_expand "mul<mode>3<mask_name><round_name>"
...@@ -1608,21 +1608,21 @@ ...@@ -1608,21 +1608,21 @@
(set_attr "btver2_decode" "direct,double") (set_attr "btver2_decode" "direct,double")
(set_attr "mode" "<MODE>")]) (set_attr "mode" "<MODE>")])
(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_name><round_name>" (define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v") [(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128 (vec_merge:VF_128
(multdiv:VF_128 (multdiv:VF_128
(match_operand:VF_128 1 "register_operand" "0,v") (match_operand:VF_128 1 "register_operand" "0,v")
(match_operand:VF_128 2 "vector_operand" "xBm,<round_constraint>")) (match_operand:VF_128 2 "vector_operand" "xBm,<round_scalar_constraint>"))
(match_dup 1) (match_dup 1)
(const_int 1)))] (const_int 1)))]
"TARGET_SSE" "TARGET_SSE"
"@ "@
<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_mask_op3>}" v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
[(set_attr "isa" "noavx,avx") [(set_attr "isa" "noavx,avx")
(set_attr "type" "sse<multdiv_mnemonic>") (set_attr "type" "sse<multdiv_mnemonic>")
(set_attr "prefix" "<round_prefix>") (set_attr "prefix" "<round_scalar_prefix>")
(set_attr "btver2_decode" "direct,double") (set_attr "btver2_decode" "direct,double")
(set_attr "mode" "<ssescalarmode>")]) (set_attr "mode" "<ssescalarmode>")])
...@@ -1944,22 +1944,22 @@ ...@@ -1944,22 +1944,22 @@
(set_attr "prefix" "<mask_prefix3>") (set_attr "prefix" "<mask_prefix3>")
(set_attr "mode" "<MODE>")]) (set_attr "mode" "<MODE>")])
(define_insn "<sse>_vm<code><mode>3<mask_name><round_saeonly_name>" (define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
[(set (match_operand:VF_128 0 "register_operand" "=x,v") [(set (match_operand:VF_128 0 "register_operand" "=x,v")
(vec_merge:VF_128 (vec_merge:VF_128
(smaxmin:VF_128 (smaxmin:VF_128
(match_operand:VF_128 1 "register_operand" "0,v") (match_operand:VF_128 1 "register_operand" "0,v")
(match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_constraint>")) (match_operand:VF_128 2 "vector_operand" "xBm,<round_saeonly_scalar_constraint>"))
(match_dup 1) (match_dup 1)
(const_int 1)))] (const_int 1)))]
"TARGET_SSE" "TARGET_SSE"
"@ "@
<maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2} <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %<iptr>2<round_saeonly_mask_op3>}" v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
[(set_attr "isa" "noavx,avx") [(set_attr "isa" "noavx,avx")
(set_attr "type" "sse") (set_attr "type" "sse")
(set_attr "btver2_sse_attr" "maxmin") (set_attr "btver2_sse_attr" "maxmin")
(set_attr "prefix" "<round_saeonly_prefix>") (set_attr "prefix" "<round_saeonly_scalar_prefix>")
(set_attr "mode" "<ssescalarmode>")]) (set_attr "mode" "<ssescalarmode>")])
(define_insn "avx_addsubv4df3" (define_insn "avx_addsubv4df3"
......
...@@ -236,3 +236,66 @@ ...@@ -236,3 +236,66 @@
(match_dup 3) (match_dup 3)
(match_operand:SUBST_V 4 "vector_move_operand") (match_operand:SUBST_V 4 "vector_move_operand")
(match_operand:<avx512fmaskmode> 5 "register_operand")]) (match_operand:<avx512fmaskmode> 5 "register_operand")])
;; Attributes tied to the "mask_scalar" subst.  Each one expands to its
;; first string when the subst is not applied and to its second string
;; when it is applied.
(define_subst_attr "mask_scalar_name" "mask_scalar" "" "_mask")
(define_subst_attr "mask_scalar_operand3" "mask_scalar" "" "%{%4%}%N3")
;; "mask_scalar" matches a set whose source is
;; (vec_merge op1 op2 (const_int 1)).  The replacement keeps that outer
;; merge with operand 2, but wraps operand 1 in an inner vec_merge with a
;; new operand 3 (vector_move_operand, constraint "0C") selected by a new
;; mask register operand 4 (constraint "Yk").  Only generated when
;; TARGET_AVX512F holds.
(define_subst "mask_scalar"
[(set (match_operand:SUBST_V 0)
(vec_merge:SUBST_V
(match_operand:SUBST_V 1)
(match_operand:SUBST_V 2)
(const_int 1)))]
"TARGET_AVX512F"
[(set (match_dup 0)
(vec_merge:SUBST_V
(vec_merge:SUBST_V
(match_dup 1)
(match_operand:SUBST_V 3 "vector_move_operand" "0C")
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
(match_dup 2)
(const_int 1)))])
;; Attributes for the "round_scalar" subst (first string: subst not
;; applied, second string: subst applied).
(define_subst_attr "round_scalar_name" "round_scalar" "" "_round")
;; Note this attribute is keyed on "mask_scalar", not "round_scalar": the
;; rounding operand is printed as %R3 without masking and %R5 with it.
;; NOTE(review): presumably because mask_scalar introduces operands 3 and 4,
;; pushing the rounding operand to 5 -- confirm against the generated insns.
(define_subst_attr "round_scalar_mask_operand3" "mask_scalar" "%R3" "%R5")
(define_subst_attr "round_scalar_mask_op3" "round_scalar" "" "<round_scalar_mask_operand3>")
;; Operand 2 constraint and encoding prefix: "vm"/"vex" normally, "v"/"evex"
;; once the rounding subst is applied.
(define_subst_attr "round_scalar_constraint" "round_scalar" "vm" "v")
(define_subst_attr "round_scalar_prefix" "round_scalar" "vex" "evex")
;; "round_scalar" matches the same (vec_merge op1 op2 (const_int 1)) source
;; as "mask_scalar" and wraps the whole merge in an
;; UNSPEC_EMBEDDED_ROUNDING unspec together with a new SImode operand 3
;; accepted by const_4_or_8_to_11_operand.  Requires TARGET_AVX512F.
(define_subst "round_scalar"
[(set (match_operand:SUBST_V 0)
(vec_merge:SUBST_V
(match_operand:SUBST_V 1)
(match_operand:SUBST_V 2)
(const_int 1)))]
"TARGET_AVX512F"
[(set (match_dup 0)
(unspec:SUBST_V [
(vec_merge:SUBST_V
(match_dup 1)
(match_dup 2)
(const_int 1))
(match_operand:SI 3 "const_4_or_8_to_11_operand")]
UNSPEC_EMBEDDED_ROUNDING))])
;; Attributes for the "round_saeonly_scalar" subst (first string: subst not
;; applied, second string: subst applied).
(define_subst_attr "round_saeonly_scalar_name" "round_saeonly_scalar" "" "_round")
;; Keyed on "mask_scalar": the extra operand is printed as %r3 without
;; masking and %r5 with it.
;; NOTE(review): presumably mirrors round_scalar_mask_operand3, with the
;; lowercase %r modifier used for the SAE-only form -- confirm.
(define_subst_attr "round_saeonly_scalar_mask_operand3" "mask_scalar" "%r3" "%r5")
(define_subst_attr "round_saeonly_scalar_mask_op3" "round_saeonly_scalar" "" "<round_saeonly_scalar_mask_operand3>")
;; Operand 2 constraint and encoding prefix: "vm"/"vex" normally, "v"/"evex"
;; once this subst is applied.
(define_subst_attr "round_saeonly_scalar_constraint" "round_saeonly_scalar" "vm" "v")
(define_subst_attr "round_saeonly_scalar_prefix" "round_saeonly_scalar" "vex" "evex")
;; Same shape as "round_scalar": the (vec_merge op1 op2 (const_int 1))
;; source is wrapped in an UNSPEC_EMBEDDED_ROUNDING unspec, but the new
;; SImode operand 3 uses the const48_operand predicate instead.
;; Requires TARGET_AVX512F.
(define_subst "round_saeonly_scalar"
[(set (match_operand:SUBST_V 0)
(vec_merge:SUBST_V
(match_operand:SUBST_V 1)
(match_operand:SUBST_V 2)
(const_int 1)))]
"TARGET_AVX512F"
[(set (match_dup 0)
(unspec:SUBST_V [
(vec_merge:SUBST_V
(match_dup 1)
(match_dup 2)
(const_int 1))
(match_operand:SI 3 "const48_operand")]
UNSPEC_EMBEDDED_ROUNDING))])
2017-07-05 Sebastian Peryt <sebastian.peryt@intel.com>
* gcc.target/i386/avx512f-vaddsd-3.c: New test for mask 0 verification.
* gcc.target/i386/avx512f-vaddss-3.c: Ditto.
* gcc.target/i386/avx512f-vdivsd-3.c: Ditto.
* gcc.target/i386/avx512f-vdivss-3.c: Ditto.
* gcc.target/i386/avx512f-vmaxsd-3.c: Ditto.
* gcc.target/i386/avx512f-vmaxss-3.c: Ditto.
* gcc.target/i386/avx512f-vminsd-3.c: Ditto.
* gcc.target/i386/avx512f-vminss-3.c: Ditto.
* gcc.target/i386/avx512f-vmulsd-3.c: Ditto.
* gcc.target/i386/avx512f-vmulss-3.c: Ditto.
* gcc.target/i386/avx512f-vsubsd-3.c: Ditto.
* gcc.target/i386/avx512f-vsubss-3.c: Ditto.
2017-07-05 Georg-Johann Lay <avr@gjlay.de> 2017-07-05 Georg-Johann Lay <avr@gjlay.de>
PR target/81305 PR target/81305
......
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 64)
#include "avx512f-mask-type.h"
/* Reference model for the scalar double add: element 0 of R receives
   S1[0] + S2[0], element 1 of R is copied from S1.  */
static void
calc_add (double *r, double *s1, double *s2)
{
  enum { LOW = 0, HIGH = 1 };

  /* Only the low element is computed.  */
  r[LOW] = s1[LOW] + s2[LOW];
  /* The high element passes through from the first source.  */
  r[HIGH] = s1[HIGH];
}
/* Test body: runs the four masked scalar-double add intrinsics with a zero
   mask and aborts if any check of a result against res_ref returns
   nonzero.  */
void
avx512f_test (void)
{
int i, sign;
union128d res1, res2, res3, res4, src1, src2;
/* Zero mask: this is the case under test for all four calls below.  */
MASK_TYPE mask = 0;
double res_ref[SIZE];
sign = -1;
/* Fill the sources; sign flips on every iteration.  res1 and res3 are preset
   to DEFAULT_VALUE because they are also passed as the first argument of
   the _mm_mask_* calls below.  */
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign + 1.0;
res1.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
sign = sign * -1;
}
/* res1/res2: plain masked forms; res3/res4: _round forms called with
   _MM_FROUND_NO_EXC.  */
res1.x = _mm_mask_add_sd (res1.x, mask, src1.x, src2.x);
res2.x = _mm_maskz_add_sd (mask, src1.x, src2.x);
res3.x = _mm_mask_add_round_sd (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
res4.x = _mm_maskz_add_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
/* Reference for the non-round forms.  MASK_MERGE and MASK_ZERO come from the
   included helper headers -- presumably they rewrite res_ref in place to
   model merge- and zero-masking of element 0; confirm there.  */
calc_add (res_ref, src1.a, src2.a);
MASK_MERGE (d) (res_ref, mask, 1);
if (check_union128d (res1, res_ref))
abort ();
/* Applied on top of the already-merged res_ref, not a fresh calc_add.  */
MASK_ZERO (d) (res_ref, mask, 1);
if (check_union128d (res2, res_ref))
abort ();
/* Rebuild the reference before checking the _round results.  */
calc_add (res_ref, src1.a, src2.a);
MASK_MERGE (d) (res_ref, mask, 1);
if (check_union128d (res3, res_ref))
abort ();
MASK_ZERO (d) (res_ref, mask, 1);
if (check_union128d (res4, res_ref))
abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 32)
#include "avx512f-mask-type.h"
/* Reference model for the scalar float add: element 0 of R receives
   S1[0] + S2[0]; elements 1 .. SIZE-1 of R are copied from S1.  */
static void
calc_add (float *r, float *s1, float *s2)
{
  int lane = 1;

  r[0] = s1[0] + s2[0];

  /* Remaining elements pass through from the first source, in ascending
     order.  */
  while (lane < SIZE)
    {
      r[lane] = s1[lane];
      lane++;
    }
}
/* Test body: runs the four masked scalar-float add intrinsics with a zero
   mask and aborts if any check of a result against res_ref returns
   nonzero.  */
void
avx512f_test (void)
{
int i, sign;
union128 res1, res2, res3, res4, src1, src2;
/* Zero mask: this is the case under test for all four calls below.  */
MASK_TYPE mask = 0;
float res_ref[SIZE];
sign = -1;
/* Fill the sources; sign flips on every iteration.  res1 and res3 are preset
   to DEFAULT_VALUE because they are also passed as the first argument of
   the _mm_mask_* calls below.  */
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign + 1.0;
res1.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
sign = sign * -1;
}
/* res1/res2: plain masked forms; res3/res4: _round forms called with
   _MM_FROUND_NO_EXC.  */
res1.x = _mm_mask_add_ss (res1.x, mask, src1.x, src2.x);
res2.x = _mm_maskz_add_ss (mask, src1.x, src2.x);
res3.x = _mm_mask_add_round_ss (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
res4.x = _mm_maskz_add_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
/* Reference for the non-round forms.  MASK_MERGE and MASK_ZERO come from the
   included helper headers -- presumably they rewrite res_ref in place to
   model merge- and zero-masking of element 0; confirm there.  */
calc_add (res_ref, src1.a, src2.a);
MASK_MERGE () (res_ref, mask, 1);
if (check_union128 (res1, res_ref))
abort ();
/* Applied on top of the already-merged res_ref, not a fresh calc_add.  */
MASK_ZERO () (res_ref, mask, 1);
if (check_union128 (res2, res_ref))
abort ();
/* Rebuild the reference before checking the _round results.  */
calc_add (res_ref, src1.a, src2.a);
MASK_MERGE () (res_ref, mask, 1);
if (check_union128 (res3, res_ref))
abort ();
MASK_ZERO () (res_ref, mask, 1);
if (check_union128 (res4, res_ref))
abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 64)
#include "avx512f-mask-type.h"
/* Reference model for the scalar double divide: element 0 of R receives
   S1[0] / S2[0], element 1 of R is copied from S1.  */
static void
calc_div (double *r, double *s1, double *s2)
{
  enum { LOW = 0, HIGH = 1 };

  /* Only the low element is computed.  */
  r[LOW] = s1[LOW] / s2[LOW];
  /* The high element passes through from the first source.  */
  r[HIGH] = s1[HIGH];
}
/* Test body: runs the four masked scalar-double divide intrinsics with a
   zero mask and aborts if any check of a result against res_ref returns
   nonzero.  */
void
avx512f_test (void)
{
int i, sign;
union128d res1, res2, res3, res4, src1, src2;
/* Zero mask: this is the case under test for all four calls below.  */
MASK_TYPE mask = 0;
double res_ref[SIZE];
sign = -1;
/* Fill the sources; sign flips on every iteration.  res1 and res3 are preset
   to DEFAULT_VALUE because they are also passed as the first argument of
   the _mm_mask_* calls below.  */
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign + 1.0;
res1.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
sign = sign * -1;
}
/* res1/res2: plain masked forms; res3/res4: _round forms called with
   _MM_FROUND_NO_EXC.  */
res1.x = _mm_mask_div_sd (res1.x, mask, src1.x, src2.x);
res2.x = _mm_maskz_div_sd (mask, src1.x, src2.x);
res3.x = _mm_mask_div_round_sd (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
res4.x = _mm_maskz_div_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
/* Reference for the non-round forms.  MASK_MERGE and MASK_ZERO come from the
   included helper headers -- presumably they rewrite res_ref in place to
   model merge- and zero-masking of element 0; confirm there.  */
calc_div (res_ref, src1.a, src2.a);
MASK_MERGE (d) (res_ref, mask, 1);
if (check_union128d (res1, res_ref))
abort ();
/* Applied on top of the already-merged res_ref, not a fresh calc_div.  */
MASK_ZERO (d) (res_ref, mask, 1);
if (check_union128d (res2, res_ref))
abort ();
/* Rebuild the reference before checking the _round results.  */
calc_div (res_ref, src1.a, src2.a);
MASK_MERGE (d) (res_ref, mask, 1);
if (check_union128d (res3, res_ref))
abort ();
MASK_ZERO (d) (res_ref, mask, 1);
if (check_union128d (res4, res_ref))
abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 32)
#include "avx512f-mask-type.h"
/* Reference model for the scalar float divide: element 0 of R receives
   S1[0] / S2[0]; elements 1 .. SIZE-1 of R are copied from S1.  */
static void
calc_div (float *r, float *s1, float *s2)
{
  int lane = 1;

  r[0] = s1[0] / s2[0];

  /* Remaining elements pass through from the first source, in ascending
     order.  */
  while (lane < SIZE)
    {
      r[lane] = s1[lane];
      lane++;
    }
}
/* Test body: runs the four masked scalar-float divide intrinsics with a
   zero mask and aborts if any check of a result against res_ref returns
   nonzero.  */
void
avx512f_test (void)
{
int i, sign;
union128 res1, res2, res3, res4, src1, src2;
/* Zero mask: this is the case under test for all four calls below.  */
MASK_TYPE mask = 0;
float res_ref[SIZE];
sign = -1;
/* Fill the sources; sign flips on every iteration.  res1 and res3 are preset
   to DEFAULT_VALUE because they are also passed as the first argument of
   the _mm_mask_* calls below.  */
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign + 1.0;
res1.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
sign = sign * -1;
}
/* res1/res2: plain masked forms; res3/res4: _round forms called with
   _MM_FROUND_NO_EXC.  */
res1.x = _mm_mask_div_ss (res1.x, mask, src1.x, src2.x);
res2.x = _mm_maskz_div_ss (mask, src1.x, src2.x);
res3.x = _mm_mask_div_round_ss (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
res4.x = _mm_maskz_div_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
/* Reference for the non-round forms.  MASK_MERGE and MASK_ZERO come from the
   included helper headers -- presumably they rewrite res_ref in place to
   model merge- and zero-masking of element 0; confirm there.  */
calc_div (res_ref, src1.a, src2.a);
MASK_MERGE () (res_ref, mask, 1);
if (check_union128 (res1, res_ref))
abort ();
/* Applied on top of the already-merged res_ref, not a fresh calc_div.  */
MASK_ZERO () (res_ref, mask, 1);
if (check_union128 (res2, res_ref))
abort ();
/* Rebuild the reference before checking the _round results.  */
calc_div (res_ref, src1.a, src2.a);
MASK_MERGE () (res_ref, mask, 1);
if (check_union128 (res3, res_ref))
abort ();
MASK_ZERO () (res_ref, mask, 1);
if (check_union128 (res4, res_ref))
abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 64)
#include "avx512f-mask-type.h"
/* Reference model for the scalar double max: element 0 of R receives S1[0]
   when S1[0] > S2[0] and S2[0] otherwise; element 1 of R is copied from
   S1.  */
static void
calc_max (double *r, double *s1, double *s2)
{
  /* Strict greater-than: on equality (or a false comparison) the second
     source is chosen.  */
  if (s1[0] > s2[0])
    r[0] = s1[0];
  else
    r[0] = s2[0];

  r[1] = s1[1];
}
/* Test body: runs the four masked scalar-double max intrinsics with a zero
   mask and aborts if any check of a result against res_ref returns
   nonzero.  */
void
avx512f_test (void)
{
int i, sign;
union128d res1, res2, res3, res4, src1, src2;
/* Zero mask: this is the case under test for all four calls below.  */
MASK_TYPE mask = 0;
double res_ref[SIZE];
sign = -1;
/* Fill the sources; sign flips on every iteration.  res1 and res3 are preset
   to DEFAULT_VALUE because they are also passed as the first argument of
   the _mm_mask_* calls below.  */
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign + 1.0;
res1.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
sign = sign * -1;
}
/* res1/res2: plain masked forms; res3/res4: _round forms called with
   _MM_FROUND_NO_EXC.  */
res1.x = _mm_mask_max_sd (res1.x, mask, src1.x, src2.x);
res2.x = _mm_maskz_max_sd (mask, src1.x, src2.x);
res3.x = _mm_mask_max_round_sd (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
res4.x = _mm_maskz_max_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
/* Reference for the non-round forms.  MASK_MERGE and MASK_ZERO come from the
   included helper headers -- presumably they rewrite res_ref in place to
   model merge- and zero-masking of element 0; confirm there.  */
calc_max (res_ref, src1.a, src2.a);
MASK_MERGE (d) (res_ref, mask, 1);
if (check_union128d (res1, res_ref))
abort ();
/* Applied on top of the already-merged res_ref, not a fresh calc_max.  */
MASK_ZERO (d) (res_ref, mask, 1);
if (check_union128d (res2, res_ref))
abort ();
/* Rebuild the reference before checking the _round results.  */
calc_max (res_ref, src1.a, src2.a);
MASK_MERGE (d) (res_ref, mask, 1);
if (check_union128d (res3, res_ref))
abort ();
MASK_ZERO (d) (res_ref, mask, 1);
if (check_union128d (res4, res_ref))
abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 32)
#include "avx512f-mask-type.h"
/* Reference model for the scalar float max: element 0 of R receives S1[0]
   when S1[0] > S2[0] and S2[0] otherwise; elements 1 .. SIZE-1 of R are
   copied from S1.  */
static void
calc_max (float *r, float *s1, float *s2)
{
  int lane = 1;

  /* Strict greater-than: on equality (or a false comparison) the second
     source is chosen.  */
  if (s1[0] > s2[0])
    r[0] = s1[0];
  else
    r[0] = s2[0];

  /* Remaining elements pass through from the first source, in ascending
     order.  */
  while (lane < SIZE)
    {
      r[lane] = s1[lane];
      lane++;
    }
}
/* Test body: runs the four masked scalar-float max intrinsics with a zero
   mask and aborts if any check of a result against res_ref returns
   nonzero.  */
void
avx512f_test (void)
{
int i, sign;
union128 res1, res2, res3, res4, src1, src2;
/* Zero mask: this is the case under test for all four calls below.  */
MASK_TYPE mask = 0;
float res_ref[SIZE];
sign = -1;
/* Fill the sources; sign flips on every iteration.  res1 and res3 are preset
   to DEFAULT_VALUE because they are also passed as the first argument of
   the _mm_mask_* calls below.  */
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign + 1.0;
res1.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
sign = sign * -1;
}
/* res1/res2: plain masked forms; res3/res4: _round forms called with
   _MM_FROUND_NO_EXC.  */
res1.x = _mm_mask_max_ss (res1.x, mask, src1.x, src2.x);
res2.x = _mm_maskz_max_ss (mask, src1.x, src2.x);
res3.x = _mm_mask_max_round_ss (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
res4.x = _mm_maskz_max_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
/* Reference for the non-round forms.  MASK_MERGE and MASK_ZERO come from the
   included helper headers -- presumably they rewrite res_ref in place to
   model merge- and zero-masking of element 0; confirm there.  */
calc_max (res_ref, src1.a, src2.a);
MASK_MERGE () (res_ref, mask, 1);
if (check_union128 (res1, res_ref))
abort ();
/* Applied on top of the already-merged res_ref, not a fresh calc_max.  */
MASK_ZERO () (res_ref, mask, 1);
if (check_union128 (res2, res_ref))
abort ();
/* Rebuild the reference before checking the _round results.  */
calc_max (res_ref, src1.a, src2.a);
MASK_MERGE () (res_ref, mask, 1);
if (check_union128 (res3, res_ref))
abort ();
MASK_ZERO () (res_ref, mask, 1);
if (check_union128 (res4, res_ref))
abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 64)
#include "avx512f-mask-type.h"
/* Reference model for the scalar double min: element 0 of R receives S1[0]
   when S1[0] < S2[0] and S2[0] otherwise; element 1 of R is copied from
   S1.  */
static void
calc_min (double *r, double *s1, double *s2)
{
  /* Strict less-than: on equality (or a false comparison) the second
     source is chosen.  */
  if (s1[0] < s2[0])
    r[0] = s1[0];
  else
    r[0] = s2[0];

  r[1] = s1[1];
}
/* Test body: runs the four masked scalar-double min intrinsics with a zero
   mask and aborts if any check of a result against res_ref returns
   nonzero.  */
void
avx512f_test (void)
{
int i, sign;
union128d res1, res2, res3, res4, src1, src2;
/* Zero mask: this is the case under test for all four calls below.  */
MASK_TYPE mask = 0;
double res_ref[SIZE];
sign = -1;
/* Fill the sources; sign flips on every iteration.  res1 and res3 are preset
   to DEFAULT_VALUE because they are also passed as the first argument of
   the _mm_mask_* calls below.  */
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign + 1.0;
res1.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
sign = sign * -1;
}
/* res1/res2: plain masked forms; res3/res4: _round forms called with
   _MM_FROUND_NO_EXC.  */
res1.x = _mm_mask_min_sd (res1.x, mask, src1.x, src2.x);
res2.x = _mm_maskz_min_sd (mask, src1.x, src2.x);
res3.x = _mm_mask_min_round_sd (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
res4.x = _mm_maskz_min_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
/* Reference for the non-round forms.  MASK_MERGE and MASK_ZERO come from the
   included helper headers -- presumably they rewrite res_ref in place to
   model merge- and zero-masking of element 0; confirm there.  */
calc_min (res_ref, src1.a, src2.a);
MASK_MERGE (d) (res_ref, mask, 1);
if (check_union128d (res1, res_ref))
abort ();
/* Applied on top of the already-merged res_ref, not a fresh calc_min.  */
MASK_ZERO (d) (res_ref, mask, 1);
if (check_union128d (res2, res_ref))
abort ();
/* Rebuild the reference before checking the _round results.  */
calc_min (res_ref, src1.a, src2.a);
MASK_MERGE (d) (res_ref, mask, 1);
if (check_union128d (res3, res_ref))
abort ();
MASK_ZERO (d) (res_ref, mask, 1);
if (check_union128d (res4, res_ref))
abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 32)
#include "avx512f-mask-type.h"
/* Reference model for the scalar float min: element 0 of R receives S1[0]
   when S1[0] < S2[0] and S2[0] otherwise; elements 1 .. SIZE-1 of R are
   copied from S1.  */
static void
calc_min (float *r, float *s1, float *s2)
{
  int lane = 1;

  /* Strict less-than: on equality (or a false comparison) the second
     source is chosen.  */
  if (s1[0] < s2[0])
    r[0] = s1[0];
  else
    r[0] = s2[0];

  /* Remaining elements pass through from the first source, in ascending
     order.  */
  while (lane < SIZE)
    {
      r[lane] = s1[lane];
      lane++;
    }
}
/* Test body: runs the four masked scalar-float min intrinsics with a zero
   mask and aborts if any check of a result against res_ref returns
   nonzero.  */
void
avx512f_test (void)
{
int i, sign;
union128 res1, res2, res3, res4, src1, src2;
/* Zero mask: this is the case under test for all four calls below.  */
MASK_TYPE mask = 0;
float res_ref[SIZE];
sign = -1;
/* Fill the sources; sign flips on every iteration.  res1 and res3 are preset
   to DEFAULT_VALUE because they are also passed as the first argument of
   the _mm_mask_* calls below.  */
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign + 1.0;
res1.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
sign = sign * -1;
}
/* res1/res2: plain masked forms; res3/res4: _round forms called with
   _MM_FROUND_NO_EXC.  */
res1.x = _mm_mask_min_ss (res1.x, mask, src1.x, src2.x);
res2.x = _mm_maskz_min_ss (mask, src1.x, src2.x);
res3.x = _mm_mask_min_round_ss (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
res4.x = _mm_maskz_min_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
/* Reference for the non-round forms.  MASK_MERGE and MASK_ZERO come from the
   included helper headers -- presumably they rewrite res_ref in place to
   model merge- and zero-masking of element 0; confirm there.  */
calc_min (res_ref, src1.a, src2.a);
MASK_MERGE () (res_ref, mask, 1);
if (check_union128 (res1, res_ref))
abort ();
/* Applied on top of the already-merged res_ref, not a fresh calc_min.  */
MASK_ZERO () (res_ref, mask, 1);
if (check_union128 (res2, res_ref))
abort ();
/* Rebuild the reference before checking the _round results.  */
calc_min (res_ref, src1.a, src2.a);
MASK_MERGE () (res_ref, mask, 1);
if (check_union128 (res3, res_ref))
abort ();
MASK_ZERO () (res_ref, mask, 1);
if (check_union128 (res4, res_ref))
abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 64)
#include "avx512f-mask-type.h"
/* Reference model for the scalar double multiply: element 0 of R receives
   S1[0] * S2[0], element 1 of R is copied from S1.  */
static void
calc_mul (double *r, double *s1, double *s2)
{
  enum { LOW = 0, HIGH = 1 };

  /* Only the low element is computed.  */
  r[LOW] = s1[LOW] * s2[LOW];
  /* The high element passes through from the first source.  */
  r[HIGH] = s1[HIGH];
}
/* Test body: runs the four masked scalar-double multiply intrinsics with a
   zero mask and aborts if any check of a result against res_ref returns
   nonzero.  */
void
avx512f_test (void)
{
int i, sign;
union128d res1, res2, res3, res4, src1, src2;
/* Zero mask: this is the case under test for all four calls below.  */
MASK_TYPE mask = 0;
double res_ref[SIZE];
sign = -1;
/* Fill the sources; sign flips on every iteration.  res1 and res3 are preset
   to DEFAULT_VALUE because they are also passed as the first argument of
   the _mm_mask_* calls below.  */
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign + 1.0;
res1.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
sign = sign * -1;
}
/* res1/res2: plain masked forms; res3/res4: _round forms called with
   _MM_FROUND_NO_EXC.  */
res1.x = _mm_mask_mul_sd (res1.x, mask, src1.x, src2.x);
res2.x = _mm_maskz_mul_sd (mask, src1.x, src2.x);
res3.x = _mm_mask_mul_round_sd (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
res4.x = _mm_maskz_mul_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
/* Reference for the non-round forms.  MASK_MERGE and MASK_ZERO come from the
   included helper headers -- presumably they rewrite res_ref in place to
   model merge- and zero-masking of element 0; confirm there.  */
calc_mul (res_ref, src1.a, src2.a);
MASK_MERGE (d) (res_ref, mask, 1);
if (check_union128d (res1, res_ref))
abort ();
/* Applied on top of the already-merged res_ref, not a fresh calc_mul.  */
MASK_ZERO (d) (res_ref, mask, 1);
if (check_union128d (res2, res_ref))
abort ();
/* Rebuild the reference before checking the _round results.  */
calc_mul (res_ref, src1.a, src2.a);
MASK_MERGE (d) (res_ref, mask, 1);
if (check_union128d (res3, res_ref))
abort ();
MASK_ZERO (d) (res_ref, mask, 1);
if (check_union128d (res4, res_ref))
abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 32)
#include "avx512f-mask-type.h"
/* Reference model for the scalar float multiply: element 0 of R receives
   S1[0] * S2[0]; elements 1 .. SIZE-1 of R are copied from S1.  */
static void
calc_mul (float *r, float *s1, float *s2)
{
  int lane = 1;

  r[0] = s1[0] * s2[0];

  /* Remaining elements pass through from the first source, in ascending
     order.  */
  while (lane < SIZE)
    {
      r[lane] = s1[lane];
      lane++;
    }
}
/* Test body: runs the four masked scalar-float multiply intrinsics with a
   zero mask and aborts if any check of a result against res_ref returns
   nonzero.  */
void
avx512f_test (void)
{
int i, sign;
union128 res1, res2, res3, res4, src1, src2;
/* Zero mask: this is the case under test for all four calls below.  */
MASK_TYPE mask = 0;
float res_ref[SIZE];
sign = -1;
/* Fill the sources; sign flips on every iteration.  res1 and res3 are preset
   to DEFAULT_VALUE because they are also passed as the first argument of
   the _mm_mask_* calls below.  */
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign + 1.0;
res1.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
sign = sign * -1;
}
/* res1/res2: plain masked forms; res3/res4: _round forms called with
   _MM_FROUND_NO_EXC.  */
res1.x = _mm_mask_mul_ss (res1.x, mask, src1.x, src2.x);
res2.x = _mm_maskz_mul_ss (mask, src1.x, src2.x);
res3.x = _mm_mask_mul_round_ss (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
res4.x = _mm_maskz_mul_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
/* Reference for the non-round forms.  MASK_MERGE and MASK_ZERO come from the
   included helper headers -- presumably they rewrite res_ref in place to
   model merge- and zero-masking of element 0; confirm there.  */
calc_mul (res_ref, src1.a, src2.a);
MASK_MERGE () (res_ref, mask, 1);
if (check_union128 (res1, res_ref))
abort ();
/* Applied on top of the already-merged res_ref, not a fresh calc_mul.  */
MASK_ZERO () (res_ref, mask, 1);
if (check_union128 (res2, res_ref))
abort ();
/* Rebuild the reference before checking the _round results.  */
calc_mul (res_ref, src1.a, src2.a);
MASK_MERGE () (res_ref, mask, 1);
if (check_union128 (res3, res_ref))
abort ();
MASK_ZERO () (res_ref, mask, 1);
if (check_union128 (res4, res_ref))
abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 64)
#include "avx512f-mask-type.h"
/* Reference model for the scalar double subtract: element 0 of R receives
   S1[0] - S2[0], element 1 of R is copied from S1.  */
static void
calc_sub (double *r, double *s1, double *s2)
{
  enum { LOW = 0, HIGH = 1 };

  /* Only the low element is computed.  */
  r[LOW] = s1[LOW] - s2[LOW];
  /* The high element passes through from the first source.  */
  r[HIGH] = s1[HIGH];
}
/* Test body: runs the four masked scalar-double subtract intrinsics with a
   zero mask and aborts if any check of a result against res_ref returns
   nonzero.  */
void
avx512f_test (void)
{
int i, sign;
union128d res1, res2, res3, res4, src1, src2;
/* Zero mask: this is the case under test for all four calls below.  */
MASK_TYPE mask = 0;
double res_ref[SIZE];
sign = -1;
/* Fill the sources; sign flips on every iteration.  res1 and res3 are preset
   to DEFAULT_VALUE because they are also passed as the first argument of
   the _mm_mask_* calls below.  */
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign + 1.0;
res1.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
sign = sign * -1;
}
/* res1/res2: plain masked forms; res3/res4: _round forms called with
   _MM_FROUND_NO_EXC.  */
res1.x = _mm_mask_sub_sd (res1.x, mask, src1.x, src2.x);
res2.x = _mm_maskz_sub_sd (mask, src1.x, src2.x);
res3.x = _mm_mask_sub_round_sd (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
res4.x = _mm_maskz_sub_round_sd (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
/* Reference for the non-round forms.  MASK_MERGE and MASK_ZERO come from the
   included helper headers -- presumably they rewrite res_ref in place to
   model merge- and zero-masking of element 0; confirm there.  */
calc_sub (res_ref, src1.a, src2.a);
MASK_MERGE (d) (res_ref, mask, 1);
if (check_union128d (res1, res_ref))
abort ();
/* Applied on top of the already-merged res_ref, not a fresh calc_sub.  */
MASK_ZERO (d) (res_ref, mask, 1);
if (check_union128d (res2, res_ref))
abort ();
/* Rebuild the reference before checking the _round results.  */
calc_sub (res_ref, src1.a, src2.a);
MASK_MERGE (d) (res_ref, mask, 1);
if (check_union128d (res3, res_ref))
abort ();
MASK_ZERO (d) (res_ref, mask, 1);
if (check_union128d (res4, res_ref))
abort ();
}
/* { dg-do run } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-require-effective-target avx512f } */
#include "avx512f-check.h"
#include "avx512f-helper.h"
#define SIZE (128 / 32)
#include "avx512f-mask-type.h"
/* Reference model for the scalar float subtract: element 0 of R receives
   S1[0] - S2[0]; elements 1 .. SIZE-1 of R are copied from S1.  */
static void
calc_sub (float *r, float *s1, float *s2)
{
  int lane = 1;

  r[0] = s1[0] - s2[0];

  /* Remaining elements pass through from the first source, in ascending
     order.  */
  while (lane < SIZE)
    {
      r[lane] = s1[lane];
      lane++;
    }
}
/* Test body: runs the four masked scalar-float subtract intrinsics with a
   zero mask and aborts if any check of a result against res_ref returns
   nonzero.  */
void
avx512f_test (void)
{
int i, sign;
union128 res1, res2, res3, res4, src1, src2;
/* Zero mask: this is the case under test for all four calls below.  */
MASK_TYPE mask = 0;
float res_ref[SIZE];
sign = -1;
/* Fill the sources; sign flips on every iteration.  res1 and res3 are preset
   to DEFAULT_VALUE because they are also passed as the first argument of
   the _mm_mask_* calls below.  */
for (i = 0; i < SIZE; i++)
{
src1.a[i] = 1.5 + 34.67 * i * sign;
src2.a[i] = -22.17 * i * sign + 1.0;
res1.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
sign = sign * -1;
}
/* res1/res2: plain masked forms; res3/res4: _round forms called with
   _MM_FROUND_NO_EXC.  */
res1.x = _mm_mask_sub_ss (res1.x, mask, src1.x, src2.x);
res2.x = _mm_maskz_sub_ss (mask, src1.x, src2.x);
res3.x = _mm_mask_sub_round_ss (res3.x, mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
res4.x = _mm_maskz_sub_round_ss (mask, src1.x, src2.x, _MM_FROUND_NO_EXC);
/* Reference for the non-round forms.  MASK_MERGE and MASK_ZERO come from the
   included helper headers -- presumably they rewrite res_ref in place to
   model merge- and zero-masking of element 0; confirm there.  */
calc_sub (res_ref, src1.a, src2.a);
MASK_MERGE () (res_ref, mask, 1);
if (check_union128 (res1, res_ref))
abort ();
/* Applied on top of the already-merged res_ref, not a fresh calc_sub.  */
MASK_ZERO () (res_ref, mask, 1);
if (check_union128 (res2, res_ref))
abort ();
/* Rebuild the reference before checking the _round results.  */
calc_sub (res_ref, src1.a, src2.a);
MASK_MERGE () (res_ref, mask, 1);
if (check_union128 (res3, res_ref))
abort ();
MASK_ZERO () (res_ref, mask, 1);
if (check_union128 (res4, res_ref))
abort ();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment