Commit 5358e8f5 by H.J. Lu

i386: Properly encode vector registers in vector move

On x86, when AVX and AVX512 are enabled, vector move instructions can
be encoded with either 2-byte/3-byte VEX (AVX) or 4-byte EVEX (AVX512):

   0:	c5 f9 6f d1          	vmovdqa %xmm1,%xmm2
   4:	62 f1 fd 08 6f d1    	vmovdqa64 %xmm1,%xmm2

We prefer VEX encoding over EVEX since VEX is shorter.  Note that AVX512F
alone only supports 512-bit vector moves; 128-bit and 256-bit vector moves
also require AVX512VL, and xmm16-xmm31/ymm16-ymm31 are disallowed in
128-bit and 256-bit modes when AVX512VL is disabled.  Mode attributes on
x86 vector move patterns indicate the preferred vector move encoding for
the target.  For a scalar register-to-register move, we can use a 512-bit
vector move instruction to move a 32-bit/64-bit scalar when AVX512VL isn't
available.  With AVX512F and AVX512VL, we should use VEX encoding for
128-bit/256-bit vector moves when the upper 16 vector registers aren't
used.
This patch adds a function, ix86_output_ssemov, to generate vector moves:

1. If zmm registers are used, use EVEX encoding.
2. If xmm16-xmm31/ymm16-ymm31 registers aren't used, SSE or VEX encoding
will be generated.
3. If xmm16-xmm31/ymm16-ymm31 registers are used:
   a. With AVX512VL, AVX512VL vector moves will be generated.
   b. Without AVX512VL, xmm16-xmm31/ymm16-ymm31 register to register
      move will be done with zmm register move.

There is no need to set the mode attribute to XImode explicitly, since
ix86_output_ssemov can properly encode xmm16-xmm31/ymm16-ymm31 registers
both with and without AVX512VL.
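
As a hedged illustration of cases 2 and 3a above (an editorial sketch in
the style of the new tests added below; the function names are invented
and the exact register allocation may differ), compiling the following
with -O2 -march=skylake-avx512:

   typedef long long v2di __attribute__ ((__vector_size__ (16)));

   v2di
   via_xmm1 (v2di x)
   {
     register v2di t __asm ("xmm1") = x;   /* case 2: VEX expected,        */
     asm volatile ("" : "+v" (t));         /* e.g. vmovdqa %xmm1, %xmm0    */
     return t;
   }

   v2di
   via_xmm16 (v2di x)
   {
     register v2di t __asm ("xmm16") = x;  /* case 3a: with AVX512VL an    */
     asm volatile ("" : "+v" (t));         /* EVEX move, e.g. vmovdqa64    */
     return t;
   }

Case 3b (no AVX512VL) is covered by the pr89229 tests below: 128-bit
values cannot be placed in xmm16-xmm31 at all in that configuration, and
only 32-bit/64-bit scalars in those registers are moved, using a zmm
register move.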

Tested on AVX2 and AVX512 with and without --with-arch=native.

gcc/

	PR target/89229
	PR target/89346
	* config/i386/i386-protos.h (ix86_output_ssemov): New prototype.
	* config/i386/i386.c (ix86_get_ssemov): New function.
	(ix86_output_ssemov): Likewise.
	* config/i386/i386.md (*movxi_internal_avx512f): Call
	ix86_output_ssemov for TYPE_SSEMOV.
	(*movoi_internal_avx): Call ix86_output_ssemov for TYPE_SSEMOV.
	Remove ext_sse_reg_operand and TARGET_AVX512VL check.
	(*movti_internal): Likewise.
	(*movtf_internal): Call ix86_output_ssemov for TYPE_SSEMOV.
	* config/i386/sse.md (VMOVE:mov<mode>_internal): Call
	ix86_output_ssemov for TYPE_SSEMOV.  Remove TARGET_AVX512VL
	check.

gcc/testsuite/

	PR target/89229
	PR target/89346
	* gcc.target/i386/avx512vl-vmovdqa64-1.c: Updated.
	* gcc.target/i386/pr89229-2a.c: New test.
	* gcc.target/i386/pr89229-2b.c: Likewise.
	* gcc.target/i386/pr89229-2c.c: Likewise.
	* gcc.target/i386/pr89229-3a.c: Likewise.
	* gcc.target/i386/pr89229-3b.c: Likewise.
	* gcc.target/i386/pr89229-3c.c: Likewise.
	* gcc.target/i386/pr89346.c: Likewise.
parent 34ec7d53
2020-03-05 H.J. Lu <hongjiu.lu@intel.com>
PR target/89229
PR target/89346
* config/i386/i386-protos.h (ix86_output_ssemov): New prototype.
* config/i386/i386.c (ix86_get_ssemov): New function.
(ix86_output_ssemov): Likewise.
* config/i386/i386.md (*movxi_internal_avx512f): Call
ix86_output_ssemov for TYPE_SSEMOV.
(*movoi_internal_avx): Call ix86_output_ssemov for TYPE_SSEMOV.
Remove ext_sse_reg_operand and TARGET_AVX512VL check.
(*movti_internal): Likewise.
(*movtf_internal): Call ix86_output_ssemov for TYPE_SSEMOV.
* config/i386/sse.md (VMOVE:mov<mode>_internal): Call
ix86_output_ssemov for TYPE_SSEMOV.  Remove TARGET_AVX512VL
check.
2020-03-05 Jeff Law <law@redhat.com>
	PR tree-optimization/91890
...
@@ -38,6 +38,8 @@ extern void ix86_expand_split_stack_prologue (void);
 extern void ix86_output_addr_vec_elt (FILE *, int);
 extern void ix86_output_addr_diff_elt (FILE *, int, int);
+extern const char *ix86_output_ssemov (rtx_insn *, rtx *);
 extern enum calling_abi ix86_cfun_abi (void);
 extern enum calling_abi ix86_function_type_abi (const_tree);
...
@@ -4915,6 +4915,214 @@ ix86_pre_reload_split (void)
	  && !(cfun->curr_properties & PROP_rtl_split_insns));
 }
+/* Return the opcode of the TYPE_SSEMOV instruction.  To move from
+   or to xmm16-xmm31/ymm16-ymm31 registers, we either require
+   TARGET_AVX512VL or it is a register to register move which can
+   be done with zmm register move. */
+
+static const char *
+ix86_get_ssemov (rtx *operands, unsigned size,
+                 enum attr_mode insn_mode, machine_mode mode)
+{
+  char buf[128];
+  bool misaligned_p = (misaligned_operand (operands[0], mode)
+                       || misaligned_operand (operands[1], mode));
+  bool evex_reg_p = (size == 64
+                     || EXT_REX_SSE_REG_P (operands[0])
+                     || EXT_REX_SSE_REG_P (operands[1]));
+  machine_mode scalar_mode;
+
+  const char *opcode = NULL;
+  enum
+    {
+      opcode_int,
+      opcode_float,
+      opcode_double
+    } type = opcode_int;
+
+  switch (insn_mode)
+    {
+    case MODE_V16SF:
+    case MODE_V8SF:
+    case MODE_V4SF:
+      scalar_mode = E_SFmode;
+      type = opcode_float;
+      break;
+    case MODE_V8DF:
+    case MODE_V4DF:
+    case MODE_V2DF:
+      scalar_mode = E_DFmode;
+      type = opcode_double;
+      break;
+    case MODE_XI:
+    case MODE_OI:
+    case MODE_TI:
+      scalar_mode = GET_MODE_INNER (mode);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  /* NB: To move xmm16-xmm31/ymm16-ymm31 registers without AVX512VL,
+     we can only use zmm register move without memory operand.  */
+  if (evex_reg_p
+      && !TARGET_AVX512VL
+      && GET_MODE_SIZE (mode) < 64)
+    {
+      /* NB: Since ix86_hard_regno_mode_ok only allows xmm16-xmm31 or
+         ymm16-ymm31 in 128/256 bit modes when AVX512VL is enabled,
+         we get here only for xmm16-xmm31 or ymm16-ymm31 in 32/64 bit
+         modes.  */
+      if (GET_MODE_SIZE (mode) >= 16
+          || memory_operand (operands[0], mode)
+          || memory_operand (operands[1], mode))
+        gcc_unreachable ();
+      size = 64;
+      switch (type)
+        {
+        case opcode_int:
+          opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+          break;
+        case opcode_float:
+          opcode = misaligned_p ? "vmovups" : "vmovaps";
+          break;
+        case opcode_double:
+          opcode = misaligned_p ? "vmovupd" : "vmovapd";
+          break;
+        }
+    }
+  else if (SCALAR_FLOAT_MODE_P (scalar_mode))
+    {
+      switch (scalar_mode)
+        {
+        case E_SFmode:
+          opcode = misaligned_p ? "%vmovups" : "%vmovaps";
+          break;
+        case E_DFmode:
+          opcode = misaligned_p ? "%vmovupd" : "%vmovapd";
+          break;
+        case E_TFmode:
+          if (evex_reg_p)
+            opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+          else
+            opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
+          break;
+        default:
+          gcc_unreachable ();
+        }
+    }
+  else if (SCALAR_INT_MODE_P (scalar_mode))
+    {
+      switch (scalar_mode)
+        {
+        case E_QImode:
+          if (evex_reg_p)
+            opcode = (misaligned_p
+                      ? (TARGET_AVX512BW
+                         ? "vmovdqu8"
+                         : "vmovdqu64")
+                      : "vmovdqa64");
+          else
+            opcode = (misaligned_p
+                      ? (TARGET_AVX512BW
+                         ? "vmovdqu8"
+                         : "%vmovdqu")
+                      : "%vmovdqa");
+          break;
+        case E_HImode:
+          if (evex_reg_p)
+            opcode = (misaligned_p
+                      ? (TARGET_AVX512BW
+                         ? "vmovdqu16"
+                         : "vmovdqu64")
+                      : "vmovdqa64");
+          else
+            opcode = (misaligned_p
+                      ? (TARGET_AVX512BW
+                         ? "vmovdqu16"
+                         : "%vmovdqu")
+                      : "%vmovdqa");
+          break;
+        case E_SImode:
+          if (evex_reg_p)
+            opcode = misaligned_p ? "vmovdqu32" : "vmovdqa32";
+          else
+            opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
+          break;
+        case E_DImode:
+        case E_TImode:
+        case E_OImode:
+          if (evex_reg_p)
+            opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+          else
+            opcode = misaligned_p ? "%vmovdqu" : "%vmovdqa";
+          break;
+        case E_XImode:
+          opcode = misaligned_p ? "vmovdqu64" : "vmovdqa64";
+          break;
+        default:
+          gcc_unreachable ();
+        }
+    }
+  else
+    gcc_unreachable ();
+
+  switch (size)
+    {
+    case 64:
+      snprintf (buf, sizeof (buf), "%s\t{%%g1, %%g0|%%g0, %%g1}",
+                opcode);
+      break;
+    case 32:
+      snprintf (buf, sizeof (buf), "%s\t{%%t1, %%t0|%%t0, %%t1}",
+                opcode);
+      break;
+    case 16:
+      snprintf (buf, sizeof (buf), "%s\t{%%x1, %%x0|%%x0, %%x1}",
+                opcode);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  output_asm_insn (buf, operands);
+  return "";
+}
+
+/* Return the template of the TYPE_SSEMOV instruction to move
+   operands[1] into operands[0].  */
+
+const char *
+ix86_output_ssemov (rtx_insn *insn, rtx *operands)
+{
+  machine_mode mode = GET_MODE (operands[0]);
+
+  if (get_attr_type (insn) != TYPE_SSEMOV
+      || mode != GET_MODE (operands[1]))
+    gcc_unreachable ();
+
+  enum attr_mode insn_mode = get_attr_mode (insn);
+
+  switch (insn_mode)
+    {
+    case MODE_XI:
+    case MODE_V8DF:
+    case MODE_V16SF:
+      return ix86_get_ssemov (operands, 64, insn_mode, mode);
+
+    case MODE_OI:
+    case MODE_V4DF:
+    case MODE_V8SF:
+      return ix86_get_ssemov (operands, 32, insn_mode, mode);
+
+    case MODE_TI:
+    case MODE_V2DF:
+    case MODE_V4SF:
+      return ix86_get_ssemov (operands, 16, insn_mode, mode);
+
+    default:
+      gcc_unreachable ();
+    }
+}
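
(Editorial note, not part of the patch: as a hedged reading of the i386
operand modifiers used in the templates above, %x1/%t1/%g1 print operand 1
as an xmm/ymm/zmm register name, a leading %v emits the "v" prefix only
when AVX is enabled, and the {att|intel} braces select the assembler
dialect.  So for an aligned TImode register-to-register move the size-16
template

   %vmovdqa\t{%x1, %x0|%x0, %x1}

is expected to assemble as "vmovdqa %xmm1, %xmm2" under AVX, while the
same move through xmm16 with AVX512VL picks the EVEX form "vmovdqa64".)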
 /* Returns true if OP contains a symbol reference */

 bool
...
@@ -1902,11 +1902,7 @@
       return standard_sse_constant_opcode (insn, operands);
     case TYPE_SSEMOV:
-      if (misaligned_operand (operands[0], XImode)
-	  || misaligned_operand (operands[1], XImode))
-	return "vmovdqu32\t{%1, %0|%0, %1}";
-      else
-	return "vmovdqa32\t{%1, %0|%0, %1}";
+      return ix86_output_ssemov (insn, operands);
     default:
       gcc_unreachable ();
@@ -1929,21 +1925,7 @@
       return standard_sse_constant_opcode (insn, operands);
     case TYPE_SSEMOV:
-      if (misaligned_operand (operands[0], OImode)
-	  || misaligned_operand (operands[1], OImode))
-	{
-	  if (get_attr_mode (insn) == MODE_XI)
-	    return "vmovdqu32\t{%1, %0|%0, %1}";
-	  else
-	    return "vmovdqu\t{%1, %0|%0, %1}";
-	}
-      else
-	{
-	  if (get_attr_mode (insn) == MODE_XI)
-	    return "vmovdqa32\t{%1, %0|%0, %1}";
-	  else
-	    return "vmovdqa\t{%1, %0|%0, %1}";
-	}
+      return ix86_output_ssemov (insn, operands);
     default:
       gcc_unreachable ();
@@ -1952,15 +1934,7 @@
   [(set_attr "isa" "*,avx2,*,*")
    (set_attr "type" "sselog1,sselog1,ssemov,ssemov")
    (set_attr "prefix" "vex")
-   (set (attr "mode")
-	(cond [(ior (match_operand 0 "ext_sse_reg_operand")
-		    (match_operand 1 "ext_sse_reg_operand"))
-		 (const_string "XI")
-	       (and (eq_attr "alternative" "1")
-		    (match_test "TARGET_AVX512VL"))
-		 (const_string "XI")
-	      ]
-	      (const_string "OI")))])
+   (set_attr "mode" "OI")])
 (define_insn "*movti_internal"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o ,v,v ,v ,m,?r,?Yd")
@@ -1981,27 +1955,7 @@
       return standard_sse_constant_opcode (insn, operands);
     case TYPE_SSEMOV:
-      /* TDmode values are passed as TImode on the stack.  Moving them
-	 to stack may result in unaligned memory access.  */
-      if (misaligned_operand (operands[0], TImode)
-	  || misaligned_operand (operands[1], TImode))
-	{
-	  if (get_attr_mode (insn) == MODE_V4SF)
-	    return "%vmovups\t{%1, %0|%0, %1}";
-	  else if (get_attr_mode (insn) == MODE_XI)
-	    return "vmovdqu32\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovdqu\t{%1, %0|%0, %1}";
-	}
-      else
-	{
-	  if (get_attr_mode (insn) == MODE_V4SF)
-	    return "%vmovaps\t{%1, %0|%0, %1}";
-	  else if (get_attr_mode (insn) == MODE_XI)
-	    return "vmovdqa32\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovdqa\t{%1, %0|%0, %1}";
-	}
+      return ix86_output_ssemov (insn, operands);
     default:
       gcc_unreachable ();
@@ -2028,12 +1982,6 @@
    (set (attr "mode")
	(cond [(eq_attr "alternative" "0,1")
		 (const_string "DI")
-	       (ior (match_operand 0 "ext_sse_reg_operand")
-		    (match_operand 1 "ext_sse_reg_operand"))
-		 (const_string "XI")
-	       (and (eq_attr "alternative" "3")
-		    (match_test "TARGET_AVX512VL"))
-		 (const_string "XI")
	       (match_test "TARGET_AVX")
		 (const_string "TI")
	       (ior (not (match_test "TARGET_SSE2"))
@@ -3254,31 +3202,7 @@
       return standard_sse_constant_opcode (insn, operands);
     case TYPE_SSEMOV:
-      /* Handle misaligned load/store since we
-	 don't have movmisaligntf pattern. */
-      if (misaligned_operand (operands[0], TFmode)
-	  || misaligned_operand (operands[1], TFmode))
-	{
-	  if (get_attr_mode (insn) == MODE_V4SF)
-	    return "%vmovups\t{%1, %0|%0, %1}";
-	  else if (TARGET_AVX512VL
-		   && (EXT_REX_SSE_REG_P (operands[0])
-		       || EXT_REX_SSE_REG_P (operands[1])))
-	    return "vmovdqu64\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovdqu\t{%1, %0|%0, %1}";
-	}
-      else
-	{
-	  if (get_attr_mode (insn) == MODE_V4SF)
-	    return "%vmovaps\t{%1, %0|%0, %1}";
-	  else if (TARGET_AVX512VL
-		   && (EXT_REX_SSE_REG_P (operands[0])
-		       || EXT_REX_SSE_REG_P (operands[1])))
-	    return "vmovdqa64\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovdqa\t{%1, %0|%0, %1}";
-	}
+      return ix86_output_ssemov (insn, operands);
     case TYPE_MULTI:
       return "#";
...
@@ -1013,98 +1013,7 @@
       return standard_sse_constant_opcode (insn, operands);
     case TYPE_SSEMOV:
-      /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
-	 in avx512f, so we need to use workarounds, to access sse registers
-	 16-31, which are evex-only. In avx512vl we don't need workarounds.  */
-      if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
-	  && (EXT_REX_SSE_REG_P (operands[0])
-	      || EXT_REX_SSE_REG_P (operands[1])))
-	{
-	  if (memory_operand (operands[0], <MODE>mode))
-	    {
-	      if (<MODE_SIZE> == 32)
-		return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
-	      else if (<MODE_SIZE> == 16)
-		return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
-	      else
-		gcc_unreachable ();
-	    }
-	  else if (memory_operand (operands[1], <MODE>mode))
-	    {
-	      if (<MODE_SIZE> == 32)
-		return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
-	      else if (<MODE_SIZE> == 16)
-		return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
-	      else
-		gcc_unreachable ();
-	    }
-	  else
-	    /* Reg -> reg move is always aligned.  Just use wider move.  */
-	    switch (get_attr_mode (insn))
-	      {
-	      case MODE_V8SF:
-	      case MODE_V4SF:
-		return "vmovaps\t{%g1, %g0|%g0, %g1}";
-	      case MODE_V4DF:
-	      case MODE_V2DF:
-		return "vmovapd\t{%g1, %g0|%g0, %g1}";
-	      case MODE_OI:
-	      case MODE_TI:
-		return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
-	      default:
-		gcc_unreachable ();
-	      }
-	}
-      switch (get_attr_mode (insn))
-	{
-	case MODE_V16SF:
-	case MODE_V8SF:
-	case MODE_V4SF:
-	  if (misaligned_operand (operands[0], <MODE>mode)
-	      || misaligned_operand (operands[1], <MODE>mode))
-	    return "%vmovups\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovaps\t{%1, %0|%0, %1}";
-	case MODE_V8DF:
-	case MODE_V4DF:
-	case MODE_V2DF:
-	  if (misaligned_operand (operands[0], <MODE>mode)
-	      || misaligned_operand (operands[1], <MODE>mode))
-	    return "%vmovupd\t{%1, %0|%0, %1}";
-	  else
-	    return "%vmovapd\t{%1, %0|%0, %1}";
-	case MODE_OI:
-	case MODE_TI:
-	  if (misaligned_operand (operands[0], <MODE>mode)
-	      || misaligned_operand (operands[1], <MODE>mode))
-	    return TARGET_AVX512VL
-		   && (<MODE>mode == V4SImode
-		       || <MODE>mode == V2DImode
-		       || <MODE>mode == V8SImode
-		       || <MODE>mode == V4DImode
-		       || TARGET_AVX512BW)
-		   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
-		   : "%vmovdqu\t{%1, %0|%0, %1}";
-	  else
-	    return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
-				   : "%vmovdqa\t{%1, %0|%0, %1}";
-	case MODE_XI:
-	  if (misaligned_operand (operands[0], <MODE>mode)
-	      || misaligned_operand (operands[1], <MODE>mode))
-	    return (<MODE>mode == V16SImode
-		    || <MODE>mode == V8DImode
-		    || TARGET_AVX512BW)
-		   ? "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
-		   : "vmovdqu64\t{%1, %0|%0, %1}";
-	  else
-	    return "vmovdqa64\t{%1, %0|%0, %1}";
-	default:
-	  gcc_unreachable ();
-	}
+      return ix86_output_ssemov (insn, operands);
     default:
       gcc_unreachable ();
...
@@ -1113,10 +1022,7 @@
  [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
-	(cond [(and (eq_attr "alternative" "1")
-		    (match_test "TARGET_AVX512VL"))
-		 (const_string "<sseinsnmode>")
-	       (match_test "TARGET_AVX")
+	(cond [(match_test "TARGET_AVX")
		 (const_string "<sseinsnmode>")
	       (ior (not (match_test "TARGET_SSE2"))
		    (match_test "optimize_function_for_size_p (cfun)"))
...
2020-03-05 H.J. Lu <hongjiu.lu@intel.com>
PR target/89229
PR target/89346
* gcc.target/i386/avx512vl-vmovdqa64-1.c: Updated.
* gcc.target/i386/pr89229-2a.c: New test.
* gcc.target/i386/pr89229-2b.c: Likewise.
* gcc.target/i386/pr89229-2c.c: Likewise.
* gcc.target/i386/pr89229-3a.c: Likewise.
* gcc.target/i386/pr89229-3b.c: Likewise.
* gcc.target/i386/pr89229-3c.c: Likewise.
* gcc.target/i386/pr89346.c: Likewise.
2020-03-05 Andre Vieira <andre.simoesdiasvieira@arm.com>
	* g++.dg/pr80481.C: Disable epilogue vectorization.
...
@@ -4,14 +4,13 @@
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\\(\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\\(\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*\\)\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\nxy\]*\\(.{5,6}(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\nxy\]*\\(.{5,6}(?:\n|\[ \\t\]+#)" 1 { target nonpic } } } */
-/* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\nxy\]*\\((?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
 /* { dg-final { scan-assembler-times "vmovdqa64\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
...
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=skylake-avx512" } */

typedef __int128 __m128t __attribute__ ((__vector_size__ (16),
					 __may_alias__));

__m128t
foo1 (void)
{
  register __int128 xmm16 __asm ("xmm16") = (__int128) -1;
  asm volatile ("" : "+v" (xmm16));
  return (__m128t) xmm16;
}

/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */

/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */

typedef __int128 __m128t __attribute__ ((__vector_size__ (16),
					 __may_alias__));

__m128t
foo1 (void)
{
  register __int128 xmm16 __asm ("xmm16") = (__int128) -1; /* { dg-error "register specified for 'xmm16'" } */
  asm volatile ("" : "+v" (xmm16));
  return (__m128t) xmm16;
}

/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */

#include "pr89229-2a.c"

/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */

/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=skylake-avx512" } */

extern __float128 d;

void
foo1 (__float128 x)
{
  register __float128 xmm16 __asm ("xmm16") = x;
  asm volatile ("" : "+v" (xmm16));
  register __float128 xmm17 __asm ("xmm17") = xmm16;
  asm volatile ("" : "+v" (xmm17));
  d = xmm17;
}

/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */

/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=skylake-avx512 -mno-avx512vl" } */

extern __float128 d;

void
foo1 (__float128 x)
{
  register __float128 xmm16 __asm ("xmm16") = x; /* { dg-error "register specified for 'xmm16'" } */
  asm volatile ("" : "+v" (xmm16));
  d = xmm16;
}

/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=skylake-avx512 -mprefer-vector-width=512" } */

#include "pr89229-5a.c"

/* { dg-final { scan-assembler-not "%zmm\[0-9\]+" } } */

/* { dg-do compile } */
/* { dg-options "-O2 -march=skylake-avx512" } */

#include <immintrin.h>

long long *p;
volatile __m256i y;

void
foo (void)
{
  _mm256_store_epi64 (p, y);
}

/* { dg-final { scan-assembler-not "vmovdqa64" } } */