Commit 1ad6e904 by Jakub Jelinek, committed by Jakub Jelinek

re PR target/83203 (Inefficient int to avx2 vector conversion)

	PR target/83203
	* config/i386/i386.c (ix86_expand_vector_init_one_nonzero): If one_var
	is 0, for V{8,16}S[IF] and V[48]D[IF]mode use gen_vec_set<mode>_0.
	* config/i386/sse.md (VI8_AVX_AVX512F, VI4F_256_512): New mode
	iterators.
	(ssescalarmodesuffix): Add 512-bit vectors.  Use "d" or "q" for
	integral modes instead of "ss" and "sd".
	(vec_set<mode>_0): New define_insns for 256-bit and 512-bit
	vectors with 32-bit and 64-bit elements.
	(vecdupssescalarmodesuffix): New mode attribute.
	(vec_dup<mode>): Use it.

From-SVN: r256556
parent c7a61831
2018-01-11 Jakub Jelinek <jakub@redhat.com>
PR target/83203
* config/i386/i386.c (ix86_expand_vector_init_one_nonzero): If one_var
is 0, for V{8,16}S[IF] and V[48]D[IF]mode use gen_vec_set<mode>_0.
* config/i386/sse.md (VI8_AVX_AVX512F, VI4F_256_512): New mode
iterators.
(ssescalarmodesuffix): Add 512-bit vectors. Use "d" or "q" for
integral modes instead of "ss" and "sd".
(vec_set<mode>_0): New define_insns for 256-bit and 512-bit
vectors with 32-bit and 64-bit elements.
(vecdupssescalarmodesuffix): New mode attribute.
(vec_dup<mode>): Use it.
2018-01-11  H.J. Lu  <hongjiu.lu@intel.com>
PR target/83330
......
...@@ -41767,6 +41767,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, ...@@ -41767,6 +41767,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
rtx new_target; rtx new_target;
rtx x, tmp; rtx x, tmp;
bool use_vector_set = false; bool use_vector_set = false;
rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
switch (mode) switch (mode)
{ {
...@@ -41791,14 +41792,41 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, ...@@ -41791,14 +41792,41 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
break; break;
case E_V32QImode: case E_V32QImode:
case E_V16HImode: case E_V16HImode:
use_vector_set = TARGET_AVX;
break;
case E_V8SImode: case E_V8SImode:
use_vector_set = TARGET_AVX;
gen_vec_set_0 = gen_vec_setv8si_0;
break;
case E_V8SFmode: case E_V8SFmode:
use_vector_set = TARGET_AVX;
gen_vec_set_0 = gen_vec_setv8sf_0;
break;
case E_V4DFmode: case E_V4DFmode:
use_vector_set = TARGET_AVX; use_vector_set = TARGET_AVX;
gen_vec_set_0 = gen_vec_setv4df_0;
break; break;
case E_V4DImode: case E_V4DImode:
/* Use ix86_expand_vector_set in 64bit mode only. */ /* Use ix86_expand_vector_set in 64bit mode only. */
use_vector_set = TARGET_AVX && TARGET_64BIT; use_vector_set = TARGET_AVX && TARGET_64BIT;
gen_vec_set_0 = gen_vec_setv4di_0;
break;
case E_V16SImode:
use_vector_set = TARGET_AVX512F && one_var == 0;
gen_vec_set_0 = gen_vec_setv16si_0;
break;
case E_V16SFmode:
use_vector_set = TARGET_AVX512F && one_var == 0;
gen_vec_set_0 = gen_vec_setv16sf_0;
break;
case E_V8DFmode:
use_vector_set = TARGET_AVX512F && one_var == 0;
gen_vec_set_0 = gen_vec_setv8df_0;
break;
case E_V8DImode:
/* Use ix86_expand_vector_set in 64bit mode only. */
use_vector_set = TARGET_AVX512F && TARGET_64BIT && one_var == 0;
gen_vec_set_0 = gen_vec_setv8di_0;
break; break;
default: default:
break; break;
...@@ -41806,6 +41834,12 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode, ...@@ -41806,6 +41834,12 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
if (use_vector_set) if (use_vector_set)
{ {
if (gen_vec_set_0 && one_var == 0)
{
var = force_reg (GET_MODE_INNER (mode), var);
emit_insn (gen_vec_set_0 (target, CONST0_RTX (mode), var));
return true;
}
emit_insn (gen_rtx_SET (target, CONST0_RTX (mode))); emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
var = force_reg (GET_MODE_INNER (mode), var); var = force_reg (GET_MODE_INNER (mode), var);
ix86_expand_vector_set (mmx_ok, target, var, one_var); ix86_expand_vector_set (mmx_ok, target, var, one_var);
...@@ -401,6 +401,9 @@ ...@@ -401,6 +401,9 @@
(define_mode_iterator VI8_AVX2_AVX512F (define_mode_iterator VI8_AVX2_AVX512F
[(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI]) [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
;; 64-bit-element integer vector modes that require AVX or better:
;; V8DI only with AVX512F, V4DI with AVX.  Unlike VI8_AVX2_AVX512F
;; above, this iterator deliberately omits plain V2DI.
(define_mode_iterator VI8_AVX_AVX512F
[(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
(define_mode_iterator VI4_128_8_256 (define_mode_iterator VI4_128_8_256
[V4SI V4DI]) [V4SI V4DI])
...@@ -622,6 +625,9 @@ ...@@ -622,6 +625,9 @@
(define_mode_iterator VI8F_128 [V2DI V2DF]) (define_mode_iterator VI8F_128 [V2DI V2DF])
(define_mode_iterator VI4F_256 [V8SI V8SF]) (define_mode_iterator VI4F_256 [V8SI V8SF])
(define_mode_iterator VI8F_256 [V4DI V4DF]) (define_mode_iterator VI8F_256 [V4DI V4DF])
;; 32-bit-element integer and float vector modes: the 256-bit modes
;; are available unconditionally (gated by the pattern's own condition),
;; the 512-bit modes only with AVX512F.
(define_mode_iterator VI4F_256_512
[V8SI V8SF
(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
(define_mode_iterator VI48F_256_512 (define_mode_iterator VI48F_256_512
[V8SI V8SF [V8SI V8SF
(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
...@@ -838,10 +844,12 @@ ...@@ -838,10 +844,12 @@
;; SSE scalar suffix for vector modes ;; SSE scalar suffix for vector modes
(define_mode_attr ssescalarmodesuffix (define_mode_attr ssescalarmodesuffix
[(SF "ss") (DF "sd") [(SF "ss") (DF "sd")
(V16SF "ss") (V8DF "sd")
(V8SF "ss") (V4DF "sd") (V8SF "ss") (V4DF "sd")
(V4SF "ss") (V2DF "sd") (V4SF "ss") (V2DF "sd")
(V8SI "ss") (V4DI "sd") (V16SI "d") (V8DI "q")
(V4SI "d")]) (V8SI "d") (V4DI "q")
(V4SI "d") (V2DI "q")])
;; Pack/unpack vector modes ;; Pack/unpack vector modes
(define_mode_attr sseunpackmode (define_mode_attr sseunpackmode
...@@ -7092,6 +7100,26 @@ ...@@ -7092,6 +7100,26 @@
(set_attr "prefix" "orig,orig,maybe_evex") (set_attr "prefix" "orig,orig,maybe_evex")
(set_attr "mode" "V4SF")]) (set_attr "mode" "V4SF")])
;; Insert a 32-bit scalar into element 0 of a 256-/512-bit vector while
;; zeroing all the other elements (vec_merge of a vec_duplicate with a
;; const-0 vector under mask 1).
;; All of vinsertps, vmovss, vmovd clear also the higher bits.
(define_insn "vec_set<mode>_0"
[(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,Yi")
(vec_merge:VI4F_256_512
(vec_duplicate:VI4F_256_512
(match_operand:<ssescalarmode> 2 "general_operand" "v,m,r"))
(match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
(const_int 1)))]
"TARGET_AVX"
;; Alternative 0: scalar already in a vector reg -> vinsertps with
;;   immediate 0xe (zero elements 1-3 of the low 128 bits; the insert
;;   itself clears the upper lanes of the ymm/zmm destination).
;; Alternative 1: scalar in memory -> scalar load, which zero-fills.
;; Alternative 2: scalar in a GPR -> vmovd.
"@
vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
vmovd\t{%2, %x0|%x0, %2}"
[(set (attr "type")
(if_then_else (eq_attr "alternative" "0")
(const_string "sselog")
(const_string "ssemov")))
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "SF,<ssescalarmode>,SI")])
(define_insn "sse4_1_insertps" (define_insn "sse4_1_insertps"
[(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v") [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
(unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm") (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
...@@ -9220,6 +9248,20 @@ ...@@ -9220,6 +9248,20 @@
(const_string "orig"))) (const_string "orig")))
(set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")]) (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
;; Insert a DFmode scalar into element 0 of a 256-/512-bit DF vector
;; while zeroing the remaining elements.  A single vmovq suffices:
;; vmovq clears also the higher bits.
(define_insn "vec_set<mode>_0"
[(set (match_operand:VF2_512_256 0 "register_operand" "=v")
(vec_merge:VF2_512_256
(vec_duplicate:VF2_512_256
(match_operand:<ssescalarmode> 2 "general_operand" "xm"))
(match_operand:VF2_512_256 1 "const0_operand" "C")
(const_int 1)))]
"TARGET_AVX"
;; %x0 forces the xmm view of the destination; writing it zeroes the
;; upper lanes of the ymm/zmm register.
"vmovq\t{%2, %x0|%x0, %2}"
[(set_attr "type" "ssemov")
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "DF")])
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;;
;; Parallel integer down-conversion operations ;; Parallel integer down-conversion operations
...@@ -13993,6 +14035,22 @@ ...@@ -13993,6 +14035,22 @@
(const_string "orig"))) (const_string "orig")))
(set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")]) (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
;; Insert a DImode scalar into element 0 of a 256-/512-bit DI vector
;; while zeroing the remaining elements.
;; vmovq clears also the higher bits.
(define_insn "vec_set<mode>_0"
[(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=Yi,v")
(vec_merge:VI8_AVX_AVX512F
(vec_duplicate:VI8_AVX_AVX512F
(match_operand:<ssescalarmode> 2 "general_operand" "r,vm"))
(match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
(const_int 1)))]
"TARGET_AVX"
;; Alternative 0: GPR source -- the GPR form of vmovq needs REX.W,
;; hence the x64-only "isa" and prefix_rex below.
;; Alternative 1: vector register or memory source.
"vmovq\t{%2, %x0|%x0, %2}"
[(set_attr "isa" "x64,*")
(set_attr "type" "ssemov")
(set_attr "prefix_rex" "1,*")
(set_attr "prefix" "maybe_evex")
(set_attr "mode" "TI")])
(define_expand "vec_unpacks_lo_<mode>" (define_expand "vec_unpacks_lo_<mode>"
[(match_operand:<sseunpackmode> 0 "register_operand") [(match_operand:<sseunpackmode> 0 "register_operand")
(match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")] (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
...@@ -17743,6 +17801,8 @@ ...@@ -17743,6 +17801,8 @@
;; Modes handled by AVX vec_dup patterns. ;; Modes handled by AVX vec_dup patterns.
(define_mode_iterator AVX_VEC_DUP_MODE (define_mode_iterator AVX_VEC_DUP_MODE
[V8SI V8SF V4DI V4DF]) [V8SI V8SF V4DI V4DF])
;; Scalar suffix used by the vec_dup<mode> broadcast-from-memory
;; alternative (vbroadcast<suffix>).  The integer modes deliberately map
;; to the FP "ss"/"sd" suffixes because vbroadcastss/vbroadcastsd are
;; the forms available there -- the generic ssescalarmodesuffix now
;; yields "d"/"q" for integer modes, which vbroadcast does not accept.
(define_mode_attr vecdupssescalarmodesuffix
[(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
;; Modes handled by AVX2 vec_dup patterns. ;; Modes handled by AVX2 vec_dup patterns.
(define_mode_iterator AVX2_VEC_DUP_MODE (define_mode_iterator AVX2_VEC_DUP_MODE
[V32QI V16QI V16HI V8HI V8SI V4SI]) [V32QI V16QI V16HI V8HI V8SI V4SI])
...@@ -17769,7 +17829,7 @@ ...@@ -17769,7 +17829,7 @@
"TARGET_AVX" "TARGET_AVX"
"@ "@
v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1} v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1} vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1} v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1} v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
#" #"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment