Commit 2953b72f by Jakub Jelinek Committed by Jakub Jelinek

re PR target/80846 (auto-vectorized AVX2 horizontal sum should narrow to 128b…

re PR target/80846 (auto-vectorized AVX2 horizontal sum should narrow to 128b right away, to be more efficient for Ryzen and Intel)

	PR target/80846
	* config/i386/i386.c (ix86_expand_vector_init_general): Handle
	V2TImode and V4TImode.
	(ix86_expand_vector_extract): Likewise.
	* config/i386/sse.md (VMOVE): Enable V4TImode even for just
	TARGET_AVX512F, instead of only for TARGET_AVX512BW.
	(ssescalarmode): Handle V4TImode and V2TImode.
	(VEC_EXTRACT_MODE): Add V4TImode and V2TImode.
	(*vec_extractv2ti, *vec_extractv4ti): New insns.
	(VEXTRACTI128_MODE): New mode iterator.
	(splitter for *vec_extractv?ti first element): New.
	(VEC_INIT_MODE): New mode iterator.
	(vec_init<mode>): Consolidate 3 expanders into one using
	VEC_INIT_MODE mode iterator.

	* gcc.target/i386/avx-pr80846.c: New test.
	* gcc.target/i386/avx2-pr80846.c: New test.
	* gcc.target/i386/avx512f-pr80846.c: New test.

From-SVN: r250397
parent f0a40456
2017-07-20 Jakub Jelinek <jakub@redhat.com>
PR target/80846
* config/i386/i386.c (ix86_expand_vector_init_general): Handle
V2TImode and V4TImode.
(ix86_expand_vector_extract): Likewise.
* config/i386/sse.md (VMOVE): Enable V4TImode even for just
TARGET_AVX512F, instead of only for TARGET_AVX512BW.
(ssescalarmode): Handle V4TImode and V2TImode.
(VEC_EXTRACT_MODE): Add V4TImode and V2TImode.
(*vec_extractv2ti, *vec_extractv4ti): New insns.
(VEXTRACTI128_MODE): New mode iterator.
(splitter for *vec_extractv?ti first element): New.
(VEC_INIT_MODE): New mode iterator.
(vec_init<mode>): Consolidate 3 expanders into one using
VEC_INIT_MODE mode iterator.
2017-07-20 Alexander Monakov <amonakov@ispras.ru> 2017-07-20 Alexander Monakov <amonakov@ispras.ru>
* lra-assigns.c (pseudo_compare_func): Fix comparison step based on * lra-assigns.c (pseudo_compare_func): Fix comparison step based on
......
...@@ -44118,6 +44118,26 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode, ...@@ -44118,6 +44118,26 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
ix86_expand_vector_init_concat (mode, target, ops, n); ix86_expand_vector_init_concat (mode, target, ops, n);
return; return;
case V2TImode:
for (i = 0; i < 2; i++)
ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
op0 = gen_reg_rtx (V4DImode);
ix86_expand_vector_init_concat (V4DImode, op0, ops, 2);
emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
return;
case V4TImode:
for (i = 0; i < 4; i++)
ops[i] = gen_lowpart (V2DImode, XVECEXP (vals, 0, i));
ops[4] = gen_reg_rtx (V4DImode);
ix86_expand_vector_init_concat (V4DImode, ops[4], ops, 2);
ops[5] = gen_reg_rtx (V4DImode);
ix86_expand_vector_init_concat (V4DImode, ops[5], ops + 2, 2);
op0 = gen_reg_rtx (V8DImode);
ix86_expand_vector_init_concat (V8DImode, op0, ops + 4, 2);
emit_move_insn (target, gen_lowpart (GET_MODE (target), op0));
return;
case V32QImode: case V32QImode:
half_mode = V16QImode; half_mode = V16QImode;
goto half; goto half;
...@@ -44659,6 +44679,8 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) ...@@ -44659,6 +44679,8 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
case V2DFmode: case V2DFmode:
case V2DImode: case V2DImode:
case V2TImode:
case V4TImode:
use_vec_extr = true; use_vec_extr = true;
break; break;
...@@ -175,7 +175,7 @@ ...@@ -175,7 +175,7 @@
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
...@@ -687,7 +687,8 @@ ...@@ -687,7 +687,8 @@
(V16SI "SI") (V8SI "SI") (V4SI "SI") (V16SI "SI") (V8SI "SI") (V4SI "SI")
(V8DI "DI") (V4DI "DI") (V2DI "DI") (V8DI "DI") (V4DI "DI") (V2DI "DI")
(V16SF "SF") (V8SF "SF") (V4SF "SF") (V16SF "SF") (V8SF "SF") (V4SF "SF")
(V8DF "DF") (V4DF "DF") (V2DF "DF")]) (V8DF "DF") (V4DF "DF") (V2DF "DF")
(V4TI "TI") (V2TI "TI")])
;; Mapping of vector modes to the 128bit modes ;; Mapping of vector modes to the 128bit modes
(define_mode_attr ssexmmmode (define_mode_attr ssexmmmode
...@@ -6920,15 +6921,6 @@ ...@@ -6920,15 +6921,6 @@
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex") (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF")]) (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
;; vec_init expander for the V_128 mode iterator, enabled under TARGET_SSE.
;; It emits no RTL of its own: the work is delegated to
;; ix86_expand_vector_init, called with false as its first argument, the
;; destination register (operand 0) and the initializer (operand 1).
(define_expand "vec_init<mode>"
[(match_operand:V_128 0 "register_operand")
(match_operand 1)]
"TARGET_SSE"
{
ix86_expand_vector_init (false, operands[0], operands[1]);
DONE;
})
;; Avoid combining registers from different units in a single alternative, ;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c ;; see comment above inline_secondary_memory_needed function in i386.c
(define_insn "vec_set<mode>_0" (define_insn "vec_set<mode>_0"
...@@ -7886,7 +7878,8 @@ ...@@ -7886,7 +7878,8 @@
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF]) (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
(V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
(define_expand "vec_extract<mode>" (define_expand "vec_extract<mode>"
[(match_operand:<ssescalarmode> 0 "register_operand") [(match_operand:<ssescalarmode> 0 "register_operand")
...@@ -13734,6 +13727,50 @@ ...@@ -13734,6 +13727,50 @@
operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs); operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
}) })
;; Extract one TImode element from V2TI register operand 1 into
;; register-or-memory operand 0.  Operand 2 is the constant element index
;; (0 or 1).  Enabled under TARGET_AVX.
;; Alternative 0 (x constraints) emits vextract%~128 with a VEX prefix.
;; Alternative 1 (v constraints) emits vextracti32x4 with an EVEX prefix and
;; prints the source with the %g modifier.
;; NOTE(review): %g presumably selects the 512-bit register name -- confirm
;; against the i386 operand print codes.
(define_insn "*vec_extractv2ti"
[(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
(vec_select:TI
(match_operand:V2TI 1 "register_operand" "x,v")
(parallel
[(match_operand:SI 2 "const_0_to_1_operand")])))]
"TARGET_AVX"
"@
vextract%~128\t{%2, %1, %0|%0, %1, %2}
vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "vex,evex")
(set_attr "mode" "OI")])
;; Extract one TImode element from V4TI register operand 1 into
;; register-or-memory operand 0 with vextracti32x4.  Operand 2 is the
;; constant element index (0 to 3).  Enabled under TARGET_AVX512F; the
;; single alternative uses v constraints and an EVEX prefix.
(define_insn "*vec_extractv4ti"
[(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
(vec_select:TI
(match_operand:V4TI 1 "register_operand" "v")
(parallel
[(match_operand:SI 2 "const_0_to_3_operand")])))]
"TARGET_AVX512F"
"vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sselog")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
(set_attr "mode" "XI")])
;; Vector-of-TImode source modes for the element-0 extraction splitter that
;; follows.  V2TI is unconditional; V4TI is included only under
;; TARGET_AVX512F.
(define_mode_iterator VEXTRACTI128_MODE
[(V4TI "TARGET_AVX512F") V2TI])
;; After reload, rewrite extraction of element 0 from a V2TI/V4TI register
;; as a plain TImode move: operand 1 is replaced by its TImode low part and
;; the vec_select disappears.  Unless TARGET_AVX512VL is enabled, the split
;; applies only when EXT_REX_SSE_REG_P is false for the source register.
(define_split
[(set (match_operand:TI 0 "nonimmediate_operand")
(vec_select:TI
(match_operand:VEXTRACTI128_MODE 1 "register_operand")
(parallel [(const_int 0)])))]
"TARGET_AVX
&& reload_completed
&& (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
[(set (match_dup 0) (match_dup 1))]
"operands[1] = gen_lowpart (TImode, operands[1]);")
;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F ;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
;; vector modes into vec_extract*. ;; vector modes into vec_extract*.
(define_split (define_split
...@@ -18738,19 +18775,20 @@ ...@@ -18738,19 +18775,20 @@
<ssehalfvecmode>mode); <ssehalfvecmode>mode);
}) })
(define_expand "vec_init<mode>" ;; Modes handled by vec_init patterns.
[(match_operand:V_256 0 "register_operand") (define_mode_iterator VEC_INIT_MODE
(match_operand 1)] [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
"TARGET_AVX" (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
{ (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
ix86_expand_vector_init (false, operands[0], operands[1]); (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
DONE; (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
}) (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
(V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
(define_expand "vec_init<mode>" (define_expand "vec_init<mode>"
[(match_operand:VF48_I1248 0 "register_operand") [(match_operand:VEC_INIT_MODE 0 "register_operand")
(match_operand 1)] (match_operand 1)]
"TARGET_AVX512F" "TARGET_SSE"
{ {
ix86_expand_vector_init (false, operands[0], operands[1]); ix86_expand_vector_init (false, operands[0], operands[1]);
DONE; DONE;
......
2017-07-20 Jakub Jelinek <jakub@redhat.com>
PR target/80846
* gcc.target/i386/avx-pr80846.c: New test.
* gcc.target/i386/avx2-pr80846.c: New test.
* gcc.target/i386/avx512f-pr80846.c: New test.
2017-07-20 Bin Cheng <bin.cheng@arm.com> 2017-07-20 Bin Cheng <bin.cheng@arm.com>
PR tree-optimization/81388 PR tree-optimization/81388
......
/* PR target/80846 */
/* { dg-do compile { target int128 } } */
/* { dg-options "-O2 -mavx -mno-avx2" } */
/* Vector types shared by every test function in this file: V and W are
   32-byte vectors of __int128 and long long, X is a 16-byte vector of int,
   and Y and Z are 64-byte vectors of __int128 and long long.  */
typedef __int128 V __attribute__((vector_size (32)));
typedef long long W __attribute__((vector_size (32)));
typedef int X __attribute__((vector_size (16)));
typedef __int128 Y __attribute__((vector_size (64)));
typedef long long Z __attribute__((vector_size (64)));
/* 32-byte cases.  Every function goes through V (two __int128 elements)
   and converts to or from W with a vector cast.  */

/* f1: build a V from two scalar __int128 arguments.  */
W f1 (__int128 x, __int128 y) { return (W) ((V) { x, y }); }
/* f2, f3: return element 0 and element 1 of x viewed as V.  */
__int128 f2 (W x) { return ((V)x)[0]; }
__int128 f3 (W x) { return ((V)x)[1]; }
/* f4: like f1, but each element is an X vector reinterpreted as __int128
   through a union.  */
W f4 (X x, X y) { union { X x; __int128 i; } u = { .x = x }, v = { .x = y }; return (W) ((V) { u.i, v.i }); }
/* f5, f6: extract element 0 and element 1 and cast the result to X.  */
X f5 (W x) { return (X)(((V)x)[0]); }
X f6 (W x) { return (X)(((V)x)[1]); }
/* f7: initializer made only of constants.  */
W f7 (void) { return (W) ((V) { 2, 3 }); }
/* f8, f9: one variable element (position 0, then position 1) next to a
   constant.  */
W f8 (X x) { union { X x; __int128 i; } u = { .x = x }; return (W) ((V) { u.i, 3 }); }
W f9 (X x) { union { X x; __int128 i; } u = { .x = x }; return (W) ((V) { 2, u.i }); }
/* f10: the same variable value in both element positions.  */
W f10 (X x) { union { X x; __int128 i; } u = { .x = x }; return (W) ((V) { u.i, u.i }); }
/* 64-byte cases, compiled only when __AVX512F__ is defined.  They mirror
   f1-f10 using Y (four __int128 elements) and Z.  */
#ifdef __AVX512F__
/* f11: build a Y from four scalar __int128 arguments.  */
Z f11 (__int128 x, __int128 y, __int128 z, __int128 a) { return (Z) ((Y) { x, y, z, a }); }
/* f12-f15: return elements 0 through 3 of x viewed as Y.  */
__int128 f12 (Z x) { return ((Y)x)[0]; }
__int128 f13 (Z x) { return ((Y)x)[1]; }
__int128 f14 (Z x) { return ((Y)x)[2]; }
__int128 f15 (Z x) { return ((Y)x)[3]; }
/* f16: like f11, but each element is an X vector reinterpreted as __int128
   through a union.  */
Z f16 (X x, X y, X z, X a) { union { X x; __int128 i; } u = { .x = x }, v = { .x = y }, w = { .x = z }, t = { .x = a };
return (Z) ((Y) { u.i, v.i, w.i, t.i }); }
/* f17-f20: extract elements 0 through 3 and cast the result to X.  */
X f17 (Z x) { return (X)(((Y)x)[0]); }
X f18 (Z x) { return (X)(((Y)x)[1]); }
X f19 (Z x) { return (X)(((Y)x)[2]); }
X f20 (Z x) { return (X)(((Y)x)[3]); }
/* f21: initializer made only of constants.  */
Z f21 (void) { return (Z) ((Y) { 2, 3, 4, 5 }); }
/* f22-f25: one variable element at position 0, 1, 2 and 3 respectively,
   with constants in the remaining positions.  */
Z f22 (X x) { union { X x; __int128 i; } u = { .x = x }; return (Z) ((Y) { u.i, 3, 4, 5 }); }
Z f23 (X x) { union { X x; __int128 i; } u = { .x = x }; return (Z) ((Y) { 2, u.i, 4, 5 }); }
Z f24 (X x) { union { X x; __int128 i; } u = { .x = x }; return (Z) ((Y) { 2, 3, u.i, 5 }); }
Z f25 (X x) { union { X x; __int128 i; } u = { .x = x }; return (Z) ((Y) { 2, 3, 4, u.i }); }
/* f26: the same variable value in all four element positions.  */
Z f26 (X x) { union { X x; __int128 i; } u = { .x = x }; return (Z) ((Y) { u.i, u.i, u.i, u.i }); }
#endif
/* PR target/80846 */
/* { dg-do compile { target int128 } } */
/* { dg-options "-O2 -mavx2 -mno-avx512f" } */
/* Reuse the test body of avx-pr80846.c under the options given above.  */
#include "avx-pr80846.c"
/* PR target/80846 */
/* { dg-do compile { target int128 } } */
/* { dg-options "-O2 -mavx512f" } */
/* Reuse the test body of avx-pr80846.c under the options given above; that
   file guards its 64-byte vector functions with #ifdef __AVX512F__.  */
#include "avx-pr80846.c"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment