Commit 1104467f by Jakub Jelinek Committed by Jakub Jelinek

re PR tree-optimization/91201 (SIMD not generated for horizontal sum of bytes in array)

	PR tree-optimization/91201
	* config/i386/sse.md (reduc_plus_scal_v16qi): New expander.
	(REDUC_PLUS_MODE): Add V32QImode for TARGET_AVX and V64QImode for
	TARGET_AVX512F.
	(reduc_plus_scal_<mode>): Improve formatting by introducing
	a temporary.

	* gcc.target/i386/sse2-pr91201.c: New test.
	* gcc.target/i386/avx2-pr91201.c: New test.
	* gcc.target/i386/avx512bw-pr91201.c: New test.

From-SVN: r273927
parent 89626179
2019-07-31 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/91201
* config/i386/sse.md (reduc_plus_scal_v16qi): New expander.
(REDUC_PLUS_MODE): Add V32QImode for TARGET_AVX and V64QImode for
TARGET_AVX512F.
(reduc_plus_scal_<mode>): Improve formatting by introducing
a temporary.
2019-07-31 Sudakshina Das <sudi.das@arm.com>
* config/aarch64/aarch64-builtins.c (enum aarch64_builtins): Add
......
...@@ -2728,9 +2728,30 @@ ...@@ -2728,9 +2728,30 @@
DONE; DONE;
}) })
(define_expand "reduc_plus_scal_v16qi"
[(plus:V16QI
(match_operand:QI 0 "register_operand")
(match_operand:V16QI 1 "register_operand"))]
"TARGET_SSE2"
{
/* Expand a plus-reduction of the V16QI vector in operands[1] into the
   QI scalar in operands[0].  Only enabled when TARGET_SSE2 holds.  */

/* Step 1: view the input as one V1TI value and shift it right by 64 bits,
   so that the upper eight bytes line up with the lower eight.  */
rtx tmp = gen_reg_rtx (V1TImode);
emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
GEN_INT (64)));
/* Step 2: add the shifted copy to the original as V16QI.  After this the
   low eight bytes of tmp2 each hold byte[i] + byte[i + 8]; the upper
   eight bytes are not used for the final result below.  */
rtx tmp2 = gen_reg_rtx (V16QImode);
emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
/* Step 3: materialize an all-zero V16QI vector to use as the second
   PSADBW input.  */
rtx tmp3 = gen_reg_rtx (V16QImode);
emit_move_insn (tmp3, CONST0_RTX (V16QImode));
/* Step 4: PSADBW of tmp2 against zero, producing a V2DI result.
   NOTE(review): this relies on the ISA definition of PSADBW (sum of
   absolute byte differences per 64-bit lane), which against zero should
   yield the sum of the eight bytes in each lane -- confirm against the
   instruction reference.  */
rtx tmp4 = gen_reg_rtx (V2DImode);
emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
/* Step 5: reinterpret the V2DI result as V16QI and extract element 0 as
   the QI result, i.e. only the lowest byte of the low lane is kept.  */
tmp4 = gen_lowpart (V16QImode, tmp4);
emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
DONE;
})
(define_mode_iterator REDUC_PLUS_MODE (define_mode_iterator REDUC_PLUS_MODE
[(V4DF "TARGET_AVX") (V8SF "TARGET_AVX") [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
(V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")]) (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
(V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
(define_expand "reduc_plus_scal_<mode>" (define_expand "reduc_plus_scal_<mode>"
[(plus:REDUC_PLUS_MODE [(plus:REDUC_PLUS_MODE
...@@ -2741,8 +2762,8 @@ ...@@ -2741,8 +2762,8 @@
rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode); rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1])); emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode); rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
emit_insn (gen_add<ssehalfvecmodelower>3 rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
(tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1]))); emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2)); emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
DONE; DONE;
}) })
......
2019-07-31 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/91201
* gcc.target/i386/sse2-pr91201.c: New test.
* gcc.target/i386/avx2-pr91201.c: New test.
* gcc.target/i386/avx512bw-pr91201.c: New test.
2019-07-31 Sudakshina Das <sudi.das@arm.com>
* gcc.target/aarch64/acle/tme.c: New test.
......
/* PR tree-optimization/91201 */
/* { dg-do compile } */
/* { dg-options "-O3 -mavx2 -mno-avx512f" } */
/* { dg-final { scan-assembler "\tvpsadbw\t" } } */
#include "sse2-pr91201.c"
/* PR tree-optimization/91201 */
/* { dg-do compile } */
/* { dg-options "-O3 -mavx512bw -mprefer-vector-width=512" } */
/* { dg-final { scan-assembler "\tvpsadbw\t" } } */
#include "sse2-pr91201.c"
/* PR tree-optimization/91201 */
/* { dg-do compile } */
/* { dg-options "-O3 -msse2 -mno-sse3" } */
/* { dg-final { scan-assembler "\tpsadbw\t" } } */
unsigned char bytes[1024];
/* Return the sum of every element of BYTES.  The accumulator is an
   unsigned char, so the result is reduced modulo UCHAR_MAX + 1.  */
unsigned char
sum (void)
{
  const unsigned char *cur = bytes;
  const unsigned char *const end = bytes + sizeof (bytes);
  unsigned char total = 0;

  /* Walk the whole array once, front to back.  */
  while (cur != end)
    total += *cur++;
  return total;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment