Commit 2ef27856 by Richard Biener Committed by Richard Biener

re PR tree-optimization/92819 (Worse code generated on avx2 due to simplify_vector_constructor)

2019-12-06  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/92819
	* match.pd (VEC_PERM_EXPR -> BIT_INSERT_EXPR): Handle inserts
	into the last lane.  For two-element vectors try inserting
	into the last lane when inserting into the first fails.

	* gcc.target/i386/pr92819-1.c: New testcase.
	* gcc.target/i386/pr92803.c: Adjust.

From-SVN: r279033
parent 9961856c
2019-12-06 Richard Biener <rguenther@suse.de>
PR tree-optimization/92819
* match.pd (VEC_PERM_EXPR -> BIT_INSERT_EXPR): Handle inserts
into the last lane. For two-element vectors try inserting
into the last lane when inserting into the first fails.
2019-12-06 Jakub Jelinek <jakub@redhat.com> 2019-12-06 Jakub Jelinek <jakub@redhat.com>
* common.opt (fprofile-partial-training): Terminate description with * common.opt (fprofile-partial-training): Terminate description with
...@@ -6032,7 +6032,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) ...@@ -6032,7 +6032,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| TREE_CODE (cop1) == VECTOR_CST || TREE_CODE (cop1) == VECTOR_CST
|| TREE_CODE (cop1) == CONSTRUCTOR)) || TREE_CODE (cop1) == CONSTRUCTOR))
{ {
if (sel.series_p (1, 1, nelts + 1, 1)) bool insert_first_p = sel.series_p (1, 1, nelts + 1, 1);
if (insert_first_p)
{ {
/* After canonicalizing the first elt to come from the /* After canonicalizing the first elt to come from the
first vector we only can insert the first elt from first vector we only can insert the first elt from
...@@ -6041,13 +6042,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) ...@@ -6041,13 +6042,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
if ((ins = fold_read_from_vector (cop0, sel[0]))) if ((ins = fold_read_from_vector (cop0, sel[0])))
op0 = op1; op0 = op1;
} }
else /* The above can fail for two-element vectors which always
appear to insert the first element, so try inserting
into the second lane as well. For more than two
elements that's wasted time. */
if (!insert_first_p || (!ins && maybe_eq (nelts, 2u)))
{ {
unsigned int encoded_nelts = sel.encoding ().encoded_nelts (); unsigned int encoded_nelts = sel.encoding ().encoded_nelts ();
for (at = 0; at < encoded_nelts; ++at) for (at = 0; at < encoded_nelts; ++at)
if (maybe_ne (sel[at], at)) if (maybe_ne (sel[at], at))
break; break;
if (at < encoded_nelts && sel.series_p (at + 1, 1, at + 1, 1)) if (at < encoded_nelts
&& (known_eq (at + 1, nelts)
|| sel.series_p (at + 1, 1, at + 1, 1)))
{ {
if (known_lt (poly_uint64 (sel[at]), nelts)) if (known_lt (poly_uint64 (sel[at]), nelts))
ins = fold_read_from_vector (cop0, sel[at]); ins = fold_read_from_vector (cop0, sel[at]);
......
2019-12-06 Richard Biener <rguenther@suse.de>
PR tree-optimization/92819
* gcc.target/i386/pr92819-1.c: New testcase.
* gcc.target/i386/pr92803.c: Adjust.
2019-12-05 Martin Sebor <msebor@redhat.com> 2019-12-05 Martin Sebor <msebor@redhat.com>
PR testsuite/92829 PR testsuite/92829
......
...@@ -31,8 +31,10 @@ barf (v8sf x) ...@@ -31,8 +31,10 @@ barf (v8sf x)
return (v4sf) { x[4], x[5], 1.0f, 2.0f }; return (v4sf) { x[4], x[5], 1.0f, 2.0f };
} }
/* We expect all CTORs to turn into permutes, the FP converting ones /* For bar we do two inserts, first zero, then convert, then insert *p. */
/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */
/* We expect all other CTORs to turn into permutes, the FP converting ones
to two each with the one with constants possibly elided in the future to two each with the one with constants possibly elided in the future
by converting 3.0f and 1.0f "back" to integers. */ by converting 3.0f and 1.0f "back" to integers. */
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 6 "forwprop1" } } */ /* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 4 "forwprop1" } } */
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 5 "forwprop1" { xfail *-*-* } } } */ /* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 3 "forwprop1" { xfail *-*-* } } } */
/* { dg-do compile } */
/* { dg-options "-O -msse2 -fdump-tree-forwprop1" } */
/* GCC vector-extension type: a 16-byte vector of doubles.  The functions
   in this file build it from exactly two elements (assumes an 8-byte
   double, as on the i386 targets this test is written for).  */
typedef double v2df __attribute__((vector_size (16)));
/* Return a vector whose first element is x[0] and whose second element is
   the double loaded through p, i.e. x with its last lane replaced by *p.
   The constructor is written out literally on purpose: the scans at the
   end of the file check how the compiler lowers this exact form.
   NOTE(review): presumably this is the "insert into the last lane" case
   and accounts for one of the two expected BIT_INSERT_EXPR matches --
   confirm against the forwprop1 dump.  */
v2df
foo (v2df x, double *p)
{
return (v2df) { x[0], *p };
}
/* Return a vector whose first element is the double loaded through p and
   whose second element is x[1], i.e. x with its first lane replaced by *p.
   The constructor is written out literally on purpose: the scans at the
   end of the file check how the compiler lowers this exact form.
   NOTE(review): presumably this is the "insert into the first lane" case
   and accounts for the other expected BIT_INSERT_EXPR match -- confirm
   against the forwprop1 dump.  */
v2df
bar (v2df x, double *p)
{
return (v2df) { *p, x[1] };
}
/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 2 "forwprop1" } } */
/* { dg-final { scan-assembler "movhpd" } } */
/* { dg-final { scan-assembler "movlpd" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment