Commit 1536b6ab by Martin Jambor Committed by Martin Jambor

re PR tree-optimization/44423 (Massive performance regression in SSE code due to SRA)

2010-06-09  Martin Jambor  <mjambor@suse.cz>

	PR tree-optimization/44423
	* tree-sra.c (dump_access): Dump also grp_assignment_read.
	(analyze_access_subtree): Pass negative allow_replacements to children
	if the current type is scalar.

	* testsuite/gcc.dg/tree-ssa/pr44423.c: New test.

From-SVN: r160462
parent 2d7f5f9b
2010-06-09 Martin Jambor <mjambor@suse.cz>
PR tree-optimization/44423
* tree-sra.c (dump_access): Dump also grp_assignment_read.
(analyze_access_subtree): Pass negative allow_replacements to children
if the current type is scalar.
2010-06-09 Joern Rennecke <amylaar@spamcop.net>
PR testsuite/42843
......
2010-06-09 Martin Jambor <mjambor@suse.cz>
PR tree-optimization/44423
* gcc.dg/tree-ssa/pr44423.c: New test.
2010-06-09 Joern Rennecke <amylaar@spamcop.net>
PR testsuite/42843
......
/* { dg-do compile { target x86_64-*-* } } */
/* { dg-options "-O2 -fdump-tree-esra-details" } */
#include "xmmintrin.h"
typedef __m128 v4sf; // vector of 4 floats (SSE1)
/* Number of elements in the lpic and pre1 arrays declared in main.  */
#define ARRSZ 1024
/* Overlays four scalar floats with one SSE vector, so the same storage
   can be accessed element by element (f) or as a whole vector (v).  */
typedef union {
float f[4];
v4sf v;
} V4SF;
/* Three float components named r, g and b.  */
struct COLOUR {
float r,g,b;
};
/* Accumulate a scaled copy of colour *A into the first 20 elements of LPIC.

   For each y in [0, 20), computes att = pre1[y] * pre2 and adds
   att * (a->r, a->g, a->b, 0) to lpic[y].v.

   The local union VA is written through its scalar member f and then
   read through its vector member v inside the loop.  The directive at
   the end of this file expects early SRA to create no replacements
   when compiling this file.  */
void func (float *pre1, float pre2, struct COLOUR *a, V4SF *lpic)
{
V4SF va;
int y;
/* Fill the union one float at a time; the fourth lane is zero.  */
va.f[0]=a->r;va.f[1]=a->g;va.f[2]=a->b;va.f[3]=0.f;
for (y=0; y<20; ++y)
{
float att = pre1[y]*pre2;
/* Broadcast the scalar factor into all four lanes.  */
v4sf tmpatt=_mm_load1_ps(&att);
/* Read the union back as a whole vector.  */
tmpatt=_mm_mul_ps(tmpatt,va.v);
lpic[y].v=_mm_add_ps(tmpatt,lpic[y].v);
}
}
/* Driver: sets the first 20 elements of pre1 to 0.4 and calls func
   10000000 times with the colour {0, 2, 4} and the factor 0.3.

   NOTE(review): lpic is never initialized before func reads lpic[y].v.
   The dg-do directive at the top of the file requests compilation
   only, so this program is not run by the testsuite.  */
int main()
{
V4SF lpic[ARRSZ];
float pre1[ARRSZ];
int i;
struct COLOUR col={0.,2.,4.};
/* Only the 20 elements that func reads are initialized.  */
for (i=0; i<20; ++i)
pre1[i]=0.4;
for (i=0;i<10000000;++i)
func(&pre1[0],0.3,&col,&lpic[0]);
return 0;
}
/* { dg-final { scan-tree-dump-times "Created a replacement" 0 "esra"} } */
/* { dg-final { cleanup-tree-dump "esra" } } */
......@@ -356,13 +356,13 @@ dump_access (FILE *f, struct access *access, bool grp)
print_generic_expr (f, access->type, 0);
if (grp)
fprintf (f, ", grp_write = %d, total_scalarization = %d, "
"grp_read = %d, grp_hint = %d, "
"grp_read = %d, grp_hint = %d, grp_assignment_read = %d,"
"grp_covered = %d, grp_unscalarizable_region = %d, "
"grp_unscalarized_data = %d, grp_partial_lhs = %d, "
"grp_to_be_replaced = %d, grp_maybe_modified = %d, "
"grp_not_necessarilly_dereferenced = %d\n",
access->grp_write, access->total_scalarization,
access->grp_read, access->grp_hint,
access->grp_read, access->grp_hint, access->grp_assignment_read,
access->grp_covered, access->grp_unscalarizable_region,
access->grp_unscalarized_data, access->grp_partial_lhs,
access->grp_to_be_replaced, access->grp_maybe_modified,
......@@ -1791,7 +1791,8 @@ analyze_access_subtree (struct access *root, bool allow_replacements,
else
covered_to += child->size;
sth_created |= analyze_access_subtree (child, allow_replacements,
sth_created |= analyze_access_subtree (child,
allow_replacements && !scalar,
mark_read, mark_write);
root->grp_unscalarized_data |= child->grp_unscalarized_data;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment