Commit e95b59d2 by Dorit Nuzman Committed by Dorit Nuzman

target.h (builtin_vectorization_cost): Add new target builtin.

2007-07-12  Dorit Nuzman  <dorit@il.ibm.com>

        * target.h (builtin_vectorization_cost): Add new target builtin.
        * target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New.
        * tree-vectorizer.h (TARG_SCALAR_STMT_COST): New.
        (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST): New.
        * tree-vect-analyze.c (vect_analyze_slp_instance): Initialize
        uninitialized variables.
        * tree-vect-transform.c (cost_for_stmt): New function.
        (vect_estimate_min_profitable_iters): Call cost_for_stmt instead of
        using cost 1 for all scalar stmts. Be less conservative when
        estimating the number of prologue/epilogue iterations. Call
        targetm.vectorize.builtin_vectorization_cost. Return
        min_profitable_iters-1.
        (vect_model_reduction_cost): Use TARG_SCALAR_TO_VEC_COST for
        initialization cost instead of TARG_VEC_STMT_COST. Use
        TARG_VEC_TO_SCALAR_COST instead of TARG_VEC_STMT_COST for reduction
        epilogue code. Fix epilogue cost computation.
        * config/spu/spu.c (spu_builtin_vectorization_cost): New.
        (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Implement.
        * config/spu/spu.h (TARG_COND_BRANCH_COST, TARG_SCALAR_STMT_COST):
        (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST, TARG_VEC_STMT_COST):
        (TARG_VEC_TO_SCALAR_COST, TARG_SCALAR_TO_VEC_COST, TARG_VEC_LOAD_COST):
        (TARG_VEC_UNALIGNED_LOAD_COST, TARG_VEC_STORE_COST): Define.

2007-07-12  Dorit Nuzman  <dorit@il.ibm.com>

        * gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c: Loops now
        get vectorized.
        * gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c: Loops
        now get vectorized.
        * gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp: New.
        * gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-33.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c: New.
        * gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c: New.
        * lib/target-supports.exp (check_effective_target_vect_int_mult):
        Add spu.

From-SVN: r126584
parent e1c82219
2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
* target.h (builtin_vectorization_cost): Add new target builtin.
* target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New.
* tree-vectorizer.h (TARG_SCALAR_STMT_COST): New.
(TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST): New.
* tree-vect-analyze.c (vect_analyze_slp_instance): Initialize
uninitialized variables.
* tree-vect-transform.c (cost_for_stmt): New function.
(vect_estimate_min_profitable_iters): Call cost_for_stmt instead of
using cost 1 for all scalar stmts. Be less conservative when
estimating the number of prologue/epilogue iterations. Call
targetm.vectorize.builtin_vectorization_cost. Return
min_profitable_iters-1.
(vect_model_reduction_cost): Use TARG_SCALAR_TO_VEC_COST for
initialization cost instead of TARG_VEC_STMT_COST. Use
TARG_VEC_TO_SCALAR_COST instead of TARG_VEC_STMT_COST for reduction
epilogue code. Fix epilogue cost computation.
* config/spu/spu.c (spu_builtin_vectorization_cost): New.
(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Implement.
* config/spu/spu.h (TARG_COND_BRANCH_COST, TARG_SCALAR_STMT_COST):
(TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST, TARG_VEC_STMT_COST):
(TARG_VEC_TO_SCALAR_COST, TARG_SCALAR_TO_VEC_COST, TARG_VEC_LOAD_COST):
(TARG_VEC_UNALIGNED_LOAD_COST, TARG_VEC_STORE_COST): Define.
2007-07-12 Richard Guenther <rguenther@suse.de> 2007-07-12 Richard Guenther <rguenther@suse.de>
* gimplify.c (gimplify_conversion): Make sure that the result * gimplify.c (gimplify_conversion): Make sure that the result
......
...@@ -133,6 +133,7 @@ static void spu_encode_section_info (tree, rtx, int); ...@@ -133,6 +133,7 @@ static void spu_encode_section_info (tree, rtx, int);
static tree spu_builtin_mul_widen_even (tree); static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree); static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void); static tree spu_builtin_mask_for_load (void);
static int spu_builtin_vectorization_cost (bool);
extern const char *reg_names[]; extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1; rtx spu_compare_op0, spu_compare_op1;
...@@ -261,6 +262,9 @@ const struct attribute_spec spu_attribute_table[]; ...@@ -261,6 +262,9 @@ const struct attribute_spec spu_attribute_table[];
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
struct gcc_target targetm = TARGET_INITIALIZER; struct gcc_target targetm = TARGET_INITIALIZER;
void void
...@@ -5191,6 +5195,21 @@ spu_builtin_mask_for_load (void) ...@@ -5191,6 +5195,21 @@ spu_builtin_mask_for_load (void)
return d->fndecl; return d->fndecl;
} }
/* Implement targetm.vectorize.builtin_vectorization_cost.

   RUNTIME_TEST is true when a runtime test guards the vectorized loop.
   Returns the adjustment to add to the overhead of running the
   vectorized version: -19 when there is a runtime test, 0 otherwise.  */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
  /* When the branch of the runtime test is taken, the vectorized version
     is skipped.  The vectorized version is expected to be the
     fall-through path, so the skipping path pays for a mispredicted
     branch.  That latency is modelled as a credit: it is subtracted from
     the costs incurred when the vectorized version is executed.  */
  return runtime_test ? -19 : 0;
}
void void
spu_init_expanders (void) spu_init_expanders (void)
{ {
......
...@@ -542,6 +542,52 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \ ...@@ -542,6 +542,52 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \
do { if (LOG!=0) fprintf (FILE, "\t.align\t%d\n", (LOG)); } while (0) do { if (LOG!=0) fprintf (FILE, "\t.align\t%d\n", (LOG)); } while (0)
/* Model costs for the vectorizer. */
/* NOTE(review): the first two macros below are wrapped in #ifndef, so a
   definition seen earlier takes precedence over the value given here.  All
   the remaining macros use #undef and therefore replace any earlier
   definition unconditionally.  Confirm that this asymmetry is intended. */
/* Cost of conditional branch. */
#ifndef TARG_COND_BRANCH_COST
#define TARG_COND_BRANCH_COST 6
#endif
/* Cost of any scalar operation, excluding load and store. */
#ifndef TARG_SCALAR_STMT_COST
#define TARG_SCALAR_STMT_COST 1
#endif
/* Cost of scalar load. */
#undef TARG_SCALAR_LOAD_COST
#define TARG_SCALAR_LOAD_COST 2 /* load + rotate */
/* Cost of scalar store. */
#undef TARG_SCALAR_STORE_COST
#define TARG_SCALAR_STORE_COST 10
/* Cost of any vector operation, excluding load, store,
or vector to scalar operation. */
#undef TARG_VEC_STMT_COST
#define TARG_VEC_STMT_COST 1
/* Cost of vector to scalar operation. */
#undef TARG_VEC_TO_SCALAR_COST
#define TARG_VEC_TO_SCALAR_COST 1
/* Cost of scalar to vector operation. */
#undef TARG_SCALAR_TO_VEC_COST
#define TARG_SCALAR_TO_VEC_COST 1
/* Cost of aligned vector load. */
#undef TARG_VEC_LOAD_COST
#define TARG_VEC_LOAD_COST 1
/* Cost of misaligned vector load. */
#undef TARG_VEC_UNALIGNED_LOAD_COST
#define TARG_VEC_UNALIGNED_LOAD_COST 2
/* Cost of vector store. */
#undef TARG_VEC_STORE_COST
#define TARG_VEC_STORE_COST 1
/* Misc */ /* Misc */
#define CASE_VECTOR_MODE SImode #define CASE_VECTOR_MODE SImode
......
...@@ -356,6 +356,7 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ...@@ -356,6 +356,7 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
default_builtin_vectorized_conversion default_builtin_vectorized_conversion
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 0
#define TARGET_VECTORIZE \ #define TARGET_VECTORIZE \
{ \ { \
...@@ -363,7 +364,8 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. ...@@ -363,7 +364,8 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION, \ TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION, \
TARGET_VECTORIZE_BUILTIN_CONVERSION, \ TARGET_VECTORIZE_BUILTIN_CONVERSION, \
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \ TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD \ TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD, \
TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
} }
#define TARGET_DEFAULT_TARGET_FLAGS 0 #define TARGET_DEFAULT_TARGET_FLAGS 0
......
...@@ -413,6 +413,10 @@ struct gcc_target ...@@ -413,6 +413,10 @@ struct gcc_target
element-by-element products for the odd elements. */ element-by-element products for the odd elements. */
tree (* builtin_mul_widen_even) (tree); tree (* builtin_mul_widen_even) (tree);
tree (* builtin_mul_widen_odd) (tree); tree (* builtin_mul_widen_odd) (tree);
/* Returns the cost to be added to the overheads involved with
executing the vectorized version of a loop. */
int (*builtin_vectorization_cost) (bool);
} vectorize; } vectorize;
/* The initial value of target_flags. */ /* The initial value of target_flags. */
......
2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
* gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c: Loops now
get vectorized.
* gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c: Loops
now get vectorized.
* gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp: New.
* gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-33.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c: New.
* gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c: New.
* lib/target-supports.exp (check_effective_target_vect_int_mult):
Add spu.
2007-07-12 Jakub Jelinek <jakub@redhat.com> 2007-07-12 Jakub Jelinek <jakub@redhat.com>
PR c++/30854 PR c++/30854
...@@ -46,6 +46,6 @@ int main (void) ...@@ -46,6 +46,6 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_max } } } */ /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_int_max } } } */
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 2 "vect" { xfail vect_no_int_max } } } */ /* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -46,6 +46,6 @@ int main (void) ...@@ -46,6 +46,6 @@ int main (void)
return 0; return 0;
} }
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_max } } } */ /* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_int_max } } } */
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 2 "vect" { xfail vect_no_int_max } } } */ /* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */ /* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_float } */
#include <stdlib.h>
#include "../../tree-vect.h"
/* For every i in [0, len), store into interp[i] the sum of the seven
   consecutive elements of exc starting at index i - pitch + maxj - 6,
   where maxj is fixed at 3.  No bounds checking is done here; the caller
   must size exc and interp so that every index touched is valid. */
void interp_pitch(float *exc, float *interp, int pitch, int len)
{
int i,k;
int maxj;
maxj=3;
for (i=0;i<len;i++)
{
float tmp = 0;
/* Inner reduction with a constant trip count of 7. */
for (k=0;k<7;k++)
{
tmp += exc[i-pitch+k+maxj-6];
}
interp[i] = tmp;
}
}
/* Driver: allocates a zero-filled input of 126 floats and a zero-filled
   output of 80 floats, runs interp_pitch over them with pitch = -35 and
   len = 80, then frees both buffers.  Always returns 0.
   NOTE(review): the calloc results are used without a NULL check. */
int main()
{
float *exc = calloc(126,sizeof(float));
float *interp = calloc(80,sizeof(float));
int pitch = -35;
check_vect ();
/* With these arguments interp_pitch reads exc[32] .. exc[117]. */
interp_pitch(exc, interp, pitch, 80);
free(exc);
free(interp);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct t{
int k[N];
int l;
};
struct s{
char a; /* aligned */
char b[N-1]; /* unaligned (offset 1B) */
char c[N]; /* aligned (offset NB) */
struct t d; /* aligned (offset 2NB) */
struct t e; /* unaligned (offset 2N+4N+4 B) */
};
/* Store 5 into tmp.b[0] .. tmp.b[N/2 - 1], then read the same elements
   back and call abort () on the first mismatch.  Returns 0 when every
   element holds the stored value. */
int main1 ()
{
int i;
struct s tmp;
/* unaligned */
for (i = 0; i < N/2; i++)
{
tmp.b[i] = 5;
}
/* check results: */
for (i = 0; i <N/2; i++)
{
if (tmp.b[i] != 5)
abort ();
}
return 0;
}
/* Entry point: calls check_vect () and then returns whatever main1 ()
   returns. */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct t{
int k[N];
int l;
};
struct s{
char a; /* aligned */
char b[N-1]; /* unaligned (offset 1B) */
char c[N]; /* aligned (offset NB) */
struct t d; /* aligned (offset 2NB) */
struct t e; /* unaligned (offset 2N+4N+4 B) */
};
/* Store 6 into tmp.c[0] .. tmp.c[N/2 - 1], then read the same elements
   back and call abort () on the first mismatch.  Returns 0 when every
   element holds the stored value. */
int main1 ()
{
int i;
struct s tmp;
/* aligned */
for (i = 0; i < N/2; i++)
{
tmp.c[i] = 6;
}
/* check results: */
for (i = 0; i <N/2; i++)
{
if (tmp.c[i] != 6)
abort ();
}
return 0;
}
/* Entry point: calls check_vect () and then returns whatever main1 ()
   returns. */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct t{
int k[N];
int l;
};
struct s{
char a; /* aligned */
char b[N-1]; /* unaligned (offset 1B) */
char c[N]; /* aligned (offset NB) */
struct t d; /* aligned (offset 2NB) */
struct t e; /* unaligned (offset 2N+4N+4 B) */
};
/* Store 7 into tmp.d.k[0] .. tmp.d.k[N/2 - 1], then read the same
   elements back and call abort () on the first mismatch.  Returns 0 when
   every element holds the stored value. */
int main1 ()
{
int i;
struct s tmp;
/* aligned */
for (i = 0; i < N/2; i++)
{
tmp.d.k[i] = 7;
}
/* check results: */
for (i = 0; i <N/2; i++)
{
if (tmp.d.k[i] != 7)
abort ();
}
return 0;
}
/* Entry point: calls check_vect () and then returns whatever main1 ()
   returns. */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct t{
int k[N];
int l;
};
struct s{
char a; /* aligned */
char b[N-1]; /* unaligned (offset 1B) */
char c[N]; /* aligned (offset NB) */
struct t d; /* aligned (offset 2NB) */
struct t e; /* unaligned (offset 2N+4N+4 B) */
};
/* Store 8 into tmp.e.k[0] .. tmp.e.k[N/2 - 1], then read the same
   elements back and call abort () on the first mismatch.  Returns 0 when
   every element holds the stored value. */
int main1 ()
{
int i;
struct s tmp;
/* unaligned */
for (i = 0; i < N/2; i++)
{
tmp.e.k[i] = 8;
}
/* check results: */
for (i = 0; i <N/2; i++)
{
if (tmp.e.k[i] != 8)
abort ();
}
return 0;
}
/* Entry point: calls check_vect () and then returns whatever main1 ()
   returns. */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 16
struct test {
char ca[N];
};
extern struct test s;
/* Store 5 into all N elements of the external object s.ca, then read them
   back and call abort () on the first mismatch.  Returns 0 when every
   element holds the stored value. */
int main1 ()
{
int i;
for (i = 0; i < N; i++)
{
s.ca[i] = 5;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (s.ca[i] != 5)
abort ();
}
return 0;
}
/* Entry point: returns whatever main1 () returns.  Unlike the sibling
   tests there is no check_vect () call here; this file is marked
   dg-do compile. */
int main (void)
{
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct s{
int m;
int n[N][N][N];
};
struct test1{
struct s a; /* array a.n is unaligned */
int b;
int c;
struct s e; /* array e.n is aligned */
};
/* Store 5 into tmp1.a.n[1][2][0 .. N-1], then read the same elements back
   and call abort () on the first mismatch.  Returns 0 when every element
   holds the stored value.
   NOTE(review): the local j is declared but never used. */
int main1 ()
{
int i,j;
struct test1 tmp1;
/* 1. unaligned */
for (i = 0; i < N; i++)
{
tmp1.a.n[1][2][i] = 5;
}
/* check results: */
for (i = 0; i <N; i++)
{
if (tmp1.a.n[1][2][i] != 5)
abort ();
}
return 0;
}
/* Entry point: calls check_vect () and then returns whatever main1 ()
   returns. */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct s{
int m;
int n[N][N][N];
};
struct test1{
struct s a; /* array a.n is unaligned */
int b;
int c;
struct s e; /* array e.n is aligned */
};
/* Store 6 into tmp1.a.n[1][2][3 .. N-2], then read the same elements back
   and call abort () on the first mismatch.  Returns 0 when every element
   holds the stored value.
   NOTE(review): the local j is declared but never used. */
int main1 ()
{
int i,j;
struct test1 tmp1;
/* 2. aligned */
for (i = 3; i < N-1; i++)
{
tmp1.a.n[1][2][i] = 6;
}
/* check results: */
for (i = 3; i < N-1; i++)
{
if (tmp1.a.n[1][2][i] != 6)
abort ();
}
return 0;
}
/* Entry point: calls check_vect () and then returns whatever main1 ()
   returns. */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 32
struct s{
int m;
int n[N][N][N];
};
struct test1{
struct s a; /* array a.n is unaligned */
int b;
int c;
struct s e; /* array e.n is aligned */
};
/* Store 7 into tmp1.e.n[1][2][0 .. N-1], then read the same elements back
   and call abort () on the first mismatch.  Returns 0 when every element
   holds the stored value.
   NOTE(review): the local j is declared but never used. */
int main1 ()
{
int i,j;
struct test1 tmp1;
/* 3. aligned */
for (i = 0; i < N; i++)
{
tmp1.e.n[1][2][i] = 7;
}
/* check results: */
for (i = 0; i < N; i++)
{
if (tmp1.e.n[1][2][i] != 7)
abort ();
}
return 0;
}
/* Entry point: calls check_vect () and then returns whatever main1 ()
   returns. */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 20
struct s{
int m;
int n[N][N][N];
};
struct test1{
struct s a; /* array a.n is unaligned */
int b;
int c;
struct s e; /* array e.n is aligned */
};
/* Store 8 into tmp1.e.n[1][2][3 .. N-4], then read the same elements back
   and call abort () on the first mismatch.  Returns 0 when every element
   holds the stored value.
   NOTE(review): the local j is declared but never used. */
int main1 ()
{
int i,j;
struct test1 tmp1;
/* 4. unaligned */
for (i = 3; i < N-3; i++)
{
tmp1.e.n[1][2][i] = 8;
}
/* check results: */
for (i = 3; i <N-3; i++)
{
if (tmp1.e.n[1][2][i] != 8)
abort ();
}
return 0;
}
/* Entry point: calls check_vect () and then returns whatever main1 ()
   returns. */
int main (void)
{
check_vect ();
return main1 ();
}
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 8
#define OFF 4
/* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is
more involved than just an ssa_name. */
int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
/* Copy pib[0 .. N-OFF-1] into the local array ia at indices OFF .. N-1,
   then compare the copies against the source and call abort () on the
   first mismatch.  Returns 0 when all copies match.
   NOTE(review): the local array ic is initialized but never read here. */
int main1 (int *pib)
{
int i;
int ia[N+OFF];
int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
for (i = OFF; i < N; i++)
{
ia[i] = pib[i - OFF];
}
/* check results: */
for (i = OFF; i < N; i++)
{
if (ia[i] != pib[i - OFF])
abort ();
}
return 0;
}
/* Entry point: calls check_vect (), then runs main1 on the address of
   ib[OFF].  The value returned by main1 is ignored; always returns 0. */
int main (void)
{
check_vect ();
main1 (&ib[OFF]);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 8
#define OFF 4
/* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is
more involved than just an ssa_name. */
int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
/* Copy the local ic[OFF .. N-1] into pib[0 .. N-OFF-1], then compare the
   stored values against the source and call abort () on the first
   mismatch.  Returns 0 when all copies match.
   NOTE(review): the local array ia is declared but never used here. */
int main1 (int *pib)
{
int i;
int ia[N+OFF];
int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
for (i = OFF; i < N; i++)
{
pib[i - OFF] = ic[i];
}
/* check results: */
for (i = OFF; i < N; i++)
{
if (pib[i - OFF] != ic[i])
abort ();
}
return 0;
}
/* Entry point: calls check_vect (), then runs main1 on the address of
   ib[OFF].  The value returned by main1 is ignored; always returns 0. */
int main (void)
{
check_vect ();
main1 (&ib[OFF]);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 8
#define OFF 4
/* Check handling of accesses for which the "initial condition" -
the expression that represents the first location accessed - is
more involved than just an ssa_name. */
int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
/* Copy the local ic[0 .. N-OFF-1] into the local ia at indices
   OFF .. N-1, then compare the copies against the source and call
   abort () on the first mismatch.  Returns 0 when all copies match.
   NOTE(review): the parameter pib is never used in this function. */
int main1 (int *pib)
{
int i;
int ia[N+OFF];
int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
for (i = OFF; i < N; i++)
{
ia[i] = ic[i - OFF];
}
/* check results: */
for (i = OFF; i < N; i++)
{
if (ia[i] != ic[i - OFF])
abort ();
}
return 0;
}
/* Entry point: calls check_vect (), then runs main1 on the address of
   ib[OFF].  The value returned by main1 is ignored; always returns 0. */
int main (void)
{
check_vect ();
main1 (&ib[OFF]);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "../../tree-vect.h"
#define N 26
int a[N];
/* Return X plus the sum of (i + a[i]) over i in [0, N), reading the
   file-scope array a. */
int main1 (int X)
{
int s = X;
int i;
/* vectorization of reduction with induction. */
for (i = 0; i < N; i++)
s += (i + a[i]);
return s;
}
/* Entry point: calls check_vect (), fills a[i] with 2*i, and checks that
   main1 (3) returns 978, calling abort () otherwise.  Returns 0 on
   success. */
int main (void)
{
int s, i;
check_vect ();
for (i = 0; i < N; i++)
a[i] = 2*i;
s = main1 (3);
/* Each term i + a[i] is 3*i, so for N = 26 the expected value is
   3 + 3 * (0 + 1 + ... + 25) = 3 + 3 * 325 = 978. */
if (s != 978)
abort ();
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_int_mult } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target {! vect_int_mult } } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
# Copyright (C) 1997, 2004, 2005, 2006 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# GCC testsuite that uses the `dg.exp' driver.
# Load support procs.
load_lib gcc-dg.exp
# Exit immediately if this isn't an SPU target.
if { ![istarget spu*-*-*] } then {
return
}
# Set up flags used for tests that don't specify options.
set DEFAULT_VECTCFLAGS ""
# These flags are used for all targets.
lappend DEFAULT_VECTCFLAGS "-O2" "-ftree-vectorize" "-fvect-cost-model"
# If the target system supports vector instructions, the default action
# for a test is 'run', otherwise it's 'compile'. Save current default.
# Executing vector instructions on a system without hardware vector support
# is also disabled by a call to check_vect, but disabling execution here is
# more efficient.
# NOTE(review): in this script the default is set to 'run' unconditionally;
# no hardware check selects 'compile' here.
global dg-do-what-default
set save-dg-do-what-default ${dg-do-what-default}
set dg-do-what-default run
# Initialize `dg'.
dg-init
# Ask for the detailed vectorizer dump in every test run below.
lappend DEFAULT_VECTCFLAGS "-fdump-tree-vect-details"
# Main loop.
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-pr*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-vect-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
#### Tests with special options
# Remember the common flag set so each special-option group can start from it.
global SAVED_DEFAULT_VECTCFLAGS
set SAVED_DEFAULT_VECTCFLAGS $DEFAULT_VECTCFLAGS
# -ffast-math tests
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
lappend DEFAULT_VECTCFLAGS "-ffast-math"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-fast-math-vect*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
# Clean up.
set dg-do-what-default ${save-dg-do-what-default}
# All done.
dg-finish
...@@ -2039,6 +2039,7 @@ proc check_effective_target_vect_int_mult { } { ...@@ -2039,6 +2039,7 @@ proc check_effective_target_vect_int_mult { } {
} else { } else {
set et_vect_int_mult_saved 0 set et_vect_int_mult_saved 0
if { [istarget powerpc*-*-*] if { [istarget powerpc*-*-*]
|| [istarget spu-*-*]
|| [istarget i?86-*-*] || [istarget i?86-*-*]
|| [istarget x86_64-*-*] } { || [istarget x86_64-*-*] } {
set et_vect_int_mult_saved 1 set et_vect_int_mult_saved 1
......
...@@ -74,6 +74,34 @@ static void vect_update_inits_of_drs (loop_vec_info, tree); ...@@ -74,6 +74,34 @@ static void vect_update_inits_of_drs (loop_vec_info, tree);
static int vect_min_worthwhile_factor (enum tree_code); static int vect_min_worthwhile_factor (enum tree_code);
/* Return the scalar cost of STMT according to the kind recorded in its
   stmt_vec_info: TARG_SCALAR_LOAD_COST for loads, TARG_SCALAR_STORE_COST
   for stores, and TARG_SCALAR_STMT_COST for every other recognized kind.
   An undefined or unrecognized kind hits gcc_unreachable ().  */
static int
cost_for_stmt (tree stmt)
{
  stmt_vec_info info = vinfo_for_stmt (stmt);
  int cost;

  switch (STMT_VINFO_TYPE (info))
    {
    case load_vec_info_type:
      cost = TARG_SCALAR_LOAD_COST;
      break;

    case store_vec_info_type:
      cost = TARG_SCALAR_STORE_COST;
      break;

    /* Every remaining recognized kind is costed as a plain scalar
       statement.  */
    case op_vec_info_type:
    case condition_vec_info_type:
    case assignment_vec_info_type:
    case reduc_vec_info_type:
    case induc_vec_info_type:
    case type_promotion_vec_info_type:
    case type_demotion_vec_info_type:
    case type_conversion_vec_info_type:
    case call_vec_info_type:
      cost = TARG_SCALAR_STMT_COST;
      break;

    case undef_vec_info_type:
    default:
      gcc_unreachable ();
    }

  return cost;
}
/* Function vect_estimate_min_profitable_iters /* Function vect_estimate_min_profitable_iters
Return the number of iterations required for the vector version of the Return the number of iterations required for the vector version of the
...@@ -138,7 +166,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) ...@@ -138,7 +166,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
if (!STMT_VINFO_RELEVANT_P (stmt_info) if (!STMT_VINFO_RELEVANT_P (stmt_info)
&& !STMT_VINFO_LIVE_P (stmt_info)) && !STMT_VINFO_LIVE_P (stmt_info))
continue; continue;
scalar_single_iter_cost++; scalar_single_iter_cost += cost_for_stmt (stmt);
vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info); vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info); vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
} }
...@@ -148,7 +176,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) ...@@ -148,7 +176,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
loop. loop.
FORNOW: If we dont know the value of peel_iters for prologue or epilogue FORNOW: If we dont know the value of peel_iters for prologue or epilogue
at compile-time - we assume the worst. at compile-time - we assume it's (vf-1)/2 (the worst would be vf-1).
TODO: Build an expression that represents peel_iters for prologue and TODO: Build an expression that represents peel_iters for prologue and
epilogue to be used in a run-time test. */ epilogue to be used in a run-time test. */
...@@ -157,17 +185,17 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) ...@@ -157,17 +185,17 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
if (byte_misalign < 0) if (byte_misalign < 0)
{ {
peel_iters_prologue = vf - 1; peel_iters_prologue = (vf - 1)/2;
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "cost model: " fprintf (vect_dump, "cost model: "
"prologue peel iters set conservatively."); "prologue peel iters set to (vf-1)/2.");
/* If peeling for alignment is unknown, loop bound of main loop becomes /* If peeling for alignment is unknown, loop bound of main loop becomes
unknown. */ unknown. */
peel_iters_epilogue = vf - 1; peel_iters_epilogue = (vf - 1)/2;
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "cost model: " fprintf (vect_dump, "cost model: "
"epilogue peel iters set conservatively because " "epilogue peel iters set to (vf-1)/2 because "
"peeling for alignment is unknown ."); "peeling for alignment is unknown .");
} }
else else
...@@ -186,10 +214,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) ...@@ -186,10 +214,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
{ {
peel_iters_epilogue = vf - 1; peel_iters_epilogue = (vf - 1)/2;
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "cost model: " fprintf (vect_dump, "cost model: "
"epilogue peel iters set conservatively because " "epilogue peel iters set to (vf-1)/2 because "
"loop iterations are unknown ."); "loop iterations are unknown .");
} }
else else
...@@ -229,6 +257,26 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) ...@@ -229,6 +257,26 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost) vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
+ (peel_iters_epilogue * scalar_single_iter_cost); + (peel_iters_epilogue * scalar_single_iter_cost);
/* Allow targets to add additional (outside-of-loop) costs. FORNOW, the only
information we provide for the target is whether testing against the
threshold involves a runtime test. */
if (targetm.vectorize.builtin_vectorization_cost)
{
bool runtime_test = false;
/* If the number of iterations is unknown, or the
peeling-for-misalignment amount is unknown, we will have to generate
a runtime test to test the loop count against the threshold. */
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|| (byte_misalign < 0))
runtime_test = true;
vec_outside_cost +=
targetm.vectorize.builtin_vectorization_cost (runtime_test);
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
targetm.vectorize.builtin_vectorization_cost (runtime_test));
}
/* Calculate number of iterations required to make the vector version /* Calculate number of iterations required to make the vector version
profitable, relative to the loop bodies only. The following condition profitable, relative to the loop bodies only. The following condition
must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
...@@ -280,7 +328,14 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo) ...@@ -280,7 +328,14 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
min_profitable_iters < vf ? vf : min_profitable_iters); min_profitable_iters < vf ? vf : min_profitable_iters);
} }
return min_profitable_iters < vf ? vf : min_profitable_iters; min_profitable_iters =
min_profitable_iters < vf ? vf : min_profitable_iters;
/* Because the condition we create is:
if (niters <= min_profitable_iters)
then skip the vectorized loop. */
min_profitable_iters--;
return min_profitable_iters;
} }
...@@ -321,7 +376,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, ...@@ -321,7 +376,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1)); code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
/* Add in cost for initial definition. */ /* Add in cost for initial definition. */
outer_cost += TARG_VEC_STMT_COST; outer_cost += TARG_SCALAR_TO_VEC_COST;
/* Determine cost of epilogue code. /* Determine cost of epilogue code.
...@@ -341,11 +396,13 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, ...@@ -341,11 +396,13 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
optab = optab_for_tree_code (code, vectype); optab = optab_for_tree_code (code, vectype);
/* We have a whole vector shift available. */ /* We have a whole vector shift available. */
if (!VECTOR_MODE_P (mode) if (VECTOR_MODE_P (mode)
|| optab->handlers[mode].insn_code == CODE_FOR_nothing) && optab->handlers[mode].insn_code != CODE_FOR_nothing
&& vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
/* Final reduction via vector shifts and the reduction operator. Also /* Final reduction via vector shifts and the reduction operator. Also
requires scalar extract. */ requires scalar extract. */
outer_cost += ((exact_log2(nelements) * 2 + 1) * TARG_VEC_STMT_COST); outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
+ TARG_VEC_TO_SCALAR_COST);
else else
/* Use extracts and reduction op for final reduction. For N elements, /* Use extracts and reduction op for final reduction. For N elements,
we have N extracts and N-1 reduction ops. */ we have N extracts and N-1 reduction ops. */
......
...@@ -326,6 +326,21 @@ typedef struct _stmt_vec_info { ...@@ -326,6 +326,21 @@ typedef struct _stmt_vec_info {
#define TARG_COND_BRANCH_COST 3 #define TARG_COND_BRANCH_COST 3
#endif #endif
/* Cost of any scalar operation, excluding load and store. */
#ifndef TARG_SCALAR_STMT_COST
#define TARG_SCALAR_STMT_COST 1
#endif
/* Cost of scalar load. */
#ifndef TARG_SCALAR_LOAD_COST
#define TARG_SCALAR_LOAD_COST 1
#endif
/* Cost of scalar store. */
#ifndef TARG_SCALAR_STORE_COST
#define TARG_SCALAR_STORE_COST 1
#endif
/* Cost of any vector operation, excluding load, store or vector to scalar /* Cost of any vector operation, excluding load, store or vector to scalar
operation. */ operation. */
#ifndef TARG_VEC_STMT_COST #ifndef TARG_VEC_STMT_COST
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment