Commit 1107f3ae by Ira Rosen Committed by Ira Rosen

tree-vectorizer.h (struct _stmt_vec_info): Add new field for pattern def…

tree-vectorizer.h (struct _stmt_vec_info): Add new field for pattern def statement, and its access macro.


        * tree-vectorizer.h (struct _stmt_vec_info): Add new field for
        pattern def statement, and its access macro.
        (NUM_PATTERNS): Set to 5.
        * tree-vect-loop.c (vect_determine_vectorization_factor): Handle
        pattern def statement.
        (vect_transform_loop): Likewise.
        * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add new
        function vect_recog_over_widening_pattern ().
        (vect_operation_fits_smaller_type): New function.
        (vect_recog_over_widening_pattern, vect_mark_pattern_stmts):
        Likewise.
        (vect_pattern_recog_1): Move the code that marks pattern
        statements to vect_mark_pattern_stmts (), and call it.  Update
        documentation.
        * tree-vect-stmts.c (vect_supportable_shift): New function.
        (vect_analyze_stmt): Handle pattern def statement.
        (new_stmt_vec_info): Initialize pattern def statement.

From-SVN: r177409
parent 6394830f
2011-08-04 Ira Rosen <ira.rosen@linaro.org>
* tree-vectorizer.h (struct _stmt_vec_info): Add new field for
pattern def statement, and its access macro.
(NUM_PATTERNS): Set to 5.
* tree-vect-loop.c (vect_determine_vectorization_factor): Handle
pattern def statement.
(vect_transform_loop): Likewise.
* tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add new
function vect_recog_over_widening_pattern ().
(vect_operation_fits_smaller_type): New function.
(vect_recog_over_widening_pattern, vect_mark_pattern_stmts):
Likewise.
(vect_pattern_recog_1): Move the code that marks pattern
statements to vect_mark_pattern_stmts (), and call it. Update
documentation.
* tree-vect-stmts.c (vect_supportable_shift): New function.
(vect_analyze_stmt): Handle pattern def statement.
(new_stmt_vec_info): Initialize pattern def statement.
2011-08-04 Richard Henderson <rth@redhat.com>
PR target/49964
......
2011-08-04 Ira Rosen <ira.rosen@linaro.org>
* gcc.dg/vect/vect-over-widen-1.c: New test.
* gcc.dg/vect/vect-over-widen-2.c: New test.
* gcc.dg/vect/vect-over-widen-3.c: New test.
* gcc.dg/vect/vect-over-widen-4.c: New test.
2011-08-04 Richard Guenther <rguenther@suse.de>
PR fortran/49957
......
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Modified rgb to rgb conversion from FFmpeg.

   Reads SRC four bytes at a time (b, g, r, a), combines shifted and masked
   pieces of them with bitwise OR in int arithmetic, and stores each result
   as one unsigned short through DST.  A second pass over the same data
   recomputes the expression and calls abort () if a stored value differs.
   The function is marked noinline.  */
__attribute__ ((noinline)) void
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
/* DST is accessed as an array of unsigned short.  */
unsigned short *d = (unsigned short *)dst;
int i;
/* Conversion pass: N/4 iterations, each consuming four bytes of SRC and
   producing one unsigned short.  */
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
/* The operands are ints here; the int result is converted to unsigned
   short by the store.  */
*d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
d++;
}
/* Rewind both pointers for the verification pass.  */
s = src;
d = (unsigned short *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
/* Same expression as in the conversion pass; any mismatch fails the test.  */
if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
abort ();
d++;
}
}
/* Test driver: fill IN with 0 .. N-1 and OUT with 255, then run foo on the
   two buffers.  Always returns 0; failure is signalled by abort () inside
   foo.  */
int main (void)
{
int i;
unsigned char in[N], out[N];
/* NOTE(review): check_vect is not defined in this file; presumably it is
   provided by tree-vect.h -- confirm there what it checks.  */
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
/* Empty volatile asm inside the setup loop.  NOTE(review): presumably it
   keeps this loop from being vectorized so that it does not count toward
   the "vectorized 1 loops" scan -- confirm.  */
__asm__ volatile ("");
}
foo (in, out);
return 0;
}
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Modified rgb to rgb conversion from FFmpeg.

   Reads SRC four bytes at a time (b, g, r, a), combines shifted and masked
   pieces of them with bitwise OR in int arithmetic, and stores each result
   as one int through DST.  A second pass over the same data recomputes the
   expression and calls abort () if a stored value differs.  The function is
   marked noinline.  */
__attribute__ ((noinline)) void
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
/* DST is accessed as an array of int, so the stored value keeps the type
   the expression was computed in.  */
int *d = (int *)dst;
int i;
/* Conversion pass: N/4 iterations, each consuming four bytes of SRC and
   producing one int.  */
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
*d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
d++;
}
/* Rewind both pointers for the verification pass.  */
s = src;
d = (int *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
/* Same expression as in the conversion pass; any mismatch fails the test.  */
if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
abort ();
d++;
}
}
/* Test driver: fill IN with 0 .. N-1 and OUT with 255, then run foo on the
   two buffers.  Always returns 0; failure is signalled by abort () inside
   foo.  */
int main (void)
{
int i;
unsigned char in[N], out[N];
/* NOTE(review): check_vect is not defined in this file; presumably it is
   provided by tree-vect.h -- confirm there what it checks.  */
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
/* Empty volatile asm inside the setup loop.  NOTE(review): presumably it
   keeps this loop from being vectorized so that it does not count toward
   the "vectorized 1 loops" scan -- confirm.  */
__asm__ volatile ("");
}
/* foo stores N/4 ints through OUT.  */
foo (in, out);
return 0;
}
/* Final value stays in int, so no over-widening is detected at the moment. */
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Modified rgb to rgb conversion from FFmpeg.

   Variant whose expression uses a 0xFFC mask, an addition followed by a
   right shift, and a left shift by 9.  Reads SRC four bytes at a time
   (b, g, r, a), evaluates the expression in int arithmetic, and stores each
   result as one unsigned short through DST.  A second pass over the same
   data recomputes the expression and calls abort () if a stored value
   differs.  The function is marked noinline.  */
__attribute__ ((noinline)) void
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
/* DST is accessed as an array of unsigned short.  */
unsigned short *d = (unsigned short *)dst;
int i;
/* Conversion pass: N/4 iterations, each consuming four bytes of SRC and
   producing one unsigned short.  */
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
/* The int result is converted to unsigned short by the store.  */
*d = ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9));
d++;
}
/* Rewind both pointers for the verification pass.  */
s = src;
d = (unsigned short *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
/* Same expression as in the conversion pass; any mismatch fails the test.  */
if (*d != ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9)))
abort ();
d++;
}
}
/* Test driver: fill IN with 0 .. N-1 and OUT with 255, then run foo on the
   two buffers.  Always returns 0; failure is signalled by abort () inside
   foo.  */
int main (void)
{
int i;
unsigned char in[N], out[N];
/* NOTE(review): check_vect is not defined in this file; presumably it is
   provided by tree-vect.h -- confirm there what it checks.  */
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
/* Empty volatile asm inside the setup loop.  NOTE(review): presumably it
   keeps this loop from being vectorized so that it does not count toward
   the "vectorized 1 loops" scan -- confirm.  */
__asm__ volatile ("");
}
foo (in, out);
return 0;
}
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Modified rgb to rgb conversion from FFmpeg.

   Reads SRC four bytes at a time (b, g, r, a), combines shifted and masked
   pieces of them with bitwise OR in int arithmetic, and narrows each result
   to unsigned short.  The narrowed value is both stored through DST and
   added to an int accumulator.  A second pass over the same data recomputes
   the expression and calls abort () if a stored value differs.  Returns the
   accumulated sum.  The function is marked noinline.  */
__attribute__ ((noinline)) int
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
/* DST is accessed as an array of unsigned short; RES holds the narrowed
   value of the current iteration.  */
unsigned short *d = (unsigned short *)dst, res;
int i, result = 0;
/* Conversion pass: N/4 iterations, each consuming four bytes of SRC.  */
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
/* The int result is converted to unsigned short by the assignment.  */
res = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
*d = res;
/* The narrowed value also feeds the running sum.  */
result += res;
d++;
}
/* Rewind both pointers for the verification pass.  */
s = src;
d = (unsigned short *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
/* Same expression as in the conversion pass; any mismatch fails the test.  */
if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
abort ();
d++;
}
return result;
}
/* Test driver: fill IN with 0 .. N-1 and OUT with 255, then run foo on the
   two buffers.  Always returns 0; failure is signalled by abort () inside
   foo.  */
int main (void)
{
int i;
unsigned char in[N], out[N];
/* NOTE(review): check_vect is not defined in this file; presumably it is
   provided by tree-vect.h -- confirm there what it checks.  */
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
/* Empty volatile asm inside the setup loop.  NOTE(review): presumably it
   keeps this loop from being vectorized so that it does not count toward
   the "vectorized 1 loops" scan -- confirm.  */
__asm__ volatile ("");
}
/* The sum returned by foo is not used here.  */
foo (in, out);
return 0;
}
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -181,8 +181,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) ...@@ -181,8 +181,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
stmt_vec_info stmt_info; stmt_vec_info stmt_info;
int i; int i;
HOST_WIDE_INT dummy; HOST_WIDE_INT dummy;
gimple stmt, pattern_stmt = NULL; gimple stmt, pattern_stmt = NULL, pattern_def_stmt = NULL;
bool analyze_pattern_stmt = false; bool analyze_pattern_stmt = false, pattern_def = false;
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
...@@ -296,6 +296,29 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) ...@@ -296,6 +296,29 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
analyze_pattern_stmt = true; analyze_pattern_stmt = true;
/* If a pattern statement has a def stmt, analyze it too. */
if (is_pattern_stmt_p (stmt_info)
&& (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
&& (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
{
if (pattern_def)
pattern_def = false;
else
{
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "==> examining pattern def stmt: ");
print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
TDF_SLIM);
}
pattern_def = true;
stmt = pattern_def_stmt;
stmt_info = vinfo_for_stmt (stmt);
}
}
if (gimple_get_lhs (stmt) == NULL_TREE) if (gimple_get_lhs (stmt) == NULL_TREE)
{ {
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
...@@ -400,7 +423,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) ...@@ -400,7 +423,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|| (nunits > vectorization_factor)) || (nunits > vectorization_factor))
vectorization_factor = nunits; vectorization_factor = nunits;
if (!analyze_pattern_stmt) if (!analyze_pattern_stmt && !pattern_def)
gsi_next (&si); gsi_next (&si);
} }
} }
...@@ -5085,8 +5108,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -5085,8 +5108,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
tree cond_expr = NULL_TREE; tree cond_expr = NULL_TREE;
gimple_seq cond_expr_stmt_list = NULL; gimple_seq cond_expr_stmt_list = NULL;
bool do_peeling_for_loop_bound; bool do_peeling_for_loop_bound;
gimple stmt, pattern_stmt; gimple stmt, pattern_stmt, pattern_def_stmt;
bool transform_pattern_stmt = false; bool transform_pattern_stmt = false, pattern_def = false;
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vec_transform_loop ==="); fprintf (vect_dump, "=== vec_transform_loop ===");
...@@ -5230,6 +5253,30 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -5230,6 +5253,30 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
transform_pattern_stmt = true; transform_pattern_stmt = true;
/* If pattern statement has a def stmt, vectorize it too. */
if (is_pattern_stmt_p (stmt_info)
&& (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
&& (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
{
if (pattern_def)
pattern_def = false;
else
{
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "==> vectorizing pattern def"
" stmt: ");
print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
TDF_SLIM);
}
pattern_def = true;
stmt = pattern_def_stmt;
stmt_info = vinfo_for_stmt (stmt);
}
}
gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
STMT_VINFO_VECTYPE (stmt_info)); STMT_VINFO_VECTYPE (stmt_info));
...@@ -5257,7 +5304,7 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -5257,7 +5304,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
/* Hybrid SLP stmts must be vectorized in addition to SLP. */ /* Hybrid SLP stmts must be vectorized in addition to SLP. */
if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info)) if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
{ {
if (!transform_pattern_stmt) if (!transform_pattern_stmt && !pattern_def)
gsi_next (&si); gsi_next (&si);
continue; continue;
} }
...@@ -5289,7 +5336,7 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -5289,7 +5336,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
} }
} }
if (!transform_pattern_stmt) if (!transform_pattern_stmt && !pattern_def)
gsi_next (&si); gsi_next (&si);
} /* stmts in BB */ } /* stmts in BB */
} /* BBs in loop */ } /* BBs in loop */
......
...@@ -2212,6 +2212,42 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -2212,6 +2212,42 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
} }
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.

   The vector type for SCALAR_TYPE is looked up first; if there is none the
   result is FALSE.  The shift-by-scalar optab is tried before the
   shift-by-vector optab, and the operation counts as supported as soon as
   one of them has a handler for the mode of that vector type.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  enum machine_mode vec_mode;
  optab op;
  tree vectype;

  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    return false;

  /* Both optab queries below are made for this same mode.  */
  vec_mode = TYPE_MODE (vectype);

  /* Try the shift-by-scalar form first.  */
  op = optab_for_tree_code (code, vectype, optab_scalar);
  if (op && optab_handler (op, vec_mode) != CODE_FOR_nothing)
    return true;

  /* Otherwise fall back to the shift-by-vector form.  */
  op = optab_for_tree_code (code, vectype, optab_vector);
  if (op && optab_handler (op, vec_mode) != CODE_FOR_nothing)
    return true;

  return false;
}
/* Function vectorizable_shift. /* Function vectorizable_shift.
Check if STMT performs a shift operation that can be vectorized. Check if STMT performs a shift operation that can be vectorized.
...@@ -4890,7 +4926,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) ...@@ -4890,7 +4926,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
bool ok; bool ok;
tree scalar_type, vectype; tree scalar_type, vectype;
gimple pattern_stmt; gimple pattern_stmt, pattern_def_stmt;
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
{ {
...@@ -4960,6 +4996,23 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) ...@@ -4960,6 +4996,23 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
return false; return false;
} }
if (is_pattern_stmt_p (stmt_info)
&& (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
&& (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
{
/* Analyze def stmt of STMT if it's a pattern stmt. */
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "==> examining pattern def statement: ");
print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
}
if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
return false;
}
switch (STMT_VINFO_DEF_TYPE (stmt_info)) switch (STMT_VINFO_DEF_TYPE (stmt_info))
{ {
case vect_internal_def: case vect_internal_def:
...@@ -5280,6 +5333,7 @@ new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo, ...@@ -5280,6 +5333,7 @@ new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
STMT_VINFO_VECTORIZABLE (res) = true; STMT_VINFO_VECTORIZABLE (res) = true;
STMT_VINFO_IN_PATTERN_P (res) = false; STMT_VINFO_IN_PATTERN_P (res) = false;
STMT_VINFO_RELATED_STMT (res) = NULL; STMT_VINFO_RELATED_STMT (res) = NULL;
STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
STMT_VINFO_DATA_REF (res) = NULL; STMT_VINFO_DATA_REF (res) = NULL;
STMT_VINFO_DR_BASE_ADDRESS (res) = NULL; STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
......
...@@ -469,6 +469,9 @@ typedef struct _stmt_vec_info { ...@@ -469,6 +469,9 @@ typedef struct _stmt_vec_info {
pattern). */ pattern). */
gimple related_stmt; gimple related_stmt;
/* Used to keep a def stmt of a pattern stmt if such exists. */
gimple pattern_def_stmt;
/* List of datarefs that are known to have the same alignment as the dataref /* List of datarefs that are known to have the same alignment as the dataref
of this stmt. */ of this stmt. */
VEC(dr_p,heap) *same_align_refs; VEC(dr_p,heap) *same_align_refs;
...@@ -536,6 +539,7 @@ typedef struct _stmt_vec_info { ...@@ -536,6 +539,7 @@ typedef struct _stmt_vec_info {
#define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p
#define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt
#define STMT_VINFO_PATTERN_DEF_STMT(S) (S)->pattern_def_stmt
#define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs
#define STMT_VINFO_DEF_TYPE(S) (S)->def_type #define STMT_VINFO_DEF_TYPE(S) (S)->def_type
#define STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element #define STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element
...@@ -819,6 +823,7 @@ extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *, ...@@ -819,6 +823,7 @@ extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
extern void vect_get_load_cost (struct data_reference *, int, bool, extern void vect_get_load_cost (struct data_reference *, int, bool,
unsigned int *, unsigned int *); unsigned int *, unsigned int *);
extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); extern void vect_get_store_cost (struct data_reference *, int, unsigned int *);
extern bool vect_supportable_shift (enum tree_code, tree);
/* In tree-vect-data-refs.c. */ /* In tree-vect-data-refs.c. */
extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
...@@ -897,7 +902,7 @@ extern void vect_slp_transform_bb (basic_block); ...@@ -897,7 +902,7 @@ extern void vect_slp_transform_bb (basic_block);
Additional pattern recognition functions can (and will) be added Additional pattern recognition functions can (and will) be added
in the future. */ in the future. */
typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
#define NUM_PATTERNS 4 #define NUM_PATTERNS 5
void vect_pattern_recog (loop_vec_info); void vect_pattern_recog (loop_vec_info);
/* In tree-vectorizer.c. */ /* In tree-vectorizer.c. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment