Commit 1107f3ae by Ira Rosen Committed by Ira Rosen

tree-vectorizer.h (struct _stmt_vec_info): Add new field for pattern def…

tree-vectorizer.h (struct _stmt_vec_info): Add new field for pattern def statement, and its access macro.


        * tree-vectorizer.h (struct _stmt_vec_info): Add new field for
        pattern def statement, and its access macro.
        (NUM_PATTERNS): Set to 5.
        * tree-vect-loop.c (vect_determine_vectorization_factor): Handle
        pattern def statement.
        (vect_transform_loop): Likewise.
        * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add new
        function vect_recog_over_widening_pattern ().
        (vect_operation_fits_smaller_type): New function.
        (vect_recog_over_widening_pattern, vect_mark_pattern_stmts):
        Likewise.
        (vect_pattern_recog_1): Move the code that marks pattern
        statements to vect_mark_pattern_stmts (), and call it.  Update
        documentation.
        * tree-vect-stmts.c (vect_supportable_shift): New function.
        (vect_analyze_stmt): Handle pattern def statement.
        (new_stmt_vec_info): Initialize pattern def statement.

From-SVN: r177409
parent 6394830f
2011-08-04 Ira Rosen <ira.rosen@linaro.org>
* tree-vectorizer.h (struct _stmt_vec_info): Add new field for
pattern def statement, and its access macro.
(NUM_PATTERNS): Set to 5.
* tree-vect-loop.c (vect_determine_vectorization_factor): Handle
pattern def statement.
(vect_transform_loop): Likewise.
* tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add new
function vect_recog_over_widening_pattern ().
(vect_operation_fits_smaller_type): New function.
(vect_recog_over_widening_pattern, vect_mark_pattern_stmts):
Likewise.
(vect_pattern_recog_1): Move the code that marks pattern
statements to vect_mark_pattern_stmts (), and call it. Update
documentation.
* tree-vect-stmts.c (vect_supportable_shift): New function.
(vect_analyze_stmt): Handle pattern def statement.
(new_stmt_vec_info): Initialize pattern def statement.
2011-08-04 Richard Henderson <rth@redhat.com>
PR target/49964
......
2011-08-04 Ira Rosen <ira.rosen@linaro.org>
* gcc.dg/vect/vect-over-widen-1.c: New test.
* gcc.dg/vect/vect-over-widen-2.c: New test.
* gcc.dg/vect/vect-over-widen-3.c: New test.
* gcc.dg/vect/vect-over-widen-4.c: New test.
2011-08-04 Richard Guenther <rguenther@suse.de>
PR fortran/49957
......
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Modified rgb to rgb conversion from FFmpeg.

   SRC is read as N/4 consecutive groups of four bytes (b, g, r, a).  Each
   byte is widened to int, the four values are shifted/masked and OR-ed
   together, and the result is stored as an unsigned short through DST
   (DST is reinterpreted as an unsigned short array of N/4 elements).

   A second loop recomputes the same expression from SRC and calls abort ()
   if any stored element differs.

   The dg-final directives that follow this test expect the vect dump to
   contain "vect_recog_over_widening_pattern: detected" 4 times and
   "vectorized 1 loops" once.  */
__attribute__ ((noinline)) void
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
unsigned short *d = (unsigned short *)dst;
int i;
/* Conversion loop: four input bytes produce one unsigned short.  */
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
/* The arithmetic is done in int and narrowed on the store.  */
*d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
d++;
}
/* Rewind both pointers and verify the stored results.  */
s = src;
d = (unsigned short *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
abort ();
d++;
}
}
/* Test driver.  Calls check_vect () (presumably provided by tree-vect.h,
   which is not shown here), fills in[] with 0 .. N-1 and out[] with 255,
   runs foo on the two buffers and returns 0.  foo itself aborts if the
   stored results do not match.  */
int main (void)
{
int i;
unsigned char in[N], out[N];
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
/* Empty asm statement in the loop body; presumably a barrier that keeps
   this initialization loop from being vectorized, so that the
   "vectorized 1 loops" scan counts only foo -- TODO confirm.  */
__asm__ volatile ("");
}
foo (in, out);
return 0;
}
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Modified rgb to rgb conversion from FFmpeg.

   Same computation as the unsigned short variant of this test, except that
   DST is reinterpreted as an int array: SRC is read as N/4 groups of four
   bytes (b, g, r, a), each byte is widened to int, and the shifted/masked
   values are OR-ed together and stored as int with no narrowing.

   A second loop recomputes the same expression from SRC and calls abort ()
   if any stored element differs.

   The dg-final directives that follow this test expect
   "vect_recog_over_widening_pattern: detected" 0 times and
   "vectorized 1 loops" once.  */
__attribute__ ((noinline)) void
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
int *d = (int *)dst;
int i;
/* Conversion loop: four input bytes produce one int.  */
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
*d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
d++;
}
/* Rewind both pointers and verify the stored results.  */
s = src;
d = (int *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
abort ();
d++;
}
}
/* Test driver.  Calls check_vect () (presumably provided by tree-vect.h,
   which is not shown here), fills in[] with 0 .. N-1 and out[] with 255,
   runs foo on the two buffers and returns 0.  foo itself aborts if the
   stored results do not match.

   NOTE(review): foo accesses out through an int *, but out is declared
   here as an unsigned char array, so nothing in this declaration
   guarantees int alignment -- confirm this is acceptable on all
   targets the test runs on.  */
int main (void)
{
int i;
unsigned char in[N], out[N];
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
/* Empty asm statement in the loop body; presumably a barrier that keeps
   this initialization loop from being vectorized, so that the
   "vectorized 1 loops" scan counts only foo -- TODO confirm.  */
__asm__ volatile ("");
}
foo (in, out);
return 0;
}
/* Final value stays in int, so no over-widening is detected at the moment. */
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Modified rgb to rgb conversion from FFmpeg.

   Variant with a different expression: SRC is read as N/4 groups of four
   bytes (b, g, r, a), each widened to int, and the stored value is
   (b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9), narrowed to unsigned
   short on the store through DST.

   A second loop recomputes the same expression in int and calls abort ()
   if it differs from the stored unsigned short.  With the inputs main
   supplies (byte values 0 .. 63) the int result stays below 1 << 15, so
   the narrowing store loses nothing and the comparison holds.

   The dg-final directives that follow this test expect
   "vect_recog_over_widening_pattern: detected" once and
   "vectorized 1 loops" once.  */
__attribute__ ((noinline)) void
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
unsigned short *d = (unsigned short *)dst;
int i;
/* Conversion loop: four input bytes produce one unsigned short.  */
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
*d = ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9));
d++;
}
/* Rewind both pointers and verify the stored results.  */
s = src;
d = (unsigned short *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
if (*d != ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9)))
abort ();
d++;
}
}
/* Test driver.  Calls check_vect () (presumably provided by tree-vect.h,
   which is not shown here), fills in[] with 0 .. N-1 and out[] with 255,
   runs foo on the two buffers and returns 0.  foo itself aborts if the
   stored results do not match.  */
int main (void)
{
int i;
unsigned char in[N], out[N];
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
/* Empty asm statement in the loop body; presumably a barrier that keeps
   this initialization loop from being vectorized, so that the
   "vectorized 1 loops" scan counts only foo -- TODO confirm.  */
__asm__ volatile ("");
}
foo (in, out);
return 0;
}
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Modified rgb to rgb conversion from FFmpeg.

   Variant in which the narrowed value has a second use: SRC is read as N/4
   groups of four bytes (b, g, r, a), each widened to int; the shifted/masked
   values are OR-ed together and narrowed into the unsigned short RES, which
   is both stored through DST and added to the int accumulator RESULT.

   A second loop recomputes the same expression from SRC and calls abort ()
   if any stored element differs.

   Returns the sum of all stored values.

   The dg-final directives that follow this test expect
   "vect_recog_over_widening_pattern: detected" 4 times and
   "vectorized 1 loops" once.  */
__attribute__ ((noinline)) int
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
unsigned short *d = (unsigned short *)dst, res;
int i, result = 0;
/* Conversion loop: four input bytes produce one unsigned short, which is
   stored and also accumulated.  */
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
res = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
*d = res;
result += res;
d++;
}
/* Rewind both pointers and verify the stored results.  */
s = src;
d = (unsigned short *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
abort ();
d++;
}
return result;
}
/* Test driver.  Calls check_vect () (presumably provided by tree-vect.h,
   which is not shown here), fills in[] with 0 .. N-1 and out[] with 255,
   runs foo on the two buffers and returns 0.  foo itself aborts if the
   stored results do not match; the int value foo returns is not used.  */
int main (void)
{
int i;
unsigned char in[N], out[N];
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
/* Empty asm statement in the loop body; presumably a barrier that keeps
   this initialization loop from being vectorized, so that the
   "vectorized 1 loops" scan counts only foo -- TODO confirm.  */
__asm__ volatile ("");
}
foo (in, out);
return 0;
}
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
......@@ -181,8 +181,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
stmt_vec_info stmt_info;
int i;
HOST_WIDE_INT dummy;
gimple stmt, pattern_stmt = NULL;
bool analyze_pattern_stmt = false;
gimple stmt, pattern_stmt = NULL, pattern_def_stmt = NULL;
bool analyze_pattern_stmt = false, pattern_def = false;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
......@@ -296,6 +296,29 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
analyze_pattern_stmt = true;
/* If a pattern statement has a def stmt, analyze it too. */
if (is_pattern_stmt_p (stmt_info)
&& (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
&& (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
{
if (pattern_def)
pattern_def = false;
else
{
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "==> examining pattern def stmt: ");
print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
TDF_SLIM);
}
pattern_def = true;
stmt = pattern_def_stmt;
stmt_info = vinfo_for_stmt (stmt);
}
}
if (gimple_get_lhs (stmt) == NULL_TREE)
{
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
......@@ -400,7 +423,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|| (nunits > vectorization_factor))
vectorization_factor = nunits;
if (!analyze_pattern_stmt)
if (!analyze_pattern_stmt && !pattern_def)
gsi_next (&si);
}
}
......@@ -5085,8 +5108,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
tree cond_expr = NULL_TREE;
gimple_seq cond_expr_stmt_list = NULL;
bool do_peeling_for_loop_bound;
gimple stmt, pattern_stmt;
bool transform_pattern_stmt = false;
gimple stmt, pattern_stmt, pattern_def_stmt;
bool transform_pattern_stmt = false, pattern_def = false;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vec_transform_loop ===");
......@@ -5230,6 +5253,30 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
transform_pattern_stmt = true;
/* If pattern statement has a def stmt, vectorize it too. */
if (is_pattern_stmt_p (stmt_info)
&& (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
&& (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
{
if (pattern_def)
pattern_def = false;
else
{
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "==> vectorizing pattern def"
" stmt: ");
print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
TDF_SLIM);
}
pattern_def = true;
stmt = pattern_def_stmt;
stmt_info = vinfo_for_stmt (stmt);
}
}
gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
STMT_VINFO_VECTYPE (stmt_info));
......@@ -5257,7 +5304,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
/* Hybrid SLP stmts must be vectorized in addition to SLP. */
if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
{
if (!transform_pattern_stmt)
if (!transform_pattern_stmt && !pattern_def)
gsi_next (&si);
continue;
}
......@@ -5289,7 +5336,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
}
}
if (!transform_pattern_stmt)
if (!transform_pattern_stmt && !pattern_def)
gsi_next (&si);
} /* stmts in BB */
} /* BBs in loop */
......
......@@ -2212,6 +2212,42 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
}
/* Tell whether shift operation CODE can be carried out on vectors whose
   elements have type SCALAR_TYPE.  The shift-by-scalar optab is consulted
   first and the shift-by-vector optab second; the answer is TRUE as soon as
   one of them has a handler for the vector mode.  FALSE is returned when
   SCALAR_TYPE has no vector type or neither optab is usable.  */

bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
  tree vectype = get_vectype_for_scalar_type (scalar_type);
  enum machine_mode vec_mode;
  optab shift_optab;

  if (!vectype)
    return false;

  vec_mode = TYPE_MODE (vectype);

  /* Vector shifted by a scalar amount.  */
  shift_optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (shift_optab
      && optab_handler (shift_optab, vec_mode) != CODE_FOR_nothing)
    return true;

  /* Otherwise, vector shifted by a vector of amounts.  */
  shift_optab = optab_for_tree_code (code, vectype, optab_vector);
  if (shift_optab
      && optab_handler (shift_optab, vec_mode) != CODE_FOR_nothing)
    return true;

  return false;
}
/* Function vectorizable_shift.
Check if STMT performs a shift operation that can be vectorized.
......@@ -4890,7 +4926,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
bool ok;
tree scalar_type, vectype;
gimple pattern_stmt;
gimple pattern_stmt, pattern_def_stmt;
if (vect_print_dump_info (REPORT_DETAILS))
{
......@@ -4960,6 +4996,23 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
return false;
}
if (is_pattern_stmt_p (stmt_info)
&& (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
&& (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
{
/* Analyze def stmt of STMT if it's a pattern stmt. */
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "==> examining pattern def statement: ");
print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
}
if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
return false;
}
switch (STMT_VINFO_DEF_TYPE (stmt_info))
{
case vect_internal_def:
......@@ -5280,6 +5333,7 @@ new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
STMT_VINFO_VECTORIZABLE (res) = true;
STMT_VINFO_IN_PATTERN_P (res) = false;
STMT_VINFO_RELATED_STMT (res) = NULL;
STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
STMT_VINFO_DATA_REF (res) = NULL;
STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
......
......@@ -469,6 +469,9 @@ typedef struct _stmt_vec_info {
pattern). */
gimple related_stmt;
/* Used to keep a def stmt of a pattern stmt if such exists. */
gimple pattern_def_stmt;
/* List of datarefs that are known to have the same alignment as the dataref
of this stmt. */
VEC(dr_p,heap) *same_align_refs;
......@@ -536,6 +539,7 @@ typedef struct _stmt_vec_info {
#define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p
#define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt
#define STMT_VINFO_PATTERN_DEF_STMT(S) (S)->pattern_def_stmt
#define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs
#define STMT_VINFO_DEF_TYPE(S) (S)->def_type
#define STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element
......@@ -819,6 +823,7 @@ extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
extern void vect_get_load_cost (struct data_reference *, int, bool,
unsigned int *, unsigned int *);
extern void vect_get_store_cost (struct data_reference *, int, unsigned int *);
extern bool vect_supportable_shift (enum tree_code, tree);
/* In tree-vect-data-refs.c. */
extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
......@@ -897,7 +902,7 @@ extern void vect_slp_transform_bb (basic_block);
Additional pattern recognition functions can (and will) be added
in the future. */
typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
#define NUM_PATTERNS 4
#define NUM_PATTERNS 5
void vect_pattern_recog (loop_vec_info);
/* In tree-vectorizer.c. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment