Commit 1107f3ae by Ira Rosen Committed by Ira Rosen

tree-vectorizer.h (struct _stmt_vec_info): Add new field for pattern def…

tree-vectorizer.h (struct _stmt_vec_info): Add new field for pattern def statement, and its access macro.


        * tree-vectorizer.h (struct _stmt_vec_info): Add new field for
        pattern def statement, and its access macro.
        (NUM_PATTERNS): Set to 5.
        * tree-vect-loop.c (vect_determine_vectorization_factor): Handle
        pattern def statement.
        (vect_transform_loop): Likewise.
        * tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add new
        function vect_recog_over_widening_pattern ().
        (vect_operation_fits_smaller_type): New function.
        (vect_recog_over_widening_pattern, vect_mark_pattern_stmts):
        Likewise.
        (vect_pattern_recog_1): Move the code that marks pattern
        statements to vect_mark_pattern_stmts (), and call it.  Update
        documentation.
        * tree-vect-stmts.c (vect_supportable_shift): New function.
        (vect_analyze_stmt): Handle pattern def statement.
        (new_stmt_vec_info): Initialize pattern def statement.

From-SVN: r177409
parent 6394830f
2011-08-04 Ira Rosen <ira.rosen@linaro.org>
* tree-vectorizer.h (struct _stmt_vec_info): Add new field for
pattern def statement, and its access macro.
(NUM_PATTERNS): Set to 5.
* tree-vect-loop.c (vect_determine_vectorization_factor): Handle
pattern def statement.
(vect_transform_loop): Likewise.
* tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add new
function vect_recog_over_widening_pattern ().
(vect_operation_fits_smaller_type): New function.
(vect_recog_over_widening_pattern, vect_mark_pattern_stmts):
Likewise.
(vect_pattern_recog_1): Move the code that marks pattern
statements to vect_mark_pattern_stmts (), and call it. Update
documentation.
* tree-vect-stmts.c (vect_supportable_shift): New function.
(vect_analyze_stmt): Handle pattern def statement.
(new_stmt_vec_info): Initialize pattern def statement.
2011-08-04 Richard Henderson <rth@redhat.com> 2011-08-04 Richard Henderson <rth@redhat.com>
PR target/49964 PR target/49964
......
2011-08-04 Ira Rosen <ira.rosen@linaro.org>
* gcc.dg/vect/vect-over-widen-1.c: New test.
* gcc.dg/vect/vect-over-widen-2.c: New test.
* gcc.dg/vect/vect-over-widen-3.c: New test.
* gcc.dg/vect/vect-over-widen-4.c: New test.
2011-08-04 Richard Guenther <rguenther@suse.de> 2011-08-04 Richard Guenther <rguenther@suse.de>
PR fortran/49957 PR fortran/49957
......
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Modified rgb to rgb conversion from FFmpeg. */
__attribute__ ((noinline)) void
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
unsigned short *d = (unsigned short *)dst;
int i;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
*d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
d++;
}
s = src;
d = (unsigned short *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
abort ();
d++;
}
}
int main (void)
{
int i;
unsigned char in[N], out[N];
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
__asm__ volatile ("");
}
foo (in, out);
return 0;
}
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Modified rgb to rgb conversion from FFmpeg. */
__attribute__ ((noinline)) void
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
int *d = (int *)dst;
int i;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
*d = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
d++;
}
s = src;
d = (int *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
abort ();
d++;
}
}
int main (void)
{
int i;
unsigned char in[N], out[N];
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
__asm__ volatile ("");
}
foo (in, out);
return 0;
}
/* Final value stays in int, so no over-widening is detected at the moment. */
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 0 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Modified rgb to rgb conversion from FFmpeg. */
__attribute__ ((noinline)) void
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
unsigned short *d = (unsigned short *)dst;
int i;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
*d = ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9));
d++;
}
s = src;
d = (unsigned short *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
if (*d != ((b>>3) | ((g&0xFFC)<<3) | ((r+0xF8)>>8) | (a<<9)))
abort ();
d++;
}
}
int main (void)
{
int i;
unsigned char in[N], out[N];
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
__asm__ volatile ("");
}
foo (in, out);
return 0;
}
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 1 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-require-effective-target vect_int } */
/* { dg-require-effective-target vect_shift } */
#include <stdlib.h>
#include <stdarg.h>
#include "tree-vect.h"
#define N 64
/* Modified rgb to rgb conversion from FFmpeg. */
__attribute__ ((noinline)) int
foo (unsigned char *src, unsigned char *dst)
{
unsigned char *s = src;
unsigned short *d = (unsigned short *)dst, res;
int i, result = 0;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
res = ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5));
*d = res;
result += res;
d++;
}
s = src;
d = (unsigned short *)dst;
for (i = 0; i < N/4; i++)
{
const int b = *s++;
const int g = *s++;
const int r = *s++;
const int a = *s++;
if (*d != ((b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8) | (a>>5)))
abort ();
d++;
}
return result;
}
int main (void)
{
int i;
unsigned char in[N], out[N];
check_vect ();
for (i = 0; i < N; i++)
{
in[i] = i;
out[i] = 255;
__asm__ volatile ("");
}
foo (in, out);
return 0;
}
/* { dg-final { scan-tree-dump-times "vect_recog_over_widening_pattern: detected" 4 "vect" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -181,8 +181,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) ...@@ -181,8 +181,8 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
stmt_vec_info stmt_info; stmt_vec_info stmt_info;
int i; int i;
HOST_WIDE_INT dummy; HOST_WIDE_INT dummy;
gimple stmt, pattern_stmt = NULL; gimple stmt, pattern_stmt = NULL, pattern_def_stmt = NULL;
bool analyze_pattern_stmt = false; bool analyze_pattern_stmt = false, pattern_def = false;
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_determine_vectorization_factor ==="); fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
...@@ -296,6 +296,29 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) ...@@ -296,6 +296,29 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
analyze_pattern_stmt = true; analyze_pattern_stmt = true;
/* If a pattern statement has a def stmt, analyze it too. */
if (is_pattern_stmt_p (stmt_info)
&& (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
&& (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
{
if (pattern_def)
pattern_def = false;
else
{
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "==> examining pattern def stmt: ");
print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
TDF_SLIM);
}
pattern_def = true;
stmt = pattern_def_stmt;
stmt_info = vinfo_for_stmt (stmt);
}
}
if (gimple_get_lhs (stmt) == NULL_TREE) if (gimple_get_lhs (stmt) == NULL_TREE)
{ {
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS)) if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
...@@ -400,7 +423,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) ...@@ -400,7 +423,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|| (nunits > vectorization_factor)) || (nunits > vectorization_factor))
vectorization_factor = nunits; vectorization_factor = nunits;
if (!analyze_pattern_stmt) if (!analyze_pattern_stmt && !pattern_def)
gsi_next (&si); gsi_next (&si);
} }
} }
...@@ -5085,8 +5108,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -5085,8 +5108,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
tree cond_expr = NULL_TREE; tree cond_expr = NULL_TREE;
gimple_seq cond_expr_stmt_list = NULL; gimple_seq cond_expr_stmt_list = NULL;
bool do_peeling_for_loop_bound; bool do_peeling_for_loop_bound;
gimple stmt, pattern_stmt; gimple stmt, pattern_stmt, pattern_def_stmt;
bool transform_pattern_stmt = false; bool transform_pattern_stmt = false, pattern_def = false;
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vec_transform_loop ==="); fprintf (vect_dump, "=== vec_transform_loop ===");
...@@ -5230,6 +5253,30 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -5230,6 +5253,30 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
transform_pattern_stmt = true; transform_pattern_stmt = true;
/* If pattern statement has a def stmt, vectorize it too. */
if (is_pattern_stmt_p (stmt_info)
&& (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
&& (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
{
if (pattern_def)
pattern_def = false;
else
{
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "==> vectorizing pattern def"
" stmt: ");
print_gimple_stmt (vect_dump, pattern_def_stmt, 0,
TDF_SLIM);
}
pattern_def = true;
stmt = pattern_def_stmt;
stmt_info = vinfo_for_stmt (stmt);
}
}
gcc_assert (STMT_VINFO_VECTYPE (stmt_info)); gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
nunits = (unsigned int) TYPE_VECTOR_SUBPARTS ( nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (
STMT_VINFO_VECTYPE (stmt_info)); STMT_VINFO_VECTYPE (stmt_info));
...@@ -5257,7 +5304,7 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -5257,7 +5304,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
/* Hybrid SLP stmts must be vectorized in addition to SLP. */ /* Hybrid SLP stmts must be vectorized in addition to SLP. */
if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info)) if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
{ {
if (!transform_pattern_stmt) if (!transform_pattern_stmt && !pattern_def)
gsi_next (&si); gsi_next (&si);
continue; continue;
} }
...@@ -5289,7 +5336,7 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -5289,7 +5336,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
} }
} }
if (!transform_pattern_stmt) if (!transform_pattern_stmt && !pattern_def)
gsi_next (&si); gsi_next (&si);
} /* stmts in BB */ } /* stmts in BB */
} /* BBs in loop */ } /* BBs in loop */
......
...@@ -47,11 +47,14 @@ static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *, ...@@ -47,11 +47,14 @@ static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *, static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
tree *); tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *); static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = { static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
vect_recog_widen_mult_pattern, vect_recog_widen_mult_pattern,
vect_recog_widen_sum_pattern, vect_recog_widen_sum_pattern,
vect_recog_dot_prod_pattern, vect_recog_dot_prod_pattern,
vect_recog_pow_pattern}; vect_recog_pow_pattern,
vect_recog_over_widening_pattern};
/* Function widened_name_p /* Function widened_name_p
...@@ -827,6 +830,419 @@ vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in, ...@@ -827,6 +830,419 @@ vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
} }
/* Return TRUE if the operation in STMT can be performed on a smaller type.
Input:
STMT - a statement to check.
DEF - we support operations with two operands, one of which is constant.
The other operand can be defined by a demotion operation, or by a
previous statement in a sequence of over-promoted operations. In the
later case DEF is used to replace that operand. (It is defined by a
pattern statement we created for the previous statement in the
sequence).
Input/output:
NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
NULL, it's the type of DEF.
STMTS - additional pattern statements. If a pattern statement (type
conversion) is created in this function, its original statement is
added to STMTS.
Output:
OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
operands to use in the new pattern statement for STMT (will be created
in vect_recog_over_widening_pattern ()).
NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
statements for STMT: the first one is a type promotion and the second
one is the operation itself. We return the type promotion statement
in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_STMT of
the second pattern statement. */
static bool
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
tree *op0, tree *op1, gimple *new_def_stmt,
VEC (gimple, heap) **stmts)
{
enum tree_code code;
tree const_oprnd, oprnd;
tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
gimple def_stmt, new_stmt;
bool first = false;
*new_def_stmt = NULL;
if (!is_gimple_assign (stmt))
return false;
code = gimple_assign_rhs_code (stmt);
if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
&& code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
return false;
oprnd = gimple_assign_rhs1 (stmt);
const_oprnd = gimple_assign_rhs2 (stmt);
type = gimple_expr_type (stmt);
if (TREE_CODE (oprnd) != SSA_NAME
|| TREE_CODE (const_oprnd) != INTEGER_CST)
return false;
/* If we are in the middle of a sequence, we use DEF from a previous
statement. Otherwise, OPRND has to be a result of type promotion. */
if (*new_type)
{
half_type = *new_type;
oprnd = def;
}
else
{
first = true;
if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false))
return false;
}
/* Can we perform the operation on a smaller type? */
switch (code)
{
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case BIT_AND_EXPR:
if (!int_fits_type_p (const_oprnd, half_type))
{
/* HALF_TYPE is not enough. Try a bigger type if possible. */
if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
return false;
interm_type = build_nonstandard_integer_type (
TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
if (!int_fits_type_p (const_oprnd, interm_type))
return false;
}
break;
case LSHIFT_EXPR:
/* Try intermediate type - HALF_TYPE is not enough for sure. */
if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
return false;
/* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
(e.g., if the original value was char, the shift amount is at most 8
if we want to use short). */
if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
return false;
interm_type = build_nonstandard_integer_type (
TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
if (!vect_supportable_shift (code, interm_type))
return false;
break;
case RSHIFT_EXPR:
if (vect_supportable_shift (code, half_type))
break;
/* Try intermediate type - HALF_TYPE is not supported. */
if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
return false;
interm_type = build_nonstandard_integer_type (
TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
if (!vect_supportable_shift (code, interm_type))
return false;
break;
default:
gcc_unreachable ();
}
/* There are four possible cases:
1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
the first statement in the sequence)
a. The original, HALF_TYPE, is not enough - we replace the promotion
from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
promotion.
2. OPRND is defined by a pattern statement we created.
a. Its type is not sufficient for the operation, we create a new stmt:
a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
this statement in NEW_DEF_STMT, and it is later put in
STMT_VINFO_PATTERN_DEF_STMT of the pattern statement for STMT.
b. OPRND is good to use in the new statement. */
if (first)
{
if (interm_type)
{
/* Replace the original type conversion HALF_TYPE->TYPE with
HALF_TYPE->INTERM_TYPE. */
if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
{
new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
/* Check if the already created pattern stmt is what we need. */
if (!is_gimple_assign (new_stmt)
|| gimple_assign_rhs_code (new_stmt) != NOP_EXPR
|| TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
return false;
oprnd = gimple_assign_lhs (new_stmt);
}
else
{
/* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
oprnd = gimple_assign_rhs1 (def_stmt);
tmp = create_tmp_reg (interm_type, NULL);
add_referenced_var (tmp);
new_oprnd = make_ssa_name (tmp, NULL);
new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
oprnd, NULL_TREE);
SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
VEC_safe_push (gimple, heap, *stmts, def_stmt);
oprnd = new_oprnd;
}
}
else
{
/* Retrieve the operand before the type promotion. */
oprnd = gimple_assign_rhs1 (def_stmt);
}
}
else
{
if (interm_type)
{
/* Create a type conversion HALF_TYPE->INTERM_TYPE. */
tmp = create_tmp_reg (interm_type, NULL);
add_referenced_var (tmp);
new_oprnd = make_ssa_name (tmp, NULL);
new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
oprnd, NULL_TREE);
SSA_NAME_DEF_STMT (new_oprnd) = new_stmt;
oprnd = new_oprnd;
*new_def_stmt = new_stmt;
}
/* Otherwise, OPRND is already set. */
}
if (interm_type)
*new_type = interm_type;
else
*new_type = half_type;
*op0 = oprnd;
*op1 = fold_convert (*new_type, const_oprnd);
return true;
}
/* Try to find a statement or a sequence of statements that can be performed
on a smaller type:
type x_t;
TYPE x_T, res0_T, res1_T;
loop:
S1 x_t = *p;
S2 x_T = (TYPE) x_t;
S3 res0_T = op (x_T, C0);
S4 res1_T = op (res0_T, C1);
S5 ... = () res1_T; - type demotion
where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
constants.
Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
be 'type' or some intermediate type. For now, we expect S5 to be a type
demotion operation. We also check that S3 and S4 have only one use.
.
*/
static gimple
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
tree *type_in, tree *type_out)
{
gimple stmt = VEC_pop (gimple, *stmts);
gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
imm_use_iterator imm_iter;
use_operand_p use_p;
int nuses = 0;
tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
bool first;
struct loop *loop = (gimple_bb (stmt))->loop_father;
first = true;
while (1)
{
if (!vinfo_for_stmt (stmt)
|| STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
return NULL;
new_def_stmt = NULL;
if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
&op0, &op1, &new_def_stmt,
stmts))
{
if (first)
return NULL;
else
break;
}
/* STMT can be performed on a smaller type. Check its uses. */
lhs = gimple_assign_lhs (stmt);
nuses = 0;
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
{
if (is_gimple_debug (USE_STMT (use_p)))
continue;
use_stmt = USE_STMT (use_p);
nuses++;
}
if (nuses != 1 || !is_gimple_assign (use_stmt)
|| !gimple_bb (use_stmt)
|| !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
return NULL;
/* Create pattern statement for STMT. */
vectype = get_vectype_for_scalar_type (new_type);
if (!vectype)
return NULL;
/* We want to collect all the statements for which we create pattern
statetments, except for the case when the last statement in the
sequence doesn't have a corresponding pattern statement. In such
case we associate the last pattern statement with the last statement
in the sequence. Therefore, we only add an original statetement to
the list if we know that it is not the last. */
if (prev_stmt)
VEC_safe_push (gimple, heap, *stmts, prev_stmt);
var = vect_recog_temp_ssa_var (new_type, NULL);
pattern_stmt = gimple_build_assign_with_ops (
gimple_assign_rhs_code (stmt), var, op0, op1);
SSA_NAME_DEF_STMT (var) = pattern_stmt;
STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (stmt)) = new_def_stmt;
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "created pattern stmt: ");
print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
}
prev_stmt = stmt;
stmt = use_stmt;
first = false;
}
/* We got a sequence. We expect it to end with a type demotion operation.
Otherwise, we quit (for now). There are three possible cases: the
conversion is to NEW_TYPE (we don't do anything), the conversion is to
a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
NEW_TYPE differs (we create a new conversion statement). */
if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
{
use_lhs = gimple_assign_lhs (use_stmt);
use_type = TREE_TYPE (use_lhs);
/* Support only type promotion or signedess change. */
if (!INTEGRAL_TYPE_P (use_type)
|| TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
return NULL;
if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
|| TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
{
/* Create NEW_TYPE->USE_TYPE conversion. */
tmp = create_tmp_reg (use_type, NULL);
add_referenced_var (tmp);
new_oprnd = make_ssa_name (tmp, NULL);
pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
var, NULL_TREE);
SSA_NAME_DEF_STMT (new_oprnd) = pattern_stmt;
STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
*type_in = get_vectype_for_scalar_type (new_type);
*type_out = get_vectype_for_scalar_type (use_type);
/* We created a pattern statement for the last statement in the
sequence, so we don't need to associate it with the pattern
statement created for PREV_STMT. Therefore, we add PREV_STMT
to the list in order to mark it later in vect_pattern_recog_1. */
if (prev_stmt)
VEC_safe_push (gimple, heap, *stmts, prev_stmt);
}
else
{
if (prev_stmt)
STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (use_stmt))
= STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (prev_stmt));
*type_in = vectype;
*type_out = NULL_TREE;
}
VEC_safe_push (gimple, heap, *stmts, use_stmt);
}
else
/* TODO: support general case, create a conversion to the correct type. */
return NULL;
/* Pattern detected. */
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
}
return pattern_stmt;
}
/* Mark statements that are involved in a pattern. */
static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
tree pattern_vectype)
{
stmt_vec_info pattern_stmt_info, def_stmt_info;
stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
gimple def_stmt;
set_vinfo_for_stmt (pattern_stmt,
new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
STMT_VINFO_DEF_TYPE (pattern_stmt_info)
= STMT_VINFO_DEF_TYPE (orig_stmt_info);
STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info)
= STMT_VINFO_PATTERN_DEF_STMT (orig_stmt_info);
if (STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info))
{
def_stmt = STMT_VINFO_PATTERN_DEF_STMT (pattern_stmt_info);
set_vinfo_for_stmt (def_stmt,
new_stmt_vec_info (def_stmt, loop_vinfo, NULL));
gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
def_stmt_info = vinfo_for_stmt (def_stmt);
STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
STMT_VINFO_DEF_TYPE (def_stmt_info)
= STMT_VINFO_DEF_TYPE (orig_stmt_info);
STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
}
}
/* Function vect_pattern_recog_1 /* Function vect_pattern_recog_1
Input: Input:
...@@ -856,7 +1272,6 @@ vect_pattern_recog_1 ( ...@@ -856,7 +1272,6 @@ vect_pattern_recog_1 (
{ {
gimple stmt = gsi_stmt (si), pattern_stmt; gimple stmt = gsi_stmt (si), pattern_stmt;
stmt_vec_info stmt_info; stmt_vec_info stmt_info;
stmt_vec_info pattern_stmt_info;
loop_vec_info loop_vinfo; loop_vec_info loop_vinfo;
tree pattern_vectype; tree pattern_vectype;
tree type_in, type_out; tree type_in, type_out;
...@@ -924,16 +1339,7 @@ vect_pattern_recog_1 ( ...@@ -924,16 +1339,7 @@ vect_pattern_recog_1 (
} }
/* Mark the stmts that are involved in the pattern. */ /* Mark the stmts that are involved in the pattern. */
set_vinfo_for_stmt (pattern_stmt, vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
gimple_set_bb (pattern_stmt, gimple_bb (stmt));
pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
STMT_VINFO_DEF_TYPE (pattern_stmt_info) = STMT_VINFO_DEF_TYPE (stmt_info);
STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
STMT_VINFO_RELATED_STMT (stmt_info) = pattern_stmt;
/* Patterns cannot be vectorized using SLP, because they change the order of /* Patterns cannot be vectorized using SLP, because they change the order of
computation. */ computation. */
...@@ -941,9 +1347,9 @@ vect_pattern_recog_1 ( ...@@ -941,9 +1347,9 @@ vect_pattern_recog_1 (
if (next == stmt) if (next == stmt)
VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i); VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
/* In case of widen-mult by a constant, it is possible that an additional /* It is possible that additional pattern stmts are created and inserted in
pattern stmt is created and inserted in STMTS_TO_REPLACE. We create a STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
stmt_info for it, and mark the relevant statements. */ relevant statements. */
for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt) for (i = 0; VEC_iterate (gimple, stmts_to_replace, i, stmt)
&& (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1); && (unsigned) i < (VEC_length (gimple, stmts_to_replace) - 1);
i++) i++)
...@@ -956,16 +1362,7 @@ vect_pattern_recog_1 ( ...@@ -956,16 +1362,7 @@ vect_pattern_recog_1 (
print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM); print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
} }
set_vinfo_for_stmt (pattern_stmt, vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL));
gimple_set_bb (pattern_stmt, gimple_bb (stmt));
pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
STMT_VINFO_RELATED_STMT (pattern_stmt_info) = stmt;
STMT_VINFO_DEF_TYPE (pattern_stmt_info)
= STMT_VINFO_DEF_TYPE (stmt_info);
STMT_VINFO_VECTYPE (pattern_stmt_info) = STMT_VINFO_VECTYPE (stmt_info);
STMT_VINFO_IN_PATTERN_P (stmt_info) = true;
} }
VEC_free (gimple, heap, stmts_to_replace); VEC_free (gimple, heap, stmts_to_replace);
......
...@@ -2212,6 +2212,42 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -2212,6 +2212,42 @@ vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
} }
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
either as shift by a scalar or by a vector. */
bool
vect_supportable_shift (enum tree_code code, tree scalar_type)
{
enum machine_mode vec_mode;
optab optab;
int icode;
tree vectype;
vectype = get_vectype_for_scalar_type (scalar_type);
if (!vectype)
return false;
optab = optab_for_tree_code (code, vectype, optab_scalar);
if (!optab
|| optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
{
optab = optab_for_tree_code (code, vectype, optab_vector);
if (!optab
|| (optab_handler (optab, TYPE_MODE (vectype))
== CODE_FOR_nothing))
return false;
}
vec_mode = TYPE_MODE (vectype);
icode = (int) optab_handler (optab, vec_mode);
if (icode == CODE_FOR_nothing)
return false;
return true;
}
/* Function vectorizable_shift. /* Function vectorizable_shift.
Check if STMT performs a shift operation that can be vectorized. Check if STMT performs a shift operation that can be vectorized.
...@@ -4890,7 +4926,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) ...@@ -4890,7 +4926,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info); enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
bool ok; bool ok;
tree scalar_type, vectype; tree scalar_type, vectype;
gimple pattern_stmt; gimple pattern_stmt, pattern_def_stmt;
if (vect_print_dump_info (REPORT_DETAILS)) if (vect_print_dump_info (REPORT_DETAILS))
{ {
...@@ -4960,6 +4996,23 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) ...@@ -4960,6 +4996,23 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
return false; return false;
} }
if (is_pattern_stmt_p (stmt_info)
&& (pattern_def_stmt = STMT_VINFO_PATTERN_DEF_STMT (stmt_info))
&& (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt))))
{
/* Analyze def stmt of STMT if it's a pattern stmt. */
if (vect_print_dump_info (REPORT_DETAILS))
{
fprintf (vect_dump, "==> examining pattern def statement: ");
print_gimple_stmt (vect_dump, pattern_def_stmt, 0, TDF_SLIM);
}
if (!vect_analyze_stmt (pattern_def_stmt, need_to_vectorize, node))
return false;
}
switch (STMT_VINFO_DEF_TYPE (stmt_info)) switch (STMT_VINFO_DEF_TYPE (stmt_info))
{ {
case vect_internal_def: case vect_internal_def:
...@@ -5280,6 +5333,7 @@ new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo, ...@@ -5280,6 +5333,7 @@ new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
STMT_VINFO_VECTORIZABLE (res) = true; STMT_VINFO_VECTORIZABLE (res) = true;
STMT_VINFO_IN_PATTERN_P (res) = false; STMT_VINFO_IN_PATTERN_P (res) = false;
STMT_VINFO_RELATED_STMT (res) = NULL; STMT_VINFO_RELATED_STMT (res) = NULL;
STMT_VINFO_PATTERN_DEF_STMT (res) = NULL;
STMT_VINFO_DATA_REF (res) = NULL; STMT_VINFO_DATA_REF (res) = NULL;
STMT_VINFO_DR_BASE_ADDRESS (res) = NULL; STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
......
...@@ -469,6 +469,9 @@ typedef struct _stmt_vec_info { ...@@ -469,6 +469,9 @@ typedef struct _stmt_vec_info {
pattern). */ pattern). */
gimple related_stmt; gimple related_stmt;
/* Used to keep a def stmt of a pattern stmt if such exists. */
gimple pattern_def_stmt;
/* List of datarefs that are known to have the same alignment as the dataref /* List of datarefs that are known to have the same alignment as the dataref
of this stmt. */ of this stmt. */
VEC(dr_p,heap) *same_align_refs; VEC(dr_p,heap) *same_align_refs;
...@@ -536,6 +539,7 @@ typedef struct _stmt_vec_info { ...@@ -536,6 +539,7 @@ typedef struct _stmt_vec_info {
#define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p
#define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt
#define STMT_VINFO_PATTERN_DEF_STMT(S) (S)->pattern_def_stmt
#define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs
#define STMT_VINFO_DEF_TYPE(S) (S)->def_type #define STMT_VINFO_DEF_TYPE(S) (S)->def_type
#define STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element #define STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element
...@@ -819,6 +823,7 @@ extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *, ...@@ -819,6 +823,7 @@ extern bool vectorizable_condition (gimple, gimple_stmt_iterator *, gimple *,
extern void vect_get_load_cost (struct data_reference *, int, bool, extern void vect_get_load_cost (struct data_reference *, int, bool,
unsigned int *, unsigned int *); unsigned int *, unsigned int *);
extern void vect_get_store_cost (struct data_reference *, int, unsigned int *); extern void vect_get_store_cost (struct data_reference *, int, unsigned int *);
extern bool vect_supportable_shift (enum tree_code, tree);
/* In tree-vect-data-refs.c. */ /* In tree-vect-data-refs.c. */
extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
...@@ -897,7 +902,7 @@ extern void vect_slp_transform_bb (basic_block); ...@@ -897,7 +902,7 @@ extern void vect_slp_transform_bb (basic_block);
Additional pattern recognition functions can (and will) be added Additional pattern recognition functions can (and will) be added
in the future. */ in the future. */
typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *); typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
#define NUM_PATTERNS 4 #define NUM_PATTERNS 5
void vect_pattern_recog (loop_vec_info); void vect_pattern_recog (loop_vec_info);
/* In tree-vectorizer.c. */ /* In tree-vectorizer.c. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment