Commit 079c527f by Jakub Jelinek, committed by Jakub Jelinek

re PR tree-optimization/51581 (Integer division by constant is not vectorized)
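
In scalar code GCC already strength-reduces division by a constant into a
highpart multiply plus shifts at RTL expansion time; this change teaches the
tree vectorizer to recognize such divisions (and modulos) and emit the
equivalent widened-multiply sequence, reusing choose_multiplier to pick the
magic constant.  A minimal scalar sketch of the two /3 expansions involved,
copied from the new execute test (f7 and f8 in
gcc.c-torture/execute/pr51581-1.c) and assuming 32-bit int and 64-bit
long long:

  static int sdiv3 (int a)
  {
    /* Signed a / 3: highpart of a * 0x55555556, minus the sign bit.  */
    return (int) ((unsigned long long) (a * 0x55555556LL) >> 32) - (a >> 31);
  }

  static unsigned int udiv3 (unsigned int b)
  {
    /* Unsigned b / 3: highpart of b * 0xaaaaaaab, then one more shift.  */
    return (unsigned int) ((b * 0xaaaaaaabULL) >> 32) >> 1;
  }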

	PR tree-optimization/51581
	* expr.h (choose_multiplier): New prototype.
	* expmed.c (choose_multiplier): No longer static.
	Change multiplier_ptr from rtx * to UHWI *.
	(expand_divmod): Adjust callers.
	* tree-vect-patterns.c (vect_recog_sdivmod_pow2_pattern):
	Renamed to...
	(vect_recog_divmod_pattern): ... this.  Pass bb_vinfo as last
	argument to new_stmt_vec_info.  Attempt to optimize also divisions
	by non-pow2 constants if integer vector division isn't supported.
	* tree-vect-stmts.c (vect_analyze_stmt): If node != NULL,
	don't look at pattern stmts and sequences.

	* gcc.c-torture/execute/pr51581-1.c: New test.
	* gcc.c-torture/execute/pr51581-2.c: New test.
	* gcc.dg/vect/pr51581-1.c: New test.
	* gcc.dg/vect/pr51581-2.c: New test.
	* gcc.dg/vect/pr51581-3.c: New test.
	* gcc.target/i386/avx-pr51581-1.c: New test.
	* gcc.target/i386/avx-pr51581-2.c: New test.
	* gcc.target/i386/avx2-pr51581-1.c: New test.
	* gcc.target/i386/avx2-pr51581-2.c: New test.
	* gcc.dg/vect/slp-26.c (main1): Divide by 0x8031 instead of 3.

From-SVN: r188656
parent c55224dc
2012-06-15 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/51581
* expr.h (choose_multiplier): New prototype.
* expmed.c (choose_multiplier): No longer static.
Change multiplier_ptr from rtx * to UHWI *.
(expand_divmod): Adjust callers.
* tree-vect-patterns.c (vect_recog_sdivmod_pow2_pattern):
Renamed to...
(vect_recog_divmod_pattern): ... this. Pass bb_vinfo as last
argument to new_stmt_vec_info. Attempt to optimize also divisions
by non-pow2 constants if integer vector division isn't supported.
* tree-vect-stmts.c (vect_analyze_stmt): If node != NULL,
don't look at pattern stmts and sequences.
2012-06-15 Eric Botcazou <ebotcazou@adacore.com>
PR middle-end/53590
......
@@ -2363,8 +2363,6 @@ static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
struct algorithm *, enum mult_variant *, int);
static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
int, rtx *, int *, int *);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx);
static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
@@ -3293,10 +3291,10 @@ ceil_log2 (unsigned HOST_WIDE_INT x)
Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
static
unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
unsigned HOST_WIDE_INT *multiplier_ptr,
int *post_shift_ptr, int *lgup_ptr)
{
HOST_WIDE_INT mhigh_hi, mlow_hi;
unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
@@ -3368,12 +3366,12 @@ choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
if (n < HOST_BITS_PER_WIDE_INT)
{
unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
*multiplier_ptr = GEN_INT (mhigh_lo & mask);
*multiplier_ptr = mhigh_lo & mask;
return mhigh_lo >= mask;
}
else
{
*multiplier_ptr = GEN_INT (mhigh_lo);
*multiplier_ptr = mhigh_lo;
return mhigh_hi;
}
}
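/* Illustration (not part of this change): for D = 3 with N = PRECISION = 32
   the reduced multiplier is 0xaaaaaaab with *POST_SHIFT_PTR = 1 and a zero
   return value, i.e. for 32-bit unsigned x
     x / 3 == (unsigned int) (((unsigned long long) x * 0xaaaaaaab) >> 32) >> 1
   which is exactly the expansion the new pr51581 execute tests hard-code.  */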
@@ -4053,10 +4051,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
{
if (unsignedp)
{
unsigned HOST_WIDE_INT mh;
unsigned HOST_WIDE_INT mh, ml;
int pre_shift, post_shift;
int dummy;
rtx ml;
unsigned HOST_WIDE_INT d = (INTVAL (op1)
& GET_MODE_MASK (compute_mode));
@@ -4118,7 +4115,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
= (shift_cost[speed][compute_mode][post_shift - 1]
+ shift_cost[speed][compute_mode][1]
+ 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml,
t1 = expand_mult_highpart (compute_mode, op0,
GEN_INT (ml),
NULL_RTX, 1,
max_cost - extra_cost);
if (t1 == 0)
@@ -4149,7 +4147,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost
= (shift_cost[speed][compute_mode][pre_shift]
+ shift_cost[speed][compute_mode][post_shift]);
t2 = expand_mult_highpart (compute_mode, t1, ml,
t2 = expand_mult_highpart (compute_mode, t1,
GEN_INT (ml),
NULL_RTX, 1,
max_cost - extra_cost);
if (t2 == 0)
@@ -4262,8 +4261,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
else if (size <= HOST_BITS_PER_WIDE_INT)
{
choose_multiplier (abs_d, size, size - 1,
&mlr, &post_shift, &lgup);
ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
&ml, &post_shift, &lgup);
if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
{
rtx t1, t2, t3;
@@ -4275,8 +4273,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1]
+ add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, mlr,
NULL_RTX, 0,
t1 = expand_mult_highpart (compute_mode, op0,
GEN_INT (ml), NULL_RTX, 0,
max_cost - extra_cost);
if (t1 == 0)
goto fail1;
@@ -4356,10 +4354,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
/* We will come here only for signed operations. */
if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
{
unsigned HOST_WIDE_INT mh;
unsigned HOST_WIDE_INT mh, ml;
int pre_shift, lgup, post_shift;
HOST_WIDE_INT d = INTVAL (op1);
rtx ml;
if (d > 0)
{
@@ -4399,8 +4396,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1]
+ 2 * add_cost[speed][compute_mode]);
t3 = expand_mult_highpart (compute_mode, t2, ml,
NULL_RTX, 1,
t3 = expand_mult_highpart (compute_mode, t2,
GEN_INT (ml), NULL_RTX, 1,
max_cost - extra_cost);
if (t3 != 0)
{
......
@@ -243,6 +243,13 @@ extern rtx emit_store_flag (rtx, enum rtx_code, rtx, rtx, enum machine_mode,
/* Like emit_store_flag, but always succeeds. */
extern rtx emit_store_flag_force (rtx, enum rtx_code, rtx, rtx,
enum machine_mode, int, int);
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
replace division by D, and put the least significant N bits of the result
in *MULTIPLIER_PTR and return the most significant bit. */
extern unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
int, unsigned HOST_WIDE_INT *,
int *, int *);
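/* Usage sketch (illustrative only), mirroring the new caller added in
   tree-vect-patterns.c; the low N bits of the multiplier now come back
   through a plain integer instead of an rtx:

     unsigned HOST_WIDE_INT ml, mh;
     int post_shift, dummy;
     mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy);

   A nonzero MH (the multiplier's most significant, N+1st bit) means the
   multiplier does not fit in N bits and the caller must apply the
   add-and-shift fixup sequence.  */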
/* Functions from builtins.c: */
extern rtx expand_builtin (tree, rtx, rtx, enum machine_mode, int);
......
2012-06-15 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/51581
* gcc.c-torture/execute/pr51581-1.c: New test.
* gcc.c-torture/execute/pr51581-2.c: New test.
* gcc.dg/vect/pr51581-1.c: New test.
* gcc.dg/vect/pr51581-2.c: New test.
* gcc.dg/vect/pr51581-3.c: New test.
* gcc.target/i386/avx-pr51581-1.c: New test.
* gcc.target/i386/avx-pr51581-2.c: New test.
* gcc.target/i386/avx2-pr51581-1.c: New test.
* gcc.target/i386/avx2-pr51581-2.c: New test.
* gcc.dg/vect/slp-26.c (main1): Divide by 0x8031 instead of 3.
2012-06-15 Richard Guenther <rguenther@suse.de>
* gcc.c-torture/execute/20120615-1.c: New testcase.
......
/* PR tree-optimization/51581 */
extern void abort (void);
#define N 4096
int a[N], c[N];
unsigned int b[N], d[N];
__attribute__((noinline, noclone)) void
f1 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] / 3;
}
__attribute__((noinline, noclone)) void
f2 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] / 3;
}
__attribute__((noinline, noclone)) void
f3 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] / 18;
}
__attribute__((noinline, noclone)) void
f4 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] / 18;
}
__attribute__((noinline, noclone)) void
f5 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] / 19;
}
__attribute__((noinline, noclone)) void
f6 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] / 19;
}
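/* f7-f12 open-code the multiply-highpart sequences that the divisions in
   f1-f6 are expected to be strength-reduced to (assuming 32-bit int and
   64-bit long long), so main can cross-check both forms against real
   division.  */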
#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
__attribute__((noinline, noclone)) void
f7 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = (int) ((unsigned long long) (a[i] * 0x55555556LL) >> 32) - (a[i] >> 31);
}
__attribute__((noinline, noclone)) void
f8 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = ((unsigned int) ((b[i] * 0xaaaaaaabULL) >> 32) >> 1);
}
__attribute__((noinline, noclone)) void
f9 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = (((int) ((unsigned long long) (a[i] * 0x38e38e39LL) >> 32)) >> 2) - (a[i] >> 31);
}
__attribute__((noinline, noclone)) void
f10 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = (unsigned int) ((b[i] * 0x38e38e39ULL) >> 32) >> 2;
}
__attribute__((noinline, noclone)) void
f11 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = (((int) ((unsigned long long) (a[i] * 0x6bca1af3LL) >> 32)) >> 3) - (a[i] >> 31);
}
__attribute__((noinline, noclone)) void
f12 (void)
{
int i;
for (i = 0; i < N; i++)
{
unsigned int tmp = (b[i] * 0xaf286bcbULL) >> 32;
d[i] = (((b[i] - tmp) >> 1) + tmp) >> 4;
}
}
#endif
int
main ()
{
int i;
for (i = 0; i < N; i++)
{
asm ("");
a[i] = i - N / 2;
b[i] = i;
}
a[0] = -__INT_MAX__ - 1;
a[1] = -__INT_MAX__;
a[N - 1] = __INT_MAX__;
b[N - 1] = ~0;
f1 ();
f2 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] / 3 || d[i] != b[i] / 3)
abort ();
f3 ();
f4 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] / 18 || d[i] != b[i] / 18)
abort ();
f5 ();
f6 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] / 19 || d[i] != b[i] / 19)
abort ();
#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
f7 ();
f8 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] / 3 || d[i] != b[i] / 3)
abort ();
f9 ();
f10 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] / 18 || d[i] != b[i] / 18)
abort ();
f11 ();
f12 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] / 19 || d[i] != b[i] / 19)
abort ();
#endif
return 0;
}
/* PR tree-optimization/51581 */
extern void abort (void);
#define N 4096
int a[N], c[N];
unsigned int b[N], d[N];
__attribute__((noinline, noclone)) void
f1 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] % 3;
}
__attribute__((noinline, noclone)) void
f2 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] % 3;
}
__attribute__((noinline, noclone)) void
f3 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] % 18;
}
__attribute__((noinline, noclone)) void
f4 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] % 18;
}
__attribute__((noinline, noclone)) void
f5 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] % 19;
}
__attribute__((noinline, noclone)) void
f6 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] % 19;
}
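/* As in pr51581-1.c, f7-f12 open-code the expected strength-reduced
   sequences, here followed by the multiply-and-subtract that turns the
   quotient back into a remainder.  */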
#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
__attribute__((noinline, noclone)) void
f7 (void)
{
int i;
for (i = 0; i < N; i++)
{
int x = (int) ((unsigned long long) (a[i] * 0x55555556LL) >> 32) - (a[i] >> 31);
c[i] = a[i] - x * 3;
}
}
__attribute__((noinline, noclone)) void
f8 (void)
{
int i;
for (i = 0; i < N; i++)
{
unsigned int x = ((unsigned int) ((b[i] * 0xaaaaaaabULL) >> 32) >> 1);
d[i] = b[i] - x * 3;
}
}
__attribute__((noinline, noclone)) void
f9 (void)
{
int i;
for (i = 0; i < N; i++)
{
int x = (((int) ((unsigned long long) (a[i] * 0x38e38e39LL) >> 32)) >> 2) - (a[i] >> 31);
c[i] = a[i] - x * 18;
}
}
__attribute__((noinline, noclone)) void
f10 (void)
{
int i;
for (i = 0; i < N; i++)
{
unsigned int x = (unsigned int) ((b[i] * 0x38e38e39ULL) >> 32) >> 2;
d[i] = b[i] - x * 18;
}
}
__attribute__((noinline, noclone)) void
f11 (void)
{
int i;
for (i = 0; i < N; i++)
{
int x = (((int) ((unsigned long long) (a[i] * 0x6bca1af3LL) >> 32)) >> 3) - (a[i] >> 31);
c[i] = a[i] - x * 19;
}
}
__attribute__((noinline, noclone)) void
f12 (void)
{
int i;
for (i = 0; i < N; i++)
{
unsigned int tmp = (b[i] * 0xaf286bcbULL) >> 32;
unsigned int x = (((b[i] - tmp) >> 1) + tmp) >> 4;
d[i] = b[i] - x * 19;
}
}
#endif
int
main ()
{
int i;
for (i = 0; i < N; i++)
{
asm ("");
a[i] = i - N / 2;
b[i] = i;
}
a[0] = -__INT_MAX__ - 1;
a[1] = -__INT_MAX__;
a[N - 1] = __INT_MAX__;
b[N - 1] = ~0;
f1 ();
f2 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] % 3 || d[i] != b[i] % 3)
abort ();
f3 ();
f4 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] % 18 || d[i] != b[i] % 18)
abort ();
f5 ();
f6 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] % 19 || d[i] != b[i] % 19)
abort ();
#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
f7 ();
f8 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] % 3 || d[i] != b[i] % 3)
abort ();
f9 ();
f10 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] % 18 || d[i] != b[i] % 18)
abort ();
f11 ();
f12 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] % 19 || d[i] != b[i] % 19)
abort ();
#endif
return 0;
}
/* PR tree-optimization/51581 */
#include "tree-vect.h"
#define main main1
#include "../../gcc.c-torture/execute/pr51581-1.c"
#undef main
int
main ()
{
int i;
check_vect ();
asm ("");
return main1 ();
}
/* { dg-final { cleanup-tree-dump "vect" } } */
/* PR tree-optimization/51581 */
#include "tree-vect.h"
#define main main1
#include "../../gcc.c-torture/execute/pr51581-2.c"
#undef main
int
main ()
{
int i;
check_vect ();
asm ("");
return main1 ();
}
/* { dg-final { cleanup-tree-dump "vect" } } */
/* PR tree-optimization/51581 */
#include "tree-vect.h"
int a[8], b[8];
unsigned int c[8], d[8];
void
f1 (void)
{
a[0] = b[0] / 8;
a[1] = b[1] / 8;
a[2] = b[2] / 8;
a[3] = b[3] / 8;
a[4] = b[4] / 8;
a[5] = b[5] / 8;
a[6] = b[6] / 8;
a[7] = b[7] / 8;
}
void
f2 (void)
{
c[0] = d[0] / 3;
c[1] = d[1] / 3;
c[2] = d[2] / 3;
c[3] = d[3] / 3;
c[4] = d[4] / 3;
c[5] = d[5] / 3;
c[6] = d[6] / 3;
c[7] = d[7] / 3;
}
void
f3 (void)
{
a[0] = b[0] / 8;
a[1] = b[1] / 4;
a[2] = b[2] / 8;
a[3] = b[3] / 4;
a[4] = b[4] / 8;
a[5] = b[5] / 4;
a[6] = b[6] / 8;
a[7] = b[7] / 4;
}
void
f4 (void)
{
c[0] = d[0] / 3;
c[1] = d[1] / 5;
c[2] = d[2] / 3;
c[3] = d[3] / 5;
c[4] = d[4] / 3;
c[5] = d[5] / 5;
c[6] = d[6] / 3;
c[7] = d[7] / 5;
}
void
f5 (void)
{
a[0] = b[0] / 14;
a[1] = b[1] / 15;
a[2] = b[2] / 14;
a[3] = b[3] / 15;
a[4] = b[4] / 14;
a[5] = b[5] / 15;
a[6] = b[6] / 14;
a[7] = b[7] / 15;
}
void
f6 (void)
{
c[0] = d[0] / 6;
c[1] = d[1] / 5;
c[2] = d[2] / 6;
c[3] = d[3] / 5;
c[4] = d[4] / 6;
c[5] = d[5] / 5;
c[6] = d[6] / 13;
c[7] = d[7] / 5;
}
int
main ()
{
int i;
check_vect ();
asm ("");
for (i = 0; i < 8; i++)
{
asm ("");
b[i] = i - 4;
d[i] = i - 4;
}
f1 ();
f2 ();
for (i = 0; i < 8; i++)
if (a[i] != b[i] / 8 || c[i] != d[i] / 3)
abort ();
f3 ();
f4 ();
for (i = 0; i < 8; i+= 2)
if (a[i] != b[i] / 8 || a[i + 1] != b[i + 1] / 4
|| c[i] != d[i] / 3 || c[i + 1] != d[i + 1] / 5)
abort ();
f5 ();
f6 ();
for (i = 0; i < 8; i+= 2)
if (a[i] != b[i] / 14 || a[i + 1] != b[i + 1] / 15
|| c[i] != d[i] / (i == 6 ? 13 : 6) || c[i + 1] != d[i + 1] / 5)
abort ();
return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */
@@ -10,7 +10,7 @@ main1 ()
{
int i;
unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
unsigned short out[N*8], a[N], b[N] = {3,6,9,12,15,18,21,24};
unsigned short out[N*8], a[N], b[N] = {3,0x8031,0x7fff,0x8032,0xffff,0,0x8030,0x8000};
/* Partial SLP is not supported. */
for (i = 0; i < N; i++)
@@ -20,7 +20,7 @@ main1 ()
out[i*4 + 2] = in[i*4 + 2];
out[i*4 + 3] = in[i*4 + 3];
a[i] = b[i] / 3;
a[i] = b[i] / 0x8031;
}
/* check results: */
@@ -30,7 +30,7 @@ main1 ()
|| out[i*4 + 1] != in[i*4 + 1]
|| out[i*4 + 2] != in[i*4 + 2]
|| out[i*4 + 3] != in[i*4 + 3]
|| a[i] != b[i] / 3)
|| a[i] != b[i] / 0x8031)
abort ();
}
......
/* PR tree-optimization/51581 */
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -mavx -fno-vect-cost-model" } */
/* { dg-require-effective-target avx } */
#ifndef CHECK_H
#define CHECK_H "avx-check.h"
#endif
#ifndef TEST
#define TEST avx_test
#endif
#define main main1
#include "../../gcc.c-torture/execute/pr51581-1.c"
#undef main
#include CHECK_H
static void
TEST (void)
{
main1 ();
}
/* PR tree-optimization/51581 */
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -mavx -fno-vect-cost-model" } */
/* { dg-require-effective-target avx } */
#ifndef CHECK_H
#define CHECK_H "avx-check.h"
#endif
#ifndef TEST
#define TEST avx_test
#endif
#define main main1
#include "../../gcc.c-torture/execute/pr51581-2.c"
#undef main
#include CHECK_H
static void
TEST (void)
{
main1 ();
}
/* PR tree-optimization/51581 */
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -mavx2 -fno-vect-cost-model" } */
/* { dg-require-effective-target avx2 } */
#define CHECK_H "avx2-check.h"
#define TEST avx2_test
#include "avx-pr51581-1.c"
/* PR tree-optimization/51581 */
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -mavx2 -fno-vect-cost-model" } */
/* { dg-require-effective-target avx2 } */
#define CHECK_H "avx2-check.h"
#define TEST avx2_test
#include "avx-pr51581-2.c"
@@ -53,8 +53,8 @@ static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
tree *, tree *);
static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
tree *, tree *);
static gimple vect_recog_divmod_pattern (VEC (gimple, heap) **,
tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
@@ -66,7 +66,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
vect_recog_widen_shift_pattern,
vect_recog_over_widening_pattern,
vect_recog_vector_vector_shift_pattern,
vect_recog_sdivmod_pow2_pattern,
vect_recog_divmod_pattern,
vect_recog_mixed_size_cond_pattern,
vect_recog_bool_pattern};
@@ -1585,29 +1585,30 @@ vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
return pattern_stmt;
}
/* Detect a signed division by power of two constant that wouldn't be
/* Detect a signed division by a constant that wouldn't be
otherwise vectorized:
type a_t, b_t;
S1 a_t = b_t / N;
where type 'type' is a signed integral type and N is a constant positive
power of two.
where type 'type' is an integral type and N is a constant.
Similarly handle signed modulo by power of two constant:
Similarly handle modulo by a constant:
S4 a_t = b_t % N;
Input/Output:
* STMTS: Contains a stmt from which the pattern search begins,
i.e. the division stmt. S1 is replaced by:
i.e. the division stmt. S1 is replaced by if N is a power
of two constant and type is signed:
S3 y_t = b_t < 0 ? N - 1 : 0;
S2 x_t = b_t + y_t;
S1' a_t = x_t >> log2 (N);
S4 is replaced by (where *_T temporaries have unsigned type):
S4 is replaced if N is a power of two constant and
type is signed by (where *_T temporaries have unsigned type):
S9 y_T = b_t < 0 ? -1U : 0U;
S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
S7 z_t = (type) z_T;
@@ -1625,16 +1626,22 @@ vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
S1 or modulo S4 stmt. */
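/* For instance (illustration only, not part of the patch), with signed
   32-bit b_t and N = 8 the S3/S2/S1' replacement above computes
     y_t = b_t < 0 ? 7 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 3;
   i.e. the bias makes the arithmetic shift round toward zero, as C
   division requires.  */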
static gimple
vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
tree *type_in, tree *type_out)
vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
tree *type_in, tree *type_out)
{
gimple last_stmt = VEC_pop (gimple, *stmts);
tree oprnd0, oprnd1, vectype, itype, cond;
tree oprnd0, oprnd1, vectype, itype, witype, vecwtype, cond;
gimple pattern_stmt, def_stmt;
enum tree_code rhs_code;
stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
optab optab;
tree dummy, q;
enum tree_code dummy_code;
int dummy_int, prec;
VEC (tree, heap) *dummy_vec;
stmt_vec_info def_stmt_vinfo;
if (!is_gimple_assign (last_stmt))
return NULL;
@@ -1658,10 +1665,7 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
if (TREE_CODE (oprnd0) != SSA_NAME
|| TREE_CODE (oprnd1) != INTEGER_CST
|| TREE_CODE (itype) != INTEGER_TYPE
|| TYPE_UNSIGNED (itype)
|| TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
|| !integer_pow2p (oprnd1)
|| tree_int_cst_sgn (oprnd1) != 1)
|| TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
return NULL;
vectype = get_vectype_for_scalar_type (itype);
@@ -1680,104 +1684,438 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
return NULL;
}
/* Pattern detected. */
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
if (rhs_code == TRUNC_DIV_EXPR)
prec = TYPE_PRECISION (itype);
if (integer_pow2p (oprnd1))
{
tree var = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
fold_build2 (MINUS_EXPR, itype,
oprnd1,
build_int_cst (itype,
1)),
build_int_cst (itype, 0));
new_pattern_def_seq (stmt_vinfo, def_stmt);
var = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
gimple_assign_lhs (def_stmt));
append_pattern_def_seq (stmt_vinfo, def_stmt);
if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
return NULL;
pattern_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR,
vect_recog_temp_ssa_var (itype, NULL),
var,
build_int_cst (itype,
tree_log2 (oprnd1)));
/* Pattern detected. */
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "vect_recog_divmod_pattern: detected: ");
cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
build_int_cst (itype, 0));
if (rhs_code == TRUNC_DIV_EXPR)
{
tree var = vect_recog_temp_ssa_var (itype, NULL);
tree shift;
def_stmt
= gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
fold_build2 (MINUS_EXPR, itype,
oprnd1,
build_int_cst (itype,
1)),
build_int_cst (itype, 0));
new_pattern_def_seq (stmt_vinfo, def_stmt);
var = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
gimple_assign_lhs (def_stmt));
append_pattern_def_seq (stmt_vinfo, def_stmt);
shift = build_int_cst (itype, tree_log2 (oprnd1));
pattern_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR,
vect_recog_temp_ssa_var (itype,
NULL),
var, shift);
}
else
{
tree signmask;
STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
if (compare_tree_int (oprnd1, 2) == 0)
{
signmask = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
build_int_cst (itype, 1),
build_int_cst (itype, 0));
append_pattern_def_seq (stmt_vinfo, def_stmt);
}
else
{
tree utype
= build_nonstandard_integer_type (prec, 1);
tree vecutype = get_vectype_for_scalar_type (utype);
tree shift
= build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
- tree_log2 (oprnd1));
tree var = vect_recog_temp_ssa_var (utype, NULL);
def_stmt
= gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
build_int_cst (utype, -1),
build_int_cst (utype, 0));
def_stmt_vinfo
= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
append_pattern_def_seq (stmt_vinfo, def_stmt);
var = vect_recog_temp_ssa_var (utype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, var,
gimple_assign_lhs (def_stmt),
shift);
def_stmt_vinfo
= new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
append_pattern_def_seq (stmt_vinfo, def_stmt);
signmask = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
NULL_TREE);
append_pattern_def_seq (stmt_vinfo, def_stmt);
}
def_stmt
= gimple_build_assign_with_ops (PLUS_EXPR,
vect_recog_temp_ssa_var (itype,
NULL),
oprnd0, signmask);
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt
= gimple_build_assign_with_ops (BIT_AND_EXPR,
vect_recog_temp_ssa_var (itype,
NULL),
gimple_assign_lhs (def_stmt),
fold_build2 (MINUS_EXPR, itype,
oprnd1,
build_int_cst (itype,
1)));
append_pattern_def_seq (stmt_vinfo, def_stmt);
pattern_stmt
= gimple_build_assign_with_ops (MINUS_EXPR,
vect_recog_temp_ssa_var (itype,
NULL),
gimple_assign_lhs (def_stmt),
signmask);
}
if (vect_print_dump_info (REPORT_DETAILS))
print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
VEC_safe_push (gimple, heap, *stmts, last_stmt);
*type_in = vectype;
*type_out = vectype;
return pattern_stmt;
}
else
if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
|| integer_zerop (oprnd1)
|| prec > HOST_BITS_PER_WIDE_INT)
return NULL;
witype = build_nonstandard_integer_type (prec * 2,
TYPE_UNSIGNED (itype));
vecwtype = get_vectype_for_scalar_type (witype);
if (vecwtype == NULL_TREE)
return NULL;
if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
vecwtype, vectype,
&dummy, &dummy, &dummy_code,
&dummy_code, &dummy_int, &dummy_vec))
return NULL;
STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
if (TYPE_UNSIGNED (itype))
{
tree signmask;
STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
if (compare_tree_int (oprnd1, 2) == 0)
unsigned HOST_WIDE_INT mh, ml;
int pre_shift, post_shift;
unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
& GET_MODE_MASK (TYPE_MODE (itype));
tree t1, t2, t3, t4, t5, t6;
if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
return NULL;
/* Find a suitable multiplier and right shift count
instead of multiplying with D. */
mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
/* If the suggested multiplier is more than SIZE bits, we can do better
for even divisors, using an initial right shift. */
if (mh != 0 && (d & 1) == 0)
{
pre_shift = floor_log2 (d & -d);
mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
&ml, &post_shift, &dummy_int);
gcc_assert (!mh);
}
else
pre_shift = 0;
if (mh != 0)
{
signmask = vect_recog_temp_ssa_var (itype, NULL);
if (post_shift - 1 >= prec)
return NULL;
/* t1 = oprnd0 w* ml;
t2 = t1 >> prec;
t3 = (type) t2;
t4 = oprnd0 - t3;
t5 = t4 >> 1;
t6 = t3 + t5;
q = t6 >> (post_shift - 1); */
t1 = vect_recog_temp_ssa_var (witype, NULL);
def_stmt
= gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
build_int_cst (itype, 1),
build_int_cst (itype, 0));
= gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t1, oprnd0,
build_int_cst (itype, ml));
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
t2 = vect_recog_temp_ssa_var (witype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, t2, t1,
build_int_cst (itype, prec));
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
t3 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (NOP_EXPR, t3, t2, NULL_TREE);
append_pattern_def_seq (stmt_vinfo, def_stmt);
t4 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (MINUS_EXPR, t4, oprnd0, t3);
append_pattern_def_seq (stmt_vinfo, def_stmt);
t5 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, t5, t4,
integer_one_node);
append_pattern_def_seq (stmt_vinfo, def_stmt);
t6 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (PLUS_EXPR, t6, t3, t5);
if (post_shift != 1)
{
append_pattern_def_seq (stmt_vinfo, def_stmt);
q = vect_recog_temp_ssa_var (witype, NULL);
pattern_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t6,
build_int_cst (itype,
post_shift
- 1));
}
else
{
q = t6;
pattern_stmt = def_stmt;
}
}
else
{
tree utype
= build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
tree vecutype = get_vectype_for_scalar_type (utype);
tree shift
= build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
- tree_log2 (oprnd1));
tree var = vect_recog_temp_ssa_var (utype, NULL);
stmt_vec_info def_stmt_vinfo;
if (pre_shift >= prec || post_shift >= prec)
return NULL;
/* t1 = oprnd0 >> pre_shift;
t2 = t1 w* ml;
t3 = t2 >> (prec + post_shift);
q = (type) t3; */
if (pre_shift)
{
t1 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
build_int_cst (NULL,
pre_shift));
append_pattern_def_seq (stmt_vinfo, def_stmt);
}
else
t1 = oprnd0;
t2 = vect_recog_temp_ssa_var (witype, NULL);
def_stmt
= gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
build_int_cst (utype, -1),
build_int_cst (utype, 0));
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
= gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t2, t1,
build_int_cst (itype, ml));
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
t3 = vect_recog_temp_ssa_var (witype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
build_int_cst (itype, post_shift
+ prec));
append_pattern_def_seq (stmt_vinfo, def_stmt);
var = vect_recog_temp_ssa_var (utype, NULL);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
q = vect_recog_temp_ssa_var (itype, NULL);
pattern_stmt
= gimple_build_assign_with_ops (NOP_EXPR, q, t3, NULL_TREE);
}
}
else
{
unsigned HOST_WIDE_INT ml;
int lgup, post_shift;
HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
unsigned HOST_WIDE_INT abs_d;
bool add = false;
tree uwitype = NULL, vecuwtype = NULL;
tree t1, t2, t3, t4, t5, t6, t7;
/* Give up for -1. */
if (d == -1)
return NULL;
if (!vect_supportable_shift (RSHIFT_EXPR, witype))
{
uwitype = build_nonstandard_integer_type (prec * 2, 1);
vecuwtype = get_vectype_for_scalar_type (uwitype);
if (vecuwtype == NULL_TREE)
return NULL;
}
/* Since d might be INT_MIN, we have to cast to
unsigned HOST_WIDE_INT before negating to avoid
undefined signed overflow. */
abs_d = (d >= 0
? (unsigned HOST_WIDE_INT) d
: - (unsigned HOST_WIDE_INT) d);
/* n rem d = n rem -d */
if (rhs_code == TRUNC_MOD_EXPR && d < 0)
{
d = abs_d;
oprnd1 = build_int_cst (itype, abs_d);
}
else if (HOST_BITS_PER_WIDE_INT >= prec
&& abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
/* This case is not handled correctly below. */
return NULL;
choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &lgup);
if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
{
add = true;
ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
}
if (post_shift >= prec)
return NULL;
/* t1 = oprnd1 w* ml; */
t1 = vect_recog_temp_ssa_var (witype, NULL);
def_stmt
= gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t1, oprnd0,
build_int_cst (itype, ml));
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
if (vecuwtype != NULL)
{
/* t2 = (uwtype) t1; */
t2 = vect_recog_temp_ssa_var (uwitype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, var,
gimple_assign_lhs (def_stmt),
shift);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
= gimple_build_assign_with_ops (NOP_EXPR, t2, t1, NULL_TREE);
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecuwtype;
}
else
t2 = t1;
/* t3 = t2 >> prec; or t3 = t2 >> (prec + post_shift); */
t3 = vect_recog_temp_ssa_var (vecuwtype ? uwitype : witype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
build_int_cst (itype,
prec
+ (!add
&& vecuwtype == NULL
? post_shift : 0)));
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecuwtype ? vecuwtype : vecwtype;
/* t4 = (type) t3; */
t4 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (NOP_EXPR, t4, t3, NULL_TREE);
append_pattern_def_seq (stmt_vinfo, def_stmt);
if (add)
{
/* t5 = t4 + oprnd0; */
t5 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (PLUS_EXPR, t5, t4, oprnd0);
append_pattern_def_seq (stmt_vinfo, def_stmt);
signmask = vect_recog_temp_ssa_var (itype, NULL);
}
else
t5 = t4;
if ((add || vecuwtype != NULL) && post_shift)
{
/* t6 = t5 >> post_shift; */
t6 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
NULL_TREE);
= gimple_build_assign_with_ops (RSHIFT_EXPR, t6, t5,
build_int_cst (itype, post_shift));
append_pattern_def_seq (stmt_vinfo, def_stmt);
}
else
t6 = t5;
/* t7 = oprnd0 >> (prec - 1); */
t7 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (PLUS_EXPR,
vect_recog_temp_ssa_var (itype, NULL),
oprnd0, signmask);
= gimple_build_assign_with_ops (RSHIFT_EXPR, t7, oprnd0,
build_int_cst (itype, prec - 1));
append_pattern_def_seq (stmt_vinfo, def_stmt);
/* q = t6 - t7; or q = t7 - t6; */
q = vect_recog_temp_ssa_var (itype, NULL);
pattern_stmt
= gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t7 : t6,
d < 0 ? t6 : t7);
}
if (rhs_code == TRUNC_MOD_EXPR)
{
tree r, t1;
/* We divided. Now finish by:
t1 = q * oprnd1;
r = oprnd0 - t1; */
append_pattern_def_seq (stmt_vinfo, pattern_stmt);
t1 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (BIT_AND_EXPR,
vect_recog_temp_ssa_var (itype, NULL),
gimple_assign_lhs (def_stmt),
fold_build2 (MINUS_EXPR, itype,
oprnd1,
build_int_cst (itype,
1)));
= gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
append_pattern_def_seq (stmt_vinfo, def_stmt);
r = vect_recog_temp_ssa_var (itype, NULL);
pattern_stmt
= gimple_build_assign_with_ops (MINUS_EXPR,
vect_recog_temp_ssa_var (itype, NULL),
gimple_assign_lhs (def_stmt),
signmask);
= gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
}
/* Pattern detected. */
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "vect_recog_divmod_pattern: detected: ");
if (vect_print_dump_info (REPORT_DETAILS))
print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
......
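Taken together, the new non-power-of-two paths above emit one of a small set
of scalar sequences per vector element.  A hedged scalar rendering of the two
unsigned cases (constants as hard-coded by the new execute tests for /18 and
/19, assuming 32-bit int and 64-bit long long):

  /* mh == 0: q = highpart (b w* ml) >> post_shift (f10 in pr51581-1.c).  */
  static unsigned int udiv18 (unsigned int b)
  {
    return (unsigned int) ((b * 0x38e38e39ULL) >> 32) >> 2;
  }

  /* mh != 0: add-and-shift fixup, q = (((b - t3) >> 1) + t3) >> (post_shift - 1)
     where t3 is the highpart (f12 in pr51581-1.c).  */
  static unsigned int udiv19 (unsigned int b)
  {
    unsigned int t3 = (b * 0xaf286bcbULL) >> 32;
    return (((b - t3) >> 1) + t3) >> 4;
  }

The signed path multiplies by the chosen constant in the wider type, shifts the
highpart down, adds oprnd0 back when the multiplier had to be sign-extended,
and finally subtracts oprnd0 >> (prec - 1) (or the reverse for a negative
divisor); f9 and f11 in the same test spell out the /18 and /19 instances.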
@@ -5361,7 +5361,9 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
Pattern statement needs to be analyzed instead of the original statement
if the original statement is not relevant. Otherwise, we analyze both
statements. */
statements. In basic blocks we are called from some SLP instance
traversal, don't analyze pattern stmts instead, the pattern stmts
already will be part of SLP instance. */
pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
if (!STMT_VINFO_RELEVANT_P (stmt_info)
@@ -5390,6 +5392,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
}
}
else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
&& node == NULL
&& pattern_stmt
&& (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
@@ -5406,6 +5409,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
}
if (is_pattern_stmt_p (stmt_info)
&& node == NULL
&& (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
{
gimple_stmt_iterator si;
......