Commit 079c527f by Jakub Jelinek Committed by Jakub Jelinek

re PR tree-optimization/51581 (Integer division by constant is not vectorized)

	PR tree-optimization/51581
	* expr.h (choose_multiplier): New prototype.
	* expmed.c (choose_multiplier): No longer static.
	Change multiplier_ptr from rtx * to UHWI *.
	(expand_divmod): Adjust callers.
	* tree-vect-patterns.c (vect_recog_sdivmod_pow2_pattern):
	Renamed to...
	(vect_recog_divmod_pattern): ... this.  Pass bb_vinfo as last
	argument to new_stmt_vec_info.  Attempt to optimize also divisions
	by non-pow2 constants if integer vector division isn't supported.
	* tree-vect-stmts.c (vect_analyze_stmt): If node != NULL,
	don't look at pattern stmts and sequences.

	* gcc.c-torture/execute/pr51581-1.c: New test.
	* gcc.c-torture/execute/pr51581-2.c: New test.
	* gcc.dg/vect/pr51581-1.c: New test.
	* gcc.dg/vect/pr51581-2.c: New test.
	* gcc.dg/vect/pr51581-3.c: New test.
	* gcc.target/i386/avx-pr51581-1.c: New test.
	* gcc.target/i386/avx-pr51581-2.c: New test.
	* gcc.target/i386/avx2-pr51581-1.c: New test.
	* gcc.target/i386/avx2-pr51581-2.c: New test.
	* gcc.dg/vect/slp-26.c (main1): Divide by 0x8031 instead of 3.

From-SVN: r188656
parent c55224dc
2012-06-15 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/51581
* expr.h (choose_multiplier): New prototype.
* expmed.c (choose_multiplier): No longer static.
Change multiplier_ptr from rtx * to UHWI *.
(expand_divmod): Adjust callers.
* tree-vect-patterns.c (vect_recog_sdivmod_pow2_pattern):
Renamed to...
(vect_recog_divmod_pattern): ... this. Pass bb_vinfo as last
argument to new_stmt_vec_info. Attempt to optimize also divisions
by non-pow2 constants if integer vector division isn't supported.
* tree-vect-stmts.c (vect_analyze_stmt): If node != NULL,
don't look at pattern stmts and sequences.
2012-06-15 Eric Botcazou <ebotcazou@adacore.com> 2012-06-15 Eric Botcazou <ebotcazou@adacore.com>
PR middle-end/53590 PR middle-end/53590
......
...@@ -2363,8 +2363,6 @@ static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT, ...@@ -2363,8 +2363,6 @@ static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
struct algorithm *, enum mult_variant *, int); struct algorithm *, enum mult_variant *, int);
static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx, static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
const struct algorithm *, enum mult_variant); const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
int, rtx *, int *, int *);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx); static rtx extract_high_half (enum machine_mode, rtx);
static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
...@@ -3293,10 +3291,10 @@ ceil_log2 (unsigned HOST_WIDE_INT x) ...@@ -3293,10 +3291,10 @@ ceil_log2 (unsigned HOST_WIDE_INT x)
Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR), Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */ where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
static
unsigned HOST_WIDE_INT unsigned HOST_WIDE_INT
choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr) unsigned HOST_WIDE_INT *multiplier_ptr,
int *post_shift_ptr, int *lgup_ptr)
{ {
HOST_WIDE_INT mhigh_hi, mlow_hi; HOST_WIDE_INT mhigh_hi, mlow_hi;
unsigned HOST_WIDE_INT mhigh_lo, mlow_lo; unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
...@@ -3368,12 +3366,12 @@ choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision, ...@@ -3368,12 +3366,12 @@ choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
if (n < HOST_BITS_PER_WIDE_INT) if (n < HOST_BITS_PER_WIDE_INT)
{ {
unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1; unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
*multiplier_ptr = GEN_INT (mhigh_lo & mask); *multiplier_ptr = mhigh_lo & mask;
return mhigh_lo >= mask; return mhigh_lo >= mask;
} }
else else
{ {
*multiplier_ptr = GEN_INT (mhigh_lo); *multiplier_ptr = mhigh_lo;
return mhigh_hi; return mhigh_hi;
} }
} }
...@@ -4053,10 +4051,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4053,10 +4051,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
{ {
if (unsignedp) if (unsignedp)
{ {
unsigned HOST_WIDE_INT mh; unsigned HOST_WIDE_INT mh, ml;
int pre_shift, post_shift; int pre_shift, post_shift;
int dummy; int dummy;
rtx ml;
unsigned HOST_WIDE_INT d = (INTVAL (op1) unsigned HOST_WIDE_INT d = (INTVAL (op1)
& GET_MODE_MASK (compute_mode)); & GET_MODE_MASK (compute_mode));
...@@ -4118,7 +4115,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4118,7 +4115,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
= (shift_cost[speed][compute_mode][post_shift - 1] = (shift_cost[speed][compute_mode][post_shift - 1]
+ shift_cost[speed][compute_mode][1] + shift_cost[speed][compute_mode][1]
+ 2 * add_cost[speed][compute_mode]); + 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, ml, t1 = expand_mult_highpart (compute_mode, op0,
GEN_INT (ml),
NULL_RTX, 1, NULL_RTX, 1,
max_cost - extra_cost); max_cost - extra_cost);
if (t1 == 0) if (t1 == 0)
...@@ -4149,7 +4147,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4149,7 +4147,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost extra_cost
= (shift_cost[speed][compute_mode][pre_shift] = (shift_cost[speed][compute_mode][pre_shift]
+ shift_cost[speed][compute_mode][post_shift]); + shift_cost[speed][compute_mode][post_shift]);
t2 = expand_mult_highpart (compute_mode, t1, ml, t2 = expand_mult_highpart (compute_mode, t1,
GEN_INT (ml),
NULL_RTX, 1, NULL_RTX, 1,
max_cost - extra_cost); max_cost - extra_cost);
if (t2 == 0) if (t2 == 0)
...@@ -4262,8 +4261,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4262,8 +4261,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
else if (size <= HOST_BITS_PER_WIDE_INT) else if (size <= HOST_BITS_PER_WIDE_INT)
{ {
choose_multiplier (abs_d, size, size - 1, choose_multiplier (abs_d, size, size - 1,
&mlr, &post_shift, &lgup); &ml, &post_shift, &lgup);
ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1)) if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
{ {
rtx t1, t2, t3; rtx t1, t2, t3;
...@@ -4275,8 +4273,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4275,8 +4273,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift] extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1] + shift_cost[speed][compute_mode][size - 1]
+ add_cost[speed][compute_mode]); + add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, mlr, t1 = expand_mult_highpart (compute_mode, op0,
NULL_RTX, 0, GEN_INT (ml), NULL_RTX, 0,
max_cost - extra_cost); max_cost - extra_cost);
if (t1 == 0) if (t1 == 0)
goto fail1; goto fail1;
...@@ -4356,10 +4354,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4356,10 +4354,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
/* We will come here only for signed operations. */ /* We will come here only for signed operations. */
if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size) if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
{ {
unsigned HOST_WIDE_INT mh; unsigned HOST_WIDE_INT mh, ml;
int pre_shift, lgup, post_shift; int pre_shift, lgup, post_shift;
HOST_WIDE_INT d = INTVAL (op1); HOST_WIDE_INT d = INTVAL (op1);
rtx ml;
if (d > 0) if (d > 0)
{ {
...@@ -4399,8 +4396,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4399,8 +4396,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift] extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1] + shift_cost[speed][compute_mode][size - 1]
+ 2 * add_cost[speed][compute_mode]); + 2 * add_cost[speed][compute_mode]);
t3 = expand_mult_highpart (compute_mode, t2, ml, t3 = expand_mult_highpart (compute_mode, t2,
NULL_RTX, 1, GEN_INT (ml), NULL_RTX, 1,
max_cost - extra_cost); max_cost - extra_cost);
if (t3 != 0) if (t3 != 0)
{ {
......
...@@ -243,6 +243,13 @@ extern rtx emit_store_flag (rtx, enum rtx_code, rtx, rtx, enum machine_mode, ...@@ -243,6 +243,13 @@ extern rtx emit_store_flag (rtx, enum rtx_code, rtx, rtx, enum machine_mode,
/* Like emit_store_flag, but always succeeds. */ /* Like emit_store_flag, but always succeeds. */
extern rtx emit_store_flag_force (rtx, enum rtx_code, rtx, rtx, extern rtx emit_store_flag_force (rtx, enum rtx_code, rtx, rtx,
enum machine_mode, int, int); enum machine_mode, int, int);
/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
replace division by D, and put the least significant N bits of the result
in *MULTIPLIER_PTR and return the most significant bit. */
extern unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
int, unsigned HOST_WIDE_INT *,
int *, int *);
/* Functions from builtins.c: */ /* Functions from builtins.c: */
extern rtx expand_builtin (tree, rtx, rtx, enum machine_mode, int); extern rtx expand_builtin (tree, rtx, rtx, enum machine_mode, int);
......
2012-06-15 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/51581
* gcc.c-torture/execute/pr51581-1.c: New test.
* gcc.c-torture/execute/pr51581-2.c: New test.
* gcc.dg/vect/pr51581-1.c: New test.
* gcc.dg/vect/pr51581-2.c: New test.
* gcc.dg/vect/pr51581-3.c: New test.
* gcc.target/i386/avx-pr51581-1.c: New test.
* gcc.target/i386/avx-pr51581-2.c: New test.
* gcc.target/i386/avx2-pr51581-1.c: New test.
* gcc.target/i386/avx2-pr51581-2.c: New test.
* gcc.dg/vect/slp-26.c (main1): Divide by 0x8031 instead of 3.
2012-06-15 Richard Guenther <rguenther@suse.de> 2012-06-15 Richard Guenther <rguenther@suse.de>
* gcc.c-torture/execute/20120615-1.c: New testcase. * gcc.c-torture/execute/20120615-1.c: New testcase.
......
/* PR tree-optimization/51581 */
extern void abort (void);
#define N 4096
int a[N], c[N];
unsigned int b[N], d[N];
/* Signed kernel: store a[i] / 3 into c[i] for all N elements (a and c are
   int arrays).  main compares the result against the same division.  */
__attribute__((noinline, noclone)) void
f1 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] / 3;
}
/* Unsigned kernel: store b[i] / 3 into d[i] for all N elements (b and d are
   unsigned int arrays).  main compares the result against the same
   division.  */
__attribute__((noinline, noclone)) void
f2 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] / 3;
}
/* Signed kernel: store a[i] / 18 into c[i] for all N elements.  */
__attribute__((noinline, noclone)) void
f3 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] / 18;
}
/* Unsigned kernel: store b[i] / 18 into d[i] for all N elements.  */
__attribute__((noinline, noclone)) void
f4 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] / 18;
}
/* Signed kernel: store a[i] / 19 into c[i] for all N elements.  */
__attribute__((noinline, noclone)) void
f5 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] / 19;
}
/* Unsigned kernel: store b[i] / 19 into d[i] for all N elements.  */
__attribute__((noinline, noclone)) void
f6 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] / 19;
}
#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
/* Open-coded form of the signed division by 3: the 64-bit product
   a[i] * 0x55555556 is shifted right by 32 as an unsigned value, truncated
   to int, and (a[i] >> 31) is subtracted from it.  main expects every c[i]
   to equal a[i] / 3.  Only compiled when int is 4 bytes and long long is
   8 bytes (see the enclosing #if).  */
__attribute__((noinline, noclone)) void
f7 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = (int) ((unsigned long long) (a[i] * 0x55555556LL) >> 32) - (a[i] >> 31);
}
/* Open-coded form of the unsigned division by 3: the upper 32 bits of the
   64-bit product b[i] * 0xaaaaaaab are shifted right by one more bit.
   main expects every d[i] to equal b[i] / 3.  */
__attribute__((noinline, noclone)) void
f8 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = ((unsigned int) ((b[i] * 0xaaaaaaabULL) >> 32) >> 1);
}
/* Open-coded form of the signed division by 18: the upper 32 bits of
   a[i] * 0x38e38e39 are taken as an int, shifted right by 2, and
   (a[i] >> 31) is subtracted.  main expects every c[i] to equal
   a[i] / 18.  */
__attribute__((noinline, noclone)) void
f9 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = (((int) ((unsigned long long) (a[i] * 0x38e38e39LL) >> 32)) >> 2) - (a[i] >> 31);
}
/* Open-coded form of the unsigned division by 18: the upper 32 bits of
   b[i] * 0x38e38e39 are shifted right by 2.  main expects every d[i] to
   equal b[i] / 18.  */
__attribute__((noinline, noclone)) void
f10 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = (unsigned int) ((b[i] * 0x38e38e39ULL) >> 32) >> 2;
}
/* Open-coded form of the signed division by 19: the upper 32 bits of
   a[i] * 0x6bca1af3 are taken as an int, shifted right by 3, and
   (a[i] >> 31) is subtracted.  main expects every c[i] to equal
   a[i] / 19.  */
__attribute__((noinline, noclone)) void
f11 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = (((int) ((unsigned long long) (a[i] * 0x6bca1af3LL) >> 32)) >> 3) - (a[i] >> 31);
}
/* Open-coded form of the unsigned division by 19.  Unlike the other
   unsigned kernels this one needs a fix-up step: tmp holds the upper
   32 bits of b[i] * 0xaf286bcb, and the quotient is
   (((b[i] - tmp) >> 1) + tmp) >> 4.  main expects every d[i] to equal
   b[i] / 19.  */
__attribute__((noinline, noclone)) void
f12 (void)
{
int i;
for (i = 0; i < N; i++)
{
unsigned int tmp = (b[i] * 0xaf286bcbULL) >> 32;
d[i] = (((b[i] - tmp) >> 1) + tmp) >> 4;
}
}
#endif
/* Test driver.  Fills a[] with values centred on zero and b[] with the
   index, overrides a few slots with the extreme int / unsigned values,
   then runs each signed/unsigned kernel pair and aborts on the first
   element that differs from the plain division computed here.  */
int
main ()
{
  int idx;

  /* The empty asm is kept inside the initialisation loop exactly as in
     the original.  */
  for (idx = 0; idx < N; ++idx)
    {
      asm ("");
      a[idx] = idx - N / 2;
      b[idx] = idx;
    }

  /* Boundary inputs: INT_MIN, -INT_MAX, INT_MAX and all-ones.  */
  a[0] = -__INT_MAX__ - 1;
  a[1] = -__INT_MAX__;
  a[N - 1] = __INT_MAX__;
  b[N - 1] = ~0;

  /* Division by 3.  */
  f1 ();
  f2 ();
  for (idx = 0; idx < N; ++idx)
    if (!(c[idx] == a[idx] / 3 && d[idx] == b[idx] / 3))
      abort ();

  /* Division by 18.  */
  f3 ();
  f4 ();
  for (idx = 0; idx < N; ++idx)
    if (!(c[idx] == a[idx] / 18 && d[idx] == b[idx] / 18))
      abort ();

  /* Division by 19.  */
  f5 ();
  f6 ();
  for (idx = 0; idx < N; ++idx)
    if (!(c[idx] == a[idx] / 19 && d[idx] == b[idx] / 19))
      abort ();

#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
  /* The open-coded multiply/shift kernels must produce the same
     quotients.  */
  f7 ();
  f8 ();
  for (idx = 0; idx < N; ++idx)
    if (!(c[idx] == a[idx] / 3 && d[idx] == b[idx] / 3))
      abort ();

  f9 ();
  f10 ();
  for (idx = 0; idx < N; ++idx)
    if (!(c[idx] == a[idx] / 18 && d[idx] == b[idx] / 18))
      abort ();

  f11 ();
  f12 ();
  for (idx = 0; idx < N; ++idx)
    if (!(c[idx] == a[idx] / 19 && d[idx] == b[idx] / 19))
      abort ();
#endif

  return 0;
}
/* PR tree-optimization/51581 */
extern void abort (void);
#define N 4096
int a[N], c[N];
unsigned int b[N], d[N];
/* Signed kernel: store a[i] % 3 into c[i] for all N elements (a and c are
   int arrays).  main compares the result against the same remainder.  */
__attribute__((noinline, noclone)) void
f1 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] % 3;
}
/* Unsigned kernel: store b[i] % 3 into d[i] for all N elements (b and d
   are unsigned int arrays).  */
__attribute__((noinline, noclone)) void
f2 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] % 3;
}
/* Signed kernel: store a[i] % 18 into c[i] for all N elements.  */
__attribute__((noinline, noclone)) void
f3 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] % 18;
}
/* Unsigned kernel: store b[i] % 18 into d[i] for all N elements.  */
__attribute__((noinline, noclone)) void
f4 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] % 18;
}
/* Signed kernel: store a[i] % 19 into c[i] for all N elements.  */
__attribute__((noinline, noclone)) void
f5 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] % 19;
}
/* Unsigned kernel: store b[i] % 19 into d[i] for all N elements.  */
__attribute__((noinline, noclone)) void
f6 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] % 19;
}
#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
/* Open-coded form of the signed remainder by 3.  x is computed with a
   multiply by 0x55555556, a 32-bit right shift and a subtraction of
   (a[i] >> 31); the stored value is a[i] - x * 3.  main expects every c[i]
   to equal a[i] % 3.  Only compiled when int is 4 bytes and long long is
   8 bytes (see the enclosing #if).  */
__attribute__((noinline, noclone)) void
f7 (void)
{
int i;
for (i = 0; i < N; i++)
{
int x = (int) ((unsigned long long) (a[i] * 0x55555556LL) >> 32) - (a[i] >> 31);
c[i] = a[i] - x * 3;
}
}
/* Open-coded form of the unsigned remainder by 3.  x is the upper 32 bits
   of b[i] * 0xaaaaaaab shifted right by one more bit; the stored value is
   b[i] - x * 3.  main expects every d[i] to equal b[i] % 3.  */
__attribute__((noinline, noclone)) void
f8 (void)
{
int i;
for (i = 0; i < N; i++)
{
unsigned int x = ((unsigned int) ((b[i] * 0xaaaaaaabULL) >> 32) >> 1);
d[i] = b[i] - x * 3;
}
}
/* Open-coded form of the signed remainder by 18.  x is computed from the
   upper 32 bits of a[i] * 0x38e38e39, shifted right by 2, minus
   (a[i] >> 31); the stored value is a[i] - x * 18.  main expects every
   c[i] to equal a[i] % 18.  */
__attribute__((noinline, noclone)) void
f9 (void)
{
int i;
for (i = 0; i < N; i++)
{
int x = (((int) ((unsigned long long) (a[i] * 0x38e38e39LL) >> 32)) >> 2) - (a[i] >> 31);
c[i] = a[i] - x * 18;
}
}
/* Open-coded form of the unsigned remainder by 18.  x is the upper 32 bits
   of b[i] * 0x38e38e39 shifted right by 2; the stored value is
   b[i] - x * 18.  main expects every d[i] to equal b[i] % 18.  */
__attribute__((noinline, noclone)) void
f10 (void)
{
int i;
for (i = 0; i < N; i++)
{
unsigned int x = (unsigned int) ((b[i] * 0x38e38e39ULL) >> 32) >> 2;
d[i] = b[i] - x * 18;
}
}
/* Open-coded form of the signed remainder by 19.  x is computed from the
   upper 32 bits of a[i] * 0x6bca1af3, shifted right by 3, minus
   (a[i] >> 31); the stored value is a[i] - x * 19.  main expects every
   c[i] to equal a[i] % 19.  */
__attribute__((noinline, noclone)) void
f11 (void)
{
int i;
for (i = 0; i < N; i++)
{
int x = (((int) ((unsigned long long) (a[i] * 0x6bca1af3LL) >> 32)) >> 3) - (a[i] >> 31);
c[i] = a[i] - x * 19;
}
}
/* Open-coded form of the unsigned remainder by 19.  tmp holds the upper
   32 bits of b[i] * 0xaf286bcb, x is (((b[i] - tmp) >> 1) + tmp) >> 4, and
   the stored value is b[i] - x * 19.  main expects every d[i] to equal
   b[i] % 19.  */
__attribute__((noinline, noclone)) void
f12 (void)
{
int i;
for (i = 0; i < N; i++)
{
unsigned int tmp = (b[i] * 0xaf286bcbULL) >> 32;
unsigned int x = (((b[i] - tmp) >> 1) + tmp) >> 4;
d[i] = b[i] - x * 19;
}
}
#endif
/* Test driver.  Fills a[] with values centred on zero and b[] with the
   index, overrides a few slots with the extreme int / unsigned values,
   then runs each signed/unsigned kernel pair and aborts on the first
   element that differs from the plain remainder computed here.  */
int
main ()
{
  int idx;

  /* The empty asm is kept inside the initialisation loop exactly as in
     the original.  */
  for (idx = 0; idx < N; ++idx)
    {
      asm ("");
      a[idx] = idx - N / 2;
      b[idx] = idx;
    }

  /* Boundary inputs: INT_MIN, -INT_MAX, INT_MAX and all-ones.  */
  a[0] = -__INT_MAX__ - 1;
  a[1] = -__INT_MAX__;
  a[N - 1] = __INT_MAX__;
  b[N - 1] = ~0;

  /* Remainder by 3.  */
  f1 ();
  f2 ();
  for (idx = 0; idx < N; ++idx)
    if (!(c[idx] == a[idx] % 3 && d[idx] == b[idx] % 3))
      abort ();

  /* Remainder by 18.  */
  f3 ();
  f4 ();
  for (idx = 0; idx < N; ++idx)
    if (!(c[idx] == a[idx] % 18 && d[idx] == b[idx] % 18))
      abort ();

  /* Remainder by 19.  */
  f5 ();
  f6 ();
  for (idx = 0; idx < N; ++idx)
    if (!(c[idx] == a[idx] % 19 && d[idx] == b[idx] % 19))
      abort ();

#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
  /* The open-coded multiply/shift kernels must produce the same
     remainders.  */
  f7 ();
  f8 ();
  for (idx = 0; idx < N; ++idx)
    if (!(c[idx] == a[idx] % 3 && d[idx] == b[idx] % 3))
      abort ();

  f9 ();
  f10 ();
  for (idx = 0; idx < N; ++idx)
    if (!(c[idx] == a[idx] % 18 && d[idx] == b[idx] % 18))
      abort ();

  f11 ();
  f12 ();
  for (idx = 0; idx < N; ++idx)
    if (!(c[idx] == a[idx] % 19 && d[idx] == b[idx] % 19))
      abort ();
#endif

  return 0;
}
/* PR tree-optimization/51581 */
#include "tree-vect.h"
#define main main1
#include "../../gcc.c-torture/execute/pr51581-1.c"
#undef main
/* Entry point of the vectorizer variant of the test.  The included
   torture test's own main was renamed to main1 by the surrounding
   "#define main main1"; this wrapper calls check_vect () first and then
   returns main1's result.
   NOTE(review): check_vect comes from tree-vect.h, which is not visible
   here; presumably it verifies that the target can execute vector code.
   The previously declared but unused local "i" has been dropped.  */
int
main ()
{
  check_vect ();
  /* Empty asm statement kept from the original.  */
  asm ("");
  return main1 ();
}
/* { dg-final { cleanup-tree-dump "vect" } } */
/* PR tree-optimization/51581 */
#include "tree-vect.h"
#define main main1
#include "../../gcc.c-torture/execute/pr51581-2.c"
#undef main
/* Entry point of the vectorizer variant of the remainder test.  The
   included torture test's own main was renamed to main1 by the surrounding
   "#define main main1"; this wrapper calls check_vect () first and then
   returns main1's result.
   NOTE(review): check_vect comes from tree-vect.h, which is not visible
   here; presumably it verifies that the target can execute vector code.
   The previously declared but unused local "i" has been dropped.  */
int
main ()
{
  check_vect ();
  /* Empty asm statement kept from the original.  */
  asm ("");
  return main1 ();
}
/* { dg-final { cleanup-tree-dump "vect" } } */
/* PR tree-optimization/51581 */
#include "tree-vect.h"
int a[8], b[8];
unsigned int c[8], d[8];
/* Straight-line (loop-free) signed case: each of the eight elements of b
   is divided by the same power of two, 8, and stored into a.  */
void
f1 (void)
{
a[0] = b[0] / 8;
a[1] = b[1] / 8;
a[2] = b[2] / 8;
a[3] = b[3] / 8;
a[4] = b[4] / 8;
a[5] = b[5] / 8;
a[6] = b[6] / 8;
a[7] = b[7] / 8;
}
/* Straight-line unsigned case: each of the eight elements of d is divided
   by the same non-power-of-two constant, 3, and stored into c.  */
void
f2 (void)
{
c[0] = d[0] / 3;
c[1] = d[1] / 3;
c[2] = d[2] / 3;
c[3] = d[3] / 3;
c[4] = d[4] / 3;
c[5] = d[5] / 3;
c[6] = d[6] / 3;
c[7] = d[7] / 3;
}
/* Straight-line signed case with two alternating power-of-two divisors:
   even elements are divided by 8, odd elements by 4.  */
void
f3 (void)
{
a[0] = b[0] / 8;
a[1] = b[1] / 4;
a[2] = b[2] / 8;
a[3] = b[3] / 4;
a[4] = b[4] / 8;
a[5] = b[5] / 4;
a[6] = b[6] / 8;
a[7] = b[7] / 4;
}
/* Straight-line unsigned case with two alternating divisors: even
   elements are divided by 3, odd elements by 5.  */
void
f4 (void)
{
c[0] = d[0] / 3;
c[1] = d[1] / 5;
c[2] = d[2] / 3;
c[3] = d[3] / 5;
c[4] = d[4] / 3;
c[5] = d[5] / 5;
c[6] = d[6] / 3;
c[7] = d[7] / 5;
}
/* Straight-line signed case with two alternating non-power-of-two
   divisors: even elements are divided by 14, odd elements by 15.  */
void
f5 (void)
{
a[0] = b[0] / 14;
a[1] = b[1] / 15;
a[2] = b[2] / 14;
a[3] = b[3] / 15;
a[4] = b[4] / 14;
a[5] = b[5] / 15;
a[6] = b[6] / 14;
a[7] = b[7] / 15;
}
/* Straight-line unsigned case with an irregular divisor pattern: odd
   elements are divided by 5 and even elements by 6, except element 6,
   which is divided by 13.  main's check special-cases index 6 to match.  */
void
f6 (void)
{
c[0] = d[0] / 6;
c[1] = d[1] / 5;
c[2] = d[2] / 6;
c[3] = d[3] / 5;
c[4] = d[4] / 6;
c[5] = d[5] / 5;
c[6] = d[6] / 13;
c[7] = d[7] / 5;
}
/* Test driver for the straight-line kernels.  Calls check_vect (), fills
   b[] and d[] with k - 4 (d receives the value converted to unsigned),
   then runs each kernel pair and aborts on the first element that differs
   from the division computed here.  */
int
main ()
{
  int k;

  check_vect ();
  asm ("");

  /* The empty asm is kept inside the initialisation loop exactly as in
     the original.  */
  for (k = 0; k < 8; ++k)
    {
      asm ("");
      b[k] = k - 4;
      d[k] = k - 4;
    }

  /* Uniform divisors.  */
  f1 ();
  f2 ();
  for (k = 0; k < 8; ++k)
    if (!(a[k] == b[k] / 8 && c[k] == d[k] / 3))
      abort ();

  /* Alternating divisors, checked one even/odd pair at a time.  */
  f3 ();
  f4 ();
  for (k = 0; k < 8; k += 2)
    {
      if (a[k] != b[k] / 8 || a[k + 1] != b[k + 1] / 4)
	abort ();
      if (c[k] != d[k] / 3 || c[k + 1] != d[k + 1] / 5)
	abort ();
    }

  /* f6 divides element 6 by 13 instead of 6, so the expected divisor for
     the even slot depends on the index.  */
  f5 ();
  f6 ();
  for (k = 0; k < 8; k += 2)
    {
      if (a[k] != b[k] / 14 || a[k + 1] != b[k + 1] / 15)
	abort ();
      if (c[k] != d[k] / (k == 6 ? 13 : 6) || c[k + 1] != d[k + 1] / 5)
	abort ();
    }

  return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */
...@@ -10,7 +10,7 @@ main1 () ...@@ -10,7 +10,7 @@ main1 ()
{ {
int i; int i;
unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}; unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
unsigned short out[N*8], a[N], b[N] = {3,6,9,12,15,18,21,24}; unsigned short out[N*8], a[N], b[N] = {3,0x8031,0x7fff,0x8032,0xffff,0,0x8030,0x8000};
/* Partial SLP is not supported. */ /* Partial SLP is not supported. */
for (i = 0; i < N; i++) for (i = 0; i < N; i++)
...@@ -20,7 +20,7 @@ main1 () ...@@ -20,7 +20,7 @@ main1 ()
out[i*4 + 2] = in[i*4 + 2]; out[i*4 + 2] = in[i*4 + 2];
out[i*4 + 3] = in[i*4 + 3]; out[i*4 + 3] = in[i*4 + 3];
a[i] = b[i] / 3; a[i] = b[i] / 0x8031;
} }
/* check results: */ /* check results: */
...@@ -30,7 +30,7 @@ main1 () ...@@ -30,7 +30,7 @@ main1 ()
|| out[i*4 + 1] != in[i*4 + 1] || out[i*4 + 1] != in[i*4 + 1]
|| out[i*4 + 2] != in[i*4 + 2] || out[i*4 + 2] != in[i*4 + 2]
|| out[i*4 + 3] != in[i*4 + 3] || out[i*4 + 3] != in[i*4 + 3]
|| a[i] != b[i] / 3) || a[i] != b[i] / 0x8031)
abort (); abort ();
} }
......
/* PR tree-optimization/51581 */
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -mavx -fno-vect-cost-model" } */
/* { dg-require-effective-target avx } */
#ifndef CHECK_H
#define CHECK_H "avx-check.h"
#endif
#ifndef TEST
#define TEST avx_test
#endif
#define main main1
#include "../../gcc.c-torture/execute/pr51581-1.c"
#undef main
#include CHECK_H
/* Test body for the i386 check header.  TEST expands to avx_test unless
   the including file defined it beforehand.  It simply runs main1, which
   is the included torture test's main renamed by "#define main main1".
   NOTE(review): the caller is presumably the main provided by CHECK_H;
   that header is not visible here.  */
static void
TEST (void)
{
main1 ();
}
/* PR tree-optimization/51581 */
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -mavx -fno-vect-cost-model" } */
/* { dg-require-effective-target avx } */
#ifndef CHECK_H
#define CHECK_H "avx-check.h"
#endif
#ifndef TEST
#define TEST avx_test
#endif
#define main main1
#include "../../gcc.c-torture/execute/pr51581-2.c"
#undef main
#include CHECK_H
/* Test body for the i386 check header.  TEST expands to avx_test unless
   the including file defined it beforehand.  It simply runs main1, which
   is the included remainder test's main renamed by "#define main main1".
   NOTE(review): the caller is presumably the main provided by CHECK_H;
   that header is not visible here.  */
static void
TEST (void)
{
main1 ();
}
/* PR tree-optimization/51581 */
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -mavx2 -fno-vect-cost-model" } */
/* { dg-require-effective-target avx2 } */
#define CHECK_H "avx2-check.h"
#define TEST avx2_test
#include "avx-pr51581-1.c"
/* PR tree-optimization/51581 */
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -mavx2 -fno-vect-cost-model" } */
/* { dg-require-effective-target avx2 } */
#define CHECK_H "avx2-check.h"
#define TEST avx2_test
#include "avx-pr51581-2.c"
...@@ -5361,7 +5361,9 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) ...@@ -5361,7 +5361,9 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
Pattern statement needs to be analyzed instead of the original statement Pattern statement needs to be analyzed instead of the original statement
if the original statement is not relevant. Otherwise, we analyze both if the original statement is not relevant. Otherwise, we analyze both
statements. */ statements. In basic blocks we are called from some SLP instance
traversal, don't analyze pattern stmts instead, the pattern stmts
already will be part of SLP instance. */
pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info); pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
if (!STMT_VINFO_RELEVANT_P (stmt_info) if (!STMT_VINFO_RELEVANT_P (stmt_info)
...@@ -5390,6 +5392,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) ...@@ -5390,6 +5392,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
} }
} }
else if (STMT_VINFO_IN_PATTERN_P (stmt_info) else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
&& node == NULL
&& pattern_stmt && pattern_stmt
&& (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
|| STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
...@@ -5406,6 +5409,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node) ...@@ -5406,6 +5409,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
} }
if (is_pattern_stmt_p (stmt_info) if (is_pattern_stmt_p (stmt_info)
&& node == NULL
&& (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info))) && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
{ {
gimple_stmt_iterator si; gimple_stmt_iterator si;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment