Commit 079c527f by Jakub Jelinek

re PR tree-optimization/51581 (Integer division by constant is not vectorized)

	PR tree-optimization/51581
	* expr.h (choose_multiplier): New prototype.
	* expmed.c (choose_multiplier): No longer static.
	Change multiplier_ptr from rtx * to UHWI *.
	(expand_divmod): Adjust callers.
	* tree-vect-patterns.c (vect_recog_sdivmod_pow2_pattern):
	Renamed to...
	(vect_recog_divmod_pattern): ... this.  Pass bb_vinfo as last
	argument to new_stmt_vec_info.  Attempt to optimize also divisions
	by non-pow2 constants if integer vector division isn't supported.
	* tree-vect-stmts.c (vect_analyze_stmt): If node != NULL,
	don't look at pattern stmts and sequences.

	* gcc.c-torture/execute/pr51581-1.c: New test.
	* gcc.c-torture/execute/pr51581-2.c: New test.
	* gcc.dg/vect/pr51581-1.c: New test.
	* gcc.dg/vect/pr51581-2.c: New test.
	* gcc.dg/vect/pr51581-3.c: New test.
	* gcc.target/i386/avx-pr51581-1.c: New test.
	* gcc.target/i386/avx-pr51581-2.c: New test.
	* gcc.target/i386/avx2-pr51581-1.c: New test.
	* gcc.target/i386/avx2-pr51581-2.c: New test.
	* gcc.dg/vect/slp-26.c (main1): Divide by 0x8031 instead of 3.
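
What the pattern ultimately emits is the classic multiply-by-reciprocal rewrite of a constant division; the new execute tests below spell the expected expansions out by hand. As a quick standalone illustration (plain C, not part of the patch, using the same 0x55555556 constant as the f7 function in pr51581-1.c, and like that test relying on GCC's two's-complement conversions):

#include <assert.h>
#include <limits.h>

/* Signed 32-bit division by 3 without a divide instruction: multiply by
   the 0x55555556 reciprocal, keep the high 32 bits, then correct for the
   sign of the dividend.  Same expression as f7 below.  */
static int div3 (int x)
{
  return (int) (((unsigned long long) ((long long) x * 0x55555556LL)) >> 32)
	 - (x >> 31);
}

int main (void)
{
  int vals[] = { INT_MIN, INT_MIN + 1, -7, -3, -1, 0, 1, 2, 3, 7, INT_MAX };
  unsigned int i;
  for (i = 0; i < sizeof (vals) / sizeof (vals[0]); i++)
    assert (div3 (vals[i]) == vals[i] / 3);
  return 0;
}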

From-SVN: r188656
parent c55224dc
2012-06-15 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/51581
* expr.h (choose_multiplier): New prototype.
* expmed.c (choose_multiplier): No longer static.
Change multiplier_ptr from rtx * to UHWI *.
(expand_divmod): Adjust callers.
* tree-vect-patterns.c (vect_recog_sdivmod_pow2_pattern):
Renamed to...
(vect_recog_divmod_pattern): ... this. Pass bb_vinfo as last
argument to new_stmt_vec_info. Attempt to optimize also divisions
by non-pow2 constants if integer vector division isn't supported.
* tree-vect-stmts.c (vect_analyze_stmt): If node != NULL,
don't look at pattern stmts and sequences.
2012-06-15 Eric Botcazou <ebotcazou@adacore.com>
PR middle-end/53590
gcc/expmed.c
@@ -2363,8 +2363,6 @@ static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
                                  struct algorithm *, enum mult_variant *, int);
 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
                               const struct algorithm *, enum mult_variant);
-static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
-                                                 int, rtx *, int *, int *);
 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
 static rtx extract_high_half (enum machine_mode, rtx);
 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
@@ -3293,10 +3291,10 @@ ceil_log2 (unsigned HOST_WIDE_INT x)
    Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
    where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier.  */
-static
 unsigned HOST_WIDE_INT
 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
-                   rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
+                   unsigned HOST_WIDE_INT *multiplier_ptr,
+                   int *post_shift_ptr, int *lgup_ptr)
 {
   HOST_WIDE_INT mhigh_hi, mlow_hi;
   unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
@@ -3368,12 +3366,12 @@ choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
   if (n < HOST_BITS_PER_WIDE_INT)
     {
       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
-      *multiplier_ptr = GEN_INT (mhigh_lo & mask);
+      *multiplier_ptr = mhigh_lo & mask;
       return mhigh_lo >= mask;
     }
   else
     {
-      *multiplier_ptr = GEN_INT (mhigh_lo);
+      *multiplier_ptr = mhigh_lo;
       return mhigh_hi;
     }
 }
@@ -4053,10 +4051,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
   {
     if (unsignedp)
       {
-        unsigned HOST_WIDE_INT mh;
+        unsigned HOST_WIDE_INT mh, ml;
         int pre_shift, post_shift;
         int dummy;
-        rtx ml;
         unsigned HOST_WIDE_INT d = (INTVAL (op1)
                                     & GET_MODE_MASK (compute_mode));
@@ -4118,7 +4115,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
           = (shift_cost[speed][compute_mode][post_shift - 1]
              + shift_cost[speed][compute_mode][1]
              + 2 * add_cost[speed][compute_mode]);
-        t1 = expand_mult_highpart (compute_mode, op0, ml,
+        t1 = expand_mult_highpart (compute_mode, op0,
+                                   GEN_INT (ml),
                                    NULL_RTX, 1,
                                    max_cost - extra_cost);
         if (t1 == 0)
@@ -4149,7 +4147,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
         extra_cost
           = (shift_cost[speed][compute_mode][pre_shift]
              + shift_cost[speed][compute_mode][post_shift]);
-        t2 = expand_mult_highpart (compute_mode, t1, ml,
+        t2 = expand_mult_highpart (compute_mode, t1,
+                                   GEN_INT (ml),
                                    NULL_RTX, 1,
                                    max_cost - extra_cost);
         if (t2 == 0)
@@ -4262,8 +4261,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
       else if (size <= HOST_BITS_PER_WIDE_INT)
         {
           choose_multiplier (abs_d, size, size - 1,
-                             &mlr, &post_shift, &lgup);
-          ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
+                             &ml, &post_shift, &lgup);
           if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
             {
               rtx t1, t2, t3;
@@ -4275,8 +4273,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
               extra_cost = (shift_cost[speed][compute_mode][post_shift]
                             + shift_cost[speed][compute_mode][size - 1]
                             + add_cost[speed][compute_mode]);
-              t1 = expand_mult_highpart (compute_mode, op0, mlr,
-                                         NULL_RTX, 0,
+              t1 = expand_mult_highpart (compute_mode, op0,
+                                         GEN_INT (ml), NULL_RTX, 0,
                                          max_cost - extra_cost);
               if (t1 == 0)
                 goto fail1;
@@ -4356,10 +4354,9 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
   /* We will come here only for signed operations.  */
   if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
     {
-      unsigned HOST_WIDE_INT mh;
+      unsigned HOST_WIDE_INT mh, ml;
       int pre_shift, lgup, post_shift;
       HOST_WIDE_INT d = INTVAL (op1);
-      rtx ml;
       if (d > 0)
         {
@@ -4399,8 +4396,8 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
           extra_cost = (shift_cost[speed][compute_mode][post_shift]
                         + shift_cost[speed][compute_mode][size - 1]
                         + 2 * add_cost[speed][compute_mode]);
-          t3 = expand_mult_highpart (compute_mode, t2, ml,
-                                     NULL_RTX, 1,
+          t3 = expand_mult_highpart (compute_mode, t2,
+                                     GEN_INT (ml), NULL_RTX, 1,
                                      max_cost - extra_cost);
          if (t3 != 0)
            {
gcc/expr.h
@@ -244,6 +244,13 @@ extern rtx emit_store_flag (rtx, enum rtx_code, rtx, rtx, enum machine_mode,
 extern rtx emit_store_flag_force (rtx, enum rtx_code, rtx, rtx,
                                   enum machine_mode, int, int);
+/* Choose a minimal N + 1 bit approximation to 1/D that can be used to
+   replace division by D, and put the least significant N bits of the result
+   in *MULTIPLIER_PTR and return the most significant bit.  */
+extern unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
+                                                 int, unsigned HOST_WIDE_INT *,
+                                                 int *, int *);
 /* Functions from builtins.c:  */
 extern rtx expand_builtin (tree, rtx, rtx, enum machine_mode, int);
 extern tree std_build_builtin_va_list (void);
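
To see how a caller is meant to use the two outputs of choose_multiplier, consider an unsigned 32-bit division by 19: the 1/19 approximation needs 33 bits, so the function returns 1 (the most significant bit) and stores the low 32 bits, 0xaf286bcb, through MULTIPLIER_PTR with a post-shift of 5; the caller then applies the subtract/half/add fixup used by expand_divmod and by the new vectorizer pattern. The sketch below is plain C for illustration only, not a call into GCC; the constants match the hand-expanded f12 function in the new pr51581 tests.

#include <assert.h>
#include <stdint.h>

/* Unsigned 32-bit division by 19.  The 33-bit approximation to 1/19 is
   (1 << 32) + 0xaf286bcb with post_shift 5, so after the high-part multiply
   by the low 32 bits we add the implicit top bit back with the
   subtract/half/add sequence and shift by post_shift - 1.  */
static uint32_t div19 (uint32_t x)
{
  uint32_t t = (uint32_t) (((uint64_t) x * 0xaf286bcbu) >> 32);
  return (((x - t) >> 1) + t) >> 4;
}

int main (void)
{
  uint32_t x;
  for (x = 0; x < 200000; x++)
    assert (div19 (x) == x / 19);
  assert (div19 (0xffffffffu) == 0xffffffffu / 19);
  return 0;
}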
gcc/testsuite/ChangeLog
2012-06-15 Jakub Jelinek <jakub@redhat.com>
PR tree-optimization/51581
* gcc.c-torture/execute/pr51581-1.c: New test.
* gcc.c-torture/execute/pr51581-2.c: New test.
* gcc.dg/vect/pr51581-1.c: New test.
* gcc.dg/vect/pr51581-2.c: New test.
* gcc.dg/vect/pr51581-3.c: New test.
* gcc.target/i386/avx-pr51581-1.c: New test.
* gcc.target/i386/avx-pr51581-2.c: New test.
* gcc.target/i386/avx2-pr51581-1.c: New test.
* gcc.target/i386/avx2-pr51581-2.c: New test.
* gcc.dg/vect/slp-26.c (main1): Divide by 0x8031 instead of 3.
2012-06-15 Richard Guenther <rguenther@suse.de>
* gcc.c-torture/execute/20120615-1.c: New testcase.
gcc/testsuite/gcc.c-torture/execute/pr51581-1.c
/* PR tree-optimization/51581 */
extern void abort (void);
#define N 4096
int a[N], c[N];
unsigned int b[N], d[N];
__attribute__((noinline, noclone)) void
f1 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] / 3;
}
__attribute__((noinline, noclone)) void
f2 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] / 3;
}
__attribute__((noinline, noclone)) void
f3 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] / 18;
}
__attribute__((noinline, noclone)) void
f4 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] / 18;
}
__attribute__((noinline, noclone)) void
f5 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] / 19;
}
__attribute__((noinline, noclone)) void
f6 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] / 19;
}
#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
__attribute__((noinline, noclone)) void
f7 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = (int) ((unsigned long long) (a[i] * 0x55555556LL) >> 32) - (a[i] >> 31);
}
__attribute__((noinline, noclone)) void
f8 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = ((unsigned int) ((b[i] * 0xaaaaaaabULL) >> 32) >> 1);
}
__attribute__((noinline, noclone)) void
f9 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = (((int) ((unsigned long long) (a[i] * 0x38e38e39LL) >> 32)) >> 2) - (a[i] >> 31);
}
__attribute__((noinline, noclone)) void
f10 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = (unsigned int) ((b[i] * 0x38e38e39ULL) >> 32) >> 2;
}
__attribute__((noinline, noclone)) void
f11 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = (((int) ((unsigned long long) (a[i] * 0x6bca1af3LL) >> 32)) >> 3) - (a[i] >> 31);
}
__attribute__((noinline, noclone)) void
f12 (void)
{
int i;
for (i = 0; i < N; i++)
{
unsigned int tmp = (b[i] * 0xaf286bcbULL) >> 32;
d[i] = (((b[i] - tmp) >> 1) + tmp) >> 4;
}
}
#endif
int
main ()
{
int i;
for (i = 0; i < N; i++)
{
asm ("");
a[i] = i - N / 2;
b[i] = i;
}
a[0] = -__INT_MAX__ - 1;
a[1] = -__INT_MAX__;
a[N - 1] = __INT_MAX__;
b[N - 1] = ~0;
f1 ();
f2 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] / 3 || d[i] != b[i] / 3)
abort ();
f3 ();
f4 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] / 18 || d[i] != b[i] / 18)
abort ();
f5 ();
f6 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] / 19 || d[i] != b[i] / 19)
abort ();
#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
f7 ();
f8 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] / 3 || d[i] != b[i] / 3)
abort ();
f9 ();
f10 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] / 18 || d[i] != b[i] / 18)
abort ();
f11 ();
f12 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] / 19 || d[i] != b[i] / 19)
abort ();
#endif
return 0;
}
/* PR tree-optimization/51581 */
extern void abort (void);
#define N 4096
int a[N], c[N];
unsigned int b[N], d[N];
__attribute__((noinline, noclone)) void
f1 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] % 3;
}
__attribute__((noinline, noclone)) void
f2 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] % 3;
}
__attribute__((noinline, noclone)) void
f3 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] % 18;
}
__attribute__((noinline, noclone)) void
f4 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] % 18;
}
__attribute__((noinline, noclone)) void
f5 (void)
{
int i;
for (i = 0; i < N; i++)
c[i] = a[i] % 19;
}
__attribute__((noinline, noclone)) void
f6 (void)
{
int i;
for (i = 0; i < N; i++)
d[i] = b[i] % 19;
}
#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
__attribute__((noinline, noclone)) void
f7 (void)
{
int i;
for (i = 0; i < N; i++)
{
int x = (int) ((unsigned long long) (a[i] * 0x55555556LL) >> 32) - (a[i] >> 31);
c[i] = a[i] - x * 3;
}
}
__attribute__((noinline, noclone)) void
f8 (void)
{
int i;
for (i = 0; i < N; i++)
{
unsigned int x = ((unsigned int) ((b[i] * 0xaaaaaaabULL) >> 32) >> 1);
d[i] = b[i] - x * 3;
}
}
__attribute__((noinline, noclone)) void
f9 (void)
{
int i;
for (i = 0; i < N; i++)
{
int x = (((int) ((unsigned long long) (a[i] * 0x38e38e39LL) >> 32)) >> 2) - (a[i] >> 31);
c[i] = a[i] - x * 18;
}
}
__attribute__((noinline, noclone)) void
f10 (void)
{
int i;
for (i = 0; i < N; i++)
{
unsigned int x = (unsigned int) ((b[i] * 0x38e38e39ULL) >> 32) >> 2;
d[i] = b[i] - x * 18;
}
}
__attribute__((noinline, noclone)) void
f11 (void)
{
int i;
for (i = 0; i < N; i++)
{
int x = (((int) ((unsigned long long) (a[i] * 0x6bca1af3LL) >> 32)) >> 3) - (a[i] >> 31);
c[i] = a[i] - x * 19;
}
}
__attribute__((noinline, noclone)) void
f12 (void)
{
int i;
for (i = 0; i < N; i++)
{
unsigned int tmp = (b[i] * 0xaf286bcbULL) >> 32;
unsigned int x = (((b[i] - tmp) >> 1) + tmp) >> 4;
d[i] = b[i] - x * 19;
}
}
#endif
int
main ()
{
int i;
for (i = 0; i < N; i++)
{
asm ("");
a[i] = i - N / 2;
b[i] = i;
}
a[0] = -__INT_MAX__ - 1;
a[1] = -__INT_MAX__;
a[N - 1] = __INT_MAX__;
b[N - 1] = ~0;
f1 ();
f2 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] % 3 || d[i] != b[i] % 3)
abort ();
f3 ();
f4 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] % 18 || d[i] != b[i] % 18)
abort ();
f5 ();
f6 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] % 19 || d[i] != b[i] % 19)
abort ();
#if __SIZEOF_INT__ == 4 && __SIZEOF_LONG_LONG__ == 8
f7 ();
f8 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] % 3 || d[i] != b[i] % 3)
abort ();
f9 ();
f10 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] % 18 || d[i] != b[i] % 18)
abort ();
f11 ();
f12 ();
for (i = 0; i < N; i++)
if (c[i] != a[i] % 19 || d[i] != b[i] % 19)
abort ();
#endif
return 0;
}
/* PR tree-optimization/51581 */
#include "tree-vect.h"
#define main main1
#include "../../gcc.c-torture/execute/pr51581-1.c"
#undef main
int
main ()
{
int i;
check_vect ();
asm ("");
return main1 ();
}
/* { dg-final { cleanup-tree-dump "vect" } } */
/* PR tree-optimization/51581 */
#include "tree-vect.h"
#define main main1
#include "../../gcc.c-torture/execute/pr51581-2.c"
#undef main
int
main ()
{
int i;
check_vect ();
asm ("");
return main1 ();
}
/* { dg-final { cleanup-tree-dump "vect" } } */
/* PR tree-optimization/51581 */
#include "tree-vect.h"
int a[8], b[8];
unsigned int c[8], d[8];
void
f1 (void)
{
a[0] = b[0] / 8;
a[1] = b[1] / 8;
a[2] = b[2] / 8;
a[3] = b[3] / 8;
a[4] = b[4] / 8;
a[5] = b[5] / 8;
a[6] = b[6] / 8;
a[7] = b[7] / 8;
}
void
f2 (void)
{
c[0] = d[0] / 3;
c[1] = d[1] / 3;
c[2] = d[2] / 3;
c[3] = d[3] / 3;
c[4] = d[4] / 3;
c[5] = d[5] / 3;
c[6] = d[6] / 3;
c[7] = d[7] / 3;
}
void
f3 (void)
{
a[0] = b[0] / 8;
a[1] = b[1] / 4;
a[2] = b[2] / 8;
a[3] = b[3] / 4;
a[4] = b[4] / 8;
a[5] = b[5] / 4;
a[6] = b[6] / 8;
a[7] = b[7] / 4;
}
void
f4 (void)
{
c[0] = d[0] / 3;
c[1] = d[1] / 5;
c[2] = d[2] / 3;
c[3] = d[3] / 5;
c[4] = d[4] / 3;
c[5] = d[5] / 5;
c[6] = d[6] / 3;
c[7] = d[7] / 5;
}
void
f5 (void)
{
a[0] = b[0] / 14;
a[1] = b[1] / 15;
a[2] = b[2] / 14;
a[3] = b[3] / 15;
a[4] = b[4] / 14;
a[5] = b[5] / 15;
a[6] = b[6] / 14;
a[7] = b[7] / 15;
}
void
f6 (void)
{
c[0] = d[0] / 6;
c[1] = d[1] / 5;
c[2] = d[2] / 6;
c[3] = d[3] / 5;
c[4] = d[4] / 6;
c[5] = d[5] / 5;
c[6] = d[6] / 13;
c[7] = d[7] / 5;
}
int
main ()
{
int i;
check_vect ();
asm ("");
for (i = 0; i < 8; i++)
{
asm ("");
b[i] = i - 4;
d[i] = i - 4;
}
f1 ();
f2 ();
for (i = 0; i < 8; i++)
if (a[i] != b[i] / 8 || c[i] != d[i] / 3)
abort ();
f3 ();
f4 ();
for (i = 0; i < 8; i+= 2)
if (a[i] != b[i] / 8 || a[i + 1] != b[i + 1] / 4
|| c[i] != d[i] / 3 || c[i + 1] != d[i + 1] / 5)
abort ();
f5 ();
f6 ();
for (i = 0; i < 8; i+= 2)
if (a[i] != b[i] / 14 || a[i + 1] != b[i + 1] / 15
|| c[i] != d[i] / (i == 6 ? 13 : 6) || c[i + 1] != d[i + 1] / 5)
abort ();
return 0;
}
/* { dg-final { cleanup-tree-dump "vect" } } */
gcc/testsuite/gcc.dg/vect/slp-26.c
@@ -10,7 +10,7 @@ main1 ()
 {
   int i;
   unsigned short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
-  unsigned short out[N*8], a[N], b[N] = {3,6,9,12,15,18,21,24};
+  unsigned short out[N*8], a[N], b[N] = {3,0x8031,0x7fff,0x8032,0xffff,0,0x8030,0x8000};
   /* Partial SLP is not supported.  */
   for (i = 0; i < N; i++)
@@ -20,7 +20,7 @@ main1 ()
       out[i*4 + 2] = in[i*4 + 2];
       out[i*4 + 3] = in[i*4 + 3];
-      a[i] = b[i] / 3;
+      a[i] = b[i] / 0x8031;
     }
   /* check results:  */
@@ -30,7 +30,7 @@ main1 ()
         || out[i*4 + 1] != in[i*4 + 1]
         || out[i*4 + 2] != in[i*4 + 2]
         || out[i*4 + 3] != in[i*4 + 3]
-        || a[i] != b[i] / 3)
+        || a[i] != b[i] / 0x8031)
       abort ();
     }
gcc/testsuite/gcc.target/i386/avx-pr51581-1.c
/* PR tree-optimization/51581 */
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -mavx -fno-vect-cost-model" } */
/* { dg-require-effective-target avx } */
#ifndef CHECK_H
#define CHECK_H "avx-check.h"
#endif
#ifndef TEST
#define TEST avx_test
#endif
#define main main1
#include "../../gcc.c-torture/execute/pr51581-1.c"
#undef main
#include CHECK_H
static void
TEST (void)
{
main1 ();
}
/* PR tree-optimization/51581 */
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -mavx -fno-vect-cost-model" } */
/* { dg-require-effective-target avx } */
#ifndef CHECK_H
#define CHECK_H "avx-check.h"
#endif
#ifndef TEST
#define TEST avx_test
#endif
#define main main1
#include "../../gcc.c-torture/execute/pr51581-2.c"
#undef main
#include CHECK_H
static void
TEST (void)
{
main1 ();
}
/* PR tree-optimization/51581 */
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -mavx2 -fno-vect-cost-model" } */
/* { dg-require-effective-target avx2 } */
#define CHECK_H "avx2-check.h"
#define TEST avx2_test
#include "avx-pr51581-1.c"
/* PR tree-optimization/51581 */
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -mavx2 -fno-vect-cost-model" } */
/* { dg-require-effective-target avx2 } */
#define CHECK_H "avx2-check.h"
#define TEST avx2_test
#include "avx-pr51581-2.c"
gcc/tree-vect-patterns.c
@@ -53,7 +53,7 @@ static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                               tree *, tree *);
 static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                       tree *, tree *);
-static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
-                                               tree *, tree *);
+static gimple vect_recog_divmod_pattern (VEC (gimple, heap) **,
+                                         tree *, tree *);
 static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                   tree *, tree *);
@@ -66,7 +66,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
   vect_recog_widen_shift_pattern,
   vect_recog_over_widening_pattern,
   vect_recog_vector_vector_shift_pattern,
-  vect_recog_sdivmod_pow2_pattern,
+  vect_recog_divmod_pattern,
   vect_recog_mixed_size_cond_pattern,
   vect_recog_bool_pattern};
@@ -1585,29 +1585,30 @@ vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
   return pattern_stmt;
 }
-/* Detect a signed division by power of two constant that wouldn't be
+/* Detect a signed division by a constant that wouldn't be
    otherwise vectorized:
    type a_t, b_t;
    S1 a_t = b_t / N;
-   where type 'type' is a signed integral type and N is a constant positive
-   power of two.
+   where type 'type' is an integral type and N is a constant.
-   Similarly handle signed modulo by power of two constant:
+   Similarly handle modulo by a constant:
    S4 a_t = b_t % N;
    Input/Output:
    * STMTS: Contains a stmt from which the pattern search begins,
-     i.e. the division stmt.  S1 is replaced by:
+     i.e. the division stmt.  S1 is replaced by if N is a power
+     of two constant and type is signed:
    S3  y_t = b_t < 0 ? N - 1 : 0;
    S2  x_t = b_t + y_t;
    S1' a_t = x_t >> log2 (N);
-   S4 is replaced by (where *_T temporaries have unsigned type):
+   S4 is replaced if N is a power of two constant and
+   type is signed by (where *_T temporaries have unsigned type):
    S9  y_T = b_t < 0 ? -1U : 0U;
    S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
    S7  z_t = (type) z_T;
@@ -1625,16 +1626,22 @@ vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
    S1 or modulo S4 stmt.  */
 static gimple
-vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
-                                 tree *type_in, tree *type_out)
+vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
+                           tree *type_in, tree *type_out)
 {
   gimple last_stmt = VEC_pop (gimple, *stmts);
-  tree oprnd0, oprnd1, vectype, itype, cond;
+  tree oprnd0, oprnd1, vectype, itype, witype, vecwtype, cond;
   gimple pattern_stmt, def_stmt;
   enum tree_code rhs_code;
   stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
   optab optab;
+  tree dummy, q;
+  enum tree_code dummy_code;
+  int dummy_int, prec;
+  VEC (tree, heap) *dummy_vec;
+  stmt_vec_info def_stmt_vinfo;
   if (!is_gimple_assign (last_stmt))
     return NULL;
@@ -1658,10 +1665,7 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
   if (TREE_CODE (oprnd0) != SSA_NAME
       || TREE_CODE (oprnd1) != INTEGER_CST
       || TREE_CODE (itype) != INTEGER_TYPE
-      || TYPE_UNSIGNED (itype)
-      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
-      || !integer_pow2p (oprnd1)
-      || tree_int_cst_sgn (oprnd1) != 1)
+      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
     return NULL;
   vectype = get_vectype_for_scalar_type (itype);
@@ -1680,14 +1684,22 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
       return NULL;
     }
+  prec = TYPE_PRECISION (itype);
+  if (integer_pow2p (oprnd1))
+    {
+      if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
+        return NULL;
   /* Pattern detected.  */
   if (vect_print_dump_info (REPORT_DETAILS))
-    fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
+    fprintf (vect_dump, "vect_recog_divmod_pattern: detected: ");
-  cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
+  cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
+                 build_int_cst (itype, 0));
   if (rhs_code == TRUNC_DIV_EXPR)
     {
       tree var = vect_recog_temp_ssa_var (itype, NULL);
+      tree shift;
       def_stmt
         = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
                                          fold_build2 (MINUS_EXPR, itype,
@@ -1702,12 +1714,12 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
                             gimple_assign_lhs (def_stmt));
       append_pattern_def_seq (stmt_vinfo, def_stmt);
+      shift = build_int_cst (itype, tree_log2 (oprnd1));
       pattern_stmt
         = gimple_build_assign_with_ops (RSHIFT_EXPR,
-                                        vect_recog_temp_ssa_var (itype, NULL),
-                                        var,
-                                        build_int_cst (itype,
-                                                       tree_log2 (oprnd1)));
+                                        vect_recog_temp_ssa_var (itype,
+                                                                 NULL),
+                                        var, shift);
     }
   else
     {
@@ -1725,19 +1737,19 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
   else
     {
       tree utype
-        = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
+        = build_nonstandard_integer_type (prec, 1);
       tree vecutype = get_vectype_for_scalar_type (utype);
       tree shift
         = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
                          - tree_log2 (oprnd1));
       tree var = vect_recog_temp_ssa_var (utype, NULL);
-      stmt_vec_info def_stmt_vinfo;
       def_stmt
         = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
                                          build_int_cst (utype, -1),
                                          build_int_cst (utype, 0));
-      def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
+      def_stmt_vinfo
+        = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
       set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
       STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
       append_pattern_def_seq (stmt_vinfo, def_stmt);
@@ -1746,7 +1758,8 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
         = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
                                         gimple_assign_lhs (def_stmt),
                                         shift);
-      def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
+      def_stmt_vinfo
+        = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
       set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
       STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
       append_pattern_def_seq (stmt_vinfo, def_stmt);
@@ -1758,12 +1771,14 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
     }
   def_stmt
     = gimple_build_assign_with_ops (PLUS_EXPR,
-                                    vect_recog_temp_ssa_var (itype, NULL),
+                                    vect_recog_temp_ssa_var (itype,
+                                                             NULL),
                                     oprnd0, signmask);
   append_pattern_def_seq (stmt_vinfo, def_stmt);
   def_stmt
     = gimple_build_assign_with_ops (BIT_AND_EXPR,
-                                    vect_recog_temp_ssa_var (itype, NULL),
+                                    vect_recog_temp_ssa_var (itype,
+                                                             NULL),
                                     gimple_assign_lhs (def_stmt),
                                     fold_build2 (MINUS_EXPR, itype,
                                                  oprnd1,
@@ -1773,7 +1788,8 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
   pattern_stmt
     = gimple_build_assign_with_ops (MINUS_EXPR,
-                                    vect_recog_temp_ssa_var (itype, NULL),
+                                    vect_recog_temp_ssa_var (itype,
+                                                             NULL),
                                     gimple_assign_lhs (def_stmt),
                                     signmask);
 }
@@ -1786,6 +1802,328 @@ vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
   *type_in = vectype;
   *type_out = vectype;
   return pattern_stmt;
}
if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
|| integer_zerop (oprnd1)
|| prec > HOST_BITS_PER_WIDE_INT)
return NULL;
witype = build_nonstandard_integer_type (prec * 2,
TYPE_UNSIGNED (itype));
vecwtype = get_vectype_for_scalar_type (witype);
if (vecwtype == NULL_TREE)
return NULL;
if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
vecwtype, vectype,
&dummy, &dummy, &dummy_code,
&dummy_code, &dummy_int, &dummy_vec))
return NULL;
STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
if (TYPE_UNSIGNED (itype))
{
unsigned HOST_WIDE_INT mh, ml;
int pre_shift, post_shift;
unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
& GET_MODE_MASK (TYPE_MODE (itype));
tree t1, t2, t3, t4, t5, t6;
if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
/* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
return NULL;
/* Find a suitable multiplier and right shift count
instead of multiplying with D. */
mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
/* If the suggested multiplier is more than SIZE bits, we can do better
for even divisors, using an initial right shift. */
if (mh != 0 && (d & 1) == 0)
{
pre_shift = floor_log2 (d & -d);
mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
&ml, &post_shift, &dummy_int);
gcc_assert (!mh);
}
else
pre_shift = 0;
if (mh != 0)
{
if (post_shift - 1 >= prec)
return NULL;
/* t1 = oprnd0 w* ml;
t2 = t1 >> prec;
t3 = (type) t2;
t4 = oprnd0 - t3;
t5 = t4 >> 1;
t6 = t3 + t5;
q = t6 >> (post_shift - 1); */
t1 = vect_recog_temp_ssa_var (witype, NULL);
def_stmt
= gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t1, oprnd0,
build_int_cst (itype, ml));
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
t2 = vect_recog_temp_ssa_var (witype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, t2, t1,
build_int_cst (itype, prec));
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
t3 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (NOP_EXPR, t3, t2, NULL_TREE);
append_pattern_def_seq (stmt_vinfo, def_stmt);
t4 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (MINUS_EXPR, t4, oprnd0, t3);
append_pattern_def_seq (stmt_vinfo, def_stmt);
t5 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, t5, t4,
integer_one_node);
append_pattern_def_seq (stmt_vinfo, def_stmt);
t6 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (PLUS_EXPR, t6, t3, t5);
if (post_shift != 1)
{
append_pattern_def_seq (stmt_vinfo, def_stmt);
q = vect_recog_temp_ssa_var (witype, NULL);
pattern_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, q, t6,
build_int_cst (itype,
post_shift
- 1));
}
else
{
q = t6;
pattern_stmt = def_stmt;
}
}
else
{
if (pre_shift >= prec || post_shift >= prec)
return NULL;
/* t1 = oprnd0 >> pre_shift;
t2 = t1 w* ml;
t3 = t2 >> (prec + post_shift);
q = (type) t3; */
if (pre_shift)
{
t1 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
build_int_cst (NULL,
pre_shift));
append_pattern_def_seq (stmt_vinfo, def_stmt);
}
else
t1 = oprnd0;
t2 = vect_recog_temp_ssa_var (witype, NULL);
def_stmt
= gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t2, t1,
build_int_cst (itype, ml));
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
t3 = vect_recog_temp_ssa_var (witype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
build_int_cst (itype, post_shift
+ prec));
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
q = vect_recog_temp_ssa_var (itype, NULL);
pattern_stmt
= gimple_build_assign_with_ops (NOP_EXPR, q, t3, NULL_TREE);
}
}
else
{
unsigned HOST_WIDE_INT ml;
int lgup, post_shift;
HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
unsigned HOST_WIDE_INT abs_d;
bool add = false;
tree uwitype = NULL, vecuwtype = NULL;
tree t1, t2, t3, t4, t5, t6, t7;
/* Give up for -1. */
if (d == -1)
return NULL;
if (!vect_supportable_shift (RSHIFT_EXPR, witype))
{
uwitype = build_nonstandard_integer_type (prec * 2, 1);
vecuwtype = get_vectype_for_scalar_type (uwitype);
if (vecuwtype == NULL_TREE)
return NULL;
}
/* Since d might be INT_MIN, we have to cast to
unsigned HOST_WIDE_INT before negating to avoid
undefined signed overflow. */
abs_d = (d >= 0
? (unsigned HOST_WIDE_INT) d
: - (unsigned HOST_WIDE_INT) d);
/* n rem d = n rem -d */
if (rhs_code == TRUNC_MOD_EXPR && d < 0)
{
d = abs_d;
oprnd1 = build_int_cst (itype, abs_d);
}
else if (HOST_BITS_PER_WIDE_INT >= prec
&& abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
/* This case is not handled correctly below. */
return NULL;
choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &lgup);
if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
{
add = true;
ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
}
if (post_shift >= prec)
return NULL;
/* t1 = oprnd1 w* ml; */
t1 = vect_recog_temp_ssa_var (witype, NULL);
def_stmt
= gimple_build_assign_with_ops (WIDEN_MULT_EXPR, t1, oprnd0,
build_int_cst (itype, ml));
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecwtype;
if (vecuwtype != NULL)
{
/* t2 = (uwtype) t1; */
t2 = vect_recog_temp_ssa_var (uwitype, NULL);
def_stmt
= gimple_build_assign_with_ops (NOP_EXPR, t2, t1, NULL_TREE);
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecuwtype;
}
else
t2 = t1;
/* t3 = t2 >> prec; or t3 = t2 >> (prec + post_shift); */
t3 = vect_recog_temp_ssa_var (vecuwtype ? uwitype : witype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
build_int_cst (itype,
prec
+ (!add
&& vecuwtype == NULL
? post_shift : 0)));
append_pattern_def_seq (stmt_vinfo, def_stmt);
def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecuwtype ? vecuwtype : vecwtype;
/* t4 = (type) t3; */
t4 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (NOP_EXPR, t4, t3, NULL_TREE);
append_pattern_def_seq (stmt_vinfo, def_stmt);
if (add)
{
/* t5 = t4 + oprnd0; */
t5 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (PLUS_EXPR, t5, t4, oprnd0);
append_pattern_def_seq (stmt_vinfo, def_stmt);
}
else
t5 = t4;
if ((add || vecuwtype != NULL) && post_shift)
{
/* t6 = t5 >> post_shift; */
t6 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, t6, t5,
build_int_cst (itype, post_shift));
append_pattern_def_seq (stmt_vinfo, def_stmt);
}
else
t6 = t5;
/* t7 = oprnd0 >> (prec - 1); */
t7 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (RSHIFT_EXPR, t7, oprnd0,
build_int_cst (itype, prec - 1));
append_pattern_def_seq (stmt_vinfo, def_stmt);
/* q = t6 - t7; or q = t7 - t6; */
q = vect_recog_temp_ssa_var (itype, NULL);
pattern_stmt
= gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t7 : t6,
d < 0 ? t6 : t7);
}
if (rhs_code == TRUNC_MOD_EXPR)
{
tree r, t1;
/* We divided. Now finish by:
t1 = q * oprnd1;
r = oprnd0 - t1; */
append_pattern_def_seq (stmt_vinfo, pattern_stmt);
t1 = vect_recog_temp_ssa_var (itype, NULL);
def_stmt
= gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
append_pattern_def_seq (stmt_vinfo, def_stmt);
r = vect_recog_temp_ssa_var (itype, NULL);
pattern_stmt
= gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
}
/* Pattern detected. */
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "vect_recog_divmod_pattern: detected: ");
if (vect_print_dump_info (REPORT_DETAILS))
print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
VEC_safe_push (gimple, heap, *stmts, last_stmt);
*type_in = vectype;
*type_out = vectype;
return pattern_stmt;
}
/* Function vect_recog_mixed_size_cond_pattern
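
Before moving on to the tree-vect-stmts.c change, a scalar view of the new non-power-of-two path may help: for a signed division by 18 the pattern widen-multiplies by the chosen constant, keeps the high half, arithmetic-shifts by post_shift and subtracts the sign bit (q = t6 - t7), and for TRUNC_MOD_EXPR finishes with r = oprnd0 - q * oprnd1. The snippet below is an illustration in plain C, not the GIMPLE the pattern builds; the 0x38e38e39 constant and shift counts match the f9 functions in the new pr51581 torture tests, and like those tests it relies on GCC's two's-complement conversions.

#include <assert.h>
#include <limits.h>

/* Signed 32-bit division by 18: widening multiply by 0x38e38e39, take the
   high word (t3/t4), arithmetic shift by post_shift = 2 (t6), and subtract
   the sign of the dividend (t7).  */
static int div18 (int x)
{
  int hi = (int) (((unsigned long long) ((long long) x * 0x38e38e39LL)) >> 32);
  return (hi >> 2) - (x >> 31);
}

/* The TRUNC_MOD_EXPR finish-up: r = oprnd0 - q * oprnd1.  */
static int mod18 (int x)
{
  return x - div18 (x) * 18;
}

int main (void)
{
  int vals[] = { INT_MIN, INT_MIN + 1, -37, -18, -1, 0, 1, 17, 18, 19, INT_MAX };
  unsigned int i;
  for (i = 0; i < sizeof (vals) / sizeof (vals[0]); i++)
    assert (div18 (vals[i]) == vals[i] / 18
            && mod18 (vals[i]) == vals[i] % 18);
  return 0;
}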
gcc/tree-vect-stmts.c
@@ -5361,7 +5361,9 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
      Pattern statement needs to be analyzed instead of the original statement
      if the original statement is not relevant.  Otherwise, we analyze both
-     statements.  */
+     statements.  In basic blocks we are called from some SLP instance
+     traversal, don't analyze pattern stmts instead, the pattern stmts
+     already will be part of SLP instance.  */
   pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
   if (!STMT_VINFO_RELEVANT_P (stmt_info)
@@ -5390,6 +5392,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
         }
     }
   else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
+           && node == NULL
           && pattern_stmt
           && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
               || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
@@ -5406,6 +5409,7 @@ vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
     }
   if (is_pattern_stmt_p (stmt_info)
+      && node == NULL
      && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
    {
      gimple_stmt_iterator si;