Commit cc453086 by Jakub Jelinek Committed by Jakub Jelinek

re PR middle-end/62263 (Good codegen for bitwise rotate requires code that is…

re PR middle-end/62263 (Good codegen for bitwise rotate requires code that is technically undefined behavior)

	PR middle-end/62263
	PR middle-end/82498
	* tree-ssa-forwprop.c (simplify_rotate): Allow def_arg1[N]
	to be any operand_equal_p operands.  For & (B - 1) require
	B to be power of 2.  Recognize
	(X << (Y & (B - 1))) | (X >> ((-Y) & (B - 1))) and similar patterns.

	* c-c++-common/rotate-5.c (f2): New function.  Move old
	function to ...
	(f4): ... this.  Use 127 instead of 128.
	(f3, f5, f6): New functions.
	(main): Test all f[1-6] functions, with both 0 and 1 as
	second arguments.
	* c-c++-common/rotate-6.c: New test.
	* c-c++-common/rotate-6a.c: New test.
	* c-c++-common/rotate-7.c: New test.
	* c-c++-common/rotate-7a.c: New test.
	* c-c++-common/rotate-8.c: New test.

From-SVN: r253760
parent 6af90df0
2017-10-14 Jakub Jelinek <jakub@redhat.com>
PR middle-end/62263
PR middle-end/82498
* tree-ssa-forwprop.c (simplify_rotate): Allow def_arg1[N]
to be any operand_equal_p operands. For & (B - 1) require
B to be power of 2. Recognize
(X << (Y & (B - 1))) | (X >> ((-Y) & (B - 1))) and similar patterns.
2017-10-14 Uros Bizjak <ubizjak@gmail.com>
PR bootstrap/82553
2017-10-14 Jakub Jelinek <jakub@redhat.com>
PR middle-end/62263
PR middle-end/82498
* c-c++-common/rotate-5.c (f2): New function. Move old
function to ...
(f4): ... this. Use 127 instead of 128.
(f3, f5, f6): New functions.
(main): Test all f[1-6] functions, with both 0 and 1 as
second arguments.
* c-c++-common/rotate-6.c: New test.
* c-c++-common/rotate-6a.c: New test.
* c-c++-common/rotate-7.c: New test.
* c-c++-common/rotate-7a.c: New test.
* c-c++-common/rotate-8.c: New test.
2017-10-14 Hristian Kirtchev <kirtchev@adacore.com>
* gnat.dg/remote_call_iface.ads, gnat.dg/remote_call_iface.adb: New
......
......@@ -15,12 +15,40 @@ f1 (unsigned long long x, unsigned int y)
return (x << y) | (x >> ((-y) & 63));
}
/* Rotate-style idiom on a 64-bit operand where the two shifted halves
   are joined with + rather than |.  The right shift count is
   (-y) & 63, so it is 0 when y is 0.  The noinline and noclone
   attributes ask GCC not to inline or clone the function.  */
__attribute__((noinline, noclone))
unsigned long long
f2 (unsigned long long x, unsigned int y)
{
return (x << y) + (x >> ((-y) & 63));
}
/* Rotate-style idiom on a 64-bit operand where the two shifted halves
   are joined with ^ rather than |.  The right shift count is
   (-y) & 63, so for y == 0 both shifts are by 0 and the result is
   x ^ x.  The noinline and noclone attributes ask GCC not to inline
   or clone the function.  */
__attribute__((noinline, noclone))
unsigned long long
f3 (unsigned long long x, unsigned int y)
{
return (x << y) ^ (x >> ((-y) & 63));
}
#if __CHAR_BIT__ * __SIZEOF_INT128__ == 128
/* 128-bit rotate-style idiom joining the shifted halves with |; the
   right shift count is (-y) & 127.
   NOTE(review): two declarator lines follow.  The `f2' line looks like
   the pre-patch name shown by the diff view (the ChangeLog says the old
   f2 was moved to f4) -- confirm against the real file.  */
__attribute__((noinline, noclone))
unsigned __int128
f2 (unsigned __int128 x, unsigned int y)
f4 (unsigned __int128 x, unsigned int y)
{
return (x << y) | (x >> ((-y) & 127));
}
/* 128-bit rotate-style idiom joining the shifted halves with +; the
   right shift count is (-y) & 127.
   NOTE(review): the first return statement (mask 128, operator |)
   looks like the removed pre-patch line shown by the diff view (the
   ChangeLog says "Use 127 instead of 128") -- confirm against the
   real file.  */
__attribute__((noinline, noclone))
unsigned __int128
f5 (unsigned __int128 x, unsigned int y)
{
return (x << y) | (x >> ((-y) & 128));
return (x << y) + (x >> ((-y) & 127));
}
/* 128-bit rotate-style idiom joining the shifted halves with ^; the
   right shift count is (-y) & 127, so for y == 0 the result is
   x ^ x.  */
__attribute__((noinline, noclone))
unsigned __int128
f6 (unsigned __int128 x, unsigned int y)
{
return (x << y) ^ (x >> ((-y) & 127));
}
#endif
#endif
......@@ -31,12 +59,45 @@ main ()
#if __CHAR_BIT__ * __SIZEOF_LONG_LONG__ == 64
if (f1 (0x123456789abcdef0ULL, 0) != 0x123456789abcdef0ULL)
abort ();
if (f2 (0x123456789abcdef0ULL, 0) != 0x2468acf13579bde0ULL)
abort ();
if (f3 (0x123456789abcdef0ULL, 0) != 0)
abort ();
if (f1 (0x123456789abcdef0ULL, 1) != 0x2468acf13579bde0ULL)
abort ();
if (f2 (0x123456789abcdef0ULL, 1) != 0x2468acf13579bde0ULL)
abort ();
if (f3 (0x123456789abcdef0ULL, 1) != 0x2468acf13579bde0ULL)
abort ();
#if __CHAR_BIT__ * __SIZEOF_INT128__ == 128
if (f2 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
if (f4 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
| 0x0fedcba987654321ULL, 0)
!= ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
| 0x0fedcba987654321ULL))
abort ();
if (f5 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
| 0x0fedcba987654321ULL, 0)
!= ((((unsigned __int128) 0x2468acf13579bde0ULL) << 64)
| 0x1fdb97530eca8642ULL))
abort ();
if (f6 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
| 0x0fedcba987654321ULL, 0) != 0)
abort ();
if (f4 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
| 0x0fedcba987654321ULL, 1)
!= ((((unsigned __int128) 0x2468acf13579bde0ULL) << 64)
| 0x1fdb97530eca8642ULL))
abort ();
if (f5 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
| 0x0fedcba987654321ULL, 1)
!= ((((unsigned __int128) 0x2468acf13579bde0ULL) << 64)
| 0x1fdb97530eca8642ULL))
abort ();
if (f6 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
| 0x0fedcba987654321ULL, 1)
!= ((((unsigned __int128) 0x2468acf13579bde0ULL) << 64)
| 0x1fdb97530eca8642ULL))
abort ();
#endif
#endif
return 0;
......
/* { dg-do run } */
/* { dg-options "-O2 -Wno-overflow" } */
#define ROTATE_N "rotate-6.c"
#include "rotate-1a.c"
/* { dg-do run } */
/* { dg-options "-O2 -Wno-overflow" } */
#define ROTATE_N "rotate-7.c"
#include "rotate-1a.c"
/* PR middle-end/62263 */
/* PR middle-end/82498 */
/* { dg-do compile } */
/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-times "r\[<>]\[<>]" 23 "optimized" } } */
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: reduce the unsigned char
   count with y %= B, then combine x << y with x >> (B - y).
   The right shift count equals B when y is 0.  */
unsigned int
f1 (unsigned int x, unsigned char y)
{
y %= __CHAR_BIT__ * __SIZEOF_INT__;
return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: same shape as the
   y %= B variant, but the count is a signed char and is reduced
   with y &= B - 1 before forming x << y and x >> (B - y).  */
unsigned int
f2 (unsigned int x, signed char y)
{
y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: the mask y & (B - 1) is
   written inline in both shift counts instead of being assigned back
   to y.  Left count is y & (B - 1); right count is
   B - (y & (B - 1)).  */
unsigned int
f3 (unsigned int x, unsigned char y)
{
return (x << (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: mask y with B - 1, then
   use a conditional expression so that a zero count returns x
   unchanged and only nonzero counts reach the
   x << y | x >> (B - y) form.  */
unsigned int
f4 (unsigned int x, unsigned char y)
{
y = y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1);
return y ? (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y)) : x;
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: mask y with B - 1 first;
   the right shift count is (B - y) & (B - 1), i.e. the subtraction
   result is masked as well.  */
unsigned int
f5 (unsigned int x, unsigned char y)
{
y = y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1);
return (x << y) | (x >> ((__CHAR_BIT__ * __SIZEOF_INT__ - y) & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: fully inline variant.
   Left count is y & (B - 1); right count is
   (B - (y & (B - 1))) & (B - 1).  */
unsigned int
f6 (unsigned int x, unsigned char y)
{
return (x << (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x >> ((__CHAR_BIT__ * __SIZEOF_INT__ - (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: left count is
   y & (B - 1); right count is (B - y) & (B - 1), where the y inside
   the subtraction is not masked beforehand.  */
unsigned int
f7 (unsigned int x, unsigned char y)
{
return (x << (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x >> ((__CHAR_BIT__ * __SIZEOF_INT__ - y) & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: left count is
   y & (B - 1) and right count is (-y) & (B - 1), so both counts are
   masked and both are 0 when y is 0.  */
unsigned int
f8 (unsigned int x, unsigned char y)
{
return (x << (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x >> ((-y) & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: the shifted value is the
   constant 0x12345678U rather than a variable; the parameter x is
   not used.  Left count is y & (B - 1), right count is
   -y & (B - 1), with y a plain int.  */
unsigned int
f9 (unsigned int x, int y)
{
return (0x12345678U << (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (0x12345678U >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: constant 0x12345678U is
   shifted right by -y & (B - 1) and left by y & (B - 1); the right
   shift is written as the first operand of |.  The parameter x is
   not used.  */
unsigned int
f10 (unsigned int x, int y)
{
return (0x12345678U >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (0x12345678U << (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: constant 0x12345678U is
   shifted right by y & (B - 1) and left by -y & (B - 1), i.e. the
   negated count is on the left shift.  The parameter x is not
   used.  */
unsigned int
f11 (unsigned int x, int y)
{
return (0x12345678U >> (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (0x12345678U << (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: constant 0x12345678U is
   shifted left by -y & (B - 1) and right by y & (B - 1); the left
   shift is written as the first operand of |.  The parameter x is
   not used.  */
unsigned int
f12 (unsigned int x, int y)
{
return (0x12345678U << (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (0x12345678U >> (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: return x early when the
   unsigned char count is 0, then reduce it with y %= B and form
   x << y | x >> (B - y).  The zero test is done before the modulo,
   so it sees the unreduced count.  */
unsigned
f13 (unsigned x, unsigned char y)
{
if (y == 0)
return x;
y %= __CHAR_BIT__ * __SIZEOF_INT__;
return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: return x early when the
   unsigned count is 0, then reduce it with y %= B and form
   x << y | x >> (B - y).  The zero test is done before the
   modulo.  */
unsigned
f14 (unsigned x, unsigned y)
{
if (y == 0)
return x;
y %= __CHAR_BIT__ * __SIZEOF_INT__;
return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: return x early when the
   unsigned short count is 0, then reduce it with y %= B and form
   x << y | x >> (B - y).  The zero test is done before the
   modulo.  */
unsigned
f15 (unsigned x, unsigned short y)
{
if (y == 0)
return x;
y %= __CHAR_BIT__ * __SIZEOF_INT__;
return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: reduce the unsigned char
   count with y %= B first, return x if the reduced count is 0, and
   otherwise form x << y | x >> (B - y).  */
unsigned
f16 (unsigned x, unsigned char y)
{
y %= __CHAR_BIT__ * __SIZEOF_INT__;
if (y == 0)
return x;
return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: reduce the unsigned count
   with y %= B first, return x if the reduced count is 0, and
   otherwise form x << y | x >> (B - y).  */
unsigned
f17 (unsigned x, unsigned y)
{
y %= __CHAR_BIT__ * __SIZEOF_INT__;
if (y == 0)
return x;
return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: reduce the unsigned short
   count with y %= B first, return x if the reduced count is 0, and
   otherwise form x << y | x >> (B - y).  */
unsigned
f18 (unsigned x, unsigned short y)
{
y %= __CHAR_BIT__ * __SIZEOF_INT__;
if (y == 0)
return x;
return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: reduce the unsigned char
   count with y %= B; the right shift count negates y, casts the
   result back to unsigned char, and reduces it with % B.  */
unsigned
f19 (unsigned x, unsigned char y)
{
y %= __CHAR_BIT__ * __SIZEOF_INT__;
return (x << y) | (x >> (((unsigned char) -y) % (__CHAR_BIT__ * __SIZEOF_INT__)));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: reduce the unsigned int
   count with y %= B; the right shift count is -y % B, with no cast
   because y already has type unsigned int.  */
unsigned
f20 (unsigned x, unsigned int y)
{
y %= __CHAR_BIT__ * __SIZEOF_INT__;
return (x << y) | (x >> (-y % (__CHAR_BIT__ * __SIZEOF_INT__)));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: reduce the unsigned short
   count with y %= B; the right shift count negates y, casts the
   result back to unsigned short, and reduces it with % B.  */
unsigned
f21 (unsigned x, unsigned short y)
{
y %= __CHAR_BIT__ * __SIZEOF_INT__;
return (x << y) | (x >> (((unsigned short) -y) % (__CHAR_BIT__ * __SIZEOF_INT__)));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: mixes the two reduction
   styles.  The unsigned char count is reduced with y %= B, while the
   right shift count is -y & (B - 1).  */
unsigned
f22 (unsigned x, unsigned char y)
{
y %= __CHAR_BIT__ * __SIZEOF_INT__;
return (x << y) | (x >> (-y & ((__CHAR_BIT__ * __SIZEOF_INT__) - 1)));
}
/* With B = __CHAR_BIT__ * __SIZEOF_INT__: mixes the two reduction
   styles.  The unsigned short count is reduced with y %= B, while
   the right shift count is -y & (B - 1).  */
unsigned
f23 (unsigned x, unsigned short y)
{
y %= __CHAR_BIT__ * __SIZEOF_INT__;
return (x << y) | (x >> (-y & ((__CHAR_BIT__ * __SIZEOF_INT__) - 1)));
}
......@@ -1491,9 +1491,14 @@ defcodefor_name (tree name, enum tree_code *code, tree *arg1, tree *arg2)
applied, otherwise return false.
We are looking for X with unsigned type T with bitsize B, OP being
+, | or ^, some type T2 wider than T and
+, | or ^, some type T2 wider than T. For:
(X << CNT1) OP (X >> CNT2) iff CNT1 + CNT2 == B
((T) ((T2) X << CNT1)) OP ((T) ((T2) X >> CNT2)) iff CNT1 + CNT2 == B
transform these into:
X r<< CNT1
Or for:
(X << Y) OP (X >> (B - Y))
(X << (int) Y) OP (X >> (int) (B - Y))
((T) ((T2) X << Y)) OP ((T) ((T2) X >> (B - Y)))
......@@ -1503,12 +1508,23 @@ defcodefor_name (tree name, enum tree_code *code, tree *arg1, tree *arg2)
((T) ((T2) X << Y)) | ((T) ((T2) X >> ((-Y) & (B - 1))))
((T) ((T2) X << (int) Y)) | ((T) ((T2) X >> (int) ((-Y) & (B - 1))))
and transform these into:
X r<< CNT1
transform these into:
X r<< Y
Or for:
(X << (Y & (B - 1))) | (X >> ((-Y) & (B - 1)))
(X << (int) (Y & (B - 1))) | (X >> (int) ((-Y) & (B - 1)))
((T) ((T2) X << (Y & (B - 1)))) | ((T) ((T2) X >> ((-Y) & (B - 1))))
((T) ((T2) X << (int) (Y & (B - 1)))) \
| ((T) ((T2) X >> (int) ((-Y) & (B - 1))))
transform these into:
X r<< (Y & (B - 1))
Note, in the patterns with T2 type, the type of OP operands
might be even a signed type, but should have precision B. */
might be even a signed type, but should have precision B.
Expressions with & (B - 1) should be recognized only if B is
a power of 2. */
static bool
simplify_rotate (gimple_stmt_iterator *gsi)
......@@ -1578,7 +1594,9 @@ simplify_rotate (gimple_stmt_iterator *gsi)
def_arg1[i] = tem;
}
/* Both shifts have to use the same first operand. */
if (TREE_CODE (def_arg1[0]) != SSA_NAME || def_arg1[0] != def_arg1[1])
if (!operand_equal_for_phi_arg_p (def_arg1[0], def_arg1[1])
|| !types_compatible_p (TREE_TYPE (def_arg1[0]),
TREE_TYPE (def_arg1[1])))
return false;
if (!TYPE_UNSIGNED (TREE_TYPE (def_arg1[0])))
return false;
......@@ -1649,8 +1667,10 @@ simplify_rotate (gimple_stmt_iterator *gsi)
/* The above sequence isn't safe for Y being 0,
because then one of the shifts triggers undefined behavior.
This alternative is safe even for rotation count of 0.
One shift count is Y and the other (-Y) & (B - 1). */
One shift count is Y and the other (-Y) & (B - 1).
Or one shift count is Y & (B - 1) and the other (-Y) & (B - 1). */
else if (cdef_code[i] == BIT_AND_EXPR
&& pow2p_hwi (TYPE_PRECISION (rtype))
&& tree_fits_shwi_p (cdef_arg2[i])
&& tree_to_shwi (cdef_arg2[i])
== TYPE_PRECISION (rtype) - 1
......@@ -1675,18 +1695,51 @@ simplify_rotate (gimple_stmt_iterator *gsi)
rotcnt = tem;
break;
}
defcodefor_name (tem, &code, &tem, NULL);
tree tem2;
defcodefor_name (tem, &code, &tem2, NULL);
if (CONVERT_EXPR_CODE_P (code)
&& INTEGRAL_TYPE_P (TREE_TYPE (tem))
&& TYPE_PRECISION (TREE_TYPE (tem))
&& INTEGRAL_TYPE_P (TREE_TYPE (tem2))
&& TYPE_PRECISION (TREE_TYPE (tem2))
> floor_log2 (TYPE_PRECISION (rtype))
&& type_has_mode_precision_p (TREE_TYPE (tem))
&& (tem == def_arg2[1 - i]
|| tem == def_arg2_alt[1 - i]))
&& type_has_mode_precision_p (TREE_TYPE (tem2)))
{
rotcnt = tem;
if (tem2 == def_arg2[1 - i]
|| tem2 == def_arg2_alt[1 - i])
{
rotcnt = tem2;
break;
}
}
else
tem2 = NULL_TREE;
if (cdef_code[1 - i] == BIT_AND_EXPR
&& tree_fits_shwi_p (cdef_arg2[1 - i])
&& tree_to_shwi (cdef_arg2[1 - i])
== TYPE_PRECISION (rtype) - 1
&& TREE_CODE (cdef_arg1[1 - i]) == SSA_NAME)
{
if (tem == cdef_arg1[1 - i]
|| tem2 == cdef_arg1[1 - i])
{
rotcnt = def_arg2[1 - i];
break;
}
tree tem3;
defcodefor_name (cdef_arg1[1 - i], &code, &tem3, NULL);
if (CONVERT_EXPR_CODE_P (code)
&& INTEGRAL_TYPE_P (TREE_TYPE (tem3))
&& TYPE_PRECISION (TREE_TYPE (tem3))
> floor_log2 (TYPE_PRECISION (rtype))
&& type_has_mode_precision_p (TREE_TYPE (tem3)))
{
if (tem == tem3 || tem2 == tem3)
{
rotcnt = def_arg2[1 - i];
break;
}
}
}
}
}
if (rotcnt == NULL_TREE)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment