re PR middle-end/62263 (Good codegen for bitwise rotate requires code that is technically undefined behavior)

re PR middle-end/62263 (Good codegen for bitwise rotate requires code that is technically undefined behavior)

	PR middle-end/62263
	PR middle-end/82498
	* tree-ssa-forwprop.c (simplify_rotate): Allow def_arg1[N] to be any operand_equal_p operands. For & (B - 1) require B to be power of 2. Recognize (X << (Y & (B - 1))) | (X >> ((-Y) & (B - 1))) and similar patterns.

	* c-c++-common/rotate-5.c (f2): New function. Move old function to ...
	(f4): ... this. Use 127 instead of 128.
	(f3, f5, f6): New functions.
	(main): Test all f[1-6] functions, with both 0 and 1 as second arguments.
	* c-c++-common/rotate-6.c: New test.
	* c-c++-common/rotate-6a.c: New test.
	* c-c++-common/rotate-7.c: New test.
	* c-c++-common/rotate-7a.c: New test.
	* c-c++-common/rotate-8.c: New test.

From-SVN: r253760

re PR middle-end/62263 (Good codegen for bitwise rotate requires code that is…
re PR middle-end/62263 (Good codegen for bitwise rotate requires code that is technically undefined behavior) PR middle-end/62263 PR middle-end/82498 * tree-ssa-forwprop.c (simplify_rotate): Allow def_arg1[N] to be any operand_equal_p operands. For & (B - 1) require B to be power of 2. Recognize (X << (Y & (B - 1))) | (X >> ((-Y) & (B - 1))) and similar patterns. * c-c++-common/rotate-5.c (f2): New function. Move old function to ... (f4): ... this. Use 127 instead of 128. (f3, f5, f6): New functions. (main): Test all f[1-6] functions, with both 0 and 1 as second arguments. * c-c++-common/rotate-6.c: New test. * c-c++-common/rotate-6a.c: New test. * c-c++-common/rotate-7.c: New test. * c-c++-common/rotate-7a.c: New test. * c-c++-common/rotate-8.c: New test. From-SVN: r253760
cc453086 · Jakub Jelinek · Jakub Jelinek · 6af90df0 · cc453086 · cc453086
Commit cc453086 authored Oct 14, 2017 by Jakub Jelinek Committed by Jakub Jelinek Oct 14, 2017
9 changed files
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
+2017-10-14  Jakub Jelinek  <jakub@redhat.com>
+
+	PR middle-end/62263
+	PR middle-end/82498
+	* tree-ssa-forwprop.c (simplify_rotate): Allow def_arg1[N]
+	to be any operand_equal_p operands.  For & (B - 1) require
+	B to be power of 2.  Recognize
+	(X << (Y & (B - 1))) | (X >> ((-Y) & (B - 1))) and similar patterns.
+
 2017-10-14  Uros Bizjak  <ubizjak@gmail.com>

 	PR bootstrap/82553
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
+2017-10-14  Jakub Jelinek  <jakub@redhat.com>
+
+	PR middle-end/62263
+	PR middle-end/82498
+	* c-c++-common/rotate-5.c (f2): New function.  Move old
+	function to ...
+	(f4): ... this.  Use 127 instead of 128.
+	(f3, f5, f6): New functions.
+	(main): Test all f[1-6] functions, with both 0 and 1 as
+	second arguments.
+	* c-c++-common/rotate-6.c: New test.
+	* c-c++-common/rotate-6a.c: New test.
+	* c-c++-common/rotate-7.c: New test.
+	* c-c++-common/rotate-7a.c: New test.
+	* c-c++-common/rotate-8.c: New test.
+
 2017-10-14  Hristian Kirtchev  <kirtchev@adacore.com>

 	* gnat.dg/remote_call_iface.ads, gnat.dg/remote_call_iface.adb: New

--- a/gcc/testsuite/c-c++-common/rotate-5.c
+++ b/gcc/testsuite/c-c++-common/rotate-5.c
@@ -15,12 +15,40 @@ f1 (unsigned long long x, unsigned int y)
  return (x << y) | (x >> ((-y) & 63));
 }

+__attribute__((noinline, noclone)) /* f2: same shift pair as f1 above, but the halves are combined with + instead of |.  */
+unsigned long long
+f2 (unsigned long long x, unsigned int y)
+{
+  return (x << y) + (x >> ((-y) & 63)); /* Right-shift count is (-y) masked to 0..63.  */
+}
+
+__attribute__((noinline, noclone)) /* f3: same shift pair as f1 above, but the halves are combined with ^ instead of |.  */
+unsigned long long
+f3 (unsigned long long x, unsigned int y)
+{
+  return (x << y) ^ (x >> ((-y) & 63)); /* Right-shift count is (-y) masked to 0..63.  */
+}
+
 #if __CHAR_BIT__ * __SIZEOF_INT128__ == 128
 __attribute__((noinline, noclone))
 unsigned __int128
-f2 (unsigned __int128 x, unsigned int y)
+f4 (unsigned __int128 x, unsigned int y)
+{
+  return (x << y) | (x >> ((-y) & 127));
+}
+
+__attribute__((noinline, noclone))
+unsigned __int128
+f5 (unsigned __int128 x, unsigned int y)
 {
-  return (x << y) | (x >> ((-y) & 128));
+  return (x << y) + (x >> ((-y) & 127));
+}
+
+__attribute__((noinline, noclone))
+unsigned __int128
+f6 (unsigned __int128 x, unsigned int y)
+{
+  return (x << y) ^ (x >> ((-y) & 127));
 }
 #endif
 #endif
@@ -31,12 +59,45 @@ main ()
 #if __CHAR_BIT__ * __SIZEOF_LONG_LONG__ == 64
  if (f1 (0x123456789abcdef0ULL, 0) != 0x123456789abcdef0ULL)
    abort ();
+  if (f2 (0x123456789abcdef0ULL, 0) != 0x2468acf13579bde0ULL)
+    abort ();
+  if (f3 (0x123456789abcdef0ULL, 0) != 0)
+    abort ();
+  if (f1 (0x123456789abcdef0ULL, 1) != 0x2468acf13579bde0ULL)
+    abort ();
+  if (f2 (0x123456789abcdef0ULL, 1) != 0x2468acf13579bde0ULL)
+    abort ();
+  if (f3 (0x123456789abcdef0ULL, 1) != 0x2468acf13579bde0ULL)
+    abort ();
 #if __CHAR_BIT__ * __SIZEOF_INT128__ == 128
-  if (f2 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
+  if (f4 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
 	  | 0x0fedcba987654321ULL, 0)
      != ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
          | 0x0fedcba987654321ULL))
    abort ();
+  if (f5 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
+	  | 0x0fedcba987654321ULL, 0)
+      != ((((unsigned __int128) 0x2468acf13579bde0ULL) << 64)
+          | 0x1fdb97530eca8642ULL))
+    abort ();
+  if (f6 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
+	  | 0x0fedcba987654321ULL, 0) != 0)
+    abort ();
+  if (f4 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
+	  | 0x0fedcba987654321ULL, 1)
+      != ((((unsigned __int128) 0x2468acf13579bde0ULL) << 64)
+          | 0x1fdb97530eca8642ULL))
+    abort ();
+  if (f5 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
+	  | 0x0fedcba987654321ULL, 1)
+      != ((((unsigned __int128) 0x2468acf13579bde0ULL) << 64)
+          | 0x1fdb97530eca8642ULL))
+    abort ();
+  if (f6 ((((unsigned __int128) 0x123456789abcdef0ULL) << 64)
+	  | 0x0fedcba987654321ULL, 1)
+      != ((((unsigned __int128) 0x2468acf13579bde0ULL) << 64)
+          | 0x1fdb97530eca8642ULL))
+    abort ();
 #endif
 #endif
  return 0;

--- a/gcc/testsuite/c-c++-common/rotate-6.c
+++ b/gcc/testsuite/c-c++-common/rotate-6.c
--- a/gcc/testsuite/c-c++-common/rotate-6a.c
+++ b/gcc/testsuite/c-c++-common/rotate-6a.c
+/* { dg-do run } */
+/* { dg-options "-O2 -Wno-overflow" } */
+
+#define ROTATE_N "rotate-6.c"
+
+#include "rotate-1a.c"
--- a/gcc/testsuite/c-c++-common/rotate-7.c
+++ b/gcc/testsuite/c-c++-common/rotate-7.c
--- a/gcc/testsuite/c-c++-common/rotate-7a.c
+++ b/gcc/testsuite/c-c++-common/rotate-7a.c
+/* { dg-do run } */
+/* { dg-options "-O2 -Wno-overflow" } */
+
+#define ROTATE_N "rotate-7.c"
+
+#include "rotate-1a.c"
--- a/gcc/testsuite/c-c++-common/rotate-8.c
+++ b/gcc/testsuite/c-c++-common/rotate-8.c
+/* PR middle-end/62263 */
+/* PR middle-end/82498 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-ipa-icf -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "r\[<>]\[<>]" 23 "optimized" } } */
+
+unsigned int /* f1: with B = __CHAR_BIT__ * __SIZEOF_INT__, reduce y modulo B, then (x << y) | (x >> (B - y)).  */
+f1 (unsigned int x, unsigned char y)
+{
+  y %= __CHAR_BIT__ * __SIZEOF_INT__;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y)); /* Right-shift count is B - y, i.e. B when y is 0.  */
+}
+
+unsigned int /* f2: like f1, but y is a signed char and is reduced with & (B - 1), B = __CHAR_BIT__ * __SIZEOF_INT__.  */
+f2 (unsigned int x, signed char y)
+{
+  y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y)); /* Right-shift count is B - y.  */
+}
+
+unsigned int /* f3: single-expression form; y & (B - 1) is written inline in both shift counts, B = __CHAR_BIT__ * __SIZEOF_INT__.  */
+f3 (unsigned int x, unsigned char y)
+{
+  return (x << (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)))); /* Left count: y & (B - 1); right count: B - (y & (B - 1)).  */
+}
+
+unsigned int /* f4: mask y with B - 1 (B = __CHAR_BIT__ * __SIZEOF_INT__) and return x itself when the masked count is 0.  */
+f4 (unsigned int x, unsigned char y)
+{
+  y = y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1);
+  return y ? (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y)) : x; /* The shifts are only evaluated for nonzero y.  */
+}
+
+unsigned int /* f5: mask y with B - 1 first; the right-shift count (B - y) is masked with B - 1 as well, B = __CHAR_BIT__ * __SIZEOF_INT__.  */
+f5 (unsigned int x, unsigned char y)
+{
+  y = y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1);
+  return (x << y) | (x >> ((__CHAR_BIT__ * __SIZEOF_INT__ - y) & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))); /* Right count: (B - y) & (B - 1).  */
+}
+
+unsigned int /* f6: single-expression form of f5; every use of y is masked inline with B - 1, B = __CHAR_BIT__ * __SIZEOF_INT__.  */
+f6 (unsigned int x, unsigned char y)
+{
+  return (x << (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x >> ((__CHAR_BIT__ * __SIZEOF_INT__ - (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))); /* Right count: (B - (y & (B - 1))) & (B - 1).  */
+}
+
+unsigned int /* f7: left count is y & (B - 1); right count subtracts the unmasked y from B and masks the difference, B = __CHAR_BIT__ * __SIZEOF_INT__.  */
+f7 (unsigned int x, unsigned char y)
+{
+  return (x << (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x >> ((__CHAR_BIT__ * __SIZEOF_INT__ - y) & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))); /* Right count: (B - y) & (B - 1).  */
+}
+
+unsigned int /* f8: left count is y & (B - 1) and right count is (-y) & (B - 1), B = __CHAR_BIT__ * __SIZEOF_INT__.  */
+f8 (unsigned int x, unsigned char y)
+{
+  return (x << (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x >> ((-y) & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))); /* Both shift counts are masked to 0..B-1.  */
+}
+
+unsigned int /* f9: shifts the constant 0x12345678U instead of x (x is unused); left by y & (B - 1), right by -y & (B - 1).  */
+f9 (unsigned int x, int y)
+{
+  return (0x12345678U << (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (0x12345678U >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))); /* B = __CHAR_BIT__ * __SIZEOF_INT__.  */
+}
+
+unsigned int /* f10: same two shifts of 0x12345678U as f9, with the operands of | written in the opposite order (x is unused).  */
+f10 (unsigned int x, int y)
+{
+  return (0x12345678U >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (0x12345678U << (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))); /* Right by -y & (B - 1), left by y & (B - 1).  */
+}
+
+unsigned int /* f11: shifts 0x12345678U right by y & (B - 1) and left by -y & (B - 1), i.e. the shift directions of f9 swapped (x is unused).  */
+f11 (unsigned int x, int y)
+{
+  return (0x12345678U >> (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (0x12345678U << (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))); /* B = __CHAR_BIT__ * __SIZEOF_INT__.  */
+}
+
+unsigned int /* f12: same two shifts of 0x12345678U as f11, with the operands of | written in the opposite order (x is unused).  */
+f12 (unsigned int x, int y)
+{
+  return (0x12345678U << (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (0x12345678U >> (y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))); /* Left by -y & (B - 1), right by y & (B - 1).  */
+}
+
+unsigned /* f13: returns x when y is 0; otherwise reduces y modulo B = __CHAR_BIT__ * __SIZEOF_INT__ and combines x << y with x >> (B - y).  */
+f13 (unsigned x, unsigned char y)
+{
+  if (y == 0) /* The zero test happens before y is reduced.  */
+    return x;
+  y %= __CHAR_BIT__ * __SIZEOF_INT__;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned /* f14: same body as f13 with an unsigned int count: zero test first, then y %= B, B = __CHAR_BIT__ * __SIZEOF_INT__.  */
+f14 (unsigned x, unsigned y)
+{
+  if (y == 0) /* The zero test happens before y is reduced.  */
+    return x;
+  y %= __CHAR_BIT__ * __SIZEOF_INT__;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned /* f15: same body as f13 with an unsigned short count: zero test first, then y %= B, B = __CHAR_BIT__ * __SIZEOF_INT__.  */
+f15 (unsigned x, unsigned short y)
+{
+  if (y == 0) /* The zero test happens before y is reduced.  */
+    return x;
+  y %= __CHAR_BIT__ * __SIZEOF_INT__;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned /* f16: reduces y modulo B = __CHAR_BIT__ * __SIZEOF_INT__ first, then returns x when the reduced count is 0.  */
+f16 (unsigned x, unsigned char y)
+{
+  y %= __CHAR_BIT__ * __SIZEOF_INT__;
+  if (y == 0) /* The zero test happens after y is reduced, so the shifts below only see nonzero y.  */
+    return x;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned /* f17: same body as f16 with an unsigned int count: y %= B first, then the zero test, B = __CHAR_BIT__ * __SIZEOF_INT__.  */
+f17 (unsigned x, unsigned y)
+{
+  y %= __CHAR_BIT__ * __SIZEOF_INT__;
+  if (y == 0) /* The zero test happens after y is reduced, so the shifts below only see nonzero y.  */
+    return x;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned /* f18: same body as f16 with an unsigned short count: y %= B first, then the zero test, B = __CHAR_BIT__ * __SIZEOF_INT__.  */
+f18 (unsigned x, unsigned short y)
+{
+  y %= __CHAR_BIT__ * __SIZEOF_INT__;
+  if (y == 0) /* The zero test happens after y is reduced, so the shifts below only see nonzero y.  */
+    return x;
+  return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned /* f19: y is reduced modulo B = __CHAR_BIT__ * __SIZEOF_INT__; the right count is -y cast back to unsigned char, modulo B.  */
+f19 (unsigned x, unsigned char y)
+{
+  y %= __CHAR_BIT__ * __SIZEOF_INT__;
+  return (x << y) | (x >> (((unsigned char) -y) % (__CHAR_BIT__ * __SIZEOF_INT__))); /* Right count: ((unsigned char) -y) % B.  */
+}
+
+unsigned /* f20: unsigned int count; y is reduced modulo B = __CHAR_BIT__ * __SIZEOF_INT__ and the right count is -y % B, with no cast.  */
+f20 (unsigned x, unsigned int y)
+{
+  y %= __CHAR_BIT__ * __SIZEOF_INT__;
+  return (x << y) | (x >> (-y % (__CHAR_BIT__ * __SIZEOF_INT__))); /* Right count: -y % B.  */
+}
+
+unsigned /* f21: unsigned short variant of f19; the right count is -y cast back to unsigned short, modulo B = __CHAR_BIT__ * __SIZEOF_INT__.  */
+f21 (unsigned x, unsigned short y)
+{
+  y %= __CHAR_BIT__ * __SIZEOF_INT__;
+  return (x << y) | (x >> (((unsigned short) -y) % (__CHAR_BIT__ * __SIZEOF_INT__))); /* Right count: ((unsigned short) -y) % B.  */
+}
+
+unsigned /* f22: y is reduced with %= B (B = __CHAR_BIT__ * __SIZEOF_INT__) while the right count masks -y with B - 1.  */
+f22 (unsigned x, unsigned char y)
+{
+  y %= __CHAR_BIT__ * __SIZEOF_INT__;
+  return (x << y) | (x >> (-y & ((__CHAR_BIT__ * __SIZEOF_INT__) - 1))); /* Right count: -y & (B - 1).  */
+}
+
+unsigned /* f23: unsigned short variant of f22; y %= B (B = __CHAR_BIT__ * __SIZEOF_INT__), right count -y & (B - 1).  */
+f23 (unsigned x, unsigned short y)
+{
+  y %= __CHAR_BIT__ * __SIZEOF_INT__;
+  return (x << y) | (x >> (-y & ((__CHAR_BIT__ * __SIZEOF_INT__) - 1))); /* Right count: -y & (B - 1).  */
+}
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -1491,9 +1491,14 @@ defcodefor_name (tree name, enum tree_code *code, tree *arg1, tree *arg2)
   applied, otherwise return false.

   We are looking for X with unsigned type T with bitsize B, OP being
-   +, | or ^, some type T2 wider than T and
+   +, | or ^, some type T2 wider than T.  For:
   (X << CNT1) OP (X >> CNT2)				iff CNT1 + CNT2 == B
   ((T) ((T2) X << CNT1)) OP ((T) ((T2) X >> CNT2))	iff CNT1 + CNT2 == B
+
+   transform these into:
+   X r<< CNT1
+
+   Or for:
   (X << Y) OP (X >> (B - Y))
   (X << (int) Y) OP (X >> (int) (B - Y))
   ((T) ((T2) X << Y)) OP ((T) ((T2) X >> (B - Y)))
@@ -1503,12 +1508,23 @@ defcodefor_name (tree name, enum tree_code *code, tree *arg1, tree *arg2)
   ((T) ((T2) X << Y)) | ((T) ((T2) X >> ((-Y) & (B - 1))))
   ((T) ((T2) X << (int) Y)) | ((T) ((T2) X >> (int) ((-Y) & (B - 1))))

-   and transform these into:
-   X r<< CNT1
+   transform these into:
   X r<< Y

+   Or for:
+   (X << (Y & (B - 1))) | (X >> ((-Y) & (B - 1)))
+   (X << (int) (Y & (B - 1))) | (X >> (int) ((-Y) & (B - 1)))
+   ((T) ((T2) X << (Y & (B - 1)))) | ((T) ((T2) X >> ((-Y) & (B - 1))))
+   ((T) ((T2) X << (int) (Y & (B - 1)))) \
+     | ((T) ((T2) X >> (int) ((-Y) & (B - 1))))
+
+   transform these into:
+   X r<< (Y & (B - 1))
+
   Note, in the patterns with T2 type, the type of OP operands
-   might be even a signed type, but should have precision B.  */
+   might be even a signed type, but should have precision B.
+   Expressions with & (B - 1) should be recognized only if B is
+   a power of 2.  */

 static bool
 simplify_rotate (gimple_stmt_iterator *gsi)
@@ -1578,7 +1594,9 @@ simplify_rotate (gimple_stmt_iterator *gsi)
 	def_arg1[i] = tem;
      }
  /* Both shifts have to use the same first operand.  */
-  if (TREE_CODE (def_arg1[0]) != SSA_NAME || def_arg1[0] != def_arg1[1])
+  if (!operand_equal_for_phi_arg_p (def_arg1[0], def_arg1[1])
+      || !types_compatible_p (TREE_TYPE (def_arg1[0]),
+			      TREE_TYPE (def_arg1[1])))
    return false;
  if (!TYPE_UNSIGNED (TREE_TYPE (def_arg1[0])))
    return false;
@@ -1649,8 +1667,10 @@ simplify_rotate (gimple_stmt_iterator *gsi)
 	/* The above sequence isn't safe for Y being 0,
 	   because then one of the shifts triggers undefined behavior.
 	   This alternative is safe even for rotation count of 0.
-	   One shift count is Y and the other (-Y) & (B - 1).  */
+	   One shift count is Y and the other (-Y) & (B - 1).
+	   Or one shift count is Y & (B - 1) and the other (-Y) & (B - 1).  */
 	else if (cdef_code[i] == BIT_AND_EXPR
+		 && pow2p_hwi (TYPE_PRECISION (rtype))
 		 && tree_fits_shwi_p (cdef_arg2[i])
 		 && tree_to_shwi (cdef_arg2[i])
 		    == TYPE_PRECISION (rtype) - 1
@@ -1675,18 +1695,51 @@ simplify_rotate (gimple_stmt_iterator *gsi)
 		    rotcnt = tem;
 		    break;
 		  }
-		defcodefor_name (tem, &code, &tem, NULL);
+		tree tem2;
+		defcodefor_name (tem, &code, &tem2, NULL);
 		if (CONVERT_EXPR_CODE_P (code)
-		    && INTEGRAL_TYPE_P (TREE_TYPE (tem))
-		    && TYPE_PRECISION (TREE_TYPE (tem))
+		    && INTEGRAL_TYPE_P (TREE_TYPE (tem2))
+		    && TYPE_PRECISION (TREE_TYPE (tem2))
 		       > floor_log2 (TYPE_PRECISION (rtype))
-		    && type_has_mode_precision_p (TREE_TYPE (tem))
-		    && (tem == def_arg2[1 - i]
-			|| tem == def_arg2_alt[1 - i]))
+		    && type_has_mode_precision_p (TREE_TYPE (tem2)))
 		  {
-		    rotcnt = tem;
+		    if (tem2 == def_arg2[1 - i]
+			|| tem2 == def_arg2_alt[1 - i])
+		      {
+			rotcnt = tem2;
+			break;
+		      }
+		  }
+		else
+		  tem2 = NULL_TREE;
+
+		if (cdef_code[1 - i] == BIT_AND_EXPR
+		    && tree_fits_shwi_p (cdef_arg2[1 - i])
+		    && tree_to_shwi (cdef_arg2[1 - i])
+		       == TYPE_PRECISION (rtype) - 1
+		    && TREE_CODE (cdef_arg1[1 - i]) == SSA_NAME)
+		  {
+		    if (tem == cdef_arg1[1 - i]
+			|| tem2 == cdef_arg1[1 - i])
+		      {
+			rotcnt = def_arg2[1 - i];
 			break;
 		      }
+		    tree tem3;
+		    defcodefor_name (cdef_arg1[1 - i], &code, &tem3, NULL);
+		    if (CONVERT_EXPR_CODE_P (code)
+			&& INTEGRAL_TYPE_P (TREE_TYPE (tem3))
+			&& TYPE_PRECISION (TREE_TYPE (tem3))
+			   > floor_log2 (TYPE_PRECISION (rtype))
+			&& type_has_mode_precision_p (TREE_TYPE (tem3)))
+		      {
+			if (tem == tem3 || tem2 == tem3)
+			  {
+			    rotcnt = def_arg2[1 - i];
+			    break;
+			  }
+		      }
+		  }
 	      }
 	  }
      if (rotcnt == NULL_TREE)