Commit 89b1427f by Jakub Jelinek

re PR target/83210 (__builtin_mul_overflow() generates suboptimal code when exactly one argument is the constant 2)

	PR target/83210
	* internal-fn.c (expand_mul_overflow): Optimize unsigned
	multiplication by power of 2 constant into two shifts + comparison.

	* gcc.target/i386/pr83210.c: New test.

From-SVN: r255269
parent 7c080ade
2017-11-30 Jakub Jelinek <jakub@redhat.com>
PR target/83210
* internal-fn.c (expand_mul_overflow): Optimize unsigned
multiplication by power of 2 constant into two shifts + comparison.
2017-11-30  Jan Hubicka  <hubicka@ucw.cz>
	PR target/81616
	* config/i386/x86-tune-costs.h (generic_cost): Revise for modern CPUs.
2017-11-30  Richard Biener  <rguenther@suse.de>
@@ -1462,6 +1462,49 @@ expand_mul_overflow (location_t loc, tree lhs, tree arg0, tree arg1,
   type = build_nonstandard_integer_type (GET_MODE_PRECISION (mode), uns);
   sign = uns ? UNSIGNED : SIGNED;
   icode = optab_handler (uns ? umulv4_optab : mulv4_optab, mode);
   if (uns
       && (integer_pow2p (arg0) || integer_pow2p (arg1))
       && (optimize_insn_for_speed_p () || icode == CODE_FOR_nothing))
     {
       /* Optimize unsigned multiplication by power of 2 constant
	  using 2 shifts, one for result, one to extract the shifted
	  out bits to see if they are all zero.
	  Don't do this if optimizing for size and we have umulv4_optab,
	  in that case assume multiplication will be shorter.
	  This is heuristics based on the single target that provides
	  umulv4 right now (i?86/x86_64), if further targets add it, this
	  might need to be revisited.
	  Cases where both operands are constant should be folded already
	  during GIMPLE, and cases where one operand is constant but not
	  power of 2 are questionable, either the WIDEN_MULT_EXPR case
	  below can be done without multiplication, just by shifts and adds,
	  or we'd need to divide the result (and hope it actually doesn't
	  really divide nor multiply) and compare the result of the division
	  with the original operand.  */
       rtx opn0 = op0;
       rtx opn1 = op1;
       tree argn0 = arg0;
       tree argn1 = arg1;
       if (integer_pow2p (arg0))
	 {
	   std::swap (opn0, opn1);
	   std::swap (argn0, argn1);
	 }
       int cnt = tree_log2 (argn1);
       if (cnt >= 0 && cnt < GET_MODE_PRECISION (mode))
	 {
	   rtx upper = const0_rtx;
	   res = expand_shift (LSHIFT_EXPR, mode, opn0, cnt, NULL_RTX, uns);
	   if (cnt != 0)
	     upper = expand_shift (RSHIFT_EXPR, mode, opn0,
				   GET_MODE_PRECISION (mode) - cnt,
				   NULL_RTX, uns);
	   do_compare_rtx_and_jump (upper, const0_rtx, EQ, true, mode,
				    NULL_RTX, NULL, done_label,
				    profile_probability::very_likely ());
	   goto do_error_label;
	 }
     }
   if (icode != CODE_FOR_nothing)
     {
       struct expand_operand ops[4];
...
2017-11-30 Jakub Jelinek <jakub@redhat.com>
PR target/83210
* gcc.target/i386/pr83210.c: New test.
2017-11-30  Jan Hubicka  <hubicka@ucw.cz>
	PR target/81616
...
/* PR target/83210 */
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-not {\mmul[lq]\M} } } */
void bar (void);
unsigned
f1 (unsigned int x)
{
unsigned res;
if (__builtin_mul_overflow (x, 2, &res))
bar ();
return res;
}
unsigned long
f2 (unsigned long x)
{
unsigned long res;
if (__builtin_mul_overflow (16, x, &res))
bar ();
return res;
}
unsigned long long
f3 (unsigned long long x)
{
unsigned long long res;
if (__builtin_mul_overflow (x, (1ULL << (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ - 1)), &res))
bar ();
return res;
}
#ifdef __SIZEOF_INT128__
unsigned __int128
f4 (unsigned __int128 x)
{
unsigned __int128 res;
if (__builtin_mul_overflow (x, (((unsigned __int128) 1) << (__SIZEOF_INT128__ * __CHAR_BIT__ / 2)), &res))
bar ();
return res;
}
unsigned __int128
f5 (unsigned __int128 x)
{
unsigned __int128 res;
if (__builtin_mul_overflow (x, (((unsigned __int128) 1) << (__SIZEOF_INT128__ * __CHAR_BIT__ / 2 + 3)), &res))
bar ();
return res;
}
#endif