Commit 8efc8980 authored by Richard Sandiford, committed by Richard Sandiford

expmed.c (choose_mult_variant, [...]): New, split from...

	* expmed.c (choose_mult_variant, expand_mult_const): New, split from...
	(expand_mult): ...here.
	(extract_high_half): New, split out from expand_mult_highpart.
	(expand_mult_highpart_optab): Likewise.  Don't clobber target prematurely.
	(expand_mult_highpart): Evaluate the cost of a shift/add sequence,
	then see if any of the specialized optabs are cheaper.

From-SVN: r79673
parent d36d5600
2004-03-19 Richard Sandiford <rsandifo@redhat.com>
* expmed.c (choose_mult_variant, expand_mult_const): New, split from...
(expand_mult): ...here.
(extract_high_half): New, split out from expand_mult_highpart.
(expand_mult_highpart_optab): Likewise. Don't clobber target prematurely.
(expand_mult_highpart): Evaluate the cost of a shift/add sequence,
then see if any of the specialized optabs are cheaper.
2004-03-18 Ian Lance Taylor <ian@wasabisystems.com>
* mklibgcc.in: Remove obsolete MAYBE_USE_COLLECT2.
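To make the fixup variants concrete, here is a small plain-C illustration (not part of the commit; the function names are made up): multiplying by 5 can reuse the shift/add sequence for 4 plus one addition (add_variant), multiplying by -5 can reuse the sequence for 5 followed by a negation (negate_variant), and 10 can be synthesized directly (basic_variant).

#include <stdio.h>

/* Illustrative only, not GCC code: the three mult_variant fixups,
   written as plain C shift/add sequences.  */

static int mul5 (int x)        /* add_variant: x*5 = x*4 + x */
{
  return (x << 2) + x;
}

static int mul_minus5 (int x)  /* negate_variant: x*-5 = -(x*5) */
{
  return -((x << 2) + x);
}

static int mul10 (int x)       /* basic_variant: x*10 = (x*4 + x) << 1 */
{
  return ((x << 2) + x) << 1;
}

int main (void)
{
  printf ("%d %d %d\n", mul5 (7), mul_minus5 (7), mul10 (7)); /* 35 -35 70 */
  return 0;
}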
@@ -2149,11 +2149,24 @@ struct algorithm
   char log[MAX_BITS_PER_WORD];
 };

+/* Indicates the type of fixup needed after a constant multiplication.
+   BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
+   the result should be negated, and ADD_VARIANT means that the
+   multiplicand should be added to the result.  */
+enum mult_variant {basic_variant, negate_variant, add_variant};
+
 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT, int);
+static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
+                                 struct algorithm *, enum mult_variant *);
+static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
+                              const struct algorithm *, enum mult_variant);
 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
                                                  int, unsigned HOST_WIDE_INT *,
                                                  int *, int *);
 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
+static rtx extract_high_half (enum machine_mode, rtx);
+static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
+                                       int, int);

 /* Compute and return the best algorithm for multiplying by T.
    The algorithm must cost less than cost_limit.
    If retval.cost >= COST_LIMIT, no algorithm was found and all
@@ -2396,91 +2409,68 @@ synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
alg_out->ops * sizeof *alg_out->log);
}
-/* Perform a multiplication and return an rtx for the result.
-   MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
-   TARGET is a suggestion for where to store the result (an rtx).
-
-   We check specially for a constant integer as OP1.
-   If you want this check for OP0 as well, then before calling
-   you should swap the two operands if OP0 would be constant.  */
-
-rtx
-expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
-             int unsignedp)
-{
-  rtx const_op1 = op1;
-
-  /* synth_mult does an `unsigned int' multiply.  As long as the mode is
-     less than or equal in size to `unsigned int' this doesn't matter.
-     If the mode is larger than `unsigned int', then synth_mult works only
-     if the constant value exactly fits in an `unsigned int' without any
-     truncation.  This means that multiplying by negative values does
-     not work; results are off by 2^32 on a 32 bit machine.  */
-
-  /* If we are multiplying in DImode, it may still be a win
-     to try to work with shifts and adds.  */
-  if (GET_CODE (op1) == CONST_DOUBLE
-      && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
-      && HOST_BITS_PER_INT >= BITS_PER_WORD
-      && CONST_DOUBLE_HIGH (op1) == 0)
-    const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
-  else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
-           && GET_CODE (op1) == CONST_INT
-           && INTVAL (op1) < 0)
-    const_op1 = 0;
-
-  /* We used to test optimize here, on the grounds that it's better to
-     produce a smaller program when -O is not used.
-     But this causes such a terrible slowdown sometimes
-     that it seems better to use synth_mult always.  */
-
-  if (const_op1 && GET_CODE (const_op1) == CONST_INT
-      && (unsignedp || ! flag_trapv))
-    {
-      struct algorithm alg;
-      struct algorithm alg2;
-      HOST_WIDE_INT val = INTVAL (op1);
-      HOST_WIDE_INT val_so_far;
-      rtx insn;
+/* Find the cheapest way of multiplying a value of mode MODE by VAL.
+   Try three variations:
+
+       - a shift/add sequence based on VAL itself
+       - a shift/add sequence based on -VAL, followed by a negation
+       - a shift/add sequence based on VAL - 1, followed by an addition.
+
+   Return true if the cheapest of these is better than register
+   multiplication, describing the algorithm in *ALG and final
+   fixup in *VARIANT.  */
+
+static bool
+choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
+                     struct algorithm *alg, enum mult_variant *variant)
+{
   int mult_cost;
-  enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
-
-  /* op0 must be register to make mult_cost match the precomputed
-     shiftadd_cost array.  */
-  op0 = force_reg (mode, op0);
-
-  /* Try to do the computation three ways: multiply by the negative of OP1
-     and then negate, do the multiplication directly, or do multiplication
-     by OP1 - 1.  */
+  struct algorithm alg2;
+  rtx reg;

-  mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
+  reg = gen_rtx_REG (mode, FIRST_PSEUDO_REGISTER);
+  mult_cost = rtx_cost (gen_rtx_MULT (mode, reg, GEN_INT (val)), SET);
   mult_cost = MIN (12 * add_cost, mult_cost);

-  synth_mult (&alg, val, mult_cost);
+  *variant = basic_variant;
+  synth_mult (alg, val, mult_cost);

   /* This works only if the inverted value actually fits in an
      `unsigned int' */
   if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
     {
-      synth_mult (&alg2, - val,
-                  (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
-      if (alg2.cost + negate_cost < alg.cost)
-        alg = alg2, variant = negate_variant;
+      synth_mult (&alg2, -val, MIN (alg->cost, mult_cost) - negate_cost);
+      alg2.cost += negate_cost;
+      if (alg2.cost < alg->cost)
+        *alg = alg2, *variant = negate_variant;
     }

   /* This proves very useful for division-by-constant.  */
-  synth_mult (&alg2, val - 1,
-              (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
-  if (alg2.cost + add_cost < alg.cost)
-    alg = alg2, variant = add_variant;
+  synth_mult (&alg2, val - 1, MIN (alg->cost, mult_cost) - add_cost);
+  alg2.cost += add_cost;
+  if (alg2.cost < alg->cost)
+    *alg = alg2, *variant = add_variant;

-  if (alg.cost < mult_cost)
-    {
-      /* We found something cheaper than a multiply insn.  */
+  return alg->cost < mult_cost;
+}
+
+/* A subroutine of expand_mult, used for constant multiplications.
+   Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
+   convenient.  Use the shift/add sequence described by ALG and apply
+   the final fixup specified by VARIANT.  */
+
+static rtx
+expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
+                   rtx target, const struct algorithm *alg,
+                   enum mult_variant variant)
+{
+  HOST_WIDE_INT val_so_far;
+  rtx insn, accum, tem;
   int opno;
-  rtx accum, tem;
   enum machine_mode nmode;
/* op0 must be register to make mult_cost match the precomputed
shiftadd_cost array. */
op0 = protect_from_queue (op0, 0);
/* Avoid referencing memory over and over.
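A compilable sketch of the decision choose_mult_variant makes, under a made-up cost model in which every add costs one unit and shifts are free; all names here are hypothetical, not GCC's:

/* Toy cost of a shift/add sequence: roughly one add per set bit.  */
static int toy_synth_cost (long val)
{
  unsigned long v = val < 0 ? -(unsigned long) val : (unsigned long) val;
  int cost = 0;
  for (; v != 0; v >>= 1)
    cost += (int) (v & 1);
  return cost;
}

enum toy_variant { TOY_BASIC, TOY_NEGATE, TOY_ADD };

/* Returns nonzero when some shift/add variant beats a multiply insn.  */
static int
toy_choose_variant (long val, int mult_cost, int negate_cost, int add_cost,
                    enum toy_variant *variant)
{
  int best = toy_synth_cost (val);
  *variant = TOY_BASIC;
  if (toy_synth_cost (-val) + negate_cost < best)
    {
      best = toy_synth_cost (-val) + negate_cost;
      *variant = TOY_NEGATE;
    }
  if (toy_synth_cost (val - 1) + add_cost < best)
    {
      best = toy_synth_cost (val - 1) + add_cost;
      *variant = TOY_ADD;
    }
  return best < mult_cost;
}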
@@ -2491,12 +2481,12 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,

   /* ACCUM starts out either as OP0 or as a zero, depending on
      the first operation.  */

-  if (alg.op[0] == alg_zero)
+  if (alg->op[0] == alg_zero)
     {
       accum = copy_to_mode_reg (mode, const0_rtx);
       val_so_far = 0;
     }
-  else if (alg.op[0] == alg_m)
+  else if (alg->op[0] == alg_m)
     {
       accum = copy_to_mode_reg (mode, op0);
       val_so_far = 1;
@@ -2504,18 +2494,18 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
   else
     abort ();

-  for (opno = 1; opno < alg.ops; opno++)
+  for (opno = 1; opno < alg->ops; opno++)
     {
-      int log = alg.log[opno];
+      int log = alg->log[opno];
       int preserve = preserve_subexpressions_p ();
       rtx shift_subtarget = preserve ? 0 : accum;
       rtx add_target
-        = (opno == alg.ops - 1 && target != 0 && variant != add_variant
+        = (opno == alg->ops - 1 && target != 0 && variant != add_variant
           && ! preserve)
          ? target : 0;
       rtx accum_target = preserve ? 0 : accum;

-      switch (alg.op[opno])
+      switch (alg->op[opno])
        {
        case alg_shift:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
@@ -2527,8 +2517,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
          tem = expand_shift (LSHIFT_EXPR, mode, op0,
                              build_int_2 (log, 0), NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
-                                add_target
-                                ? add_target : accum_target);
+                                add_target ? add_target : accum_target);
          val_so_far += (HOST_WIDE_INT) 1 << log;
          break;
@@ -2536,8 +2525,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
          tem = expand_shift (LSHIFT_EXPR, mode, op0,
                              build_int_2 (log, 0), NULL_RTX, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
-                                add_target
-                                ? add_target : accum_target);
+                                add_target ? add_target : accum_target);
          val_so_far -= (HOST_WIDE_INT) 1 << log;
          break;
@@ -2546,18 +2534,15 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
                                build_int_2 (log, 0), shift_subtarget,
                                0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
-                                add_target
-                                ? add_target : accum_target);
+                                add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) + 1;
          break;

        case alg_sub_t2_m:
          accum = expand_shift (LSHIFT_EXPR, mode, accum,
-                               build_int_2 (log, 0), shift_subtarget,
-                               0);
+                               build_int_2 (log, 0), shift_subtarget, 0);
          accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
-                                add_target
-                                ? add_target : accum_target);
+                                add_target ? add_target : accum_target);
          val_so_far = (val_so_far << log) - 1;
          break;
@@ -2565,8 +2550,7 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
          tem = expand_shift (LSHIFT_EXPR, mode, accum,
                              build_int_2 (log, 0), NULL_RTX, 0);
          accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
-                                add_target
-                                ? add_target : accum_target);
+                                add_target ? add_target : accum_target);
          val_so_far += val_so_far << log;
          break;
@@ -2596,15 +2580,13 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
     }

   insn = get_last_insn ();
-  set_unique_reg_note (insn,
-                       REG_EQUAL,
-                       gen_rtx_MULT (nmode, tem,
-                                     GEN_INT (val_so_far)));
+  set_unique_reg_note (insn, REG_EQUAL,
+                       gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
 }

 if (variant == negate_variant)
   {
-    val_so_far = - val_so_far;
+    val_so_far = -val_so_far;
     accum = expand_unop (mode, neg_optab, accum, target, 0);
   }
else if (variant == add_variant)
@@ -2617,8 +2599,53 @@ expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
     abort ();

   return accum;
-        }
-    }
+}
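As a worked example of the kind of sequence expand_mult_const replays (plain C rather than RTL, illustration only), here is one way synth_mult can decompose VAL = 10, with the val_so_far bookkeeping shown in comments:

/* Sketch only: one sequence synth_mult can find for VAL = 10.  */
static int mul10_sketch (int x)
{
  int accum = x;             /* alg_m:        val_so_far = 1           */
  accum = (accum << 2) + x;  /* alg_add_t2_m: val_so_far = 1*4 + 1 = 5 */
  accum <<= 1;               /* alg_shift:    val_so_far = 5*2 = 10    */
  return accum;
}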
/* Perform a multiplication and return an rtx for the result.
MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
TARGET is a suggestion for where to store the result (an rtx).
We check specially for a constant integer as OP1.
If you want this check for OP0 as well, then before calling
you should swap the two operands if OP0 would be constant. */
rtx
expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
int unsignedp)
{
rtx const_op1 = op1;
enum mult_variant variant;
struct algorithm algorithm;
/* synth_mult does an `unsigned int' multiply. As long as the mode is
less than or equal in size to `unsigned int' this doesn't matter.
If the mode is larger than `unsigned int', then synth_mult works only
if the constant value exactly fits in an `unsigned int' without any
truncation. This means that multiplying by negative values does
not work; results are off by 2^32 on a 32 bit machine. */
/* If we are multiplying in DImode, it may still be a win
to try to work with shifts and adds. */
if (GET_CODE (op1) == CONST_DOUBLE
&& GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
&& HOST_BITS_PER_INT >= BITS_PER_WORD
&& CONST_DOUBLE_HIGH (op1) == 0)
const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
&& GET_CODE (op1) == CONST_INT
&& INTVAL (op1) < 0)
const_op1 = 0;
/* We used to test optimize here, on the grounds that it's better to
produce a smaller program when -O is not used.
But this causes such a terrible slowdown sometimes
that it seems better to use synth_mult always. */
if (const_op1 && GET_CODE (const_op1) == CONST_INT
&& (unsignedp || !flag_trapv)
&& choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant))
return expand_mult_const (mode, op0, INTVAL (const_op1), target,
&algorithm, variant);
if (GET_CODE (op0) == CONST_DOUBLE)
{
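The `unsigned int' caveat in the comment above can be reproduced in plain C (illustration only, assuming 32-bit int and 64-bit long long): a negative multiplier truncated to 32 bits yields a product that is off by a multiple of 2^32 in the wider mode.

#include <stdio.h>

int main (void)
{
  long long x = 3;
  unsigned int trunc = (unsigned int) -5;   /* 4294967291, not -5 */
  long long wrong = x * (long long) trunc;  /* 12884901873 */
  long long right = x * -5LL;               /* -15 */
  /* The difference is exactly 3 * 2^32.  */
  printf ("%lld %lld %lld\n", wrong, right, wrong - right);
  return 0;
}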
@@ -2832,70 +2859,46 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
return target;
}
-/* Emit code to multiply OP0 and CNST1, putting the high half of the result
-   in TARGET if that is convenient, and return where the result is.  If the
-   operation can not be performed, 0 is returned.
-
-   MODE is the mode of operation and result.
-
-   UNSIGNEDP nonzero means unsigned multiply.
-
-   MAX_COST is the total allowed cost for the expanded RTL.  */
-
-rtx
-expand_mult_highpart (enum machine_mode mode, rtx op0,
-                      unsigned HOST_WIDE_INT cnst1, rtx target,
-                      int unsignedp, int max_cost)
+/* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
+
+static rtx
+extract_high_half (enum machine_mode mode, rtx op)
+{
+  enum machine_mode wider_mode;
+
+  if (mode == word_mode)
+    return gen_highpart (mode, op);
+
+  wider_mode = GET_MODE_WIDER_MODE (mode);
+  op = expand_shift (RSHIFT_EXPR, wider_mode, op,
+                     build_int_2 (GET_MODE_BITSIZE (mode), 0), 0, 1);
+  return convert_modes (mode, wider_mode, op, 0);
+}
+
+/* Like expand_mult_highpart, but only consider using a multiplication
+   optab.  OP1 is an rtx for the constant operand.  */
+
+static rtx
+expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
+                            rtx target, int unsignedp, int max_cost)
 {
-  enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
-  optab mul_highpart_optab;
+  enum machine_mode wider_mode;
   optab moptab;
   rtx tem;
-  int size = GET_MODE_BITSIZE (mode);
-  rtx op1, wide_op1;
-
-  /* We can't support modes wider than HOST_BITS_PER_INT.  */
-  if (size > HOST_BITS_PER_WIDE_INT)
-    abort ();
-
-  op1 = gen_int_mode (cnst1, mode);
-
-  wide_op1
-    = immed_double_const (cnst1,
-                          (unsignedp
-                           ? (HOST_WIDE_INT) 0
-                           : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
-                          wider_mode);
-
-  /* expand_mult handles constant multiplication of word_mode
-     or narrower.  It does a poor job for large modes.  */
-  if (size < BITS_PER_WORD
-      && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
-    {
-      /* We have to do this, since expand_binop doesn't do conversion for
-         multiply.  Maybe change expand_binop to handle widening multiply?  */
-      op0 = convert_to_mode (wider_mode, op0, unsignedp);
-
-      /* We know that this can't have signed overflow, so pretend this is
-         an unsigned multiply.  */
-      tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
-      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-                          build_int_2 (size, 0), NULL_RTX, 1);
-      return convert_modes (mode, wider_mode, tem, unsignedp);
-    }
+  int size;

   if (target == 0)
     target = gen_reg_rtx (mode);

+  wider_mode = GET_MODE_WIDER_MODE (mode);
+  size = GET_MODE_BITSIZE (mode);
+
   /* Firstly, try using a multiplication insn that only generates the needed
      high part of the product, and in the sign flavor of unsignedp.  */
   if (mul_highpart_cost[(int) mode] < max_cost)
     {
-      mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
-      target = expand_binop (mode, mul_highpart_optab,
-                             op0, op1, target, unsignedp, OPTAB_DIRECT);
-      if (target)
-        return target;
+      moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
+      tem = expand_binop (mode, moptab, op0, op1, target,
+                          unsignedp, OPTAB_DIRECT);
+      if (tem)
+        return tem;
     }
/* Secondly, same as above, but use sign flavor opposite of unsignedp.
@@ -2904,13 +2907,13 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
       && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
           < max_cost))
     {
-      mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
-      target = expand_binop (mode, mul_highpart_optab,
-                             op0, op1, target, unsignedp, OPTAB_DIRECT);
-      if (target)
+      moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
+      tem = expand_binop (mode, moptab, op0, op1, target,
+                          unsignedp, OPTAB_DIRECT);
+      if (tem)
        /* We used the wrong signedness.  Adjust the result.  */
-        return expand_mult_highpart_adjust (mode, target, op0,
-                                            op1, target, unsignedp);
+        return expand_mult_highpart_adjust (mode, tem, op0, op1,
+                                            tem, unsignedp);
     }
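The "Don't clobber target prematurely" part of the change is visible above: each attempt is now written to tem rather than target, so a failed attempt no longer destroys the caller's suggested target. A toy C illustration of the hazard, with hypothetical names (not GCC code):

/* A failed attempt must not overwrite the caller's suggested target,
   because later strategies still want to reuse it.  */
static int try_insn (int works, int target) { return works ? target : 0; }

static int expand_sketch (int target)
{
  int tem = try_insn (0, target);  /* first attempt fails, returns 0 */
  if (tem != 0)
    return tem;
  /* With the old style, `target' itself would now be 0 and the
     suggestion would be lost for the attempt below.  */
  return try_insn (1, target);
}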
/* Try widening multiplication. */
@@ -2918,8 +2921,10 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
   if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
       && mul_widen_cost[(int) wider_mode] < max_cost)
     {
-      op1 = force_reg (mode, op1);
-      goto try;
+      tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+                          unsignedp, OPTAB_WIDEN);
+      if (tem)
+        return extract_high_half (mode, tem);
     }
/* Try widening the mode and perform a non-widening multiplication. */
@@ -2928,8 +2933,10 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
       && size - 1 < BITS_PER_WORD
       && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
     {
-      op1 = wide_op1;
-      goto try;
+      tem = expand_binop (wider_mode, moptab, op0, op1, 0,
+                          unsignedp, OPTAB_WIDEN);
+      if (tem)
+        return extract_high_half (mode, tem);
     }
/* Try widening multiplication of opposite signedness, and adjust. */
@@ -2944,10 +2951,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
                         NULL_RTX, ! unsignedp, OPTAB_WIDEN);
       if (tem != 0)
        {
-         /* Extract the high half of the just generated product.  */
-         tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-                             build_int_2 (size, 0), NULL_RTX, 1);
-         tem = convert_modes (mode, wider_mode, tem, unsignedp);
+         tem = extract_high_half (mode, tem);
          /* We used the wrong signedness.  Adjust the result.  */
          return expand_mult_highpart_adjust (mode, tem, op0, op1,
                                              target, unsignedp);
@@ -2955,25 +2959,52 @@ expand_mult_highpart (enum machine_mode mode, rtx op0,
}
return 0;
}
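What extract_high_half computes can be sketched in plain C for a 32-bit mode on a host with 64-bit arithmetic (illustration only, not GCC code): widen, multiply, shift right by the mode's bit size, truncate back.

#include <stdint.h>

static uint32_t high_half_sketch (uint32_t a, uint32_t b)
{
  uint64_t wide = (uint64_t) a * b;   /* widening multiply */
  return (uint32_t) (wide >> 32);     /* the MODE high part */
}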
-try:
-  /* Pass NULL_RTX as target since TARGET has wrong mode.  */
-  tem = expand_binop (wider_mode, moptab, op0, op1,
-                      NULL_RTX, unsignedp, OPTAB_WIDEN);
-  if (tem == 0)
-    return 0;
-
-  /* Extract the high half of the just generated product.  */
-  if (mode == word_mode)
-    {
-      return gen_highpart (mode, tem);
-    }
-  else
-    {
-      tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
-                          build_int_2 (size, 0), NULL_RTX, 1);
-      return convert_modes (mode, wider_mode, tem, unsignedp);
-    }
-}
+
+/* Emit code to multiply OP0 and CNST1, putting the high half of the result
+   in TARGET if that is convenient, and return where the result is.  If the
+   operation can not be performed, 0 is returned.
+
+   MODE is the mode of operation and result.
+
+   UNSIGNEDP nonzero means unsigned multiply.
+
+   MAX_COST is the total allowed cost for the expanded RTL.  */
+
+rtx
+expand_mult_highpart (enum machine_mode mode, rtx op0,
+                      unsigned HOST_WIDE_INT cnst1, rtx target,
+                      int unsignedp, int max_cost)
+{
+  enum machine_mode wider_mode;
+  enum mult_variant variant;
+  struct algorithm alg;
+  rtx op1, tem;
+
+  /* We can't support modes wider than HOST_BITS_PER_INT.  */
+  if (GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
+    abort ();
+
+  op1 = gen_int_mode (cnst1, mode);
+
+  /* See whether shift/add multiplication is cheap enough.  */
+  if (choose_mult_variant (mode, cnst1, &alg, &variant)
+      && (alg.cost += shift_cost[GET_MODE_BITSIZE (mode) - 1]) < max_cost)
+    {
+      /* See whether the specialized multiplication optabs are
+         cheaper than the shift/add version.  */
+      tem = expand_mult_highpart_optab (mode, op0, op1, target,
+                                        unsignedp, alg.cost);
+      if (tem)
+        return tem;
+
+      wider_mode = GET_MODE_WIDER_MODE (mode);
+      op0 = convert_to_mode (wider_mode, op0, unsignedp);
+      tem = expand_mult_const (wider_mode, op0, cnst1, 0, &alg, variant);
+      return extract_high_half (mode, tem);
+    }
+  return expand_mult_highpart_optab (mode, op0, op1, target,
+                                     unsignedp, max_cost);
+}
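The new structure of expand_mult_highpart amounts to the following cost discipline (toy sketch with made-up names and costs, not GCC code): price the shift/add sequence first, then require the optab route to beat that price rather than the caller's full budget.

#include <limits.h>

/* Pretend the specialized highpart insn costs 5; it is usable only
   when it fits under the given budget.  */
static int optab_route_cost (int budget)
{
  int cost = 5;
  return cost < budget ? cost : INT_MAX;
}

static int highpart_plan_cost (int shift_add_cost, int max_cost)
{
  if (shift_add_cost < max_cost)
    {
      int c = optab_route_cost (shift_add_cost); /* must beat shift/add */
      return c != INT_MAX ? c : shift_add_cost;  /* else use shift/add  */
    }
  return optab_route_cost (max_cost);            /* optab route only    */
}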
/* Emit the code to divide OP0 by OP1, putting the result in TARGET
......