Commit 00f07b86 by Richard Henderson Committed by Richard Henderson

Move vector highpart emulation to the optabs layer

        * expmed.c (expmed_mult_highpart): Rename from expand_mult_highpart.
        (expmed_mult_highpart_optab): Rename from expand_mult_highpart_optab.
        * optabs.c (can_mult_highpart_p): New.
        (expand_mult_highpart): New.
        * expr.c (expand_expr_real_2) [MULT_HIGHPART_EXPR]: Use it.
        * tree-vect-generic.c (expand_vector_operations_1): Don't expand
        by pieces if can_mult_highpart_p.
        (expand_vector_divmod): Use can_mult_highpart_p and always
        generate MULT_HIGHPART_EXPR.
        * tree-vect-patterns.c (vect_recog_divmod_pattern): Likewise.
        * tree-vect-stmts.c (vectorizable_operation): Likewise.

From-SVN: r189407
parent 9283726f
2012-07-10 Richard Henderson <rth@redhat.com> 2012-07-10 Richard Henderson <rth@redhat.com>
* expmed.c (expmed_mult_highpart): Rename from expand_mult_highpart.
(expmed_mult_highpart_optab): Rename from expand_mult_highpart_optab.
* optabs.c (can_mult_highpart_p): New.
(expand_mult_highpart): New.
* expr.c (expand_expr_real_2) [MULT_HIGHPART_EXPR]: Use it.
* tree-vect-generic.c (expand_vector_operations_1): Don't expand
by pieces if can_mult_highpart_p.
(expand_vector_divmod): Use can_mult_highpart_p and always
generate MULT_HIGHPART_EXPR.
* tree-vect-patterns.c (vect_recog_divmod_pattern): Likewise.
* tree-vect-stmts.c (vectorizable_operation): Likewise.
* config/spu/spu-builtins.md (spu_mpy): Move to spu.md. * config/spu/spu-builtins.md (spu_mpy): Move to spu.md.
(spu_mpyu, spu_mpyhhu, spu_mpyhh): Likewise. (spu_mpyu, spu_mpyhhu, spu_mpyhh): Likewise.
* config/spu/spu.md (vec_widen_smult_odd_v8hi): Rename from spu_mpy. * config/spu/spu.md (vec_widen_smult_odd_v8hi): Rename from spu_mpy.
......
...@@ -2381,8 +2381,8 @@ static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx, ...@@ -2381,8 +2381,8 @@ static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
const struct algorithm *, enum mult_variant); const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int); static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx); static rtx extract_high_half (enum machine_mode, rtx);
static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int); static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx, static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
int, int); int, int);
/* Compute and return the best algorithm for multiplying by T. /* Compute and return the best algorithm for multiplying by T.
The algorithm must cost less than cost_limit The algorithm must cost less than cost_limit
...@@ -3477,7 +3477,7 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0, ...@@ -3477,7 +3477,7 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
return target; return target;
} }
/* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */ /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
static rtx static rtx
extract_high_half (enum machine_mode mode, rtx op) extract_high_half (enum machine_mode mode, rtx op)
...@@ -3495,11 +3495,11 @@ extract_high_half (enum machine_mode mode, rtx op) ...@@ -3495,11 +3495,11 @@ extract_high_half (enum machine_mode mode, rtx op)
return convert_modes (mode, wider_mode, op, 0); return convert_modes (mode, wider_mode, op, 0);
} }
/* Like expand_mult_highpart, but only consider using a multiplication /* Like expmed_mult_highpart, but only consider using a multiplication
optab. OP1 is an rtx for the constant operand. */ optab. OP1 is an rtx for the constant operand. */
static rtx static rtx
expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
rtx target, int unsignedp, int max_cost) rtx target, int unsignedp, int max_cost)
{ {
rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode); rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
...@@ -3610,7 +3610,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1, ...@@ -3610,7 +3610,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
MAX_COST is the total allowed cost for the expanded RTL. */ MAX_COST is the total allowed cost for the expanded RTL. */
static rtx static rtx
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
rtx target, int unsignedp, int max_cost) rtx target, int unsignedp, int max_cost)
{ {
enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode); enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
...@@ -3633,7 +3633,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, ...@@ -3633,7 +3633,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
mode == word_mode, however all the cost calculations in mode == word_mode, however all the cost calculations in
synth_mult etc. assume single-word operations. */ synth_mult etc. assume single-word operations. */
if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD) if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
return expand_mult_highpart_optab (mode, op0, op1, target, return expmed_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost); unsignedp, max_cost);
extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1]; extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
...@@ -3651,7 +3651,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, ...@@ -3651,7 +3651,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
{ {
/* See whether the specialized multiplication optabs are /* See whether the specialized multiplication optabs are
cheaper than the shift/add version. */ cheaper than the shift/add version. */
tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp, tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
alg.cost.cost + extra_cost); alg.cost.cost + extra_cost);
if (tem) if (tem)
return tem; return tem;
...@@ -3666,7 +3666,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1, ...@@ -3666,7 +3666,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
return tem; return tem;
} }
return expand_mult_highpart_optab (mode, op0, op1, target, return expmed_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost); unsignedp, max_cost);
} }
...@@ -3940,7 +3940,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -3940,7 +3940,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
In all cases but EXACT_DIV_EXPR, this multiplication requires the upper In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
half of the product. Different strategies for generating the product are half of the product. Different strategies for generating the product are
implemented in expand_mult_highpart. implemented in expmed_mult_highpart.
If what we actually want is the remainder, we generate that by another If what we actually want is the remainder, we generate that by another
by-constant multiplication and a subtraction. */ by-constant multiplication and a subtraction. */
...@@ -3990,7 +3990,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -3990,7 +3990,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
mode for which we can do the operation with a library call. */ mode for which we can do the operation with a library call. */
/* We might want to refine this now that we have division-by-constant /* We might want to refine this now that we have division-by-constant
optimization. Since expand_mult_highpart tries so many variants, it is optimization. Since expmed_mult_highpart tries so many variants, it is
not straightforward to generalize this. Maybe we should make an array not straightforward to generalize this. Maybe we should make an array
of possible modes in init_expmed? Save this for GCC 2.7. */ of possible modes in init_expmed? Save this for GCC 2.7. */
...@@ -4155,7 +4155,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4155,7 +4155,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
= (shift_cost[speed][compute_mode][post_shift - 1] = (shift_cost[speed][compute_mode][post_shift - 1]
+ shift_cost[speed][compute_mode][1] + shift_cost[speed][compute_mode][1]
+ 2 * add_cost[speed][compute_mode]); + 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, t1 = expmed_mult_highpart (compute_mode, op0,
GEN_INT (ml), GEN_INT (ml),
NULL_RTX, 1, NULL_RTX, 1,
max_cost - extra_cost); max_cost - extra_cost);
...@@ -4187,7 +4187,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4187,7 +4187,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost extra_cost
= (shift_cost[speed][compute_mode][pre_shift] = (shift_cost[speed][compute_mode][pre_shift]
+ shift_cost[speed][compute_mode][post_shift]); + shift_cost[speed][compute_mode][post_shift]);
t2 = expand_mult_highpart (compute_mode, t1, t2 = expmed_mult_highpart (compute_mode, t1,
GEN_INT (ml), GEN_INT (ml),
NULL_RTX, 1, NULL_RTX, 1,
max_cost - extra_cost); max_cost - extra_cost);
...@@ -4313,7 +4313,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4313,7 +4313,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift] extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1] + shift_cost[speed][compute_mode][size - 1]
+ add_cost[speed][compute_mode]); + add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, t1 = expmed_mult_highpart (compute_mode, op0,
GEN_INT (ml), NULL_RTX, 0, GEN_INT (ml), NULL_RTX, 0,
max_cost - extra_cost); max_cost - extra_cost);
if (t1 == 0) if (t1 == 0)
...@@ -4348,7 +4348,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4348,7 +4348,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift] extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1] + shift_cost[speed][compute_mode][size - 1]
+ 2 * add_cost[speed][compute_mode]); + 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, mlr, t1 = expmed_mult_highpart (compute_mode, op0, mlr,
NULL_RTX, 0, NULL_RTX, 0,
max_cost - extra_cost); max_cost - extra_cost);
if (t1 == 0) if (t1 == 0)
...@@ -4436,7 +4436,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode, ...@@ -4436,7 +4436,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift] extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1] + shift_cost[speed][compute_mode][size - 1]
+ 2 * add_cost[speed][compute_mode]); + 2 * add_cost[speed][compute_mode]);
t3 = expand_mult_highpart (compute_mode, t2, t3 = expmed_mult_highpart (compute_mode, t2,
GEN_INT (ml), NULL_RTX, 1, GEN_INT (ml), NULL_RTX, 1,
max_cost - extra_cost); max_cost - extra_cost);
if (t3 != 0) if (t3 != 0)
......
...@@ -8554,9 +8554,14 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode, ...@@ -8554,9 +8554,14 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
return expand_divmod (0, code, mode, op0, op1, target, unsignedp); return expand_divmod (0, code, mode, op0, op1, target, unsignedp);
case RDIV_EXPR: case RDIV_EXPR:
case MULT_HIGHPART_EXPR:
goto binop; goto binop;
case MULT_HIGHPART_EXPR:
expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
temp = expand_mult_highpart (mode, op0, op1, target, unsignedp);
gcc_assert (temp);
return temp;
case TRUNC_MOD_EXPR: case TRUNC_MOD_EXPR:
case FLOOR_MOD_EXPR: case FLOOR_MOD_EXPR:
case CEIL_MOD_EXPR: case CEIL_MOD_EXPR:
......
...@@ -7162,6 +7162,132 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2, ...@@ -7162,6 +7162,132 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
return ops[0].value; return ops[0].value;
} }
/* Return non-zero if a highpart multiply is supported or can be synthesized.
For the benefit of expand_mult_highpart, the return value is 1 for direct,
2 for even/odd widening, and 3 for hi/lo widening. */
int
can_mult_highpart_p (enum machine_mode mode, bool uns_p)
{
optab op;
unsigned char *sel;
unsigned i, nunits;
/* First preference: a direct highpart-multiply pattern.  */
op = uns_p ? umul_highpart_optab : smul_highpart_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
return 1;
/* If the mode is an integral vector, synth from widening operations.  */
if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
return 0;
nunits = GET_MODE_NUNITS (mode);
/* Scratch permutation selector, filled in below and checked against the
   target's permute capabilities.  */
sel = XALLOCAVEC (unsigned char, nunits);
/* Second preference: even/odd widening multiplies.  Both the even and the
   odd pattern must exist, and the target must be able to permute the two
   double-width results back into element order.  */
op = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
op = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
/* Selector picking the high halves out of the interleaved
   even/odd products; the !BYTES_BIG_ENDIAN term chooses which
   half of each double-width element is the high part.  */
for (i = 0; i < nunits; ++i)
sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0);
if (can_vec_perm_p (mode, false, sel))
return 2;
}
}
/* Third preference: hi/lo widening multiplies, again requiring both
   patterns plus a supporting permute to extract the high halves.  */
op = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
op = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
/* Selector extracting the high half of each double-width product
   from the concatenated lo/hi results.  */
for (i = 0; i < nunits; ++i)
sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
if (can_vec_perm_p (mode, false, sel))
return 3;
}
}
return 0;
}
/* Expand a highpart multiply: compute the high half of the (signed if
   UNS_P is false, unsigned otherwise) double-width product of OP0 and OP1
   in MODE, placing the result in TARGET if convenient.  Returns the result
   rtx, or NULL_RTX if the operation is not supported (see
   can_mult_highpart_p, whose return value selects the strategy here).  */
rtx
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
rtx target, bool uns_p)
{
struct expand_operand eops[3];
enum insn_code icode;
int method, i, nunits;
enum machine_mode wmode;
rtx m1, m2, perm;
optab tab1, tab2;
rtvec v;
method = can_mult_highpart_p (mode, uns_p);
switch (method)
{
case 0:
/* Not supported at all.  */
return NULL_RTX;
case 1:
/* Direct highpart-multiply pattern.  */
tab1 = uns_p ? umul_highpart_optab : smul_highpart_optab;
return expand_binop (mode, tab1, op0, op1, target, uns_p,
OPTAB_LIB_WIDEN);
case 2:
/* Even/odd widening multiplies plus a permute.  */
tab1 = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab;
tab2 = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
break;
case 3:
/* Hi/lo widening multiplies plus a permute.  On big-endian targets
   swap the two so that the permutation selector computed below
   (which assumes tab1 produces the lower-indexed elements) still
   extracts the correct halves.  */
tab1 = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
tab2 = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
if (BYTES_BIG_ENDIAN)
{
optab t = tab1;
tab1 = tab2;
tab2 = t;
}
break;
default:
gcc_unreachable ();
}
icode = optab_handler (tab1, mode);
nunits = GET_MODE_NUNITS (mode);
/* The widening patterns produce WMODE: same total size as MODE but half
   as many, double-width, elements.  */
wmode = insn_data[icode].operand[0].mode;
gcc_checking_assert (2 * GET_MODE_NUNITS (wmode) == nunits);
gcc_checking_assert (GET_MODE_SIZE (wmode) == GET_MODE_SIZE (mode));
/* Emit the first widening multiply and view the double-width result as
   a MODE vector so it can feed the permute.  */
create_output_operand (&eops[0], gen_reg_rtx (wmode), wmode);
create_input_operand (&eops[1], op0, mode);
create_input_operand (&eops[2], op1, mode);
expand_insn (icode, 3, eops);
m1 = gen_lowpart (mode, eops[0].value);
/* Likewise for the second widening multiply (odd, or the other of
   hi/lo).  The eops array is refilled from scratch.  */
create_output_operand (&eops[0], gen_reg_rtx (wmode), wmode);
create_input_operand (&eops[1], op0, mode);
create_input_operand (&eops[2], op1, mode);
expand_insn (optab_handler (tab2, mode), 3, eops);
m2 = gen_lowpart (mode, eops[0].value);
/* Build the permutation selector as a constant vector.  The element
   formulas mirror the ones checked in can_mult_highpart_p, so the
   permute is known to be supported.  */
v = rtvec_alloc (nunits);
if (method == 2)
{
for (i = 0; i < nunits; ++i)
RTVEC_ELT (v, i) = GEN_INT (!BYTES_BIG_ENDIAN + (i & ~1)
+ ((i & 1) ? nunits : 0));
}
else
{
for (i = 0; i < nunits; ++i)
RTVEC_ELT (v, i) = GEN_INT (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
}
perm = gen_rtx_CONST_VECTOR (mode, v);
/* Select the high halves out of M1/M2 into TARGET.  */
return expand_vec_perm (mode, m1, m2, perm, target);
}
/* Return true if there is a compare_and_swap pattern. */ /* Return true if there is a compare_and_swap pattern. */
......
...@@ -1014,6 +1014,12 @@ extern bool can_vec_perm_p (enum machine_mode, bool, const unsigned char *); ...@@ -1014,6 +1014,12 @@ extern bool can_vec_perm_p (enum machine_mode, bool, const unsigned char *);
/* Generate code for VEC_PERM_EXPR. */ /* Generate code for VEC_PERM_EXPR. */
extern rtx expand_vec_perm (enum machine_mode, rtx, rtx, rtx, rtx); extern rtx expand_vec_perm (enum machine_mode, rtx, rtx, rtx, rtx);
/* Return non-zero if target supports a given highpart multiplication. */
extern int can_mult_highpart_p (enum machine_mode, bool);
/* Generate code for MULT_HIGHPART_EXPR. */
extern rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, bool);
/* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing /* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing
if the target does not have such an insn. */ if the target does not have such an insn. */
......
...@@ -454,10 +454,9 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, ...@@ -454,10 +454,9 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
int dummy_int; int dummy_int;
unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type)); unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type))); unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
optab op;
tree *vec; tree *vec;
unsigned char *sel = NULL; tree cur_op, mulcst, tem;
tree cur_op, m1, m2, mulcst, perm_mask, wider_type, tem, decl_e, decl_o; optab op;
if (prec > HOST_BITS_PER_WIDE_INT) if (prec > HOST_BITS_PER_WIDE_INT)
return NULL_TREE; return NULL_TREE;
...@@ -745,54 +744,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, ...@@ -745,54 +744,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
return NULL_TREE; return NULL_TREE;
op = optab_for_tree_code (MULT_HIGHPART_EXPR, type, optab_default); if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
if (op != NULL && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) return NULL_TREE;
wider_type = decl_e = decl_o = NULL_TREE;
else
{
wider_type = build_nonstandard_integer_type (prec * 2, unsignedp),
wider_type = build_vector_type (wider_type, nunits / 2);
if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT
|| GET_MODE_BITSIZE (TYPE_MODE (wider_type))
!= GET_MODE_BITSIZE (TYPE_MODE (type)))
return NULL_TREE;
sel = XALLOCAVEC (unsigned char, nunits);
if (targetm.vectorize.builtin_mul_widen_even
&& targetm.vectorize.builtin_mul_widen_odd
&& (decl_e = targetm.vectorize.builtin_mul_widen_even (type))
&& (decl_o = targetm.vectorize.builtin_mul_widen_odd (type))
&& (TYPE_MODE (TREE_TYPE (TREE_TYPE (decl_e)))
== TYPE_MODE (wider_type)))
{
for (i = 0; i < nunits; i++)
sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0);
if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
decl_e = decl_o = NULL_TREE;
}
else
decl_e = decl_o = NULL_TREE;
if (decl_e == NULL_TREE)
{
op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
type, optab_default);
if (op == NULL
|| optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
return NULL_TREE;
op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
type, optab_default);
if (op == NULL
|| optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
return NULL_TREE;
for (i = 0; i < nunits; i++)
sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
return NULL_TREE;
}
}
cur_op = op0; cur_op = op0;
...@@ -830,46 +783,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, ...@@ -830,46 +783,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
for (i = 0; i < nunits; i++) for (i = 0; i < nunits; i++)
vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]); vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
mulcst = build_vector (type, vec); mulcst = build_vector (type, vec);
if (wider_type == NULL_TREE)
cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
else
{
for (i = 0; i < nunits; i++)
vec[i] = build_int_cst (TREE_TYPE (type), sel[i]);
perm_mask = build_vector (type, vec);
if (decl_e != NULL_TREE)
{
gimple call;
call = gimple_build_call (decl_e, 2, cur_op, mulcst);
m1 = create_tmp_reg (wider_type, NULL);
add_referenced_var (m1);
m1 = make_ssa_name (m1, call);
gimple_call_set_lhs (call, m1);
gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);
call = gimple_build_call (decl_o, 2, cur_op, mulcst);
m2 = create_tmp_reg (wider_type, NULL);
add_referenced_var (m2);
m2 = make_ssa_name (m2, call);
gimple_call_set_lhs (call, m2);
gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);
}
else
{
m1 = gimplify_build2 (gsi, BYTES_BIG_ENDIAN ? VEC_WIDEN_MULT_HI_EXPR
: VEC_WIDEN_MULT_LO_EXPR,
wider_type, cur_op, mulcst);
m2 = gimplify_build2 (gsi, BYTES_BIG_ENDIAN ? VEC_WIDEN_MULT_LO_EXPR
: VEC_WIDEN_MULT_HI_EXPR,
wider_type, cur_op, mulcst);
}
m1 = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, m1); cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
m2 = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, m2);
cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, m1, m2, perm_mask);
}
switch (mode) switch (mode)
{ {
...@@ -1454,13 +1369,17 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi) ...@@ -1454,13 +1369,17 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
if (compute_type == type) if (compute_type == type)
{ {
compute_mode = TYPE_MODE (compute_type); compute_mode = TYPE_MODE (compute_type);
if (VECTOR_MODE_P (compute_mode) if (VECTOR_MODE_P (compute_mode))
&& op != NULL {
&& optab_handler (op, compute_mode) != CODE_FOR_nothing) if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
return; return;
else if (code == MULT_HIGHPART_EXPR
/* There is no operation in hardware, so fall back to scalars. */ && can_mult_highpart_p (compute_mode,
compute_type = TREE_TYPE (type); TYPE_UNSIGNED (compute_type)))
return;
}
/* There is no operation in hardware, so fall back to scalars. */
compute_type = TREE_TYPE (type);
} }
gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR); gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
......
...@@ -1642,10 +1642,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, ...@@ -1642,10 +1642,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo); bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
optab optab; optab optab;
tree dummy, q; tree q;
enum tree_code dummy_code;
int dummy_int, prec; int dummy_int, prec;
VEC (tree, heap) *dummy_vec;
stmt_vec_info def_stmt_vinfo; stmt_vec_info def_stmt_vinfo;
if (!is_gimple_assign (last_stmt)) if (!is_gimple_assign (last_stmt))
...@@ -1814,23 +1812,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts, ...@@ -1814,23 +1812,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
|| prec > HOST_BITS_PER_WIDE_INT) || prec > HOST_BITS_PER_WIDE_INT)
return NULL; return NULL;
optab = optab_for_tree_code (MULT_HIGHPART_EXPR, vectype, optab_default); if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
if (optab == NULL return NULL;
|| optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
{
tree witype = build_nonstandard_integer_type (prec * 2,
TYPE_UNSIGNED (itype));
tree vecwtype = get_vectype_for_scalar_type (witype);
if (vecwtype == NULL_TREE)
return NULL;
if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
vecwtype, vectype,
&dummy, &dummy, &dummy_code,
&dummy_code, &dummy_int,
&dummy_vec))
return NULL;
}
STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL; STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
......
...@@ -3304,18 +3304,17 @@ static bool ...@@ -3304,18 +3304,17 @@ static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
gimple *vec_stmt, slp_tree slp_node) gimple *vec_stmt, slp_tree slp_node)
{ {
tree vec_dest, vec_dest2 = NULL_TREE; tree vec_dest;
tree vec_dest3 = NULL_TREE, vec_dest4 = NULL_TREE;
tree scalar_dest; tree scalar_dest;
tree op0, op1 = NULL_TREE, op2 = NULL_TREE; tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt); stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype, wide_vectype = NULL_TREE; tree vectype;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
enum tree_code code; enum tree_code code;
enum machine_mode vec_mode; enum machine_mode vec_mode;
tree new_temp; tree new_temp;
int op_type; int op_type;
optab optab, optab2 = NULL; optab optab;
int icode; int icode;
tree def; tree def;
gimple def_stmt; gimple def_stmt;
...@@ -3332,8 +3331,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3332,8 +3331,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
tree vop0, vop1, vop2; tree vop0, vop1, vop2;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
int vf; int vf;
unsigned char *sel = NULL;
tree decl1 = NULL_TREE, decl2 = NULL_TREE, perm_mask = NULL_TREE;
if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false; return false;
...@@ -3455,87 +3452,26 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3455,87 +3452,26 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
|| code == RROTATE_EXPR) || code == RROTATE_EXPR)
return false; return false;
optab = optab_for_tree_code (code, vectype, optab_default);
/* Supportable by target? */ /* Supportable by target? */
if (!optab && code != MULT_HIGHPART_EXPR)
vec_mode = TYPE_MODE (vectype);
if (code == MULT_HIGHPART_EXPR)
{ {
if (vect_print_dump_info (REPORT_DETAILS)) if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
fprintf (vect_dump, "no optab."); icode = 0;
return false; else
icode = CODE_FOR_nothing;
} }
vec_mode = TYPE_MODE (vectype); else
icode = optab ? (int) optab_handler (optab, vec_mode) : CODE_FOR_nothing; {
optab = optab_for_tree_code (code, vectype, optab_default);
if (icode == CODE_FOR_nothing if (!optab)
&& code == MULT_HIGHPART_EXPR
&& VECTOR_MODE_P (vec_mode)
&& BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
{
/* If MULT_HIGHPART_EXPR isn't supported by the backend, see
if we can emit VEC_WIDEN_MULT_{LO,HI}_EXPR followed by VEC_PERM_EXPR
or builtin_mul_widen_{even,odd} followed by VEC_PERM_EXPR. */
unsigned int prec = TYPE_PRECISION (TREE_TYPE (scalar_dest));
unsigned int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scalar_dest));
tree wide_type
= build_nonstandard_integer_type (prec * 2, unsignedp);
wide_vectype
= get_same_sized_vectype (wide_type, vectype);
sel = XALLOCAVEC (unsigned char, nunits_in);
if (VECTOR_MODE_P (TYPE_MODE (wide_vectype))
&& GET_MODE_SIZE (TYPE_MODE (wide_vectype))
== GET_MODE_SIZE (vec_mode))
{
if (targetm.vectorize.builtin_mul_widen_even
&& (decl1 = targetm.vectorize.builtin_mul_widen_even (vectype))
&& targetm.vectorize.builtin_mul_widen_odd
&& (decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype))
&& TYPE_MODE (TREE_TYPE (TREE_TYPE (decl1)))
== TYPE_MODE (wide_vectype))
{
for (i = 0; i < nunits_in; i++)
sel[i] = !BYTES_BIG_ENDIAN + (i & ~1)
+ ((i & 1) ? nunits_in : 0);
if (can_vec_perm_p (vec_mode, false, sel))
icode = 0;
}
if (icode == CODE_FOR_nothing)
{
decl1 = NULL_TREE;
decl2 = NULL_TREE;
optab = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
vectype, optab_default);
optab2 = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
vectype, optab_default);
if (optab != NULL
&& optab2 != NULL
&& optab_handler (optab, vec_mode) != CODE_FOR_nothing
&& optab_handler (optab2, vec_mode) != CODE_FOR_nothing
&& insn_data[optab_handler (optab, vec_mode)].operand[0].mode
== TYPE_MODE (wide_vectype)
&& insn_data[optab_handler (optab2,
vec_mode)].operand[0].mode
== TYPE_MODE (wide_vectype))
{
for (i = 0; i < nunits_in; i++)
sel[i] = !BYTES_BIG_ENDIAN + 2 * i;
if (can_vec_perm_p (vec_mode, false, sel))
icode = optab_handler (optab, vec_mode);
}
}
}
if (icode == CODE_FOR_nothing)
{ {
if (optab_for_tree_code (code, vectype, optab_default) == NULL) if (vect_print_dump_info (REPORT_DETAILS))
{ fprintf (vect_dump, "no optab.");
if (vect_print_dump_info (REPORT_DETAILS)) return false;
fprintf (vect_dump, "no optab.");
return false;
}
wide_vectype = NULL_TREE;
optab2 = NULL;
} }
icode = (int) optab_handler (optab, vec_mode);
} }
if (icode == CODE_FOR_nothing) if (icode == CODE_FOR_nothing)
...@@ -3575,16 +3511,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3575,16 +3511,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
fprintf (vect_dump, "transform binary/unary operation."); fprintf (vect_dump, "transform binary/unary operation.");
/* Handle def. */ /* Handle def. */
if (wide_vectype) vec_dest = vect_create_destination_var (scalar_dest, vectype);
{
vec_dest = vect_create_destination_var (scalar_dest, wide_vectype);
vec_dest2 = vect_create_destination_var (scalar_dest, wide_vectype);
vec_dest3 = vect_create_destination_var (scalar_dest, vectype);
vec_dest4 = vect_create_destination_var (scalar_dest, vectype);
perm_mask = vect_gen_perm_mask (vectype, sel);
}
else
vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Allocate VECs for vector operands. In case of SLP, vector operands are /* Allocate VECs for vector operands. In case of SLP, vector operands are
created in the previous stages of the recursion, so no allocation is created in the previous stages of the recursion, so no allocation is
...@@ -3693,66 +3620,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi, ...@@ -3693,66 +3620,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
? VEC_index (tree, vec_oprnds1, i) : NULL_TREE); ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
vop2 = ((op_type == ternary_op) vop2 = ((op_type == ternary_op)
? VEC_index (tree, vec_oprnds2, i) : NULL_TREE); ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
if (wide_vectype)
{
tree new_temp2, vce;
gcc_assert (code == MULT_HIGHPART_EXPR);
if (decl1 != NULL_TREE)
{
new_stmt = gimple_build_call (decl1, 2, vop0, vop1);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_stmt = gimple_build_call (decl2, 2, vop0, vop1);
new_temp2 = make_ssa_name (vec_dest2, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
else
{
new_temp = make_ssa_name (vec_dest, NULL);
new_stmt
= gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
? VEC_WIDEN_MULT_HI_EXPR
: VEC_WIDEN_MULT_LO_EXPR,
new_temp, vop0, vop1);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_temp2 = make_ssa_name (vec_dest2, NULL);
new_stmt
= gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
? VEC_WIDEN_MULT_LO_EXPR
: VEC_WIDEN_MULT_HI_EXPR,
new_temp2, vop0, vop1);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
vec_dest3, vce,
NULL_TREE);
new_temp = make_ssa_name (vec_dest3, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp2);
new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
vec_dest4, vce,
NULL_TREE);
new_temp2 = make_ssa_name (vec_dest4, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
new_temp = permute_vec_elements (new_temp, new_temp2,
perm_mask, stmt, gsi);
new_stmt = SSA_NAME_DEF_STMT (new_temp);
if (slp_node)
VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
new_stmt);
continue;
}
new_stmt = gimple_build_assign_with_ops3 (code, vec_dest, new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
vop0, vop1, vop2); vop0, vop1, vop2);
new_temp = make_ssa_name (vec_dest, new_stmt); new_temp = make_ssa_name (vec_dest, new_stmt);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment