Commit 6bc0ff89 by Richard Henderson (committed by Richard Henderson)

Add rtx costs for sse integer ops

From-SVN: r188785
parent b1f75d76
2012-06-19 Richard Henderson <rth@redhat.com>
* config/i386/i386.c (ix86_rtx_costs): Handle CONST_VECTOR, and
integral vector modes.
2012-06-19  Uros Bizjak  <ubizjak@gmail.com>
	* config/i386/i386.md (FRNDINT_ROUNDING): New int iterator.
......
@@ -31990,13 +31990,16 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
 	  break;
 	case 0:
 	case -1:
-	  /* Start with (MEM (SYMBOL_REF)), since that's where
-	     it'll probably end up.  Add a penalty for size.  */
-	  *total = (COSTS_N_INSNS (1)
-		    + (flag_pic != 0 && !TARGET_64BIT)
-		    + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
 	  break;
 	}
+      /* FALLTHRU */
+    case CONST_VECTOR:
+      /* Start with (MEM (SYMBOL_REF)), since that's where
+	 it'll probably end up.  Add a penalty for size.  */
+      *total = (COSTS_N_INSNS (1)
+		+ (flag_pic != 0 && !TARGET_64BIT)
+		+ (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
       return true;

     case ZERO_EXTEND:
@@ -32016,8 +32019,9 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
       return false;

     case ASHIFT:
-      if (CONST_INT_P (XEXP (x, 1))
-	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
+      if (SCALAR_INT_MODE_P (mode)
+	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
+	  && CONST_INT_P (XEXP (x, 1)))
 	{
 	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
 	  if (value == 1)
@@ -32038,7 +32042,15 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
     case ASHIFTRT:
     case LSHIFTRT:
     case ROTATERT:
-      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
+      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+	{
+	  /* ??? Should be SSE vector operation cost.  */
+	  /* At least for published AMD latencies, this really is the same
+	     as the latency for a simple fpu operation like fabs.  */
+	  *total = cost->fabs;
+	  return false;
+	}
+      if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
 	{
 	  if (CONST_INT_P (XEXP (x, 1)))
 	    {
@@ -32107,6 +32119,16 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
 	  *total = cost->fmul;
 	  return false;
 	}
+      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+	{
+	  /* Without sse4.1, we don't have PMULLD; it's emulated with 7
+	     insns, including two PMULUDQ.  */
+	  if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
+	    *total = cost->fmul * 2 + cost->fabs * 5;
+	  else
+	    *total = cost->fmul;
+	  return false;
+	}
       else
 	{
 	  rtx op0 = XEXP (x, 0);
@@ -32171,7 +32193,7 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
     case PLUS:
       if (GET_MODE_CLASS (mode) == MODE_INT
-	  && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
+	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
 	{
 	  if (GET_CODE (XEXP (x, 0)) == PLUS
 	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
...@@ -32271,6 +32293,14 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total, ...@@ -32271,6 +32293,14 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
/* FALLTHRU */ /* FALLTHRU */
case NOT: case NOT:
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
{
/* ??? Should be SSE vector operation cost. */
/* At least for published AMD latencies, this really is the same
as the latency for a simple fpu operation like fabs. */
*total = cost->fabs;
return false;
}
if (!TARGET_64BIT && mode == DImode) if (!TARGET_64BIT && mode == DImode)
*total = cost->add * 2; *total = cost->add * 2;
else else
@@ -32331,7 +32361,7 @@ ix86_rtx_costs (rtx x, int code, int outer_code_i, int opno, int *total,
       /* ??? Assume all of these vector manipulation patterns are
 	 recognizable.  In which case they all pretty much have the
 	 same cost.  */
-      *total = COSTS_N_INSNS (1);
+      *total = cost->fabs;
       return true;

     default:
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.