Commit ebff937c, authored and committed by Stuart Hastings

i386.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.

	* gcc/config/i386/i386.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.
	* gcc/config/i386/darwin.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.
	* gcc/config/i386/i386.md (fixuns_trunc<mode>si2, fixuns_truncsfhi2,
	fixuns_truncdfhi2): New.
	(fix_truncsfdi_sse): Call ix86_expand_convert_sign_didf_sse.
	(floatunsdidf2): Call ix86_expand_convert_uns_didf_sse.
	(floatunssisf2): Add call to ix86_expand_convert_uns_sisf_sse.
	(floatunssidf2): Allow nonimmediate source.
	* gcc/config/i386/sse.md (movdi_to_sse): New.  (vec_concatv2di): Drop '*'.
	* gcc/config/i386/i386-protos.h (ix86_expand_convert_uns_si_sse,
	ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse,
	ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse): New.
	* gcc/config/i386/i386.c (ix86_expand_convert_uns_si_sse,
	ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse,
	ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse,
	ix86_build_const_vector, ix86_expand_vector_init_one_nonzero): New.
	(ix86_build_signbit_mask): Fix decl of v, refactor to call ix86_build_const_vector.
	(x86_emit_floatuns): Rewrite.

Co-Authored-By: Richard Henderson <rth@redhat.com>

From-SVN: r121790
parent 692b647c
gcc/ChangeLog
(ChangeLog hunk: adds the entry quoted in the commit message above, dated 2007-02-09.)
gcc/config/i386/darwin.h
@@ -66,6 +66,9 @@ Boston, MA 02110-1301, USA. */
#undef FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN (0)
#undef TARGET_KEEPS_VECTOR_ALIGNED_STACK
#define TARGET_KEEPS_VECTOR_ALIGNED_STACK 1
/* We want -fPIC by default, unless we're using -static to compile for
the kernel or some such. */
gcc/config/i386/i386-protos.h
@@ -89,6 +89,11 @@ extern void ix86_expand_binary_operator (enum rtx_code,
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
rtx[]);
extern void ix86_expand_convert_uns_si_sse (rtx, rtx);
extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx);
extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx);
extern void ix86_expand_convert_sign_didf_sse (rtx, rtx);
extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool);
extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
rtx[]);
gcc/config/i386/i386.h
@@ -658,6 +658,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN \
(ix86_preferred_stack_boundary > STACK_BOUNDARY && !TARGET_64BIT)
/* Target OS keeps a vector-aligned (128-bit, 16-byte) stack. This is
mandatory for the 64-bit ABI, and may or may not be true for other
operating systems. */
#define TARGET_KEEPS_VECTOR_ALIGNED_STACK TARGET_64BIT
/* Minimum allocation boundary for the code of a function. */
#define FUNCTION_BOUNDARY 8
gcc/config/i386/i386.md
@@ -4331,6 +4331,38 @@
}
})
;; Unsigned conversion to SImode.
(define_expand "fixuns_trunc<mode>si2"
[(set (match_operand:SI 0 "nonimmediate_operand" "")
(fix:SI (match_operand:SSEMODEF 1 "register_operand" "")))]
"!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_KEEPS_VECTOR_ALIGNED_STACK && !optimize_size"
{
ix86_expand_convert_uns_si_sse (operands[0], operands[1]);
DONE;
})
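
For illustration, a scalar C sketch of why an unsigned SImode result takes extra work when only the signed SSE conversions (cvttss2si/cvttsd2si) are available; this shows the general idea only, not the RTL that ix86_expand_convert_uns_si_sse actually emits.

/* Editor's sketch, plain C: results in [2^31, 2^32) must be rebased
   into the signed range before a signed conversion can be used.  */
#include <stdint.h>

static uint32_t
sf_to_uns_si_sketch (float x)
{
  const float two31 = 2147483648.0f;        /* 2^31, exactly representable */
  if (x < two31)
    return (uint32_t) (int32_t) x;          /* already fits the signed range */
  return (uint32_t) (int32_t) (x - two31)   /* rebase, convert as signed...  */
         + 0x80000000u;                     /* ...then add 2^31 back */
}
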
;; Unsigned conversion to HImode.
;; Without these patterns, we'll try the unsigned SI conversion which
;; is complex for SSE, rather than the signed SI conversion, which isn't.
(define_expand "fixuns_truncsfhi2"
[(set (match_dup 2)
(fix:SI (match_operand:SF 1 "nonimmediate_operand" "")))
(set (match_operand:HI 0 "nonimmediate_operand" "")
(subreg:HI (match_dup 2) 0))]
"TARGET_SSE_MATH"
"operands[2] = gen_reg_rtx (SImode);")
(define_expand "fixuns_truncdfhi2"
[(set (match_dup 2)
(fix:SI (match_operand:DF 1 "nonimmediate_operand" "")))
(set (match_operand:HI 0 "nonimmediate_operand" "")
(subreg:HI (match_dup 2) 0))]
"TARGET_SSE_MATH"
"operands[2] = gen_reg_rtx (SImode);")
;; When SSE is available, it is always faster to use it!
(define_insn "fix_truncsfdi_sse"
[(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -4848,8 +4880,14 @@
(define_expand "floatdidf2"
[(set (match_operand:DF 0 "register_operand" "")
(float:DF (match_operand:DI 1 "nonimmediate_operand" "")))]
"TARGET_80387 || (TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)"
"")
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
if (!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)
{
ix86_expand_convert_sign_didf_sse (operands[0], operands[1]);
DONE;
}
})
(define_insn "*floatdidf2_mixed"
[(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
@@ -4944,21 +4982,40 @@
(define_expand "floatunssisf2"
[(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SI 1 "register_operand" ""))]
-  "!TARGET_64BIT && TARGET_SSE_MATH"
-  "x86_emit_floatuns (operands); DONE;")
+   (use (match_operand:SI 1 "nonimmediate_operand" ""))]
+  "!TARGET_64BIT"
+{
+  if (TARGET_SSE_MATH && TARGET_SSE2)
+    ix86_expand_convert_uns_sisf_sse (operands[0], operands[1]);
+  else
+    x86_emit_floatuns (operands);
+  DONE;
+})
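
For the non-SSE2 path this expander still falls back to x86_emit_floatuns, which the patch rewrites in i386.c. Below is a hedged scalar sketch of the classic unsigned-via-signed scheme such a helper is generally built around (an assumption about its shape; the function itself emits RTL, not C).

#include <stdint.h>

static float
uns_si_to_sf_sketch (uint32_t u)
{
  if ((int32_t) u >= 0)              /* sign bit clear: signed convert works */
    return (float) (int32_t) u;
  /* Sign bit set: halve with a sticky low bit, convert, then double.
     The sticky bit keeps the final rounding correct.  */
  uint32_t half = (u >> 1) | (u & 1);
  float f = (float) (int32_t) half;
  return f + f;
}
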
(define_expand "floatunssidf2"
[(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:SI 1 "nonimmediate_operand" ""))]
"!TARGET_64BIT && TARGET_SSE_MATH && TARGET_SSE2"
"ix86_expand_convert_uns_sidf_sse (operands[0], operands[1]); DONE;")
(define_expand "floatunsdisf2"
[(use (match_operand:SF 0 "register_operand" ""))
(use (match_operand:DI 1 "register_operand" ""))]
(use (match_operand:DI 1 "nonimmediate_operand" ""))]
"TARGET_64BIT && TARGET_SSE_MATH"
"x86_emit_floatuns (operands); DONE;")
(define_expand "floatunsdidf2"
[(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:DI 1 "register_operand" ""))]
"TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
"x86_emit_floatuns (operands); DONE;")
(use (match_operand:DI 1 "nonimmediate_operand" ""))]
"TARGET_SSE_MATH && TARGET_SSE2
&& (TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)"
{
if (TARGET_64BIT)
x86_emit_floatuns (operands);
else
ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
DONE;
})
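
For the DImode-to-DFmode expanders above (floatdidf2 and floatunsdidf2 on 32-bit targets), the underlying identity is a split into 32-bit halves, each of which converts to double exactly, followed by a single correctly rounded addition. The new i386.c routines implement an SSE2 variant of this; the scalar C below is only a sketch of the arithmetic, not the emitted code.

#include <stdint.h>

/* Unsigned DImode -> DFmode: both halves convert exactly.  */
static double
uns_di_to_df_sketch (uint64_t v)
{
  double hi = (double) (uint32_t) (v >> 32);
  double lo = (double) (uint32_t) v;
  return hi * 4294967296.0 + lo;        /* hi * 2^32 + lo */
}

/* Signed DImode -> DFmode: same split, but the high half is signed.  */
static double
sign_di_to_df_sketch (int64_t v)
{
  double hi = (double) (int32_t) (v >> 32);
  double lo = (double) (uint32_t) v;
  return hi * 4294967296.0 + lo;
}
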
;; SSE extract/set expanders
gcc/config/i386/sse.md
@@ -87,6 +87,47 @@
(const_string "V4SF")
(const_string "TI")))])
;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
;; from memory, we'd prefer to load the memory directly into the %xmm
;; register. To facilitate this happy circumstance, this pattern won't
;; split until after register allocation. If the 64-bit value didn't
;; come from memory, this is the best we can do. This is much better
;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.
(define_insn_and_split "movdi_to_sse"
[(parallel
[(set (match_operand:V4SI 0 "register_operand" "=?x,x")
(subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
(clobber (match_scratch:V4SI 2 "=&x,X"))])]
"!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
"#"
"&& reload_completed"
[(const_int 0)]
{
switch (which_alternative)
{
case 0:
/* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
Assemble the 64-bit DImode value in an xmm register. */
emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
gen_rtx_SUBREG (SImode, operands[1], 0)));
emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
gen_rtx_SUBREG (SImode, operands[1], 4)));
emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
break;
case 1:
emit_insn (gen_vec_concatv2di (operands[0], operands[1], const0_rtx));
break;
default:
gcc_unreachable ();
}
DONE;
})
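
An intrinsics-level paraphrase of the two alternatives described in the comment above, for illustration only (the split emits sse2_loadld, sse2_punpckldq and vec_concatv2di RTL directly, not intrinsics).

#include <emmintrin.h>
#include <stdint.h>

/* Alternative 0: the DImode value sits in a pair of 32-bit GPRs.  */
static __m128i
di_from_reg_pair (uint32_t lo, uint32_t hi)
{
  __m128i l = _mm_cvtsi32_si128 ((int) lo);   /* movd, like sse2_loadld */
  __m128i h = _mm_cvtsi32_si128 ((int) hi);   /* movd into the scratch */
  return _mm_unpacklo_epi32 (l, h);           /* punpckldq */
}

/* Alternative 1: the value is in memory -- load it straight into %xmm.  */
static __m128i
di_from_memory (const uint64_t *p)
{
  return _mm_loadl_epi64 ((const __m128i *) p);   /* movq, cf. vec_concatv2di */
}
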
(define_expand "movv4sf"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "")
(match_operand:V4SF 1 "nonimmediate_operand" ""))]
@@ -4118,7 +4159,7 @@
[(set_attr "type" "sselog,ssemov,ssemov")
(set_attr "mode" "TI,V4SF,V2SF")])
(define_insn "*vec_concatv2di"
(define_insn "vec_concatv2di"
[(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
(vec_concat:V2DI
(match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")