Commit ebff937c, authored and committed by Stuart Hastings

i386.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.

	* gcc/config/i386/i386.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.
	* gcc/config/i386/darwin.h (TARGET_KEEPS_VECTOR_ALIGNED_STACK): New.
	* gcc/config/i386/i386.md (fixuns_trunc<mode>si2, fixuns_truncsfhi2,
	fixuns_truncdfhi2): New.
	(fix_truncsfdi_sse): Call ix86_expand_convert_sign_didf_sse.
	(floatunsdidf2): Call ix86_expand_convert_uns_didf_sse.
	(floatunssisf2): Add call to ix86_expand_convert_uns_sisf_sse.
	(floatunssidf2): Allow nonimmediate source.
	* gcc/config/i386/sse.md (movdi_to_sse): New.  (vec_concatv2di): Drop '*'.
	* gcc/config/i386/i386-protos.h (ix86_expand_convert_uns_si_sse,
	ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse,
	ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse): New.
	* gcc/config/i386/i386.c (ix86_expand_convert_uns_si_sse,
	ix86_expand_convert_uns_didf_sse, ix86_expand_convert_uns_sidf_sse,
	ix86_expand_convert_uns_sisf_sse, ix86_expand_convert_sign_didf_sse,
	ix86_build_const_vector, ix86_expand_vector_init_one_nonzero): New.
	(ix86_build_signbit_mask): Fix decl of v, refactor to call ix86_build_const_vector.
	(x86_emit_floatuns): Rewrite.

Co-Authored-By: Richard Henderson <rth@redhat.com>

From-SVN: r121790
parent 692b647c
gcc/ChangeLog
(ChangeLog hunk: adds the entry quoted in the commit message above, dated 2007-02-09.)
gcc/config/i386/darwin.h
@@ -66,6 +66,9 @@ Boston, MA 02110-1301, USA. */
#undef FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN (0)
#undef TARGET_KEEPS_VECTOR_ALIGNED_STACK
#define TARGET_KEEPS_VECTOR_ALIGNED_STACK 1
/* We want -fPIC by default, unless we're using -static to compile for
the kernel or some such. */
gcc/config/i386/i386-protos.h
@@ -89,6 +89,11 @@ extern void ix86_expand_binary_operator (enum rtx_code,
extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
extern void ix86_expand_unary_operator (enum rtx_code, enum machine_mode,
rtx[]);
extern void ix86_expand_convert_uns_si_sse (rtx, rtx);
extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
extern void ix86_expand_convert_uns_sidf_sse (rtx, rtx);
extern void ix86_expand_convert_uns_sisf_sse (rtx, rtx);
extern void ix86_expand_convert_sign_didf_sse (rtx, rtx);
extern rtx ix86_build_signbit_mask (enum machine_mode, bool, bool);
extern void ix86_expand_fp_absneg_operator (enum rtx_code, enum machine_mode,
rtx[]);
gcc/config/i386/i386.h
@@ -658,6 +658,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN \
(ix86_preferred_stack_boundary > STACK_BOUNDARY && !TARGET_64BIT)
/* Target OS keeps a vector-aligned (128-bit, 16-byte) stack. This is
mandatory for the 64-bit ABI, and may or may not be true for other
operating systems. */
#define TARGET_KEEPS_VECTOR_ALIGNED_STACK TARGET_64BIT
/* Minimum allocation boundary for the code of a function. */
#define FUNCTION_BOUNDARY 8
gcc/config/i386/i386.md
@@ -4331,6 +4331,38 @@
}
})
;; Unsigned conversion to SImode.
(define_expand "fixuns_trunc<mode>si2"
[(set (match_operand:SI 0 "nonimmediate_operand" "")
(fix:SI (match_operand:SSEMODEF 1 "register_operand" "")))]
"!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH
&& TARGET_KEEPS_VECTOR_ALIGNED_STACK && !optimize_size"
{
ix86_expand_convert_uns_si_sse (operands[0], operands[1]);
DONE;
})
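
For illustration, a scalar C sketch of why an unsigned SImode result takes extra work when only the signed SSE conversions (cvttss2si/cvttsd2si) are available; this shows the general idea only, not the RTL that ix86_expand_convert_uns_si_sse actually emits.

/* Editor's sketch, plain C: results in [2^31, 2^32) must be rebased
   into the signed range before a signed conversion can be used.  */
#include <stdint.h>

static uint32_t
sf_to_uns_si_sketch (float x)
{
  const float two31 = 2147483648.0f;        /* 2^31, exactly representable */
  if (x < two31)
    return (uint32_t) (int32_t) x;          /* already fits the signed range */
  return (uint32_t) (int32_t) (x - two31)   /* rebase, convert as signed...  */
         + 0x80000000u;                     /* ...then add 2^31 back */
}
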
;; Unsigned conversion to HImode.
;; Without these patterns, we'll try the unsigned SI conversion which
;; is complex for SSE, rather than the signed SI conversion, which isn't.
(define_expand "fixuns_truncsfhi2"
[(set (match_dup 2)
(fix:SI (match_operand:SF 1 "nonimmediate_operand" "")))
(set (match_operand:HI 0 "nonimmediate_operand" "")
(subreg:HI (match_dup 2) 0))]
"TARGET_SSE_MATH"
"operands[2] = gen_reg_rtx (SImode);")
(define_expand "fixuns_truncdfhi2"
[(set (match_dup 2)
(fix:SI (match_operand:DF 1 "nonimmediate_operand" "")))
(set (match_operand:HI 0 "nonimmediate_operand" "")
(subreg:HI (match_dup 2) 0))]
"TARGET_SSE_MATH"
"operands[2] = gen_reg_rtx (SImode);")
;; When SSE is available, it is always faster to use it!
(define_insn "fix_truncsfdi_sse"
[(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -4848,8 +4880,14 @@
(define_expand "floatdidf2"
[(set (match_operand:DF 0 "register_operand" "")
(float:DF (match_operand:DI 1 "nonimmediate_operand" "")))]
"TARGET_80387 || (TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)"
"")
"TARGET_80387 || (TARGET_SSE2 && TARGET_SSE_MATH)"
{
if (!TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH)
{
ix86_expand_convert_sign_didf_sse (operands[0], operands[1]);
DONE;
}
})
(define_insn "*floatdidf2_mixed"
[(set (match_operand:DF 0 "register_operand" "=f,?f,x,x")
@@ -4944,21 +4982,40 @@
(define_expand "floatunssisf2"
[(use (match_operand:SF 0 "register_operand" ""))
-   (use (match_operand:SI 1 "register_operand" ""))]
-  "!TARGET_64BIT && TARGET_SSE_MATH"
-  "x86_emit_floatuns (operands); DONE;")
+   (use (match_operand:SI 1 "nonimmediate_operand" ""))]
+  "!TARGET_64BIT"
+{
+  if (TARGET_SSE_MATH && TARGET_SSE2)
+    ix86_expand_convert_uns_sisf_sse (operands[0], operands[1]);
+  else
+    x86_emit_floatuns (operands);
+  DONE;
+})
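
For the non-SSE2 path this expander still falls back to x86_emit_floatuns, which the patch rewrites in i386.c. Below is a hedged scalar sketch of the classic unsigned-via-signed scheme such a helper is generally built around (an assumption about its shape; the function itself emits RTL, not C).

#include <stdint.h>

static float
uns_si_to_sf_sketch (uint32_t u)
{
  if ((int32_t) u >= 0)              /* sign bit clear: signed convert works */
    return (float) (int32_t) u;
  /* Sign bit set: halve with a sticky low bit, convert, then double.
     The sticky bit keeps the final rounding correct.  */
  uint32_t half = (u >> 1) | (u & 1);
  float f = (float) (int32_t) half;
  return f + f;
}
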
(define_expand "floatunssidf2"
[(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:SI 1 "nonimmediate_operand" ""))]
"!TARGET_64BIT && TARGET_SSE_MATH && TARGET_SSE2"
"ix86_expand_convert_uns_sidf_sse (operands[0], operands[1]); DONE;")
(define_expand "floatunsdisf2"
[(use (match_operand:SF 0 "register_operand" ""))
(use (match_operand:DI 1 "register_operand" ""))]
(use (match_operand:DI 1 "nonimmediate_operand" ""))]
"TARGET_64BIT && TARGET_SSE_MATH"
"x86_emit_floatuns (operands); DONE;")
(define_expand "floatunsdidf2"
[(use (match_operand:DF 0 "register_operand" ""))
(use (match_operand:DI 1 "register_operand" ""))]
"TARGET_64BIT && TARGET_SSE2 && TARGET_SSE_MATH"
"x86_emit_floatuns (operands); DONE;")
(use (match_operand:DI 1 "nonimmediate_operand" ""))]
"TARGET_SSE_MATH && TARGET_SSE2
&& (TARGET_64BIT || TARGET_KEEPS_VECTOR_ALIGNED_STACK)"
{
if (TARGET_64BIT)
x86_emit_floatuns (operands);
else
ix86_expand_convert_uns_didf_sse (operands[0], operands[1]);
DONE;
})
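
For the DImode-to-DFmode expanders above (floatdidf2 and floatunsdidf2 on 32-bit targets), the underlying identity is a split into 32-bit halves, each of which converts to double exactly, followed by a single correctly rounded addition. The new i386.c routines implement an SSE2 variant of this; the scalar C below is only a sketch of the arithmetic, not the emitted code.

#include <stdint.h>

/* Unsigned DImode -> DFmode: both halves convert exactly.  */
static double
uns_di_to_df_sketch (uint64_t v)
{
  double hi = (double) (uint32_t) (v >> 32);
  double lo = (double) (uint32_t) v;
  return hi * 4294967296.0 + lo;        /* hi * 2^32 + lo */
}

/* Signed DImode -> DFmode: same split, but the high half is signed.  */
static double
sign_di_to_df_sketch (int64_t v)
{
  double hi = (double) (int32_t) (v >> 32);
  double lo = (double) (uint32_t) v;
  return hi * 4294967296.0 + lo;
}
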
;; SSE extract/set expanders
gcc/config/i386/sse.md
@@ -87,6 +87,47 @@
(const_string "V4SF")
(const_string "TI")))])
;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
;; from memory, we'd prefer to load the memory directly into the %xmm
;; register. To facilitate this happy circumstance, this pattern won't
;; split until after register allocation. If the 64-bit value didn't
;; come from memory, this is the best we can do. This is much better
;; than storing %edx:%eax into a stack temporary and loading an %xmm
;; from there.
(define_insn_and_split "movdi_to_sse"
[(parallel
[(set (match_operand:V4SI 0 "register_operand" "=?x,x")
(subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
(clobber (match_scratch:V4SI 2 "=&x,X"))])]
"!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES"
"#"
"&& reload_completed"
[(const_int 0)]
{
switch (which_alternative)
{
case 0:
/* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
Assemble the 64-bit DImode value in an xmm register. */
emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
gen_rtx_SUBREG (SImode, operands[1], 0)));
emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
gen_rtx_SUBREG (SImode, operands[1], 4)));
emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
break;
case 1:
emit_insn (gen_vec_concatv2di (operands[0], operands[1], const0_rtx));
break;
default:
gcc_unreachable ();
}
DONE;
})
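
An intrinsics-level paraphrase of the two alternatives described in the comment above, for illustration only (the split emits sse2_loadld, sse2_punpckldq and vec_concatv2di RTL directly, not intrinsics).

#include <emmintrin.h>
#include <stdint.h>

/* Alternative 0: the DImode value sits in a pair of 32-bit GPRs.  */
static __m128i
di_from_reg_pair (uint32_t lo, uint32_t hi)
{
  __m128i l = _mm_cvtsi32_si128 ((int) lo);   /* movd, like sse2_loadld */
  __m128i h = _mm_cvtsi32_si128 ((int) hi);   /* movd into the scratch */
  return _mm_unpacklo_epi32 (l, h);           /* punpckldq */
}

/* Alternative 1: the value is in memory -- load it straight into %xmm.  */
static __m128i
di_from_memory (const uint64_t *p)
{
  return _mm_loadl_epi64 ((const __m128i *) p);   /* movq, cf. vec_concatv2di */
}
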
(define_expand "movv4sf"
[(set (match_operand:V4SF 0 "nonimmediate_operand" "")
(match_operand:V4SF 1 "nonimmediate_operand" ""))]
@@ -4118,7 +4159,7 @@
[(set_attr "type" "sselog,ssemov,ssemov")
(set_attr "mode" "TI,V4SF,V2SF")])
(define_insn "*vec_concatv2di"
(define_insn "vec_concatv2di"
[(set (match_operand:V2DI 0 "register_operand" "=Y2,?Y2,Y2,x,x,x")
(vec_concat:V2DI
(match_operand:DI 1 "nonimmediate_operand" " m,*y ,0 ,0,0,m")