Commit 20f9034b by Uros Bizjak

re PR target/44141 (Redundant loads and stores generated for AMD bdver1 target)

	PR target/44141
	* config/i386/i386.c (ix86_expand_vector_move_misalign): Do not handle
	128 bit vectors specially for TARGET_AVX.  Emit sse2_movupd and
	sse_movups RTXes for TARGET_AVX, TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	or when optimizing for size.
	* config/i386/sse.md (*mov<mode>_internal): Remove
	TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL handling from asm output code.
	Calculate "mode" attribute according to optimize_function_for_size_p
	and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flag.
	(*<sse>_movu<ssemodesuffix><avxsizesuffix>): Choose asm template
	depending on the mode of the instruction.  Calculate "mode" attribute
	according to optimize_function_for_size_p, TARGET_SSE_TYPELESS_STORES
	and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flags.
	(*<sse2>_movdqu<avxsizesuffix>): Ditto.

From-SVN: r187347
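PR target/44141 complains about redundant loads and stores around misaligned
128-bit vector accesses when tuning for AMD bdver1.  As a hypothetical reduced
example (not the testcase from the PR), assuming -mtune=bdver1 enables
TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL:

  /* Hypothetical reduced example, not the PR's testcase.  With tunings that
     set TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL, the expander still emits a
     typed sse2_movupd RTX, but the insn's "mode" attribute is expected to
     make it print as movups.  */
  typedef double v2df __attribute__ ((vector_size (16)));

  void
  copy_v2df (char *dst, const char *src)
  {
    v2df tmp;
    __builtin_memcpy (&tmp, src, sizeof tmp);  /* possibly misaligned load */
    __builtin_memcpy (dst, &tmp, sizeof tmp);  /* possibly misaligned store */
  }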
parent eac188c5

gcc/ChangeLog (new entry at the top):

2012-05-09  Uros Bizjak  <ubizjak@gmail.com>

	PR target/44141
	* config/i386/i386.c (ix86_expand_vector_move_misalign): Do not handle
	128 bit vectors specially for TARGET_AVX.  Emit sse2_movupd and
	sse_movups RTXes for TARGET_AVX, TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	or when optimizing for size.
	* config/i386/sse.md (*mov<mode>_internal): Remove
	TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL handling from asm output code.
	Calculate "mode" attribute according to optimize_function_for_size_p
	and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flag.
	(*<sse>_movu<ssemodesuffix><avxsizesuffix>): Choose asm template
	depending on the mode of the instruction.  Calculate "mode" attribute
	according to optimize_function_for_size_p, TARGET_SSE_TYPELESS_STORES
	and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flags.
	(*<sse2>_movdqu<avxsizesuffix>): Ditto.
2012-05-09  Georg-Johann Lay  <avr@gjlay.de>

	PR target/53256
	...

@@ -180,8 +197,7 @@
 	(clone_function_name): Likewise.
 	(cgraph_create_virtual_clone): Likewise.
 	(cgraph_remove_node_and_inline_clones): Likewise.
-	(cgraph_redirect_edge_call_stmt_to_callee): Move here from
-	cgraphunit.c
+	(cgraph_redirect_edge_call_stmt_to_callee): Move here from cgraphunit.c
 	* cgraph.h: Reorder declarations so they match file of origin.
 	(cgraph_create_empty_node): Declare.
 	* cgraphunit.c (update_call_expr): Move to cgraphclones.c
gcc/config/i386/i386.c
@@ -15907,61 +15907,20 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
   op0 = operands[0];
   op1 = operands[1];
 
-  if (TARGET_AVX)
+  if (TARGET_AVX
+      && GET_MODE_SIZE (mode) == 32)
     {
       switch (GET_MODE_CLASS (mode))
 	{
 	case MODE_VECTOR_INT:
 	case MODE_INT:
-	  switch (GET_MODE_SIZE (mode))
-	    {
-	    case 16:
-	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-		{
-		  op0 = gen_lowpart (V4SFmode, op0);
-		  op1 = gen_lowpart (V4SFmode, op1);
-		  emit_insn (gen_sse_movups (op0, op1));
-		}
-	      else
-		{
-		  op0 = gen_lowpart (V16QImode, op0);
-		  op1 = gen_lowpart (V16QImode, op1);
-		  emit_insn (gen_sse2_movdqu (op0, op1));
-		}
-	      break;
-	    case 32:
-	      op0 = gen_lowpart (V32QImode, op0);
-	      op1 = gen_lowpart (V32QImode, op1);
-	      ix86_avx256_split_vector_move_misalign (op0, op1);
-	      break;
-	    default:
-	      gcc_unreachable ();
-	    }
-	  break;
+	  op0 = gen_lowpart (V32QImode, op0);
+	  op1 = gen_lowpart (V32QImode, op1);
+	  /* FALLTHRU */
+
 	case MODE_VECTOR_FLOAT:
-	  switch (mode)
-	    {
-	    case V4SFmode:
-	      emit_insn (gen_sse_movups (op0, op1));
-	      break;
-	    case V2DFmode:
-	      if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-		{
-		  op0 = gen_lowpart (V4SFmode, op0);
-		  op1 = gen_lowpart (V4SFmode, op1);
-		  emit_insn (gen_sse_movups (op0, op1));
-		}
-	      else
-		emit_insn (gen_sse2_movupd (op0, op1));
-	      break;
-	    case V8SFmode:
-	    case V4DFmode:
-	      ix86_avx256_split_vector_move_misalign (op0, op1);
-	      break;
-	    default:
-	      gcc_unreachable ();
-	    }
+	  ix86_avx256_split_vector_move_misalign (op0, op1);
 	  break;
 
 	default:
 	  gcc_unreachable ();
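The hunk above removes all 128-bit special-casing from the TARGET_AVX branch,
leaving it to handle only 32-byte vectors.  A condensed paraphrase of the
resulting control flow (the wrapper function below is hypothetical; the helper
names are GCC's own):

  /* Condensed paraphrase of the post-patch flow, not the committed code.  */
  static void
  sketch_expand_misalign (enum machine_mode mode, rtx op0, rtx op1)
  {
    if (TARGET_AVX && GET_MODE_SIZE (mode) == 32)
      {
	/* 256-bit only: integer modes are punned to V32QI, then both
	   integer and float modes share the 128-bit split helper.  */
	if (GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
	  {
	    op0 = gen_lowpart (V32QImode, op0);
	    op1 = gen_lowpart (V32QImode, op1);
	  }
	ix86_avx256_split_vector_move_misalign (op0, op1);
	return;
      }

    /* 128-bit moves, AVX included, now fall through to the common SSE2
       code below, which emits typed movdqu/movupd RTXes and lets the insn
       "mode" attribute decide whether the final assembly says movups.  */
  }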
@@ -15972,16 +15931,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 
   if (MEM_P (op1))
     {
-      /* If we're optimizing for size, movups is the smallest.  */
-      if (optimize_insn_for_size_p ()
-	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	{
-	  op0 = gen_lowpart (V4SFmode, op0);
-	  op1 = gen_lowpart (V4SFmode, op1);
-	  emit_insn (gen_sse_movups (op0, op1));
-	  return;
-	}
-
       /* ??? If we have typed data, then it would appear that using
 	 movdqu is the only way to get unaligned data loaded with
 	 integer type.  */
@@ -15989,16 +15938,19 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	{
 	  op0 = gen_lowpart (V16QImode, op0);
 	  op1 = gen_lowpart (V16QImode, op1);
+	  /* We will eventually emit movups based on insn attributes.  */
 	  emit_insn (gen_sse2_movdqu (op0, op1));
-	  return;
 	}
-      else if (TARGET_SSE2 && mode == V2DFmode)
+
+      if (TARGET_SSE2 && mode == V2DFmode)
 	{
 	  rtx zero;
 
-	  if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
+	  if (TARGET_AVX
+	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+	      || optimize_function_for_size_p (cfun))
 	    {
+	      /* We will eventually emit movups based on insn attributes.  */
 	      emit_insn (gen_sse2_movupd (op0, op1));
 	      return;
 	    }
@@ -16030,7 +15982,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
     }
   else
     {
-      if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
+      if (TARGET_AVX
+	  || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+	  || optimize_function_for_size_p (cfun))
 	{
 	  op0 = gen_lowpart (V4SFmode, op0);
 	  op1 = gen_lowpart (V4SFmode, op1);
@@ -16045,6 +16000,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
       if (mode != V4SFmode)
 	op0 = gen_lowpart (V4SFmode, op0);
+
       m = adjust_address (op1, V2SFmode, 0);
       emit_insn (gen_sse_loadlps (op0, op0, m));
       m = adjust_address (op1, V2SFmode, 8);
@@ -16053,30 +16009,20 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
     }
   else if (MEM_P (op0))
     {
-      /* If we're optimizing for size, movups is the smallest.  */
-      if (optimize_insn_for_size_p ()
-	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	{
-	  op0 = gen_lowpart (V4SFmode, op0);
-	  op1 = gen_lowpart (V4SFmode, op1);
-	  emit_insn (gen_sse_movups (op0, op1));
-	  return;
-	}
-
-      /* ??? Similar to above, only less clear
-	 because of typeless stores.  */
-      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
-	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
 	{
 	  op0 = gen_lowpart (V16QImode, op0);
 	  op1 = gen_lowpart (V16QImode, op1);
+	  /* We will eventually emit movups based on insn attributes.  */
 	  emit_insn (gen_sse2_movdqu (op0, op1));
-	  return;
 	}
-      else if (TARGET_SSE2 && mode == V2DFmode)
+
+      if (TARGET_SSE2 && mode == V2DFmode)
 	{
-	  if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
+	  if (TARGET_AVX
+	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+	      || optimize_function_for_size_p (cfun))
+	    /* We will eventually emit movups based on insn attributes.  */
 	    emit_insn (gen_sse2_movupd (op0, op1));
 	  else
 	    {
@@ -16091,7 +16037,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
       if (mode != V4SFmode)
 	op1 = gen_lowpart (V4SFmode, op1);
 
-      if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
+      if (TARGET_AVX
+	  || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+	  || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+	  || optimize_function_for_size_p (cfun))
 	{
 	  op0 = gen_lowpart (V4SFmode, op0);
 	  emit_insn (gen_sse_movups (op0, op1));
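Note that the same four-way condition now guards both the load path (with
TARGET_SSE_UNALIGNED_LOAD_OPTIMAL) and the store path (with
TARGET_SSE_UNALIGNED_STORE_OPTIMAL).  Purely for illustration, the shared
shape is the following; this helper does not exist in i386.c, where the patch
repeats the condition inline:

  /* Illustration only, not part of the patch.  */
  static bool
  misaligned_mov_direct_p (void)
  {
    return (TARGET_AVX
	    || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL  /* or ..._STORE_OPTIMAL */
	    || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	    || optimize_function_for_size_p (cfun));
  }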
gcc/config/i386/sse.md
@@ -449,8 +449,6 @@
 	  && (misaligned_operand (operands[0], <MODE>mode)
 	      || misaligned_operand (operands[1], <MODE>mode)))
 	return "vmovupd\t{%1, %0|%0, %1}";
-      else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	return "%vmovaps\t{%1, %0|%0, %1}";
       else
 	return "%vmovapd\t{%1, %0|%0, %1}";
@@ -460,8 +458,6 @@
 	  && (misaligned_operand (operands[0], <MODE>mode)
 	      || misaligned_operand (operands[1], <MODE>mode)))
 	return "vmovdqu\t{%1, %0|%0, %1}";
-      else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-	return "%vmovaps\t{%1, %0|%0, %1}";
       else
 	return "%vmovdqa\t{%1, %0|%0, %1}";
@@ -475,19 +471,21 @@
   [(set_attr "type" "sselog1,ssemov,ssemov")
    (set_attr "prefix" "maybe_vex")
    (set (attr "mode")
-	(cond [(match_test "TARGET_AVX")
+	(cond [(and (eq_attr "alternative" "1,2")
+		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
+		 (if_then_else
+		   (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
+		   (const_string "V8SF")
+		   (const_string "V4SF"))
+	       (match_test "TARGET_AVX")
 		 (const_string "<sseinsnmode>")
-	       (ior (ior (match_test "optimize_function_for_size_p (cfun)")
-			 (not (match_test "TARGET_SSE2")))
-		   (and (eq_attr "alternative" "2")
-			(match_test "TARGET_SSE_TYPELESS_STORES")))
+	       (ior (and (eq_attr "alternative" "1,2")
+			 (match_test "optimize_function_for_size_p (cfun)"))
+		    (and (eq_attr "alternative" "2")
+			 (match_test "TARGET_SSE_TYPELESS_STORES")))
 		 (const_string "V4SF")
-	       (eq (const_string "<MODE>mode") (const_string "V4SFmode"))
-		 (const_string "V4SF")
-	       (eq (const_string "<MODE>mode") (const_string "V2DFmode"))
-		 (const_string "V2DF")
 	      ]
-	      (const_string "TI")))])
+	      (const_string "<sseinsnmode>")))])
 
 (define_insn "sse2_movq128"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
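To see why the deleted %vmovaps special case in the asm output code is no
longer needed, it helps to hand-evaluate the rewritten "mode" attribute.  A
hypothetical C mirror of the cond above (not GCC source; alternative 0 is the
constant alternative, 1 and 2 are the load and store move alternatives):

  /* Hypothetical mirror of the "mode" cond for 16-byte modes; 32-byte
     modes would yield "V8SF" in the first arm.  */
  static const char *
  mode_attr_for_mov_internal (int alt, bool packed_single_optimal,
			      bool avx, bool size_opt, bool typeless_stores)
  {
    if ((alt == 1 || alt == 2) && packed_single_optimal)
      return "V4SF";
    if (avx)
      return "<sseinsnmode>";
    if (((alt == 1 || alt == 2) && size_opt)
	|| (alt == 2 && typeless_stores))
      return "V4SF";
    return "<sseinsnmode>";
  }

When this yields V4SF, the insn's existing output template already prints the
packed-single form, so the explicit TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
returns become dead code and are removed above.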
@@ -597,11 +595,33 @@
 	  [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
 	  UNSPEC_MOVU))]
   "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
+{
+  switch (get_attr_mode (insn))
+    {
+    case MODE_V8SF:
+    case MODE_V4SF:
+      return "%vmovups\t{%1, %0|%0, %1}";
+    default:
+      return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
+    }
+}
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "<MODE>")])
+   (set (attr "mode")
+	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+		 (if_then_else
+		   (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
+		   (const_string "V8SF")
+		   (const_string "V4SF"))
+	       (match_test "TARGET_AVX")
+		 (const_string "<MODE>")
+	       (ior (match_test "optimize_function_for_size_p (cfun)")
+		    (and (eq_attr "alternative" "1")
+			 (match_test "TARGET_SSE_TYPELESS_STORES")))
+		 (const_string "V4SF")
+	      ]
+	      (const_string "<MODE>")))])
 
 (define_expand "<sse2>_movdqu<avxsizesuffix>"
   [(set (match_operand:VI1 0 "nonimmediate_operand")
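Combining the new output template with the new "mode" attribute, the
<sse>_movu pattern above should print, for a misaligned V2DF load, roughly as
follows (assumed tunings; expected output, not verified against a build):

  /* Worked example for <sse>_movu on a V2DF load:
       -mtune=bdver1  : mode attr = V4SF -> "movups"  (packed-single arm)
       -mavx          : mode attr = V2DF -> "vmovupd" (TARGET_AVX arm)
       -Os, plain SSE2: mode attr = V4SF -> "movups"  (size arm)
       default SSE2   : mode attr = V2DF -> "movupd"  (fallback)  */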
@@ -618,7 +638,16 @@
 	(unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
 		    UNSPEC_MOVU))]
   "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "%vmovdqu\t{%1, %0|%0, %1}"
+{
+  switch (get_attr_mode (insn))
+    {
+    case MODE_V8SF:
+    case MODE_V4SF:
+      return "%vmovups\t{%1, %0|%0, %1}";
+    default:
+      return "%vmovdqu\t{%1, %0|%0, %1}";
+    }
+}
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set (attr "prefix_data16")
@@ -627,7 +656,20 @@
 	  (const_string "*")
 	  (const_string "1")))
    (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "<sseinsnmode>")])
+   (set (attr "mode")
+	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+		 (if_then_else
+		   (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
+		   (const_string "V8SF")
+		   (const_string "V4SF"))
+	       (match_test "TARGET_AVX")
+		 (const_string "<sseinsnmode>")
+	       (ior (match_test "optimize_function_for_size_p (cfun)")
+		    (and (eq_attr "alternative" "1")
+			 (match_test "TARGET_SSE_TYPELESS_STORES")))
+		 (const_string "V4SF")
+	      ]
+	      (const_string "<sseinsnmode>")))])
 
 (define_insn "<sse3>_lddqu<avxsizesuffix>"
   [(set (match_operand:VI1 0 "register_operand" "=x")