Commit 20f9034b by Uros Bizjak

re PR target/44141 (Redundant loads and stores generated for AMD bdver1 target)

	PR target/44141
	* config/i386/i386.c (ix86_expand_vector_move_misalign): Do not handle
	128 bit vectors specially for TARGET_AVX.  Emit sse2_movupd and
	sse_movupd RTXes for TARGET_AVX, TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
	or when optimizing for size.
	* config/i386/sse.md (*mov<mode>_internal): Remove
	TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL handling from asm output code.
	Calculate "mode" attribute according to optimize_function_for_size_p
	and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flag.
	(*<sse>_movu<ssemodesuffix><avxsizesuffix>): Choose asm template
	depending on the mode of the instruction.  Calculate "mode" attribute
	according to optimize_function_for_size_p, TARGET_SSE_TYPELESS_STORES
	and TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL flags.
	(*<sse2>_movdqu<avxsizesuffix>): Ditto.

From-SVN: r187347
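
For context, a minimal reproducer sketch (hypothetical, not from the commit) for the class of code PR target/44141 is about; compiled with e.g. -O2 -msse2 -mtune=bdver1, the copy below is now expected to expand to single movups/movupd moves instead of a split half-by-half sequence:

/* Hypothetical test case, assuming GCC vector extensions.  */
typedef double v2df __attribute__ ((vector_size (16)));

void
copy128 (double *dst, const double *src)
{
  v2df tmp;
  __builtin_memcpy (&tmp, src, sizeof tmp);   /* misaligned 16-byte load  */
  __builtin_memcpy (dst, &tmp, sizeof tmp);   /* misaligned 16-byte store */
}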
gcc/config/i386/i386.c
@@ -15907,60 +15907,19 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
   op0 = operands[0];
   op1 = operands[1];
-  if (TARGET_AVX)
+  if (TARGET_AVX
+      && GET_MODE_SIZE (mode) == 32)
     {
       switch (GET_MODE_CLASS (mode))
         {
         case MODE_VECTOR_INT:
         case MODE_INT:
-          switch (GET_MODE_SIZE (mode))
-            {
-            case 16:
-              if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-                {
-                  op0 = gen_lowpart (V4SFmode, op0);
-                  op1 = gen_lowpart (V4SFmode, op1);
-                  emit_insn (gen_sse_movups (op0, op1));
-                }
-              else
-                {
-                  op0 = gen_lowpart (V16QImode, op0);
-                  op1 = gen_lowpart (V16QImode, op1);
-                  emit_insn (gen_sse2_movdqu (op0, op1));
-                }
-              break;
-            case 32:
-              op0 = gen_lowpart (V32QImode, op0);
-              op1 = gen_lowpart (V32QImode, op1);
-              ix86_avx256_split_vector_move_misalign (op0, op1);
-              break;
-            default:
-              gcc_unreachable ();
-            }
-          break;
+          op0 = gen_lowpart (V32QImode, op0);
+          op1 = gen_lowpart (V32QImode, op1);
+          /* FALLTHRU */
         case MODE_VECTOR_FLOAT:
-          switch (mode)
-            {
-            case V4SFmode:
-              emit_insn (gen_sse_movups (op0, op1));
-              break;
-            case V2DFmode:
-              if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-                {
-                  op0 = gen_lowpart (V4SFmode, op0);
-                  op1 = gen_lowpart (V4SFmode, op1);
-                  emit_insn (gen_sse_movups (op0, op1));
-                }
-              else
-                emit_insn (gen_sse2_movupd (op0, op1));
-              break;
-            case V8SFmode:
-            case V4DFmode:
-              ix86_avx256_split_vector_move_misalign (op0, op1);
-              break;
-            default:
-              gcc_unreachable ();
-            }
+          ix86_avx256_split_vector_move_misalign (op0, op1);
           break;
         default:
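
Illustratively, a user-level sketch (hypothetical, not from the commit) of the 32-byte path kept above; with -O2 -mavx the misaligned access goes through ix86_avx256_split_vector_move_misalign, while 16-byte moves now fall through to the common SSE code below:

/* Hypothetical 256-bit probe; the under-aligned vector type makes the
   expander see a misaligned V4DF move.  */
typedef double v4df __attribute__ ((vector_size (32), aligned (1)));

void
copy256 (v4df *dst, const v4df *src)
{
  *dst = *src;
}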
@@ -15972,16 +15931,6 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
       if (MEM_P (op1))
         {
-          /* If we're optimizing for size, movups is the smallest.  */
-          if (optimize_insn_for_size_p ()
-              || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-            {
-              op0 = gen_lowpart (V4SFmode, op0);
-              op1 = gen_lowpart (V4SFmode, op1);
-              emit_insn (gen_sse_movups (op0, op1));
-              return;
-            }
-
           /* ??? If we have typed data, then it would appear that using
              movdqu is the only way to get unaligned data loaded with
              integer type.  */
@@ -15989,16 +15938,19 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
         {
           op0 = gen_lowpart (V16QImode, op0);
           op1 = gen_lowpart (V16QImode, op1);
+          /* We will eventually emit movups based on insn attributes.  */
           emit_insn (gen_sse2_movdqu (op0, op1));
           return;
         }
-      if (TARGET_SSE2 && mode == V2DFmode)
+      else if (TARGET_SSE2 && mode == V2DFmode)
         {
           rtx zero;
-          if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
+          if (TARGET_AVX
+              || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+              || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+              || optimize_function_for_size_p (cfun))
             {
+              /* We will eventually emit movups based on insn attributes.  */
               emit_insn (gen_sse2_movupd (op0, op1));
               return;
             }
@@ -16030,7 +15982,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
         }
       else
         {
-          if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
+          if (TARGET_AVX
+              || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
+              || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+              || optimize_function_for_size_p (cfun))
             {
               op0 = gen_lowpart (V4SFmode, op0);
               op1 = gen_lowpart (V4SFmode, op1);
@@ -16045,6 +16000,7 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
       if (mode != V4SFmode)
         op0 = gen_lowpart (V4SFmode, op0);
+
       m = adjust_address (op1, V2SFmode, 0);
       emit_insn (gen_sse_loadlps (op0, op0, m));
       m = adjust_address (op1, V2SFmode, 8);
@@ -16053,30 +16009,20 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
     }
   else if (MEM_P (op0))
     {
-      /* If we're optimizing for size, movups is the smallest.  */
-      if (optimize_insn_for_size_p ()
-          || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-        {
-          op0 = gen_lowpart (V4SFmode, op0);
-          op1 = gen_lowpart (V4SFmode, op1);
-          emit_insn (gen_sse_movups (op0, op1));
-          return;
-        }
-
       /* ??? Similar to above, only less clear
          because of typeless stores.  */
-      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
-          && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
         {
           op0 = gen_lowpart (V16QImode, op0);
           op1 = gen_lowpart (V16QImode, op1);
+          /* We will eventually emit movups based on insn attributes.  */
          emit_insn (gen_sse2_movdqu (op0, op1));
           return;
         }
-      if (TARGET_SSE2 && mode == V2DFmode)
+      else if (TARGET_SSE2 && mode == V2DFmode)
        {
-          if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
+          if (TARGET_AVX
+              || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+              || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+              || optimize_function_for_size_p (cfun))
+            /* We will eventually emit movups based on insn attributes.  */
             emit_insn (gen_sse2_movupd (op0, op1));
           else
             {
@@ -16091,7 +16037,10 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
       if (mode != V4SFmode)
         op1 = gen_lowpart (V4SFmode, op1);
-      if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
+      if (TARGET_AVX
+          || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
+          || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
+          || optimize_function_for_size_p (cfun))
         {
           op0 = gen_lowpart (V4SFmode, op0);
           emit_insn (gen_sse_movups (op0, op1));
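
The load and store hunks above repeat one enlarged test; condensed for illustration (a sketch of the logic, not a helper present in the commit):

#include <stdbool.h>

/* Stand-ins for the backend's TARGET_* macros and
   optimize_function_for_size_p (cfun).  */
extern bool avx, unaligned_move_optimal, packed_single_optimal, opt_size;

static bool
prefer_single_unaligned_move (void)
{
  /* Any of these now selects one movup{s,d}/movdqu-style move instead
     of splitting the access into two 64-bit halves.  */
  return avx || unaligned_move_optimal || packed_single_optimal || opt_size;
}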
gcc/config/i386/sse.md
@@ -449,8 +449,6 @@
           && (misaligned_operand (operands[0], <MODE>mode)
               || misaligned_operand (operands[1], <MODE>mode)))
         return "vmovupd\t{%1, %0|%0, %1}";
-      else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-        return "%vmovaps\t{%1, %0|%0, %1}";
       else
         return "%vmovapd\t{%1, %0|%0, %1}";
@@ -460,8 +458,6 @@
           && (misaligned_operand (operands[0], <MODE>mode)
               || misaligned_operand (operands[1], <MODE>mode)))
         return "vmovdqu\t{%1, %0|%0, %1}";
-      else if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
-        return "%vmovaps\t{%1, %0|%0, %1}";
       else
         return "%vmovdqa\t{%1, %0|%0, %1}";
@@ -475,19 +471,21 @@
[(set_attr "type" "sselog1,ssemov,ssemov")
(set_attr "prefix" "maybe_vex")
(set (attr "mode")
(cond [(match_test "TARGET_AVX")
(cond [(and (eq_attr "alternative" "1,2")
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
(if_then_else
(match_test "GET_MODE_SIZE (<MODE>mode) > 16")
(const_string "V8SF")
(const_string "V4SF"))
(match_test "TARGET_AVX")
(const_string "<sseinsnmode>")
(ior (ior (match_test "optimize_function_for_size_p (cfun)")
(not (match_test "TARGET_SSE2")))
(ior (and (eq_attr "alternative" "1,2")
(match_test "optimize_function_for_size_p (cfun)"))
(and (eq_attr "alternative" "2")
(match_test "TARGET_SSE_TYPELESS_STORES")))
(const_string "V4SF")
(eq (const_string "<MODE>mode") (const_string "V4SFmode"))
(const_string "V4SF")
(eq (const_string "<MODE>mode") (const_string "V2DFmode"))
(const_string "V2DF")
]
(const_string "TI")))])
(const_string "<sseinsnmode>")))])
(define_insn "sse2_movq128"
[(set (match_operand:V2DI 0 "register_operand" "=x")
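
The effect of the new "mode" attribute, illustratively (hypothetical example, not from the commit): an aligned 128-bit integer-vector copy still prints movdqa under generic tuning, but on tunings where TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL holds (e.g. bdver1) the attribute selects V4SF, so the same insn prints movaps even though the special case was dropped from the output code above:

typedef int v4si __attribute__ ((vector_size (16)));

void
copy_aligned (v4si *dst, const v4si *src)
{
  *dst = *src;   /* aligned 16-byte move: movdqa or movaps by tuning */
}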
@@ -597,11 +595,33 @@
           [(match_operand:VF 1 "nonimmediate_operand" "xm,x")]
           UNSPEC_MOVU))]
   "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}"
+{
+  switch (get_attr_mode (insn))
+    {
+    case MODE_V8SF:
+    case MODE_V4SF:
+      return "%vmovups\t{%1, %0|%0, %1}";
+    default:
+      return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
+    }
+}
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "<MODE>")])
+   (set (attr "mode")
+        (cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
+                 (if_then_else
+                   (match_test "GET_MODE_SIZE (<MODE>mode) > 16")
+                   (const_string "V8SF")
+                   (const_string "V4SF"))
+               (match_test "TARGET_AVX")
+                 (const_string "<MODE>")
+               (ior (match_test "optimize_function_for_size_p (cfun)")
+                    (and (eq_attr "alternative" "1")
+                         (match_test "TARGET_SSE_TYPELESS_STORES")))
+                 (const_string "V4SF")
+              ]
+              (const_string "<MODE>")))])
 
 (define_expand "<sse2>_movdqu<avxsizesuffix>"
   [(set (match_operand:VI1 0 "nonimmediate_operand")
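
Illustratively (hypothetical probe, not from the commit): where the expander emits this movu unspec for a possibly-misaligned V2DF load (-mavx, -Os, or unaligned-load-optimal tunings), the template now picks its mnemonic from the computed "mode" attribute, printing vmovupd under plain -mavx but movups on packed-single-optimal tunings or at -Os without AVX:

typedef double v2df __attribute__ ((vector_size (16)));

v2df
load_v2df (const double *p)
{
  v2df v;
  __builtin_memcpy (&v, p, sizeof v);   /* possibly-misaligned load */
  return v;
}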
@@ -618,7 +638,16 @@
         (unspec:VI1 [(match_operand:VI1 1 "nonimmediate_operand" "xm,x")]
                     UNSPEC_MOVU))]
   "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "%vmovdqu\t{%1, %0|%0, %1}"
+{
+  switch (get_attr_mode (insn))
+    {
+    case MODE_V8SF:
+    case MODE_V4SF:
+      return "%vmovups\t{%1, %0|%0, %1}";
+    default:
+      return "%vmovdqu\t{%1, %0|%0, %1}";
+    }
+}
   [(set_attr "type" "ssemov")
    (set_attr "movu" "1")
    (set (attr "prefix_data16")
@@ -627,7 +656,20 @@
(const_string "*")
(const_string "1")))
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<sseinsnmode>")])
(set (attr "mode")
(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(if_then_else
(match_test "GET_MODE_SIZE (<MODE>mode) > 16")
(const_string "V8SF")
(const_string "V4SF"))
(match_test "TARGET_AVX")
(const_string "<sseinsnmode>")
(ior (match_test "optimize_function_for_size_p (cfun)")
(and (eq_attr "alternative" "1")
(match_test "TARGET_SSE_TYPELESS_STORES")))
(const_string "V4SF")
]
(const_string "<sseinsnmode>")))])
(define_insn "<sse3>_lddqu<avxsizesuffix>"
[(set (match_operand:VI1 0 "register_operand" "=x")
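
A matching sketch for the integer case (hypothetical, not from the commit): an unaligned integer-vector store reaches the movdqu pattern above and prints movdqu by default, but movups when the "mode" attribute resolves to V4SF/V8SF, i.e. on packed-single-optimal tunings, at -Os without AVX, or for stores on TARGET_SSE_TYPELESS_STORES tunings:

typedef int v4si __attribute__ ((vector_size (16)));

void
store_v4si (int *p, v4si v)
{
  __builtin_memcpy (p, &v, sizeof v);   /* possibly-misaligned store */
}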