Commit 97afef00 by Uros Bizjak Committed by Uros Bizjak

i386-protos.h (ix86_operands_ok_for_move_multiple): New.

	* config/i386/i386-protos.h (ix86_operands_ok_for_move_multiple): New.
	* config/i386/i386.c (extract_base_offset_in_addr): New function.
	(ix86_operands_ok_for_move_multiple): Ditto.
	* config/i386/sse.md (movsd/movhpd to movupd peephole2): New pattern.
	(movlpd/movhpd to movupd peephole2): Ditto.

testsuite/ChangeLog:

	* gcc.target/i386/sse2-load-multi.c: New test.
	* gcc.target/i386/sse2-store-multi.c: Ditto.


Co-Authored-By: Wei Mi <wmi@google.com>

From-SVN: r222410
parent 8c2b7f79
2015-04-24 Uros Bizjak <ubizjak@gmail.com>
Wei Mi <wmi@google.com>
* config/i386/i386-protos.h (ix86_operands_ok_for_move_multiple): New.
* config/i386/i386.c (extract_base_offset_in_addr): New function.
(ix86_operands_ok_for_move_multiple): Ditto.
* config/i386/sse.md (movsd/movhpd to movupd peephole2): New pattern.
(movlpd/movhpd to movupd peephole2): Ditto.
2015-04-24 Marek Polacek <polacek@redhat.com>
PR c/61534
......
...@@ -304,6 +304,8 @@ extern enum attr_cpu ix86_schedule; ...@@ -304,6 +304,8 @@ extern enum attr_cpu ix86_schedule;
#endif #endif
extern const char * ix86_output_call_insn (rtx_insn *insn, rtx call_op); extern const char * ix86_output_call_insn (rtx_insn *insn, rtx call_op);
/* Return true if the two consecutive loads (LOAD is true) or stores
   (LOAD is false) described by OPERANDS can be merged into a single
   move multiple.  MODE is the mode of the memory operands.  */
extern bool ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
enum machine_mode mode);
#ifdef RTX_CODE #ifdef RTX_CODE
/* Target data for multipass lookahead scheduling. /* Target data for multipass lookahead scheduling.
......
...@@ -51726,6 +51726,92 @@ ix86_binds_local_p (const_tree exp) ...@@ -51726,6 +51726,92 @@ ix86_binds_local_p (const_tree exp)
} }
#endif #endif
/* Split the address of MEM into a base and a constant offset.

   After looking through an outer CONST wrapper, two address shapes are
   accepted:

     - a bare register or SYMBOL_REF, in which case the offset is
       const0_rtx;
     - a PLUS whose first operand is a register or SYMBOL_REF and whose
       second operand is a CONST_INT.

   On success the two parts are stored in *BASE and *OFFSET and true is
   returned.  For any other address shape false is returned and neither
   *BASE nor *OFFSET is written.  MEM must satisfy MEM_P.  */

static bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
  rtx x;

  gcc_assert (MEM_P (mem));

  /* Start from the address expression, unwrapping CONST if present.  */
  x = XEXP (mem, 0);
  if (GET_CODE (x) == CONST)
    x = XEXP (x, 0);

  /* [base + constant]  */
  if (GET_CODE (x) == PLUS)
    {
      rtx lhs = XEXP (x, 0);
      rtx rhs = XEXP (x, 1);

      if (!CONST_INT_P (rhs))
        return false;
      if (!REG_P (lhs) && GET_CODE (lhs) != SYMBOL_REF)
        return false;

      *base = lhs;
      *offset = rhs;
      return true;
    }

  /* Anything that is not a bare base at this point is unsupported.  */
  if (!REG_P (x) && GET_CODE (x) != SYMBOL_REF)
    return false;

  /* [base], i.e. an implicit zero offset.  */
  *base = x;
  *offset = const0_rtx;
  return true;
}
/* Decide whether the two consecutive loads or stores described by
   OPERANDS can be merged into one move multiple.

   LOAD selects the operand layout: for loads the registers are
   operands 0 and 2 and the memory references are operands 1 and 3;
   for stores the memory references are operands 0 and 2 and the
   registers are operands 1 and 3.  MODE is the mode of each memory
   operand.

   True is returned only when
     - both register operands have the same register number,
     - both addresses decompose into a base plus constant offset,
     - the two bases are rtx_equal_p, and
     - the second offset equals the first offset plus
       GET_MODE_SIZE (MODE), i.e. the first access has the lower
       address and the second one is adjacent to it.  */

bool
ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
                                    enum machine_mode mode)
{
  /* Index of the first memory / register operand; the matching operand
     of the second insn is always two slots further on.  */
  const int mem_idx = load ? 1 : 0;
  const int reg_idx = load ? 0 : 1;

  rtx lo_mem = operands[mem_idx];
  rtx hi_mem = operands[mem_idx + 2];
  rtx lo_reg = operands[reg_idx];
  rtx hi_reg = operands[reg_idx + 2];
  rtx lo_base, hi_base, lo_off, hi_off;
  HOST_WIDE_INT size;

  gcc_assert (REG_P (lo_reg) && REG_P (hi_reg));

  /* Both halves must go through the very same register.  */
  if (REGNO (lo_reg) != REGNO (hi_reg))
    return false;

  /* Both addresses must have the [base+offset] form ...  */
  if (!extract_base_offset_in_addr (lo_mem, &lo_base, &lo_off)
      || !extract_base_offset_in_addr (hi_mem, &hi_base, &hi_off))
    return false;

  /* ... and share a common base.  */
  if (!rtx_equal_p (lo_base, hi_base))
    return false;

  /* The second access has to start right where the first one ends.  */
  size = GET_MODE_SIZE (mode);
  return INTVAL (lo_off) + size == INTVAL (hi_off);
}
/* Initialize the GCC target structure. */ /* Initialize the GCC target structure. */
#undef TARGET_RETURN_IN_MEMORY #undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
...@@ -1183,6 +1183,21 @@ ...@@ -1183,6 +1183,21 @@
] ]
(const_string "<MODE>")))]) (const_string "<MODE>")))])
;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
;;
;; Matched sequence:
;;   1. V2DF register operand 0 is set to the vec_concat of DF memory
;;      operand 1 and operand 4, which must satisfy const0_operand.
;;   2. V2DF register operand 2 is set to the vec_concat of its own
;;      element 0 and DF memory operand 3.
;;
;; The condition calls ix86_operands_ok_for_move_multiple with LOAD true,
;; so operands 0 and 2 must be the same register and operand 3 must lie
;; exactly one DFmode size after operand 1 on the same base.
;;
;; Replacement: one V2DF UNSPEC_LOADU into operand 2.
(define_peephole2
[(set (match_operand:V2DF 0 "register_operand")
(vec_concat:V2DF (match_operand:DF 1 "memory_operand")
(match_operand:DF 4 "const0_operand")))
(set (match_operand:V2DF 2 "register_operand")
(vec_concat:V2DF (vec_select:DF (match_dup 2)
(parallel [(const_int 0)]))
(match_operand:DF 3 "memory_operand")))]
"TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
&& ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
[(set (match_dup 2)
(unspec:V2DF [(match_dup 4)] UNSPEC_LOADU))]
;; Operand 4 was matched as the zero constant above; it is overwritten
;; here with operand 1 re-addressed as a V2DFmode memory at offset 0.
"operands[4] = adjust_address (operands[1], V2DFmode, 0);")
(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>" (define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
[(set (match_operand:VF 0 "memory_operand" "=m") [(set (match_operand:VF 0 "memory_operand" "=m")
(unspec:VF (unspec:VF
...@@ -1242,6 +1257,20 @@ ...@@ -1242,6 +1257,20 @@
(set_attr "prefix" "evex") (set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")]) (set_attr "mode" "<sseinsnmode>")])
;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
;;
;; Matched sequence:
;;   1. DF memory operand 0 is set to element 0 of V2DF register operand 1.
;;   2. DF memory operand 2 is set to element 1 of V2DF register operand 3.
;;
;; The condition calls ix86_operands_ok_for_move_multiple with LOAD false,
;; so operands 1 and 3 must be the same register and operand 2 must lie
;; exactly one DFmode size after operand 0 on the same base.
;;
;; Replacement: one V2DF UNSPEC_STOREU of operand 1 into operand 4.
(define_peephole2
[(set (match_operand:DF 0 "memory_operand")
(vec_select:DF (match_operand:V2DF 1 "register_operand")
(parallel [(const_int 0)])))
(set (match_operand:DF 2 "memory_operand")
(vec_select:DF (match_operand:V2DF 3 "register_operand")
(parallel [(const_int 1)])))]
"TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
&& ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
[(set (match_dup 4)
(unspec:V2DF [(match_dup 1)] UNSPEC_STOREU))]
;; Operand 4 is not part of the matched sequence; it is created here as
;; operand 0 re-addressed as a V2DFmode memory at offset 0.
"operands[4] = adjust_address (operands[0], V2DFmode, 0);")
/* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads /* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
just fine if misaligned_operand is true, and without the UNSPEC it can just fine if misaligned_operand is true, and without the UNSPEC it can
be combined with arithmetic instructions. If misaligned_operand is be combined with arithmetic instructions. If misaligned_operand is
......
2015-04-24 Uros Bizjak <ubizjak@gmail.com>
Wei Mi <wmi@google.com>
* gcc.target/i386/sse2-load-multi.c: New test.
* gcc.target/i386/sse2-store-multi.c: Ditto.
2015-04-24 Marek Polacek <polacek@redhat.com>
PR c/65830
......
/* { dg-do compile } */
/* { dg-options "-march=corei7 -O2" } */
#include <emmintrin.h>
double a[8];
__m128d load_1 ()
{
__m128d res;
res = _mm_load_sd (&a[1]);
res = _mm_loadh_pd (res, &a[2]);
return res;
}
__m128d load_2 (double *a)
{
__m128d res;
res = _mm_load_sd (&a[1]);
res = _mm_loadh_pd (res, &a[2]);
return res;
}
/* { dg-final { scan-assembler-times "movup" 2 } } */
/* { dg-do compile } */
/* { dg-options "-march=corei7 -O2" } */
#include <emmintrin.h>
double a[8];
void store_1 (__m128d val)
{
_mm_store_sd (&a[1], val);
_mm_storeh_pd (&a[2], val);
}
void store_2 (__m128d val, double *a)
{
_mm_store_sd (&a[1], val);
_mm_storeh_pd (&a[2], val);
}
/* { dg-final { scan-assembler-times "movup" 2 } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment