Commit 798d3d04 by Greta Yorsh Committed by Greta Yorsh

Internal memcpy using LDRD/STRD

2013-05-16  Greta Yorsh  <Greta.Yorsh@arm.com>

gcc/

	* config/arm/arm-protos.h (gen_movmem_ldrd_strd): New declaration.
	* config/arm/arm.c (next_consecutive_mem): New function.
	(gen_movmem_ldrd_strd): Likewise.
	* config/arm/arm.md (movmemqi): Update condition and code.
	(unaligned_loaddi, unaligned_storedi): New patterns.

gcc/testsuite

	* gcc.target/arm/unaligned-memcpy-2.c: Adjust expected output.
	* gcc.target/arm/unaligned-memcpy-3.c: Likewise.
	* gcc.target/arm/unaligned-memcpy-4.c: Likewise.

From-SVN: r198970
parent 0baddc45
2013-05-16 Greta Yorsh <Greta.Yorsh@arm.com>
* config/arm/arm-protos.h (gen_movmem_ldrd_strd): New declaration.
* config/arm/arm.c (next_consecutive_mem): New function.
(gen_movmem_ldrd_strd): Likewise.
* config/arm/arm.md (movmemqi): Update condition and code.
(unaligned_loaddi, unaligned_storedi): New patterns.
2013-05-16 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> 2013-05-16 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
* config.gcc: Obsolete *-*-solaris2.9*. * config.gcc: Obsolete *-*-solaris2.9*.
......
...@@ -120,6 +120,7 @@ extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT); ...@@ -120,6 +120,7 @@ extern bool offset_ok_for_ldrd_strd (HOST_WIDE_INT);
extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool); extern bool operands_ok_ldrd_strd (rtx, rtx, rtx, HOST_WIDE_INT, bool, bool);
extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool); extern bool gen_operands_ldrd_strd (rtx *, bool, bool, bool);
extern int arm_gen_movmemqi (rtx *); extern int arm_gen_movmemqi (rtx *);
extern bool gen_movmem_ldrd_strd (rtx *);
extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx); extern enum machine_mode arm_select_cc_mode (RTX_CODE, rtx, rtx);
extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx, extern enum machine_mode arm_select_dominance_cc_mode (rtx, rtx,
HOST_WIDE_INT); HOST_WIDE_INT);
......
...@@ -11855,6 +11855,134 @@ arm_gen_movmemqi (rtx *operands) ...@@ -11855,6 +11855,134 @@ arm_gen_movmemqi (rtx *operands)
return 1; return 1;
} }
/* Helper for gen_movmem_ldrd_strd.  Given a memory rtx MEM, produce the
   memory rtx of the same mode whose address is MEM's address advanced
   by the size of that mode, i.e. the next consecutive element.  */
static inline rtx
next_consecutive_mem (rtx mem)
{
  enum machine_mode mem_mode = GET_MODE (mem);
  HOST_WIDE_INT step = GET_MODE_SIZE (mem_mode);

  /* The same STEP is used both to form the new address and as the
     offset reported to adjust_automodify_address.  */
  return adjust_automodify_address (mem, mem_mode,
                                    plus_constant (Pmode, XEXP (mem, 0), step),
                                    step);
}
/* Expand a constant-length block copy using LDRD/STRD instructions
   whenever possible.

   OPERANDS[0] is the destination MEM and OPERANDS[1] the source MEM.
   OPERANDS[2] is the number of bytes to copy and OPERANDS[3] is the
   alignment that may be assumed for both buffers; both must be
   CONST_INTs (asserted below).

   Returns true if the copy has been emitted, either here or by
   delegating to arm_gen_movmemqi.  Returns false, without emitting
   anything, when the copy is rejected: more than 64 bytes, volatile
   source or destination, or insufficient alignment when unaligned
   accesses are not available.  */
bool
gen_movmem_ldrd_strd (rtx *operands)
{
  unsigned HOST_WIDE_INT len;
  HOST_WIDE_INT align;
  rtx src, dst, base;
  rtx reg0;
  bool src_aligned, dst_aligned;

  gcc_assert (CONST_INT_P (operands[2]));
  gcc_assert (CONST_INT_P (operands[3]));

  len = UINTVAL (operands[2]);
  if (len > 64)
    return false;

  /* Maximum alignment we can assume for both src and dst buffers.  */
  align = INTVAL (operands[3]);

  if (!unaligned_access && len >= 4 && (align & 3) != 0)
    return false;

  dst = operands[0];
  src = operands[1];

  /* Perform every rejection test before emitting any instruction, so
     that a failed or delegated expansion does not leave behind dead
     copies of the addresses.  */
  if (MEM_VOLATILE_P (src) || MEM_VOLATILE_P (dst))
    return false;

  dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
  src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;

  if (!unaligned_access && !(src_aligned && dst_aligned))
    return false;

  /* If we cannot generate any LDRD/STRD, try to generate LDM/STM.  */
  if (!(dst_aligned || src_aligned))
    return arm_gen_movmemqi (operands) != 0;

  /* Place src and dst addresses in registers
     and update the corresponding mem rtx.  */
  base = copy_to_mode_reg (SImode, XEXP (dst, 0));
  dst = adjust_automodify_address (dst, VOIDmode, base, 0);

  base = copy_to_mode_reg (SImode, XEXP (src, 0));
  src = adjust_automodify_address (src, VOIDmode, base, 0);

  /* Copy as many double-words as possible.  An aligned side uses a plain
     DImode move; an unaligned side uses the unaligned_*di patterns.  */
  src = adjust_address (src, DImode, 0);
  dst = adjust_address (dst, DImode, 0);
  while (len >= 8)
    {
      len -= 8;
      reg0 = gen_reg_rtx (DImode);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loaddi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storedi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
    }

  gcc_assert (len < 8);
  if (len >= 4)
    {
      /* More than a word but less than a double-word to copy.
         Copy a word.  */
      reg0 = gen_reg_rtx (SImode);
      src = adjust_address (src, SImode, 0);
      dst = adjust_address (dst, SImode, 0);
      if (src_aligned)
        emit_move_insn (reg0, src);
      else
        emit_insn (gen_unaligned_loadsi (reg0, src));

      if (dst_aligned)
        emit_move_insn (dst, reg0);
      else
        emit_insn (gen_unaligned_storesi (dst, reg0));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      len -= 4;
    }

  if (len == 0)
    return true;

  /* Copy the remaining bytes.  */
  if (len >= 2)
    {
      dst = adjust_address (dst, HImode, 0);
      src = adjust_address (src, HImode, 0);
      reg0 = gen_reg_rtx (SImode);

      /* As for the wider accesses above, only use the unaligned patterns
         on the side that is not known to be aligned.  This function can
         be reached with !unaligned_access when both buffers are aligned,
         and the unaligned_* patterns are presumably not available in
         that configuration (unaligned_loaddi/unaligned_storedi are
         conditional on unaligned_access).  */
      if (src_aligned)
        emit_insn (gen_zero_extendhisi2 (reg0, src));
      else
        emit_insn (gen_unaligned_loadhiu (reg0, src));

      if (dst_aligned)
        emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
      else
        emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));

      src = next_consecutive_mem (src);
      dst = next_consecutive_mem (dst);
      if (len == 2)
        return true;
    }

  /* Exactly one byte is left (len is 1 or 3 here).  */
  dst = adjust_address (dst, QImode, 0);
  src = adjust_address (src, QImode, 0);
  reg0 = gen_reg_rtx (QImode);
  emit_move_insn (reg0, src);
  emit_move_insn (dst, reg0);
  return true;
}
/* Select a dominance comparison mode if possible for a test of the general /* Select a dominance comparison mode if possible for a test of the general
form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms. form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
COND_OR == DOM_CC_X_AND_Y => (X && Y) COND_OR == DOM_CC_X_AND_Y => (X && Y)
......
...@@ -4421,6 +4421,64 @@ ...@@ -4421,6 +4421,64 @@
(set_attr "predicable" "yes") (set_attr "predicable" "yes")
(set_attr "type" "store1")]) (set_attr "type" "store1")])
;; Unaligned double-word load and store.
;; Split after reload into two unaligned single-word accesses.
;; It prevents lower_subreg from splitting some other aligned
;; double-word accesses too early. Used for internal memcpy.
;; unaligned_loaddi: set DImode register operand 0 from DImode memory
;; operand 1, wrapped in UNSPEC_UNALIGNED_LOAD.  The insn is enabled only
;; when "unaligned_access && TARGET_32BIT".  Its output template is "#",
;; and once reload_completed holds it is split into two SImode
;; UNSPEC_UNALIGNED_LOAD sets: (operands[0] <- operands[1]) followed by
;; (operands[2] <- operands[3]), where the preparation code below fills
;; in the low/high SImode parts of the original operands.
(define_insn_and_split "unaligned_loaddi"
[(set (match_operand:DI 0 "s_register_operand" "=l,r")
(unspec:DI [(match_operand:DI 1 "memory_operand" "o,o")]
UNSPEC_UNALIGNED_LOAD))]
"unaligned_access && TARGET_32BIT"
"#"
"&& reload_completed"
[(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_LOAD))
(set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_LOAD))]
{
/* Break both DImode operands into SImode halves.  The high parts are
   extracted first because operands[0] and operands[1] are overwritten
   with their own low parts on the lines that follow.  */
operands[2] = gen_highpart (SImode, operands[0]);
operands[0] = gen_lowpart (SImode, operands[0]);
operands[3] = gen_highpart (SImode, operands[1]);
operands[1] = gen_lowpart (SImode, operands[1]);
/* If the first destination register overlaps with the base address,
swap the order in which the loads are emitted. */
if (reg_overlap_mentioned_p (operands[0], operands[1]))
{
/* Exchange the (register, memory) pairs so that the high-part load
   becomes the first set of the split and the low-part load the
   second.  */
rtx tmp = operands[1];
operands[1] = operands[3];
operands[3] = tmp;
tmp = operands[0];
operands[0] = operands[2];
operands[2] = tmp;
}
}
[(set_attr "arch" "t2,any")
(set_attr "length" "4,8")
(set_attr "predicable" "yes")
(set_attr "type" "load2")])
;; unaligned_storedi: set DImode memory operand 0 from DImode register
;; operand 1, wrapped in UNSPEC_UNALIGNED_STORE.  The insn is enabled only
;; when "unaligned_access && TARGET_32BIT".  Its output template is "#",
;; and once reload_completed holds it is split into two SImode
;; UNSPEC_UNALIGNED_STORE sets: (operands[0] <- operands[1]) followed by
;; (operands[2] <- operands[3]).  Unlike unaligned_loaddi, the two halves
;; are never reordered here.
(define_insn_and_split "unaligned_storedi"
[(set (match_operand:DI 0 "memory_operand" "=o,o")
(unspec:DI [(match_operand:DI 1 "s_register_operand" "l,r")]
UNSPEC_UNALIGNED_STORE))]
"unaligned_access && TARGET_32BIT"
"#"
"&& reload_completed"
[(set (match_dup 0) (unspec:SI [(match_dup 1)] UNSPEC_UNALIGNED_STORE))
(set (match_dup 2) (unspec:SI [(match_dup 3)] UNSPEC_UNALIGNED_STORE))]
{
/* Break both DImode operands into SImode halves.  The high parts are
   extracted first because operands[0] and operands[1] are overwritten
   with their own low parts on the lines that follow.  */
operands[2] = gen_highpart (SImode, operands[0]);
operands[0] = gen_lowpart (SImode, operands[0]);
operands[3] = gen_highpart (SImode, operands[1]);
operands[1] = gen_lowpart (SImode, operands[1]);
}
[(set_attr "arch" "t2,any")
(set_attr "length" "4,8")
(set_attr "predicable" "yes")
(set_attr "type" "store2")])
(define_insn "*extv_reg" (define_insn "*extv_reg"
[(set (match_operand:SI 0 "s_register_operand" "=r") [(set (match_operand:SI 0 "s_register_operand" "=r")
(sign_extract:SI (match_operand:SI 1 "s_register_operand" "r") (sign_extract:SI (match_operand:SI 1 "s_register_operand" "r")
...@@ -7374,10 +7432,18 @@ ...@@ -7374,10 +7432,18 @@
(match_operand:BLK 1 "general_operand" "") (match_operand:BLK 1 "general_operand" "")
(match_operand:SI 2 "const_int_operand" "") (match_operand:SI 2 "const_int_operand" "")
(match_operand:SI 3 "const_int_operand" "")] (match_operand:SI 3 "const_int_operand" "")]
"TARGET_EITHER" ""
" "
if (TARGET_32BIT) if (TARGET_32BIT)
{ {
if (TARGET_LDRD && current_tune->prefer_ldrd_strd
&& !optimize_function_for_size_p (cfun))
{
if (gen_movmem_ldrd_strd (operands))
DONE;
FAIL;
}
if (arm_gen_movmemqi (operands)) if (arm_gen_movmemqi (operands))
DONE; DONE;
FAIL; FAIL;
......
2013-05-16 Greta Yorsh <Greta.Yorsh@arm.com>
* gcc.target/arm/unaligned-memcpy-2.c: Adjust expected output.
* gcc.target/arm/unaligned-memcpy-3.c: Likewise.
* gcc.target/arm/unaligned-memcpy-4.c: Likewise.
2013-05-16 Nathan Sidwell <nathan@codesourcery.com> 2013-05-16 Nathan Sidwell <nathan@codesourcery.com>
* gcc.dg/visibility-21.c: New. * gcc.dg/visibility-21.c: New.
......
...@@ -14,7 +14,10 @@ void aligned_dest (char *src) ...@@ -14,7 +14,10 @@ void aligned_dest (char *src)
/* Expect a multi-word store for the main part of the copy, but subword /* Expect a multi-word store for the main part of the copy, but subword
loads/stores for the remainder. */ loads/stores for the remainder. */
/* { dg-final { scan-assembler-times "stmia" 1 } } */ /* { dg-final { scan-assembler-times "ldmia" 0 } } */
/* { dg-final { scan-assembler-times "ldrd" 0 } } */
/* { dg-final { scan-assembler-times "stmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
/* { dg-final { scan-assembler-times "strd" 1 { target { arm_prefer_ldrd_strd } } } } */
/* { dg-final { scan-assembler-times "ldrh" 1 } } */ /* { dg-final { scan-assembler-times "ldrh" 1 } } */
/* { dg-final { scan-assembler-times "strh" 1 } } */ /* { dg-final { scan-assembler-times "strh" 1 } } */
/* { dg-final { scan-assembler-times "ldrb" 1 } } */ /* { dg-final { scan-assembler-times "ldrb" 1 } } */
......
...@@ -14,8 +14,11 @@ void aligned_src (char *dest) ...@@ -14,8 +14,11 @@ void aligned_src (char *dest)
/* Expect a multi-word load for the main part of the copy, but subword /* Expect a multi-word load for the main part of the copy, but subword
loads/stores for the remainder. */ loads/stores for the remainder. */
/* { dg-final { scan-assembler-times "ldmia" 1 } } */ /* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
/* { dg-final { scan-assembler-times "ldrh" 1 } } */ /* { dg-final { scan-assembler-times "ldrd" 1 { target { arm_prefer_ldrd_strd } } } } */
/* { dg-final { scan-assembler-times "strd" 0 } } */
/* { dg-final { scan-assembler-times "stm" 0 } } */
/* { dg-final { scan-assembler-times "ldrh" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
/* { dg-final { scan-assembler-times "strh" 1 } } */ /* { dg-final { scan-assembler-times "strh" 1 } } */
/* { dg-final { scan-assembler-times "ldrb" 1 } } */ /* { dg-final { scan-assembler-times "ldrb" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
/* { dg-final { scan-assembler-times "strb" 1 } } */ /* { dg-final { scan-assembler-times "strb" 1 } } */
...@@ -14,5 +14,9 @@ void aligned_both (void) ...@@ -14,5 +14,9 @@ void aligned_both (void)
/* We know both src and dest to be aligned: expect multiword loads/stores. */ /* We know both src and dest to be aligned: expect multiword loads/stores. */
/* { dg-final { scan-assembler-times "ldmia" 1 } } */ /* { dg-final { scan-assembler-times "ldmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
/* { dg-final { scan-assembler-times "stmia" 1 } } */ /* { dg-final { scan-assembler-times "stmia" 1 { target { ! { arm_prefer_ldrd_strd } } } } } */
/* { dg-final { scan-assembler "ldrd" { target { arm_prefer_ldrd_strd } } } } */
/* { dg-final { scan-assembler-times "ldm" 0 { target { arm_prefer_ldrd_strd } } } } */
/* { dg-final { scan-assembler "strd" { target { arm_prefer_ldrd_strd } } } } */
/* { dg-final { scan-assembler-times "stm" 0 { target { arm_prefer_ldrd_strd } } } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment