Commit a1c6b246 by Richard Sandiford (committed by Richard Sandiford)

mips.h (MOVE_MAX): Use UNITS_PER_WORD and describe MIPS-specific implementation details.

gcc/
	* config/mips/mips.h (MOVE_MAX): Use UNITS_PER_WORD and describe
	MIPS-specific implementation details.
	(MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER): New macro.
	(MIPS_MAX_MOVE_BYTES_STRAIGHT): Likewise.
	(MOVE_RATIO): Define to MIPS_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD
	for targets with movmemsi.
	(MOVE_BY_PIECES_P): Define.
	* config/mips/mips.c (MAX_MOVE_REGS, MAX_MOVE_BYTES): Delete.
	(mips_block_move_loop): Add a bytes_per_iter argument.
	(mips_expand_block_move): Use MIPS_MAX_MOVE_BYTES_STRAIGHT.
	Update call to mips_block_move_loop.

From-SVN: r129605
gcc/config/mips/mips.c

@@ -5622,9 +5622,6 @@ mips_expand_fcc_reload (rtx dest, rtx src, rtx scratch)
   emit_insn (gen_slt_sf (dest, fp2, fp1));
 }
 
-#define MAX_MOVE_REGS 4
-#define MAX_MOVE_BYTES (MAX_MOVE_REGS * UNITS_PER_WORD)
-
 /* Emit straight-line code to move LENGTH bytes from SRC to DEST.
    Assume that the areas do not overlap.  */
@@ -5710,22 +5707,23 @@ mips_adjust_block_mem (rtx mem, HOST_WIDE_INT length,
   set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
 }
 
-/* Move LENGTH bytes from SRC to DEST using a loop that moves MAX_MOVE_BYTES
-   per iteration.  LENGTH must be at least MAX_MOVE_BYTES.  Assume that the
-   memory regions do not overlap.  */
+/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
+   bytes at a time.  LENGTH must be at least BYTES_PER_ITER.  Assume that
+   the memory regions do not overlap.  */
 
 static void
-mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length)
+mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
+                      HOST_WIDE_INT bytes_per_iter)
 {
   rtx label, src_reg, dest_reg, final_src;
   HOST_WIDE_INT leftover;
 
-  leftover = length % MAX_MOVE_BYTES;
+  leftover = length % bytes_per_iter;
   length -= leftover;
 
   /* Create registers and memory references for use within the loop.  */
-  mips_adjust_block_mem (src, MAX_MOVE_BYTES, &src_reg, &src);
-  mips_adjust_block_mem (dest, MAX_MOVE_BYTES, &dest_reg, &dest);
+  mips_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
+  mips_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
 
   /* Calculate the value that SRC_REG should have after the last iteration
      of the loop.  */
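As a side note (not part of the patch), the way mips_block_move_loop splits a copy into full iterations plus a straight-line tail can be sketched with plain host arithmetic.  The sketch below assumes a 32-bit target, where the caller passes a bytes_per_iter of 16 (UNITS_PER_WORD * 4); the 100-byte length is just an example value.

/* Illustrative sketch only: the leftover/iteration split computed by
   mips_block_move_loop, redone on the host for one example length.  */
#include <stdio.h>

int
main (void)
{
  long length = 100;                        /* hypothetical copy size */
  long bytes_per_iter = 16;                 /* UNITS_PER_WORD * 4 on a 32-bit target */
  long leftover = length % bytes_per_iter;  /* 4 bytes, copied straight-line */
  long in_loop = length - leftover;         /* 96 bytes, copied by the loop */

  printf ("%ld iterations of %ld bytes, then a %ld-byte tail\n",
          in_loop / bytes_per_iter, bytes_per_iter, leftover);
  return 0;
}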
@@ -5737,11 +5735,11 @@ mips_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length)
   emit_label (label);
 
   /* Emit the loop body.  */
-  mips_block_move_straight (dest, src, MAX_MOVE_BYTES);
+  mips_block_move_straight (dest, src, bytes_per_iter);
 
   /* Move on to the next block.  */
-  mips_emit_move (src_reg, plus_constant (src_reg, MAX_MOVE_BYTES));
-  mips_emit_move (dest_reg, plus_constant (dest_reg, MAX_MOVE_BYTES));
+  mips_emit_move (src_reg, plus_constant (src_reg, bytes_per_iter));
+  mips_emit_move (dest_reg, plus_constant (dest_reg, bytes_per_iter));
 
   /* Emit the loop condition.  */
   if (Pmode == DImode)
@@ -5763,14 +5761,15 @@ mips_expand_block_move (rtx dest, rtx src, rtx length)
 {
   if (GET_CODE (length) == CONST_INT)
     {
-      if (INTVAL (length) <= 2 * MAX_MOVE_BYTES)
+      if (INTVAL (length) <= MIPS_MAX_MOVE_BYTES_STRAIGHT)
        {
          mips_block_move_straight (dest, src, INTVAL (length));
          return true;
        }
       else if (optimize)
        {
-         mips_block_move_loop (dest, src, INTVAL (length));
+         mips_block_move_loop (dest, src, INTVAL (length),
+                               MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER);
          return true;
        }
     }
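To make the new thresholds concrete (this note is not part of the patch): a constant-length copy of at most MIPS_MAX_MOVE_BYTES_STRAIGHT bytes is expanded as unrolled loads and stores, anything longer is expanded as a loop when optimizing, and other cases fall back to a library call.  A simplified paraphrase, using stand-in names and the 32-bit values (32-byte straight-line limit, 16 bytes per loop iteration), might look like this:

/* Illustrative paraphrase of the dispatch above; the enum and the
   hard-coded limits are stand-ins for this example, not GCC interfaces.  */
enum block_move_strategy { MOVE_STRAIGHT, MOVE_LOOP, MOVE_LIBCALL };

static enum block_move_strategy
choose_block_move (long length, int optimizing)
{
  const long max_straight = 32;   /* MIPS_MAX_MOVE_BYTES_STRAIGHT on a 32-bit target */

  if (length <= max_straight)
    return MOVE_STRAIGHT;         /* unrolled word loads/stores, no loop */
  else if (optimizing)
    return MOVE_LOOP;             /* loop copying 16 bytes per iteration */
  return MOVE_LIBCALL;            /* let memcpy handle it */
}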
gcc/config/mips/mips.h

@@ -2338,9 +2338,10 @@ typedef struct mips_args {
 #define DEFAULT_SIGNED_CHAR 1
 #endif
 
-/* Max number of bytes we can move from memory to memory
-   in one reasonably fast instruction.  */
-#define MOVE_MAX (TARGET_64BIT ? 8 : 4)
+/* Although LDC1 and SDC1 provide 64-bit moves on 32-bit targets,
+   we generally don't want to use them for copying arbitrary data.
+   A single N-word move is usually the same cost as N single-word moves.  */
+#define MOVE_MAX UNITS_PER_WORD
 #define MAX_MOVE_MAX 8
 
 /* Define this macro as a C expression which is nonzero if
@@ -2769,6 +2770,18 @@ while (0)
 #undef PTRDIFF_TYPE
 #define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int")
 
+/* The maximum number of bytes that can be copied by one iteration of
+   a movmemsi loop; see mips_block_move_loop.  */
+#define MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER \
+  (UNITS_PER_WORD * 4)
+
+/* The maximum number of bytes that can be copied by a straight-line
+   implementation of movmemsi; see mips_block_move_straight.  We want
+   to make sure that any loop-based implementation will iterate at
+   least twice.  */
+#define MIPS_MAX_MOVE_BYTES_STRAIGHT \
+  (MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER * 2)
+
 /* The base cost of a memcpy call, for MOVE_RATIO and friends.  These
    values were determined experimentally by benchmarking with CSiBE.
    In theory, the call overhead is higher for TARGET_ABICALLS (especially
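A quick sanity check of the values these macros take (not part of the patch): with UNITS_PER_WORD of 4 on 32-bit targets and 8 on 64-bit targets, the loop copies 16 or 32 bytes per iteration and the straight-line limit is 32 or 64 bytes, matching the old MAX_MOVE_BYTES (4 * UNITS_PER_WORD) and the old 2 * MAX_MOVE_BYTES cutoff in mips_expand_block_move.  A host-side sketch of the 32-bit case:

/* Host-side illustration only; UNITS_PER_WORD is fixed at 4 here to
   stand in for a 32-bit MIPS target.  */
#define UNITS_PER_WORD 4
#define MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER (UNITS_PER_WORD * 4)
#define MIPS_MAX_MOVE_BYTES_STRAIGHT (MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER * 2)

/* The new limits reproduce the old MAX_MOVE_BYTES and 2 * MAX_MOVE_BYTES.  */
_Static_assert (MIPS_MAX_MOVE_BYTES_PER_LOOP_ITER == 16, "16 bytes per iteration");
_Static_assert (MIPS_MAX_MOVE_BYTES_STRAIGHT == 32, "32-byte straight-line limit");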
@@ -2778,23 +2791,39 @@ while (0)
 #define MIPS_CALL_RATIO 8
 
-/* Define MOVE_RATIO to encourage use of movmemsi when enabled,
-   since it should always generate code at least as good as
-   move_by_pieces().  But when inline movmemsi pattern is disabled
-   (i.e., with -mips16 or -mmemcpy), instead use a value approximating
-   the length of a memcpy call sequence, so that move_by_pieces will
-   generate inline code if it is shorter than a function call.
-   Since move_by_pieces_ninsns() counts memory-to-memory moves, but
-   we'll have to generate a load/store pair for each, halve the value of
-   MIPS_CALL_RATIO to take that into account.
-   The default value for MOVE_RATIO when HAVE_movmemsi is true is 2.
-   There is no point to setting it to less than this to try to disable
-   move_by_pieces entirely, because that also disables some desirable
-   tree-level optimizations, specifically related to optimizing a
-   one-byte string copy into a simple move byte operation.  */
-
-#define MOVE_RATIO \
-  ((TARGET_MIPS16 || TARGET_MEMCPY) ? MIPS_CALL_RATIO / 2 : 2)
+/* Any loop-based implementation of movmemsi will have at least
+   MIPS_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory
+   moves, so allow individual copies of fewer elements.
+
+   When movmemsi is not available, use a value approximating
+   the length of a memcpy call sequence, so that move_by_pieces
+   will generate inline code if it is shorter than a function call.
+   Since move_by_pieces_ninsns counts memory-to-memory moves, but
+   we'll have to generate a load/store pair for each, halve the
+   value of MIPS_CALL_RATIO to take that into account.  */
+
+#define MOVE_RATIO \
+  (HAVE_movmemsi \
+   ? MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX \
+   : MIPS_CALL_RATIO / 2)
+
+/* movmemsi is meant to generate code that is at least as good as
+   move_by_pieces.  However, movmemsi effectively uses a by-pieces
+   implementation both for moves smaller than a word and for
+   word-aligned moves of no more than MIPS_MAX_MOVE_BYTES_STRAIGHT
+   bytes.  We should allow the tree-level optimisers to do such
+   moves by pieces, as it often exposes other optimization
+   opportunities.  We might as well continue to use movmemsi at
+   the rtl level though, as it produces better code when
+   scheduling is disabled (such as at -O).  */
+
+#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
+  (HAVE_movmemsi \
+   ? (!currently_expanding_to_rtl \
+      && ((ALIGN) < BITS_PER_WORD \
+          ? (SIZE) < UNITS_PER_WORD \
+          : (SIZE) <= MIPS_MAX_MOVE_BYTES_STRAIGHT)) \
+   : (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
+      < (unsigned int) MOVE_RATIO))
 
 /* For CLEAR_RATIO, when optimizing for size, give a better estimate
    of the length of a memset call, but use the default otherwise.  */
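To illustrate the combined effect (this note is not part of the patch): on a 32-bit target with movmemsi available, MOVE_RATIO becomes MIPS_MAX_MOVE_BYTES_STRAIGHT / MOVE_MAX = 32 / 4 = 8, and without movmemsi (for example with -mmemcpy) it is MIPS_CALL_RATIO / 2 = 4.  The movmemsi branch of MOVE_BY_PIECES_P then lets the tree-level optimizers handle sub-word copies and word-aligned copies of up to 32 bytes, while leaving RTL expansion to movmemsi.  A host-side paraphrase of that branch, with the 32-bit constants written out:

/* Illustrative paraphrase of the HAVE_movmemsi branch of
   MOVE_BY_PIECES_P above.  Here 4 and 32 stand in for UNITS_PER_WORD
   and MIPS_MAX_MOVE_BYTES_STRAIGHT on a 32-bit target, the alignment
   is compared against BITS_PER_WORD (32 bits), and
   currently_expanding_to_rtl is assumed to be false.  */
#include <stdio.h>

static int
move_by_pieces_p (unsigned long size, unsigned int align_in_bits)
{
  return align_in_bits < 32 ? size < 4 : size <= 32;
}

int
main (void)
{
  printf ("%d\n", move_by_pieces_p (3, 8));    /* 1: sub-word, byte-aligned copy */
  printf ("%d\n", move_by_pieces_p (8, 8));    /* 0: left to movmemsi */
  printf ("%d\n", move_by_pieces_p (24, 32));  /* 1: word-aligned, within 32 bytes */
  printf ("%d\n", move_by_pieces_p (40, 32));  /* 0: over the straight-line limit */
  return 0;
}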