Commit 5ec3397e by Aaron Sawdey

rs6000-string.c (do_load_for_compare_from_addr): New function.

2018-01-08  Aaron Sawdey  <acsawdey@linux.vnet.ibm.com>

	* config/rs6000/rs6000-string.c (do_load_for_compare_from_addr): New
	function.
	(do_ifelse): New function.
	(do_isel): New function.
	(do_sub3): New function.
	(do_add3): New function.
	(do_load_mask_compare): New function.
	(do_overlap_load_compare): New function.
	(expand_compare_loop): New function.
	(expand_block_compare): Call expand_compare_loop() when appropriate.
	* config/rs6000/rs6000.opt (-mblock-compare-inline-limit): Change
	option description.
	(-mblock-compare-inline-loop-limit): New option.

From-SVN: r256351
parent 5a2a87e1
...@@ -303,6 +303,959 @@ compute_current_alignment (unsigned HOST_WIDE_INT base_align, ...@@ -303,6 +303,959 @@ compute_current_alignment (unsigned HOST_WIDE_INT base_align,
return MIN (base_align, offset & -offset); return MIN (base_align, offset & -offset);
} }
/* Prepare address and then do a load.

   MODE is the mode to use for the load.
   DEST is the destination register for the data.
   ADDR is the address to be loaded.
   ORIG_ADDR is the original MEM rtx whose attributes (alias set,
   alignment, etc.) are copied onto the new MEM so the RTL optimizers
   have accurate information about this access.  */
static void
do_load_for_compare_from_addr (machine_mode mode, rtx dest, rtx addr,
			       rtx orig_addr)
{
  rtx mem = gen_rtx_MEM (mode, addr);
  MEM_COPY_ATTRIBUTES (mem, orig_addr);
  /* The access is exactly one load of MODE, regardless of what the
     original MEM's size was.  */
  set_mem_size (mem, GET_MODE_SIZE (mode));
  do_load_for_compare (dest, mem, mode);
}
/* Do a branch for an if/else decision.

   CMPMODE is the mode to use for the comparison.
   COMPARISON is the rtx code for the compare needed.
   A is the first thing to be compared.
   B is the second thing to be compared.
   CR is the condition code reg input, or NULL_RTX.
   TRUE_LABEL is the label to branch to if the condition is true.

   If CR is NULL_RTX, then a new register of CMPMODE is generated and
   used for the compare (the function returns void, so the caller
   cannot retrieve it).
   If A and B are both NULL_RTX, then CR must not be null, and the
   compare is not generated so you can use this with a dot form insn
   that already set the condition register.  */
static void
do_ifelse (machine_mode cmpmode, rtx_code comparison,
	   rtx a, rtx b, rtx cr, rtx true_label)
{
  /* Either both compare operands are given (we emit the compare here),
     or neither is and the caller supplies an already-set CR.  */
  gcc_assert ((a == NULL_RTX && b == NULL_RTX && cr != NULL_RTX)
	      || (a != NULL_RTX && b != NULL_RTX));

  if (cr != NULL_RTX)
    gcc_assert (GET_MODE (cr) == cmpmode);
  else
    cr = gen_reg_rtx (cmpmode);

  rtx label_ref = gen_rtx_LABEL_REF (VOIDmode, true_label);

  if (a != NULL_RTX)
    emit_move_insn (cr, gen_rtx_COMPARE (cmpmode, a, b));

  rtx cmp_rtx = gen_rtx_fmt_ee (comparison, VOIDmode, cr, const0_rtx);

  rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, label_ref, pc_rtx);
  rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
  JUMP_LABEL (j) = true_label;
  /* Keep the label's use count accurate so it is not deleted.  */
  LABEL_NUSES (true_label) += 1;
}
/* Emit an isel of the proper mode for DEST.

   DEST is the isel destination register.
   CMP is the comparison rtx testing CR (e.g. an LE against zero).
   SRC_T is the isel source if the condition is true.
   SRC_F is the isel source if the condition is false.
   CR is the condition register for the isel.  */
static void
do_isel (rtx dest, rtx cmp, rtx src_t, rtx src_f, rtx cr)
{
  /* Choose the isel pattern matching the width of DEST.  */
  if (GET_MODE (dest) == DImode)
    emit_insn (gen_isel_signed_di (dest, cmp, src_t, src_f, cr));
  else
    emit_insn (gen_isel_signed_si (dest, cmp, src_t, src_f, cr));
}
/* Emit a subtract of the proper mode for DEST.

   DEST is the destination register for the subtract.
   SRC1 is the first subtract input.
   SRC2 is the second subtract input.
   Computes DEST = SRC1-SRC2.  */
static void
do_sub3 (rtx dest, rtx src1, rtx src2)
{
  /* Select the subtract expander by the width of DEST; only the
     chosen generator is invoked.  */
  bool dest_is_di = (GET_MODE (dest) == DImode);
  emit_insn (dest_is_di ? gen_subdi3 (dest, src1, src2)
			: gen_subsi3 (dest, src1, src2));
}
/* Emit an add of the proper mode for DEST.

   DEST is the destination register for the add.
   SRC1 is the first add input.
   SRC2 is the second add input.
   Computes DEST = SRC1+SRC2.  */
static void
do_add3 (rtx dest, rtx src1, rtx src2)
{
  /* Select the add expander by the width of DEST; only the chosen
     generator is invoked.  */
  bool dest_is_di = (GET_MODE (dest) == DImode);
  emit_insn (dest_is_di ? gen_adddi3 (dest, src1, src2)
			: gen_addsi3 (dest, src1, src2));
}
/* Generate rtl for a load, shift, and compare of less than a full word.

   LOAD_MODE is the machine mode for the loads.
   DIFF is the reg for the difference.
   CMP_REM is the reg containing the remaining bytes to compare.
   DCOND is the CCUNS reg for the compare if we are doing P9 code with setb.
   SRC1_ADDR is the first source address.
   SRC2_ADDR is the second source address.
   ORIG_SRC1 is the original first source block's address rtx.
   ORIG_SRC2 is the original second source block's address rtx.  */
static void
do_load_mask_compare (const machine_mode load_mode, rtx diff, rtx cmp_rem, rtx dcond,
		      rtx src1_addr, rtx src2_addr, rtx orig_src1, rtx orig_src2)
{
  HOST_WIDE_INT load_mode_size = GET_MODE_SIZE (load_mode);
  rtx shift_amount = gen_reg_rtx (word_mode);
  rtx d1 = gen_reg_rtx (word_mode);
  rtx d2 = gen_reg_rtx (word_mode);

  /* Load a full LOAD_MODE chunk from each source, then shift off the
     bytes beyond the remaining length so they do not affect the
     compare.  */
  do_load_for_compare_from_addr (load_mode, d1, src1_addr, orig_src1);
  do_load_for_compare_from_addr (load_mode, d2, src2_addr, orig_src2);

  /* Bytes to discard = load_mode_size - cmp_rem, converted to a bit
     count below by the shift-left of LOG2_BITS_PER_UNIT.  */
  do_sub3 (shift_amount, GEN_INT (load_mode_size), cmp_rem);

  if (word_mode == DImode)
    {
      emit_insn (gen_ashldi3 (shift_amount, shift_amount,
			      GEN_INT (LOG2_BITS_PER_UNIT)));
      /* The DImode shift patterns take an SImode shift count.  */
      emit_insn (gen_lshrdi3 (d1, d1,
			      gen_lowpart (SImode, shift_amount)));
      emit_insn (gen_lshrdi3 (d2, d2,
			      gen_lowpart (SImode, shift_amount)));
    }
  else
    {
      emit_insn (gen_ashlsi3 (shift_amount, shift_amount,
			      GEN_INT (LOG2_BITS_PER_UNIT)));
      emit_insn (gen_lshrsi3 (d1, d1, shift_amount));
      emit_insn (gen_lshrsi3 (d2, d2, shift_amount));
    }

  if (TARGET_P9_MISC)
    {
      /* Generate a compare, and convert with a setb later.  */
      rtx cmp = gen_rtx_COMPARE (CCUNSmode, d1, d2);
      emit_insn (gen_rtx_SET (dcond, cmp));
    }
  else
    {
      /* Pre-P9: record the difference with a carrying subtract so the
	 64->32 conversion code can produce the final result.  */
      if (word_mode == DImode)
	emit_insn (gen_subfdi3_carry (diff, d2, d1));
      else
	emit_insn (gen_subfsi3_carry (diff, d2, d1));
    }
}
/* Generate rtl for an overlapping load and compare of less than a
   full load_mode.  This assumes that the previous word is part of the
   block being compared so it's ok to back up part of a word so we can
   compare the last unaligned full word that ends at the end of the block.

   LOAD_MODE is the machine mode for the loads.
   IS_CONST tells whether the remaining length is a constant or in a register.
   BYTES_REM is the remaining length if IS_CONST is true.
   DIFF is the reg for the difference.
   CMP_REM is the reg containing the remaining bytes to compare if !IS_CONST.
   DCOND is the CCUNS reg for the compare if we are doing P9 code with setb.
   SRC1_ADDR is the first source address.
   SRC2_ADDR is the second source address.
   ORIG_SRC1 is the original first source block's address rtx.
   ORIG_SRC2 is the original second source block's address rtx.  */
static void
do_overlap_load_compare (machine_mode load_mode, bool is_const,
			 HOST_WIDE_INT bytes_rem, rtx diff,
			 rtx cmp_rem, rtx dcond, rtx src1_addr, rtx src2_addr,
			 rtx orig_src1, rtx orig_src2)
{
  HOST_WIDE_INT load_mode_size = GET_MODE_SIZE (load_mode);
  HOST_WIDE_INT addr_adj = load_mode_size - bytes_rem;
  rtx d1 = gen_reg_rtx (word_mode);
  rtx d2 = gen_reg_rtx (word_mode);

  rtx addr1, addr2;
  if (!is_const || addr_adj)
    {
      /* Back up the addresses so the full-width load ends exactly at
	 the end of the block.  For a constant length the adjustment is
	 known at compile time; otherwise compute the (negative) value
	 cmp_rem - load_mode_size at runtime.  */
      rtx adj_reg = gen_reg_rtx (word_mode);
      if (is_const)
	emit_move_insn (adj_reg, GEN_INT (-addr_adj));
      else
	{
	  rtx reg_lms = gen_reg_rtx (word_mode);
	  emit_move_insn (reg_lms, GEN_INT (load_mode_size));
	  do_sub3 (adj_reg, cmp_rem, reg_lms);
	}

      addr1 = gen_rtx_PLUS (word_mode, src1_addr, adj_reg);
      addr2 = gen_rtx_PLUS (word_mode, src2_addr, adj_reg);
    }
  else
    {
      /* Constant length equal to load_mode_size: no back-up needed.  */
      addr1 = src1_addr;
      addr2 = src2_addr;
    }

  do_load_for_compare_from_addr (load_mode, d1, addr1, orig_src1);
  do_load_for_compare_from_addr (load_mode, d2, addr2, orig_src2);

  if (TARGET_P9_MISC)
    {
      /* Generate a compare, and convert with a setb later.  */
      rtx cmp = gen_rtx_COMPARE (CCUNSmode, d1, d2);
      emit_insn (gen_rtx_SET (dcond, cmp));
    }
  else
    {
      /* Pre-P9: record the difference with a carrying subtract so the
	 64->32 conversion code can produce the final result.  */
      if (word_mode == DImode)
	emit_insn (gen_subfdi3_carry (diff, d2, d1));
      else
	emit_insn (gen_subfsi3_carry (diff, d2, d1));
    }
}
/* Expand a block compare operation using loop code, and return true
   if successful.  Return false if we should let the compiler generate
   normal code, probably a memcmp call.

   OPERANDS[0] is the target (result).
   OPERANDS[1] is the first source.
   OPERANDS[2] is the second source.
   OPERANDS[3] is the length.
   OPERANDS[4] is the alignment.  */
bool
expand_compare_loop (rtx operands[])
{
  rtx target = operands[0];
  rtx orig_src1 = operands[1];
  rtx orig_src2 = operands[2];
  rtx bytes_rtx = operands[3];
  rtx align_rtx = operands[4];

  /* This case is complicated to handle because the subtract
     with carry instructions do not generate the 64-bit
     carry and so we must emit code to calculate it ourselves.
     We choose not to implement this yet.  */
  if (TARGET_32BIT && TARGET_POWERPC64)
    return false;

  /* Allow non-const length.  */
  int bytes_is_const = CONST_INT_P (bytes_rtx);

  /* This must be a fixed size alignment.  */
  if (!CONST_INT_P (align_rtx))
    return false;

  HOST_WIDE_INT align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
  HOST_WIDE_INT align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
  HOST_WIDE_INT minalign = MIN (align1, align2);

  bool isP7 = (rs6000_tune == PROCESSOR_POWER7);

  gcc_assert (GET_MODE (target) == SImode);

  /* Anything to move?  */
  HOST_WIDE_INT bytes = 0;
  if (bytes_is_const)
    bytes = INTVAL (bytes_rtx);

  if (bytes_is_const && bytes == 0)
    return true;

  /* Limit the amount we compare, if known statically.  Per-processor
     tuning: 0 means do not use the inline loop at all for that case.  */
  HOST_WIDE_INT max_bytes;
  switch (rs6000_tune)
    {
    case PROCESSOR_POWER7:
      if (!bytes_is_const)
	if (minalign < 8)
	  max_bytes = 0;
	else
	  max_bytes = 128;
      else
	if (minalign < 8)
	  max_bytes = 32;
	else
	  max_bytes = 128;
      break;
    case PROCESSOR_POWER8:
      if (!bytes_is_const)
	max_bytes = 0;
      else
	if (minalign < 8)
	  max_bytes = 128;
	else
	  max_bytes = 64;
      break;
    case PROCESSOR_POWER9:
      if (bytes_is_const)
	max_bytes = 191;
      else
	max_bytes = 0;
      break;
    default:
      max_bytes = 128;
    }

  /* Allow the option to override the default.  */
  if (rs6000_block_compare_inline_loop_limit >= 0)
    max_bytes = (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_loop_limit;

  if (max_bytes == 0)
    return false;

  rtx cmp_rem = gen_reg_rtx (word_mode);  /* Remainder for library call.  */
  rtx loop_cmp = gen_reg_rtx (word_mode); /* Actual amount compared by loop.  */
  HOST_WIDE_INT niter;
  rtx iter = gen_reg_rtx (word_mode);
  rtx iv1 = gen_reg_rtx (word_mode);
  rtx iv2 = gen_reg_rtx (word_mode);
  rtx d1_1 = gen_reg_rtx (word_mode);  /* Addr expression src1+iv1  */
  rtx d1_2 = gen_reg_rtx (word_mode);  /* Addr expression src1+iv2  */
  rtx d2_1 = gen_reg_rtx (word_mode);  /* Addr expression src2+iv1  */
  rtx d2_2 = gen_reg_rtx (word_mode);  /* Addr expression src2+iv2  */

  /* Strip unneeded subreg from length if there is one.  */
  if (SUBREG_P (bytes_rtx) && subreg_lowpart_p (bytes_rtx))
    bytes_rtx = SUBREG_REG (bytes_rtx);
  /* Extend bytes_rtx to word_mode if needed.  But, we expect only to
     maybe have to deal with the case where bytes_rtx is SImode and
     word_mode is DImode.  */
  if (!bytes_is_const)
    {
      if (GET_MODE_SIZE (GET_MODE (bytes_rtx)) > GET_MODE_SIZE (word_mode))
	/* Do not expect length longer than word_mode.  */
	return false;
      else if (GET_MODE_SIZE (GET_MODE (bytes_rtx)) < GET_MODE_SIZE (word_mode))
	{
	  bytes_rtx = force_reg (GET_MODE (bytes_rtx), bytes_rtx);
	  bytes_rtx = force_reg (word_mode,
				 gen_rtx_fmt_e (ZERO_EXTEND, word_mode,
						bytes_rtx));
	}
      else
	/* Make sure it's in a register before we get started.  */
	bytes_rtx = force_reg (GET_MODE (bytes_rtx), bytes_rtx);
    }

  machine_mode load_mode = word_mode;
  HOST_WIDE_INT load_mode_size = GET_MODE_SIZE (load_mode);

  /* Number of bytes per iteration of the unrolled loop.  */
  HOST_WIDE_INT loop_bytes = 2 * load_mode_size;
  /* max iters and bytes compared in the loop.  */
  HOST_WIDE_INT max_loop_iter = max_bytes / loop_bytes;
  HOST_WIDE_INT max_loop_bytes = max_loop_iter * loop_bytes;
  int l2lb = floor_log2 (loop_bytes);

  /* Constant lengths outside [load_mode_size, max_bytes] are better
     handled elsewhere (short inline code or a library call).  */
  if (bytes_is_const && (max_bytes < load_mode_size
			 || !IN_RANGE (bytes, load_mode_size, max_bytes)))
    return false;

  bool no_remainder_code = false;
  rtx final_label = gen_label_rtx ();
  rtx final_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
  rtx diff_label = gen_label_rtx ();
  rtx library_call_label = NULL;
  rtx cleanup_label = gen_label_rtx ();

  rtx cr;

  rtx src1_addr = copy_addr_to_reg (XEXP (orig_src1, 0));
  rtx src2_addr = copy_addr_to_reg (XEXP (orig_src2, 0));

  /* Difference found is stored here before jump to diff_label.  */
  rtx diff = gen_reg_rtx (word_mode);
  rtx j;

  /* Example of generated code for 35 bytes aligned 1 byte.

	     mtctr 8
	     li 6,0
	     li 5,8
     .L13:
	     ldbrx 7,3,6
	     ldbrx 9,10,6
	     ldbrx 0,3,5
	     ldbrx 4,10,5
	     addi 6,6,16
	     addi 5,5,16
	     subfc. 9,9,7
	     bne 0,.L10
	     subfc. 9,4,0
	     bdnzt 2,.L13
	     bne 0,.L10
	     add 3,3,6
	     add 10,10,6
	     addi 9,3,-5
	     ldbrx 7,0,9
	     addi 9,10,-5
	     ldbrx 9,0,9
	     subfc 9,9,7
	     .p2align 4,,15
     .L10:
	     popcntd 9,9
	     subfe 10,10,10
	     or 9,9,10

     Compiled with -fno-reorder-blocks for clarity.  */

  /* Structure of what we're going to do:
     Two separate lengths: what we will compare before bailing to library
	call (max_bytes), and the total length to be checked.
     if length <= 16, branch to linear cleanup code starting with
	remainder length check (length not known at compile time)
     set up 2 iv's and load count reg, compute remainder length
     unrollx2 compare loop
     if loop exit due to a difference, branch to difference handling code
     if remainder length < 8, branch to final cleanup compare
     load and compare 8B
     final cleanup comparison (depends on alignment and length)
	load 8B, shift off bytes past length, compare
	load 8B ending at last byte and compare
	load/compare 1 byte at a time (short block abutting 4k boundary)
     difference handling, 64->32 conversion
     final result
     branch around memcmp call
     memcmp library call
  */

  /* If bytes is not const, compare length and branch directly
     to the cleanup code that can handle 0-16 bytes if length
     is >= 16.  Stash away bytes-max_bytes for the library call.  */
  if (bytes_is_const)
    {
      /* These need to be set for some of the places we may jump to.  */
      if (bytes > max_bytes)
	{
	  no_remainder_code = true;
	  niter = max_loop_iter;
	  library_call_label = gen_label_rtx ();
	}
      else
	{
	  niter = bytes / loop_bytes;
	}
      emit_move_insn (iter, GEN_INT (niter));
      emit_move_insn (loop_cmp, GEN_INT (niter * loop_bytes));
      emit_move_insn (cmp_rem, GEN_INT (bytes - niter * loop_bytes));
    }
  else
    {
      library_call_label = gen_label_rtx ();

      /* If we go to the cleanup code, it expects length to be in cmp_rem.  */
      emit_move_insn (cmp_rem, bytes_rtx);

      /* Check for > max_bytes bytes.  We want to bail out as quickly as
	 possible if we have to go over to memcmp.  */
      do_ifelse (CCmode, GT, bytes_rtx, GEN_INT (max_bytes),
		 NULL_RTX, library_call_label);

      /* Check for < loop_bytes bytes.  */
      do_ifelse (CCmode, LT, bytes_rtx, GEN_INT (loop_bytes),
		 NULL_RTX, cleanup_label);

      /* Loop compare bytes and iterations if bytes>max_bytes.  */
      rtx mb_reg = gen_reg_rtx (word_mode);
      emit_move_insn (mb_reg, GEN_INT (max_loop_bytes));
      rtx mi_reg = gen_reg_rtx (word_mode);
      emit_move_insn (mi_reg, GEN_INT (max_loop_iter));

      /* Compute number of loop iterations if bytes <= max_bytes.  */
      if (word_mode == DImode)
	emit_insn (gen_lshrdi3 (iter, bytes_rtx, GEN_INT (l2lb)));
      else
	emit_insn (gen_lshrsi3 (iter, bytes_rtx, GEN_INT (l2lb)));

      /* Compute bytes to compare in loop if bytes <= max_bytes.  */
      rtx mask = GEN_INT (HOST_WIDE_INT_M1U << l2lb);
      if (word_mode == DImode)
	{
	  emit_insn (gen_anddi3 (loop_cmp, bytes_rtx, mask));
	}
      else
	{
	  emit_insn (gen_andsi3 (loop_cmp, bytes_rtx, mask));
	}

      /* Check for bytes <= max_bytes.  */
      if (TARGET_ISEL)
	{
	  /* P9 has fast isel so we use one compare and two isel.  */
	  cr = gen_reg_rtx (CCmode);
	  rtx compare_rtx = gen_rtx_COMPARE (CCmode, bytes_rtx,
					     GEN_INT (max_bytes));
	  emit_move_insn (cr, compare_rtx);
	  rtx cmp_rtx = gen_rtx_LE (VOIDmode, cr, const0_rtx);
	  do_isel (loop_cmp, cmp_rtx, loop_cmp, mb_reg, cr);
	  do_isel (iter, cmp_rtx, iter, mi_reg, cr);
	}
      else
	{
	  rtx lab_after = gen_label_rtx ();
	  do_ifelse (CCmode, LE, bytes_rtx, GEN_INT (max_bytes),
		     NULL_RTX, lab_after);
	  emit_move_insn (loop_cmp, mb_reg);
	  emit_move_insn (iter, mi_reg);
	  emit_label (lab_after);
	}

      /* Now compute remainder bytes which isn't used until after the loop.  */
      do_sub3 (cmp_rem, bytes_rtx, loop_cmp);
    }

  rtx dcond = NULL_RTX; /* Used for when we jump to diff_label.  */
  /* For p9 we need to have just one of these as multiple places define
     it and it gets used by the setb at the end.  */
  if (TARGET_P9_MISC)
    dcond = gen_reg_rtx (CCUNSmode);

  if (!bytes_is_const || bytes >= loop_bytes)
    {
      /* It should not be possible to come here if remaining bytes is
	 < 16 in the runtime case either.  Compute number of loop
	 iterations.  We compare 2*word_mode per iteration so 16B for
	 64-bit code and 8B for 32-bit.  Set up two induction
	 variables and load count register.  */

      /* HACK ALERT: create hard reg for CTR here.  If we just use a
	 pseudo, cse will get rid of it and then the allocator will
	 see it used in the lshr above and won't give us ctr.  */
      rtx ctr = gen_rtx_REG (Pmode, CTR_REGNO);
      emit_move_insn (ctr, iter);
      emit_move_insn (diff, GEN_INT (0));
      emit_move_insn (iv1, GEN_INT (0));
      emit_move_insn (iv2, GEN_INT (load_mode_size));

      /* inner loop to compare 2*word_mode */
      rtx loop_top_label = gen_label_rtx ();
      emit_label (loop_top_label);

      rtx src1_ix1 = gen_rtx_PLUS (word_mode, src1_addr, iv1);
      rtx src2_ix1 = gen_rtx_PLUS (word_mode, src2_addr, iv1);

      do_load_for_compare_from_addr (load_mode, d1_1,
				     src1_ix1, orig_src1);
      do_load_for_compare_from_addr (load_mode, d2_1,
				     src2_ix1, orig_src2);
      do_add3 (iv1, iv1, GEN_INT (loop_bytes));

      rtx src1_ix2 = gen_rtx_PLUS (word_mode, src1_addr, iv2);
      rtx src2_ix2 = gen_rtx_PLUS (word_mode, src2_addr, iv2);

      do_load_for_compare_from_addr (load_mode, d1_2,
				     src1_ix2, orig_src1);
      do_load_for_compare_from_addr (load_mode, d2_2,
				     src2_ix2, orig_src2);
      do_add3 (iv2, iv2, GEN_INT (loop_bytes));

      if (TARGET_P9_MISC)
	{
	  /* Generate a compare, and convert with a setb later.  */
	  rtx cmp = gen_rtx_COMPARE (CCUNSmode, d1_1, d2_1);
	  emit_insn (gen_rtx_SET (dcond, cmp));
	}
      else
	{
	  dcond = gen_reg_rtx (CCmode);
	  if (word_mode == DImode)
	    emit_insn (gen_subfdi3_carry_dot2 (diff, d2_1, d1_1, dcond));
	  else
	    emit_insn (gen_subfsi3_carry_dot2 (diff, d2_1, d1_1, dcond));
	}

      /* Branch to the difference handler if the first pair differed.  */
      do_ifelse (GET_MODE (dcond), NE, NULL_RTX, NULL_RTX,
		 dcond, diff_label);

      if (TARGET_P9_MISC)
	{
	  /* Generate a compare, and convert with a setb later.  */
	  rtx cmp = gen_rtx_COMPARE (CCUNSmode, d1_2, d2_2);
	  emit_insn (gen_rtx_SET (dcond, cmp));
	}
      else
	{
	  dcond = gen_reg_rtx (CCmode);
	  if (word_mode == DImode)
	    emit_insn (gen_subfdi3_carry_dot2 (diff, d2_2, d1_2, dcond));
	  else
	    emit_insn (gen_subfsi3_carry_dot2 (diff, d2_2, d1_2, dcond));
	}

      /* Loop back (bdnzt) while the count register is nonzero AND the
	 second pair compared equal; the post-loop check below handles a
	 difference in the second pair.  */
      rtx eqrtx = gen_rtx_EQ (VOIDmode, d1_2, d2_2);
      if (TARGET_64BIT)
	j = emit_jump_insn (gen_bdnztf_di (loop_top_label, ctr, ctr,
					   eqrtx, dcond));
      else
	j = emit_jump_insn (gen_bdnztf_si (loop_top_label, ctr, ctr,
					   eqrtx, dcond));
      JUMP_LABEL (j) = loop_top_label;
      LABEL_NUSES (loop_top_label) += 1;
    }

  HOST_WIDE_INT bytes_remaining = 0;
  if (bytes_is_const)
    bytes_remaining = (bytes % loop_bytes);

  /* If diff is nonzero, branch to difference handling
     code.  If we exit here with a nonzero diff, it is
     because the second word differed.  */
  if (TARGET_P9_MISC)
    do_ifelse (CCUNSmode, NE, NULL_RTX, NULL_RTX, dcond, diff_label);
  else
    do_ifelse (CCmode, NE, diff, const0_rtx, NULL_RTX, diff_label);

  if (library_call_label != NULL && bytes_is_const && bytes > max_bytes)
    {
      /* If the length is known at compile time, then we will always
	 have a remainder to go to the library call with.  */
      rtx library_call_ref = gen_rtx_LABEL_REF (VOIDmode, library_call_label);
      j = emit_jump_insn (gen_rtx_SET (pc_rtx, library_call_ref));
      JUMP_LABEL (j) = library_call_label;
      LABEL_NUSES (library_call_label) += 1;
      emit_barrier ();
    }

  if (bytes_is_const && bytes_remaining == 0)
    {
      /* No remainder and if we are here then diff is 0 so just return 0 */
      if (TARGET_64BIT)
	emit_insn (gen_movsi (target, gen_lowpart (SImode, diff)));
      else
	emit_move_insn (target, diff);
      j = emit_jump_insn (gen_rtx_SET (pc_rtx, final_ref));
      JUMP_LABEL (j) = final_label;
      LABEL_NUSES (final_label) += 1;
      emit_barrier ();
    }
  else if (!no_remainder_code)
    {
      /* Update addresses to point to the next word to examine.  */
      do_add3 (src1_addr, src1_addr, iv1);
      do_add3 (src2_addr, src2_addr, iv1);

      emit_label (cleanup_label);

      if (!bytes_is_const)
	{
	  /* If we're dealing with runtime length, we have to check if
	     it's zero after the loop.  When length is known at compile
	     time the no-remainder condition is dealt with above.  By
	     doing this after cleanup_label, we also deal with the
	     case where length is 0 at the start and we bypass the
	     loop with a branch to cleanup_label.  */
	  emit_move_insn (target, const0_rtx);
	  do_ifelse (CCmode, EQ, cmp_rem, const0_rtx,
		     NULL_RTX, final_label);
	}

      rtx final_cleanup = gen_label_rtx ();
      rtx cmp_rem_before = gen_reg_rtx (word_mode);
      /* Compare one more word_mode chunk if needed.  */
      if (!bytes_is_const
	  || (bytes_is_const && bytes_remaining >= load_mode_size))
	{
	  /* If remainder length < word length, branch to final
	     cleanup compare.  */
	  if (!bytes_is_const)
	    do_ifelse (CCmode, LT, cmp_rem, GEN_INT (load_mode_size),
		       NULL_RTX, final_cleanup);

	  /* load and compare 8B */
	  do_load_for_compare_from_addr (load_mode, d1_1,
					 src1_addr, orig_src1);
	  do_load_for_compare_from_addr (load_mode, d2_1,
					 src2_addr, orig_src2);

	  /* Compare the word, see if we need to do the last partial.  */
	  if (TARGET_P9_MISC)
	    {
	      /* Generate a compare, and convert with a setb later.  */
	      rtx cmp = gen_rtx_COMPARE (CCUNSmode, d1_1, d2_1);
	      emit_insn (gen_rtx_SET (dcond, cmp));
	    }
	  else
	    {
	      dcond = gen_reg_rtx (CCmode);
	      if (word_mode == DImode)
		emit_insn (gen_subfdi3_carry_dot2 (diff, d2_1, d1_1, dcond));
	      else
		emit_insn (gen_subfsi3_carry_dot2 (diff, d2_1, d1_1, dcond));
	    }

	  do_ifelse (GET_MODE (dcond), NE, NULL_RTX, NULL_RTX,
		     dcond, diff_label);

	  do_add3 (src1_addr, src1_addr, GEN_INT (load_mode_size));
	  do_add3 (src2_addr, src2_addr, GEN_INT (load_mode_size));
	  emit_move_insn (cmp_rem_before, cmp_rem);
	  do_add3 (cmp_rem, cmp_rem, GEN_INT (-load_mode_size));
	  if (bytes_is_const)
	    bytes_remaining -= load_mode_size;
	  else
	    /* See if remaining length is now zero.  We previously set
	       target to 0 so we can just jump to the end.  */
	    do_ifelse (CCmode, EQ, cmp_rem, const0_rtx,
		       NULL_RTX, final_label);
	}

      /* Cases:
	 bytes_is_const
	   We can always shift back to do an overlapping compare
	   of the last chunk because we know length >= 8.

	 !bytes_is_const
	   align>=load_mode_size
	     Read word_mode and mask
	   align<load_mode_size
	     avoid stepping past end

	  Three strategies:
	  * decrement address and do overlapping compare
	  * read word_mode and mask
	  * carefully avoid crossing 4k boundary
       */

      if ((!bytes_is_const || (bytes_is_const && bytes_remaining && isP7))
	  && align1 >= load_mode_size && align2 >= load_mode_size)
	{
	  /* Alignment is larger than word_mode so we do not need to be
	     concerned with extra page crossings.  But, we do not know
	     that the length is larger than load_mode_size so we might
	     end up comparing against data before the block if we try
	     an overlapping compare.  Also we use this on P7 for fixed length
	     remainder because P7 doesn't like overlapping unaligned.
	     Strategy: load 8B, shift off bytes past length, and compare.  */
	  emit_label (final_cleanup);
	  do_load_mask_compare (load_mode, diff, cmp_rem, dcond,
				src1_addr, src2_addr, orig_src1, orig_src2);
	}
      else if (bytes_remaining && bytes_is_const)
	{
	  /* We do not do loop expand if length < 32 so we know at the
	     end we can do an overlapping compare.
	     Strategy: shift address back and do word_mode load that
	     ends at the end of the block.  */
	  emit_label (final_cleanup);
	  do_overlap_load_compare (load_mode, true, bytes_remaining, diff,
				   cmp_rem, dcond, src1_addr, src2_addr,
				   orig_src1, orig_src2);
	}
      else if (!bytes_is_const)
	{
	  rtx handle4k_label = gen_label_rtx ();
	  rtx nonconst_overlap = gen_label_rtx ();
	  emit_label (nonconst_overlap);

	  /* Here we have to handle the case where we have runtime
	     length which may be too short for overlap compare, and
	     alignment is not at least load_mode_size so we have to
	     tread carefully to avoid stepping across 4k boundaries.  */

	  /* If the length after the loop was larger than word_mode
	     size, we can just do an overlapping compare and we're
	     done.  We fall through to this code from the word_mode
	     compare that precedes this.  */
	  do_overlap_load_compare (load_mode, false, 0, diff,
				   cmp_rem, dcond, src1_addr, src2_addr,
				   orig_src1, orig_src2);

	  rtx diff_ref = gen_rtx_LABEL_REF (VOIDmode, diff_label);
	  j = emit_jump_insn (gen_rtx_SET (pc_rtx, diff_ref));
	  JUMP_LABEL (j) = diff_label;
	  LABEL_NUSES (diff_label) += 1;
	  emit_barrier ();

	  /* If we couldn't do the overlap compare we have to be more
	     careful of the 4k boundary.  Test to see if either
	     address is less than word_mode_size away from a 4k
	     boundary.  If not, then we can do a load/shift/compare
	     and we are done.  We come to this code if length was less
	     than word_mode_size.  */

	  emit_label (final_cleanup);

	  /* We can still avoid the slow case if the length was larger
	     than one loop iteration, in which case go do the overlap
	     load compare path.  */
	  do_ifelse (CCmode, GT, bytes_rtx, GEN_INT (loop_bytes),
		     NULL_RTX, nonconst_overlap);

	  rtx rem4k = gen_reg_rtx (word_mode);
	  rtx dist1 = gen_reg_rtx (word_mode);
	  rtx dist2 = gen_reg_rtx (word_mode);
	  do_sub3 (rem4k, GEN_INT (4096), cmp_rem);
	  if (word_mode == SImode)
	    emit_insn (gen_andsi3 (dist1, src1_addr, GEN_INT (0xfff)));
	  else
	    emit_insn (gen_anddi3 (dist1, src1_addr, GEN_INT (0xfff)));
	  do_ifelse (CCmode, LE, dist1, rem4k, NULL_RTX, handle4k_label);
	  if (word_mode == SImode)
	    emit_insn (gen_andsi3 (dist2, src2_addr, GEN_INT (0xfff)));
	  else
	    emit_insn (gen_anddi3 (dist2, src2_addr, GEN_INT (0xfff)));
	  do_ifelse (CCmode, LE, dist2, rem4k, NULL_RTX, handle4k_label);

	  /* We don't have a 4k boundary to deal with, so do
	     a load/shift/compare and jump to diff.  */
	  do_load_mask_compare (load_mode, diff, cmp_rem, dcond,
				src1_addr, src2_addr, orig_src1, orig_src2);

	  j = emit_jump_insn (gen_rtx_SET (pc_rtx, diff_ref));
	  JUMP_LABEL (j) = diff_label;
	  LABEL_NUSES (diff_label) += 1;
	  emit_barrier ();

	  /* Finally in the unlikely case we are inching up to a
	     4k boundary we use a compact lbzx/compare loop to do
	     it a byte at a time.  */
	  emit_label (handle4k_label);

	  rtx ctr = gen_rtx_REG (Pmode, CTR_REGNO);
	  emit_move_insn (ctr, cmp_rem);
	  rtx ixreg = gen_reg_rtx (Pmode);
	  emit_move_insn (ixreg, const0_rtx);

	  rtx src1_ix = gen_rtx_PLUS (word_mode, src1_addr, ixreg);
	  rtx src2_ix = gen_rtx_PLUS (word_mode, src2_addr, ixreg);
	  rtx d1 = gen_reg_rtx (word_mode);
	  rtx d2 = gen_reg_rtx (word_mode);

	  rtx fc_loop = gen_label_rtx ();
	  emit_label (fc_loop);

	  do_load_for_compare_from_addr (QImode, d1, src1_ix, orig_src1);
	  do_load_for_compare_from_addr (QImode, d2, src2_ix, orig_src2);

	  do_add3 (ixreg, ixreg, const1_rtx);

	  rtx cond = gen_reg_rtx (CCmode);
	  rtx subexpr = gen_rtx_MINUS (word_mode, d1, d2);
	  rs6000_emit_dot_insn (diff, subexpr, 2, cond);

	  rtx eqrtx = gen_rtx_EQ (VOIDmode, d1, d2);
	  if (TARGET_64BIT)
	    j = emit_jump_insn (gen_bdnztf_di (fc_loop, ctr, ctr,
					       eqrtx, cond));
	  else
	    j = emit_jump_insn (gen_bdnztf_si (fc_loop, ctr, ctr,
					       eqrtx, cond));
	  JUMP_LABEL (j) = fc_loop;
	  LABEL_NUSES (fc_loop) += 1;

	  if (TARGET_64BIT)
	    emit_insn (gen_movsi (target, gen_lowpart (SImode, diff)));
	  else
	    emit_move_insn (target, diff);

	  /* Since we are comparing bytes, the difference can be used
	     as the final result and we are done here.  */
	  j = emit_jump_insn (gen_rtx_SET (pc_rtx, final_ref));
	  JUMP_LABEL (j) = final_label;
	  LABEL_NUSES (final_label) += 1;
	  emit_barrier ();
	}
    }

  emit_label (diff_label);
  /* difference handling, 64->32 conversion */

  /* We need to produce DI result from sub, then convert to target SI
     while maintaining <0 / ==0 / >0 properties.  This sequence works:
     subfc L,A,B
     subfe H,H,H
     popcntd L,L
     rldimi L,H,6,0

     This is an alternate one Segher cooked up if somebody
     wants to expand this for something that doesn't have popcntd:
     subfc L,a,b
     subfe H,x,x
     addic t,L,-1
     subfe v,t,L
     or z,v,H

     And finally, p9 can just do this:
     cmpld A,B
     setb r */

  if (TARGET_P9_MISC)
    emit_insn (gen_setb_unsigned (target, dcond));
  else
    {
      if (TARGET_64BIT)
	{
	  rtx tmp_reg_ca = gen_reg_rtx (DImode);
	  emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca));
	  emit_insn (gen_popcntddi2 (diff, diff));
	  emit_insn (gen_iordi3 (diff, diff, tmp_reg_ca));
	  emit_insn (gen_movsi (target, gen_lowpart (SImode, diff)));
	}
      else
	{
	  rtx tmp_reg_ca = gen_reg_rtx (SImode);
	  emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca));
	  emit_insn (gen_popcntdsi2 (diff, diff));
	  emit_insn (gen_iorsi3 (target, diff, tmp_reg_ca));
	}
    }

  if (library_call_label != NULL)
    {
      /* Branch around memcmp call.  */
      j = emit_jump_insn (gen_rtx_SET (pc_rtx, final_ref));
      JUMP_LABEL (j) = final_label;
      LABEL_NUSES (final_label) += 1;
      emit_barrier ();

      /* Make memcmp library call.  cmp_rem is the remaining bytes that
	 were compared and cmp_rem is the expected amount to be compared
	 by memcmp.  If we don't find a difference in the loop compare, do
	 the library call directly instead of doing a small compare just
	 to get to an arbitrary boundary before calling it anyway.
	 Also, update addresses to point to the next word to examine.  */
      emit_label (library_call_label);

      rtx len_rtx = gen_reg_rtx (word_mode);
      if (bytes_is_const)
	{
	  emit_move_insn (len_rtx, cmp_rem);
	  do_add3 (src1_addr, src1_addr, iv1);
	  do_add3 (src2_addr, src2_addr, iv1);
	}
      else
	emit_move_insn (len_rtx, bytes_rtx);

      tree fun = builtin_decl_explicit (BUILT_IN_MEMCMP);
      emit_library_call_value (XEXP (DECL_RTL (fun), 0),
			       target, LCT_NORMAL, GET_MODE (target),
			       src1_addr, Pmode,
			       src2_addr, Pmode,
			       len_rtx, GET_MODE (len_rtx));
    }

  /* emit final_label */
  emit_label (final_label);
  return true;
}
/* Expand a block compare operation, and return true if successful. /* Expand a block compare operation, and return true if successful.
Return false if we should let the compiler generate normal code, Return false if we should let the compiler generate normal code,
probably a memcmp call. probably a memcmp call.
...@@ -331,21 +1284,36 @@ expand_block_compare (rtx operands[]) ...@@ -331,21 +1284,36 @@ expand_block_compare (rtx operands[])
if (TARGET_32BIT && TARGET_POWERPC64) if (TARGET_32BIT && TARGET_POWERPC64)
return false; return false;
/* If this is not a fixed size compare, just call memcmp. */ bool isP7 = (rs6000_tune == PROCESSOR_POWER7);
if (!CONST_INT_P (bytes_rtx))
/* Allow this param to shut off all expansion. */
if (rs6000_block_compare_inline_limit == 0)
return false;
/* targetm.slow_unaligned_access -- don't do unaligned stuff.
However slow_unaligned_access returns true on P7 even though the
performance of this code is good there. */
if (!isP7
&& (targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1))
|| targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2))))
return false; return false;
/* Unaligned l*brx traps on P7 so don't do this. However this should
not affect much because LE isn't really supported on P7 anyway. */
if (isP7 && !BYTES_BIG_ENDIAN)
return false;
/* If this is not a fixed size compare, try generating loop code and
if that fails just call memcmp. */
if (!CONST_INT_P (bytes_rtx))
return expand_compare_loop (operands);
/* This must be a fixed size alignment. */ /* This must be a fixed size alignment. */
if (!CONST_INT_P (align_rtx)) if (!CONST_INT_P (align_rtx))
return false; return false;
unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT; unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT;
/* targetm.slow_unaligned_access -- don't do unaligned stuff. */
if (targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1))
|| targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2)))
return false;
gcc_assert (GET_MODE (target) == SImode); gcc_assert (GET_MODE (target) == SImode);
/* Anything to move? */ /* Anything to move? */
...@@ -353,14 +1321,6 @@ expand_block_compare (rtx operands[]) ...@@ -353,14 +1321,6 @@ expand_block_compare (rtx operands[])
if (bytes == 0) if (bytes == 0)
return true; return true;
/* The code generated for p7 and older is not faster than glibc
memcmp if alignment is small and length is not short, so bail
out to avoid those conditions. */
if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
&& ((base_align == 1 && bytes > 16)
|| (base_align == 2 && bytes > 32)))
return false;
rtx tmp_reg_src1 = gen_reg_rtx (word_mode); rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
rtx tmp_reg_src2 = gen_reg_rtx (word_mode); rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
/* P7/P8 code uses cond for subfc. but P9 uses /* P7/P8 code uses cond for subfc. but P9 uses
...@@ -383,10 +1343,18 @@ expand_block_compare (rtx operands[]) ...@@ -383,10 +1343,18 @@ expand_block_compare (rtx operands[])
select_block_compare_mode (offset, bytes, base_align, word_mode_ok); select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
unsigned int load_mode_size = GET_MODE_SIZE (load_mode); unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
/* We don't want to generate too much code. */ /* We don't want to generate too much code. The loop code can take
unsigned HOST_WIDE_INT max_bytes = over for lengths greater than 31 bytes. */
load_mode_size * (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit; unsigned HOST_WIDE_INT max_bytes = rs6000_block_compare_inline_limit;
if (!IN_RANGE (bytes, 1, max_bytes)) if (!IN_RANGE (bytes, 1, max_bytes))
return expand_compare_loop (operands);
/* The code generated for p7 and older is not faster than glibc
memcmp if alignment is small and length is not short, so bail
out to avoid those conditions. */
if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
&& ((base_align == 1 && bytes > 16)
|| (base_align == 2 && bytes > 32)))
return false; return false;
bool generate_6432_conversion = false; bool generate_6432_conversion = false;
...@@ -461,14 +1429,14 @@ expand_block_compare (rtx operands[]) ...@@ -461,14 +1429,14 @@ expand_block_compare (rtx operands[])
rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
src1 = replace_equiv_address (src1, src1_reg); src1 = replace_equiv_address (src1, src1_reg);
} }
set_mem_size (src1, load_mode_size); set_mem_size (src1, cmp_bytes);
if (!REG_P (XEXP (src2, 0))) if (!REG_P (XEXP (src2, 0)))
{ {
rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
src2 = replace_equiv_address (src2, src2_reg); src2 = replace_equiv_address (src2, src2_reg);
} }
set_mem_size (src2, load_mode_size); set_mem_size (src2, cmp_bytes);
do_load_for_compare (tmp_reg_src1, src1, load_mode); do_load_for_compare (tmp_reg_src1, src1, load_mode);
do_load_for_compare (tmp_reg_src2, src2, load_mode); do_load_for_compare (tmp_reg_src2, src2, load_mode);
...@@ -536,7 +1504,7 @@ expand_block_compare (rtx operands[]) ...@@ -536,7 +1504,7 @@ expand_block_compare (rtx operands[])
{ {
rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref)); rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
JUMP_LABEL(j) = final_label; JUMP_LABEL (j) = final_label;
LABEL_NUSES (final_label) += 1; LABEL_NUSES (final_label) += 1;
emit_barrier (); emit_barrier ();
} }
...@@ -576,7 +1544,7 @@ expand_block_compare (rtx operands[]) ...@@ -576,7 +1544,7 @@ expand_block_compare (rtx operands[])
rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
cvt_ref, pc_rtx); cvt_ref, pc_rtx);
rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
JUMP_LABEL(j) = convert_label; JUMP_LABEL (j) = convert_label;
LABEL_NUSES (convert_label) += 1; LABEL_NUSES (convert_label) += 1;
} }
else else
...@@ -791,9 +1759,9 @@ expand_strn_compare (rtx operands[], int no_length) ...@@ -791,9 +1759,9 @@ expand_strn_compare (rtx operands[], int no_length)
rtx jmp; rtx jmp;
/* Strncmp for power8 in glibc does this: /* Strncmp for power8 in glibc does this:
rldicl r8,r3,0,52 rldicl r8,r3,0,52
cmpldi cr7,r8,4096-16 cmpldi cr7,r8,4096-16
bgt cr7,L(pagecross) */ bgt cr7,L(pagecross) */
/* Make sure that the length we use for the alignment test and /* Make sure that the length we use for the alignment test and
the subsequent code generation are in agreement so we do not the subsequent code generation are in agreement so we do not
......
...@@ -331,8 +331,12 @@ Target Report Var(rs6000_block_move_inline_limit) Init(0) RejectNegative Joined ...@@ -331,8 +331,12 @@ Target Report Var(rs6000_block_move_inline_limit) Init(0) RejectNegative Joined
Specify how many bytes should be moved inline before calling out to memcpy/memmove. Specify how many bytes should be moved inline before calling out to memcpy/memmove.
mblock-compare-inline-limit= mblock-compare-inline-limit=
Target Report Var(rs6000_block_compare_inline_limit) Init(5) RejectNegative Joined UInteger Save Target Report Var(rs6000_block_compare_inline_limit) Init(31) RejectNegative Joined UInteger Save
Specify the maximum number pairs of load instructions that should be generated inline for the compare. If the number needed exceeds the limit, a call to memcmp will be generated instead. Specify the maximum number of bytes to compare inline with non-looping code. If this is set to 0, all inline expansion (non-loop and loop) of memcmp is disabled.
mblock-compare-inline-loop-limit=
Target Report Var(rs6000_block_compare_inline_loop_limit) Init(-1) RejectNegative Joined UInteger Save
Specify the maximum number of bytes to compare inline with loop code generation. If the length is not known at compile time, memcmp will be called after this many bytes are compared. By default, a length will be picked depending on the tuning target.
mstring-compare-inline-limit= mstring-compare-inline-limit=
Target Report Var(rs6000_string_compare_inline_limit) Init(8) RejectNegative Joined UInteger Save Target Report Var(rs6000_string_compare_inline_limit) Init(8) RejectNegative Joined UInteger Save
......
...@@ -14,11 +14,80 @@ int lib_strncmp(const char *a, const char *b, size_t n) asm("strncmp"); ...@@ -14,11 +14,80 @@ int lib_strncmp(const char *a, const char *b, size_t n) asm("strncmp");
#ifndef NRAND #ifndef NRAND
#define NRAND 10000 #define NRAND 10000
#endif #endif
#define MAX_SZ 200 #define MAX_SZ 600
#define DEF_RS(ALIGN) \
static void test_memcmp_runtime_size_ ## ALIGN (const char *str1, \
const char *str2, \
size_t sz, int expect) \
{ \
char three[8192] __attribute__ ((aligned (4096))); \
char four[8192] __attribute__ ((aligned (4096))); \
char *a, *b; \
int i,j,a1,a2,r; \
for (j = 0; j < 2; j++) \
{ \
for (i = 0; i < 2; i++) \
{ \
a = three+i*ALIGN+j*(4096-2*i*ALIGN); \
b = four+i*ALIGN+j*(4096-2*i*ALIGN); \
memcpy(a,str1,sz); \
memcpy(b,str2,sz); \
asm(" "); \
r = memcmp(a,b,sz); \
asm(" "); \
if ( r < 0 && !(expect < 0) ) abort(); \
if ( r > 0 && !(expect > 0) ) abort(); \
if ( r == 0 && !(expect == 0) ) abort(); \
} \
} \
}
DEF_RS(1)
DEF_RS(2)
DEF_RS(4)
DEF_RS(8)
DEF_RS(16)
static void test_memcmp_runtime_size (const char *str1, const char *str2,
size_t sz, int expect)
{
char three[8192] __attribute__ ((aligned (4096)));
char four[8192] __attribute__ ((aligned (4096)));
char *a, *b;
int i,j,a1,a2,r;
test_memcmp_runtime_size_1 (str1,str2,sz,expect);
test_memcmp_runtime_size_2 (str1,str2,sz,expect);
test_memcmp_runtime_size_4 (str1,str2,sz,expect);
test_memcmp_runtime_size_8 (str1,str2,sz,expect);
test_memcmp_runtime_size_16 (str1,str2,sz,expect);
for (j = 0; j < 2; j++)
{
for (i = 0; i < 2; i++)
{
for (a1=0; a1 < 2*sizeof(void *); a1++)
{
for (a2=0; a2 < 2*sizeof(void *); a2++)
{
a = three+i*a1+j*(4096-2*i*a1);
b = four+i*a2+j*(4096-2*i*a2);
memcpy(a,str1,sz);
memcpy(b,str2,sz);
asm(" ");
r = memcmp(a,b,sz);
asm(" ");
if ( r < 0 && !(expect < 0) ) abort();
if ( r > 0 && !(expect > 0) ) abort();
if ( r == 0 && !(expect == 0) ) abort();
}
}
}
}
}
static void test_driver_memcmp (void (test_memcmp)(const char *, const char *, int), static void test_driver_memcmp (void (test_memcmp)(const char *, const char *, int),
void (test_strncmp)(const char *, const char *, int), void (test_strncmp)(const char *, const char *, int),
size_t sz, int align) size_t sz, int align)
{ {
char buf1[MAX_SZ*2+10],buf2[MAX_SZ*2+10]; char buf1[MAX_SZ*2+10],buf2[MAX_SZ*2+10];
size_t test_sz = (sz<MAX_SZ)?sz:MAX_SZ; size_t test_sz = (sz<MAX_SZ)?sz:MAX_SZ;
...@@ -35,11 +104,12 @@ static void test_driver_memcmp (void (test_memcmp)(const char *, const char *, i ...@@ -35,11 +104,12 @@ static void test_driver_memcmp (void (test_memcmp)(const char *, const char *, i
buf1[j] = rand() & 0xff; buf1[j] = rand() & 0xff;
buf2[j] = rand() & 0xff; buf2[j] = rand() & 0xff;
} }
e = lib_memcmp(buf1,buf2,sz);
(*test_memcmp)(buf1,buf2,e);
test_memcmp_runtime_size (buf1, buf2, sz, e);
e = lib_strncmp(buf1,buf2,sz);
(*test_strncmp)(buf1,buf2,e);
} }
e = lib_memcmp(buf1,buf2,sz);
(*test_memcmp)(buf1,buf2,e);
e = lib_strncmp(buf1,buf2,sz);
(*test_strncmp)(buf1,buf2,e);
} }
for(diff_pos = ((test_sz>10)?(test_sz-10):0); diff_pos < test_sz+10; diff_pos++) for(diff_pos = ((test_sz>10)?(test_sz-10):0); diff_pos < test_sz+10; diff_pos++)
for(zero_pos = ((test_sz>10)?(test_sz-10):0); zero_pos < test_sz+10; zero_pos++) for(zero_pos = ((test_sz>10)?(test_sz-10):0); zero_pos < test_sz+10; zero_pos++)
...@@ -53,6 +123,9 @@ static void test_driver_memcmp (void (test_memcmp)(const char *, const char *, i ...@@ -53,6 +123,9 @@ static void test_driver_memcmp (void (test_memcmp)(const char *, const char *, i
(*test_memcmp)(buf1,buf2,e); (*test_memcmp)(buf1,buf2,e);
(*test_memcmp)(buf2,buf1,-e); (*test_memcmp)(buf2,buf1,-e);
(*test_memcmp)(buf2,buf2,0); (*test_memcmp)(buf2,buf2,0);
test_memcmp_runtime_size (buf1, buf2, sz, e);
test_memcmp_runtime_size (buf2, buf1, sz, -e);
test_memcmp_runtime_size (buf2, buf2, sz, 0);
e = lib_strncmp(buf1,buf2,sz); e = lib_strncmp(buf1,buf2,sz);
(*test_strncmp)(buf1,buf2,e); (*test_strncmp)(buf1,buf2,e);
(*test_strncmp)(buf2,buf1,-e); (*test_strncmp)(buf2,buf1,-e);
...@@ -61,6 +134,7 @@ static void test_driver_memcmp (void (test_memcmp)(const char *, const char *, i ...@@ -61,6 +134,7 @@ static void test_driver_memcmp (void (test_memcmp)(const char *, const char *, i
buf2[diff_pos] = 0; buf2[diff_pos] = 0;
e = lib_memcmp(buf1,buf2,sz); e = lib_memcmp(buf1,buf2,sz);
(*test_memcmp)(buf1,buf2,e); (*test_memcmp)(buf1,buf2,e);
test_memcmp_runtime_size (buf1, buf2, sz, e);
e = lib_strncmp(buf1,buf2,sz); e = lib_strncmp(buf1,buf2,sz);
(*test_strncmp)(buf1,buf2,e); (*test_strncmp)(buf1,buf2,e);
memset(buf2+diff_pos,'B',sizeof(buf2)-diff_pos); memset(buf2+diff_pos,'B',sizeof(buf2)-diff_pos);
...@@ -68,6 +142,8 @@ static void test_driver_memcmp (void (test_memcmp)(const char *, const char *, i ...@@ -68,6 +142,8 @@ static void test_driver_memcmp (void (test_memcmp)(const char *, const char *, i
e = lib_memcmp(buf1,buf2,sz); e = lib_memcmp(buf1,buf2,sz);
(*test_memcmp)(buf1,buf2,e); (*test_memcmp)(buf1,buf2,e);
(*test_memcmp)(buf2,buf1,-e); (*test_memcmp)(buf2,buf1,-e);
test_memcmp_runtime_size (buf1, buf2, sz, e);
test_memcmp_runtime_size (buf2, buf1, sz, -e);
e = lib_strncmp(buf1,buf2,sz); e = lib_strncmp(buf1,buf2,sz);
(*test_strncmp)(buf1,buf2,e); (*test_strncmp)(buf1,buf2,e);
(*test_strncmp)(buf2,buf1,-e); (*test_strncmp)(buf2,buf1,-e);
...@@ -371,7 +447,14 @@ DEF_TEST(100,2) ...@@ -371,7 +447,14 @@ DEF_TEST(100,2)
DEF_TEST(100,4) DEF_TEST(100,4)
DEF_TEST(100,8) DEF_TEST(100,8)
DEF_TEST(100,16) DEF_TEST(100,16)
DEF_TEST(191,1)
DEF_TEST(192,1)
DEF_TEST(193,1)
DEF_TEST(200,1)
DEF_TEST(400,1)
#else #else
DEF_TEST(1,1)
DEF_TEST(2,1)
DEF_TEST(3,1) DEF_TEST(3,1)
DEF_TEST(4,1) DEF_TEST(4,1)
DEF_TEST(5,1) DEF_TEST(5,1)
...@@ -389,13 +472,15 @@ DEF_TEST(16,1) ...@@ -389,13 +472,15 @@ DEF_TEST(16,1)
DEF_TEST(32,1) DEF_TEST(32,1)
DEF_TEST(100,1) DEF_TEST(100,1)
DEF_TEST(100,8) DEF_TEST(100,8)
DEF_TEST(180,1)
DEF_TEST(180,8)
#endif #endif
int int
main(int argc, char **argv) main(int argc, char **argv)
{ {
#ifdef TEST_ALL #ifdef TEST_ALL
RUN_TEST(1,1) RUN_TEST(1,1)
RUN_TEST(1,2) RUN_TEST(1,2)
RUN_TEST(1,4) RUN_TEST(1,4)
RUN_TEST(1,8) RUN_TEST(1,8)
...@@ -645,7 +730,14 @@ main(int argc, char **argv) ...@@ -645,7 +730,14 @@ main(int argc, char **argv)
RUN_TEST(100,4) RUN_TEST(100,4)
RUN_TEST(100,8) RUN_TEST(100,8)
RUN_TEST(100,16) RUN_TEST(100,16)
RUN_TEST(191,1)
RUN_TEST(192,1)
RUN_TEST(193,1)
RUN_TEST(200,1)
RUN_TEST(400,1)
#else #else
RUN_TEST(1,1)
RUN_TEST(2,1)
RUN_TEST(3,1) RUN_TEST(3,1)
RUN_TEST(4,1) RUN_TEST(4,1)
RUN_TEST(5,1) RUN_TEST(5,1)
...@@ -663,5 +755,7 @@ main(int argc, char **argv) ...@@ -663,5 +755,7 @@ main(int argc, char **argv)
RUN_TEST(32,1) RUN_TEST(32,1)
RUN_TEST(100,1) RUN_TEST(100,1)
RUN_TEST(100,8) RUN_TEST(100,8)
RUN_TEST(180,1)
RUN_TEST(180,8)
#endif #endif
} }
...@@ -81,6 +81,15 @@ DEF_TEST(13) ...@@ -81,6 +81,15 @@ DEF_TEST(13)
DEF_TEST(14) DEF_TEST(14)
DEF_TEST(15) DEF_TEST(15)
DEF_TEST(16) DEF_TEST(16)
DEF_TEST(32)
DEF_TEST(64)
DEF_TEST(65)
DEF_TEST(66)
DEF_TEST(67)
DEF_TEST(68)
DEF_TEST(69)
DEF_TEST(70)
DEF_TEST(71)
int int
main(int argc, char **argv) main(int argc, char **argv)
...@@ -101,5 +110,14 @@ main(int argc, char **argv) ...@@ -101,5 +110,14 @@ main(int argc, char **argv)
RUN_TEST(14); RUN_TEST(14);
RUN_TEST(15); RUN_TEST(15);
RUN_TEST(16); RUN_TEST(16);
RUN_TEST(32);
RUN_TEST(64);
RUN_TEST(65);
RUN_TEST(66);
RUN_TEST(67);
RUN_TEST(68);
RUN_TEST(69);
RUN_TEST(70);
RUN_TEST(71);
return 0; return 0;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment