Commit ef4adf1f by Aaron Sawdey Committed by Aaron Sawdey

rs6000-string.c (expand_strncmp_gpr_sequence): Change to a shorter sequence with fewer branches.

2018-10-26  Aaron Sawdey  <acsawdey@linux.ibm.com>

	* config/rs6000/rs6000-string.c (expand_strncmp_gpr_sequence): Change to
	a shorter sequence with fewer branches.
	(emit_final_str_compare_gpr): Ditto.

From-SVN: r265546
parent ffd0bbe1
2018-10-26 Aaron Sawdey <acsawdey@linux.ibm.com>
* config/rs6000/rs6000-string.c (expand_strncmp_gpr_sequence): Change to
a shorter sequence with fewer branches.
(emit_final_str_compare_gpr): Ditto.
2018-10-26 Paul A. Clarke <pc@us.ibm.com>
* config/rs6000/tmmintrin.h: New file.
......@@ -1763,12 +1763,9 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
while (bytes_to_compare > 0)
{
/* GPR compare sequence:
check each 8B with: ld/ld cmpd bne
If equal, use rldicr/cmpb to check for zero byte.
check each 8B with: ld/ld/cmpb/cmpb/orc./bne
cleanup code at end:
cmpb get byte that differs
cmpb look for zero byte
orc combine
cntlzd get bit of first zero/diff byte
subfic convert for rldcl use
rldcl rldcl extract diff/zero byte
......@@ -1801,22 +1798,36 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
rid of the extra bytes. */
cmp_bytes = bytes_to_compare;
rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
rtx offset_reg = gen_reg_rtx (Pmode);
emit_move_insn (offset_reg, GEN_INT (offset));
rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, offset_reg);
do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, offset_reg);
do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
/* We must always left-align the data we read, and
clear any bytes to the right that are beyond the string.
Otherwise the cmpb sequence won't produce the correct
results. The beginning of the compare will be done
with word_mode so will not have any extra shifts or
clear rights. */
results. However if there is only one byte left, we
can just subtract to get the final result so the shifts
and clears are not needed. */
unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
/* Loading just a single byte is a special case. If we are
loading more than that, we have to check whether we are
looking at the entire chunk of data. If not, rotate left and
clear right so that bytes we aren't supposed to look at are
zeroed, and the first byte we are supposed to compare is
leftmost. */
if (load_mode_size != 1)
{
if (load_mode_size < word_mode_size)
{
/* Rotate left first. */
rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
rtx sh = GEN_INT (BITS_PER_UNIT
* (word_mode_size - load_mode_size));
do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
}
......@@ -1830,6 +1841,7 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
}
}
/* Cases to handle. A and B are chunks of the two strings.
1: Not end of comparison:
......@@ -1842,8 +1854,6 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
A == B: branch to result 0.
A != B: cleanup code to compute result. */
unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
rtx dst_label;
if (remain > 0 || equality_compare_rest)
{
......@@ -1857,13 +1867,77 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
/* Branch to end and produce result of 0. */
dst_label = final_move_label;
if (load_mode_size == 1)
{
/* Special case for comparing just a single byte. */
if (equality_compare_rest)
{
/* Use subf./bne to branch to final_move_label if the
byte differs, otherwise fall through to the strncmp
call. We must also check for a zero byte here as we
must not make the library call if this is the end of
the string. */
rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
rtx cond = gen_reg_rtx (CCmode);
rtx diff_rtx = gen_rtx_MINUS (word_mode,
tmp_reg_src1, tmp_reg_src2);
rs6000_emit_dot_insn (result_reg, diff_rtx, 2, cond);
rtx cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
lab_ref, pc_rtx);
rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
JUMP_LABEL (j) = final_move_label;
LABEL_NUSES (final_move_label) += 1;
/* Check for zero byte here before fall through to
library call. This catches the case where the
strings are equal and end in a zero byte at this
position. */
rtx cond0 = gen_reg_rtx (CCmode);
emit_move_insn (cond0, gen_rtx_COMPARE (CCmode, tmp_reg_src1,
const0_rtx));
rtx cmp0eq_rtx = gen_rtx_EQ (VOIDmode, cond0, const0_rtx);
rtx ifelse0 = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp0eq_rtx,
lab_ref, pc_rtx);
rtx j0 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse0));
JUMP_LABEL (j0) = final_move_label;
LABEL_NUSES (final_move_label) += 1;
}
else
{
/* This is the last byte to be compared so we can use
subf to compute the final result and branch
unconditionally to final_move_label. */
do_sub3 (result_reg, tmp_reg_src1, tmp_reg_src2);
rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
JUMP_LABEL (j) = final_move_label;
LABEL_NUSES (final_move_label) += 1;
emit_barrier ();
}
}
else
{
rtx cmpb_zero = gen_reg_rtx (word_mode);
rtx cmpb_diff = gen_reg_rtx (word_mode);
rtx zero_reg = gen_reg_rtx (word_mode);
rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
rtx cond = gen_reg_rtx (CCmode);
/* Always produce the 0 result, it is needed if
cmpb finds a 0 byte in this chunk. */
rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
emit_move_insn (zero_reg, GEN_INT (0));
do_cmpb3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2);
do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
rtx not_diff = gen_rtx_NOT (word_mode, cmpb_diff);
rtx orc_rtx = gen_rtx_IOR (word_mode, not_diff, cmpb_zero);
rs6000_emit_dot_insn (result_reg, orc_rtx, 2, cond);
rtx cmp_rtx;
if (remain == 0 && !equality_compare_rest)
......@@ -1876,35 +1950,6 @@ expand_strncmp_gpr_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
JUMP_LABEL (j) = dst_label;
LABEL_NUSES (dst_label) += 1;
if (remain > 0 || equality_compare_rest)
{
/* Generate a cmpb to test for a 0 byte and branch
to final result if found. */
rtx cmpb_zero = gen_reg_rtx (word_mode);
rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
rtx condz = gen_reg_rtx (CCmode);
rtx zero_reg = gen_reg_rtx (word_mode);
emit_move_insn (zero_reg, GEN_INT (0));
do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
if (cmp_bytes < word_mode_size)
{
/* Don't want to look at zero bytes past end. */
HOST_WIDE_INT mb =
BITS_PER_UNIT * (word_mode_size - cmp_bytes);
rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
do_and3 (cmpb_zero, cmpb_zero, mask);
}
emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
lab_ref_fin, pc_rtx);
rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
JUMP_LABEL (j2) = final_move_label;
LABEL_NUSES (final_move_label) += 1;
}
offset += cmp_bytes;
......@@ -2089,9 +2134,6 @@ expand_strncmp_vec_sequence (unsigned HOST_WIDE_INT bytes_to_compare,
byte and generates the final result, taking into account
zero bytes:
cmpb cmpb_result1, src1, src2
cmpb cmpb_result2, src1, zero
orc cmpb_result1, cmpb_result1, cmpb_result2
cntlzd get bit of first zero/diff byte
addi convert for rldcl use
rldcl rldcl extract diff/zero byte
......@@ -2105,10 +2147,7 @@ static void
emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
{
machine_mode m = GET_MODE (str1);
rtx cmpb_diff = gen_reg_rtx (m);
rtx cmpb_zero = gen_reg_rtx (m);
rtx rot_amt = gen_reg_rtx (m);
rtx zero_reg = gen_reg_rtx (m);
rtx rot1_1 = gen_reg_rtx (m);
rtx rot1_2 = gen_reg_rtx (m);
......@@ -2117,12 +2156,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
if (m == SImode)
{
emit_insn (gen_cmpbsi3 (cmpb_diff, str1, str2));
emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
emit_insn (gen_cmpbsi3 (cmpb_zero, str1, zero_reg));
emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
emit_insn (gen_clzsi2 (rot_amt, result));
emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
emit_insn (gen_rotlsi3 (rot1_1, str1,
gen_lowpart (SImode, rot_amt)));
......@@ -2134,12 +2168,7 @@ emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
}
else if (m == DImode)
{
emit_insn (gen_cmpbdi3 (cmpb_diff, str1, str2));
emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
emit_insn (gen_cmpbdi3 (cmpb_zero, str1, zero_reg));
emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
emit_insn (gen_clzdi2 (rot_amt, result));
emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
emit_insn (gen_rotldi3 (rot1_1, str1,
gen_lowpart (SImode, rot_amt)));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment