Commit eae298d6 by Christian Bruel Committed by Christian Bruel

sh-mem.cc (sh_expand_cmpnstr): Unroll small sizes and optimize non-constant lengths.

2014-01-13  Christian Bruel  <christian.bruel@st.com>

	* config/sh/sh-mem.cc (sh_expand_cmpnstr): Unroll small sizes and
	  optimize non-constant lengths.

From-SVN: r206575
parent 698da88a
2014-01-13 Christian Bruel <christian.bruel@st.com>
* config/sh/sh-mem.cc (sh_expand_cmpnstr): Unroll small sizes and
optimize non-constant lengths.
2014-01-13 Jakub Jelinek <jakub@redhat.com> 2014-01-13 Jakub Jelinek <jakub@redhat.com>
PR libgomp/59194 PR libgomp/59194
......
...@@ -324,7 +324,6 @@ sh_expand_cmpnstr (rtx *operands) ...@@ -324,7 +324,6 @@ sh_expand_cmpnstr (rtx *operands)
rtx addr2 = operands[2]; rtx addr2 = operands[2];
rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0)); rtx s1_addr = copy_addr_to_reg (XEXP (addr1, 0));
rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0)); rtx s2_addr = copy_addr_to_reg (XEXP (addr2, 0));
rtx tmp0 = gen_reg_rtx (SImode);
rtx tmp1 = gen_reg_rtx (SImode); rtx tmp1 = gen_reg_rtx (SImode);
rtx tmp2 = gen_reg_rtx (SImode); rtx tmp2 = gen_reg_rtx (SImode);
...@@ -334,22 +333,27 @@ sh_expand_cmpnstr (rtx *operands) ...@@ -334,22 +333,27 @@ sh_expand_cmpnstr (rtx *operands)
rtx L_end_loop_byte = gen_label_rtx (); rtx L_end_loop_byte = gen_label_rtx ();
rtx len = force_reg (SImode, operands[3]); rtx len = force_reg (SImode, operands[3]);
int constp = (CONST_INT_P (operands[3])); int constp = CONST_INT_P (operands[3]);
int bytes = (constp ? INTVAL (operands[3]) : 0);
int witers = bytes / 4;
/* We could still loop on a register count. Not found very /* Loop on a register count. */
convincing to optimize yet. */ if (constp)
if (! constp)
return false;
if (witers > 1)
{ {
rtx L_loop_long = gen_label_rtx (); rtx tmp0 = gen_reg_rtx (SImode);
rtx L_end_loop_long = gen_label_rtx ();
rtx tmp3 = gen_reg_rtx (SImode); rtx tmp3 = gen_reg_rtx (SImode);
rtx lenw = gen_reg_rtx (SImode); rtx lenw = gen_reg_rtx (SImode);
rtx L_loop_long = gen_label_rtx ();
rtx L_end_loop_long = gen_label_rtx ();
rtx L_small = gen_label_rtx ();
int align = INTVAL (operands[4]); int align = INTVAL (operands[4]);
int bytes = INTVAL (operands[3]);
int witers = bytes / 4;
if (witers > 1)
{
addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
emit_move_insn (tmp0, const0_rtx); emit_move_insn (tmp0, const0_rtx);
...@@ -361,9 +365,6 @@ sh_expand_cmpnstr (rtx *operands) ...@@ -361,9 +365,6 @@ sh_expand_cmpnstr (rtx *operands)
add_int_reg_note (jump, REG_BR_PROB, prob_likely); add_int_reg_note (jump, REG_BR_PROB, prob_likely);
} }
addr1 = adjust_automodify_address (addr1, SImode, s1_addr, 0);
addr2 = adjust_automodify_address (addr2, SImode, s2_addr, 0);
/* word count. Do we have iterations ? */ /* word count. Do we have iterations ? */
emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2))); emit_insn (gen_lshrsi3 (lenw, len, GEN_INT (2)));
...@@ -372,11 +373,13 @@ sh_expand_cmpnstr (rtx *operands) ...@@ -372,11 +373,13 @@ sh_expand_cmpnstr (rtx *operands)
/* tmp2 is aligned, OK to load. */ /* tmp2 is aligned, OK to load. */
emit_move_insn (tmp2, addr2); emit_move_insn (tmp2, addr2);
emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, 4)); emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
GET_MODE_SIZE (SImode)));
/* tmp1 is aligned, OK to load. */ /* tmp1 is aligned, OK to load. */
emit_move_insn (tmp1, addr1); emit_move_insn (tmp1, addr1);
emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, 4)); emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
GET_MODE_SIZE (SImode)));
/* Is there a 0 byte ? */ /* Is there a 0 byte ? */
emit_insn (gen_andsi3 (tmp3, tmp2, tmp1)); emit_insn (gen_andsi3 (tmp3, tmp2, tmp1));
...@@ -396,6 +399,7 @@ sh_expand_cmpnstr (rtx *operands) ...@@ -396,6 +399,7 @@ sh_expand_cmpnstr (rtx *operands)
emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1))); emit_insn (gen_addsi3 (lenw, lenw, GEN_INT (-1)));
emit_insn (gen_tstsi_t (lenw, lenw)); emit_insn (gen_tstsi_t (lenw, lenw));
} }
jump = emit_jump_insn (gen_branch_false (L_loop_long)); jump = emit_jump_insn (gen_branch_false (L_loop_long));
add_int_reg_note (jump, REG_BR_PROB, prob_likely); add_int_reg_note (jump, REG_BR_PROB, prob_likely);
...@@ -409,23 +413,52 @@ sh_expand_cmpnstr (rtx *operands) ...@@ -409,23 +413,52 @@ sh_expand_cmpnstr (rtx *operands)
else else
{ {
/* Remaining bytes to read. */ /* Remaining bytes to read. */
emit_move_insn (len, GEN_INT (bytes % 4)); jump = emit_jump_insn (gen_jump_compact (L_small));
jump = emit_jump_insn (gen_jump_compact (L_loop_byte));
emit_barrier_after (jump); emit_barrier_after (jump);
} }
emit_label (L_end_loop_long); emit_label (L_end_loop_long);
/* Remaining bytes to read. */
emit_move_insn (len, GEN_INT (4));
/* Found last word. Restart it byte per byte. */ /* Found last word. Restart it byte per byte. */
emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr, -4)); bytes = 4;
emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr, -4)); emit_move_insn (s1_addr, plus_constant (Pmode, s1_addr,
-GET_MODE_SIZE (SImode)));
emit_move_insn (s2_addr, plus_constant (Pmode, s2_addr,
-GET_MODE_SIZE (SImode)));
} }
addr1 = adjust_address (addr1, QImode, 0); emit_label (L_small);
addr2 = adjust_address (addr2, QImode, 0);
gcc_assert (bytes <= 7);
addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
while (bytes--)
{
emit_insn (gen_extendqisi2 (tmp1, addr1));
emit_insn (gen_extendqisi2 (tmp2, addr2));
emit_insn (gen_cmpeqsi_t (tmp2, const0_rtx));
jump = emit_jump_insn (gen_branch_true (L_end_loop_byte));
add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
emit_insn (gen_cmpeqsi_t (tmp1, tmp2));
if (flag_delayed_branch)
emit_insn (gen_zero_extendqisi2 (tmp2, gen_lowpart (QImode, tmp2)));
jump = emit_jump_insn (gen_branch_false (L_end_loop_byte));
add_int_reg_note (jump, REG_BR_PROB, prob_unlikely);
addr1 = adjust_address (addr1, QImode, GET_MODE_SIZE (QImode));
addr2 = adjust_address (addr2, QImode, GET_MODE_SIZE (QImode));
}
jump = emit_jump_insn (gen_jump_compact( L_end_loop_byte));
emit_barrier_after (jump);
}
addr1 = adjust_automodify_address (addr1, QImode, s1_addr, 0);
addr2 = adjust_automodify_address (addr2, QImode, s2_addr, 0);
emit_label (L_loop_byte); emit_label (L_loop_byte);
......
2014-01-13 Christian Bruel <christian.bruel@st.com>
* gcc.target/sh/cmpstrn.c: New case.
2014-01-13 Jakub Jelinek <jakub@redhat.com> 2014-01-13 Jakub Jelinek <jakub@redhat.com>
* gcc.dg/vect/vect-simd-clone-10.c: Add dg-do run. * gcc.dg/vect/vect-simd-clone-10.c: Add dg-do run.
......
...@@ -6,16 +6,23 @@ ...@@ -6,16 +6,23 @@
/* { dg-final { scan-assembler-not "jmp" } } */ /* { dg-final { scan-assembler-not "jmp" } } */
/* { dg-final { scan-assembler-times "cmp/str" 1 } } */ /* { dg-final { scan-assembler-times "cmp/str" 1 } } */
/* Test that the cmp/str loop is optimized out. */ /* Test that cmp/str is not used for small lengths. */
test01(const char *s1, const char *s2, int n) test01(const char *s1)
{ {
return __builtin_strncmp (s1, "abcde", 3); return __builtin_strncmp (s1, "abcde", 3);
} }
/* Test that the cmp/str loop is used. */ /* Test that the cmp/str loop is used. */
test02(const char *s1, const char *s2, int n) test02(const char *s1)
{ {
return __builtin_strncmp (s1, "abcdefghi", 8); return __builtin_strncmp (s1, "abcdefghi", 8);
} }
/* Test that no call is generated */
test03(const char *s1, int n)
{
return __builtin_strncmp (s1, "abcde", n);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment