Commit 8889fbe5 by Monk Chiang Committed by Chung-Ju Wu

[NDS32] Optimize movmem and setmem operations.

gcc/
	* config/nds32/nds32-intrinsic.md (unaligned_storedi): Modify patterns
	implementation.
	(unaligned_store_dw): Ditto.
	* config/nds32/nds32-memory-manipulation.c
	(nds32_expand_movmemsi_loop_known_size): Refactoring implementation.
	(nds32_gen_dup_4_byte_to_word_value): Rename to ...
	(nds32_gen_dup_4_byte_to_word_value_aux): ... this.
	(emit_setmem_word_loop): Rename to ...
	(emit_setmem_doubleword_loop): ... this.
	(nds32_gen_dup_4_byte_to_word_value): New function.
	(nds32_gen_dup_8_byte_to_double_word_value): New function.
	(nds32_expand_setmem_loop): Refine implementation.
	(nds32_expand_setmem_loop_v3m): Ditto.
	* config/nds32/nds32-multiple.md (unaligned_store_update_base_dw): New
	pattern.

Co-Authored-By: Chung-Ju Wu <jasonwucj@gmail.com>

From-SVN: r260805
parent 0be3bad7
2018-05-27 Monk Chiang <sh.chiang04@gmail.com>
Chung-Ju Wu <jasonwucj@gmail.com>
* config/nds32/nds32-intrinsic.md (unaligned_storedi): Modify patterns
implementation.
(unaligned_store_dw): Ditto.
* config/nds32/nds32-memory-manipulation.c
(nds32_expand_movmemsi_loop_known_size): Refactoring implementation.
(nds32_gen_dup_4_byte_to_word_value): Rename to ...
(nds32_gen_dup_4_byte_to_word_value_aux): ... this.
(emit_setmem_word_loop): Rename to ...
(emit_setmem_doubleword_loop): ... this.
(nds32_gen_dup_4_byte_to_word_value): New function.
(nds32_gen_dup_8_byte_to_double_word_value): New function.
(nds32_expand_setmem_loop): Refine implementation.
(nds32_expand_setmem_loop_v3m): Ditto.
* config/nds32/nds32-multiple.md (unaligned_store_update_base_dw): New
pattern.
2018-05-27 Chung-Ju Wu <jasonwucj@gmail.com>
* config/nds32/nds32.md (bswapsi2, bswaphi2): New patterns.
......
......@@ -1596,22 +1596,17 @@
if (TARGET_ISA_V3M)
nds32_expand_unaligned_store (operands, DImode);
else
emit_insn (gen_unaligned_store_dw (operands[0], operands[1]));
emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[0]),
operands[1]));
DONE;
})
(define_insn "unaligned_store_dw"
[(set (mem:DI (match_operand:SI 0 "register_operand" "r"))
(unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_UASTORE_DW))]
[(set (match_operand:DI 0 "nds32_lmw_smw_base_operand" "=Umw")
(unspec:DI [(match_operand:DI 1 "register_operand" " r")] UNSPEC_UASTORE_DW))]
""
{
rtx otherops[3];
otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1]));
otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
otherops[2] = operands[0];
output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops);
return "";
return nds32_output_smw_double_word (operands);
}
[(set_attr "type" "store")
(set_attr "length" "4")]
......
......@@ -257,8 +257,124 @@ static bool
nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem,
rtx size, rtx alignment)
{
/* Expand a movmem (block copy) as a loop when SIZE is a compile-time
   constant.  DSTMEM and SRCMEM are the destination and source MEMs, and
   ALIGNMENT is a constant whose low two bits are tested for 4-byte
   alignment.

   Three strategies are chosen from the constant byte count:
     - fewer than 8 bytes: a byte-at-a-time post-increment loop;
     - a multiple of 8 bytes: a loop moving two registers per iteration
       through nds32_emit_mem_move_block;
     - anything else: defer to nds32_expand_movmemsi_loop_unknown_size.

   Returns false (written as 0) without emitting anything when
   TARGET_ISA_V3M is set and the alignment is not a multiple of 4;
   otherwise returns true or whatever the unknown-size expander returns.  */
/* NOTE(review): the unconditional return below looks like the pre-change
   body shown by the diff view; if it were really present, everything after
   it would be unreachable -- confirm against the committed file.  */
return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
size, alignment);
rtx dst_base_reg, src_base_reg;
rtx dst_itr, src_itr;
rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
rtx dst_end;
rtx double_word_mode_loop, byte_mode_loop;
rtx tmp;
int start_regno;
bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
unsigned HOST_WIDE_INT total_bytes = UINTVAL (size);
/* This expansion is not used on V3M unless the data is 4-byte aligned.  */
if (TARGET_ISA_V3M && !align_to_4_bytes)
return 0;
/* First register number handed to nds32_emit_mem_move_block:
   2 under TARGET_REDUCED_REGS, 16 otherwise.  */
if (TARGET_REDUCED_REGS)
start_regno = 2;
else
start_regno = 16;
dst_itr = gen_reg_rtx (Pmode);
src_itr = gen_reg_rtx (Pmode);
/* NOTE(review): this pseudo is replaced by the expand_binop result in both
   loop branches below, so the allocation here appears to be unused.  */
dst_end = gen_reg_rtx (Pmode);
tmp = gen_reg_rtx (QImode);
double_word_mode_loop = gen_label_rtx ();
byte_mode_loop = gen_label_rtx ();
dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));
if (total_bytes < 8)
{
/* Emit total_bytes less than 8 loop version of movmem.
add $dst_end, $dst, $size
move $dst_itr, $dst
move $src_itr, $src
.Lbyte_mode_loop:
lbi.bi $tmp, [$src_itr], #1
sbi.bi $tmp, [$dst_itr], #1
! Not reach upper bound. Loop.
bne $dst_itr, $dst_end, .Lbyte_mode_loop */
/* NOTE(review): the loop is bottom-tested, so it copies at least one byte;
   a SIZE of 0 would never hit the exit condition on the first pass.
   Presumably callers never pass 0 here -- confirm.  */
/* add $dst_end, $dst, $size */
dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
NULL_RTX, 0, OPTAB_WIDEN);
/* move $dst_itr, $dst
move $src_itr, $src */
emit_move_insn (dst_itr, dst_base_reg);
emit_move_insn (src_itr, src_base_reg);
/* .Lbyte_mode_loop: */
emit_label (byte_mode_loop);
/* lbi.bi $tmp, [$src_itr], #1 */
nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);
/* sbi.bi $tmp, [$dst_itr], #1 */
nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
/* ! Not reach upper bound. Loop.
bne $dst_itr, $dst_end, .Lbyte_mode_loop */
/* NOTE(review): this compare is done in SImode while the double-word loop
   below uses Pmode; presumably equivalent on this target -- confirm.  */
emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
SImode, 1, byte_mode_loop);
return true;
}
else if (total_bytes % 8 == 0)
{
/* Emit multiple of 8 loop version of movmem.
add $dst_end, $dst, $size
move $dst_itr, $dst
move $src_itr, $src
.Ldouble_word_mode_loop:
lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
! the moves will be deleted after register allocation
move $src_itr, $src_itr'
move $dst_itr, $dst_itr'
! Not reach upper bound. Loop.
bne $dst_end, $dst_itr, .Ldouble_word_mode_loop */
/* add $dst_end, $dst, $size */
dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
NULL_RTX, 0, OPTAB_WIDEN);
/* move $dst_itr, $dst
move $src_itr, $src */
emit_move_insn (dst_itr, dst_base_reg);
emit_move_insn (src_itr, src_base_reg);
/* .Ldouble_word_mode_loop: */
emit_label (double_word_mode_loop);
/* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
/* Work on copies: nds32_emit_mem_move_block takes these by address and
   presumably updates them to the post-move iterators and MEMs.  */
src_itr_m = src_itr;
dst_itr_m = dst_itr;
srcmem_m = srcmem;
dstmem_m = dstmem;
nds32_emit_mem_move_block (start_regno, 2,
&dst_itr_m, &dstmem_m,
&src_itr_m, &srcmem_m,
true);
/* move $src_itr, $src_itr'
move $dst_itr, $dst_itr' */
emit_move_insn (dst_itr, dst_itr_m);
emit_move_insn (src_itr, src_itr_m);
/* ! Not reach upper bound. Loop.
bne $dst_end, $dst_itr, .Ldouble_word_mode_loop */
emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL,
Pmode, 1, double_word_mode_loop);
}
else
{
/* Handle size greater than 8, and not a multiple of 8. */
return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
size, alignment);
}
return true;
}
static bool
......@@ -433,10 +549,8 @@ nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment)
/* Auxiliary function for expand setmem pattern. */
static rtx
nds32_gen_dup_4_byte_to_word_value (rtx value)
nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word)
{
rtx value4word = gen_reg_rtx (SImode);
gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));
if (CONST_INT_P (value))
......@@ -493,7 +607,30 @@ nds32_gen_dup_4_byte_to_word_value (rtx value)
}
/* Wrapper around nds32_gen_dup_4_byte_to_word_value_aux: allocate a new
   SImode pseudo register, pass it together with VALUE to the aux routine,
   and return that pseudo.  */
/* NOTE(review): the emit_setmem_word_loop line below looks like the old
   signature left over from the diff view, not part of this function --
   confirm against the committed file.  */
static rtx
emit_setmem_word_loop (rtx itr, rtx size, rtx value)
nds32_gen_dup_4_byte_to_word_value (rtx value)
{
/* The destination register is created here so the aux routine can also be
   used with a caller-supplied destination.  */
rtx value4word = gen_reg_rtx (SImode);
nds32_gen_dup_4_byte_to_word_value_aux (value, value4word);
return value4word;
}
/* Build a DImode pseudo register from VALUE: the low word is filled by
   nds32_gen_dup_4_byte_to_word_value_aux and the high word is then set to
   a copy of the low word.  Returns the DImode pseudo.  */
static rtx
nds32_gen_dup_8_byte_to_double_word_value (rtx value)
{
  rtx dup_dw = gen_reg_rtx (DImode);

  /* Let the word-sized helper write its result straight into the low
     half of the double-word register.  */
  rtx low_dest = nds32_di_low_part_subreg (dup_dw);
  nds32_gen_dup_4_byte_to_word_value_aux (value, low_dest);

  /* Mirror the low half into the high half so both words match.  */
  rtx low_src = nds32_di_low_part_subreg (dup_dw);
  rtx high_dest = nds32_di_high_part_subreg (dup_dw);
  emit_move_insn (high_dest, low_src);

  return dup_dw;
}
static rtx
emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value)
{
rtx word_mode_label = gen_label_rtx ();
rtx word_mode_end_label = gen_label_rtx ();
......@@ -502,9 +639,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value)
rtx word_mode_end = gen_reg_rtx (SImode);
rtx size_for_word = gen_reg_rtx (SImode);
/* and $size_for_word, $size, #~3 */
/* and $size_for_word, $size, #~0x7 */
size_for_word = expand_binop (SImode, and_optab, size,
gen_int_mode (~3, SImode),
gen_int_mode (~0x7, SImode),
NULL_RTX, 0, OPTAB_WIDEN);
emit_move_insn (byte_mode_size, size);
......@@ -516,8 +653,8 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value)
word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word,
NULL_RTX, 0, OPTAB_WIDEN);
/* andi $byte_mode_size, $size, 3 */
byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (3),
/* andi $byte_mode_size, $size, 0x7 */
byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7),
NULL_RTX, 0, OPTAB_WIDEN);
emit_move_insn (byte_mode_size, byte_mode_size_tmp);
......@@ -527,9 +664,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value)
/* ! word-mode set loop
smw.bim $value4word, [$dst_itr], $value4word, 0
bne $word_mode_end, $dst_itr, .Lword_mode */
emit_insn (gen_unaligned_store_update_base_w (itr,
itr,
value));
emit_insn (gen_unaligned_store_update_base_dw (itr,
itr,
value));
emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL,
Pmode, 1, word_mode_label);
......@@ -581,7 +718,7 @@ emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end)
static bool
nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
{
rtx value4word;
rtx value4doubleword;
rtx value4byte;
rtx dst;
rtx byte_mode_size;
......@@ -624,7 +761,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab
slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000
or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */
value4word = nds32_gen_dup_4_byte_to_word_value (value);
value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
/* and $size_for_word, $size, #-4
beqz $size_for_word, .Lword_mode_end
......@@ -637,7 +774,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
smw.bim $value4word, [$dst], $value4word, 0
bne $word_mode_end, $dst, .Lword_mode
.Lword_mode_end: */
byte_mode_size = emit_setmem_word_loop (dst, size, value4word);
byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword);
/* beqz $byte_mode_size, .Lend
add $byte_mode_end, $dst, $byte_mode_size
......@@ -648,8 +785,8 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
bne $byte_mode_end, $dst, .Lbyte_mode
.Lend: */
value4byte = simplify_gen_subreg (QImode, value4word, SImode,
subreg_lowpart_offset (QImode, SImode));
value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
subreg_lowpart_offset (QImode, DImode));
emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false);
......@@ -666,14 +803,15 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
rtx byte_loop_size = gen_reg_rtx (SImode);
rtx remain_size = gen_reg_rtx (SImode);
rtx new_base_reg;
rtx value4byte, value4word;
rtx value4byte, value4doubleword;
rtx byte_mode_size;
rtx last_byte_loop_label = gen_label_rtx ();
size = force_reg (SImode, size);
value4word = nds32_gen_dup_4_byte_to_word_value (value);
value4byte = simplify_gen_subreg (QImode, value4word, SImode, 0);
value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
subreg_lowpart_offset (QImode, DImode));
emit_move_insn (byte_loop_size, size);
emit_move_insn (byte_loop_base, base_reg);
......@@ -701,9 +839,9 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
emit_insn (gen_subsi3 (remain_size, size, need_align_bytes));
/* Set memory word by word. */
byte_mode_size = emit_setmem_word_loop (new_base_reg,
remain_size,
value4word);
byte_mode_size = emit_setmem_doubleword_loop (new_base_reg,
remain_size,
value4doubleword);
emit_move_insn (byte_loop_base, new_base_reg);
emit_move_insn (byte_loop_size, byte_mode_size);
......
......@@ -2854,6 +2854,25 @@
(set_attr "length" "4")]
)
/* Expander for a double-word (DImode) store that also advances the base
   register by 8.  Operand 0 receives the updated base, operand 1 is the
   base address register, and operand 2 is the DImode register to store.
   The preparation code does not emit the parallel above; it emits two
   separate insns (the store, then the add) and finishes with DONE.  */
(define_expand "unaligned_store_update_base_dw"
[(parallel [(set (match_operand:SI 0 "register_operand" "=r")
(plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 8)))
(set (mem:DI (match_dup 1))
(unspec:DI [(match_operand:DI 2 "register_operand" "r")] UNSPEC_UASTORE_DW))])]
""
{
/* DO NOT emit the combined store-with-update insn immediately, since the
   web pass doesn't recognize post_inc; try it again after GCC 5.0.
   REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156
   NOTE(review): the original wording named unaligned_store_w_m here,
   presumably carried over from the word-sized variant -- confirm.  */
/* Store operand 2 to the DImode MEM addressed by operand 1 ...  */
emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[1]), operands[2]));
/* ... then set operand 0 to operand 1 plus 8.  */
emit_insn (gen_addsi3 (operands[0], operands[1], gen_int_mode (8, Pmode)));
DONE;
}
[(set_attr "type" "store_multiple")
(set_attr "combo" "2")
(set_attr "length" "4")]
)
(define_insn "*stmsi25"
[(match_parallel 0 "nds32_store_multiple_operation"
[(set (mem:SI (match_operand:SI 1 "register_operand" "r"))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment