Commit 350013bc by Bin Cheng, committed by Marcus Shawcroft

[AArch64] Load/store pair optimization using the sched_fusion pass.
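
Two consecutive accesses to adjacent memory locations can be merged into
a single load/store pair instruction.  For example (illustrative), the
loads

    ldr  w0, [x2]
    ldr  w1, [x2, 4]

are placed next to each other by the sched_fusion pass and then merged
by the new peephole2 patterns into

    ldp  w0, w1, [x2]

Stores are handled analogously with stp, and sign-extending SImode
loads with ldpsw.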

From-SVN: r218430
parent a66272f6
2014-12-05 Bin Cheng <bin.cheng@arm.com>
* config/aarch64/aarch64.md (load_pair<mode>): Split to
load_pairsi, load_pairdi, load_pairsf and load_pairdf.
(load_pairsi, load_pairdi, load_pairsf, load_pairdf): Split
from load_pair<mode>. New alternative to support int/fp
registers in fp/int mode patterns.
(store_pair<mode>): Split to store_pairsi, store_pairdi,
store_pairsf and store_pairdf.
(store_pairsi, store_pairdi, store_pairsf, store_pairdf): Split
from store_pair<mode>. New alternative to support int/fp
registers in fp/int mode patterns.
(*load_pair_extendsidi2_aarch64): New pattern.
(*load_pair_zero_extendsidi2_aarch64): New pattern.
(aarch64-ldpstp.md): Include.
* config/aarch64/aarch64-ldpstp.md: New file.
* config/aarch64/aarch64-protos.h (aarch64_gen_adjusted_ldpstp):
New.
(extract_base_offset_in_addr): New.
(aarch64_operands_ok_for_ldpstp): New.
(aarch64_operands_adjust_ok_for_ldpstp): New.
* config/aarch64/aarch64.c (enum sched_fusion_type): New enum.
(TARGET_SCHED_FUSION_PRIORITY): New hook.
(fusion_load_store): New function.
(extract_base_offset_in_addr): New function.
(aarch64_gen_adjusted_ldpstp): New function.
(aarch64_sched_fusion_priority): New function.
(aarch64_operands_ok_for_ldpstp): New function.
(aarch64_operands_adjust_ok_for_ldpstp): New function.
2014-12-05 Olivier Hainque <hainque@adacore.com>
* defaults.h (DWARF_REG_TO_UNWIND_COLUMN): Define default.
;; AArch64 ldp/stp peephole optimizations.
;; Copyright (C) 2014 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
(define_peephole2
[(set (match_operand:GPI 0 "register_operand" "")
(match_operand:GPI 1 "aarch64_mem_pair_operand" ""))
(set (match_operand:GPI 2 "register_operand" "")
(match_operand:GPI 3 "memory_operand" ""))]
"aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
[(parallel [(set (match_dup 0) (match_dup 1))
(set (match_dup 2) (match_dup 3))])]
{
rtx base, offset_1, offset_2, tmp;
extract_base_offset_in_addr (operands[1], &base, &offset_1);
extract_base_offset_in_addr (operands[3], &base, &offset_2);
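/* If the first access has the larger offset, swap the operands so that
   the access with the lower address comes first; the load_pair patterns
   require the two addresses to be in increasing order.  */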
if (INTVAL (offset_1) > INTVAL (offset_2))
{
tmp = operands[0];
operands[0] = operands[2];
operands[2] = tmp;
tmp = operands[1];
operands[1] = operands[3];
operands[3] = tmp;
}
})
(define_peephole2
[(set (match_operand:GPI 0 "aarch64_mem_pair_operand" "")
(match_operand:GPI 1 "aarch64_reg_or_zero" ""))
(set (match_operand:GPI 2 "memory_operand" "")
(match_operand:GPI 3 "aarch64_reg_or_zero" ""))]
"aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
[(parallel [(set (match_dup 0) (match_dup 1))
(set (match_dup 2) (match_dup 3))])]
{
rtx base, offset_1, offset_2, tmp;
extract_base_offset_in_addr (operands[0], &base, &offset_1);
extract_base_offset_in_addr (operands[2], &base, &offset_2);
if (INTVAL (offset_1) > INTVAL (offset_2))
{
tmp = operands[0];
operands[0] = operands[2];
operands[2] = tmp;
tmp = operands[1];
operands[1] = operands[3];
operands[3] = tmp;
}
})
(define_peephole2
[(set (match_operand:GPF 0 "register_operand" "")
(match_operand:GPF 1 "aarch64_mem_pair_operand" ""))
(set (match_operand:GPF 2 "register_operand" "")
(match_operand:GPF 3 "memory_operand" ""))]
"aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
[(parallel [(set (match_dup 0) (match_dup 1))
(set (match_dup 2) (match_dup 3))])]
{
rtx base, offset_1, offset_2, tmp;
extract_base_offset_in_addr (operands[1], &base, &offset_1);
extract_base_offset_in_addr (operands[3], &base, &offset_2);
if (INTVAL (offset_1) > INTVAL (offset_2))
{
tmp = operands[0];
operands[0] = operands[2];
operands[2] = tmp;
tmp = operands[1];
operands[1] = operands[3];
operands[3] = tmp;
}
})
(define_peephole2
[(set (match_operand:GPF 0 "aarch64_mem_pair_operand" "")
(match_operand:GPF 1 "register_operand" ""))
(set (match_operand:GPF 2 "memory_operand" "")
(match_operand:GPF 3 "register_operand" ""))]
"aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
[(parallel [(set (match_dup 0) (match_dup 1))
(set (match_dup 2) (match_dup 3))])]
{
rtx base, offset_1, offset_2, tmp;
extract_base_offset_in_addr (operands[0], &base, &offset_1);
extract_base_offset_in_addr (operands[2], &base, &offset_2);
if (INTVAL (offset_1) > INTVAL (offset_2))
{
tmp = operands[0];
operands[0] = operands[2];
operands[2] = tmp;
tmp = operands[1];
operands[1] = operands[3];
operands[3] = tmp;
}
})
;; Handle sign/zero extended consecutive loads.
(define_peephole2
[(set (match_operand:DI 0 "register_operand" "")
(sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "")))
(set (match_operand:DI 2 "register_operand" "")
(sign_extend:DI (match_operand:SI 3 "memory_operand" "")))]
"aarch64_operands_ok_for_ldpstp (operands, true, SImode)"
[(parallel [(set (match_dup 0) (sign_extend:DI (match_dup 1)))
(set (match_dup 2) (sign_extend:DI (match_dup 3)))])]
{
rtx base, offset_1, offset_2, tmp;
extract_base_offset_in_addr (operands[1], &base, &offset_1);
extract_base_offset_in_addr (operands[3], &base, &offset_2);
if (INTVAL (offset_1) > INTVAL (offset_2))
{
tmp = operands[0];
operands[0] = operands[2];
operands[2] = tmp;
tmp = operands[1];
operands[1] = operands[3];
operands[3] = tmp;
}
})
(define_peephole2
[(set (match_operand:DI 0 "register_operand" "")
(zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "")))
(set (match_operand:DI 2 "register_operand" "")
(zero_extend:DI (match_operand:SI 3 "memory_operand" "")))]
"aarch64_operands_ok_for_ldpstp (operands, true, SImode)"
[(parallel [(set (match_dup 0) (zero_extend:DI (match_dup 1)))
(set (match_dup 2) (zero_extend:DI (match_dup 3)))])]
{
rtx base, offset_1, offset_2, tmp;
extract_base_offset_in_addr (operands[1], &base, &offset_1);
extract_base_offset_in_addr (operands[3], &base, &offset_2);
if (INTVAL (offset_1) > INTVAL (offset_2))
{
tmp = operands[0];
operands[0] = operands[2];
operands[2] = tmp;
tmp = operands[1];
operands[1] = operands[3];
operands[3] = tmp;
}
})
;; Handle consecutive loads/stores whose offsets are out of the range
;; supported by ldp/ldpsw/stp.  We first compute an adjusted base in a
;; scratch register, then merge the accesses into ldp/ldpsw/stp using
;; the adjusted offsets.
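;;
;; For example (illustrative), DImode loads at offsets from 0x200, which
;; is outside the signed, scaled 7-bit immediate range of ldp:
;;
;;   ldr x0, [xb, 0x200]       add scratch, xb, 0x200
;;   ldr x1, [xb, 0x208]  =>   ldp x0, x1, [scratch]
;;   ldr x2, [xb, 0x210]       ldp x2, x3, [scratch, 0x10]
;;   ldr x3, [xb, 0x218]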
(define_peephole2
[(match_scratch:DI 8 "r")
(set (match_operand:GPI 0 "register_operand" "")
(match_operand:GPI 1 "memory_operand" ""))
(set (match_operand:GPI 2 "register_operand" "")
(match_operand:GPI 3 "memory_operand" ""))
(set (match_operand:GPI 4 "register_operand" "")
(match_operand:GPI 5 "memory_operand" ""))
(set (match_operand:GPI 6 "register_operand" "")
(match_operand:GPI 7 "memory_operand" ""))
(match_dup 8)]
"aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
[(const_int 0)]
{
rtx base, offset_1, offset_2, tmp;
extract_base_offset_in_addr (operands[1], &base, &offset_1);
extract_base_offset_in_addr (operands[3], &base, &offset_2);
if (INTVAL (offset_1) > INTVAL (offset_2))
{
tmp = operands[0];
operands[0] = operands[6];
operands[6] = tmp;
tmp = operands[1];
operands[1] = operands[7];
operands[7] = tmp;
tmp = operands[2];
operands[2] = operands[4];
operands[4] = tmp;
tmp = operands[3];
operands[3] = operands[5];
operands[5] = tmp;
}
if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN))
DONE;
else
FAIL;
})
(define_peephole2
[(match_scratch:DI 8 "r")
(set (match_operand:GPF 0 "register_operand" "")
(match_operand:GPF 1 "memory_operand" ""))
(set (match_operand:GPF 2 "register_operand" "")
(match_operand:GPF 3 "memory_operand" ""))
(set (match_operand:GPF 4 "register_operand" "")
(match_operand:GPF 5 "memory_operand" ""))
(set (match_operand:GPF 6 "register_operand" "")
(match_operand:GPF 7 "memory_operand" ""))
(match_dup 8)]
"aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
[(const_int 0)]
{
rtx base, offset_1, offset_2, tmp;
extract_base_offset_in_addr (operands[1], &base, &offset_1);
extract_base_offset_in_addr (operands[3], &base, &offset_2);
if (INTVAL (offset_1) > INTVAL (offset_2))
{
tmp = operands[0];
operands[0] = operands[6];
operands[6] = tmp;
tmp = operands[1];
operands[1] = operands[7];
operands[7] = tmp;
tmp = operands[2];
operands[2] = operands[4];
operands[4] = tmp;
tmp = operands[3];
operands[3] = operands[5];
operands[5] = tmp;
}
if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN))
DONE;
else
FAIL;
})
(define_peephole2
[(match_scratch:DI 8 "r")
(set (match_operand:DI 0 "register_operand" "")
(sign_extend:DI (match_operand:SI 1 "memory_operand" "")))
(set (match_operand:DI 2 "register_operand" "")
(sign_extend:DI (match_operand:SI 3 "memory_operand" "")))
(set (match_operand:DI 4 "register_operand" "")
(sign_extend:DI (match_operand:SI 5 "memory_operand" "")))
(set (match_operand:DI 6 "register_operand" "")
(sign_extend:DI (match_operand:SI 7 "memory_operand" "")))
(match_dup 8)]
"aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
[(const_int 0)]
{
rtx base, offset_1, offset_2, tmp;
extract_base_offset_in_addr (operands[1], &base, &offset_1);
extract_base_offset_in_addr (operands[3], &base, &offset_2);
if (INTVAL (offset_1) > INTVAL (offset_2))
{
tmp = operands[0];
operands[0] = operands[6];
operands[6] = tmp;
tmp = operands[1];
operands[1] = operands[7];
operands[7] = tmp;
tmp = operands[2];
operands[2] = operands[4];
operands[4] = tmp;
tmp = operands[3];
operands[3] = operands[5];
operands[5] = tmp;
}
if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, SIGN_EXTEND))
DONE;
else
FAIL;
})
(define_peephole2
[(match_scratch:DI 8 "r")
(set (match_operand:DI 0 "register_operand" "")
(zero_extend:DI (match_operand:SI 1 "memory_operand" "")))
(set (match_operand:DI 2 "register_operand" "")
(zero_extend:DI (match_operand:SI 3 "memory_operand" "")))
(set (match_operand:DI 4 "register_operand" "")
(zero_extend:DI (match_operand:SI 5 "memory_operand" "")))
(set (match_operand:DI 6 "register_operand" "")
(zero_extend:DI (match_operand:SI 7 "memory_operand" "")))
(match_dup 8)]
"aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
[(const_int 0)]
{
rtx base, offset_1, offset_2, tmp;
extract_base_offset_in_addr (operands[1], &base, &offset_1);
extract_base_offset_in_addr (operands[3], &base, &offset_2);
if (INTVAL (offset_1) > INTVAL (offset_2))
{
tmp = operands[0];
operands[0] = operands[6];
operands[6] = tmp;
tmp = operands[1];
operands[1] = operands[7];
operands[7] = tmp;
tmp = operands[2];
operands[2] = operands[4];
operands[4] = tmp;
tmp = operands[3];
operands[3] = operands[5];
operands[5] = tmp;
}
if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, ZERO_EXTEND))
DONE;
else
FAIL;
})
(define_peephole2
[(match_scratch:DI 8 "r")
(set (match_operand:GPI 0 "memory_operand" "")
(match_operand:GPI 1 "aarch64_reg_or_zero" ""))
(set (match_operand:GPI 2 "memory_operand" "")
(match_operand:GPI 3 "aarch64_reg_or_zero" ""))
(set (match_operand:GPI 4 "memory_operand" "")
(match_operand:GPI 5 "aarch64_reg_or_zero" ""))
(set (match_operand:GPI 6 "memory_operand" "")
(match_operand:GPI 7 "aarch64_reg_or_zero" ""))
(match_dup 8)]
"aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)"
[(const_int 0)]
{
rtx base, offset_1, offset_2, tmp;
extract_base_offset_in_addr (operands[0], &base, &offset_1);
extract_base_offset_in_addr (operands[2], &base, &offset_2);
if (INTVAL (offset_1) > INTVAL (offset_2))
{
tmp = operands[0];
operands[0] = operands[6];
operands[6] = tmp;
tmp = operands[1];
operands[1] = operands[7];
operands[7] = tmp;
tmp = operands[2];
operands[2] = operands[4];
operands[4] = tmp;
tmp = operands[3];
operands[3] = operands[5];
operands[5] = tmp;
}
if (aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN))
DONE;
else
FAIL;
})
(define_peephole2
[(match_scratch:DI 8 "r")
(set (match_operand:GPF 0 "memory_operand" "")
(match_operand:GPF 1 "aarch64_reg_or_zero" ""))
(set (match_operand:GPF 2 "memory_operand" "")
(match_operand:GPF 3 "aarch64_reg_or_zero" ""))
(set (match_operand:GPF 4 "memory_operand" "")
(match_operand:GPF 5 "aarch64_reg_or_zero" ""))
(set (match_operand:GPF 6 "memory_operand" "")
(match_operand:GPF 7 "aarch64_reg_or_zero" ""))
(match_dup 8)]
"aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)"
[(const_int 0)]
{
rtx base, offset_1, offset_2, tmp;
extract_base_offset_in_addr (operands[0], &base, &offset_1);
extract_base_offset_in_addr (operands[2], &base, &offset_2);
if (INTVAL (offset_1) > INTVAL (offset_2))
{
tmp = operands[0];
operands[0] = operands[6];
operands[6] = tmp;
tmp = operands[1];
operands[1] = operands[7];
operands[7] = tmp;
tmp = operands[2];
operands[2] = operands[4];
operands[4] = tmp;
tmp = operands[3];
operands[3] = operands[5];
operands[5] = tmp;
}
if (aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN))
DONE;
else
FAIL;
})
@@ -293,6 +293,7 @@ void aarch64_expand_compare_and_swap (rtx op[]);
void aarch64_split_compare_and_swap (rtx op[]);
void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE);
#endif /* RTX_CODE */
void aarch64_init_builtins (void);
@@ -316,4 +317,8 @@ extern bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
int aarch64_ccmp_mode_to_code (enum machine_mode mode);
bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset);
bool aarch64_operands_ok_for_ldpstp (rtx *, bool, enum machine_mode);
bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, enum machine_mode);
#endif /* GCC_AARCH64_PROTOS_H */
@@ -10382,6 +10382,484 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
return false;
}
/* If MEM is in the form of [base+offset], extract the two parts of the
   address and store them in BASE and OFFSET; otherwise clear BASE and
   OFFSET and return false.  */
bool
extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
{
rtx addr;
gcc_assert (MEM_P (mem));
addr = XEXP (mem, 0);
if (REG_P (addr))
{
*base = addr;
*offset = const0_rtx;
return true;
}
if (GET_CODE (addr) == PLUS
&& REG_P (XEXP (addr, 0)) && CONST_INT_P (XEXP (addr, 1)))
{
*base = XEXP (addr, 0);
*offset = XEXP (addr, 1);
return true;
}
*base = NULL_RTX;
*offset = NULL_RTX;
return false;
}
/* Types for scheduling fusion. */
enum sched_fusion_type
{
SCHED_FUSION_NONE = 0,
SCHED_FUSION_LD_SIGN_EXTEND,
SCHED_FUSION_LD_ZERO_EXTEND,
SCHED_FUSION_LD,
SCHED_FUSION_ST,
SCHED_FUSION_NUM
};
/* If INSN is a load or store whose address is of the form [base+offset],
   extract the two parts and store them in BASE and OFFSET.  Return the
   scheduling fusion type of INSN.  */
static enum sched_fusion_type
fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset)
{
rtx x, dest, src;
enum sched_fusion_type fusion = SCHED_FUSION_LD;
gcc_assert (INSN_P (insn));
x = PATTERN (insn);
if (GET_CODE (x) != SET)
return SCHED_FUSION_NONE;
src = SET_SRC (x);
dest = SET_DEST (x);
if (GET_MODE (src) != SImode && GET_MODE (src) != DImode
&& GET_MODE (src) != SFmode && GET_MODE (src) != DFmode)
return SCHED_FUSION_NONE;
if (GET_CODE (src) == SIGN_EXTEND)
{
fusion = SCHED_FUSION_LD_SIGN_EXTEND;
src = XEXP (src, 0);
if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
return SCHED_FUSION_NONE;
}
else if (GET_CODE (src) == ZERO_EXTEND)
{
fusion = SCHED_FUSION_LD_ZERO_EXTEND;
src = XEXP (src, 0);
if (GET_CODE (src) != MEM || GET_MODE (src) != SImode)
return SCHED_FUSION_NONE;
}
if (GET_CODE (src) == MEM && REG_P (dest))
extract_base_offset_in_addr (src, base, offset);
else if (GET_CODE (dest) == MEM && (REG_P (src) || src == const0_rtx))
{
fusion = SCHED_FUSION_ST;
extract_base_offset_in_addr (dest, base, offset);
}
else
return SCHED_FUSION_NONE;
if (*base == NULL_RTX || *offset == NULL_RTX)
fusion = SCHED_FUSION_NONE;
return fusion;
}
/* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
Currently we only support fusing ldr and str instructions, so FUSION_PRI
and PRI are only calculated for these instructions.  For other instructions,
FUSION_PRI and PRI are simply set to MAX_PRI - 1.  In the future, other
types of instruction fusion can be added by returning different priorities.
It's important that irrelevant instructions get the largest FUSION_PRI. */
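/* Loads/stores of the same fusion type and base register receive equal
   FUSION_PRI, while PRI orders them by increasing offset; this lets the
   scheduler place pairable accesses next to each other.  */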
static void
aarch64_sched_fusion_priority (rtx_insn *insn, int max_pri,
int *fusion_pri, int *pri)
{
int tmp, off_val;
rtx base, offset;
enum sched_fusion_type fusion;
gcc_assert (INSN_P (insn));
tmp = max_pri - 1;
fusion = fusion_load_store (insn, &base, &offset);
if (fusion == SCHED_FUSION_NONE)
{
*pri = tmp;
*fusion_pri = tmp;
return;
}
/* Set FUSION_PRI according to fusion type and base register. */
*fusion_pri = tmp - fusion * FIRST_PSEUDO_REGISTER - REGNO (base);
/* Calculate PRI. */
tmp /= 2;
/* INSN with smaller offset goes first. */
off_val = (int)(INTVAL (offset));
if (off_val >= 0)
tmp -= (off_val & 0xfffff);
else
tmp += ((- off_val) & 0xfffff);
*pri = tmp;
return;
}
/* Given OPERANDS of consecutive load/store, check if we can merge
them into ldp/stp. LOAD is true if they are load instructions.
MODE is the mode of memory operands. */
bool
aarch64_operands_ok_for_ldpstp (rtx *operands, bool load,
enum machine_mode mode)
{
HOST_WIDE_INT offval_1, offval_2, msize;
enum reg_class rclass_1, rclass_2;
rtx mem_1, mem_2, reg_1, reg_2, base_1, base_2, offset_1, offset_2;
if (load)
{
mem_1 = operands[1];
mem_2 = operands[3];
reg_1 = operands[0];
reg_2 = operands[2];
gcc_assert (REG_P (reg_1) && REG_P (reg_2));
if (REGNO (reg_1) == REGNO (reg_2))
return false;
}
else
{
mem_1 = operands[0];
mem_2 = operands[2];
reg_1 = operands[1];
reg_2 = operands[3];
}
/* Check if the addresses are in the form of [base+offset]. */
extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
return false;
extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
return false;
/* Check if the bases are the same.  */
if (!rtx_equal_p (base_1, base_2))
return false;
offval_1 = INTVAL (offset_1);
offval_2 = INTVAL (offset_2);
msize = GET_MODE_SIZE (mode);
/* Check if the offsets are consecutive. */
if (offval_1 != (offval_2 + msize) && offval_2 != (offval_1 + msize))
return false;
/* Check if the addresses are clobbered by the loads.  */
if (load)
{
if (reg_mentioned_p (reg_1, mem_1))
return false;
/* In increasing order, the last load can clobber the address. */
if (offval_1 > offval_2 && reg_mentioned_p (reg_2, mem_2))
return false;
}
if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
rclass_1 = FP_REGS;
else
rclass_1 = GENERAL_REGS;
if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
rclass_2 = FP_REGS;
else
rclass_2 = GENERAL_REGS;
/* Check if the registers are of the same class.  */
if (rclass_1 != rclass_2)
return false;
return true;
}
/* Given OPERANDS of consecutive load/store, check if we can merge
them into ldp/stp by adjusting the offset. LOAD is true if they
are load instructions. MODE is the mode of memory operands.
Given the following consecutive stores:
str w1, [xb, 0x100]
str w1, [xb, 0x104]
str w1, [xb, 0x108]
str w1, [xb, 0x10c]
Though the offsets are out of the range supported by stp, we can
still pair them after adjusting the offset, like:
add scratch, xb, 0x100
stp w1, w1, [scratch]
stp w1, w1, [scratch, 0x8]
The peephole patterns detecting this opportunity should guarantee
the scratch register is available.  */
bool
aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
enum machine_mode mode)
{
enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
HOST_WIDE_INT offval_1, offval_2, offval_3, offval_4, msize;
rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
if (load)
{
reg_1 = operands[0];
mem_1 = operands[1];
reg_2 = operands[2];
mem_2 = operands[3];
reg_3 = operands[4];
mem_3 = operands[5];
reg_4 = operands[6];
mem_4 = operands[7];
gcc_assert (REG_P (reg_1) && REG_P (reg_2)
&& REG_P (reg_3) && REG_P (reg_4));
if (REGNO (reg_1) == REGNO (reg_2) || REGNO (reg_3) == REGNO (reg_4))
return false;
}
else
{
mem_1 = operands[0];
reg_1 = operands[1];
mem_2 = operands[2];
reg_2 = operands[3];
mem_3 = operands[4];
reg_3 = operands[5];
mem_4 = operands[6];
reg_4 = operands[7];
}
/* Skip if the memory operand is by itself valid for ldp/stp.  */
if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
return false;
/* Check if the addresses are in the form of [base+offset]. */
extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
return false;
extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
return false;
extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
return false;
extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
return false;
/* Check if the bases are the same.  */
if (!rtx_equal_p (base_1, base_2)
|| !rtx_equal_p (base_2, base_3)
|| !rtx_equal_p (base_3, base_4))
return false;
offval_1 = INTVAL (offset_1);
offval_2 = INTVAL (offset_2);
offval_3 = INTVAL (offset_3);
offval_4 = INTVAL (offset_4);
msize = GET_MODE_SIZE (mode);
/* Check if the offsets are consecutive. */
if ((offval_1 != (offval_2 + msize)
|| offval_1 != (offval_3 + msize * 2)
|| offval_1 != (offval_4 + msize * 3))
&& (offval_4 != (offval_3 + msize)
|| offval_4 != (offval_2 + msize * 2)
|| offval_4 != (offval_1 + msize * 3)))
return false;
/* Check if the addresses are clobbered by the loads.  */
if (load)
{
if (reg_mentioned_p (reg_1, mem_1)
|| reg_mentioned_p (reg_2, mem_2)
|| reg_mentioned_p (reg_3, mem_3))
return false;
/* In increasing order, the last load can clobber the address. */
if (offval_1 > offval_2 && reg_mentioned_p (reg_4, mem_4))
return false;
}
if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
rclass_1 = FP_REGS;
else
rclass_1 = GENERAL_REGS;
if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
rclass_2 = FP_REGS;
else
rclass_2 = GENERAL_REGS;
if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
rclass_3 = FP_REGS;
else
rclass_3 = GENERAL_REGS;
if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
rclass_4 = FP_REGS;
else
rclass_4 = GENERAL_REGS;
/* Check if the registers are of the same class.  */
if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
return false;
return true;
}
/* Given OPERANDS of consecutive load/store, this function pairs them
into ldp/stp after adjusting the offset. It depends on the fact
that addresses of load/store instructions are in increasing order.
MODE is the mode of the memory operands.  CODE is the rtl operator
that should be applied to all memory operands; it is SIGN_EXTEND,
ZERO_EXTEND or UNKNOWN.  */
bool
aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
enum machine_mode mode, RTX_CODE code)
{
rtx base, offset, t1, t2;
rtx mem_1, mem_2, mem_3, mem_4;
HOST_WIDE_INT off_val, abs_off, adj_off, new_off, stp_off_limit, msize;
if (load)
{
mem_1 = operands[1];
mem_2 = operands[3];
mem_3 = operands[5];
mem_4 = operands[7];
}
else
{
mem_1 = operands[0];
mem_2 = operands[2];
mem_3 = operands[4];
mem_4 = operands[6];
gcc_assert (code == UNKNOWN);
}
extract_base_offset_in_addr (mem_1, &base, &offset);
gcc_assert (base != NULL_RTX && offset != NULL_RTX);
/* Adjust the offset so that it fits in the ldp/stp instructions.  */
msize = GET_MODE_SIZE (mode);
stp_off_limit = msize * 0x40;
off_val = INTVAL (offset);
abs_off = (off_val < 0) ? -off_val : off_val;
new_off = abs_off % stp_off_limit;
adj_off = abs_off - new_off;
/* Further adjust to make sure all offsets are OK. */
if ((new_off + msize * 2) >= stp_off_limit)
{
adj_off += stp_off_limit;
new_off -= stp_off_limit;
}
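/* For example (illustrative): for DImode, msize is 8 and stp_off_limit
   is 512, so an original offset of 1032 yields adj_off == 1024 and
   new_off == 8; the four accesses then use offsets 8, 16, 24 and 32
   from the scratch register.  */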
/* Make sure the adjustment can be done with ADD/SUB instructions. */
if (adj_off >= 0x1000)
return false;
if (off_val < 0)
{
adj_off = -adj_off;
new_off = -new_off;
}
/* Create new memory references. */
mem_1 = change_address (mem_1, VOIDmode,
plus_constant (DImode, operands[8], new_off));
/* Check if the adjusted address is OK for ldp/stp. */
if (!aarch64_mem_pair_operand (mem_1, mode))
return false;
mem_2 = change_address (mem_2, VOIDmode,
plus_constant (DImode,
operands[8],
new_off + msize));
mem_3 = change_address (mem_3, VOIDmode,
plus_constant (DImode,
operands[8],
new_off + msize * 2));
mem_4 = change_address (mem_4, VOIDmode,
plus_constant (DImode,
operands[8],
new_off + msize * 3));
if (code == ZERO_EXTEND)
{
mem_1 = gen_rtx_ZERO_EXTEND (DImode, mem_1);
mem_2 = gen_rtx_ZERO_EXTEND (DImode, mem_2);
mem_3 = gen_rtx_ZERO_EXTEND (DImode, mem_3);
mem_4 = gen_rtx_ZERO_EXTEND (DImode, mem_4);
}
else if (code == SIGN_EXTEND)
{
mem_1 = gen_rtx_SIGN_EXTEND (DImode, mem_1);
mem_2 = gen_rtx_SIGN_EXTEND (DImode, mem_2);
mem_3 = gen_rtx_SIGN_EXTEND (DImode, mem_3);
mem_4 = gen_rtx_SIGN_EXTEND (DImode, mem_4);
}
if (load)
{
operands[1] = mem_1;
operands[3] = mem_2;
operands[5] = mem_3;
operands[7] = mem_4;
}
else
{
operands[0] = mem_1;
operands[2] = mem_2;
operands[4] = mem_3;
operands[6] = mem_4;
}
/* Emit adjusting instruction. */
emit_insn (gen_rtx_SET (VOIDmode, operands[8],
plus_constant (DImode, base, adj_off)));
/* Emit ldp/stp instructions. */
t1 = gen_rtx_SET (VOIDmode, operands[0], operands[1]);
t2 = gen_rtx_SET (VOIDmode, operands[2], operands[3]);
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
t1 = gen_rtx_SET (VOIDmode, operands[4], operands[5]);
t2 = gen_rtx_SET (VOIDmode, operands[6], operands[7]);
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, t1, t2)));
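/* Each PARALLEL of two SETs emitted above matches one of the
   load_pair/store_pair patterns in aarch64.md and is output as a single
   ldp/stp (or ldpsw for a sign-extending load pair).  */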
return true;
}
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost
@@ -10647,6 +11125,9 @@ aarch_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY aarch64_sched_fusion_priority
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-aarch64.h"
@@ -1081,62 +1081,139 @@
;; Operands 1 and 3 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation.
(define_insn "load_pair<mode>"
[(set (match_operand:GPI 0 "register_operand" "=r")
(match_operand:GPI 1 "aarch64_mem_pair_operand" "Ump"))
(set (match_operand:GPI 2 "register_operand" "=r")
(match_operand:GPI 3 "memory_operand" "m"))]
(define_insn "load_pairsi"
[(set (match_operand:SI 0 "register_operand" "=r,*w")
(match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump"))
(set (match_operand:SI 2 "register_operand" "=r,*w")
(match_operand:SI 3 "memory_operand" "m,m"))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (<MODE>mode)))"
"ldp\\t%<w>0, %<w>2, %1"
[(set_attr "type" "load2")]
GET_MODE_SIZE (SImode)))"
"@
ldp\\t%w0, %w2, %1
ldp\\t%s0, %s2, %1"
[(set_attr "type" "load2,neon_load1_2reg")
(set_attr "fp" "*,yes")]
)
(define_insn "load_pairdi"
[(set (match_operand:DI 0 "register_operand" "=r,*w")
(match_operand:DI 1 "aarch64_mem_pair_operand" "Ump,Ump"))
(set (match_operand:DI 2 "register_operand" "=r,*w")
(match_operand:DI 3 "memory_operand" "m,m"))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (DImode)))"
"@
ldp\\t%x0, %x2, %1
ldp\\t%d0, %d2, %1"
[(set_attr "type" "load2,neon_load1_2reg")
(set_attr "fp" "*,yes")]
)
;; Operands 0 and 2 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation.
(define_insn "store_pair<mode>"
[(set (match_operand:GPI 0 "aarch64_mem_pair_operand" "=Ump")
(match_operand:GPI 1 "register_operand" "r"))
(set (match_operand:GPI 2 "memory_operand" "=m")
(match_operand:GPI 3 "register_operand" "r"))]
(define_insn "store_pairsi"
[(set (match_operand:SI 0 "aarch64_mem_pair_operand" "=Ump,Ump")
(match_operand:SI 1 "aarch64_reg_or_zero" "rZ,*w"))
(set (match_operand:SI 2 "memory_operand" "=m,m")
(match_operand:SI 3 "aarch64_reg_or_zero" "rZ,*w"))]
"rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (<MODE>mode)))"
"stp\\t%<w>1, %<w>3, %0"
[(set_attr "type" "store2")]
GET_MODE_SIZE (SImode)))"
"@
stp\\t%w1, %w3, %0
stp\\t%s1, %s3, %0"
[(set_attr "type" "store2,neon_store1_2reg")
(set_attr "fp" "*,yes")]
)
(define_insn "store_pairdi"
[(set (match_operand:DI 0 "aarch64_mem_pair_operand" "=Ump,Ump")
(match_operand:DI 1 "aarch64_reg_or_zero" "rZ,*w"))
(set (match_operand:DI 2 "memory_operand" "=m,m")
(match_operand:DI 3 "aarch64_reg_or_zero" "rZ,*w"))]
"rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (DImode)))"
"@
stp\\t%x1, %x3, %0
stp\\t%d1, %d3, %0"
[(set_attr "type" "store2,neon_store1_2reg")
(set_attr "fp" "*,yes")]
)
;; Operands 1 and 3 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation.
(define_insn "load_pair<mode>"
[(set (match_operand:GPF 0 "register_operand" "=w")
(match_operand:GPF 1 "aarch64_mem_pair_operand" "Ump"))
(set (match_operand:GPF 2 "register_operand" "=w")
(match_operand:GPF 3 "memory_operand" "m"))]
(define_insn "load_pairsf"
[(set (match_operand:SF 0 "register_operand" "=w,*r")
(match_operand:SF 1 "aarch64_mem_pair_operand" "Ump,Ump"))
(set (match_operand:SF 2 "register_operand" "=w,*r")
(match_operand:SF 3 "memory_operand" "m,m"))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (<MODE>mode)))"
"ldp\\t%<w>0, %<w>2, %1"
[(set_attr "type" "neon_load1_2reg<q>")]
GET_MODE_SIZE (SFmode)))"
"@
ldp\\t%s0, %s2, %1
ldp\\t%w0, %w2, %1"
[(set_attr "type" "neon_load1_2reg,load2")
(set_attr "fp" "yes,*")]
)
(define_insn "load_pairdf"
[(set (match_operand:DF 0 "register_operand" "=w,*r")
(match_operand:DF 1 "aarch64_mem_pair_operand" "Ump,Ump"))
(set (match_operand:DF 2 "register_operand" "=w,*r")
(match_operand:DF 3 "memory_operand" "m,m"))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (DFmode)))"
"@
ldp\\t%d0, %d2, %1
ldp\\t%x0, %x2, %1"
[(set_attr "type" "neon_load1_2reg,load2")
(set_attr "fp" "yes,*")]
)
;; Operands 0 and 2 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation.
(define_insn "store_pair<mode>"
[(set (match_operand:GPF 0 "aarch64_mem_pair_operand" "=Ump")
(match_operand:GPF 1 "register_operand" "w"))
(set (match_operand:GPF 2 "memory_operand" "=m")
(match_operand:GPF 3 "register_operand" "w"))]
(define_insn "store_pairsf"
[(set (match_operand:SF 0 "aarch64_mem_pair_operand" "=Ump,Ump")
(match_operand:SF 1 "register_operand" "w,*r"))
(set (match_operand:SF 2 "memory_operand" "=m,m")
(match_operand:SF 3 "register_operand" "w,*r"))]
"rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (<MODE>mode)))"
"stp\\t%<w>1, %<w>3, %0"
[(set_attr "type" "neon_store1_2reg<q>")]
GET_MODE_SIZE (SFmode)))"
"@
stp\\t%s1, %s3, %0
stp\\t%w1, %w3, %0"
[(set_attr "type" "neon_store1_2reg,store2")
(set_attr "fp" "yes,*")]
)
(define_insn "store_pairdf"
[(set (match_operand:DF 0 "aarch64_mem_pair_operand" "=Ump,Ump")
(match_operand:DF 1 "register_operand" "w,*r"))
(set (match_operand:DF 2 "memory_operand" "=m,m")
(match_operand:DF 3 "register_operand" "w,*r"))]
"rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (DFmode)))"
"@
stp\\t%d1, %d3, %0
stp\\t%x1, %x3, %0"
[(set_attr "type" "neon_store1_2reg,store2")
(set_attr "fp" "yes,*")]
)
;; Load pair with post-index writeback. This is primarily used in function
@@ -1225,6 +1302,19 @@
[(set_attr "type" "extend,load1")]
)
(define_insn "*load_pair_extendsidi2_aarch64"
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump")))
(set (match_operand:DI 2 "register_operand" "=r")
(sign_extend:DI (match_operand:SI 3 "memory_operand" "m")))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (SImode)))"
"ldpsw\\t%0, %2, %1"
[(set_attr "type" "load2")]
)
(define_insn "*zero_extendsidi2_aarch64"
[(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
@@ -1235,6 +1325,19 @@
[(set_attr "type" "extend,load1")]
)
(define_insn "*load_pair_zero_extendsidi2_aarch64"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump")))
(set (match_operand:DI 2 "register_operand" "=r")
(zero_extend:DI (match_operand:SI 3 "memory_operand" "m")))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (SImode)))"
"ldp\\t%w0, %w2, %1"
[(set_attr "type" "load2")]
)
(define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2"
[(set (match_operand:GPI 0 "register_operand")
(ANY_EXTEND:GPI (match_operand:SHORT 1 "nonimmediate_operand")))]
@@ -4238,3 +4341,6 @@
;; Atomic Operations
(include "atomics.md")
;; ldp/stp peephole patterns
(include "aarch64-ldpstp.md")
2014-12-05 Bin Cheng <bin.cheng@arm.com>
* gcc.target/aarch64/ldp_stp_1.c: New test.
* gcc.target/aarch64/ldp_stp_2.c: New test.
* gcc.target/aarch64/ldp_stp_3.c: New test.
* gcc.target/aarch64/ldp_stp_4.c: New test.
* gcc.target/aarch64/ldp_stp_5.c: New test.
* gcc.target/aarch64/lr_free_1.c: Disable scheduling fusion
and peephole2 pass.
2014-12-05 Sandra Loosemore <sandra@codesourcery.com>
* gcc.dg/vect/pr63341-1.c: Remove explicit "dg-do run".
......
/* { dg-options "-O2" } */
int arr[4][4];
void
foo ()
{
arr[0][1] = 1;
arr[1][0] = -1;
arr[2][0] = 1;
arr[1][1] = -1;
arr[0][2] = 1;
arr[0][3] = -1;
arr[1][2] = 1;
arr[2][1] = -1;
arr[3][0] = 1;
arr[3][1] = -1;
arr[2][2] = 1;
arr[1][3] = -1;
arr[2][3] = 1;
arr[3][2] = -1;
}
/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]" 7 } } */
/* { dg-options "-O2" } */
extern void abort (void);
int arr[4][4] = {{0, 1, 1, -1}, {-1, -1, 1, -1}, {1, -1, 1, 1}, {1, -1, -1, 0}};
long long
foo ()
{
long long ll = 0;
ll += arr[0][1];
ll += arr[1][0];
ll += arr[1][1];
ll += arr[2][0];
return ll;
}
/* { dg-final { scan-assembler-times "ldpsw\tx\[0-9\]+, x\[0-9\]" 1 } } */
/* { dg-options "-O2" } */
extern void abort (void);
unsigned int arr[4][4] = {{0, 1, 1, 2}, {2, 2, 1, 2}, {1, 2, 1, 1}, {1, 2, 2, 0}};
unsigned long long
foo ()
{
unsigned long long ll = 0;
ll += arr[0][1];
ll += arr[1][0];
ll += arr[1][1];
ll += arr[2][0];
return ll;
}
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 1 } } */
/* { dg-options "-O2" } */
float arr[4][4];
void
foo ()
{
arr[0][1] = 1;
arr[1][0] = -1;
arr[2][0] = 1;
arr[1][1] = -1;
arr[0][2] = 1;
arr[0][3] = -1;
arr[1][2] = 1;
arr[2][1] = -1;
arr[3][0] = 1;
arr[3][1] = -1;
arr[2][2] = 1;
arr[1][3] = -1;
arr[2][3] = 1;
arr[3][2] = -1;
}
/* { dg-final { scan-assembler-times "stp\ts\[0-9\]+, s\[0-9\]" 7 } } */
/* { dg-options "-O2" } */
double arr[4][4];
void
foo ()
{
arr[0][1] = 1;
arr[1][0] = -1;
arr[2][0] = 1;
arr[1][1] = -1;
arr[0][2] = 1;
arr[0][3] = -1;
arr[1][2] = 1;
arr[2][1] = -1;
arr[3][0] = 1;
arr[3][1] = -1;
arr[2][2] = 1;
arr[1][3] = -1;
arr[2][3] = 1;
arr[3][2] = -1;
}
/* { dg-final { scan-assembler-times "stp\td\[0-9\]+, d\[0-9\]" 7 } } */
/* { dg-do run } */
/* { dg-options "-fno-inline -O2 -fomit-frame-pointer -ffixed-x2 -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 -ffixed-x7 -ffixed-x8 -ffixed-x9 -ffixed-x10 -ffixed-x11 -ffixed-x12 -ffixed-x13 -ffixed-x14 -ffixed-x15 -ffixed-x16 -ffixed-x17 -ffixed-x18 -ffixed-x19 -ffixed-x20 -ffixed-x21 -ffixed-x22 -ffixed-x23 -ffixed-x24 -ffixed-x25 -ffixed-x26 -ffixed-x27 -ffixed-28 -ffixed-29 --save-temps -mgeneral-regs-only -fno-ipa-cp" } */
/* { dg-options "-fno-inline -O2 -fomit-frame-pointer -ffixed-x2 -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 -ffixed-x7 -ffixed-x8 -ffixed-x9 -ffixed-x10 -ffixed-x11 -ffixed-x12 -ffixed-x13 -ffixed-x14 -ffixed-x15 -ffixed-x16 -ffixed-x17 -ffixed-x18 -ffixed-x19 -ffixed-x20 -ffixed-x21 -ffixed-x22 -ffixed-x23 -ffixed-x24 -ffixed-x25 -ffixed-x26 -ffixed-x27 -ffixed-28 -ffixed-29 --save-temps -mgeneral-regs-only -fno-ipa-cp -fno-schedule-fusion -fno-peephole2" } */
extern void abort ();
......