Commit 350013bc by Bin Cheng Committed by Marcus Shawcroft

[AArch64]load store pair optimization using sched_fusion pass.

From-SVN: r218430
parent a66272f6
2014-12-05 Bin Cheng <bin.cheng@arm.com>
* config/aarch64/aarch64.md (load_pair<mode>): Split to
load_pairsi, load_pairdi, load_pairsf and load_pairdf.
(load_pairsi, load_pairdi, load_pairsf, load_pairdf): Split
from load_pair<mode>. New alternative to support int/fp
registers in fp/int mode patterns.
(store_pair<mode>): Split to store_pairsi, store_pairdi,
store_pairsf and store_pairdf.
(store_pairsi, store_pairdi, store_pairsf, store_pairdf): Split
from store_pair<mode>. New alternative to support int/fp
registers in fp/int mode patterns.
(*load_pair_extendsidi2_aarch64): New pattern.
(*load_pair_zero_extendsidi2_aarch64): New pattern.
(aarch64-ldpstp.md): Include.
* config/aarch64/aarch64-ldpstp.md: New file.
* config/aarch64/aarch64-protos.h (aarch64_gen_adjusted_ldpstp):
New.
(extract_base_offset_in_addr): New.
(aarch64_operands_ok_for_ldpstp): New.
(aarch64_operands_adjust_ok_for_ldpstp): New.
* config/aarch64/aarch64.c (enum sched_fusion_type): New enum.
(TARGET_SCHED_FUSION_PRIORITY): New hook.
(fusion_load_store): New function.
(extract_base_offset_in_addr): New function.
(aarch64_gen_adjusted_ldpstp): New function.
(aarch64_sched_fusion_priority): New function.
(aarch64_operands_ok_for_ldpstp): New function.
(aarch64_operands_adjust_ok_for_ldpstp): New function.
2014-12-05 Olivier Hainque <hainque@adacore.com> 2014-12-05 Olivier Hainque <hainque@adacore.com>
* defaults.h: (DWARF_REG_TO_UNWIND_COLUMN): Define default. * defaults.h: (DWARF_REG_TO_UNWIND_COLUMN): Define default.
...@@ -293,6 +293,7 @@ void aarch64_expand_compare_and_swap (rtx op[]); ...@@ -293,6 +293,7 @@ void aarch64_expand_compare_and_swap (rtx op[]);
void aarch64_split_compare_and_swap (rtx op[]); void aarch64_split_compare_and_swap (rtx op[]);
void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx); void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE);
#endif /* RTX_CODE */ #endif /* RTX_CODE */
void aarch64_init_builtins (void); void aarch64_init_builtins (void);
...@@ -316,4 +317,8 @@ extern bool ...@@ -316,4 +317,8 @@ extern bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *); void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
int aarch64_ccmp_mode_to_code (enum machine_mode mode); int aarch64_ccmp_mode_to_code (enum machine_mode mode);
bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset);
bool aarch64_operands_ok_for_ldpstp (rtx *, bool, enum machine_mode);
bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, enum machine_mode);
#endif /* GCC_AARCH64_PROTOS_H */ #endif /* GCC_AARCH64_PROTOS_H */
...@@ -1081,62 +1081,139 @@ ...@@ -1081,62 +1081,139 @@
;; Operands 1 and 3 are tied together by the final condition; so we allow ;; Operands 1 and 3 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation. ;; fairly lax checking on the second memory operation.
(define_insn "load_pair<mode>" (define_insn "load_pairsi"
[(set (match_operand:GPI 0 "register_operand" "=r") [(set (match_operand:SI 0 "register_operand" "=r,*w")
(match_operand:GPI 1 "aarch64_mem_pair_operand" "Ump")) (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump,Ump"))
(set (match_operand:GPI 2 "register_operand" "=r") (set (match_operand:SI 2 "register_operand" "=r,*w")
(match_operand:GPI 3 "memory_operand" "m"))] (match_operand:SI 3 "memory_operand" "m,m"))]
"rtx_equal_p (XEXP (operands[3], 0), "rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode, plus_constant (Pmode,
XEXP (operands[1], 0), XEXP (operands[1], 0),
GET_MODE_SIZE (<MODE>mode)))" GET_MODE_SIZE (SImode)))"
"ldp\\t%<w>0, %<w>2, %1" "@
[(set_attr "type" "load2")] ldp\\t%w0, %w2, %1
ldp\\t%s0, %s2, %1"
[(set_attr "type" "load2,neon_load1_2reg")
(set_attr "fp" "*,yes")]
) )
;; Load two DImode values with a single ldp.
;; The insn condition requires the address of operand 3 to be the address
;; of operand 1 plus GET_MODE_SIZE (DImode), which is why operand 3 only
;; uses the generic memory_operand predicate while operand 1 must satisfy
;; aarch64_mem_pair_operand.
;; Alternative 0 ("r" constraint) prints the destinations with the %x
;; modifier and has type load2; alternative 1 ("*w" constraint) prints them
;; with the %d modifier, has type neon_load1_2reg and sets the "fp"
;; attribute to yes.
(define_insn "load_pairdi"
[(set (match_operand:DI 0 "register_operand" "=r,*w")
(match_operand:DI 1 "aarch64_mem_pair_operand" "Ump,Ump"))
(set (match_operand:DI 2 "register_operand" "=r,*w")
(match_operand:DI 3 "memory_operand" "m,m"))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (DImode)))"
"@
ldp\\t%x0, %x2, %1
ldp\\t%d0, %d2, %1"
[(set_attr "type" "load2,neon_load1_2reg")
(set_attr "fp" "*,yes")]
)
;; Operands 0 and 2 are tied together by the final condition; so we allow ;; Operands 0 and 2 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation. ;; fairly lax checking on the second memory operation.
(define_insn "store_pair<mode>" (define_insn "store_pairsi"
[(set (match_operand:GPI 0 "aarch64_mem_pair_operand" "=Ump") [(set (match_operand:SI 0 "aarch64_mem_pair_operand" "=Ump,Ump")
(match_operand:GPI 1 "register_operand" "r")) (match_operand:SI 1 "aarch64_reg_or_zero" "rZ,*w"))
(set (match_operand:GPI 2 "memory_operand" "=m") (set (match_operand:SI 2 "memory_operand" "=m,m")
(match_operand:GPI 3 "register_operand" "r"))] (match_operand:SI 3 "aarch64_reg_or_zero" "rZ,*w"))]
"rtx_equal_p (XEXP (operands[2], 0), "rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode, plus_constant (Pmode,
XEXP (operands[0], 0), XEXP (operands[0], 0),
GET_MODE_SIZE (<MODE>mode)))" GET_MODE_SIZE (SImode)))"
"stp\\t%<w>1, %<w>3, %0" "@
[(set_attr "type" "store2")] stp\\t%w1, %w3, %0
stp\\t%s1, %s3, %0"
[(set_attr "type" "store2,neon_store1_2reg")
(set_attr "fp" "*,yes")]
)
(define_insn "store_pairdi"
[(set (match_operand:DI 0 "aarch64_mem_pair_operand" "=Ump,Ump")
(match_operand:DI 1 "aarch64_reg_or_zero" "rZ,*w"))
(set (match_operand:DI 2 "memory_operand" "=m,m")
(match_operand:DI 3 "aarch64_reg_or_zero" "rZ,*w"))]
"rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (DImode)))"
"@
stp\\t%x1, %x3, %0
stp\\t%d1, %d3, %0"
[(set_attr "type" "store2,neon_store1_2reg")
(set_attr "fp" "*,yes")]
) )
;; Operands 1 and 3 are tied together by the final condition; so we allow ;; Operands 1 and 3 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation. ;; fairly lax checking on the second memory operation.
(define_insn "load_pair<mode>" (define_insn "load_pairsf"
[(set (match_operand:GPF 0 "register_operand" "=w") [(set (match_operand:SF 0 "register_operand" "=w,*r")
(match_operand:GPF 1 "aarch64_mem_pair_operand" "Ump")) (match_operand:SF 1 "aarch64_mem_pair_operand" "Ump,Ump"))
(set (match_operand:GPF 2 "register_operand" "=w") (set (match_operand:SF 2 "register_operand" "=w,*r")
(match_operand:GPF 3 "memory_operand" "m"))] (match_operand:SF 3 "memory_operand" "m,m"))]
"rtx_equal_p (XEXP (operands[3], 0), "rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode, plus_constant (Pmode,
XEXP (operands[1], 0), XEXP (operands[1], 0),
GET_MODE_SIZE (<MODE>mode)))" GET_MODE_SIZE (SFmode)))"
"ldp\\t%<w>0, %<w>2, %1" "@
[(set_attr "type" "neon_load1_2reg<q>")] ldp\\t%s0, %s2, %1
ldp\\t%w0, %w2, %1"
[(set_attr "type" "neon_load1_2reg,load2")
(set_attr "fp" "yes,*")]
)
(define_insn "load_pairdf"
[(set (match_operand:DF 0 "register_operand" "=w,*r")
(match_operand:DF 1 "aarch64_mem_pair_operand" "Ump,Ump"))
(set (match_operand:DF 2 "register_operand" "=w,*r")
(match_operand:DF 3 "memory_operand" "m,m"))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (DFmode)))"
"@
ldp\\t%d0, %d2, %1
ldp\\t%x0, %x2, %1"
[(set_attr "type" "neon_load1_2reg,load2")
(set_attr "fp" "yes,*")]
) )
;; Operands 0 and 2 are tied together by the final condition; so we allow ;; Operands 0 and 2 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation. ;; fairly lax checking on the second memory operation.
(define_insn "store_pair<mode>" (define_insn "store_pairsf"
[(set (match_operand:GPF 0 "aarch64_mem_pair_operand" "=Ump") [(set (match_operand:SF 0 "aarch64_mem_pair_operand" "=Ump,Ump")
(match_operand:GPF 1 "register_operand" "w")) (match_operand:SF 1 "register_operand" "w,*r"))
(set (match_operand:GPF 2 "memory_operand" "=m") (set (match_operand:SF 2 "memory_operand" "=m,m")
(match_operand:GPF 3 "register_operand" "w"))] (match_operand:SF 3 "register_operand" "w,*r"))]
"rtx_equal_p (XEXP (operands[2], 0), "rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode, plus_constant (Pmode,
XEXP (operands[0], 0), XEXP (operands[0], 0),
GET_MODE_SIZE (<MODE>mode)))" GET_MODE_SIZE (SFmode)))"
"stp\\t%<w>1, %<w>3, %0" "@
[(set_attr "type" "neon_store1_2reg<q>")] stp\\t%s1, %s3, %0
stp\\t%w1, %w3, %0"
[(set_attr "type" "neon_store1_2reg,store2")
(set_attr "fp" "yes,*")]
)
(define_insn "store_pairdf"
[(set (match_operand:DF 0 "aarch64_mem_pair_operand" "=Ump,Ump")
(match_operand:DF 1 "register_operand" "w,*r"))
(set (match_operand:DF 2 "memory_operand" "=m,m")
(match_operand:DF 3 "register_operand" "w,*r"))]
"rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (DFmode)))"
"@
stp\\t%d1, %d3, %0
stp\\t%x1, %x3, %0"
[(set_attr "type" "neon_store1_2reg,store2")
(set_attr "fp" "yes,*")]
) )
;; Load pair with post-index writeback. This is primarily used in function ;; Load pair with post-index writeback. This is primarily used in function
...@@ -1225,6 +1302,19 @@ ...@@ -1225,6 +1302,19 @@
[(set_attr "type" "extend,load1")] [(set_attr "type" "extend,load1")]
) )
;; Load two SImode memory values and sign-extend each into a DImode
;; register, emitted as a single ldpsw.
;; The insn condition requires the address of operand 3 to be the address
;; of operand 1 plus GET_MODE_SIZE (SImode), so only operand 1 is checked
;; with the aarch64_mem_pair_operand predicate.
(define_insn "*load_pair_extendsidi2_aarch64"
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump")))
(set (match_operand:DI 2 "register_operand" "=r")
(sign_extend:DI (match_operand:SI 3 "memory_operand" "m")))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (SImode)))"
"ldpsw\\t%0, %2, %1"
[(set_attr "type" "load2")]
)
(define_insn "*zero_extendsidi2_aarch64" (define_insn "*zero_extendsidi2_aarch64"
[(set (match_operand:DI 0 "register_operand" "=r,r") [(set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))] (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "r,m")))]
...@@ -1235,6 +1325,19 @@ ...@@ -1235,6 +1325,19 @@
[(set_attr "type" "extend,load1")] [(set_attr "type" "extend,load1")]
) )
;; Load two SImode memory values and zero-extend each into a DImode
;; register.  The output template is a plain ldp that prints both
;; destinations with the %w modifier.
;; NOTE(review): this presumably relies on a write to a w register clearing
;; the upper half of the x register -- confirm against the architecture
;; manual.
;; The insn condition requires the address of operand 3 to be the address
;; of operand 1 plus GET_MODE_SIZE (SImode), so only operand 1 is checked
;; with the aarch64_mem_pair_operand predicate.
(define_insn "*load_pair_zero_extendsidi2_aarch64"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "Ump")))
(set (match_operand:DI 2 "register_operand" "=r")
(zero_extend:DI (match_operand:SI 3 "memory_operand" "m")))]
"rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (SImode)))"
"ldp\\t%w0, %w2, %1"
[(set_attr "type" "load2")]
)
(define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2" (define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2"
[(set (match_operand:GPI 0 "register_operand") [(set (match_operand:GPI 0 "register_operand")
(ANY_EXTEND:GPI (match_operand:SHORT 1 "nonimmediate_operand")))] (ANY_EXTEND:GPI (match_operand:SHORT 1 "nonimmediate_operand")))]
...@@ -4238,3 +4341,6 @@ ...@@ -4238,3 +4341,6 @@
;; Atomic Operations ;; Atomic Operations
(include "atomics.md") (include "atomics.md")
;; ldp/stp peephole patterns
(include "aarch64-ldpstp.md")
2014-12-05 Bin Cheng <bin.cheng@arm.com>
* gcc.target/aarch64/ldp_stp_1.c: New test.
* gcc.target/aarch64/ldp_stp_2.c: New test.
* gcc.target/aarch64/ldp_stp_3.c: New test.
* gcc.target/aarch64/ldp_stp_4.c: New test.
* gcc.target/aarch64/ldp_stp_5.c: New test.
* gcc.target/aarch64/lr_free_1.c: Disable scheduling fusion
and peephole2 pass.
2014-12-05 Sandra Loosemore <sandra@codesourcery.com> 2014-12-05 Sandra Loosemore <sandra@codesourcery.com>
* gcc.dg/vect/pr63341-1.c: Remove explicit "dg-do run". * gcc.dg/vect/pr63341-1.c: Remove explicit "dg-do run".
......
/* { dg-options "-O2" } */
int arr[4][4];
/* Store alternating 1 / -1 constants to fourteen elements of the int
   array ARR.  Taken in row-major order the targets are elements 1..14
   (everything except arr[0][0] and arr[3][3]), but the assignments are
   written in a deliberately scattered order.  The dg-final directive that
   follows this function expects exactly seven "stp w, w" instructions, so
   the statement order must not be tidied up.  */
void
foo ()
{
arr[0][1] = 1;
arr[1][0] = -1;
arr[2][0] = 1;
arr[1][1] = -1;
arr[0][2] = 1;
arr[0][3] = -1;
arr[1][2] = 1;
arr[2][1] = -1;
arr[3][0] = 1;
arr[3][1] = -1;
arr[2][2] = 1;
arr[1][3] = -1;
arr[2][3] = 1;
arr[3][2] = -1;
}
/* { dg-final { scan-assembler-times "stp\tw\[0-9\]+, w\[0-9\]" 7 } } */
/* { dg-options "-O2" } */
extern void abort (void);
int arr[4][4] = {{0, 1, 1, -1}, {-1, -1, 1, -1}, {1, -1, 1, 1}, {1, -1, -1, 0}};
/* Sum four int elements of ARR into a long long and return the total.
   Each element is widened from int to long long by the "+=".  Of the four
   elements read, arr[1][0] and arr[1][1] are adjacent in memory but are
   not read by consecutive statements.  The dg-final directive that follows
   this function expects exactly one "ldpsw x, x" instruction.  */
long long
foo ()
{
long long ll = 0;
ll += arr[0][1];
ll += arr[1][0];
ll += arr[1][1];
ll += arr[2][0];
return ll;
}
/* { dg-final { scan-assembler-times "ldpsw\tx\[0-9\]+, x\[0-9\]" 1 } } */
/* { dg-options "-O2" } */
extern void abort (void);
unsigned int arr[4][4] = {{0, 1, 1, 2}, {2, 2, 1, 2}, {1, 2, 1, 1}, {1, 2, 2, 0}};
/* Sum four unsigned int elements of ARR into an unsigned long long and
   return the total.  Each element is widened from unsigned int to
   unsigned long long by the "+=".  Of the four elements read, arr[1][0]
   and arr[1][1] are adjacent in memory but are not read by consecutive
   statements.  The dg-final directive that follows this function expects
   exactly one "ldp w, w" instruction.  */
unsigned long long
foo ()
{
unsigned long long ll = 0;
ll += arr[0][1];
ll += arr[1][0];
ll += arr[1][1];
ll += arr[2][0];
return ll;
}
/* { dg-final { scan-assembler-times "ldp\tw\[0-9\]+, w\[0-9\]" 1 } } */
/* { dg-options "-O2" } */
float arr[4][4];
/* Store alternating 1 / -1 constants to fourteen elements of the float
   array ARR.  Taken in row-major order the targets are elements 1..14
   (everything except arr[0][0] and arr[3][3]), but the assignments are
   written in a deliberately scattered order.  The dg-final directive that
   follows this function expects exactly seven "stp s, s" instructions, so
   the statement order must not be tidied up.  */
void
foo ()
{
arr[0][1] = 1;
arr[1][0] = -1;
arr[2][0] = 1;
arr[1][1] = -1;
arr[0][2] = 1;
arr[0][3] = -1;
arr[1][2] = 1;
arr[2][1] = -1;
arr[3][0] = 1;
arr[3][1] = -1;
arr[2][2] = 1;
arr[1][3] = -1;
arr[2][3] = 1;
arr[3][2] = -1;
}
/* { dg-final { scan-assembler-times "stp\ts\[0-9\]+, s\[0-9\]" 7 } } */
/* { dg-options "-O2" } */
double arr[4][4];
/* Store alternating 1 / -1 constants to fourteen elements of the double
   array ARR.  Taken in row-major order the targets are elements 1..14
   (everything except arr[0][0] and arr[3][3]), but the assignments are
   written in a deliberately scattered order.  The dg-final directive that
   follows this function expects exactly seven "stp d, d" instructions, so
   the statement order must not be tidied up.  */
void
foo ()
{
arr[0][1] = 1;
arr[1][0] = -1;
arr[2][0] = 1;
arr[1][1] = -1;
arr[0][2] = 1;
arr[0][3] = -1;
arr[1][2] = 1;
arr[2][1] = -1;
arr[3][0] = 1;
arr[3][1] = -1;
arr[2][2] = 1;
arr[1][3] = -1;
arr[2][3] = 1;
arr[3][2] = -1;
}
/* { dg-final { scan-assembler-times "stp\td\[0-9\]+, d\[0-9\]" 7 } } */
/* { dg-do run } */ /* { dg-do run } */
/* { dg-options "-fno-inline -O2 -fomit-frame-pointer -ffixed-x2 -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 -ffixed-x7 -ffixed-x8 -ffixed-x9 -ffixed-x10 -ffixed-x11 -ffixed-x12 -ffixed-x13 -ffixed-x14 -ffixed-x15 -ffixed-x16 -ffixed-x17 -ffixed-x18 -ffixed-x19 -ffixed-x20 -ffixed-x21 -ffixed-x22 -ffixed-x23 -ffixed-x24 -ffixed-x25 -ffixed-x26 -ffixed-x27 -ffixed-28 -ffixed-29 --save-temps -mgeneral-regs-only -fno-ipa-cp" } */ /* { dg-options "-fno-inline -O2 -fomit-frame-pointer -ffixed-x2 -ffixed-x3 -ffixed-x4 -ffixed-x5 -ffixed-x6 -ffixed-x7 -ffixed-x8 -ffixed-x9 -ffixed-x10 -ffixed-x11 -ffixed-x12 -ffixed-x13 -ffixed-x14 -ffixed-x15 -ffixed-x16 -ffixed-x17 -ffixed-x18 -ffixed-x19 -ffixed-x20 -ffixed-x21 -ffixed-x22 -ffixed-x23 -ffixed-x24 -ffixed-x25 -ffixed-x26 -ffixed-x27 -ffixed-28 -ffixed-29 --save-temps -mgeneral-regs-only -fno-ipa-cp -fno-schedule-fusion -fno-peephole2" } */
extern void abort (); extern void abort ();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment