Commit 7692ce17 by Kyrylo Tkachov Committed by Kyrylo Tkachov

[AArch64] Add STP pattern to store a vec_concat of two 64-bit registers

On top of the previous vec_merge simplifications [1] we can add this pattern to perform
a store of a vec_concat of two 64-bit values in distinct registers as an STP.
This avoids constructing such a vector explicitly in a register and storing it as
a Q register.
This way for the code in the testcase we can generate:

// Code generated for the testcase's construct_lane_1 with the new pattern:
// the two results stay in separate D registers and go out in a single STP.
// x0 = y, x1 = z (first two pointer arguments).
construct_lane_1:
        ldp     d1, d0, [x0]            // d1 = y[0], d0 = y[1]
        fmov    d3, 1.0e+0              // d3 = 1.0
        fmov    d2, 2.0e+0              // d2 = 2.0
        fadd    d4, d1, d3              // d4 = y[0] + 1.0
        fadd    d5, d0, d2              // d5 = y[1] + 2.0
        stp     d4, d5, [x1, 32]        // both lanes stored at z + 32 bytes, i.e. z[2]
        ret

// Code generated for the testcase's construct_lane_2 with the new pattern:
// the two results stay in general registers and go out in a single STP.
// x0 = y, x1 = z (first two pointer arguments).
construct_lane_2:
        ldp     x2, x0, [x0]            // x2 = y[0], x0 = y[1] (y pointer is dead afterwards)
        add     x3, x2, 1               // x3 = y[0] + 1
        add     x4, x0, 2               // x4 = y[1] + 2
        stp     x3, x4, [x1, 32]        // both lanes stored at z + 32 bytes, i.e. z[2]
        ret

instead of the current:
// Previous code for construct_lane_1: the vector is first assembled in q0
// (dup + ins) and then stored with a Q-register STR.
construct_lane_1:
        ldp     d0, d1, [x0]            // d0 = y[0], d1 = y[1]
        fmov    d3, 1.0e+0              // d3 = 1.0
        fmov    d2, 2.0e+0              // d2 = 2.0
        fadd    d0, d0, d3              // d0 = y[0] + 1.0
        fadd    d1, d1, d2              // d1 = y[1] + 2.0
        dup     v0.2d, v0.d[0]          // replicate lane 0 of v0 into both 64-bit lanes
        ins     v0.d[1], v1.d[0]        // overwrite lane 1 with the second result
        str     q0, [x1, 32]            // store the assembled 128-bit vector to z[2]
        ret

// Previous code for construct_lane_2: the two integer results are moved
// into q0 (dup + ins) and then stored with a Q-register STR.
construct_lane_2:
        ldp     x2, x3, [x0]            // x2 = y[0], x3 = y[1]
        add     x0, x2, 1               // x0 = y[0] + 1 (y pointer is dead afterwards)
        add     x2, x3, 2               // x2 = y[1] + 2
        dup     v0.2d, x0               // replicate x0 into both 64-bit lanes of v0
        ins     v0.d[1], x2             // overwrite lane 1 with the second result
        str     q0, [x1, 32]            // store the assembled 128-bit vector to z[2]
        ret

Bootstrapped and tested on aarch64-none-linux-gnu.

[1] https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00272.html
    https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00273.html
    https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00274.html

	* config/aarch64/aarch64-simd.md (store_pair_lanes<mode>):
	New pattern.
	* config/aarch64/constraints.md (Uml): New constraint.
	* config/aarch64/predicates.md (aarch64_mem_pair_lanes_operand): New
	predicate.

	* gcc.target/aarch64/store_v2vec_lanes.c: New test.

From-SVN: r254551
parent 040939a2
2017-11-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* config/aarch64/aarch64-simd.md (store_pair_lanes<mode>):
New pattern.
* config/aarch64/constraints.md (Uml): New constraint.
* config/aarch64/predicates.md (aarch64_mem_pair_lanes_operand): New
predicate.
2017-11-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* simplify-rtx.c (simplify_ternary_operation): Simplify vec_merge
of two vec_duplicates into a vec_concat.
...@@ -2949,6 +2949,18 @@ ...@@ -2949,6 +2949,18 @@
[(set_attr "type" "neon_load1_1reg_q")] [(set_attr "type" "neon_load1_1reg_q")]
) )
;; Store a vec_concat of two 64-bit values held in two separate registers
;; with a single STP, rather than assembling the vector in a register first.
;; Operand 0 is the <VDBL>-mode memory destination; operands 1 and 2 are the
;; VDC-mode source registers, printed to STP in that order.
;; Alternative 1 uses the "w" register constraint and prints the sources
;; with %d; alternative 2 uses "r" and prints them with %x.
(define_insn "store_pair_lanes<mode>"
[(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
(vec_concat:<VDBL>
(match_operand:VDC 1 "register_operand" "w, r")
(match_operand:VDC 2 "register_operand" "w, r")))]
;; Both alternatives, including the "r" one, are gated on TARGET_SIMD.
"TARGET_SIMD"
"@
stp\\t%d1, %d2, %0
stp\\t%x1, %x2, %0"
;; One "type" value per alternative, in the same order as the constraints.
[(set_attr "type" "neon_stp, store_16")]
)
;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
......
...@@ -171,6 +171,15 @@ ...@@ -171,6 +171,15 @@
(match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, 0), (match_test "aarch64_legitimate_address_p (GET_MODE (op), XEXP (op, 0),
PARALLEL, 1)"))) PARALLEL, 1)")))
;; Memory constraint for the destination of a 128-bit vec_concat of two
;; 64-bit register values that is stored with a single STP.  The address is
;; validated against DFmode (one 64-bit half) rather than against the mode
;; of the memory operand itself.
;; NOTE(review): the PARALLEL and 1 arguments presumably request
;; load/store-pair addressing with strict checking -- confirm against
;; aarch64_legitimate_address_p.
(define_memory_constraint "Uml"
"@internal
A memory address suitable for a load/store pair operation."
(and (match_code "mem")
(match_test "aarch64_legitimate_address_p (DFmode, XEXP (op, 0),
PARALLEL, 1)")))
(define_memory_constraint "Utv" (define_memory_constraint "Utv"
"@internal "@internal
An address valid for loading/storing opaque structure An address valid for loading/storing opaque structure
......
...@@ -189,6 +189,13 @@ ...@@ -189,6 +189,13 @@
(match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL, (match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL,
0)"))) 0)")))
;; Predicate for the memory destination of a 128-bit vec_concat of two
;; 64-bit register values that is stored with a single STP.  Accepts a MEM
;; whose address passes aarch64_legitimate_address_p when checked as DFmode
;; (one 64-bit half), not as the mode of the operand itself.
;; NOTE(review): the PARALLEL and 1 arguments presumably request
;; load/store-pair addressing with strict checking -- confirm against
;; aarch64_legitimate_address_p.
(define_predicate "aarch64_mem_pair_lanes_operand"
(and (match_code "mem")
(match_test "aarch64_legitimate_address_p (DFmode, XEXP (op, 0),
PARALLEL, 1)")))
;; Predicate for prefetch operands: accepts OP when
;; aarch64_address_valid_for_prefetch_p (op, false) holds.
;; NOTE(review): the meaning of the `false' argument is not visible here --
;; confirm against the helper's definition.
(define_predicate "aarch64_prefetch_operand"
  (match_test "aarch64_address_valid_for_prefetch_p (op, false)"))
......
2017-11-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/store_v2vec_lanes.c: New test.
2017-11-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
* gcc.target/aarch64/load_v2vec_lanes_1.c: New test.
2017-11-08 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
......
/* { dg-do compile } */
/* { dg-options "-O2" } */
typedef long long v2di __attribute__ ((vector_size (16)));
typedef double v2df __attribute__ ((vector_size (16)));
/* Build a v2df from two separately computed doubles (y[0] + 1 and y[1] + 2)
   and store it to z[2].  The source shape is the test input: keep the
   two-scalar vector initializer followed by the store as written.  */
void
construct_lane_1 (double *y, v2df *z)
{
double y0 = y[0] + 1;
double y1 = y[1] + 2;
/* Vector assembled from two scalars that live in distinct registers.  */
v2df x = {y0, y1};
z[2] = x;
}
/* Build a v2di from two separately computed 64-bit integers (y[0] + 1 and
   y[1] + 2) and store it to z[2].  The source shape is the test input: keep
   the two-scalar vector initializer followed by the store as written.  */
void
construct_lane_2 (long long *y, v2di *z)
{
long long y0 = y[0] + 1;
long long y1 = y[1] + 2;
/* Vector assembled from two scalars that live in distinct registers.  */
v2di x = {y0, y1};
z[2] = x;
}
/* We can use the store_pair_lanes<mode> pattern to store a vec_concat of
two DI/DF values held in distinct registers to consecutive memory with a
single STP, instead of constructing the vector and using a Q-reg STR. */
/* { dg-final { scan-assembler-times "stp\td\[0-9\]+, d\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-not "ins\t" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment