Commit 2d8c6dc1 by Alan Hayward, committed by Richard Sandiford

aarch64-protos.h (aarch64_simd_disambiguate_copy): Declare.

gcc/
	* config/aarch64/aarch64-protos.h (aarch64_simd_disambiguate_copy):
	Delete.
	(aarch64_simd_emit_reg_reg_move): Declare.
	* config/aarch64/aarch64.c (aarch64_classify_address): Allow extra
	addressing modes for BE.
	(aarch64_print_operand): Add 'R' specifier.
	(aarch64_simd_disambiguate_copy): Delete.
	(aarch64_simd_emit_reg_reg_move): New function.
	* config/aarch64/aarch64-simd.md: Use aarch64_simd_emit_reg_reg_move
	in define_splits for structural moves.
	(mov<mode>): Use less restrictive predicates.
	(*aarch64_mov<mode>): Simplify and only allow for LE.
	(*aarch64_be_movoi, *aarch64_be_movci, *aarch64_be_movxi): New.

From-SVN: r219958
parent 9ab41c76
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -263,7 +263,7 @@ void aarch64_emit_call_insn (rtx);
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
 
-void aarch64_simd_disambiguate_copy (rtx *, rtx *, rtx *, unsigned int);
+void aarch64_simd_emit_reg_reg_move (rtx *, enum machine_mode, unsigned int);
 
 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
    registers).  */
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -158,19 +158,10 @@
   "TARGET_SIMD && reload_completed
    && GP_REGNUM_P (REGNO (operands[0]))
    && GP_REGNUM_P (REGNO (operands[1]))"
-  [(set (match_dup 0) (match_dup 1))
-   (set (match_dup 2) (match_dup 3))]
+  [(const_int 0)]
 {
-  int rdest = REGNO (operands[0]);
-  int rsrc = REGNO (operands[1]);
-  rtx dest[2], src[2];
-
-  dest[0] = gen_rtx_REG (DImode, rdest);
-  src[0] = gen_rtx_REG (DImode, rsrc);
-  dest[1] = gen_rtx_REG (DImode, rdest + 1);
-  src[1] = gen_rtx_REG (DImode, rsrc + 1);
-
-  aarch64_simd_disambiguate_copy (operands, dest, src, 2);
+  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
+  DONE;
 })
 
 (define_split
@@ -4051,8 +4042,8 @@
 ;; Reload patterns for AdvSIMD register list operands.
 
 (define_expand "mov<mode>"
-  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "")
-	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" ""))]
+  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
+	(match_operand:VSTRUCT 1 "general_operand" ""))]
   "TARGET_SIMD"
 {
   if (can_create_pseudo_p ())
@@ -4065,20 +4056,14 @@
 (define_insn "*aarch64_mov<mode>"
   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
-  "TARGET_SIMD
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN
    && (register_operand (operands[0], <MODE>mode)
        || register_operand (operands[1], <MODE>mode))"
-{
-  switch (which_alternative)
-    {
-    case 0: return "#";
-    case 1: return "st1\\t{%S1.16b - %<Vendreg>1.16b}, %0";
-    case 2: return "ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1";
-    default: gcc_unreachable ();
-    }
-}
-  [(set_attr "type" "neon_move,neon_store<nregs>_<nregs>reg_q,\
+  "@
+   #
+   st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
+   ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
+  [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
		     neon_load<nregs>_<nregs>reg_q")
    (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
 )
@@ -4101,70 +4086,102 @@
   [(set_attr "type" "neon_store1_1reg<q>")]
 )
 
+(define_insn "*aarch64_be_movoi"
+  [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
+	(match_operand:OI 1 "general_operand" " w,w,m"))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN
+   && (register_operand (operands[0], OImode)
+       || register_operand (operands[1], OImode))"
+  "@
+   #
+   stp\\t%q1, %R1, %0
+   ldp\\t%q0, %R0, %1"
+  [(set_attr "type" "multiple,neon_store2_2reg_q,neon_load2_2reg_q")
+   (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
+)
+
+(define_insn "*aarch64_be_movci"
+  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
+	(match_operand:CI 1 "general_operand" " w,w,o"))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN
+   && (register_operand (operands[0], CImode)
+       || register_operand (operands[1], CImode))"
+  "#"
+  [(set_attr "type" "multiple")
+   (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
+)
+
+(define_insn "*aarch64_be_movxi"
+  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
+	(match_operand:XI 1 "general_operand" " w,w,o"))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN
+   && (register_operand (operands[0], XImode)
+       || register_operand (operands[1], XImode))"
+  "#"
+  [(set_attr "type" "multiple")
+   (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
+)
+
 (define_split
-  [(set (match_operand:OI 0 "register_operand" "")
-	(match_operand:OI 1 "register_operand" ""))]
+  [(set (match_operand:OI 0 "register_operand")
+	(match_operand:OI 1 "register_operand"))]
   "TARGET_SIMD && reload_completed"
-  [(set (match_dup 0) (match_dup 1))
-   (set (match_dup 2) (match_dup 3))]
+  [(const_int 0)]
 {
-  int rdest = REGNO (operands[0]);
-  int rsrc = REGNO (operands[1]);
-  rtx dest[2], src[2];
-
-  dest[0] = gen_rtx_REG (TFmode, rdest);
-  src[0] = gen_rtx_REG (TFmode, rsrc);
-  dest[1] = gen_rtx_REG (TFmode, rdest + 1);
-  src[1] = gen_rtx_REG (TFmode, rsrc + 1);
-
-  aarch64_simd_disambiguate_copy (operands, dest, src, 2);
+  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
+  DONE;
 })
 
 (define_split
-  [(set (match_operand:CI 0 "register_operand" "")
-	(match_operand:CI 1 "register_operand" ""))]
+  [(set (match_operand:CI 0 "nonimmediate_operand")
+	(match_operand:CI 1 "general_operand"))]
   "TARGET_SIMD && reload_completed"
-  [(set (match_dup 0) (match_dup 1))
-   (set (match_dup 2) (match_dup 3))
-   (set (match_dup 4) (match_dup 5))]
+  [(const_int 0)]
 {
-  int rdest = REGNO (operands[0]);
-  int rsrc = REGNO (operands[1]);
-  rtx dest[3], src[3];
-
-  dest[0] = gen_rtx_REG (TFmode, rdest);
-  src[0] = gen_rtx_REG (TFmode, rsrc);
-  dest[1] = gen_rtx_REG (TFmode, rdest + 1);
-  src[1] = gen_rtx_REG (TFmode, rsrc + 1);
-  dest[2] = gen_rtx_REG (TFmode, rdest + 2);
-  src[2] = gen_rtx_REG (TFmode, rsrc + 2);
-
-  aarch64_simd_disambiguate_copy (operands, dest, src, 3);
+  if (register_operand (operands[0], CImode)
+      && register_operand (operands[1], CImode))
+    {
+      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
+      DONE;
+    }
+  else if (BYTES_BIG_ENDIAN)
+    {
+      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
+		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
+      emit_move_insn (gen_lowpart (V16QImode,
+				   simplify_gen_subreg (TImode, operands[0],
+							CImode, 32)),
+		      gen_lowpart (V16QImode,
+				   simplify_gen_subreg (TImode, operands[1],
+							CImode, 32)));
+      DONE;
+    }
+  else
+    FAIL;
 })
 
 (define_split
-  [(set (match_operand:XI 0 "register_operand" "")
-	(match_operand:XI 1 "register_operand" ""))]
+  [(set (match_operand:XI 0 "nonimmediate_operand")
+	(match_operand:XI 1 "general_operand"))]
   "TARGET_SIMD && reload_completed"
-  [(set (match_dup 0) (match_dup 1))
-   (set (match_dup 2) (match_dup 3))
-   (set (match_dup 4) (match_dup 5))
-   (set (match_dup 6) (match_dup 7))]
+  [(const_int 0)]
 {
-  int rdest = REGNO (operands[0]);
-  int rsrc = REGNO (operands[1]);
-  rtx dest[4], src[4];
-
-  dest[0] = gen_rtx_REG (TFmode, rdest);
-  src[0] = gen_rtx_REG (TFmode, rsrc);
-  dest[1] = gen_rtx_REG (TFmode, rdest + 1);
-  src[1] = gen_rtx_REG (TFmode, rsrc + 1);
-  dest[2] = gen_rtx_REG (TFmode, rdest + 2);
-  src[2] = gen_rtx_REG (TFmode, rsrc + 2);
-  dest[3] = gen_rtx_REG (TFmode, rdest + 3);
-  src[3] = gen_rtx_REG (TFmode, rsrc + 3);
-
-  aarch64_simd_disambiguate_copy (operands, dest, src, 4);
+  if (register_operand (operands[0], XImode)
+      && register_operand (operands[1], XImode))
+    {
+      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
+      DONE;
+    }
+  else if (BYTES_BIG_ENDIAN)
+    {
+      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
+		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
+      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
+		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
+      DONE;
+    }
+  else
+    FAIL;
 })
 
 (define_expand "aarch64_ld2r<mode>"
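Note on the CI/XI splits above: on big endian there is no single load/store covering three or four Q registers, so a memory move is decomposed into an OImode (32-byte) ldp/stp half at byte offset 0 plus the remainder at offset 32. A minimal standalone sketch of the store direction in plain C (no GCC internals; the function name and the flat 48-byte register layout are illustrative assumptions):

#include <stdio.h>
#include <string.h>

/* Sketch: a big-endian CImode (48-byte, three-vector) store decomposes
   into a 32-byte pair store at offset 0 and a 16-byte single store at
   offset 32 -- mirroring "stp q0, q1, [base]" + "str q2, [base, #32]".  */
static void
be_movci_store (const unsigned char *regs /* 48 bytes: q0,q1,q2 */,
                unsigned char *mem)
{
  memcpy (mem, regs, 32);            /* the OImode (ldp/stp) half */
  memcpy (mem + 32, regs + 32, 16);  /* the single 16-byte move   */
}

int
main (void)
{
  unsigned char regs[48], mem[48];

  memset (regs, 0xab, sizeof regs);
  be_movci_store (regs, mem);
  printf ("%02x\n", mem[47]);        /* prints "ab" */
  return 0;
}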
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3424,12 +3424,20 @@ aarch64_classify_address (struct aarch64_address_info *info,
 {
   enum rtx_code code = GET_CODE (x);
   rtx op0, op1;
+
+  /* On BE, we use load/store pair for all large int mode load/stores.  */
+  bool load_store_pair_p = (outer_code == PARALLEL
+			    || (BYTES_BIG_ENDIAN
+				&& aarch64_vect_struct_mode_p (mode)));
+
   bool allow_reg_index_p =
-    outer_code != PARALLEL && (GET_MODE_SIZE (mode) != 16
-			       || aarch64_vector_mode_supported_p (mode));
+    !load_store_pair_p
+    && (GET_MODE_SIZE (mode) != 16 || aarch64_vector_mode_supported_p (mode))
+    && !aarch64_vect_struct_mode_p (mode);
 
-  /* Don't support anything other than POST_INC or REG addressing for
-     AdvSIMD.  */
-  if (aarch64_vect_struct_mode_p (mode)
+  /* On LE, for AdvSIMD, don't support anything other than POST_INC or
+     REG addressing.  */
+  if (aarch64_vect_struct_mode_p (mode) && !BYTES_BIG_ENDIAN
       && (code != POST_INC && code != REG))
     return false;
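The new load_store_pair_p flag widens the old `outer_code == PARALLEL` test. Modelled in isolation (a sketch; the three bool parameters stand in for GCC's `outer_code == PARALLEL`, `BYTES_BIG_ENDIAN` and `aarch64_vect_struct_mode_p (mode)` tests):

#include <stdbool.h>
#include <stdio.h>

/* Sketch: which addresses now take the load/store-pair path.  Before the
   patch only outer_code == PARALLEL did; after it, every vector-struct
   mode on big endian does too, since those moves become ldp/stp.  */
static bool
load_store_pair_p (bool outer_parallel, bool bytes_big_endian,
                   bool vect_struct_mode)
{
  return outer_parallel || (bytes_big_endian && vect_struct_mode);
}

int
main (void)
{
  printf ("%d\n", load_store_pair_p (false, true, true));  /* 1: new BE case */
  return 0;
}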
@@ -3481,7 +3489,29 @@ aarch64_classify_address (struct aarch64_address_info *info,
	    return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
		    && offset_9bit_signed_unscaled_p (mode, offset));
 
-	  if (outer_code == PARALLEL)
+	  /* A 7bit offset check because OImode will emit a ldp/stp
+	     instruction (only big endian will get here).
+	     For ldp/stp instructions, the offset is scaled for the size of a
+	     single element of the pair.  */
+	  if (mode == OImode)
+	    return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
+
+	  /* Three 9/12 bit offsets checks because CImode will emit three
+	     ldr/str instructions (only big endian will get here).  */
+	  if (mode == CImode)
+	    return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
+		    && (offset_9bit_signed_unscaled_p (V16QImode, offset + 32)
+			|| offset_12bit_unsigned_scaled_p (V16QImode,
+							   offset + 32)));
+
+	  /* Two 7bit offsets checks because XImode will emit two ldp/stp
+	     instructions (only big endian will get here).  */
+	  if (mode == XImode)
+	    return (aarch64_offset_7bit_signed_scaled_p (TImode, offset)
+		    && aarch64_offset_7bit_signed_scaled_p (TImode,
+							    offset + 32));
+
+	  if (load_store_pair_p)
	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
		    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
	  else
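The TImode-scaled checks above encode the ldp/stp immediate: a signed 7-bit value scaled by the 16-byte element size, i.e. offsets that are multiples of 16 in [-1024, 1008]. A standalone sketch of the XImode rule under that assumption (this models, rather than reuses, `aarch64_offset_7bit_signed_scaled_p`):

#include <stdbool.h>
#include <stdio.h>

/* Assumed semantics: a multiple of 16 whose scaled value fits in a
   signed 7-bit field, i.e. 16 * [-64, 63].  */
static bool
offset_7bit_signed_scaled_16 (long off)
{
  return off % 16 == 0 && off / 16 >= -64 && off / 16 <= 63;
}

/* XImode emits two ldp/stp instructions, at OFF and OFF + 32, so both
   anchor offsets must be in range.  */
static bool
xi_offset_ok (long off)
{
  return offset_7bit_signed_scaled_16 (off)
         && offset_7bit_signed_scaled_16 (off + 32);
}

int
main (void)
{
  /* Prints "1 0 0": at 992 and 1008 the second pair's offset leaves
     the 7-bit scaled range.  */
  printf ("%d %d %d\n", xi_offset_ok (0), xi_offset_ok (992),
          xi_offset_ok (1008));
  return 0;
}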
@@ -3541,7 +3571,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
	    return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
		    && offset_9bit_signed_unscaled_p (mode, offset));
 
-	  if (outer_code == PARALLEL)
+	  if (load_store_pair_p)
	    return ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
		    && aarch64_offset_7bit_signed_scaled_p (mode, offset));
	  else
@@ -3555,7 +3585,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
       /* load literal: pc-relative constant pool entry.  Only supported
	  for SI mode or larger.  */
       info->type = ADDRESS_SYMBOLIC;
-      if (outer_code != PARALLEL && GET_MODE_SIZE (mode) >= 4)
+
+      if (!load_store_pair_p && GET_MODE_SIZE (mode) >= 4)
	{
	  rtx sym, addend;
@@ -4208,6 +4239,16 @@ aarch64_print_operand (FILE *f, rtx x, char code)
       asm_fprintf (f, "v%d", REGNO (x) - V0_REGNUM + (code - 'S'));
       break;
 
+    case 'R':
+      /* Print a scalar FP/SIMD register name + 1.  */
+      if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
+	{
+	  output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
+	  return;
+	}
+      asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1);
+      break;
+
     case 'X':
       /* Print bottom 16 bits of integer constant in hex.  */
       if (!CONST_INT_P (x))
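For the BE patterns this pairs with the existing 'q' modifier: `%q1` prints an FP/SIMD operand as qN and the new `%R1` prints qN+1, which is exactly the register pair an `stp`/`ldp` of an OImode value needs. A standalone sketch of the arithmetic (the V0_REGNUM value of 32 is an illustrative assumption, not GCC's definition):

#include <stdio.h>

#define V0_REGNUM 32  /* illustrative stand-in for the hard-register base */

/* Sketch of the 'R' modifier: name the register one above the operand's,
   as a Q register, per "q%d", REGNO (x) - V0_REGNUM + 1 in the patch.  */
static void
print_R (int regno)
{
  printf ("q%d\n", regno - V0_REGNUM + 1);
}

int
main (void)
{
  print_R (V0_REGNUM + 4);  /* an OImode value in v4/v5: prints "q5" */
  return 0;
}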
@@ -8595,35 +8636,28 @@ aarch64_simd_mem_operand_p (rtx op)
	  || REG_P (XEXP (op, 0)));
 }
 
-/* Set up OPERANDS for a register copy from SRC to DEST, taking care
-   not to early-clobber SRC registers in the process.
+/* Emit a register copy from operand to operand, taking care not to
+   early-clobber source registers in the process.
 
-   We assume that the operands described by SRC and DEST represent a
-   decomposed copy of OPERANDS[1] into OPERANDS[0].  COUNT is the
-   number of components into which the copy has been decomposed.  */
+   COUNT is the number of components into which the copy needs to be
+   decomposed.  */
 void
-aarch64_simd_disambiguate_copy (rtx *operands, rtx *dest,
-				rtx *src, unsigned int count)
+aarch64_simd_emit_reg_reg_move (rtx *operands, enum machine_mode mode,
+				unsigned int count)
 {
   unsigned int i;
+  int rdest = REGNO (operands[0]);
+  int rsrc = REGNO (operands[1]);
 
   if (!reg_overlap_mentioned_p (operands[0], operands[1])
-      || REGNO (operands[0]) < REGNO (operands[1]))
-    {
-      for (i = 0; i < count; i++)
-	{
-	  operands[2 * i] = dest[i];
-	  operands[2 * i + 1] = src[i];
-	}
-    }
+      || rdest < rsrc)
+    for (i = 0; i < count; i++)
+      emit_move_insn (gen_rtx_REG (mode, rdest + i),
+		      gen_rtx_REG (mode, rsrc + i));
   else
-    {
-      for (i = 0; i < count; i++)
-	{
-	  operands[2 * i] = dest[count - i - 1];
-	  operands[2 * i + 1] = src[count - i - 1];
-	}
-    }
+    for (i = 0; i < count; i++)
+      emit_move_insn (gen_rtx_REG (mode, rdest + count - i - 1),
+		      gen_rtx_REG (mode, rsrc + count - i - 1));
 }
 
 /* Compute and return the length of aarch64_simd_mov<mode>, where <mode> is
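The rewritten helper keeps the old ordering rule: when the destination register range overlaps the source and starts above it, emit the component moves highest-first so no source register is clobbered before it is read. A standalone model of that ordering (an int array stands in for the register file; the overlap test is simplified to contiguous, equal-sized ranges, whereas GCC uses reg_overlap_mentioned_p):

#include <stdio.h>

/* Model of aarch64_simd_emit_reg_reg_move's ordering: copy low-to-high
   when safe, high-to-low when the destination overlaps the source from
   above, so every source component is read before it is overwritten.  */
static void
move_regs (int *regs, int rdest, int rsrc, unsigned int count)
{
  unsigned int i;

  if (rdest < rsrc || rdest >= rsrc + (int) count)  /* no overlap hazard */
    for (i = 0; i < count; i++)
      regs[rdest + i] = regs[rsrc + i];
  else                                              /* dest overlaps above */
    for (i = 0; i < count; i++)
      regs[rdest + count - i - 1] = regs[rsrc + count - i - 1];
}

int
main (void)
{
  int regs[8] = { 10, 11, 12, 13, 0, 0, 0, 0 };

  move_regs (regs, 1, 0, 4);  /* v0-v3 -> v1-v4 must copy in reverse */
  printf ("%d %d %d %d\n", regs[1], regs[2], regs[3], regs[4]);  /* 10 11 12 13 */
  return 0;
}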