Commit 3520f7cc by James Greenhalgh Committed by James Greenhalgh

[AARCH64] Add support for vector and scalar floating-point immediate loads.

gcc/

	* config/aarch64/aarch64-protos.h
	(aarch64_const_double_zero_rtx_p): Rename to...
	(aarch64_float_const_zero_rtx_p): ...this.
	(aarch64_float_const_representable_p): New.
	(aarch64_output_simd_mov_immediate): Likewise.
	* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<mode>): Refactor
	move immediate case.
	* config/aarch64/aarch64.c
	(aarch64_const_double_zero_rtx_p): Rename to...
	(aarch64_float_const_zero_rtx_p): ...this.
	(aarch64_print_operand): Allow printing of new constants.
	(aarch64_valid_floating_const): New.
	(aarch64_legitimate_constant_p): Check for valid floating-point
	constants.
	(aarch64_simd_valid_immediate): Likewise.
	(aarch64_vect_float_const_representable_p): New.
	(aarch64_float_const_representable_p): Likewise.
	(aarch64_simd_imm_zero_p): Also allow for floating-point 0.0.
	(aarch64_output_simd_mov_immediate): New.
	* config/aarch64/aarch64.md (*movsf_aarch64): Add new alternative.
	(*movdf_aarch64): Likewise.
	* config/aarch64/constraints.md (Ufc): New.
	(Y): Call aarch64_float_const_zero_rtx_p.
	* config/aarch64/predicates.md (aarch64_fp_compare_operand): Call
	aarch64_float_const_zero_rtx_p.

gcc/testsuite/

	* gcc.target/aarch64/fmovd.c: New.
	* gcc.target/aarch64/fmovf.c: Likewise.
	* gcc.target/aarch64/fmovd-zero.c: Likewise.
	* gcc.target/aarch64/fmovf-zero.c: Likewise.
	* gcc.target/aarch64/vect-fmovd.c: Likewise.
	* gcc.target/aarch64/vect-fmovf.c: Likewise.
	* gcc.target/aarch64/vect-fmovd-zero.c: Likewise.
	* gcc.target/aarch64/vect-fmovf-zero.c: Likewise.

From-SVN: r194972
parent e167c04d
2013-01-07 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-protos.h
(aarch64_const_double_zero_rtx_p): Rename to...
(aarch64_float_const_zero_rtx_p): ...this.
(aarch64_float_const_representable_p): New.
(aarch64_output_simd_mov_immediate): Likewise.
* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<mode>): Refactor
move immediate case.
* config/aarch64/aarch64.c
(aarch64_const_double_zero_rtx_p): Rename to...
(aarch64_float_const_zero_rtx_p): ...this.
(aarch64_print_operand): Allow printing of new constants.
(aarch64_valid_floating_const): New.
(aarch64_legitimate_constant_p): Check for valid floating-point
constants.
(aarch64_simd_valid_immediate): Likewise.
(aarch64_vect_float_const_representable_p): New.
(aarch64_float_const_representable_p): Likewise.
(aarch64_simd_imm_zero_p): Also allow for floating-point 0.0.
(aarch64_output_simd_mov_immediate): New.
* config/aarch64/aarch64.md (*movsf_aarch64): Add new alternative.
(*movdf_aarch64): Likewise.
* config/aarch64/constraints.md (Ufc): New.
(Y): Call aarch64_float_const_zero_rtx_p.
* config/aarch64/predicates.md (aarch64_fp_compare_operand): Call
aarch64_float_const_zero_rtx_p.
2013-01-07 Richard Biener <rguenther@suse.de> 2013-01-07 Richard Biener <rguenther@suse.de>
PR tree-optimization/55888 PR tree-optimization/55888
......
...@@ -136,8 +136,8 @@ struct tune_params ...@@ -136,8 +136,8 @@ struct tune_params
HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned); HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode); bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode);
bool aarch64_const_double_zero_rtx_p (rtx);
bool aarch64_constant_address_p (rtx); bool aarch64_constant_address_p (rtx);
bool aarch64_float_const_zero_rtx_p (rtx);
bool aarch64_function_arg_regno_p (unsigned); bool aarch64_function_arg_regno_p (unsigned);
bool aarch64_gen_movmemqi (rtx *); bool aarch64_gen_movmemqi (rtx *);
bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx); bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx);
...@@ -215,6 +215,9 @@ void aarch64_split_128bit_move (rtx, rtx); ...@@ -215,6 +215,9 @@ void aarch64_split_128bit_move (rtx, rtx);
bool aarch64_split_128bit_move_p (rtx, rtx); bool aarch64_split_128bit_move_p (rtx, rtx);
/* Check for a legitimate floating point constant for FMOV. */
bool aarch64_float_const_representable_p (rtx);
#if defined (RTX_CODE) #if defined (RTX_CODE)
bool aarch64_legitimate_address_p (enum machine_mode, rtx, RTX_CODE, bool); bool aarch64_legitimate_address_p (enum machine_mode, rtx, RTX_CODE, bool);
...@@ -246,4 +249,5 @@ extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); ...@@ -246,4 +249,5 @@ extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
extern bool extern bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
char* aarch64_output_simd_mov_immediate (rtx *, enum machine_mode, unsigned);
#endif /* GCC_AARCH64_PROTOS_H */ #endif /* GCC_AARCH64_PROTOS_H */
...@@ -394,34 +394,8 @@ ...@@ -394,34 +394,8 @@
case 4: return "ins\t%0.d[0], %1"; case 4: return "ins\t%0.d[0], %1";
case 5: return "mov\t%0, %1"; case 5: return "mov\t%0, %1";
case 6: case 6:
{ return aarch64_output_simd_mov_immediate (&operands[1],
int is_valid; <MODE>mode, 64);
unsigned char widthc;
int width;
static char templ[40];
int shift = 0, mvn = 0;
const char *mnemonic;
int length = 0;
is_valid =
aarch64_simd_immediate_valid_for_move (operands[1], <MODE>mode,
&operands[1], &width, &widthc,
&mvn, &shift);
gcc_assert (is_valid != 0);
mnemonic = mvn ? "mvni" : "movi";
if (widthc != 'd')
length += snprintf (templ, sizeof (templ),
"%s\t%%0.%d%c, %%1",
mnemonic, 64 / width, widthc);
else
length += snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
if (shift != 0)
length += snprintf (templ + length, sizeof (templ) - length,
", lsl %d", shift);
return templ;
}
default: gcc_unreachable (); default: gcc_unreachable ();
} }
} }
...@@ -438,39 +412,19 @@ ...@@ -438,39 +412,19 @@
&& (register_operand (operands[0], <MODE>mode) && (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))" || register_operand (operands[1], <MODE>mode))"
{ {
switch (which_alternative) switch (which_alternative)
{ {
case 0: return "ld1\t{%0.<Vtype>}, %1"; case 0: return "ld1\t{%0.<Vtype>}, %1";
case 1: return "st1\t{%1.<Vtype>}, %0"; case 1: return "st1\t{%1.<Vtype>}, %0";
case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>"; case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
case 3: return "umov\t%0, %1.d[0]\;umov\t%H0, %1.d[1]"; case 3: return "umov\t%0, %1.d[0]\;umov\t%H0, %1.d[1]";
case 4: return "ins\t%0.d[0], %1\;ins\t%0.d[1], %H1"; case 4: return "ins\t%0.d[0], %1\;ins\t%0.d[1], %H1";
case 5: return "#"; case 5: return "#";
case 6: case 6:
{ return aarch64_output_simd_mov_immediate (&operands[1],
int is_valid; <MODE>mode, 128);
unsigned char widthc; default: gcc_unreachable ();
int width; }
static char templ[40];
int shift = 0, mvn = 0;
is_valid =
aarch64_simd_immediate_valid_for_move (operands[1], <MODE>mode,
&operands[1], &width, &widthc,
&mvn, &shift);
gcc_assert (is_valid != 0);
if (shift)
snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
mvn ? "mvni" : "movi",
128 / width, widthc, shift);
else
snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
mvn ? "mvni" : "movi",
128 / width, widthc);
return templ;
}
default: gcc_unreachable ();
}
} }
[(set_attr "simd_type" "simd_load1,simd_store1,simd_move,simd_movgp,simd_insgp,simd_move,simd_move_imm") [(set_attr "simd_type" "simd_load1,simd_store1,simd_move,simd_movgp,simd_insgp,simd_move,simd_move_imm")
(set_attr "simd_mode" "<MODE>") (set_attr "simd_mode" "<MODE>")
......
...@@ -915,38 +915,44 @@ ...@@ -915,38 +915,44 @@
) )
(define_insn "*movsf_aarch64" (define_insn "*movsf_aarch64"
[(set (match_operand:SF 0 "nonimmediate_operand" "= w,?r,w,w,m,r,m ,r") [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r")
(match_operand:SF 1 "general_operand" "?rY, w,w,m,w,m,rY,r"))] (match_operand:SF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))]
"TARGET_FLOAT && (register_operand (operands[0], SFmode) "TARGET_FLOAT && (register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))" || register_operand (operands[1], SFmode))"
"@ "@
fmov\\t%s0, %w1 fmov\\t%s0, %w1
fmov\\t%w0, %s1 fmov\\t%w0, %s1
fmov\\t%s0, %s1 fmov\\t%s0, %s1
fmov\\t%s0, %1
ldr\\t%s0, %1 ldr\\t%s0, %1
str\\t%s1, %0 str\\t%s1, %0
ldr\\t%w0, %1 ldr\\t%w0, %1
str\\t%w1, %0 str\\t%w1, %0
mov\\t%w0, %w1" mov\\t%w0, %w1"
[(set_attr "v8type" "fmovi2f,fmovf2i,fmov,fpsimd_load,fpsimd_store,fpsimd_load,fpsimd_store,fmov") [(set_attr "v8type" "fmovi2f,fmovf2i,\
fmov,fconst,fpsimd_load,\
fpsimd_store,fpsimd_load,fpsimd_store,fmov")
(set_attr "mode" "SF")] (set_attr "mode" "SF")]
) )
(define_insn "*movdf_aarch64" (define_insn "*movdf_aarch64"
[(set (match_operand:DF 0 "nonimmediate_operand" "= w,?r,w,w,m,r,m ,r") [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r")
(match_operand:DF 1 "general_operand" "?rY, w,w,m,w,m,rY,r"))] (match_operand:DF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))]
"TARGET_FLOAT && (register_operand (operands[0], DFmode) "TARGET_FLOAT && (register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))" || register_operand (operands[1], DFmode))"
"@ "@
fmov\\t%d0, %x1 fmov\\t%d0, %x1
fmov\\t%x0, %d1 fmov\\t%x0, %d1
fmov\\t%d0, %d1 fmov\\t%d0, %d1
fmov\\t%d0, %1
ldr\\t%d0, %1 ldr\\t%d0, %1
str\\t%d1, %0 str\\t%d1, %0
ldr\\t%x0, %1 ldr\\t%x0, %1
str\\t%x1, %0 str\\t%x1, %0
mov\\t%x0, %x1" mov\\t%x0, %x1"
[(set_attr "v8type" "fmovi2f,fmovf2i,fmov,fpsimd_load,fpsimd_store,fpsimd_load,fpsimd_store,move") [(set_attr "v8type" "fmovi2f,fmovf2i,\
fmov,fconst,fpsimd_load,\
fpsimd_store,fpsimd_load,fpsimd_store,move")
(set_attr "mode" "DF")] (set_attr "mode" "DF")]
) )
...@@ -991,7 +997,6 @@ ...@@ -991,7 +997,6 @@
(set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*")] (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*")]
) )
;; Operands 1 and 3 are tied together by the final condition; so we allow ;; Operands 1 and 3 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation. ;; fairly lax checking on the second memory operation.
(define_insn "load_pair<mode>" (define_insn "load_pair<mode>"
......
...@@ -69,7 +69,7 @@ ...@@ -69,7 +69,7 @@
(define_constraint "Y" (define_constraint "Y"
"Floating point constant zero." "Floating point constant zero."
(and (match_code "const_double") (and (match_code "const_double")
(match_test "aarch64_const_double_zero_rtx_p (op)"))) (match_test "aarch64_float_const_zero_rtx_p (op)")))
(define_constraint "Z" (define_constraint "Z"
"Integer constant zero." "Integer constant zero."
...@@ -138,6 +138,12 @@ ...@@ -138,6 +138,12 @@
(and (match_code "mem") (and (match_code "mem")
(match_test "aarch64_simd_mem_operand_p (op)"))) (match_test "aarch64_simd_mem_operand_p (op)")))
(define_constraint "Ufc"
"A floating point constant which can be used with an\
FMOV immediate operation."
(and (match_code "const_double")
(match_test "aarch64_float_const_representable_p (op)")))
(define_constraint "Dn" (define_constraint "Dn"
"@internal "@internal
A constraint that matches vector of immediates." A constraint that matches vector of immediates."
......
...@@ -41,7 +41,7 @@ ...@@ -41,7 +41,7 @@
(define_predicate "aarch64_fp_compare_operand" (define_predicate "aarch64_fp_compare_operand"
(ior (match_operand 0 "register_operand") (ior (match_operand 0 "register_operand")
(and (match_code "const_double") (and (match_code "const_double")
(match_test "aarch64_const_double_zero_rtx_p (op)")))) (match_test "aarch64_float_const_zero_rtx_p (op)"))))
(define_predicate "aarch64_plus_immediate" (define_predicate "aarch64_plus_immediate"
(and (match_code "const_int") (and (match_code "const_int")
......
2013-01-07 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/fmovd.c: New.
* gcc.target/aarch64/fmovf.c: Likewise.
* gcc.target/aarch64/fmovd-zero.c: Likewise.
* gcc.target/aarch64/fmovf-zero.c: Likewise.
* gcc.target/aarch64/vect-fmovd.c: Likewise.
* gcc.target/aarch64/vect-fmovf.c: Likewise.
* gcc.target/aarch64/vect-fmovd-zero.c: Likewise.
* gcc.target/aarch64/vect-fmovf-zero.c: Likewise.
2013-01-07 Richard Biener <rguenther@suse.de> 2013-01-07 Richard Biener <rguenther@suse.de>
PR tree-optimization/55888 PR tree-optimization/55888
......
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Store the double-precision constant 0.0 through OUTPUT.  */
void
foo (double *output)
{
  output[0] = 0.0;
}
/* { dg-final { scan-assembler "fmov\\td\[0-9\]+, xzr" } } */
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Store the double-precision constant 4.25 through OUTPUT.  */
void
foo (double *output)
{
  output[0] = 4.25;
}
/* { dg-final { scan-assembler "fmov\\td\[0-9\]+, 4\\.25" } } */
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Store the constant 0.0 through the single-precision pointer OUTPUT.  */
void
foo (float *output)
{
  output[0] = 0.0;
}
/* { dg-final { scan-assembler "fmov\\ts\[0-9\]+, wzr" } } */
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* Store the constant 4.25 through the single-precision pointer OUTPUT.  */
void
foo (float *output)
{
  output[0] = 4.25;
}
/* { dg-final { scan-assembler "fmov\\ts\[0-9\]+, 4\\.25" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */
#define N 32

/* Write 0.0 to each of the first N doubles starting at OUTPUT.  */
void
foo (double *output)
{
  int idx;

  /* Vectorizable.  */
  for (idx = 0; idx < N; ++idx)
    output[idx] = 0.0;
}
/* { dg-final { scan-assembler "movi\\tv\[0-9\]+\\.2d, 0" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */
#define N 32

/* Write 4.25 to each of the first N doubles starting at OUTPUT.  */
void
foo (double *output)
{
  int idx;

  /* Vectorizable.  */
  for (idx = 0; idx < N; ++idx)
    output[idx] = 4.25;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-assembler "fmov\\tv\[0-9\]+\\.2d, 4\\.25" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */
#define N 32

/* Write 0.0 to each of the first N floats starting at OUTPUT.  */
void
foo (float *output)
{
  int idx;

  /* Vectorizable.  */
  for (idx = 0; idx < N; ++idx)
    output[idx] = 0.0;
}
/* { dg-final { scan-assembler "movi\\tv\[0-9\]+\\.\[24\]s, 0" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */
#define N 32

/* Write 4.25 to each of the first N floats starting at OUTPUT.  */
void
foo (float *output)
{
  int idx;

  /* Vectorizable.  */
  for (idx = 0; idx < N; ++idx)
    output[idx] = 4.25;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-assembler "fmov\\tv\[0-9\]+\\.\[24\]s, 4\\.25" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment