Commit 3520f7cc, authored and committed by James Greenhalgh

[AARCH64] Add support for vector and scalar floating-point immediate loads.

gcc/

	* config/aarch64/aarch64-protos.h
	(aarch64_const_double_zero_rtx_p): Rename to...
	(aarch64_float_const_zero_rtx_p): ...this.
	(aarch64_float_const_representable_p): New.
	(aarch64_output_simd_mov_immediate): Likewise.
	* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<mode>): Refactor
	move immediate case.
	* config/aarch64/aarch64.c
	(aarch64_const_double_zero_rtx_p): Rename to...
	(aarch64_float_const_zero_rtx_p): ...this.
	(aarch64_print_operand): Allow printing of new constants.
	(aarch64_valid_floating_const): New.
	(aarch64_legitimate_constant_p): Check for valid floating-point
	constants.
	(aarch64_simd_valid_immediate): Likewise.
	(aarch64_vect_float_const_representable_p): New.
	(aarch64_float_const_representable_p): Likewise.
	(aarch64_simd_imm_zero_p): Also allow for floating-point 0.0.
	(aarch64_output_simd_mov_immediate): New.
	* config/aarch64/aarch64.md (*movsf_aarch64): Add new alternative.
	(*movdf_aarch64): Likewise.
	* config/aarch64/constraints.md (Ufc): New.
	(Y): Call aarch64_float_const_zero_rtx_p.
	* config/aarch64/predicates.md (aarch64_fp_compare_operand): New.

gcc/testsuite/

	* gcc.target/aarch64/fmovd.c: New.
	* gcc.target/aarch64/fmovf.c: Likewise.
	* gcc.target/aarch64/fmovd-zero.c: Likewise.
	* gcc.target/aarch64/fmovf-zero.c: Likewise.
	* gcc.target/aarch64/vect-fmovd.c: Likewise.
	* gcc.target/aarch64/vect-fmovf.c: Likewise.
	* gcc.target/aarch64/vect-fmovd-zero.c: Likewise.
	* gcc.target/aarch64/vect-fmovf-zero.c: Likewise.

From-SVN: r194972
Parent: e167c04d

gcc/ChangeLog
2013-01-07 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-protos.h
(aarch64_const_double_zero_rtx_p): Rename to...
(aarch64_float_const_zero_rtx_p): ...this.
(aarch64_float_const_representable_p): New.
(aarch64_output_simd_mov_immediate): Likewise.
* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<mode>): Refactor
move immediate case.
* config/aarch64/aarch64.c
(aarch64_const_double_zero_rtx_p): Rename to...
(aarch64_float_const_zero_rtx_p): ...this.
(aarch64_print_operand): Allow printing of new constants.
(aarch64_valid_floating_const): New.
(aarch64_legitimate_constant_p): Check for valid floating-point
constants.
(aarch64_simd_valid_immediate): Likewise.
(aarch64_vect_float_const_representable_p): New.
(aarch64_float_const_representable_p): Likewise.
(aarch64_simd_imm_zero_p): Also allow for floating-point 0.0.
(aarch64_output_simd_mov_immediate): New.
* config/aarch64/aarch64.md (*movsf_aarch64): Add new alternative.
(*movdf_aarch64): Likewise.
* config/aarch64/constraints.md (Ufc): New.
(Y): Call aarch64_float_const_zero_rtx_p.
* config/aarch64/predicates.md (aarch64_fp_compare_operand): New.
2013-01-07 Richard Biener <rguenther@suse.de>
PR tree-optimization/55888

gcc/config/aarch64/aarch64-protos.h
@@ -136,8 +136,8 @@ struct tune_params
HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode);
bool aarch64_const_double_zero_rtx_p (rtx);
bool aarch64_constant_address_p (rtx);
bool aarch64_float_const_zero_rtx_p (rtx);
bool aarch64_function_arg_regno_p (unsigned);
bool aarch64_gen_movmemqi (rtx *);
bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx);
@@ -215,6 +215,9 @@ void aarch64_split_128bit_move (rtx, rtx);
bool aarch64_split_128bit_move_p (rtx, rtx);
/* Check for a legitimate floating point constant for FMOV. */
bool aarch64_float_const_representable_p (rtx);
#if defined (RTX_CODE)
bool aarch64_legitimate_address_p (enum machine_mode, rtx, RTX_CODE, bool);
@@ -246,4 +249,5 @@ extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
extern bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
char* aarch64_output_simd_mov_immediate (rtx *, enum machine_mode, unsigned);
#endif /* GCC_AARCH64_PROTOS_H */

gcc/config/aarch64/aarch64-simd.md
@@ -394,34 +394,8 @@
case 4: return "ins\t%0.d[0], %1";
case 5: return "mov\t%0, %1";
case 6:
{
int is_valid;
unsigned char widthc;
int width;
static char templ[40];
int shift = 0, mvn = 0;
const char *mnemonic;
int length = 0;
is_valid =
aarch64_simd_immediate_valid_for_move (operands[1], <MODE>mode,
&operands[1], &width, &widthc,
&mvn, &shift);
gcc_assert (is_valid != 0);
mnemonic = mvn ? "mvni" : "movi";
if (widthc != 'd')
length += snprintf (templ, sizeof (templ),
"%s\t%%0.%d%c, %%1",
mnemonic, 64 / width, widthc);
else
length += snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
if (shift != 0)
length += snprintf (templ + length, sizeof (templ) - length,
", lsl %d", shift);
return templ;
}
return aarch64_output_simd_mov_immediate (&operands[1],
<MODE>mode, 64);
default: gcc_unreachable ();
}
}
@@ -438,39 +412,19 @@
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
{
switch (which_alternative)
{
case 0: return "ld1\t{%0.<Vtype>}, %1";
case 1: return "st1\t{%1.<Vtype>}, %0";
case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
case 3: return "umov\t%0, %1.d[0]\;umov\t%H0, %1.d[1]";
case 4: return "ins\t%0.d[0], %1\;ins\t%0.d[1], %H1";
case 5: return "#";
case 6:
{
int is_valid;
unsigned char widthc;
int width;
static char templ[40];
int shift = 0, mvn = 0;
is_valid =
aarch64_simd_immediate_valid_for_move (operands[1], <MODE>mode,
&operands[1], &width, &widthc,
&mvn, &shift);
gcc_assert (is_valid != 0);
if (shift)
snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
mvn ? "mvni" : "movi",
128 / width, widthc, shift);
else
snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
mvn ? "mvni" : "movi",
128 / width, widthc);
return templ;
}
default: gcc_unreachable ();
}
switch (which_alternative)
{
case 0: return "ld1\t{%0.<Vtype>}, %1";
case 1: return "st1\t{%1.<Vtype>}, %0";
case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
case 3: return "umov\t%0, %1.d[0]\;umov\t%H0, %1.d[1]";
case 4: return "ins\t%0.d[0], %1\;ins\t%0.d[1], %H1";
case 5: return "#";
case 6:
return aarch64_output_simd_mov_immediate (&operands[1],
<MODE>mode, 128);
default: gcc_unreachable ();
}
}
[(set_attr "simd_type" "simd_load1,simd_store1,simd_move,simd_movgp,simd_insgp,simd_move,simd_move_imm")
(set_attr "simd_mode" "<MODE>")

gcc/config/aarch64/aarch64.c
@@ -3028,7 +3028,7 @@ aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
/* Return TRUE if rtx X is immediate constant 0.0 */
bool
aarch64_const_double_zero_rtx_p (rtx x)
aarch64_float_const_zero_rtx_p (rtx x)
{
REAL_VALUE_TYPE r;
@@ -3369,7 +3369,8 @@ aarch64_print_operand (FILE *f, rtx x, char code)
case 'x':
/* Print a general register name or the zero register (32-bit or
64-bit). */
if (x == const0_rtx)
if (x == const0_rtx
|| (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
{
asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
break;
@@ -3420,11 +3421,46 @@ aarch64_print_operand (FILE *f, rtx x, char code)
break;
case CONST_VECTOR:
gcc_assert (aarch64_const_vec_all_same_int_p (x, HOST_WIDE_INT_MIN,
HOST_WIDE_INT_MAX));
asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
{
gcc_assert (aarch64_const_vec_all_same_int_p (x,
HOST_WIDE_INT_MIN,
HOST_WIDE_INT_MAX));
asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
}
else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
{
fputc ('0', f);
}
else
gcc_unreachable ();
break;
case CONST_DOUBLE:
/* CONST_DOUBLE can represent a double-width integer.
In this case, the mode of x is VOIDmode. */
if (GET_MODE (x) == VOIDmode)
; /* Do Nothing. */
else if (aarch64_float_const_zero_rtx_p (x))
{
fputc ('0', f);
break;
}
else if (aarch64_float_const_representable_p (x))
{
#define buf_size 20
char float_buf[buf_size] = {'\0'};
REAL_VALUE_TYPE r;
REAL_VALUE_FROM_CONST_DOUBLE (r, x);
real_to_decimal_for_mode (float_buf, &r,
buf_size, buf_size,
1, GET_MODE (x));
asm_fprintf (asm_out_file, "%s", float_buf);
break;
#undef buf_size
}
output_operand_lossage ("invalid constant");
return;
default:
output_operand_lossage ("invalid operand");
return;
@@ -5006,6 +5042,27 @@ aarch64_legitimate_pic_operand_p (rtx x)
return true;
}
/* Return true if X holds either a quarter-precision floating-point
constant or the floating-point constant +0.0.  */
static bool
aarch64_valid_floating_const (enum machine_mode mode, rtx x)
{
if (!CONST_DOUBLE_P (x))
return false;
/* TODO: We could handle moving 0.0 to a TFmode register,
but first we would like to refactor movtf_aarch64
to be more amenable to splitting moves properly and
to gating correctly on TARGET_SIMD.  For now, reject all
constants which are not destined for SFmode or DFmode registers.  */
if (!(mode == SFmode || mode == DFmode))
return false;
if (aarch64_float_const_zero_rtx_p (x))
return true;
return aarch64_float_const_representable_p (x);
}
static bool
aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
{
@@ -5019,8 +5076,8 @@ aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
if ((GET_CODE (x) == CONST_VECTOR
&& aarch64_simd_valid_immediate (x, mode, false,
NULL, NULL, NULL, NULL, NULL) != -1)
|| CONST_INT_P (x))
return !targetm.cannot_force_const_mem (mode, x);
|| CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
return !targetm.cannot_force_const_mem (mode, x);
if (GET_CODE (x) == HIGH
&& aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
@@ -5975,6 +6032,44 @@ sizetochar (int size)
}
}
/* Return true iff X is a uniform vector of floating-point
constants, and the constant can be represented in
quarter-precision form.  Note, as aarch64_float_const_representable_p
rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
static bool
aarch64_vect_float_const_representable_p (rtx x)
{
int i = 0;
REAL_VALUE_TYPE r0, ri;
rtx x0, xi;
if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
return false;
x0 = CONST_VECTOR_ELT (x, 0);
if (!CONST_DOUBLE_P (x0))
return false;
REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
{
xi = CONST_VECTOR_ELT (x, i);
if (!CONST_DOUBLE_P (xi))
return false;
REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
if (!REAL_VALUES_EQUAL (r0, ri))
return false;
}
return aarch64_float_const_representable_p (x0);
}
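For instance, a V2DF vector { 4.25, 4.25 } passes this check, { 4.25, 2.0 } fails the uniformity loop, and { 0.0, 0.0 } is rejected here but is instead caught by the aarch64_simd_imm_zero_p test in the caller below.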
/* TODO: This function returns values similar to those
returned by neon_valid_immediate in gcc/config/arm/arm.c
but the API here is different enough that these magic numbers
are not used. It should be sufficient to return true or false. */
static int
aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
rtx *modconst, int *elementwidth,
@@ -6004,9 +6099,32 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
unsigned int invmask = inverse ? 0xff : 0;
int eshift, emvn;
/* TODO: Vectors of float constants. */
if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
return -1;
{
bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
if (!(simd_imm_zero
|| aarch64_vect_float_const_representable_p (op)))
return -1;
if (modconst)
*modconst = CONST_VECTOR_ELT (op, 0);
if (elementwidth)
*elementwidth = elem_width;
if (elementchar)
*elementchar = sizetochar (elem_width);
if (shift)
*shift = 0;
if (simd_imm_zero)
return 19;
else
return 18;
}
/* Splat vector constant out into a byte vector. */
for (i = 0; i < n_elts; i++)
@@ -6161,8 +6279,8 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
/* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction
(or, implicitly, MVNI) immediate. Write back width per element
to *ELEMENTWIDTH (or zero for float elements), and a modified constant
(whatever should be output for a MOVI instruction) in *MODCONST. */
to *ELEMENTWIDTH, and a modified constant (whatever should be output
for a MOVI instruction) in *MODCONST. */
int
aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
rtx *modconst, int *elementwidth,
@@ -6233,22 +6351,13 @@ aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
}
/* Return true if X is a uniform vector where all elements
are either the floating-point constant 0.0 or the
integer constant 0. */
bool
aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
{
int nunits;
int i;
if (GET_CODE (x) != CONST_VECTOR)
return false;
nunits = GET_MODE_NUNITS (mode);
for (i = 0; i < nunits; i++)
if (INTVAL (CONST_VECTOR_ELT (x, i)) != 0)
return false;
return true;
return x == CONST0_RTX (mode);
}
bool
@@ -6799,6 +6908,139 @@ aarch64_c_mode_for_suffix (char suffix)
return VOIDmode;
}
/* We can only represent floating point constants which will fit in
"quarter-precision" values. These values are characterised by
a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
by:
(-1)^s * (n/16) * 2^r
Where:
's' is the sign bit.
'n' is an integer in the range 16 <= n <= 31.
'r' is an integer in the range -3 <= r <= 4. */
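As a worked instance of the formula: 4.25 = (17/16) * 2^2 is representable
(s = 0, n = 17, r = 2), while 0.1 has no terminating binary expansion and is
not. A minimal standalone sketch, separate from the patch, that brute-forces
the encoding space (quarter_precision_p is a hypothetical helper name):

#include <stdbool.h>
#include <stdio.h>

/* Return true if D is exactly (-1)^s * (n/16) * 2^r for some
   16 <= n <= 31 and -3 <= r <= 4, by trying every encoding.
   Every candidate value is exact in double, so == is safe.  */
static bool
quarter_precision_p (double d)
{
  for (int s = 0; s <= 1; s++)
    for (int n = 16; n <= 31; n++)
      for (int r = -3; r <= 4; r++)
        {
          double v = (s ? -1.0 : 1.0) * ((double) n / 16.0);
          v = (r >= 0) ? v * (double) (1 << r) : v / (double) (1 << -r);
          if (v == d)
            return true;
        }
  return false;
}

int
main (void)
{
  printf ("4.25 -> %d\n", quarter_precision_p (4.25)); /* Prints 1.  */
  printf ("0.1  -> %d\n", quarter_precision_p (0.1));  /* Prints 0.  */
  return 0;
}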
/* Return true iff X can be represented as a quarter-precision
floating point immediate operand.  Note, we cannot represent 0.0.  */
bool
aarch64_float_const_representable_p (rtx x)
{
/* This represents our current view of how many bits
make up the mantissa. */
int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
int sign, exponent;
unsigned HOST_WIDE_INT mantissa, mask;
HOST_WIDE_INT m1, m2;
REAL_VALUE_TYPE r, m;
if (!CONST_DOUBLE_P (x))
return false;
REAL_VALUE_FROM_CONST_DOUBLE (r, x);
/* We cannot represent infinities, NaNs or +/-zero. We won't
know if we have +zero until we analyse the mantissa, but we
can reject the other invalid values. */
if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
|| REAL_VALUE_MINUS_ZERO (r))
return false;
/* Extract sign and exponent. */
sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
r = real_value_abs (&r);
exponent = REAL_EXP (&r);
/* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
highest (sign) bit, with a fixed binary point at bit point_pos.
m1 holds the low part of the mantissa, m2 the high part.
WARNING: If we ever have a representation using more than 2 * H_W_I - 1
bits for the mantissa, this can fail (low bits will be lost). */
real_ldexp (&m, &r, point_pos - exponent);
REAL_VALUE_TO_INT (&m1, &m2, m);
/* If the low part of the mantissa has bits set we cannot represent
the value. */
if (m1 != 0)
return false;
/* We have rejected the lower HOST_WIDE_INT, so update our
understanding of how many bits lie in the mantissa and
look only at the high HOST_WIDE_INT. */
mantissa = m2;
point_pos -= HOST_BITS_PER_WIDE_INT;
/* We can only represent values with a mantissa of the form 1.xxxx. */
mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
if ((mantissa & mask) != 0)
return false;
/* Having filtered unrepresentable values, we may now remove all
but the highest 5 bits. */
mantissa >>= point_pos - 5;
/* We cannot represent the value 0.0, so reject it. This is handled
elsewhere. */
if (mantissa == 0)
return false;
/* Then, as bit 4 is always set, we can mask it off, leaving
the mantissa in the range [0, 15]. */
mantissa &= ~(1 << 4);
gcc_assert (mantissa <= 15);
/* GCC internally does not use IEEE754-like encoding (where normalized
significands are in the range [1, 2). GCC uses [0.5, 1) (see real.c).
Our mantissa values are shifted 4 places to the left relative to
normalized IEEE754 so we must modify the exponent returned by REAL_EXP
by 5 places to correct for GCC's representation. */
exponent = 5 - exponent;
return (exponent >= 0 && exponent <= 7);
}
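Tracing 4.25 through the function (assuming a 64-bit HOST_WIDE_INT):
real.c stores 4.25 as 0.53125 * 2^3, so REAL_EXP returns 3; real_ldexp
leaves 17 << 58 in the high HOST_WIDE_INT and 0 in the low one; the mask
test passes; shifting gives mantissa = 17, and masking off bit 4 leaves 1
(i.e. n = 16 + 1 = 17); finally exponent = 5 - 3 = 2 falls in [0, 7], so
the constant is accepted, matching 4.25 = (17/16) * 2^2.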
char*
aarch64_output_simd_mov_immediate (rtx *const_vector,
enum machine_mode mode,
unsigned width)
{
int is_valid;
unsigned char widthc;
int lane_width_bits;
static char templ[40];
int shift = 0, mvn = 0;
const char *mnemonic;
unsigned int lane_count = 0;
is_valid =
aarch64_simd_immediate_valid_for_move (*const_vector, mode,
const_vector, &lane_width_bits,
&widthc, &mvn, &shift);
gcc_assert (is_valid);
mode = GET_MODE_INNER (mode);
if (mode == SFmode || mode == DFmode)
{
bool zero_p =
aarch64_float_const_zero_rtx_p (*const_vector);
gcc_assert (shift == 0);
mnemonic = zero_p ? "movi" : "fmov";
}
else
mnemonic = mvn ? "mvni" : "movi";
gcc_assert (lane_width_bits != 0);
lane_count = width / lane_width_bits;
if (lane_count == 1)
snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
else if (shift)
snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
mnemonic, lane_count, widthc, shift);
else
snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
mnemonic, lane_count, widthc);
return templ;
}
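Taken together with the vector tests below: for a uniform V2DF constant of
4.25 with width 128, the valid_for_move call reports a 64-bit lane, so
lane_count = 128 / 64 = 2 and the template becomes "fmov\t%0.2d, %1",
printing as fmov v0.2d, 4.25; a uniform floating-point zero selects "movi"
instead, giving movi v0.2d, 0.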
/* Split operands into moves from op[1] + op[2] into op[0]. */
void

gcc/config/aarch64/aarch64.md
@@ -915,38 +915,44 @@
)
(define_insn "*movsf_aarch64"
[(set (match_operand:SF 0 "nonimmediate_operand" "= w,?r,w,w,m,r,m ,r")
(match_operand:SF 1 "general_operand" "?rY, w,w,m,w,m,rY,r"))]
[(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r")
(match_operand:SF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))]
"TARGET_FLOAT && (register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode))"
"@
fmov\\t%s0, %w1
fmov\\t%w0, %s1
fmov\\t%s0, %s1
fmov\\t%s0, %1
ldr\\t%s0, %1
str\\t%s1, %0
ldr\\t%w0, %1
str\\t%w1, %0
mov\\t%w0, %w1"
[(set_attr "v8type" "fmovi2f,fmovf2i,fmov,fpsimd_load,fpsimd_store,fpsimd_load,fpsimd_store,fmov")
[(set_attr "v8type" "fmovi2f,fmovf2i,\
fmov,fconst,fpsimd_load,\
fpsimd_store,fpsimd_load,fpsimd_store,fmov")
(set_attr "mode" "SF")]
)
(define_insn "*movdf_aarch64"
[(set (match_operand:DF 0 "nonimmediate_operand" "= w,?r,w,w,m,r,m ,r")
(match_operand:DF 1 "general_operand" "?rY, w,w,m,w,m,rY,r"))]
[(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w ,w,m,r,m ,r")
(match_operand:DF 1 "general_operand" "?rY, w,w,Ufc,m,w,m,rY,r"))]
"TARGET_FLOAT && (register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
"@
fmov\\t%d0, %x1
fmov\\t%x0, %d1
fmov\\t%d0, %d1
fmov\\t%d0, %1
ldr\\t%d0, %1
str\\t%d1, %0
ldr\\t%x0, %1
str\\t%x1, %0
mov\\t%x0, %x1"
[(set_attr "v8type" "fmovi2f,fmovf2i,fmov,fpsimd_load,fpsimd_store,fpsimd_load,fpsimd_store,move")
[(set_attr "v8type" "fmovi2f,fmovf2i,\
fmov,fconst,fpsimd_load,\
fpsimd_store,fpsimd_load,fpsimd_store,move")
(set_attr "mode" "DF")]
)
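With the new "Ufc" alternatives above, a scalar constant accepted by
aarch64_float_const_representable_p is materialised by a single fmov
immediate rather than a constant-pool load. A small illustration
(hypothetical function names; the comments show the output one would
expect from this patch at -O2):

double
representable (void)
{
  return 4.25;	/* fmov	d0, 4.25 -- since 4.25 = (17/16) * 2^2.  */
}

double
not_representable (void)
{
  return 0.1;	/* No FMOV encoding; loaded from the constant
		   pool instead (adrp + ldr).  */
}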
@@ -991,7 +997,6 @@
(set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*")]
)
;; Operands 1 and 3 are tied together by the final condition; so we allow
;; fairly lax checking on the second memory operation.
(define_insn "load_pair<mode>"

gcc/config/aarch64/constraints.md
@@ -69,7 +69,7 @@
(define_constraint "Y"
"Floating point constant zero."
(and (match_code "const_double")
(match_test "aarch64_const_double_zero_rtx_p (op)")))
(match_test "aarch64_float_const_zero_rtx_p (op)")))
(define_constraint "Z"
"Integer constant zero."
@@ -138,6 +138,12 @@
(and (match_code "mem")
(match_test "aarch64_simd_mem_operand_p (op)")))
(define_constraint "Ufc"
"A floating point constant which can be used with an\
FMOV immediate operation."
(and (match_code "const_double")
(match_test "aarch64_float_const_representable_p (op)")))
(define_constraint "Dn"
"@internal
A constraint that matches vector of immediates."

gcc/config/aarch64/predicates.md
@@ -41,7 +41,7 @@
(define_predicate "aarch64_fp_compare_operand"
(ior (match_operand 0 "register_operand")
(and (match_code "const_double")
(match_test "aarch64_const_double_zero_rtx_p (op)"))))
(match_test "aarch64_float_const_zero_rtx_p (op)"))))
(define_predicate "aarch64_plus_immediate"
(and (match_code "const_int")

gcc/testsuite/ChangeLog
2013-01-07 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/fmovd.c: New.
* gcc.target/aarch64/fmovf.c: Likewise.
* gcc.target/aarch64/fmovd-zero.c: Likewise.
* gcc.target/aarch64/fmovf-zero.c: Likewise.
* gcc.target/aarch64/vect-fmovd.c: Likewise.
* gcc.target/aarch64/vect-fmovf.c: Likewise.
* gcc.target/aarch64/vect-fmovd-zero.c: Likewise.
* gcc.target/aarch64/vect-fmovf-zero.c: Likewise.
2013-01-07 Richard Biener <rguenther@suse.de>
PR tree-optimization/55888

gcc/testsuite/gcc.target/aarch64/fmovd-zero.c
/* { dg-do compile } */
/* { dg-options "-O2" } */
void
foo (double *output)
{
*output = 0.0;
}
/* { dg-final { scan-assembler "fmov\\td\[0-9\]+, xzr" } } */

gcc/testsuite/gcc.target/aarch64/fmovd.c
/* { dg-do compile } */
/* { dg-options "-O2" } */
void
foo (double *output)
{
*output = 4.25;
}
/* { dg-final { scan-assembler "fmov\\td\[0-9\]+, 4\\.25" } } */

gcc/testsuite/gcc.target/aarch64/fmovf-zero.c
/* { dg-do compile } */
/* { dg-options "-O2" } */
void
foo (float *output)
{
*output = 0.0;
}
/* { dg-final { scan-assembler "fmov\\ts\[0-9\]+, wzr" } } */

gcc/testsuite/gcc.target/aarch64/fmovf.c
/* { dg-do compile } */
/* { dg-options "-O2" } */
void
foo (float *output)
{
*output = 4.25;
}
/* { dg-final { scan-assembler "fmov\\ts\[0-9\]+, 4\\.25" } } */

gcc/testsuite/gcc.target/aarch64/vect-fmovd-zero.c
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */
#define N 32
void
foo (double *output)
{
int i = 0;
/* Vectorizable. */
for (i = 0; i < N; i++)
output[i] = 0.0;
}
/* { dg-final { scan-assembler "movi\\tv\[0-9\]+\\.2d, 0" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

gcc/testsuite/gcc.target/aarch64/vect-fmovd.c
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */
#define N 32
void
foo (double *output)
{
int i = 0;
/* Vectorizable. */
for (i = 0; i < N; i++)
output[i] = 4.25;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-assembler "fmov\\tv\[0-9\]+\\.2d, 4\\.25" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

gcc/testsuite/gcc.target/aarch64/vect-fmovf-zero.c
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */
#define N 32
void
foo (float *output)
{
int i = 0;
/* Vectorizable. */
for (i = 0; i < N; i++)
output[i] = 0.0;
}
/* { dg-final { scan-assembler "movi\\tv\[0-9\]+\\.\[24\]s, 0" } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

gcc/testsuite/gcc.target/aarch64/vect-fmovf.c
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */
#define N 32
void
foo (float *output)
{
int i = 0;
/* Vectorizable. */
for (i = 0; i < N; i++)
output[i] = 4.25;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { scan-assembler "fmov\\tv\[0-9\]+\\.\[24\]s, 4\\.25" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */