Commit 8b66a2d4 by Alan Lawrence Committed by Alan Lawrence

[AArch64] PR/64134: Make aarch64_expand_vector_init use 'ins' more often

gcc/:

	PR target/64134
	* config/aarch64/aarch64.c (aarch64_expand_vector_init): Load constant
	and overwrite variable parts if <= 1/2 the elements are variable.

gcc/testsuite/:

	PR target/64134
	* gcc.target/aarch64/vec_init_1.c: New test.

From-SVN: r222229
parent 86f25864
2015-04-20 Alan Lawrence <alan.lawrence@arm.com>
PR target/64134
* config/aarch64/aarch64.c (aarch64_expand_vector_init): Load constant
and overwrite variable parts if <= 1/2 the elements are variable.
2015-04-19 Vladimir Makarov <vmakarov@redhat.com> 2015-04-19 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimization/65805 PR rtl-optimization/65805
......
...@@ -8769,22 +8769,19 @@ aarch64_expand_vector_init (rtx target, rtx vals) ...@@ -8769,22 +8769,19 @@ aarch64_expand_vector_init (rtx target, rtx vals)
machine_mode mode = GET_MODE (target); machine_mode mode = GET_MODE (target);
machine_mode inner_mode = GET_MODE_INNER (mode); machine_mode inner_mode = GET_MODE_INNER (mode);
int n_elts = GET_MODE_NUNITS (mode); int n_elts = GET_MODE_NUNITS (mode);
int n_var = 0, one_var = -1; int n_var = 0;
rtx any_const = NULL_RTX;
bool all_same = true; bool all_same = true;
rtx x, mem;
int i;
x = XVECEXP (vals, 0, 0); for (int i = 0; i < n_elts; ++i)
if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
n_var = 1, one_var = 0;
for (i = 1; i < n_elts; ++i)
{ {
x = XVECEXP (vals, 0, i); rtx x = XVECEXP (vals, 0, i);
if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x))
++n_var, one_var = i; ++n_var;
else
any_const = x;
if (!rtx_equal_p (x, XVECEXP (vals, 0, 0))) if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
all_same = false; all_same = false;
} }
...@@ -8801,36 +8798,60 @@ aarch64_expand_vector_init (rtx target, rtx vals) ...@@ -8801,36 +8798,60 @@ aarch64_expand_vector_init (rtx target, rtx vals)
/* Splat a single non-constant element if we can. */ /* Splat a single non-constant element if we can. */
if (all_same) if (all_same)
{ {
x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x)); aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
return; return;
} }
/* One field is non-constant. Load constant then overwrite varying /* Half the fields (or less) are non-constant. Load constant then overwrite
field. This is more efficient than using the stack. */ varying fields. Hope that this is more efficient than using the stack. */
if (n_var == 1) if (n_var <= n_elts/2)
{ {
rtx copy = copy_rtx (vals); rtx copy = copy_rtx (vals);
rtx index = GEN_INT (one_var);
enum insn_code icode;
/* Load constant part of vector, substitute neighboring value for /* Load constant part of vector. We really don't care what goes into the
varying element. */ parts we will overwrite, but we're more likely to be able to load the
XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, one_var ^ 1); constant efficiently if it has fewer, larger, repeating parts
(see aarch64_simd_valid_immediate). */
for (int i = 0; i < n_elts; i++)
{
rtx x = XVECEXP (vals, 0, i);
if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
continue;
rtx subst = any_const;
for (int bit = n_elts / 2; bit > 0; bit /= 2)
{
/* Look in the copied vector, as more elements are const. */
rtx test = XVECEXP (copy, 0, i ^ bit);
if (CONST_INT_P (test) || CONST_DOUBLE_P (test))
{
subst = test;
break;
}
}
XVECEXP (copy, 0, i) = subst;
}
aarch64_expand_vector_init (target, copy); aarch64_expand_vector_init (target, copy);
/* Insert variable. */ /* Insert variables. */
x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); enum insn_code icode = optab_handler (vec_set_optab, mode);
icode = optab_handler (vec_set_optab, mode);
gcc_assert (icode != CODE_FOR_nothing); gcc_assert (icode != CODE_FOR_nothing);
emit_insn (GEN_FCN (icode) (target, x, index));
for (int i = 0; i < n_elts; i++)
{
rtx x = XVECEXP (vals, 0, i);
if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
continue;
x = copy_to_mode_reg (inner_mode, x);
emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
}
return; return;
} }
/* Construct the vector in memory one field at a time /* Construct the vector in memory one field at a time
and load the whole vector. */ and load the whole vector. */
mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
for (i = 0; i < n_elts; i++) for (int i = 0; i < n_elts; i++)
emit_move_insn (adjust_address_nv (mem, inner_mode, emit_move_insn (adjust_address_nv (mem, inner_mode,
i * GET_MODE_SIZE (inner_mode)), i * GET_MODE_SIZE (inner_mode)),
XVECEXP (vals, 0, i)); XVECEXP (vals, 0, i));
......
2015-04-20 Alan Lawrence <alan.lawrence@arm.com>
PR target/64134
* gcc.target/aarch64/vec_init_1.c: New test.
2015-04-20 Yvan Roux <yvan.roux@linaro.org> 2015-04-20 Yvan Roux <yvan.roux@linaro.org>
* gcc.target/arm/pr65729.c: Restrict to hard float ABI compliant * gcc.target/arm/pr65729.c: Restrict to hard float ABI compliant
......
/* { dg-do run } */
/* { dg-options "-O2 -fomit-frame-pointer --save-temps -fno-inline" } */
/* Declared directly rather than through a header.  */
extern void abort (void);
/* Vector of float elements occupying 16 bytes in total.
   NOTE(review): the name suggests 16-bit lanes, but the element type is
   float and the code below initializes and reads four lanes -- confirm the
   name is intentional.  */
typedef float float16x4_t __attribute__ ((vector_size ((16))));
/* Globals that supply the two non-constant lanes of the vector built by
   make_vector; main assigns them before calling it.  */
float a;
float b;
/* Build a four-lane vector whose first two lanes are the constant 0 and
   whose last two lanes are read from the globals a and b.  The dg-final
   directives in this file expect exactly two "ins" instructions and no
   "stp" in the generated assembly.  */
float16x4_t
make_vector ()
{
return (float16x4_t) { 0, 0, a, b };
}
/* Runtime check: assign a and b, build the vector with make_vector and
   abort if any lane differs from the expected { 0, 0, a, b }.  Returns 0
   on success.  argc and argv are not used.  */
int
main (int argc, char **argv)
{
a = 4.0;
b = 3.0;
float16x4_t vec = make_vector ();
/* Lanes are compared with != against the exact values stored above.  */
if (vec[0] != 0 || vec[1] != 0 || vec[2] != a || vec[3] != b)
abort ();
return 0;
}
/* { dg-final { scan-assembler-times "ins\\t" 2 } } */
/* What we want to check is that make_vector does not stp the whole vector
to the stack. Unfortunately, the scan also covers the body of main(), which
makes this a bit fragile: the test currently passes only because the option
-fomit-frame-pointer avoids the use of stp in the prologue to main(). */
/* { dg-final { scan-assembler-not "stp\\t" } } */
/* { dg-final { cleanup-saved-temps } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment