Commit 35a093b6 by James Greenhalgh Committed by James Greenhalgh

[Patch AArch64] GCC 6 regression in vector performance. - Fix vector…

[Patch AArch64] GCC 6 regression in vector performance. - Fix vector initialization to happen with lane load instructions.

gcc/

	* config/aarch64/aarch64.c (aarch64_expand_vector_init): Refactor,
	always use lane loads to construct non-constant vectors.

gcc/testsuite/

	* gcc.target/aarch64/vector_initialization_nostack.c: New.


Co-Authored-By: Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>

From-SVN: r233461
parent 51b3f077
2016-02-16 James Greenhalgh <james.greenhalgh@arm.com> 2016-02-16 James Greenhalgh <james.greenhalgh@arm.com>
Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
* config/aarch64/aarch64.c (aarch64_expand_vector_init): Refactor,
always use lane loads to construct non-constant vectors.
2016-02-16 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64.md * config/aarch64/aarch64.md
(arch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Fix register (arch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>): Fix register
......
...@@ -11053,28 +11053,37 @@ aarch64_simd_make_constant (rtx vals) ...@@ -11053,28 +11053,37 @@ aarch64_simd_make_constant (rtx vals)
return NULL_RTX; return NULL_RTX;
} }
/* Expand a vector initialisation sequence, such that TARGET is
initialised to contain VALS. */
void void
aarch64_expand_vector_init (rtx target, rtx vals) aarch64_expand_vector_init (rtx target, rtx vals)
{ {
machine_mode mode = GET_MODE (target); machine_mode mode = GET_MODE (target);
machine_mode inner_mode = GET_MODE_INNER (mode); machine_mode inner_mode = GET_MODE_INNER (mode);
/* The number of vector elements. */
int n_elts = GET_MODE_NUNITS (mode); int n_elts = GET_MODE_NUNITS (mode);
/* The number of vector elements which are not constant. */
int n_var = 0; int n_var = 0;
rtx any_const = NULL_RTX; rtx any_const = NULL_RTX;
/* The first element of vals. */
rtx v0 = XVECEXP (vals, 0, 0);
bool all_same = true; bool all_same = true;
/* Count the number of variable elements to initialise. */
for (int i = 0; i < n_elts; ++i) for (int i = 0; i < n_elts; ++i)
{ {
rtx x = XVECEXP (vals, 0, i); rtx x = XVECEXP (vals, 0, i);
if (!CONST_INT_P (x) && !CONST_DOUBLE_P (x)) if (!(CONST_INT_P (x) || CONST_DOUBLE_P (x)))
++n_var; ++n_var;
else else
any_const = x; any_const = x;
if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) all_same &= rtx_equal_p (x, v0);
all_same = false;
} }
/* No variable elements, hand off to aarch64_simd_make_constant which knows
how best to handle this. */
if (n_var == 0) if (n_var == 0)
{ {
rtx constant = aarch64_simd_make_constant (vals); rtx constant = aarch64_simd_make_constant (vals);
...@@ -11088,14 +11097,15 @@ aarch64_expand_vector_init (rtx target, rtx vals) ...@@ -11088,14 +11097,15 @@ aarch64_expand_vector_init (rtx target, rtx vals)
/* Splat a single non-constant element if we can. */ /* Splat a single non-constant element if we can. */
if (all_same) if (all_same)
{ {
rtx x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); rtx x = copy_to_mode_reg (inner_mode, v0);
aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x)); aarch64_emit_move (target, gen_rtx_VEC_DUPLICATE (mode, x));
return; return;
} }
/* Half the fields (or less) are non-constant. Load constant then overwrite /* Initialise a vector which is part-variable. We want to first try
varying fields. Hope that this is more efficient than using the stack. */ to build those lanes which are constant in the most efficient way we
if (n_var <= n_elts/2) can. */
if (n_var != n_elts)
{ {
rtx copy = copy_rtx (vals); rtx copy = copy_rtx (vals);
...@@ -11122,31 +11132,21 @@ aarch64_expand_vector_init (rtx target, rtx vals) ...@@ -11122,31 +11132,21 @@ aarch64_expand_vector_init (rtx target, rtx vals)
XVECEXP (copy, 0, i) = subst; XVECEXP (copy, 0, i) = subst;
} }
aarch64_expand_vector_init (target, copy); aarch64_expand_vector_init (target, copy);
}
/* Insert variables. */ /* Insert the variable lanes directly. */
enum insn_code icode = optab_handler (vec_set_optab, mode);
gcc_assert (icode != CODE_FOR_nothing);
for (int i = 0; i < n_elts; i++) enum insn_code icode = optab_handler (vec_set_optab, mode);
{ gcc_assert (icode != CODE_FOR_nothing);
rtx x = XVECEXP (vals, 0, i);
if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
continue;
x = copy_to_mode_reg (inner_mode, x);
emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
}
return;
}
/* Construct the vector in memory one field at a time
and load the whole vector. */
rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
for (int i = 0; i < n_elts; i++) for (int i = 0; i < n_elts; i++)
emit_move_insn (adjust_address_nv (mem, inner_mode, {
i * GET_MODE_SIZE (inner_mode)), rtx x = XVECEXP (vals, 0, i);
XVECEXP (vals, 0, i)); if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
emit_move_insn (target, mem); continue;
x = copy_to_mode_reg (inner_mode, x);
emit_insn (GEN_FCN (icode) (target, x, GEN_INT (i)));
}
} }
static unsigned HOST_WIDE_INT static unsigned HOST_WIDE_INT
......
2016-02-16 James Greenhalgh <james.greenhalgh@arm.com>
Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
* gcc.target/aarch64/vector_initialization_nostack.c: New.
2016-02-16 Jakub Jelinek <jakub@redhat.com> 2016-02-16 Jakub Jelinek <jakub@redhat.com>
PR middle-end/69801 PR middle-end/69801
......
/* { dg-do compile } */
/* { dg-options "-O3 -ftree-vectorize -fno-vect-cost-model" } */
/* 100x100 float matrix read by f9.  As a file-scope object it starts
   out zero-initialised.  */
float arr_f[100][100];

/* Return the sum, for k = 0 .. 99 in increasing order, of
   arr_f[k][0] * arr_f[0][k]: each first-column entry multiplied by the
   matching first-row entry, accumulated in a float.  */
float
f9 (void)
{
  float acc = 0;

  for (int k = 0; k < 100; k++)
    acc = acc + arr_f[k][0] * arr_f[0][k];

  return acc;
}
/* 100x100 int matrix read by f10.  As a file-scope object it starts
   out zero-initialised.  */
int arr[100][100];

/* Return the sum, for k = 0 .. 99 in increasing order, of
   arr[k][0] * arr[0][k]: each first-column entry multiplied by the
   matching first-row entry, accumulated in an int.  */
int
f10 (void)
{
  int acc = 0;

  for (int k = 0; k < 100; k++)
    acc = acc + arr[k][0] * arr[0][k];

  return acc;
}
/* 100x100 double matrix read by f11.  As a file-scope object it starts
   out zero-initialised.  */
double arr_d[100][100];

/* Return the sum, for k = 0 .. 99 in increasing order, of
   arr_d[k][0] * arr_d[0][k]: each first-column entry multiplied by the
   matching first-row entry, accumulated in a double.  */
double
f11 (void)
{
  double acc = 0;

  for (int k = 0; k < 100; k++)
    acc = acc + arr_d[k][0] * arr_d[0][k];

  return acc;
}
/* 100x100 char matrix read by f12.  As a file-scope object it starts
   out zero-initialised.  */
char arr_c[100][100];

/* Return the sum, for k = 0 .. 99 in increasing order, of
   arr_c[k][0] * arr_c[0][k].  Each product is formed after the usual
   integer promotions and the running total is narrowed back to char
   on every iteration, exactly as a compound assignment to a char
   would do.  */
char
f12 (void)
{
  char acc = 0;

  for (int k = 0; k < 100; k++)
    acc = (char) (acc + arr_c[k][0] * arr_c[0][k]);

  return acc;
}
/* { dg-final { scan-assembler-not "sp" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment