Commit 2d2ee186 by Richard Sandiford Committed by Richard Sandiford

Fix PEELING_FOR_NITERS calculation (PR 87288)

PEELING_FOR_GAPS now means "peel one iteration for the epilogue",
in much the same way that PEELING_FOR_ALIGNMENT > 0 means
"peel that number of iterations for the prologue".  We weren't
taking this into account when deciding whether we needed to peel
further scalar iterations beyond the iterations for "gaps" and
"alignment".

Only the first test failed before the patch.  The other two
are just for completeness.

2018-09-20  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR tree-optimization/87288
	* tree-vect-loop.c (vect_analyze_loop_2): Take PEELING_FOR_GAPS
	into account when determining PEELING_FOR_NITERS.

gcc/testsuite/
	PR tree-optimization/87288
	* gcc.dg/vect/pr87288-1.c: New test.
	* gcc.dg/vect/pr87288-2.c: Likewise.
	* gcc.dg/vect/pr87288-3.c: Likewise.

From-SVN: r264440
parent 508a909e
2018-09-20 Richard Sandiford <richard.sandiford@arm.com> 2018-09-20 Richard Sandiford <richard.sandiford@arm.com>
PR tree-optimization/87288
* tree-vect-loop.c (vect_analyze_loop_2): Take PEELING_FOR_GAPS
into account when determining PEELING_FOR_NITERS.
2018-09-20 Richard Sandiford <richard.sandiford@arm.com>
PR tree-optimization/86877 PR tree-optimization/86877
* tree-vect-loop.c (vect_analyze_loop_2): Call * tree-vect-loop.c (vect_analyze_loop_2): Call
vect_verify_datarefs_alignment. vect_verify_datarefs_alignment.
......
2018-09-20 Richard Sandiford <richard.sandiford@arm.com> 2018-09-20 Richard Sandiford <richard.sandiford@arm.com>
PR tree-optimization/87288
* gcc.dg/vect/pr87288-1.c: New test.
* gcc.dg/vect/pr87288-2.c: Likewise.
* gcc.dg/vect/pr87288-3.c: Likewise.
2018-09-20 Richard Sandiford <richard.sandiford@arm.com>
PR tree-optimization/86877 PR tree-optimization/86877
* gfortran.dg/vect/vect-8-epilogue.F90: New test. * gfortran.dg/vect/vect-8-epilogue.F90: New test.
......
#include "tree-vect.h"
#define N (VECTOR_BITS / 32)
#define MAX_COUNT 4
/* Kernel under test.  For each I in [0, COUNT * N), store
   B[I * 2] + COUNT into A[I * 2] and COUNT into A[I * 2 + 1].
   Only the even-indexed elements of B are read, so the loads from B
   skip every other element.  The trip count depends on the COUNT
   argument.
   NOTE(review): the loop shape is presumably what the vectorizer is
   expected to handle for PR 87288, so it should not be restructured
   -- confirm before editing.  */
void __attribute__ ((noipa))
run (int *restrict a, int *restrict b, int count)
{
for (int i = 0; i < count * N; ++i)
{
a[i * 2] = b[i * 2] + count;
a[i * 2 + 1] = count;
}
}
/* Verify the output of the kernel for a given COUNT.  Abort unless, for
   every index below COUNT * N, A holds IDX * 41 + COUNT in the even slot
   and COUNT in the odd slot, and the element at A[COUNT * 2 * N] still
   contains the value 999.  */
void __attribute__ ((noipa))
check (int *restrict a, int count)
{
  int idx = 0;
  while (idx < count * N)
    {
      int *pair = &a[idx * 2];
      /* Test the even slot first; the odd slot is only read when the
	 even slot matched.  */
      if (pair[0] != idx * 41 + count)
	__builtin_abort ();
      if (pair[1] != count)
	__builtin_abort ();
      ++idx;
    }

  /* The element just past the checked region must be untouched.  */
  if (a[count * 2 * N] == 999)
    return;
  __builtin_abort ();
}
/* A has one element more than the N * MAX_COUNT * 2 elements that the
   largest run can write; main uses the slot just past each run's output
   as a 999 sentinel.  B supplies the input values.  */
int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
/* Fill the even elements of B with I * 41, then for every count from 0
   to MAX_COUNT inclusive: plant the sentinel at A[count * 2 * N], call
   run and verify the result with check.  Returns 0 when every check
   passes (check aborts otherwise).  */
int
main (void)
{
/* Helper from tree-vect.h; presumably verifies that the target can
   execute vector code -- confirm against that header.  */
check_vect ();
for (int i = 0; i < N * MAX_COUNT; ++i)
{
b[i * 2] = i * 41;
/* Empty asm with a memory clobber.  NOTE(review): presumably here to
   keep this setup loop from being vectorized, so that only the loop in
   run is counted by the dump scan -- confirm.  */
asm volatile ("" ::: "memory");
}
for (int i = 0; i <= MAX_COUNT; ++i)
{
/* Sentinel just past the last element run (a, b, i) should write.  */
a[i * 2 * N] = 999;
run (a, b, i);
check (a, i);
}
return 0;
}
/* { dg-final { scan-tree-dump-times {LOOP VECTORIZED} 1 "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
#include "tree-vect.h"
#define N (VECTOR_BITS / 32)
#define MAX_COUNT 4
/* Define run_COUNT, a variant of the kernel whose trip count N * COUNT
   does not depend on any function argument.  For each I in
   [0, N * COUNT) it stores B[I * 2] + COUNT into A[I * 2] and COUNT
   into A[I * 2 + 1]; only the even-indexed elements of B are read.
   NOTE(review): the loop shape is presumably what the vectorizer test
   relies on -- confirm before restructuring.  */
#define RUN_COUNT(COUNT) \
void __attribute__ ((noipa)) \
run_##COUNT (int *restrict a, int *restrict b) \
{ \
for (int i = 0; i < N * COUNT; ++i) \
{ \
a[i * 2] = b[i * 2] + COUNT; \
a[i * 2 + 1] = COUNT; \
} \
}
/* Instantiate run_1 through run_4.  */
RUN_COUNT (1)
RUN_COUNT (2)
RUN_COUNT (3)
RUN_COUNT (4)
/* Verify the output of run_COUNT.  Abort unless, for every index below
   COUNT * N, A holds IDX * 41 + COUNT in the even slot and COUNT in the
   odd slot, and the element at A[COUNT * 2 * N] still contains the
   value 999.  */
void __attribute__ ((noipa))
check (int *restrict a, int count)
{
  int idx = 0;
  while (idx < count * N)
    {
      int *pair = &a[idx * 2];
      /* Test the even slot first; the odd slot is only read when the
	 even slot matched.  */
      if (pair[0] != idx * 41 + count)
	__builtin_abort ();
      if (pair[1] != count)
	__builtin_abort ();
      ++idx;
    }

  /* The element just past the checked region must be untouched.  */
  if (a[count * 2 * N] == 999)
    return;
  __builtin_abort ();
}
/* A has one element more than the N * MAX_COUNT * 2 elements that run_4
   can write; main uses the slot just past each run's output as a 999
   sentinel.  B supplies the input values.  */
int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
/* Fill the even elements of B with I * 41, then for each COUNT from 1
   to 4: plant the sentinel at A[N * 2 * COUNT], call run_COUNT and
   verify the result with check.  Returns 0 when every check passes
   (check aborts otherwise).  */
int
main (void)
{
/* Helper from tree-vect.h; presumably verifies that the target can
   execute vector code -- confirm against that header.  */
check_vect ();
for (int i = 0; i < N * MAX_COUNT; ++i)
{
b[i * 2] = i * 41;
/* Empty asm with a memory clobber.  NOTE(review): presumably here to
   keep this setup loop from being vectorized -- confirm.  */
asm volatile ("" ::: "memory");
}
/* Each sentinel sits one element past the region the following run_N
   call writes (indices 0 .. N * 2 * COUNT - 1).  */
a[N * 2] = 999;
run_1 (a, b);
check (a, 1);
a[N * 4] = 999;
run_2 (a, b);
check (a, 2);
a[N * 6] = 999;
run_3 (a, b);
check (a, 3);
a[N * 8] = 999;
run_4 (a, b);
check (a, 4);
return 0;
}
/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
#include "tree-vect.h"
#define N (VECTOR_BITS / 32)
#define MAX_COUNT 4
/* Define run_COUNT, a variant of the kernel that runs N * COUNT + 1
   iterations, a trip count that does not depend on any function
   argument.  For each I in [0, N * COUNT + 1) it stores
   B[I * 2] + COUNT into A[I * 2] and COUNT into A[I * 2 + 1]; only the
   even-indexed elements of B are read.
   NOTE(review): the loop shape is presumably what the vectorizer test
   relies on -- confirm before restructuring.  */
#define RUN_COUNT(COUNT) \
void __attribute__ ((noipa)) \
run_##COUNT (int *restrict a, int *restrict b) \
{ \
for (int i = 0; i < N * COUNT + 1; ++i) \
{ \
a[i * 2] = b[i * 2] + COUNT; \
a[i * 2 + 1] = COUNT; \
} \
}
/* Instantiate run_1 through run_4.  */
RUN_COUNT (1)
RUN_COUNT (2)
RUN_COUNT (3)
RUN_COUNT (4)
/* Verify the output of run_COUNT.  Abort unless, for every index below
   COUNT * N + 1, A holds IDX * 41 + COUNT in the even slot and COUNT in
   the odd slot, and the element at A[COUNT * 2 * N + 2] still contains
   the value 999.  */
void __attribute__ ((noipa))
check (int *restrict a, int count)
{
  int idx = 0;
  while (idx < count * N + 1)
    {
      int *pair = &a[idx * 2];
      /* Test the even slot first; the odd slot is only read when the
	 even slot matched.  */
      if (pair[0] != idx * 41 + count)
	__builtin_abort ();
      if (pair[1] != count)
	__builtin_abort ();
      ++idx;
    }

  /* The element just past the checked region must be untouched.  */
  if (a[count * 2 * N + 2] == 999)
    return;
  __builtin_abort ();
}
/* run_4 writes A[0] through A[N * 8 + 1]; the extra element at
   A[N * MAX_COUNT * 2 + 2] leaves room for the 999 sentinel that follows
   it.  B is read at even indices up to B[N * MAX_COUNT * 2].  */
int a[N * MAX_COUNT * 2 + 3], b[N * MAX_COUNT * 2 + 2];
/* Fill the even elements of B with I * 41, then for each COUNT from 1
   to 4: plant the sentinel at A[N * 2 * COUNT + 2], call run_COUNT and
   verify the result with check.  Returns 0 when every check passes
   (check aborts otherwise).  */
int
main (void)
{
/* Helper from tree-vect.h; presumably verifies that the target can
   execute vector code -- confirm against that header.  */
check_vect ();
for (int i = 0; i < N * MAX_COUNT + 1; ++i)
{
b[i * 2] = i * 41;
/* Empty asm with a memory clobber.  NOTE(review): presumably here to
   keep this setup loop from being vectorized -- confirm.  */
asm volatile ("" ::: "memory");
}
/* Each sentinel sits one element past the region the following run_N
   call writes (indices 0 .. N * 2 * COUNT + 1).  */
a[N * 2 + 2] = 999;
run_1 (a, b);
check (a, 1);
a[N * 4 + 2] = 999;
run_2 (a, b);
check (a, 2);
a[N * 6 + 2] = 999;
run_3 (a, b);
check (a, 3);
a[N * 8 + 2] = 999;
run_4 (a, b);
check (a, 4);
return 0;
}
/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */
...@@ -2074,14 +2074,22 @@ start_over: ...@@ -2074,14 +2074,22 @@ start_over:
/* The main loop handles all iterations. */ /* The main loop handles all iterations. */
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
{ {
if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) /* Work out the (constant) number of iterations that need to be
- LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo), peeled for reasons other than niters. */
unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
peel_niter += 1;
if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
LOOP_VINFO_VECT_FACTOR (loop_vinfo))) LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
} }
else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
/* ??? When peeling for gaps but not alignment, we could
try to check whether the (variable) niters is known to be
VF * N + 1. That's something of a niche case though. */
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|| !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf) || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
|| ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
< (unsigned) exact_log2 (const_vf)) < (unsigned) exact_log2 (const_vf))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment