Commit 598eaaa2 by Yuri Rumyantsev Committed by H.J. Lu

Support non-masked epilogue vectorization

gcc/

2016-11-16  Yuri Rumyantsev  <ysrumyan@gmail.com>

	* params.def (PARAM_VECT_EPILOGUES_NOMASK): New.
	* tree-if-conv.c (tree_if_conversion): Make public.
	* tree-if-conv.h: New file.
	* tree-vect-data-refs.c (vect_analyze_data_ref_dependences): Avoid
	dynamic alias checks for epilogues.
	* tree-vect-loop-manip.c (vect_do_peeling): Return created epilog.
	* tree-vect-loop.c: Include tree-if-conv.h.
	(new_loop_vec_info): Add zeroing orig_loop_info field.
	(vect_analyze_loop_2): Don't try to enhance alignment for epilogues.
	(vect_analyze_loop): Add argument ORIG_LOOP_INFO which is not NULL
	if epilogue is vectorized, set up orig_loop_info field of loop_vinfo
	using passed argument.
	(vect_transform_loop): Check if created epilogue should be returned
	for further vectorization with less vf.  If-convert epilogue if
	required. Print vectorization success for epilogue.
	* tree-vectorizer.c (vectorize_loops): Add epilogue vectorization
	if it is required, pass loop_vinfo produced during vectorization of
	loop body to vect_analyze_loop.
	* tree-vectorizer.h (struct _loop_vec_info): Add new field
	orig_loop_info.
	(LOOP_VINFO_ORIG_LOOP_INFO): New.
	(LOOP_VINFO_EPILOGUE_P): New.
	(LOOP_VINFO_ORIG_VECT_FACTOR): New.
	(vect_do_peeling): Change prototype to return epilogue.
	(vect_analyze_loop): Add argument of loop_vec_info type.
	(vect_transform_loop): Return created loop.

gcc/testsuite/

2016-11-16  Yuri Rumyantsev  <ysrumyan@gmail.com>

	* lib/target-supports.exp (check_avx2_hw_available): New.
	(check_effective_target_avx2_runtime): New.
	* gcc.dg/vect/vect-tail-nomask-1.c: New test.

From-SVN: r242501
parent 03b85dcd
2016-11-16 Yuri Rumyantsev <ysrumyan@gmail.com>
* params.def (PARAM_VECT_EPILOGUES_NOMASK): New.
* tree-if-conv.c (tree_if_conversion): Make public.
* tree-if-conv.h: New file.
* tree-vect-data-refs.c (vect_analyze_data_ref_dependences): Avoid
dynamic alias checks for epilogues.
* tree-vect-loop-manip.c (vect_do_peeling): Return created epilog.
* tree-vect-loop.c: Include tree-if-conv.h.
(new_loop_vec_info): Add zeroing orig_loop_info field.
(vect_analyze_loop_2): Don't try to enhance alignment for epilogues.
(vect_analyze_loop): Add argument ORIG_LOOP_INFO which is not NULL
if epilogue is vectorized, set up orig_loop_info field of loop_vinfo
using passed argument.
(vect_transform_loop): Check if created epilogue should be returned
for further vectorization with less vf. If-convert epilogue if
required. Print vectorization success for epilogue.
* tree-vectorizer.c (vectorize_loops): Add epilogue vectorization
if it is required, pass loop_vinfo produced during vectorization of
loop body to vect_analyze_loop.
* tree-vectorizer.h (struct _loop_vec_info): Add new field
orig_loop_info.
(LOOP_VINFO_ORIG_LOOP_INFO): New.
(LOOP_VINFO_EPILOGUE_P): New.
(LOOP_VINFO_ORIG_VECT_FACTOR): New.
(vect_do_peeling): Change prototype to return epilogue.
(vect_analyze_loop): Add argument of loop_vec_info type.
(vect_transform_loop): Return created loop.
2016-11-16 Segher Boessenkool <segher@kernel.crashing.org> 2016-11-16 Segher Boessenkool <segher@kernel.crashing.org>
* config/rs6000/rs6000.c (rs6000_components_for_bb): Mark the LR * config/rs6000/rs6000.c (rs6000_components_for_bb): Mark the LR
...@@ -1270,6 +1270,11 @@ DEFPARAM (PARAM_MAX_VRP_SWITCH_ASSERTIONS, ...@@ -1270,6 +1270,11 @@ DEFPARAM (PARAM_MAX_VRP_SWITCH_ASSERTIONS,
"edge of a switch statement during VRP", "edge of a switch statement during VRP",
10, 0, 0) 10, 0, 0)
DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK,
"vect-epilogues-nomask",
"Enable loop epilogue vectorization using smaller vector size.",
0, 0, 1)
/* /*
Local variables: Local variables:
......
2016-11-16 Yuri Rumyantsev <ysrumyan@gmail.com>
* lib/target-supports.exp (check_avx2_hw_available): New.
(check_effective_target_avx2_runtime): New.
* gcc.dg/vect/vect-tail-nomask-1.c: New test.
2016-11-16 Tamar Christina <tamar.christina@arm.com> 2016-11-16 Tamar Christina <tamar.christina@arm.com>
PR testsuite/78136 PR testsuite/78136
......
/* { dg-do run } */
/* { dg-require-weak "" } */
/* { dg-additional-options "--param vect-epilogues-nomask=1 -mavx2" { target avx2_runtime } } */
/* Number of elements each kernel processes.  The buffers are allocated
   with one extra element that init_data fills with a guard value.  */
#define SIZE 1023
/* Byte alignment requested from posix_memalign and promised to the
   kernels through __builtin_assume_aligned.  */
#define ALIGN 64
/* Declared weak so that main can test the symbol at run time and skip
   the test when the C library does not provide posix_memalign.  */
extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
extern void free (void *);
/* Kernel with a constant iteration count: stores a[i] + b[i] into c[i]
   for the first SIZE elements.  A, B and C are restrict-qualified and
   must each point to at least SIZE ints.
   NOTE(review): the dg-final scans at the end of the file count
   vectorizer dump messages, so the exact form of this loop is
   presumably significant; avoid restructuring it.  */
void __attribute__((noinline))
test_citer (int * __restrict__ a,
int * __restrict__ b,
int * __restrict__ c)
{
int i;
/* Tell the compiler that every pointer is ALIGN-byte aligned.  */
a = (int *)__builtin_assume_aligned (a, ALIGN);
b = (int *)__builtin_assume_aligned (b, ALIGN);
c = (int *)__builtin_assume_aligned (c, ALIGN);
for (i = 0; i < SIZE; i++)
c[i] = a[i] + b[i];
}
/* Kernel with a run-time iteration count: stores a[i] + b[i] into c[i]
   for the first SIZE (the parameter, not the macro) elements.  A, B and
   C are restrict-qualified and must each point to at least SIZE ints.
   NOTE(review): the dg-final scans at the end of the file count
   vectorizer dump messages, so the exact form of this loop is
   presumably significant; avoid restructuring it.  */
void __attribute__((noinline))
test_viter (int * __restrict__ a,
int * __restrict__ b,
int * __restrict__ c,
int size)
{
int i;
/* Tell the compiler that every pointer is ALIGN-byte aligned.  */
a = (int *)__builtin_assume_aligned (a, ALIGN);
b = (int *)__builtin_assume_aligned (b, ALIGN);
c = (int *)__builtin_assume_aligned (c, ALIGN);
for (i = 0; i < size; i++)
c[i] = a[i] + b[i];
}
/* Initialize the three test buffers.

   For every index i in [0, SIZE): a[i] = i, b[i] = -i and c[i] = 0.
   Element SIZE of each buffer is then set to SIZE; callers use it as a
   guard value, so every buffer must hold at least SIZE + 1 ints.  */
void __attribute__((noinline))
init_data (int * __restrict__ a,
	   int * __restrict__ b,
	   int * __restrict__ c,
	   int size)
{
  for (int i = 0; i < size; i++)
    {
      a[i] = i;
      b[i] = -i;
      c[i] = 0;
      /* Compiler memory barrier.  Spelled with the alternate keywords
	 because plain `asm' is not available in strict ISO modes such
	 as -std=c11.  Presumably it also keeps this loop from being
	 vectorized and counted by the dump scans -- TODO confirm.  */
      __asm__ __volatile__ ("" : : : "memory");
    }
  a[size] = b[size] = c[size] = size;
}
/* Allocate three ALIGN-aligned buffers of SIZE + 1 ints, run both
   kernels on them and abort if any result or guard element is wrong.

   If an allocation fails the function returns without testing, after
   releasing whatever was already allocated.  */
void __attribute__((noinline))
run_test (void)
{
  int *a;
  int *b;
  int *c;
  int i;

  if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
    return;
  if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
    goto free_a;
  if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
    goto free_b;

  /* Kernel with a constant iteration count.  */
  init_data (a, b, c, SIZE);
  test_citer (a, b, c);
  for (i = 0; i < SIZE; i++)
    if (c[i] != a[i] + b[i])
      __builtin_abort ();
  /* The guard element past the end must not have been touched.  */
  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
    __builtin_abort ();

  /* Kernel with a run-time iteration count.  */
  init_data (a, b, c, SIZE);
  test_viter (a, b, c, SIZE);
  for (i = 0; i < SIZE; i++)
    if (c[i] != a[i] + b[i])
      __builtin_abort ();
  if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
    __builtin_abort ();

  /* Release in reverse order of allocation; the labels let a failed
     allocation free only the buffers that were obtained before it.  */
  free (c);
free_b:
  free (b);
free_a:
  free (a);
}
int
main (int argc, const char **argv)
{
  /* posix_memalign is declared weak: a null address means the C library
     does not provide it, in which case the test is silently skipped.  */
  if (posix_memalign)
    run_test ();
  return 0;
}
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target avx2_runtime } } } */
/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
...@@ -1730,6 +1730,36 @@ proc check_avx_hw_available { } { ...@@ -1730,6 +1730,36 @@ proc check_avx_hw_available { } {
}] }]
} }
# Return 1 if the target supports executing AVX2 instructions, 0
# otherwise. Cache the result.
proc check_avx2_hw_available { } {
    return [check_cached_effective_target avx2_hw_available {
	# If this is not the right target then we can skip the test.
	if { !([istarget x86_64-*-*] || [istarget i?86-*-*]) } {
	    expr 0
	} else {
	    # The probe program exits with 0 only when OSXSAVE is set in
	    # CPUID leaf 1, CPUID leaf 7 exists and its AVX2 bit is set.
	    # <stddef.h> is needed for NULL, which cpuid.h does not
	    # define; without it the probe fails to compile and AVX2 is
	    # always reported as unavailable.
	    check_runtime_nocache avx2_hw_available {
		#include <stddef.h>
		#include "cpuid.h"
		int main ()
		{
		  unsigned int eax, ebx, ecx, edx;
		  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)
		      || ((ecx & bit_OSXSAVE) != bit_OSXSAVE))
		    return 1;
		  if (__get_cpuid_max (0, NULL) < 7)
		    return 1;
		  __cpuid_count (7, 0, eax, ebx, ecx, edx);
		  return (ebx & bit_AVX2) != bit_AVX2;
		}
	    } ""
	}
    }]
}
# Return 1 if the target supports running SSE executables, 0 otherwise. # Return 1 if the target supports running SSE executables, 0 otherwise.
proc check_effective_target_sse_runtime { } { proc check_effective_target_sse_runtime { } {
...@@ -1805,6 +1835,17 @@ proc check_effective_target_avx_runtime { } { ...@@ -1805,6 +1835,17 @@ proc check_effective_target_avx_runtime { } {
return 0 return 0
} }
# Return 1 if the target supports running AVX2 executables, 0 otherwise.
proc check_effective_target_avx2_runtime { } {
    # All three checks must succeed.  The braced expression evaluates
    # them left to right and stops at the first one that fails, yielding
    # 1 when all hold and 0 otherwise.
    return [expr { [check_effective_target_avx2]
		   && [check_avx2_hw_available]
		   && [check_avx_os_support_available] }]
}
# Return 1 if we are compiling for 64-bit PowerPC but we do not use direct # Return 1 if we are compiling for 64-bit PowerPC but we do not use direct
# move instructions for moves from GPR to FPR. # move instructions for moves from GPR to FPR.
......
...@@ -2734,7 +2734,7 @@ ifcvt_local_dce (basic_block bb) ...@@ -2734,7 +2734,7 @@ ifcvt_local_dce (basic_block bb)
profitability analysis. Returns non-zero todo flags when something profitability analysis. Returns non-zero todo flags when something
changed. */ changed. */
static unsigned int unsigned int
tree_if_conversion (struct loop *loop) tree_if_conversion (struct loop *loop)
{ {
unsigned int todo = 0; unsigned int todo = 0;
......
/* Copyright (C) 2016 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#ifndef GCC_TREE_IF_CONV_H
#define GCC_TREE_IF_CONV_H
/* If-convert the given loop.  The result is a set of todo flags, which
   is non-zero when something changed.  */
unsigned int tree_if_conversion (struct loop *);
#endif /* GCC_TREE_IF_CONV_H */
...@@ -480,6 +480,12 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf) ...@@ -480,6 +480,12 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
LOOP_VINFO_LOOP_NEST (loop_vinfo), true)) LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
return false; return false;
/* For epilogues we either have no aliases or alias versioning
was applied to original loop. Therefore we may just get max_vf
using VF of original loop. */
if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
*max_vf = LOOP_VINFO_ORIG_VECT_FACTOR (loop_vinfo);
else
FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr) FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf)) if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
return false; return false;
......
...@@ -1614,11 +1614,13 @@ slpeel_update_phi_nodes_for_lcssa (struct loop *epilog) ...@@ -1614,11 +1614,13 @@ slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
Note this function peels prolog and epilog only if it's necessary, Note this function peels prolog and epilog only if it's necessary,
as well as guards. as well as guards.
Returns created epilogue or NULL.
TODO: Guard for prefer_scalar_loop should be emitted along with TODO: Guard for prefer_scalar_loop should be emitted along with
versioning conditions if loop versioning is needed. */ versioning conditions if loop versioning is needed. */
void
struct loop *
vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
tree *niters_vector, int th, bool check_profitability, tree *niters_vector, int th, bool check_profitability,
bool niters_no_overflow) bool niters_no_overflow)
...@@ -1634,7 +1636,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, ...@@ -1634,7 +1636,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)); || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
if (!prolog_peeling && !epilog_peeling) if (!prolog_peeling && !epilog_peeling)
return; return NULL;
prob_vector = 9 * REG_BR_PROB_BASE / 10; prob_vector = 9 * REG_BR_PROB_BASE / 10;
if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2) if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
...@@ -1642,7 +1644,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, ...@@ -1642,7 +1644,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf; prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
struct loop *prolog, *epilog, *loop = LOOP_VINFO_LOOP (loop_vinfo); struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
struct loop *first_loop = loop; struct loop *first_loop = loop;
create_lcssa_for_virtual_phi (loop); create_lcssa_for_virtual_phi (loop);
update_ssa (TODO_update_ssa_only_virtuals); update_ssa (TODO_update_ssa_only_virtuals);
...@@ -1824,6 +1826,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, ...@@ -1824,6 +1826,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
} }
adjust_vec.release (); adjust_vec.release ();
free_original_copy_tables (); free_original_copy_tables ();
return epilog;
} }
/* Function vect_create_cond_for_niters_checks. /* Function vect_create_cond_for_niters_checks.
......
...@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see
#include "gimple-fold.h" #include "gimple-fold.h"
#include "cgraph.h" #include "cgraph.h"
#include "tree-cfg.h" #include "tree-cfg.h"
#include "tree-if-conv.h"
/* Loop Vectorization Pass. /* Loop Vectorization Pass.
...@@ -1171,6 +1172,7 @@ new_loop_vec_info (struct loop *loop) ...@@ -1171,6 +1172,7 @@ new_loop_vec_info (struct loop *loop)
LOOP_VINFO_PEELING_FOR_GAPS (res) = false; LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
LOOP_VINFO_PEELING_FOR_NITER (res) = false; LOOP_VINFO_PEELING_FOR_NITER (res) = false;
LOOP_VINFO_OPERANDS_SWAPPED (res) = false; LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
LOOP_VINFO_ORIG_LOOP_INFO (res) = NULL;
return res; return res;
} }
...@@ -2046,6 +2048,10 @@ start_over: ...@@ -2046,6 +2048,10 @@ start_over:
if (!ok) if (!ok)
return false; return false;
/* Do not invoke vect_enhance_data_refs_alignment for epilogue
vectorization. */
if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
{
/* This pass will decide on using loop versioning and/or loop peeling in /* This pass will decide on using loop versioning and/or loop peeling in
order to enhance the alignment of data references in the loop. */ order to enhance the alignment of data references in the loop. */
ok = vect_enhance_data_refs_alignment (loop_vinfo); ok = vect_enhance_data_refs_alignment (loop_vinfo);
...@@ -2056,6 +2062,7 @@ start_over: ...@@ -2056,6 +2062,7 @@ start_over:
"bad data alignment.\n"); "bad data alignment.\n");
return false; return false;
} }
}
if (slp) if (slp)
{ {
...@@ -2308,9 +2315,10 @@ again: ...@@ -2308,9 +2315,10 @@ again:
Apply a set of analyses on LOOP, and create a loop_vec_info struct Apply a set of analyses on LOOP, and create a loop_vec_info struct
for it. The different analyses will record information in the for it. The different analyses will record information in the
loop_vec_info struct. */ loop_vec_info struct. If ORIG_LOOP_VINFO is not NULL epilogue must
be vectorized. */
loop_vec_info loop_vec_info
vect_analyze_loop (struct loop *loop) vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo)
{ {
loop_vec_info loop_vinfo; loop_vec_info loop_vinfo;
unsigned int vector_sizes; unsigned int vector_sizes;
...@@ -2346,6 +2354,10 @@ vect_analyze_loop (struct loop *loop) ...@@ -2346,6 +2354,10 @@ vect_analyze_loop (struct loop *loop)
} }
bool fatal = false; bool fatal = false;
if (orig_loop_vinfo)
LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo;
if (vect_analyze_loop_2 (loop_vinfo, fatal)) if (vect_analyze_loop_2 (loop_vinfo, fatal))
{ {
LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
...@@ -6696,12 +6708,14 @@ loop_niters_no_overflow (loop_vec_info loop_vinfo) ...@@ -6696,12 +6708,14 @@ loop_niters_no_overflow (loop_vec_info loop_vinfo)
The analysis phase has determined that the loop is vectorizable. The analysis phase has determined that the loop is vectorizable.
Vectorize the loop - created vectorized stmts to replace the scalar Vectorize the loop - created vectorized stmts to replace the scalar
stmts in the loop, and update the loop exit condition. */ stmts in the loop, and update the loop exit condition.
Returns scalar epilogue loop if any. */
void struct loop *
vect_transform_loop (loop_vec_info loop_vinfo) vect_transform_loop (loop_vec_info loop_vinfo)
{ {
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
struct loop *epilogue = NULL;
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes; int nbbs = loop->num_nodes;
int i; int i;
...@@ -6780,7 +6794,7 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -6780,7 +6794,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters; LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters;
tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo)); tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo));
bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo); bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo);
vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th, epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th,
check_profitability, niters_no_overflow); check_profitability, niters_no_overflow);
if (niters_vector == NULL_TREE) if (niters_vector == NULL_TREE)
{ {
...@@ -7065,6 +7079,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -7065,6 +7079,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
if (dump_enabled_p ()) if (dump_enabled_p ())
{ {
if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
{
dump_printf_loc (MSG_NOTE, vect_location, dump_printf_loc (MSG_NOTE, vect_location,
"LOOP VECTORIZED\n"); "LOOP VECTORIZED\n");
if (loop->inner) if (loop->inner)
...@@ -7072,6 +7088,11 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -7072,6 +7088,11 @@ vect_transform_loop (loop_vec_info loop_vinfo)
"OUTER LOOP VECTORIZED\n"); "OUTER LOOP VECTORIZED\n");
dump_printf (MSG_NOTE, "\n"); dump_printf (MSG_NOTE, "\n");
} }
else
dump_printf_loc (MSG_NOTE, vect_location,
"LOOP EPILOGUE VECTORIZED (VS=%d)\n",
current_vector_size);
}
/* Free SLP instances here because otherwise stmt reference counting /* Free SLP instances here because otherwise stmt reference counting
won't work. */ won't work. */
...@@ -7082,6 +7103,49 @@ vect_transform_loop (loop_vec_info loop_vinfo) ...@@ -7082,6 +7103,49 @@ vect_transform_loop (loop_vec_info loop_vinfo)
/* Clear-up safelen field since its value is invalid after vectorization /* Clear-up safelen field since its value is invalid after vectorization
since vectorized loop can have loop-carried dependencies. */ since vectorized loop can have loop-carried dependencies. */
loop->safelen = 0; loop->safelen = 0;
/* Don't vectorize epilogue for epilogue. */
if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
epilogue = NULL;
if (epilogue)
{
unsigned int vector_sizes
= targetm.vectorize.autovectorize_vector_sizes ();
vector_sizes &= current_vector_size - 1;
if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK))
epilogue = NULL;
else if (!vector_sizes)
epilogue = NULL;
else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
{
int smallest_vec_size = 1 << ctz_hwi (vector_sizes);
int ratio = current_vector_size / smallest_vec_size;
int eiters = LOOP_VINFO_INT_NITERS (loop_vinfo)
- LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
eiters = eiters % vf;
epilogue->nb_iterations_upper_bound = eiters - 1;
if (eiters < vf / ratio)
epilogue = NULL;
}
}
if (epilogue)
{
epilogue->force_vectorize = loop->force_vectorize;
epilogue->safelen = loop->safelen;
epilogue->dont_vectorize = false;
/* We may need to if-convert epilogue to vectorize it. */
if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
tree_if_conversion (epilogue);
}
return epilogue;
} }
/* The code below is trying to perform simple optimization - revert /* The code below is trying to perform simple optimization - revert
......
...@@ -514,6 +514,7 @@ vectorize_loops (void) ...@@ -514,6 +514,7 @@ vectorize_loops (void)
hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL; hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
bool any_ifcvt_loops = false; bool any_ifcvt_loops = false;
unsigned ret = 0; unsigned ret = 0;
struct loop *new_loop;
vect_loops_num = number_of_loops (cfun); vect_loops_num = number_of_loops (cfun);
...@@ -538,7 +539,8 @@ vectorize_loops (void) ...@@ -538,7 +539,8 @@ vectorize_loops (void)
&& optimize_loop_nest_for_speed_p (loop)) && optimize_loop_nest_for_speed_p (loop))
|| loop->force_vectorize) || loop->force_vectorize)
{ {
loop_vec_info loop_vinfo; loop_vec_info loop_vinfo, orig_loop_vinfo = NULL;
vectorize_epilogue:
vect_location = find_loop_location (loop); vect_location = find_loop_location (loop);
if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
&& dump_enabled_p ()) && dump_enabled_p ())
...@@ -546,7 +548,7 @@ vectorize_loops (void) ...@@ -546,7 +548,7 @@ vectorize_loops (void)
LOCATION_FILE (vect_location), LOCATION_FILE (vect_location),
LOCATION_LINE (vect_location)); LOCATION_LINE (vect_location));
loop_vinfo = vect_analyze_loop (loop); loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo);
loop->aux = loop_vinfo; loop->aux = loop_vinfo;
if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
...@@ -580,7 +582,7 @@ vectorize_loops (void) ...@@ -580,7 +582,7 @@ vectorize_loops (void)
&& dump_enabled_p ()) && dump_enabled_p ())
dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
"loop vectorized\n"); "loop vectorized\n");
vect_transform_loop (loop_vinfo); new_loop = vect_transform_loop (loop_vinfo);
num_vectorized_loops++; num_vectorized_loops++;
/* Now that the loop has been vectorized, allow it to be unrolled /* Now that the loop has been vectorized, allow it to be unrolled
etc. */ etc. */
...@@ -602,6 +604,15 @@ vectorize_loops (void) ...@@ -602,6 +604,15 @@ vectorize_loops (void)
fold_loop_vectorized_call (loop_vectorized_call, boolean_true_node); fold_loop_vectorized_call (loop_vectorized_call, boolean_true_node);
ret |= TODO_cleanup_cfg; ret |= TODO_cleanup_cfg;
} }
if (new_loop)
{
/* Epilogue of vectorized loop must be vectorized too. */
vect_loops_num = number_of_loops (cfun);
loop = new_loop;
orig_loop_vinfo = loop_vinfo; /* To pass vect_analyze_loop. */
goto vectorize_epilogue;
}
} }
vect_location = UNKNOWN_LOCATION; vect_location = UNKNOWN_LOCATION;
......
...@@ -335,6 +335,10 @@ typedef struct _loop_vec_info : public vec_info { ...@@ -335,6 +335,10 @@ typedef struct _loop_vec_info : public vec_info {
/* Mark loops having masked stores. */ /* Mark loops having masked stores. */
bool has_mask_store; bool has_mask_store;
/* For loops being epilogues of already vectorized loops
this points to the original vectorized loop. Otherwise NULL. */
_loop_vec_info *orig_loop_info;
} *loop_vec_info; } *loop_vec_info;
/* Access Functions. */ /* Access Functions. */
...@@ -374,6 +378,7 @@ typedef struct _loop_vec_info : public vec_info { ...@@ -374,6 +378,7 @@ typedef struct _loop_vec_info : public vec_info {
#define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store #define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store
#define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
#define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
((L)->may_misalign_stmts.length () > 0) ((L)->may_misalign_stmts.length () > 0)
...@@ -389,6 +394,12 @@ typedef struct _loop_vec_info : public vec_info { ...@@ -389,6 +394,12 @@ typedef struct _loop_vec_info : public vec_info {
#define LOOP_VINFO_NITERS_KNOWN_P(L) \ #define LOOP_VINFO_NITERS_KNOWN_P(L) \
(tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0) (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
#define LOOP_VINFO_EPILOGUE_P(L) \
(LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL)
#define LOOP_VINFO_ORIG_VECT_FACTOR(L) \
(LOOP_VINFO_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L)))
static inline loop_vec_info static inline loop_vec_info
loop_vec_info_for_loop (struct loop *loop) loop_vec_info_for_loop (struct loop *loop)
{ {
...@@ -1032,7 +1043,7 @@ extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); ...@@ -1032,7 +1043,7 @@ extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *,
struct loop *, edge); struct loop *, edge);
extern void vect_loop_versioning (loop_vec_info, unsigned int, bool); extern void vect_loop_versioning (loop_vec_info, unsigned int, bool);
extern void vect_do_peeling (loop_vec_info, tree, tree, extern struct loop *vect_do_peeling (loop_vec_info, tree, tree,
tree *, int, bool, bool); tree *, int, bool, bool);
extern source_location find_loop_location (struct loop *); extern source_location find_loop_location (struct loop *);
extern bool vect_can_advance_ivs_p (loop_vec_info); extern bool vect_can_advance_ivs_p (loop_vec_info);
...@@ -1144,11 +1155,11 @@ extern void destroy_loop_vec_info (loop_vec_info, bool); ...@@ -1144,11 +1155,11 @@ extern void destroy_loop_vec_info (loop_vec_info, bool);
extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, bool, extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, bool,
bool *, bool); bool *, bool);
/* Drive for loop analysis stage. */ /* Drive for loop analysis stage. */
extern loop_vec_info vect_analyze_loop (struct loop *); extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info);
extern tree vect_build_loop_niters (loop_vec_info); extern tree vect_build_loop_niters (loop_vec_info);
extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, bool); extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, bool);
/* Drive for loop transformation stage. */ /* Drive for loop transformation stage. */
extern void vect_transform_loop (loop_vec_info); extern struct loop *vect_transform_loop (loop_vec_info);
extern loop_vec_info vect_analyze_loop_form (struct loop *); extern loop_vec_info vect_analyze_loop_form (struct loop *);
extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *, extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *,
slp_tree, int, gimple **); slp_tree, int, gimple **);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment