Commit 5879ab5f, authored by Richard Biener, committed by Richard Biener

re PR tree-optimization/88440 (size optimization of memcpy-like code)

2019-05-23  Richard Biener  <rguenther@suse.de>

	PR tree-optimization/88440
	* opts.c (default_options_table): Enable -ftree-loop-distribute-patterns
	at -O[2s]+.
	* tree-loop-distribution.c (generate_memset_builtin): Fold the
	generated call.
	(generate_memcpy_builtin): Likewise.
	(distribute_loop): Pass in whether to only distribute patterns.
	(prepare_perfect_loop_nest): Also allow size optimization.
	(pass_loop_distribution::execute): When optimizing a loop
	nest for size allow pattern replacement.

	* gcc.dg/tree-ssa/ldist-37.c: New testcase.
	* gcc.dg/tree-ssa/ldist-38.c: Likewise.
	* gcc.dg/vect/vect.exp: Add -fno-tree-loop-distribute-patterns.
	* gcc.dg/tree-ssa/ldist-37.c: Adjust.
	* gcc.dg/tree-ssa/ldist-38.c: Likewise.
	* g++.dg/tree-ssa/pr78847.C: Likewise.
	* gcc.dg/autopar/pr39500-1.c: Likewise.
	* gcc.dg/autopar/reduc-1char.c: Likewise.
	* gcc.dg/autopar/reduc-7.c: Likewise.
	* gcc.dg/tree-ssa/ivopts-lt-2.c: Likewise.
	* gcc.dg/tree-ssa/ivopts-lt.c: Likewise.
	* gcc.dg/tree-ssa/predcom-dse-1.c: Likewise.
	* gcc.dg/tree-ssa/predcom-dse-2.c: Likewise.
	* gcc.dg/tree-ssa/predcom-dse-3.c: Likewise.
	* gcc.dg/tree-ssa/predcom-dse-4.c: Likewise.
	* gcc.dg/tree-ssa/prefetch-7.c: Likewise.
	* gcc.dg/tree-ssa/prefetch-8.c: Likewise.
	* gcc.dg/tree-ssa/prefetch-9.c: Likewise.
	* gcc.dg/tree-ssa/scev-11.c: Likewise.
	* gcc.dg/vect/costmodel/i386/costmodel-vect-31.c: Likewise.
	* gcc.dg/vect/costmodel/i386/costmodel-vect-33.c: Likewise.
	* gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c: Likewise.
	* gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c: Likewise.
	* gcc.target/i386/pr30970.c: Likewise.
	* gcc.target/i386/vect-double-1.c: Likewise.
	* gcc.target/i386/vect-double-2.c: Likewise.
	* gcc.dg/tree-ssa/gen-vect-2.c: Likewise.
	* gcc.dg/tree-ssa/gen-vect-26.c: Likewise.
	* gcc.dg/tree-ssa/gen-vect-28.c: Likewise.
	* gcc.dg/tree-ssa/gen-vect-32.c: Likewise.
	* gfortran.dg/vect/vect-5.f90: Likewise.
	* gfortran.dg/vect/vect-8.f90: Likewise.

From-SVN: r271553
parent 32d94113
2019-05-23 Richard Biener <rguenther@suse.de>
PR tree-optimization/88440
* opts.c (default_options_table): Enable -ftree-loop-distribute-patterns
at -O[2s]+.
* tree-loop-distribution.c (generate_memset_builtin): Fold the
generated call.
(generate_memcpy_builtin): Likewise.
(distribute_loop): Pass in whether to only distribute patterns.
(prepare_perfect_loop_nest): Also allow size optimization.
(pass_loop_distribution::execute): When optimizing a loop
nest for size allow pattern replacement.
2019-05-23 Jakub Jelinek <jakub@redhat.com> 2019-05-23 Jakub Jelinek <jakub@redhat.com>
PR target/90568 PR target/90568
......
...@@ -550,7 +550,7 @@ static const struct default_options default_options_table[] = ...@@ -550,7 +550,7 @@ static const struct default_options default_options_table[] =
{ OPT_LEVELS_3_PLUS, OPT_fpredictive_commoning, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_fpredictive_commoning, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_fsplit_paths, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_fsplit_paths, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 }, { OPT_LEVELS_2_PLUS, OPT_ftree_loop_distribute_patterns, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribution, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_loop_distribution, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 }, { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
......
2019-05-23 Richard Biener <rguenther@suse.de>
PR tree-optimization/88440
* gcc.dg/tree-ssa/ldist-37.c: New testcase.
* gcc.dg/tree-ssa/ldist-38.c: Likewise.
* gcc.dg/vect/vect.exp: Add -fno-tree-loop-distribute-patterns.
* gcc.dg/tree-ssa/ldist-37.c: Adjust.
* gcc.dg/tree-ssa/ldist-38.c: Likewise.
* g++.dg/tree-ssa/pr78847.C: Likewise.
* gcc.dg/autopar/pr39500-1.c: Likewise.
* gcc.dg/autopar/reduc-1char.c: Likewise.
* gcc.dg/autopar/reduc-7.c: Likewise.
* gcc.dg/tree-ssa/ivopts-lt-2.c: Likewise.
* gcc.dg/tree-ssa/ivopts-lt.c: Likewise.
* gcc.dg/tree-ssa/predcom-dse-1.c: Likewise.
* gcc.dg/tree-ssa/predcom-dse-2.c: Likewise.
* gcc.dg/tree-ssa/predcom-dse-3.c: Likewise.
* gcc.dg/tree-ssa/predcom-dse-4.c: Likewise.
* gcc.dg/tree-ssa/prefetch-7.c: Likewise.
* gcc.dg/tree-ssa/prefetch-8.c: Likewise.
* gcc.dg/tree-ssa/prefetch-9.c: Likewise.
* gcc.dg/tree-ssa/scev-11.c: Likewise.
* gcc.dg/vect/costmodel/i386/costmodel-vect-31.c: Likewise.
* gcc.dg/vect/costmodel/i386/costmodel-vect-33.c: Likewise.
* gcc.dg/vect/costmodel/x86_64/costmodel-vect-31.c: Likewise.
* gcc.dg/vect/costmodel/x86_64/costmodel-vect-33.c: Likewise.
* gcc.target/i386/pr30970.c: Likewise.
* gcc.target/i386/vect-double-1.c: Likewise.
* gcc.target/i386/vect-double-2.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-2.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-26.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-28.c: Likewise.
* gcc.dg/tree-ssa/gen-vect-32.c: Likewise.
* gfortran.dg/vect/vect-5.f90: Likewise.
* gfortran.dg/vect/vect-8.f90: Likewise.
2019-05-23 Martin Liska <mliska@suse.cz> 2019-05-23 Martin Liska <mliska@suse.cz>
PR sanitizer/90570 PR sanitizer/90570
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-require-effective-target c++14 } */ /* { dg-require-effective-target c++14 } */
/* { dg-options "-O3 -fdump-tree-ldist" } */ /* { dg-options "-O3 -fdump-tree-ldist-optimized" } */
#include <stddef.h> #include <stddef.h>
#include <cstring> #include <cstring>
...@@ -23,4 +23,4 @@ void testWithLoopValue(const Foo foo, size_t ptr, char *buf_) { ...@@ -23,4 +23,4 @@ void testWithLoopValue(const Foo foo, size_t ptr, char *buf_) {
buf_[ptr++] = c; buf_[ptr++] = c;
} }
/* { dg-final { scan-tree-dump "memcpy\[^\n\r\]*, 9\\);" "ldist" } } */ /* { dg-final { scan-tree-dump "split to 0 loops and 1 library calls" "ldist" } } */
/* pr39500: autopar fails to parallel */ /* pr39500: autopar fails to parallel */
/* origin: nemokingdom@gmail.com(LiFeng) */ /* origin: nemokingdom@gmail.com(LiFeng) */
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops2-details" } */ /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-parallelize-loops=4 -fdump-tree-parloops2-details" } */
void abort (void); void abort (void);
......
...@@ -61,5 +61,5 @@ int main (void) ...@@ -61,5 +61,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops2" } } */ /* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops2" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops2" } } */ /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops2" } } */
...@@ -85,5 +85,5 @@ int main (void) ...@@ -85,5 +85,5 @@ int main (void)
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops2" } } */ /* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops2" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops2" } } */ /* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 2 "parloops2" } } */
/* { dg-do run { target vect_cmdline_needed } } */ /* { dg-do run { target vect_cmdline_needed } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */ /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ /* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */
#include <stdlib.h> #include <stdlib.h>
......
/* { dg-do run { target vect_cmdline_needed } } */ /* { dg-do run { target vect_cmdline_needed } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */ /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ /* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */
#include <stdlib.h> #include <stdlib.h>
......
/* { dg-do run { target vect_cmdline_needed } } */ /* { dg-do run { target vect_cmdline_needed } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */ /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic" } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fvect-cost-model=dynamic -mno-sse" { target { i?86-*-* x86_64-*-* } } } */ /* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */
#include <stdlib.h> #include <stdlib.h>
......
/* { dg-do run { target vect_cmdline_needed } } */ /* { dg-do run { target vect_cmdline_needed } } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -fno-vect-cost-model" } */ /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -ftree-vectorize -fdump-tree-vect-details -fno-vect-cost-model" } */
/* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */ /* { dg-additional-options "-mno-sse" { target { i?86-*-* x86_64-*-* } } } */
#include <stdlib.h> #include <stdlib.h>
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-ivopts" } */ /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fdump-tree-ivopts" } */
/* { dg-skip-if "PR68644" { hppa*-*-* powerpc*-*-* } } */ /* { dg-skip-if "PR68644" { hppa*-*-* powerpc*-*-* } } */
void void
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O2 -fdump-tree-ivopts" } */ /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fdump-tree-ivopts" } */
/* { dg-require-effective-target stdint_types } */ /* { dg-require-effective-target stdint_types } */
#include "stdint.h" #include "stdint.h"
......
/* { dg-do run } */ /* { dg-do run } */
/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */ /* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
int arr[105] = {2, 3, 5, 7, 11}; int arr[105] = {2, 3, 5, 7, 11};
int result0[10] = {2, 3, 5, 7, 11}; int result0[10] = {2, 3, 5, 7, 11};
......
/* { dg-do run } */ /* { dg-do run } */
/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */ /* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
int arr[105] = {2, 3, 5, 7, 11}; int arr[105] = {2, 3, 5, 7, 11};
int result0[10] = {2, 3, 5, 7, 11}; int result0[10] = {2, 3, 5, 7, 11};
......
/* { dg-do run } */ /* { dg-do run } */
/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */ /* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
int arr1[105] = {2, 3, 5, 7, 11, 13, 0}; int arr1[105] = {2, 3, 5, 7, 11, 13, 0};
int arr2[105] = {2, 3, 5, 7, 11, 13, 0}; int arr2[105] = {2, 3, 5, 7, 11, 13, 0};
......
/* { dg-do run } */ /* { dg-do run } */
/* { dg-options "-O2 -fno-inline -fpredictive-commoning -fdump-tree-pcom-details" } */ /* { dg-options "-O2 -fno-inline -fno-tree-loop-distribute-patterns -fpredictive-commoning -fdump-tree-pcom-details" } */
int arr[105] = {2, 3, 5, 7, 11}; int arr[105] = {2, 3, 5, 7, 11};
int result0[10] = {2, 3, 5, 7, 11}; int result0[10] = {2, 3, 5, 7, 11};
......
/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */ /* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
/* { dg-options "-O2 -fprefetch-loop-arrays -march=amdfam10 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */ /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fprefetch-loop-arrays -march=amdfam10 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */
#define K 1000000 #define K 1000000
int a[K]; int a[K];
......
/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */ /* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
/* { dg-options "-O2 -fprefetch-loop-arrays -march=amdfam10 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */ /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fprefetch-loop-arrays -march=amdfam10 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */
#define K 1000000 #define K 1000000
int a[K]; int a[K];
......
/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */ /* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
/* { dg-options "-O2 -fprefetch-loop-arrays -march=amdfam10 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */ /* { dg-options "-O2 -fno-tree-loop-distribute-patterns -fprefetch-loop-arrays -march=amdfam10 --param simultaneous-prefetches=100 -fdump-tree-aprefetch-details -fdump-tree-optimized" } */
#define K 1000000 #define K 1000000
int a[K], b[K]; int a[K], b[K];
......
...@@ -15,7 +15,7 @@ foo (int n) ...@@ -15,7 +15,7 @@ foo (int n)
{ {
unsigned char uc = (unsigned char)i; unsigned char uc = (unsigned char)i;
a[i] = i; a[i] = i;
b[uc] = 0; b[uc] = 1;
} }
bar (a); bar (a);
......
/* { dg-require-effective-target vect_int } */ /* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-fno-tree-loop-distribute-patterns" } */
#include <stdarg.h> #include <stdarg.h>
#include "../../tree-vect.h" #include "../../tree-vect.h"
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-require-effective-target vect_int } */ /* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-fno-tree-loop-distribute-patterns" } */
#include <stdarg.h> #include <stdarg.h>
#include "../../tree-vect.h" #include "../../tree-vect.h"
......
/* { dg-require-effective-target vect_int } */ /* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-fno-tree-loop-distribute-patterns" } */
#include <stdarg.h> #include <stdarg.h>
#include "../../tree-vect.h" #include "../../tree-vect.h"
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-require-effective-target vect_int } */ /* { dg-require-effective-target vect_int } */
/* { dg-additional-options "-fno-tree-loop-distribute-patterns" } */
#include <stdarg.h> #include <stdarg.h>
#include "../../tree-vect.h" #include "../../tree-vect.h"
......
...@@ -45,7 +45,7 @@ if ![check_vect_support_and_set_flags] { ...@@ -45,7 +45,7 @@ if ![check_vect_support_and_set_flags] {
} }
# These flags are used for all targets. # These flags are used for all targets.
lappend DEFAULT_VECTCFLAGS "-ftree-vectorize" "-fno-vect-cost-model" "-fno-common" lappend DEFAULT_VECTCFLAGS "-ftree-vectorize" "-fno-tree-loop-distribute-patterns" "-fno-vect-cost-model" "-fno-common"
# Initialize `dg'. # Initialize `dg'.
dg-init dg-init
......
/* { dg-do compile } /* { dg-do compile }
/* { dg-options "-msse2 -O2 -ftree-vectorize -mtune=generic" } */ /* { dg-options "-msse2 -O2 -fno-tree-loop-distribute-patterns -ftree-vectorize -mtune=generic" } */
#define N 256 #define N 256
int b[N]; int b[N];
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=core2" } } */ /* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=core2" } } */
/* { dg-options "-O2 -ftree-vectorize -mfpmath=sse -march=core2 -fdump-tree-vect-stats" } */ /* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns -mfpmath=sse -march=core2 -fdump-tree-vect-stats" } */
/* { dg-add-options bind_pic_locally } */ /* { dg-add-options bind_pic_locally } */
extern void abort (void); extern void abort (void);
......
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize -mfpmath=sse -msse2 -mtune=atom -fdump-tree-vect-stats" } */ /* { dg-options "-O2 -ftree-vectorize -fno-tree-loop-distribute-patterns -mfpmath=sse -msse2 -mtune=atom -fdump-tree-vect-stats" } */
extern void abort (void); extern void abort (void);
......
! { dg-require-effective-target vect_int } ! { dg-require-effective-target vect_int }
! { dg-additional-options "--param vect-max-peeling-for-alignment=0" } ! { dg-additional-options "-fno-tree-loop-distribute-patterns --param vect-max-peeling-for-alignment=0" }
Subroutine foo (N, M) Subroutine foo (N, M)
Integer N Integer N
......
! { dg-do compile } ! { dg-do compile }
! { dg-require-effective-target vect_double } ! { dg-require-effective-target vect_double }
! { dg-additional-options "-finline-matmul-limit=0" } ! { dg-additional-options "-fno-tree-loop-distribute-patterns -finline-matmul-limit=0" }
module lfk_prec module lfk_prec
integer, parameter :: dp=kind(1.d0) integer, parameter :: dp=kind(1.d0)
......
...@@ -115,6 +115,7 @@ along with GCC; see the file COPYING3. If not see ...@@ -115,6 +115,7 @@ along with GCC; see the file COPYING3. If not see
#include "params.h" #include "params.h"
#include "tree-vectorizer.h" #include "tree-vectorizer.h"
#include "tree-eh.h" #include "tree-eh.h"
#include "gimple-fold.h"
#define MAX_DATAREFS_NUM \ #define MAX_DATAREFS_NUM \
...@@ -1028,6 +1029,7 @@ generate_memset_builtin (struct loop *loop, partition *partition) ...@@ -1028,6 +1029,7 @@ generate_memset_builtin (struct loop *loop, partition *partition)
fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
fold_stmt (&gsi);
if (dump_file && (dump_flags & TDF_DETAILS)) if (dump_file && (dump_flags & TDF_DETAILS))
{ {
...@@ -1071,6 +1073,7 @@ generate_memcpy_builtin (struct loop *loop, partition *partition) ...@@ -1071,6 +1073,7 @@ generate_memcpy_builtin (struct loop *loop, partition *partition)
fn = build_fold_addr_expr (builtin_decl_implicit (kind)); fn = build_fold_addr_expr (builtin_decl_implicit (kind));
fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes); fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
fold_stmt (&gsi);
if (dump_file && (dump_flags & TDF_DETAILS)) if (dump_file && (dump_flags & TDF_DETAILS))
{ {
...@@ -2769,7 +2772,8 @@ finalize_partitions (struct loop *loop, vec<struct partition *> *partitions, ...@@ -2769,7 +2772,8 @@ finalize_partitions (struct loop *loop, vec<struct partition *> *partitions,
static int static int
distribute_loop (struct loop *loop, vec<gimple *> stmts, distribute_loop (struct loop *loop, vec<gimple *> stmts,
control_dependences *cd, int *nb_calls, bool *destroy_p) control_dependences *cd, int *nb_calls, bool *destroy_p,
bool only_patterns_p)
{ {
ddrs_table = new hash_table<ddr_hasher> (389); ddrs_table = new hash_table<ddr_hasher> (389);
struct graph *rdg; struct graph *rdg;
...@@ -2843,7 +2847,7 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts, ...@@ -2843,7 +2847,7 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
/* If we are only distributing patterns but did not detect any, /* If we are only distributing patterns but did not detect any,
simply bail out. */ simply bail out. */
if (!flag_tree_loop_distribution if (only_patterns_p
&& !any_builtin) && !any_builtin)
{ {
nbp = 0; nbp = 0;
...@@ -2855,7 +2859,7 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts, ...@@ -2855,7 +2859,7 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
a loop into pieces, separated by builtin calls. That is, we a loop into pieces, separated by builtin calls. That is, we
only want no or a single loop body remaining. */ only want no or a single loop body remaining. */
struct partition *into; struct partition *into;
if (!flag_tree_loop_distribution) if (only_patterns_p)
{ {
for (i = 0; partitions.iterate (i, &into); ++i) for (i = 0; partitions.iterate (i, &into); ++i)
if (!partition_builtin_p (into)) if (!partition_builtin_p (into))
...@@ -3085,7 +3089,6 @@ prepare_perfect_loop_nest (struct loop *loop) ...@@ -3085,7 +3089,6 @@ prepare_perfect_loop_nest (struct loop *loop)
&& loop_outer (outer) && loop_outer (outer)
&& outer->inner == loop && loop->next == NULL && outer->inner == loop && loop->next == NULL
&& single_exit (outer) && single_exit (outer)
&& optimize_loop_for_speed_p (outer)
&& !chrec_contains_symbols_defined_in_loop (niters, outer->num) && !chrec_contains_symbols_defined_in_loop (niters, outer->num)
&& (niters = number_of_latch_executions (outer)) != NULL_TREE && (niters = number_of_latch_executions (outer)) != NULL_TREE
&& niters != chrec_dont_know) && niters != chrec_dont_know)
...@@ -3139,9 +3142,11 @@ pass_loop_distribution::execute (function *fun) ...@@ -3139,9 +3142,11 @@ pass_loop_distribution::execute (function *fun)
walking to innermost loops. */ walking to innermost loops. */
FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
{ {
/* Don't distribute multiple exit edges loop, or cold loop. */ /* Don't distribute multiple exit edges loop, or cold loop when
not doing pattern detection. */
if (!single_exit (loop) if (!single_exit (loop)
|| !optimize_loop_for_speed_p (loop)) || (!flag_tree_loop_distribute_patterns
&& !optimize_loop_for_speed_p (loop)))
continue; continue;
/* Don't distribute loop if niters is unknown. */ /* Don't distribute loop if niters is unknown. */
...@@ -3169,9 +3174,10 @@ pass_loop_distribution::execute (function *fun) ...@@ -3169,9 +3174,10 @@ pass_loop_distribution::execute (function *fun)
bool destroy_p; bool destroy_p;
int nb_generated_loops, nb_generated_calls; int nb_generated_loops, nb_generated_calls;
nb_generated_loops = distribute_loop (loop, work_list, cd, nb_generated_loops
&nb_generated_calls, = distribute_loop (loop, work_list, cd, &nb_generated_calls,
&destroy_p); &destroy_p, (!optimize_loop_for_speed_p (loop)
|| !flag_tree_loop_distribution));
if (destroy_p) if (destroy_p)
loops_to_be_destroyed.safe_push (loop); loops_to_be_destroyed.safe_push (loop);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment