Commit c7b608a9 by Thomas Koenig

re PR libfortran/51119 (MATMUL slow for large matrices)

2017-02-26  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/51119
	* options.c (gfc_post_options): Set default limit for matmul
	inlining to 30.
	* invoke.texi: Document change.

2017-02-26  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/51119
	* gfortran.dg/inline_matmul_1.f90: Scan optimized dump instead
	of original.
	* gfortran.dg/inline_matmul_11.f90: Likewise.
	* gfortran.dg/inline_matmul_9.f90: Likewise.
	* gfortran.dg/matmul_13.f90: New test.
	* gfortran.dg/matmul_14.f90: New test.

From-SVN: r245745
parent 462a7b56
2017-02-26 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/51119
* options.c (gfc_post_options): Set default limit for matmul
inlining to 30.
* invoke.texi: Document change.
2017-02-25 Dominique d'Humieres <dominiq@lps.ens.fr> 2017-02-25 Dominique d'Humieres <dominiq@lps.ens.fr>
PR fortran/79601 PR fortran/79601
......
...@@ -1629,9 +1629,8 @@ for matrices with size up to @var{n}. If the matrices involved are not ...@@ -1629,9 +1629,8 @@ for matrices with size up to @var{n}. If the matrices involved are not
square, the size comparison is performed using the geometric mean of square, the size comparison is performed using the geometric mean of
the dimensions of the argument and result matrices. the dimensions of the argument and result matrices.
The default value for @var{n} is the value specified for The default value for @var{n} is 30. The @code{-fblas-matmul-limit}
@code{-fblas-matmul-limit} if this option is specified, or unlimitited can be used to change this value.
otherwise.
@item -frecursive @item -frecursive
@opindex @code{frecursive} @opindex @code{frecursive}
......
...@@ -388,10 +388,16 @@ gfc_post_options (const char **pfilename) ...@@ -388,10 +388,16 @@ gfc_post_options (const char **pfilename)
if (!flag_automatic) if (!flag_automatic)
flag_max_stack_var_size = 0; flag_max_stack_var_size = 0;
/* If we call BLAS directly, only inline up to the BLAS limit. */ /* If the user did not specify an inline matmul limit, inline up to the BLAS
limit or up to 30 if no external BLAS is specified. */
if (flag_external_blas && flag_inline_matmul_limit < 0) if (flag_inline_matmul_limit < 0)
flag_inline_matmul_limit = flag_blas_matmul_limit; {
if (flag_external_blas)
flag_inline_matmul_limit = flag_blas_matmul_limit;
else
flag_inline_matmul_limit = 30;
}
/* Optimization implies front end optimization, unless the user /* Optimization implies front end optimization, unless the user
specified it directly. */ specified it directly. */
......
2017-02-26 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/51119
* gfortran.dg/inline_matmul_1.f90: Scan optimized dump instead
of original.
* gfortran.dg/inline_matmul_11.f90: Likewise.
* gfortran.dg/inline_matmul_9.f90: Likewise.
* gfortran.dg/matmul_13.f90: New test.
* gfortran.dg/matmul_14.f90: New test.
2017-02-25 Jakub Jelinek <jakub@redhat.com> 2017-02-25 Jakub Jelinek <jakub@redhat.com>
PR middle-end/79396 PR middle-end/79396
......
! { dg-do run } ! { dg-do run }
! { dg-options "-ffrontend-optimize -fdump-tree-original -Wrealloc-lhs" } ! { dg-options "-ffrontend-optimize -fdump-tree-optimized -Wrealloc-lhs" }
! PR 37131 - check basic functionality of inlined matmul, making ! PR 37131 - check basic functionality of inlined matmul, making
! sure that the library is not called, with and without reallocation. ! sure that the library is not called, with and without reallocation.
...@@ -149,4 +149,4 @@ program main ...@@ -149,4 +149,4 @@ program main
end program main end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } } ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
! { dg-do run } ! { dg-do run }
! { dg-additional-options "-ffrontend-optimize -fdump-tree-original" } ! { dg-additional-options "-ffrontend-optimize -fdump-tree-optimized" }
! PR fortran/66176 - inline conjg for matml. ! PR fortran/66176 - inline conjg for matml.
program main program main
complex, dimension(3,2) :: a complex, dimension(3,2) :: a
...@@ -29,4 +29,4 @@ program main ...@@ -29,4 +29,4 @@ program main
c = matmul(conjg(a), b) c = matmul(conjg(a), b)
if (any(conjg(c) /= res2)) call abort if (any(conjg(c) /= res2)) call abort
end program main end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } } ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
! { dg-do run } ! { dg-do run }
! { dg-options "-ffrontend-optimize -fdump-tree-original" } ! { dg-options "-ffrontend-optimize -fdump-tree-optimized" }
! PR 66041 - this used to ICE with an incomplete fix for the PR. ! PR 66041 - this used to ICE with an incomplete fix for the PR.
program main program main
implicit none implicit none
...@@ -21,4 +21,4 @@ program main ...@@ -21,4 +21,4 @@ program main
if (any (c2-reshape([248., -749.],shape(c2)) /= 0.)) call abort if (any (c2-reshape([248., -749.],shape(c2)) /= 0.)) call abort
end program main end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } } ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
! { dg-do compile }
! { dg-options "-O3 -fdump-tree-optimized" }
! Check that the default limit of 30 for inlining matmul applies.
!
! This variant uses n = 31, one above the default limit, and no explicit
! inline-limit option is passed.  The final directive therefore expects
! exactly one reference to the library routine in the optimized dump.
program main
integer, parameter :: n = 31
real, dimension(n,n) :: a, b, c
! Fill both operands with pseudo-random values.
call random_number(a)
call random_number(b)
! The product under test.
c = matmul(a,b)
! Presumably printed so that the product is actually used; verify intent.
print *,sum(c)
end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul_r4" 1 "optimized" } }
! { dg-do compile }
! { dg-options "-O3 -fdump-tree-optimized" }
! Check that the default limit of 30 for inlining matmul applies.
!
! This variant uses n = 30, exactly the default limit, and no explicit
! inline-limit option is passed.  The final directive therefore expects
! no reference to the library routine in the optimized dump.
program main
integer, parameter :: n = 30
real, dimension(n,n) :: a, b, c
! Fill both operands with pseudo-random values.
call random_number(a)
call random_number(b)
! The product under test.
c = matmul(a,b)
! Presumably printed so that the product is actually used; verify intent.
print *,sum(c)
end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul_r4" 0 "optimized" } }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment