Commit 998511a6 by Thomas Koenig

re PR fortran/29550 (Optimize -fexternal-blas calls for conjg())

2018-09-18  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/29550
	* gfortran.h (gfc_expr): Add external_blas flag.
	* frontend-passes.c (matrix_case): Add case A2TB2T.
	(optimize_namespace): Handle flag_external_blas by
	calling call_external_blas.
	(get_array_inq_function): Add argument okind. If
	it is nonzero, use it as the kind of argument
	to be used.
	(inline_limit_check): Remove m_case argument, add
	limit argument instead.  Remove assert about m_case.
	Set the limit for inlining from the limit argument.
	(matmul_lhs_realloc): Handle case A2TB2T.
	(inline_matmul_assign): Handle inline limit for other cases with
	two rank-two matrices.  Remove no-op calls to inline_limit_check.
	(call_external_blas): New function.
	* trans-intrinsic.c (gfc_conv_intrinsic_funcall): Do not add
	argument to external BLAS if external_blas is already set.

2018-09-18  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/29550
	* gfortran.dg/inline_matmul_13.f90: Adjust count for
	_gfortran_matmul.
	* gfortran.dg/inline_matmul_16.f90: Likewise.
	* gfortran.dg/promotion_2.f90: Add -fblas-matmul-limit=1.  Scan
	for dgemm instead of dgemm_.  Add call to random_number to make
	standard conforming.
	* gfortran.dg/matmul_blas_1.f90: New test.
	* gfortran.dg/matmul_bounds_14.f: New test.
	* gfortran.dg/matmul_bounds_15.f: New test.
	* gfortran.dg/matmul_bounds_16.f: New test.
	* gfortran.dg/blas_gemm_routines.f: New test / additional file for
	preceding tests.

From-SVN: r264412
parent 5c470e0f
......@@ -2143,6 +2143,11 @@ typedef struct gfc_expr
unsigned int no_bounds_check : 1;
/* Set this if a matmul expression has already been evaluated for conversion
to a BLAS call. */
unsigned int external_blas : 1;
/* If an expression comes from a Hollerith constant or compile-time
evaluation of a transfer statement, it may have a prescribed target-
memory representation, and these cannot always be backformed from
......
......@@ -4055,6 +4055,7 @@ gfc_conv_intrinsic_funcall (gfc_se * se, gfc_expr * expr)
to be able to call the BLAS ?gemm functions if required and possible. */
append_args = NULL;
if (expr->value.function.isym->id == GFC_ISYM_MATMUL
&& !expr->external_blas
&& sym->ts.type != BT_LOGICAL)
{
tree cint = gfc_get_int_type (gfc_c_int_kind);
......
......@@ -44,4 +44,4 @@ program main
deallocate(calloc)
end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } }
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 1 "original" } }
......@@ -58,4 +58,4 @@ program main
end do
end do
end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 1 "optimized" } }
C { dg-do run }
C { dg-options "-fcheck=bounds -fdump-tree-optimized -fblas-matmul-limit=1 -O -fexternal-blas" }
C { dg-additional-sources blas_gemm_routines.f }
C Test calling of BLAS routines
! Driver for the BLAS MATMUL test: runs the default-real, real(8),
! default-complex and complex(8) checks one after another.  Each
! callee stops with its own code on the first mismatch.
program main
implicit none
call sub_s
call sub_d
call sub_c
call sub_z
end program main
! Check MATMUL on real(8) operands.  The plain product, the product
! with a transposed first argument and the product with a transposed
! second argument are each compared against the precomputed result
! cres; any mismatch stops with a distinct code (31-36).
subroutine sub_d
implicit none
real(8), dimension(3,2) :: a
real(8), dimension(2,3) :: at
real(8), dimension(2,4) :: b
real(8), dimension(4,2) :: bt
real(8), dimension(3,4) :: c
real(8), dimension(3,4) :: cres
real(8), dimension(:,:), allocatable :: c_alloc
data a / 2., -3., 5., -7., 11., -13./
data b /17., -23., 29., -31., 37., -39., 41., -47./
! Expected value of matmul(a,b), stored column by column.
data cres /195., -304., 384., 275., -428., 548., 347., -540.,
& 692., 411., -640., 816./
c = matmul(a,b)
if (any (c /= cres)) stop 31
at = transpose(a)
! Overwrite c before recomputing (the complex constant is converted
! to real on assignment); presumably so that a stale result cannot
! satisfy the next check.
c = (1.2,-2.2)
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 32
bt = transpose(b)
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 33
! c_alloc is unallocated here, so this assignment has to allocate it.
c_alloc = matmul(a,b)
! Verify the freshly assigned allocatable result; the original
! compared c here, which left c_alloc unchecked.
if (any (c_alloc /= cres)) stop 34
at = transpose(a)
deallocate (c_alloc)
! NOTE(review): c_alloc is deallocated above and allocated as 20x20
! below, yet the following products are still assigned to c.
! Presumably c_alloc was the intended target - confirm before changing.
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 35
bt = transpose(b)
allocate (c_alloc(20,20))
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 36
end
! Check MATMUL on default-real operands.  The plain product, the
! product with a transposed first argument and the product with a
! transposed second argument are each compared against the
! precomputed result cres; any mismatch stops with a distinct code
! (21-26).
subroutine sub_s
implicit none
real, dimension(3,2) :: a
real, dimension(2,3) :: at
real, dimension(2,4) :: b
real, dimension(4,2) :: bt
real, dimension(3,4) :: c
real, dimension(3,4) :: cres
real, dimension(:,:), allocatable :: c_alloc
data a / 2., -3., 5., -7., 11., -13./
data b /17., -23., 29., -31., 37., -39., 41., -47./
! Expected value of matmul(a,b), stored column by column.
data cres /195., -304., 384., 275., -428., 548., 347., -540.,
& 692., 411., -640., 816./
c = matmul(a,b)
if (any (c /= cres)) stop 21
at = transpose(a)
! Overwrite c before recomputing (the complex constant is converted
! to real on assignment); presumably so that a stale result cannot
! satisfy the next check.
c = (1.2,-2.2)
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 22
bt = transpose(b)
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 23
! c_alloc is unallocated here, so this assignment has to allocate it.
c_alloc = matmul(a,b)
! Verify the freshly assigned allocatable result; the original
! compared c here, which left c_alloc unchecked.
if (any (c_alloc /= cres)) stop 24
at = transpose(a)
deallocate (c_alloc)
! NOTE(review): c_alloc is deallocated above and allocated as 20x20
! below, yet the following products are still assigned to c.
! Presumably c_alloc was the intended target - confirm before changing.
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 25
bt = transpose(b)
allocate (c_alloc(20,20))
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 26
end
! Check MATMUL on default-complex operands.  Besides the plain and
! transposed products, the conjugate-transposed forms
! matmul(conjg(transpose(ah)), b) and matmul(a, transpose(conjg(bh)))
! are compared against the precomputed result cres; any mismatch
! stops with a distinct code (1-10).
subroutine sub_c
implicit none
complex, dimension(3,2) :: a
complex, dimension(2,3) :: at, ah
complex, dimension(2,4) :: b
complex, dimension(4,2) :: bt, bh
complex, dimension(3,4) :: c
complex, dimension(3,4) :: cres
complex, dimension(:,:), allocatable :: c_alloc
data a / (2.,-3.), (-5.,7.), (11.,-13.), (17.,19), (-23., -29),
& (-31., 37.)/
data b / (-41., 43.), (-47., 53.), (-59.,-61.), (-67., 71),
& ( 73.,79. ), (83.,-89.), (97.,-101.), (-107.,-109.)/
! Expected value of matmul(a,b), stored column by column.
data cres /(-1759.,217.), (2522.,-358.), (-396.,-2376.),
& (-2789.,-11.),
& (4322.,202.), (-1992.,-4584.), (3485.,3.), (-5408.,-244.),
& (2550.,5750.), (143.,-4379.), (-478.,6794.), (7104.,-2952.) /
c = matmul(a,b)
if (any (c /= cres)) stop 1
at = transpose(a)
! Overwrite c before recomputing; presumably so that a stale result
! cannot satisfy the next check.
c = (1.2,-2.2)
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 2
bt = transpose(b)
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 3
! ah is the conjugate transpose of a, so conjg(transpose(ah)) is a.
ah = transpose(conjg(a))
c = (1.2,-2.2)
c = matmul(conjg(transpose(ah)), b)
if (any (c /= cres)) stop 4
! bh is the conjugate transpose of b, so transpose(conjg(bh)) is b.
bh = transpose(conjg(b))
c = (1.2,-2.2)
c = matmul(a, transpose(conjg(bh)))
if (any (c /= cres)) stop 5
! c_alloc is unallocated here, so this assignment has to allocate it.
c_alloc = matmul(a,b)
! Verify the freshly assigned allocatable result; the original
! compared c here, which left c_alloc unchecked.
if (any (c_alloc /= cres)) stop 6
at = transpose(a)
deallocate (c_alloc)
! NOTE(review): from here on c_alloc is deallocated, allocated as
! 20x20 and later as 0x0, yet the products are still assigned to c.
! Presumably c_alloc was the intended target - confirm before changing.
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 7
bt = transpose(b)
allocate (c_alloc(20,20))
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 8
ah = transpose(conjg(a))
c = (1.2,-2.2)
c = matmul(conjg(transpose(ah)), b)
if (any (c /= cres)) stop 9
deallocate (c_alloc)
allocate (c_alloc(0,0))
bh = transpose(conjg(b))
c = (1.2,-2.2)
c = matmul(a, transpose(conjg(bh)))
if (any (c /= cres)) stop 10
end
! Check MATMUL on complex(8) operands.  Besides the plain and
! transposed products, the conjugate-transposed forms
! matmul(conjg(transpose(ah)), b) and matmul(a, transpose(conjg(bh)))
! are compared against the precomputed result cres; any mismatch
! stops with a distinct code (11-20).
subroutine sub_z
implicit none
complex(8), dimension(3,2) :: a
complex(8), dimension(2,3) :: at, ah
complex(8), dimension(2,4) :: b
complex(8), dimension(4,2) :: bt, bh
complex(8), dimension(3,4) :: c
complex(8), dimension(3,4) :: cres
complex(8), dimension(:,:), allocatable :: c_alloc
data a / (2.,-3.), (-5._8,7.), (11.,-13.), (17.,19),
& (-23., -29), (-31., 37.)/
data b / (-41., 43.), (-47., 53.), (-59.,-61.), (-67., 71),
& ( 73.,79. ), (83.,-89.), (97.,-101.), (-107.,-109.)/
! Expected value of matmul(a,b), stored column by column.
data cres /(-1759.,217.), (2522.,-358.), (-396.,-2376.),
& (-2789.,-11.),
& (4322.,202.), (-1992.,-4584.), (3485.,3.), (-5408.,-244.),
& (2550.,5750.), (143.,-4379.), (-478.,6794.), (7104.,-2952.) /
c = matmul(a,b)
if (any (c /= cres)) stop 11
at = transpose(a)
! Overwrite c before recomputing; presumably so that a stale result
! cannot satisfy the next check.
c = (1.2,-2.2)
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 12
bt = transpose(b)
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 13
! ah is the conjugate transpose of a, so conjg(transpose(ah)) is a.
ah = transpose(conjg(a))
c = (1.2,-2.2)
c = matmul(conjg(transpose(ah)), b)
if (any (c /= cres)) stop 14
! bh is the conjugate transpose of b, so transpose(conjg(bh)) is b.
bh = transpose(conjg(b))
c = (1.2,-2.2)
c = matmul(a, transpose(conjg(bh)))
if (any (c /= cres)) stop 15
! c_alloc is unallocated here, so this assignment has to allocate it.
c_alloc = matmul(a,b)
! Verify the freshly assigned allocatable result; the original
! compared c here, which left c_alloc unchecked.
if (any (c_alloc /= cres)) stop 16
at = transpose(a)
deallocate (c_alloc)
! NOTE(review): from here on c_alloc is deallocated, allocated as
! 20x20 and later as 0x0, yet the products are still assigned to c.
! Presumably c_alloc was the intended target - confirm before changing.
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 17
bt = transpose(b)
allocate (c_alloc(20,20))
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 18
ah = transpose(conjg(a))
c = (1.2,-2.2)
c = matmul(conjg(transpose(ah)), b)
if (any (c /= cres)) stop 19
deallocate (c_alloc)
allocate (c_alloc(0,0))
bh = transpose(conjg(b))
c = (1.2,-2.2)
c = matmul(a, transpose(conjg(bh)))
if (any (c /= cres)) stop 20
end
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
C { dg-do run }
C { dg-options "-fno-realloc-lhs -fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" }
C { dg-shouldfail "Fortran runtime error: Array bound mismatch for dimension 2 of array." }
C { dg-additional-sources blas_gemm_routines.f }
! Bounds-check test for an allocatable MATMUL result.  a is 3x2 and
! b is 2x3, so matmul(a,b) has shape 3x3, while ret is explicitly
! allocated as 3x2.
program main
real, dimension(3,2) :: a
real, dimension(2,3) :: b
real, dimension(:,:), allocatable :: ret
a = 1.0
b = 2.3
allocate(ret(3,2))
! The second extent of ret (2) differs from that of the product (3);
! the test's dg-options pass -fno-realloc-lhs and -fcheck=bounds.
ret = matmul(a,b) ! This should throw an error.
end
! { dg-output "Fortran runtime error: Array bound mismatch for dimension 2 of array.*" }
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
C { dg-do run }
C { dg-options "-fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" }
C { dg-shouldfail "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1.*" }
C { dg-additional-sources blas_gemm_routines.f }
! Bounds-check test for mismatched MATMUL operands.  a is 3x2, so
! matmul(a,b) needs a b whose first extent is 2; b is allocated
! as 3x3 instead.
program main
character(len=20) :: line
integer :: n, m
real, dimension(3,2) :: a
real, dimension(:,:), allocatable :: b
real, dimension(:,:), allocatable :: ret
a = 1.0
! The extents of b are read from the character variable line rather
! than written as constants; presumably so the mismatch is only
! detectable at run time.
line = '3 3'
read (unit=line,fmt=*) n, m
allocate (b(n,m))
b = 2.3
! size(b,1) is 3 but size(a,2) is 2.
ret = matmul(a,b) ! This should throw an error.
end
! { dg-output "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1.*" }
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
C { dg-do run }
C { dg-options "-fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" }
C { dg-shouldfail "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1" }
C { dg-additional-sources blas_gemm_routines.f }
! Bounds-check test for mismatched MATMUL operands with a transposed
! first argument.  a is 3x2, so transpose(a) is 2x3 and the product
! needs a b whose first extent is 3; b is allocated as 4x3 instead.
program main
character(len=20) :: line
integer :: n, m
real, dimension(3,2) :: a
real, dimension(:,:), allocatable :: b
real, dimension(:,:), allocatable :: ret
a = 1.0
! The extents of b are read from the character variable line rather
! than written as constants; presumably so the mismatch is only
! detectable at run time.
line = '4 3'
read (unit=line,fmt=*) n, m
allocate (b(n,m))
b = 2.3
! size(b,1) is 4 but the second extent of transpose(a) is 3.
ret = matmul(transpose(a),b) ! This should throw an error.
end
! { dg-output "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1.*" }
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
! { dg-do compile }
! { dg-options "-fdefault-real-8 -fexternal-blas -fdump-tree-original -finline-matmul-limit=0" }
! { dg-options "-fdefault-real-8 -fexternal-blas -fblas-matmul-limit=1 -fdump-tree-original -finline-matmul-limit=0" }
!
! PR fortran/54463
!
......@@ -8,8 +8,9 @@
! Compile-only test: a default-real 3x3 matrix is multiplied by
! itself.  The dg-options in this file pass -fdefault-real-8 and
! -fexternal-blas, and the dg-final directives scan the dump for
! dgemm / sgemm.
program test
implicit none
real, dimension(3,3) :: A
! Give A a defined value before it is referenced in the product.
call random_number(a)
! A appears both as the result and as both operands.
A = matmul(A,A)
end program test
! { dg-final { scan-tree-dump-times "sgemm_" 0 "original" } }
! { dg-final { scan-tree-dump-times "dgemm_" 1 "original" } }
! { dg-final { scan-tree-dump-times "sgemm" 0 "original" } }
! { dg-final { scan-tree-dump-times "dgemm" 1 "original" } }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment