Commit f1abbf69 by Thomas Koenig

re PR fortran/37131 (inline matmul for small matrix sizes)

2015-05-06  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/37131
	* gfortran.h (gfc_isym_id):  Add GFC_ISYM_FE_RUNTIME_ERROR.
	(gfc_intrinsic_sym):  Add vararg.
	* intrinsic.h (gfc_check_fe_runtime_error):  Add prototype.
	(gfc_resolve_re_runtime_error):  Likewise.
	Add prototype for gfc_is_reallocatable_lhs.
	* trans-array.h (gfc_is_reallocatable_lhs):  Remove prototype.
	* check.c (gfc_check_fe_runtime_error):  New function.
	* intrinsic.c (add_sym_1p):  New function.
	(make_vararg):  New function.
	(add_subroutines):  Add fe_runtime_error.
	(gfc_intrinsic_sub_interface): Skip sorting for variable number
	of arguments.
	* iresolve.c (gfc_resolve_fe_runtime_error):  New function.
	* lang.opt (inline-matmul-limit):  New option.
	(gfc_post_options): If no inline matmul limit has been set and
	BLAS is called externally, use the BLAS limit.
	* frontend-passes.c:  Include intrinsic.h.
	(var_num):  New global counter for naming temporary variablbles.
	(matrix_case):  Enum for differentiating the different matmul
	cases.
	(realloc_string_callback):  Add "trim" to the variable name.
	(create_var): Add optional argument vname as part of the name.
	Use var_num. Set dimension of result correctly. Split off block
	creation into
	(insert_block): New function.
	(cfe_expr_0): Use "fcn" as part of temporary variable name.
	(optimize_namesapce): Also set gfc_current_ns. Call
	inline_matmul_assign.
	(combine_array_constructor):  Use "constr" as part of
	temporary name.
	(get_array_inq_function):  New function.
	(build_logical_expr):  New function.
	(get_operand):  new function.
	(inline_limit_check):  New function.
	(runtime_error_ne):  New function.
	(matmul_lhs_realloc):  New function.
	(is_functino_or_op):  New function.
	(has_function_or_op):  New function.
	(freeze_expr):  New function.
	(freeze_references):  New function.
	(convert_to_index_kind):  New function.
	(create_do_loop):  New function.
	(get_size_m1):  New function.
	(scalarized_expr):  New function.
	(inline_matmul_assign):  New function.
	* simplify.c (simplify_bound):  Simplify the case of the
	lower bound of an assumed-shape argument.

2015-05-06  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/37131
	* gfortran.dg/dependency_26.f90: Add option to suppress inlining
	matmul.
	* gfortran.dg/function_optimize_1.f90:  Likewise.
	* gfortran.dg/function_optimize_2.f90:  Likewise.
	* gfortran.dg/function_optimize_5.f90:  Likewise.
	* gfortran.dg/function_optimize_7.f90:  Likewise.
	* gfortran.dg/inline_matmul_1.f90:  New test.
	* gfortran.dg/inline_matmul_2.f90:  New test.
	* gfortran.dg/inline_matmul_3.f90:  New test.
	* gfortran.dg/inline_matmul_4.f90:  New test.
	* gfortran.dg/inline_matmul_5.f90:  New test.
	* gfortran.dg/inline_matmul_6.f90:  New test.

From-SVN: r222864
parent 5631585a
......@@ -5527,6 +5527,36 @@ gfc_check_random_seed (gfc_expr *size, gfc_expr *put, gfc_expr *get)
return true;
}
bool
gfc_check_fe_runtime_error (gfc_actual_arglist *a)
{
gfc_expr *e;
int len, i;
int num_percent, nargs;
e = a->expr;
if (e->expr_type != EXPR_CONSTANT)
return true;
len = e->value.character.length;
if (e->value.character.string[len-1] != '\0')
gfc_internal_error ("fe_runtime_error string must be null terminated");
num_percent = 0;
for (i=0; i<len-1; i++)
if (e->value.character.string[i] == '%')
num_percent ++;
nargs = 0;
for (; a; a = a->next)
nargs ++;
if (nargs -1 != num_percent)
gfc_internal_error ("fe_runtime_error: Wrong number of arguments (%d instead of %d)",
nargs, num_percent++);
return true;
}
bool
gfc_check_second_sub (gfc_expr *time)
......
......@@ -419,6 +419,7 @@ enum gfc_isym_id
GFC_ISYM_EXPONENT,
GFC_ISYM_EXTENDS_TYPE_OF,
GFC_ISYM_FDATE,
GFC_ISYM_FE_RUNTIME_ERROR,
GFC_ISYM_FGET,
GFC_ISYM_FGETC,
GFC_ISYM_FLOOR,
......@@ -1001,7 +1002,6 @@ typedef struct
bool cp_was_assumed; /* AS_ASSUMED_SIZE cp arrays are converted to
AS_EXPLICIT, but we want to remember that we
did this. */
}
gfc_array_spec;
......@@ -1907,7 +1907,7 @@ typedef struct gfc_intrinsic_sym
gfc_typespec ts;
unsigned elemental:1, inquiry:1, transformational:1, pure:1,
generic:1, specific:1, actual_ok:1, noreturn:1, conversion:1,
from_module:1;
from_module:1, vararg:1;
int standard;
......@@ -3231,4 +3231,8 @@ int gfc_code_walker (gfc_code **, walk_code_fn_t, walk_expr_fn_t, void *);
void gfc_convert_mpz_to_signed (mpz_t, int);
/* trans-array.c */
bool gfc_is_reallocatable_lhs (gfc_expr *);
#endif /* GCC_GFORTRAN_H */
......@@ -520,6 +520,29 @@ add_sym_1s (const char *name, gfc_isym_id id, enum klass cl, bt type, int kind,
(void *) 0);
}
/* Add a symbol to the subroutine ilst where the subroutine takes one
printf-style character argument and a variable number of arguments
to follow. */
static void
add_sym_1p (const char *name, gfc_isym_id id, enum klass cl, bt type, int kind,
int standard, bool (*check) (gfc_actual_arglist *),
gfc_expr *(*simplify) (gfc_expr*), void (*resolve) (gfc_code *),
const char *a1, bt type1, int kind1, int optional1, sym_intent intent1)
{
gfc_check_f cf;
gfc_simplify_f sf;
gfc_resolve_f rf;
cf.f1m = check;
sf.f1 = simplify;
rf.s1 = resolve;
add_sym (name, id, cl, ACTUAL_NO, type, kind, standard, cf, sf, rf,
a1, type1, kind1, optional1, intent1,
(void *) 0);
}
/* Add a symbol from the MAX/MIN family of intrinsic functions to the
function. MAX et al take 2 or more arguments. */
......@@ -1159,6 +1182,17 @@ make_from_module (void)
next_sym[-1].from_module = 1;
}
/* Mark the current subroutine as having a variable number of
arguments. */
static void
make_vararg (void)
{
if (sizing == SZ_NOTHING)
next_sym[-1].vararg = 1;
}
/* Set the attr.value of the current procedure. */
static void
......@@ -3292,6 +3326,17 @@ add_subroutines (void)
"fptr", BT_UNKNOWN, 0, REQUIRED, INTENT_OUT);
make_from_module();
/* Internal subroutine for emitting a runtime error. */
add_sym_1p ("fe_runtime_error", GFC_ISYM_FE_RUNTIME_ERROR, CLASS_IMPURE,
BT_UNKNOWN, 0, GFC_STD_GNU,
gfc_check_fe_runtime_error, NULL, gfc_resolve_fe_runtime_error,
"msg", BT_CHARACTER, dc, REQUIRED, INTENT_IN);
make_noreturn ();
make_vararg ();
make_from_module ();
/* Coarray collectives. */
add_sym_4s ("co_broadcast", GFC_ISYM_CO_BROADCAST, CLASS_IMPURE,
BT_UNKNOWN, 0, GFC_STD_F2008_TS,
......@@ -4501,7 +4546,7 @@ gfc_intrinsic_sub_interface (gfc_code *c, int error_flag)
init_arglist (isym);
if (!sort_actual (name, &c->ext.actual, isym->formal, &c->loc))
if (!isym->vararg && !sort_actual (name, &c->ext.actual, isym->formal, &c->loc))
goto fail;
if (!do_ts29113_check (isym, c->ext.actual))
......
......@@ -190,6 +190,7 @@ bool gfc_check_system_clock (gfc_expr *, gfc_expr *, gfc_expr *);
bool gfc_check_date_and_time (gfc_expr *, gfc_expr *, gfc_expr *, gfc_expr *);
bool gfc_check_exit (gfc_expr *);
bool gfc_check_fdate_sub (gfc_expr *);
bool gfc_check_fe_runtime_error (gfc_actual_arglist *);
bool gfc_check_flush (gfc_expr *);
bool gfc_check_free (gfc_expr *);
bool gfc_check_fstat_sub (gfc_expr *, gfc_expr *, gfc_expr *);
......@@ -602,6 +603,7 @@ void gfc_resolve_ctime_sub (gfc_code *);
void gfc_resolve_execute_command_line (gfc_code *);
void gfc_resolve_exit (gfc_code *);
void gfc_resolve_fdate_sub (gfc_code *);
void gfc_resolve_fe_runtime_error (gfc_code *);
void gfc_resolve_flush (gfc_code *);
void gfc_resolve_free (gfc_code *);
void gfc_resolve_fseek_sub (gfc_code *);
......
......@@ -178,6 +178,7 @@ and warnings}.
-finit-character=@var{n} -finit-integer=@var{n} -finit-local-zero @gol
-finit-logical=@var{<true|false>}
-finit-real=@var{<zero|inf|-inf|nan|snan>} @gol
-finline-matmul-limit=@var{n} @gol
-fmax-array-constructor=@var{n} -fmax-stack-var-size=@var{n}
-fno-align-commons @gol
-fno-automatic -fno-protect-parens -fno-underscoring @gol
......@@ -1537,6 +1538,22 @@ geometric mean of the dimensions of the argument and result matrices.
The default value for @var{n} is 30.
@item -finline-matmul-limit=@var{n}
@opindex @code{finline-matmul-limit}
When front-end optimiztion is active, some calls to the @code{MATMUL}
intrinsic function will be inlined. This may result in code size
increase if the size of the matrix cannot be determined at compile
time, as code for both cases is generated. Setting
@code{-finline-matmul-limit=0} will disable inlining in all cases.
Setting this option with a value of @var{n} will produce inline code
for matrices with size up to @var{n}. If the matrices involved are not
square, the size comparison is performed using the geometric mean of
the dimensions of the argument and result matrices.
The default value for @var{n} is the value specified for
@code{-fblas-matmul-limit} if this option is specified, or unlimitited
otherwise.
@item -frecursive
@opindex @code{frecursive}
Allow indirect recursion by forcing all local arrays to be allocated
......@@ -1632,11 +1649,12 @@ if @option{-ffrontend-optimize} is in effect.
@cindex Front-end optimization
This option performs front-end optimization, based on manipulating
parts the Fortran parse tree. Enabled by default by any @option{-O}
option. Optimizations enabled by this option include elimination of
identical function calls within expressions, removing unnecessary
calls to @code{TRIM} in comparisons and assignments and replacing
@code{TRIM(a)} with @code{a(1:LEN_TRIM(a))}.
It can be deselected by specifying @option{-fno-frontend-optimize}.
option. Optimizations enabled by this option include inlining calls
to @code{MATMUL}, elimination of identical function calls within
expressions, removing unnecessary calls to @code{TRIM} in comparisons
and assignments and replacing @code{TRIM(a)} with
@code{a(1:LEN_TRIM(a))}. It can be deselected by specifying
@option{-fno-frontend-optimize}.
@end table
@xref{Code Gen Options,,Options for Code Generation Conventions,
......
......@@ -2197,6 +2197,19 @@ gfc_resolve_rrspacing (gfc_expr *f, gfc_expr *x)
f->value.function.name = gfc_get_string ("__rrspacing_%d", x->ts.kind);
}
void
gfc_resolve_fe_runtime_error (gfc_code *c)
{
const char *name;
gfc_actual_arglist *a;
name = gfc_get_string (PREFIX ("runtime_error"));
for (a = c->ext.actual->next; a; a = a->next)
a->name = "%VAL";
c->resolved_sym = gfc_get_intrinsic_sub_symbol (name);
}
void
gfc_resolve_scale (gfc_expr *f, gfc_expr *x, gfc_expr *i ATTRIBUTE_UNUSED)
......
......@@ -542,6 +542,10 @@ Enum(gfc_init_local_real) String(inf) Value(GFC_INIT_REAL_INF)
EnumValue
Enum(gfc_init_local_real) String(-inf) Value(GFC_INIT_REAL_NEG_INF)
finline-matmul-limit=
Fortran RejectNegative Joined UInteger Var(flag_inline_matmul_limit) Init(-1)
-finline-matmul-limit=<n> Specify the size of the largest matrix for which matmul will be inlined
fmax-array-constructor=
Fortran RejectNegative Joined UInteger Var(flag_max_array_constructor) Init(65535)
-fmax-array-constructor=<n> Maximum number of objects in an array constructor
......
......@@ -378,6 +378,11 @@ gfc_post_options (const char **pfilename)
if (!flag_automatic)
flag_max_stack_var_size = 0;
/* If we call BLAS directly, only inline up to the BLAS limit. */
if (flag_external_blas && flag_inline_matmul_limit < 0)
flag_inline_matmul_limit = flag_blas_matmul_limit;
/* Optimization implies front end optimization, unless the user
specified it directly. */
......
......@@ -64,8 +64,6 @@ tree gfc_copy_only_alloc_comp (gfc_symbol *, tree, tree, int);
tree gfc_alloc_allocatable_for_assignment (gfc_loopinfo*, gfc_expr*, gfc_expr*);
bool gfc_is_reallocatable_lhs (gfc_expr *);
/* Add initialization for deferred arrays. */
void gfc_trans_deferred_array (gfc_symbol *, gfc_wrapped_block *);
/* Generate an initializer for a static pointer or allocatable array. */
......
! { dg-do compile }
! { dg-options "-fdump-tree-original" }
! { dg-options "-finline-matmul-limit=0 -fdump-tree-original" }
!
! Test the fix for PR36932 and PR36933, in which unnecessary
! temporaries were being generated. The module m2 tests the
......
! { dg-do compile }
! { dg-options "-O -fdump-tree-original -Warray-temporaries" }
! { dg-options "-O -fdump-tree-original -finline-matmul-limit=0 -Warray-temporaries" }
program main
implicit none
real, dimension(2,2) :: a, b, c, d
......
! { dg-do compile }
! { dg-options "-O -faggressive-function-elimination -fdump-tree-original" }
! { dg-options "-O -finline-matmul-limit=0 -faggressive-function-elimination -fdump-tree-original" }
program main
implicit none
real, dimension(2,2) :: a, b, c, d
......
! { dg-do compile }
! { dg-options "-ffrontend-optimize -Wfunction-elimination" }
! { dg-options "-ffrontend-optimize -finline-matmul-limit=0 -Wfunction-elimination" }
! Check the -ffrontend-optimize (in the absence of -O) and
! -Wfunction-elimination options.
program main
......
! { dg-do compile }
! { dg-options "-O -fdump-tree-original -Warray-temporaries" }
! { dg-options "-O -fdump-tree-original -Warray-temporaries -finline-matmul-limit=0" }
subroutine xx(n, m, a, b, c, d, x, z, i, s_in, s_out)
implicit none
integer, intent(in) :: n, m
......
! { dg-do run }
! { dg-options "-ffrontend-optimize -fdump-tree-original -Wrealloc-lhs" }
! PR 37131 - check basic functionality of inlined matmul, making
! sure that the library is not called, with and without reallocation.
program main
implicit none
integer, parameter :: offset = -2
real, dimension(3,2) :: a
real, dimension(2,4) :: b
real, dimension(3,4) :: c
real, dimension(3,4) :: cres
real, dimension(:,:), allocatable :: c_alloc
integer, parameter :: a1_lower_p = 1 + offset, a1_upper_p = size(a,1) + offset
integer, parameter :: a2_lower_p = 1 + offset, a2_upper_p = size(a,2) + offset
integer, parameter :: b1_lower_p = 1 + offset, b1_upper_p = size(b,1) + offset
integer, parameter :: b2_lower_p = 1 + offset, b2_upper_p = size(b,2) + offset
integer, parameter :: c1_lower_p = 1 + offset, c1_upper_p = size(c,1) + offset
integer, parameter :: c2_lower_p = 1 + offset, c2_upper_p = size(c,2) + offset
real, dimension(a1_lower_p:a1_upper_p, a2_lower_p:a2_upper_p) :: ap
real, dimension(b1_lower_p:b1_upper_p, b2_lower_p:b2_upper_p) :: bp
real, dimension(c1_lower_p:c1_upper_p, c2_lower_p:c2_upper_p) :: cp
real, dimension(4,8,4) :: f, fresult
integer :: eight = 8, two = 2
type foo
real :: a
integer :: i
end type foo
type(foo), dimension(3,2) :: afoo
type(foo), dimension(2,4) :: bfoo
type(foo), dimension(3,4) :: cfoo
data a / 2., -3., 5., -7., 11., -13./
data b /17., -23., 29., -31., 37., -39., 41., -47./
data cres /195., -304., 384., 275., -428., 548., 347., -540., 692., 411., -640., 816./
data fresult / &
0., 0., 195., 0., 0., 17., 0., 0., 0., -23.,-304., 0., 0., 0., 0., 0., &
0., 0., 384., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., &
2., 0., 275., 0., -3., 29., 0., 0., 5., -31.,-428., 0., 0., 0., 0., 0., &
0., 0., 548., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., &
-7., 0., 347., 0., 11., 37., 0., 0., -13., -39.,-540., 0., 0., 0., 0., 0., &
0., 0., 692., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., &
0., 0., 411., 0., 0., 41., 0., 0., 0., -47.,-640., 0., 0., 0., 0., 0., &
0., 0., 816., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0./
integer :: a1 = size(a,1), a2 = size(a,2)
integer :: b1 = size(b,1), b2 = size(b,2)
integer :: c1 = size(c,1), c2 = size(c,2)
integer :: a1_lower, a1_upper, a2_lower, a2_upper
integer :: b1_lower, b1_upper, b2_lower, b2_upper
integer :: c1_lower, c1_upper, c2_lower, c2_upper
a1_lower = 1 + offset ; a1_upper = a1 + offset
a2_lower = 1 + offset ; a2_upper = a2 + offset
b1_lower = 1 + offset ; b1_upper = b1 + offset
b2_lower = 1 + offset ; b2_upper = b2 + offset
c1_lower = 1 + offset ; c1_upper = c1 + offset
c2_lower = 1 + offset ; c2_upper = c2 + offset
c = matmul(a,b)
if (sum(abs(c-cres))>1e-4) call abort
c_alloc = matmul(a,b) ! { dg-warning "Code for reallocating the allocatable array" }
if (sum(abs(c_alloc-cres))>1e-4) call abort
if (any([size(c_alloc,1), size(c_alloc,2)] /= [3,4])) call abort
deallocate(c_alloc)
allocate(c_alloc(4,4))
c_alloc = matmul(a,b) ! { dg-warning "Code for reallocating the allocatable array" }
if (sum(abs(c_alloc-cres))>1e-4) call abort
if (any([size(c_alloc,1), size(c_alloc,2)] /= [3,4])) call abort
deallocate(c_alloc)
allocate(c_alloc(3,3))
c_alloc = matmul(a,b) ! { dg-warning "Code for reallocating the allocatable array" }
if (sum(abs(c_alloc-cres))>1e-4) call abort
if (any([size(c_alloc,1), size(c_alloc,2)] /= [3,4])) call abort
c_alloc = 42.
c_alloc(:,:) = matmul(a,b)
if (sum(abs(c_alloc-cres))>1e-4) call abort
if (any([size(c_alloc,1), size(c_alloc,2)] /= [3,4])) call abort
deallocate(c_alloc)
ap = a
bp = b
cp = matmul(ap, bp)
if (sum(abs(cp-cres)) > 1e-4) call abort
f = 0
f(1,1:3,2:3) = a
f(2,2:3,:) = b
c = matmul(f(1,1:3,2:3), f(2,2:3,:))
if (sum(abs(c-cres))>1e-4) call abort
f(3,1:eight:2,:) = matmul(a, b)
if (sum(abs(f(3,1:eight:2,:)-cres))>1e-4) call abort
afoo%a = a
bfoo%a = b
cfoo%a = matmul(afoo%a, bfoo%a)
if (sum(abs(cfoo%a-cres)) > 1e-4) call abort
block
real :: aa(a1, a2), bb(b1, b2), cc(c1, c2)
real :: am(a1_lower:a1_upper, a2_lower:a2_upper)
real :: bm(b1_lower:b1_upper, b2_lower:b2_upper)
real :: cm(c1_lower:c1_upper, c2_lower:c2_upper)
aa = a
bb = b
am = a
bm = b
cc = matmul(aa,bb)
if (sum(cc-cres)>1e-4) call abort
c_alloc = matmul(aa,bb) ! { dg-warning "Code for reallocating the allocatable array" }
if (sum(abs(c_alloc-cres))>1e-4) call abort
if (any([size(c_alloc,1), size(c_alloc,2)] /= [3,4])) call abort
c_alloc = 42.
deallocate(c_alloc)
allocate(c_alloc(4,4))
c_alloc = matmul(aa,bb) ! { dg-warning "Code for reallocating the allocatable array" }
if (sum(abs(c_alloc-cres))>1e-4) call abort
if (any([size(c_alloc,1), size(c_alloc,2)] /= [3,4])) call abort
deallocate(c_alloc)
allocate(c_alloc(3,3))
c_alloc = matmul(aa,bb) ! { dg-warning "Code for reallocating the allocatable array" }
if (sum(abs(c_alloc-cres))>1e-4) call abort
if (any([size(c_alloc,1), size(c_alloc,2)] /= [3,4])) call abort
deallocate(c_alloc)
cm = matmul(am, bm)
if (sum(abs(cm-cres)) > 1e-4) call abort
cm = 42.
cm(:,:) = matmul(a,bm)
if (sum(abs(cm-cres)) > 1e-4) call abort
end block
end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } }
! { dg-do compile }
! { dg-options "-ffrontend-optimize -finline-matmul-limit=0 -fdump-tree-original" }
! PR 37131 - no inlining with -finline-matmul-limit=0
program main
real, dimension(3,2) :: a
real, dimension(2,4) :: b
real, dimension(3,4) :: c
real, dimension(3,4) :: cres
real, dimension(:,:), allocatable :: calloc
integer :: a1 = size(a,1), a2 = size(a,2)
integer :: b1 = size(b,1), b2 = size(b,2)
integer :: c1 = size(c,1), c2 = size(c,2)
data a / 2., -3., 5., -7., 11., -13./
data b /17., -23., 29., -31., 37., -39., 41., -47./
data cres /195., -304., 384., 275., -428., 548., 347., -540., 692., 411., -640., 816./
c = matmul(a,b)
if (sum(c-cres)>1e-4) call abort
calloc = matmul(a,b)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
allocate(calloc(4,4))
calloc = matmul(a,b)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
allocate(calloc(3,3))
calloc = matmul(a,b)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
block
real :: aa(a1, a2), bb(b1, b2), cc(c1, c2)
aa = a
bb = b
cc = matmul(aa,bb)
if (sum(cc-cres)>1e-4) call abort
calloc = matmul(aa,bb)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
calloc = 42.
deallocate(calloc)
allocate(calloc(4,4))
calloc = matmul(aa,bb)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
allocate(calloc(3,3))
calloc = matmul(aa,bb)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
end block
end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 8 "original" } }
! { dg-final { cleanup-tree-dump "original" } }
! { dg-do run }
! { dg-options "-O3 -finline-matmul-limit=2 -fdump-tree-optimized" }
! PR 37131 - all calls to matmul should be kept
program main
real, dimension(3,2) :: a
real, dimension(2,4) :: b
real, dimension(3,4) :: c
real, dimension(3,4) :: cres
real, dimension(:,:), allocatable :: calloc
integer :: a1 = size(a,1), a2 = size(a,2)
integer :: b1 = size(b,1), b2 = size(b,2)
integer :: c1 = size(c,1), c2 = size(c,2)
data a / 2., -3., 5., -7., 11., -13./
data b /17., -23., 29., -31., 37., -39., 41., -47./
data cres /195., -304., 384., 275., -428., 548., 347., -540., 692., 411., -640., 816./
c = matmul(a,b)
if (sum(c-cres)>1e-4) call abort
calloc = matmul(a,b)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
allocate(calloc(4,4))
calloc = matmul(a,b)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
allocate(calloc(3,3))
calloc = matmul(a,b)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
block
real :: aa(a1, a2), bb(b1, b2), cc(c1, c2)
aa = a
bb = b
cc = matmul(aa,bb)
if (sum(cc-cres)>1e-4) call abort
calloc = matmul(aa,bb)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
calloc = 42.
deallocate(calloc)
allocate(calloc(4,4))
calloc = matmul(aa,bb)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
allocate(calloc(3,3))
calloc = matmul(aa,bb)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
end block
end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 8 "optimized" } }
! { dg-final { cleanup-tree-dump "optimized" } }
! { dg-do run }
! { dg-options "-O3 -finline-matmul-limit=10 -fdump-tree-optimized -fdump-tree-original" }
! PR 37131 - all calls to matmul should be optimized away with -O3
! and the high limit.
program main
real, dimension(3,2) :: a
real, dimension(2,4) :: b
real, dimension(3,4) :: c
real, dimension(3,4) :: cres
real, dimension(:,:), allocatable :: calloc
integer :: a1 = size(a,1), a2 = size(a,2)
integer :: b1 = size(b,1), b2 = size(b,2)
integer :: c1 = size(c,1), c2 = size(c,2)
data a / 2., -3., 5., -7., 11., -13./
data b /17., -23., 29., -31., 37., -39., 41., -47./
data cres /195., -304., 384., 275., -428., 548., 347., -540., 692., 411., -640., 816./
c = matmul(a,b)
if (sum(c-cres)>1e-4) call abort
calloc = matmul(a,b)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
allocate(calloc(4,4))
calloc = matmul(a,b)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
allocate(calloc(3,3))
calloc = matmul(a,b)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
block
real :: aa(a1, a2), bb(b1, b2), cc(c1, c2)
aa = a
bb = b
cc = matmul(aa,bb)
if (sum(cc-cres)>1e-4) call abort
calloc = matmul(aa,bb)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
calloc = 42.
deallocate(calloc)
allocate(calloc(4,4))
calloc = matmul(aa,bb)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
allocate(calloc(3,3))
calloc = matmul(aa,bb)
if (sum(calloc-cres)>1e-4) call abort
if (any([size(calloc,1), size(calloc,2)] /= [3,4])) call abort
deallocate(calloc)
end block
end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 4 "original" } }
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
! { dg-final { cleanup-tree-dump "original" } }
! { dg-final { cleanup-tree-dump "optimized" } }
! { dg-do run }
! { dg-options "-ffrontend-optimize" }
program main
real, dimension(2,2) :: a,b,c
data a /2., 4., 8., 16. /
data b /3., 9., 27., 81./
c = matmul(a,b)
a = matmul(a,b)
if (any(a /= c)) call abort
end program main
! { dg-do run }
! { dg-options "-ffrontend-optimize -fdump-tree-original" }
! PR 37131 - check rank1/rank2 and rank2/rank1 cases for inline matmul.
module foo
implicit none
contains
subroutine a1b2(a,b,c)
real, dimension(:), intent(in) :: a
real, dimension(:,:), intent(in) :: b
real, dimension(:), intent(out) :: c
c = matmul(a,b)
end subroutine a1b2
subroutine a2b1(a,b,c)
real, dimension(:,:), intent(in) :: a
real, dimension(:), intent(in) :: b
real, dimension(:), intent(out) :: c
c = matmul(a,b)
end subroutine a2b1
end module foo
program main
use foo
implicit none
real, dimension(3) :: a1
real, dimension(3,2) :: b1
real, dimension(2) :: c1
real, dimension(3,2) :: a2
real, dimension(2) :: b2
real, dimension(3) :: c2
data a1 /17., -23., 29./
data b1 / 2., -3., 5., -7., 11., -13./
data b2/-2.,5./
a2 = -b1
call a1b2(a1,b1,c1)
if (any(abs(c1 - (/248., -749./)) > 1e-3)) call abort
call a2b1(a2,b2,c2)
if (any(abs(c2 - (/39., -61., 75./)) > 1e-3)) call abort
end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } }
! { dg-final { cleanup-tree-dump "original" } }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment