Commit 998511a6 by Thomas Koenig

re PR fortran/29550 (Optimize -fexternal-blas calls for conjg())

2018-09-18  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/29550
	* gfortran.h (gfc_expr): Add external_blas flag.
	* frontend-passes.c (matrix_case): Add case A2TB2T.
	(optimize_namespace): Handle flag_external_blas by
	calling call_external_blas.
	(get_array_inq_function): Add argument okind. If
	it is nonzero, use it as the kind of argument
	to be used.
	(inline_limit_check): Remove m_case argument, add
	limit argument instead.  Remove assert about m_case.
	Set the limit for inlining from the limit argument.
	(matmul_lhs_realloc): Handle case A2TB2T.
	(inline_matmul_assign): Handle inline limit for other cases with
	two rank-two matrices.  Remove no-op calls to inline_limit_check.
	(call_external_blas): New function.
	* trans-intrinsic.c (gfc_conv_intrinsic_funcall): Do not add
	argument to external BLAS if external_blas is already set.

2018-09-18  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/29550
	* gfortran.dg/inline_matmul_13.f90: Adjust count for
	_gfortran_matmul.
	* gfortran.dg/inline_matmul_16.f90: Likewise.
	* gfortran.dg/promotion_2.f90: Add -fblas-matmul-limit=1.  Scan
	for dgemm instead of dgemm_.  Add call to random_number to make
	standard conforming.
	* gfortran.dg/matmul_blas_1.f90: New test.
	* gfortran.dg/matmul_bounds_14.f: New test.
	* gfortran.dg/matmul_bounds_15.f: New test.
	* gfortran.dg/matmul_bounds_16.f: New test.
	* gfortran.dg/blas_gemm_routines.f: New test / additional file for
	preceding tests.

From-SVN: r264412
parent 5c470e0f
...@@ -53,6 +53,7 @@ static gfc_code * create_do_loop (gfc_expr *, gfc_expr *, gfc_expr *, ...@@ -53,6 +53,7 @@ static gfc_code * create_do_loop (gfc_expr *, gfc_expr *, gfc_expr *,
char *vname=NULL); char *vname=NULL);
static gfc_expr* check_conjg_transpose_variable (gfc_expr *, bool *, static gfc_expr* check_conjg_transpose_variable (gfc_expr *, bool *,
bool *); bool *);
static int call_external_blas (gfc_code **, int *, void *);
static bool has_dimen_vector_ref (gfc_expr *); static bool has_dimen_vector_ref (gfc_expr *);
static int matmul_temp_args (gfc_code **, int *,void *data); static int matmul_temp_args (gfc_code **, int *,void *data);
static int index_interchange (gfc_code **, int*, void *); static int index_interchange (gfc_code **, int*, void *);
...@@ -131,7 +132,7 @@ static int var_num = 1; ...@@ -131,7 +132,7 @@ static int var_num = 1;
/* What sort of matrix we are dealing with when inlining MATMUL. */ /* What sort of matrix we are dealing with when inlining MATMUL. */
enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T, A2TB2 }; enum matrix_case { none=0, A2B2, A2B1, A1B2, A2B2T, A2TB2, A2TB2T };
/* Keep track of the number of expressions we have inserted so far /* Keep track of the number of expressions we have inserted so far
using create_var. */ using create_var. */
...@@ -1428,7 +1429,7 @@ optimize_namespace (gfc_namespace *ns) ...@@ -1428,7 +1429,7 @@ optimize_namespace (gfc_namespace *ns)
gfc_code_walker (&ns->code, convert_elseif, dummy_expr_callback, NULL); gfc_code_walker (&ns->code, convert_elseif, dummy_expr_callback, NULL);
gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL); gfc_code_walker (&ns->code, cfe_code, cfe_expr_0, NULL);
gfc_code_walker (&ns->code, optimize_code, optimize_expr, NULL); gfc_code_walker (&ns->code, optimize_code, optimize_expr, NULL);
if (flag_inline_matmul_limit != 0) if (flag_inline_matmul_limit != 0 || flag_external_blas)
{ {
bool found; bool found;
do do
...@@ -1441,9 +1442,15 @@ optimize_namespace (gfc_namespace *ns) ...@@ -1441,9 +1442,15 @@ optimize_namespace (gfc_namespace *ns)
gfc_code_walker (&ns->code, matmul_temp_args, dummy_expr_callback, gfc_code_walker (&ns->code, matmul_temp_args, dummy_expr_callback,
NULL); NULL);
gfc_code_walker (&ns->code, inline_matmul_assign, dummy_expr_callback,
NULL);
} }
if (flag_external_blas)
gfc_code_walker (&ns->code, call_external_blas, dummy_expr_callback,
NULL);
if (flag_inline_matmul_limit != 0)
gfc_code_walker (&ns->code, inline_matmul_assign, dummy_expr_callback,
NULL);
} }
if (flag_frontend_loop_interchange) if (flag_frontend_loop_interchange)
...@@ -2938,7 +2945,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, ...@@ -2938,7 +2945,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
dim is zero-based. */ dim is zero-based. */
static gfc_expr * static gfc_expr *
get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim) get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim, int okind = 0)
{ {
gfc_expr *fcn; gfc_expr *fcn;
gfc_expr *dim_arg, *kind; gfc_expr *dim_arg, *kind;
...@@ -2964,8 +2971,12 @@ get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim) ...@@ -2964,8 +2971,12 @@ get_array_inq_function (gfc_isym_id id, gfc_expr *e, int dim)
} }
dim_arg = gfc_get_int_expr (gfc_default_integer_kind, &e->where, dim); dim_arg = gfc_get_int_expr (gfc_default_integer_kind, &e->where, dim);
kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where, if (okind != 0)
gfc_index_integer_kind); kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where,
okind);
else
kind = gfc_get_int_expr (gfc_default_integer_kind, &e->where,
gfc_index_integer_kind);
ec = gfc_copy_expr (e); ec = gfc_copy_expr (e);
...@@ -3026,7 +3037,7 @@ get_operand (gfc_intrinsic_op op, gfc_expr *e1, gfc_expr *e2) ...@@ -3026,7 +3037,7 @@ get_operand (gfc_intrinsic_op op, gfc_expr *e1, gfc_expr *e2)
removed by DCE. Only called for rank-two matrices A and B. */ removed by DCE. Only called for rank-two matrices A and B. */
static gfc_code * static gfc_code *
inline_limit_check (gfc_expr *a, gfc_expr *b, enum matrix_case m_case) inline_limit_check (gfc_expr *a, gfc_expr *b, int limit)
{ {
gfc_expr *inline_limit; gfc_expr *inline_limit;
gfc_code *if_1, *if_2, *else_2; gfc_code *if_1, *if_2, *else_2;
...@@ -3034,14 +3045,11 @@ inline_limit_check (gfc_expr *a, gfc_expr *b, enum matrix_case m_case) ...@@ -3034,14 +3045,11 @@ inline_limit_check (gfc_expr *a, gfc_expr *b, enum matrix_case m_case)
gfc_typespec ts; gfc_typespec ts;
gfc_expr *cond; gfc_expr *cond;
gcc_assert (m_case == A2B2 || m_case == A2B2T || m_case == A2TB2);
/* Calculation is done in real to avoid integer overflow. */ /* Calculation is done in real to avoid integer overflow. */
inline_limit = gfc_get_constant_expr (BT_REAL, gfc_default_real_kind, inline_limit = gfc_get_constant_expr (BT_REAL, gfc_default_real_kind,
&a->where); &a->where);
mpfr_set_si (inline_limit->value.real, flag_inline_matmul_limit, mpfr_set_si (inline_limit->value.real, limit, GFC_RND_MODE);
GFC_RND_MODE);
mpfr_pow_ui (inline_limit->value.real, inline_limit->value.real, 3, mpfr_pow_ui (inline_limit->value.real, inline_limit->value.real, 3,
GFC_RND_MODE); GFC_RND_MODE);
...@@ -3235,6 +3243,22 @@ matmul_lhs_realloc (gfc_expr *c, gfc_expr *a, gfc_expr *b, ...@@ -3235,6 +3243,22 @@ matmul_lhs_realloc (gfc_expr *c, gfc_expr *a, gfc_expr *b,
get_array_inq_function (GFC_ISYM_SIZE, b, 2)); get_array_inq_function (GFC_ISYM_SIZE, b, 2));
break; break;
case A2TB2T:
/* This can only happen for BLAS, we do not handle that case in
inline mamtul. */
ar->start[0] = get_array_inq_function (GFC_ISYM_SIZE, a, 2);
ar->start[1] = get_array_inq_function (GFC_ISYM_SIZE, b, 1);
ne1 = build_logical_expr (INTRINSIC_NE,
get_array_inq_function (GFC_ISYM_SIZE, c, 1),
get_array_inq_function (GFC_ISYM_SIZE, a, 2));
ne2 = build_logical_expr (INTRINSIC_NE,
get_array_inq_function (GFC_ISYM_SIZE, c, 2),
get_array_inq_function (GFC_ISYM_SIZE, b, 1));
cond = build_logical_expr (INTRINSIC_OR, ne1, ne2);
break;
default: default:
gcc_unreachable(); gcc_unreachable();
...@@ -3946,9 +3970,11 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, ...@@ -3946,9 +3970,11 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
/* Take care of the inline flag. If the limit check evaluates to a /* Take care of the inline flag. If the limit check evaluates to a
constant, dead code elimination will eliminate the unneeded branch. */ constant, dead code elimination will eliminate the unneeded branch. */
if (m_case == A2B2 && flag_inline_matmul_limit > 0) if (flag_inline_matmul_limit > 0 && matrix_a->rank == 2
&& matrix_b->rank == 2)
{ {
if_limit = inline_limit_check (matrix_a, matrix_b, m_case); if_limit = inline_limit_check (matrix_a, matrix_b,
flag_inline_matmul_limit);
/* Insert the original statement into the else branch. */ /* Insert the original statement into the else branch. */
if_limit->block->block->next = co; if_limit->block->block->next = co;
...@@ -4118,7 +4144,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, ...@@ -4118,7 +4144,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
switch (m_case) switch (m_case)
{ {
case A2B2: case A2B2:
inline_limit_check (matrix_a, matrix_b, m_case);
u1 = get_size_m1 (matrix_b, 2); u1 = get_size_m1 (matrix_b, 2);
u2 = get_size_m1 (matrix_a, 2); u2 = get_size_m1 (matrix_a, 2);
...@@ -4151,7 +4176,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, ...@@ -4151,7 +4176,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
break; break;
case A2B2T: case A2B2T:
inline_limit_check (matrix_a, matrix_b, m_case);
u1 = get_size_m1 (matrix_b, 1); u1 = get_size_m1 (matrix_b, 1);
u2 = get_size_m1 (matrix_a, 2); u2 = get_size_m1 (matrix_a, 2);
...@@ -4184,7 +4208,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, ...@@ -4184,7 +4208,6 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
break; break;
case A2TB2: case A2TB2:
inline_limit_check (matrix_a, matrix_b, m_case);
u1 = get_size_m1 (matrix_a, 2); u1 = get_size_m1 (matrix_a, 2);
u2 = get_size_m1 (matrix_b, 2); u2 = get_size_m1 (matrix_b, 2);
...@@ -4311,6 +4334,405 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, ...@@ -4311,6 +4334,405 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
return 0; return 0;
} }
/* Change matmul function calls in the form of
c = matmul(a,b)
to the corresponding call to a BLAS routine, if applicable. */
static int
call_external_blas (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
void *data ATTRIBUTE_UNUSED)
{
gfc_code *co, *co_next;
gfc_expr *expr1, *expr2;
gfc_expr *matrix_a, *matrix_b;
gfc_code *if_limit = NULL;
gfc_actual_arglist *a, *b;
bool conjg_a, conjg_b, transpose_a, transpose_b;
gfc_code *call;
const char *blas_name;
const char *transa, *transb;
gfc_expr *c1, *c2, *b1;
gfc_actual_arglist *actual, *next;
bt type;
int kind;
enum matrix_case m_case;
bool realloc_c;
gfc_code **next_code_point;
/* Many of the tests for inline matmul also apply here. */
co = *c;
if (co->op != EXEC_ASSIGN)
return 0;
if (in_where || in_assoc_list)
return 0;
/* The BLOCKS generated for the temporary variables and FORALL don't
mix. */
if (forall_level > 0)
return 0;
/* For now don't do anything in OpenMP workshare, it confuses
its translation, which expects only the allowed statements in there. */
if (in_omp_workshare)
return 0;
expr1 = co->expr1;
expr2 = co->expr2;
if (expr2->expr_type != EXPR_FUNCTION
|| expr2->value.function.isym == NULL
|| expr2->value.function.isym->id != GFC_ISYM_MATMUL)
return 0;
type = expr2->ts.type;
kind = expr2->ts.kind;
/* Guard against recursion. */
if (expr2->external_blas)
return 0;
if (type != expr1->ts.type || kind != expr1->ts.kind)
return 0;
if (type == BT_REAL)
{
if (kind == 4)
blas_name = "sgemm";
else if (kind == 8)
blas_name = "dgemm";
else
return 0;
}
else if (type == BT_COMPLEX)
{
if (kind == 4)
blas_name = "cgemm";
else if (kind == 8)
blas_name = "zgemm";
else
return 0;
}
else
return 0;
a = expr2->value.function.actual;
if (a->expr->rank != 2)
return 0;
b = a->next;
if (b->expr->rank != 2)
return 0;
matrix_a = check_conjg_transpose_variable (a->expr, &conjg_a, &transpose_a);
if (matrix_a == NULL)
return 0;
if (transpose_a)
{
if (conjg_a)
transa = "C";
else
transa = "T";
}
else
transa = "N";
matrix_b = check_conjg_transpose_variable (b->expr, &conjg_b, &transpose_b);
if (matrix_b == NULL)
return 0;
if (transpose_b)
{
if (conjg_b)
transb = "C";
else
transb = "T";
}
else
transb = "N";
if (transpose_a)
{
if (transpose_b)
m_case = A2TB2T;
else
m_case = A2TB2;
}
else
{
if (transpose_b)
m_case = A2B2T;
else
m_case = A2B2;
}
current_code = c;
inserted_block = NULL;
changed_statement = NULL;
expr2->external_blas = 1;
/* We do not handle data dependencies yet. */
if (gfc_check_dependency (expr1, matrix_a, true)
|| gfc_check_dependency (expr1, matrix_b, true))
return 0;
/* Generate the if statement and hang it into the tree. */
if_limit = inline_limit_check (matrix_a, matrix_b, flag_blas_matmul_limit);
co_next = co->next;
(*current_code) = if_limit;
co->next = NULL;
if_limit->block->next = co;
call = XCNEW (gfc_code);
call->loc = co->loc;
/* Bounds checking - a bit simpler than for inlining since we only
have to take care of two-dimensional arrays here. */
realloc_c = flag_realloc_lhs && gfc_is_reallocatable_lhs (expr1);
next_code_point = &(if_limit->block->block->next);
if (gfc_option.rtcheck & GFC_RTCHECK_BOUNDS)
{
gfc_code *test;
// gfc_expr *a2, *b1, *c1, *c2, *a1, *b2;
gfc_expr *c1, *a1, *c2, *b2, *a2;
switch (m_case)
{
case A2B2:
b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
test = runtime_error_ne (b1, a2, B_ERROR(1));
*next_code_point = test;
next_code_point = &test->next;
if (!realloc_c)
{
c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
test = runtime_error_ne (c1, a1, C_ERROR(1));
*next_code_point = test;
next_code_point = &test->next;
c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
test = runtime_error_ne (c2, b2, C_ERROR(2));
*next_code_point = test;
next_code_point = &test->next;
}
break;
case A2B2T:
b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
/* matrix_b is transposed, hence dimension 1 for the error message. */
test = runtime_error_ne (b2, a2, B_ERROR(1));
*next_code_point = test;
next_code_point = &test->next;
if (!realloc_c)
{
c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
test = runtime_error_ne (c1, a1, C_ERROR(1));
*next_code_point = test;
next_code_point = &test->next;
c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
test = runtime_error_ne (c2, b1, C_ERROR(2));
*next_code_point = test;
next_code_point = &test->next;
}
break;
case A2TB2:
b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
test = runtime_error_ne (b1, a1, B_ERROR(1));
*next_code_point = test;
next_code_point = &test->next;
if (!realloc_c)
{
c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
test = runtime_error_ne (c1, a2, C_ERROR(1));
*next_code_point = test;
next_code_point = &test->next;
c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
test = runtime_error_ne (c2, b2, C_ERROR(2));
*next_code_point = test;
next_code_point = &test->next;
}
break;
case A2TB2T:
b2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 2);
a1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 1);
test = runtime_error_ne (b2, a1, B_ERROR(1));
*next_code_point = test;
next_code_point = &test->next;
if (!realloc_c)
{
c1 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 1);
a2 = get_array_inq_function (GFC_ISYM_SIZE, matrix_a, 2);
test = runtime_error_ne (c1, a2, C_ERROR(1));
*next_code_point = test;
next_code_point = &test->next;
c2 = get_array_inq_function (GFC_ISYM_SIZE, expr1, 2);
b1 = get_array_inq_function (GFC_ISYM_SIZE, matrix_b, 1);
test = runtime_error_ne (c2, b1, C_ERROR(2));
*next_code_point = test;
next_code_point = &test->next;
}
break;
default:
gcc_unreachable ();
}
}
/* Handle the reallocation, if needed. */
if (realloc_c)
{
gfc_code *lhs_alloc;
lhs_alloc = matmul_lhs_realloc (expr1, matrix_a, matrix_b, m_case);
*next_code_point = lhs_alloc;
next_code_point = &lhs_alloc->next;
}
*next_code_point = call;
if_limit->next = co_next;
/* Set up the BLAS call. */
call->op = EXEC_CALL;
gfc_get_sym_tree (blas_name, current_ns, &(call->symtree), true);
call->symtree->n.sym->attr.subroutine = 1;
call->symtree->n.sym->attr.procedure = 1;
call->symtree->n.sym->attr.flavor = FL_PROCEDURE;
call->resolved_sym = call->symtree->n.sym;
/* Argument TRANSA. */
next = gfc_get_actual_arglist ();
next->expr = gfc_get_character_expr (gfc_default_character_kind, &co->loc,
transa, 1);
call->ext.actual = next;
/* Argument TRANSB. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = gfc_get_character_expr (gfc_default_character_kind, &co->loc,
transb, 1);
actual->next = next;
c1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (a->expr), 1,
gfc_integer_4_kind);
c2 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 2,
gfc_integer_4_kind);
b1 = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (b->expr), 1,
gfc_integer_4_kind);
/* Argument M. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = c1;
actual->next = next;
/* Argument N. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = c2;
actual->next = next;
/* Argument K. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = b1;
actual->next = next;
/* Argument ALPHA - set to one. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = gfc_get_constant_expr (type, kind, &co->loc);
if (type == BT_REAL)
mpfr_set_ui (next->expr->value.real, 1, GFC_RND_MODE);
else
mpc_set_ui (next->expr->value.complex, 1, GFC_MPC_RND_MODE);
actual->next = next;
/* Argument A. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = gfc_copy_expr (matrix_a);
actual->next = next;
/* Argument LDA. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_a),
1, gfc_integer_4_kind);
actual->next = next;
/* Argument B. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = gfc_copy_expr (matrix_b);
actual->next = next;
/* Argument LDB. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (matrix_b),
1, gfc_integer_4_kind);
actual->next = next;
/* Argument BETA - set to zero. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = gfc_get_constant_expr (type, kind, &co->loc);
if (type == BT_REAL)
mpfr_set_ui (next->expr->value.real, 0, GFC_RND_MODE);
else
mpc_set_ui (next->expr->value.complex, 0, GFC_MPC_RND_MODE);
actual->next = next;
/* Argument C. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = gfc_copy_expr (expr1);
actual->next = next;
/* Argument LDC. */
actual = next;
next = gfc_get_actual_arglist ();
next->expr = get_array_inq_function (GFC_ISYM_SIZE, gfc_copy_expr (expr1),
1, gfc_integer_4_kind);
actual->next = next;
return 0;
}
/* Code for index interchange for loops which are grouped together in DO /* Code for index interchange for loops which are grouped together in DO
CONCURRENT or FORALL statements. This is currently only applied if the CONCURRENT or FORALL statements. This is currently only applied if the
......
...@@ -2143,6 +2143,11 @@ typedef struct gfc_expr ...@@ -2143,6 +2143,11 @@ typedef struct gfc_expr
unsigned int no_bounds_check : 1; unsigned int no_bounds_check : 1;
/* Set this if a matmul expression has already been evaluated for conversion
to a BLAS call. */
unsigned int external_blas : 1;
/* If an expression comes from a Hollerith constant or compile-time /* If an expression comes from a Hollerith constant or compile-time
evaluation of a transfer statement, it may have a prescribed target- evaluation of a transfer statement, it may have a prescribed target-
memory representation, and these cannot always be backformed from memory representation, and these cannot always be backformed from
......
...@@ -4055,6 +4055,7 @@ gfc_conv_intrinsic_funcall (gfc_se * se, gfc_expr * expr) ...@@ -4055,6 +4055,7 @@ gfc_conv_intrinsic_funcall (gfc_se * se, gfc_expr * expr)
to be able to call the BLAS ?gemm functions if required and possible. */ to be able to call the BLAS ?gemm functions if required and possible. */
append_args = NULL; append_args = NULL;
if (expr->value.function.isym->id == GFC_ISYM_MATMUL if (expr->value.function.isym->id == GFC_ISYM_MATMUL
&& !expr->external_blas
&& sym->ts.type != BT_LOGICAL) && sym->ts.type != BT_LOGICAL)
{ {
tree cint = gfc_get_int_type (gfc_c_int_kind); tree cint = gfc_get_int_type (gfc_c_int_kind);
......
! { dg-do compile }
! { dg-options "-std=legacy" }
*> \brief \b CGEMM
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* .. Scalar Arguments ..
* COMPLEX ALPHA,BETA
* INTEGER K,LDA,LDB,LDC,M,N
* CHARACTER TRANSA,TRANSB
* ..
* .. Array Arguments ..
* COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> CGEMM performs one of the matrix-matrix operations
*>
*> C := alpha*op( A )*op( B ) + beta*C,
*>
*> where op( X ) is one of
*>
*> op( X ) = X or op( X ) = X**T or op( X ) = X**H,
*>
*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
*> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] TRANSA
*> \verbatim
*> TRANSA is CHARACTER*1
*> On entry, TRANSA specifies the form of op( A ) to be used in
*> the matrix multiplication as follows:
*>
*> TRANSA = 'N' or 'n', op( A ) = A.
*>
*> TRANSA = 'T' or 't', op( A ) = A**T.
*>
*> TRANSA = 'C' or 'c', op( A ) = A**H.
*> \endverbatim
*>
*> \param[in] TRANSB
*> \verbatim
*> TRANSB is CHARACTER*1
*> On entry, TRANSB specifies the form of op( B ) to be used in
*> the matrix multiplication as follows:
*>
*> TRANSB = 'N' or 'n', op( B ) = B.
*>
*> TRANSB = 'T' or 't', op( B ) = B**T.
*>
*> TRANSB = 'C' or 'c', op( B ) = B**H.
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> On entry, M specifies the number of rows of the matrix
*> op( A ) and of the matrix C. M must be at least zero.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> On entry, N specifies the number of columns of the matrix
*> op( B ) and the number of columns of the matrix C. N must be
*> at least zero.
*> \endverbatim
*>
*> \param[in] K
*> \verbatim
*> K is INTEGER
*> On entry, K specifies the number of columns of the matrix
*> op( A ) and the number of rows of the matrix op( B ). K must
*> be at least zero.
*> \endverbatim
*>
*> \param[in] ALPHA
*> \verbatim
*> ALPHA is COMPLEX
*> On entry, ALPHA specifies the scalar alpha.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is COMPLEX array, dimension ( LDA, ka ), where ka is
*> k when TRANSA = 'N' or 'n', and is m otherwise.
*> Before entry with TRANSA = 'N' or 'n', the leading m by k
*> part of the array A must contain the matrix A, otherwise
*> the leading k by m part of the array A must contain the
*> matrix A.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> On entry, LDA specifies the first dimension of A as declared
*> in the calling (sub) program. When TRANSA = 'N' or 'n' then
*> LDA must be at least max( 1, m ), otherwise LDA must be at
*> least max( 1, k ).
*> \endverbatim
*>
*> \param[in] B
*> \verbatim
*> B is COMPLEX array, dimension ( LDB, kb ), where kb is
*> n when TRANSB = 'N' or 'n', and is k otherwise.
*> Before entry with TRANSB = 'N' or 'n', the leading k by n
*> part of the array B must contain the matrix B, otherwise
*> the leading n by k part of the array B must contain the
*> matrix B.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> On entry, LDB specifies the first dimension of B as declared
*> in the calling (sub) program. When TRANSB = 'N' or 'n' then
*> LDB must be at least max( 1, k ), otherwise LDB must be at
*> least max( 1, n ).
*> \endverbatim
*>
*> \param[in] BETA
*> \verbatim
*> BETA is COMPLEX
*> On entry, BETA specifies the scalar beta. When BETA is
*> supplied as zero then C need not be set on input.
*> \endverbatim
*>
*> \param[in,out] C
*> \verbatim
*> C is COMPLEX array, dimension ( LDC, N )
*> Before entry, the leading m by n part of the array C must
*> contain the matrix C, except when beta is zero, in which
*> case C need not be set on entry.
*> On exit, the array C is overwritten by the m by n matrix
*> ( alpha*op( A )*op( B ) + beta*C ).
*> \endverbatim
*>
*> \param[in] LDC
*> \verbatim
*> LDC is INTEGER
*> On entry, LDC specifies the first dimension of C as declared
*> in the calling (sub) program. LDC must be at least
*> max( 1, m ).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date December 2016
*
*> \ingroup complex_blas_level3
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Level 3 Blas routine.
*>
*> -- Written on 8-February-1989.
*> Jack Dongarra, Argonne National Laboratory.
*> Iain Duff, AERE Harwell.
*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
*> Sven Hammarling, Numerical Algorithms Group Ltd.
*> \endverbatim
*>
* =====================================================================
SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* -- Reference BLAS level3 routine (version 3.7.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* December 2016
*
* .. Scalar Arguments ..
COMPLEX ALPHA,BETA
INTEGER K,LDA,LDB,LDC,M,N
CHARACTER TRANSA,TRANSB
* ..
* .. Array Arguments ..
COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
* ..
*
* =====================================================================
*
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC CONJG,MAX
* ..
* .. Local Scalars ..
COMPLEX TEMP
INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
LOGICAL CONJA,CONJB,NOTA,NOTB
* ..
* .. Parameters ..
COMPLEX ONE
PARAMETER (ONE= (1.0E+0,0.0E+0))
COMPLEX ZERO
PARAMETER (ZERO= (0.0E+0,0.0E+0))
* ..
*
* Set NOTA and NOTB as true if A and B respectively are not
* conjugated or transposed, set CONJA and CONJB as true if A and
* B respectively are to be transposed but not conjugated and set
* NROWA, NCOLA and NROWB as the number of rows and columns of A
* and the number of rows of B respectively.
*
NOTA = LSAME(TRANSA,'N')
NOTB = LSAME(TRANSB,'N')
CONJA = LSAME(TRANSA,'C')
CONJB = LSAME(TRANSB,'C')
IF (NOTA) THEN
NROWA = M
NCOLA = K
ELSE
NROWA = K
NCOLA = M
END IF
IF (NOTB) THEN
NROWB = K
ELSE
NROWB = N
END IF
*
* Test the input parameters.
*
INFO = 0
IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND.
+ (.NOT.LSAME(TRANSA,'T'))) THEN
INFO = 1
ELSE IF ((.NOT.NOTB) .AND. (.NOT.CONJB) .AND.
+ (.NOT.LSAME(TRANSB,'T'))) THEN
INFO = 2
ELSE IF (M.LT.0) THEN
INFO = 3
ELSE IF (N.LT.0) THEN
INFO = 4
ELSE IF (K.LT.0) THEN
INFO = 5
ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
INFO = 8
ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
INFO = 10
ELSE IF (LDC.LT.MAX(1,M)) THEN
INFO = 13
END IF
IF (INFO.NE.0) THEN
CALL XERBLA('CGEMM ',INFO)
RETURN
END IF
*
* Quick return if possible.
*
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
+ (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
*
* And when alpha.eq.zero.
*
IF (ALPHA.EQ.ZERO) THEN
IF (BETA.EQ.ZERO) THEN
DO 20 J = 1,N
DO 10 I = 1,M
C(I,J) = ZERO
10 CONTINUE
20 CONTINUE
ELSE
DO 40 J = 1,N
DO 30 I = 1,M
C(I,J) = BETA*C(I,J)
30 CONTINUE
40 CONTINUE
END IF
RETURN
END IF
*
* Start the operations.
*
IF (NOTB) THEN
IF (NOTA) THEN
*
* Form C := alpha*A*B + beta*C.
*
DO 90 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 50 I = 1,M
C(I,J) = ZERO
50 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 60 I = 1,M
C(I,J) = BETA*C(I,J)
60 CONTINUE
END IF
DO 80 L = 1,K
TEMP = ALPHA*B(L,J)
DO 70 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
70 CONTINUE
80 CONTINUE
90 CONTINUE
ELSE IF (CONJA) THEN
*
* Form C := alpha*A**H*B + beta*C.
*
DO 120 J = 1,N
DO 110 I = 1,M
TEMP = ZERO
DO 100 L = 1,K
TEMP = TEMP + CONJG(A(L,I))*B(L,J)
100 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
110 CONTINUE
120 CONTINUE
ELSE
*
* Form C := alpha*A**T*B + beta*C
*
DO 150 J = 1,N
DO 140 I = 1,M
TEMP = ZERO
DO 130 L = 1,K
TEMP = TEMP + A(L,I)*B(L,J)
130 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
140 CONTINUE
150 CONTINUE
END IF
ELSE IF (NOTA) THEN
IF (CONJB) THEN
*
* Form C := alpha*A*B**H + beta*C.
*
DO 200 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 160 I = 1,M
C(I,J) = ZERO
160 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 170 I = 1,M
C(I,J) = BETA*C(I,J)
170 CONTINUE
END IF
DO 190 L = 1,K
TEMP = ALPHA*CONJG(B(J,L))
DO 180 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
180 CONTINUE
190 CONTINUE
200 CONTINUE
ELSE
*
* Form C := alpha*A*B**T + beta*C
*
DO 250 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 210 I = 1,M
C(I,J) = ZERO
210 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 220 I = 1,M
C(I,J) = BETA*C(I,J)
220 CONTINUE
END IF
DO 240 L = 1,K
TEMP = ALPHA*B(J,L)
DO 230 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
230 CONTINUE
240 CONTINUE
250 CONTINUE
END IF
ELSE IF (CONJA) THEN
IF (CONJB) THEN
*
* Form C := alpha*A**H*B**H + beta*C.
*
DO 280 J = 1,N
DO 270 I = 1,M
TEMP = ZERO
DO 260 L = 1,K
TEMP = TEMP + CONJG(A(L,I))*CONJG(B(J,L))
260 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
270 CONTINUE
280 CONTINUE
ELSE
*
* Form C := alpha*A**H*B**T + beta*C
*
DO 310 J = 1,N
DO 300 I = 1,M
TEMP = ZERO
DO 290 L = 1,K
TEMP = TEMP + CONJG(A(L,I))*B(J,L)
290 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
300 CONTINUE
310 CONTINUE
END IF
ELSE
IF (CONJB) THEN
*
* Form C := alpha*A**T*B**H + beta*C
*
DO 340 J = 1,N
DO 330 I = 1,M
TEMP = ZERO
DO 320 L = 1,K
TEMP = TEMP + A(L,I)*CONJG(B(J,L))
320 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
330 CONTINUE
340 CONTINUE
ELSE
*
* Form C := alpha*A**T*B**T + beta*C
*
DO 370 J = 1,N
DO 360 I = 1,M
TEMP = ZERO
DO 350 L = 1,K
TEMP = TEMP + A(L,I)*B(J,L)
350 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
360 CONTINUE
370 CONTINUE
END IF
END IF
*
RETURN
*
* End of CGEMM .
*
END
*> \brief \b LSAME
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* LOGICAL FUNCTION LSAME(CA,CB)
*
* .. Scalar Arguments ..
* CHARACTER CA,CB
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> LSAME returns .TRUE. if CA is the same letter as CB regardless of
*> case.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] CA
*> \verbatim
*> CA is CHARACTER*1
*> \endverbatim
*>
*> \param[in] CB
*> \verbatim
*> CB is CHARACTER*1
*> CA and CB specify the single characters to be compared.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date December 2016
*
*> \ingroup aux_blas
*
* =====================================================================
LOGICAL FUNCTION LSAME(CA,CB)
*
* -- Reference BLAS level1 routine (version 3.1) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* December 2016
*
* .. Scalar Arguments ..
CHARACTER CA,CB
* ..
*
* =====================================================================
*
* .. Intrinsic Functions ..
INTRINSIC ICHAR
* ..
* .. Local Scalars ..
INTEGER INTA,INTB,ZCODE
* ..
*
* Test if the characters are equal
*
LSAME = CA .EQ. CB
IF (LSAME) RETURN
*
* Now test for equivalence if both characters are alphabetic.
*
ZCODE = ICHAR('Z')
*
* Use 'Z' rather than 'A' so that ASCII can be detected on Prime
* machines, on which ICHAR returns a value with bit 8 set.
* ICHAR('A') on Prime machines returns 193 which is the same as
* ICHAR('A') on an EBCDIC machine.
*
INTA = ICHAR(CA)
INTB = ICHAR(CB)
*
IF (ZCODE.EQ.90 .OR. ZCODE.EQ.122) THEN
*
* ASCII is assumed - ZCODE is the ASCII code of either lower or
* upper case 'Z'.
*
IF (INTA.GE.97 .AND. INTA.LE.122) INTA = INTA - 32
IF (INTB.GE.97 .AND. INTB.LE.122) INTB = INTB - 32
*
ELSE IF (ZCODE.EQ.233 .OR. ZCODE.EQ.169) THEN
*
* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or
* upper case 'Z'.
*
IF (INTA.GE.129 .AND. INTA.LE.137 .OR.
+ INTA.GE.145 .AND. INTA.LE.153 .OR.
+ INTA.GE.162 .AND. INTA.LE.169) INTA = INTA + 64
IF (INTB.GE.129 .AND. INTB.LE.137 .OR.
+ INTB.GE.145 .AND. INTB.LE.153 .OR.
+ INTB.GE.162 .AND. INTB.LE.169) INTB = INTB + 64
*
ELSE IF (ZCODE.EQ.218 .OR. ZCODE.EQ.250) THEN
*
* ASCII is assumed, on Prime machines - ZCODE is the ASCII code
* plus 128 of either lower or upper case 'Z'.
*
IF (INTA.GE.225 .AND. INTA.LE.250) INTA = INTA - 32
IF (INTB.GE.225 .AND. INTB.LE.250) INTB = INTB - 32
END IF
LSAME = INTA .EQ. INTB
*
* RETURN
*
* End of LSAME
*
END
*> \brief \b XERBLA
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE XERBLA( SRNAME, INFO )
*
* .. Scalar Arguments ..
* CHARACTER*(*) SRNAME
* INTEGER INFO
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> XERBLA is an error handler for the LAPACK routines.
*> It is called by an LAPACK routine if an input parameter has an
*> invalid value. A message is printed and execution stops.
*>
*> Installers may consider modifying the STOP statement in order to
*> call system-specific exception-handling facilities.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] SRNAME
*> \verbatim
*> SRNAME is CHARACTER*(*)
*> The name of the routine which called XERBLA.
*> \endverbatim
*>
*> \param[in] INFO
*> \verbatim
*> INFO is INTEGER
*> The position of the invalid parameter in the parameter list
*> of the calling routine.
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date December 2016
*
*> \ingroup aux_blas
*
* =====================================================================
SUBROUTINE XERBLA( SRNAME, INFO )
*
* -- Reference BLAS level1 routine (version 3.7.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* December 2016
*
* .. Scalar Arguments ..
CHARACTER*(*) SRNAME
INTEGER INFO
* ..
*
* =====================================================================
*
* .. Intrinsic Functions ..
INTRINSIC LEN_TRIM
* ..
* .. Executable Statements ..
*
WRITE( *, FMT = 9999 )SRNAME( 1:LEN_TRIM( SRNAME ) ), INFO
*
STOP
*
9999 FORMAT( ' ** On entry to ', A, ' parameter number ', I2, ' had ',
$ 'an illegal value' )
*
* End of XERBLA
*
END
*> \brief \b SGEMM
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* .. Scalar Arguments ..
* REAL ALPHA,BETA
* INTEGER K,LDA,LDB,LDC,M,N
* CHARACTER TRANSA,TRANSB
* ..
* .. Array Arguments ..
* REAL A(LDA,*),B(LDB,*),C(LDC,*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> SGEMM performs one of the matrix-matrix operations
*>
*> C := alpha*op( A )*op( B ) + beta*C,
*>
*> where op( X ) is one of
*>
*> op( X ) = X or op( X ) = X**T,
*>
*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
*> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] TRANSA
*> \verbatim
*> TRANSA is CHARACTER*1
*> On entry, TRANSA specifies the form of op( A ) to be used in
*> the matrix multiplication as follows:
*>
*> TRANSA = 'N' or 'n', op( A ) = A.
*>
*> TRANSA = 'T' or 't', op( A ) = A**T.
*>
*> TRANSA = 'C' or 'c', op( A ) = A**T.
*> \endverbatim
*>
*> \param[in] TRANSB
*> \verbatim
*> TRANSB is CHARACTER*1
*> On entry, TRANSB specifies the form of op( B ) to be used in
*> the matrix multiplication as follows:
*>
*> TRANSB = 'N' or 'n', op( B ) = B.
*>
*> TRANSB = 'T' or 't', op( B ) = B**T.
*>
*> TRANSB = 'C' or 'c', op( B ) = B**T.
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> On entry, M specifies the number of rows of the matrix
*> op( A ) and of the matrix C. M must be at least zero.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> On entry, N specifies the number of columns of the matrix
*> op( B ) and the number of columns of the matrix C. N must be
*> at least zero.
*> \endverbatim
*>
*> \param[in] K
*> \verbatim
*> K is INTEGER
*> On entry, K specifies the number of columns of the matrix
*> op( A ) and the number of rows of the matrix op( B ). K must
*> be at least zero.
*> \endverbatim
*>
*> \param[in] ALPHA
*> \verbatim
*> ALPHA is REAL
*> On entry, ALPHA specifies the scalar alpha.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is REAL array, dimension ( LDA, ka ), where ka is
*> k when TRANSA = 'N' or 'n', and is m otherwise.
*> Before entry with TRANSA = 'N' or 'n', the leading m by k
*> part of the array A must contain the matrix A, otherwise
*> the leading k by m part of the array A must contain the
*> matrix A.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> On entry, LDA specifies the first dimension of A as declared
*> in the calling (sub) program. When TRANSA = 'N' or 'n' then
*> LDA must be at least max( 1, m ), otherwise LDA must be at
*> least max( 1, k ).
*> \endverbatim
*>
*> \param[in] B
*> \verbatim
*> B is REAL array, dimension ( LDB, kb ), where kb is
*> n when TRANSB = 'N' or 'n', and is k otherwise.
*> Before entry with TRANSB = 'N' or 'n', the leading k by n
*> part of the array B must contain the matrix B, otherwise
*> the leading n by k part of the array B must contain the
*> matrix B.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> On entry, LDB specifies the first dimension of B as declared
*> in the calling (sub) program. When TRANSB = 'N' or 'n' then
*> LDB must be at least max( 1, k ), otherwise LDB must be at
*> least max( 1, n ).
*> \endverbatim
*>
*> \param[in] BETA
*> \verbatim
*> BETA is REAL
*> On entry, BETA specifies the scalar beta. When BETA is
*> supplied as zero then C need not be set on input.
*> \endverbatim
*>
*> \param[in,out] C
*> \verbatim
*> C is REAL array, dimension ( LDC, N )
*> Before entry, the leading m by n part of the array C must
*> contain the matrix C, except when beta is zero, in which
*> case C need not be set on entry.
*> On exit, the array C is overwritten by the m by n matrix
*> ( alpha*op( A )*op( B ) + beta*C ).
*> \endverbatim
*>
*> \param[in] LDC
*> \verbatim
*> LDC is INTEGER
*> On entry, LDC specifies the first dimension of C as declared
*> in the calling (sub) program. LDC must be at least
*> max( 1, m ).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date December 2016
*
*> \ingroup single_blas_level3
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Level 3 Blas routine.
*>
*> -- Written on 8-February-1989.
*> Jack Dongarra, Argonne National Laboratory.
*> Iain Duff, AERE Harwell.
*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
*> Sven Hammarling, Numerical Algorithms Group Ltd.
*> \endverbatim
*>
* =====================================================================
SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* -- Reference BLAS level3 routine (version 3.7.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* December 2016
*
* .. Scalar Arguments ..
REAL ALPHA,BETA
INTEGER K,LDA,LDB,LDC,M,N
CHARACTER TRANSA,TRANSB
* ..
* .. Array Arguments ..
REAL A(LDA,*),B(LDB,*),C(LDC,*)
* ..
*
* =====================================================================
*
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX
* ..
* .. Local Scalars ..
REAL TEMP
INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
LOGICAL NOTA,NOTB
* ..
* .. Parameters ..
REAL ONE,ZERO
PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
* ..
*
* Set NOTA and NOTB as true if A and B respectively are not
* transposed and set NROWA, NCOLA and NROWB as the number of rows
* and columns of A and the number of rows of B respectively.
*
NOTA = LSAME(TRANSA,'N')
NOTB = LSAME(TRANSB,'N')
IF (NOTA) THEN
NROWA = M
NCOLA = K
ELSE
NROWA = K
NCOLA = M
END IF
IF (NOTB) THEN
NROWB = K
ELSE
NROWB = N
END IF
*
* Test the input parameters.
*
INFO = 0
IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND.
+ (.NOT.LSAME(TRANSA,'T'))) THEN
INFO = 1
ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND.
+ (.NOT.LSAME(TRANSB,'T'))) THEN
INFO = 2
ELSE IF (M.LT.0) THEN
INFO = 3
ELSE IF (N.LT.0) THEN
INFO = 4
ELSE IF (K.LT.0) THEN
INFO = 5
ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
INFO = 8
ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
INFO = 10
ELSE IF (LDC.LT.MAX(1,M)) THEN
INFO = 13
END IF
IF (INFO.NE.0) THEN
CALL XERBLA('SGEMM ',INFO)
RETURN
END IF
*
* Quick return if possible.
*
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
+ (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
*
* And if alpha.eq.zero.
*
IF (ALPHA.EQ.ZERO) THEN
IF (BETA.EQ.ZERO) THEN
DO 20 J = 1,N
DO 10 I = 1,M
C(I,J) = ZERO
10 CONTINUE
20 CONTINUE
ELSE
DO 40 J = 1,N
DO 30 I = 1,M
C(I,J) = BETA*C(I,J)
30 CONTINUE
40 CONTINUE
END IF
RETURN
END IF
*
* Start the operations.
*
IF (NOTB) THEN
IF (NOTA) THEN
*
* Form C := alpha*A*B + beta*C.
*
DO 90 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 50 I = 1,M
C(I,J) = ZERO
50 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 60 I = 1,M
C(I,J) = BETA*C(I,J)
60 CONTINUE
END IF
DO 80 L = 1,K
TEMP = ALPHA*B(L,J)
DO 70 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
70 CONTINUE
80 CONTINUE
90 CONTINUE
ELSE
*
* Form C := alpha*A**T*B + beta*C
*
DO 120 J = 1,N
DO 110 I = 1,M
TEMP = ZERO
DO 100 L = 1,K
TEMP = TEMP + A(L,I)*B(L,J)
100 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
110 CONTINUE
120 CONTINUE
END IF
ELSE
IF (NOTA) THEN
*
* Form C := alpha*A*B**T + beta*C
*
DO 170 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 130 I = 1,M
C(I,J) = ZERO
130 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 140 I = 1,M
C(I,J) = BETA*C(I,J)
140 CONTINUE
END IF
DO 160 L = 1,K
TEMP = ALPHA*B(J,L)
DO 150 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
150 CONTINUE
160 CONTINUE
170 CONTINUE
ELSE
*
* Form C := alpha*A**T*B**T + beta*C
*
DO 200 J = 1,N
DO 190 I = 1,M
TEMP = ZERO
DO 180 L = 1,K
TEMP = TEMP + A(L,I)*B(J,L)
180 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
190 CONTINUE
200 CONTINUE
END IF
END IF
*
RETURN
*
* End of SGEMM .
*
END
*> \brief \b DGEMM
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* .. Scalar Arguments ..
* DOUBLE PRECISION ALPHA,BETA
* INTEGER K,LDA,LDB,LDC,M,N
* CHARACTER TRANSA,TRANSB
* ..
* .. Array Arguments ..
* DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> DGEMM performs one of the matrix-matrix operations
*>
*> C := alpha*op( A )*op( B ) + beta*C,
*>
*> where op( X ) is one of
*>
*> op( X ) = X or op( X ) = X**T,
*>
*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
*> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] TRANSA
*> \verbatim
*> TRANSA is CHARACTER*1
*> On entry, TRANSA specifies the form of op( A ) to be used in
*> the matrix multiplication as follows:
*>
*> TRANSA = 'N' or 'n', op( A ) = A.
*>
*> TRANSA = 'T' or 't', op( A ) = A**T.
*>
*> TRANSA = 'C' or 'c', op( A ) = A**T.
*> \endverbatim
*>
*> \param[in] TRANSB
*> \verbatim
*> TRANSB is CHARACTER*1
*> On entry, TRANSB specifies the form of op( B ) to be used in
*> the matrix multiplication as follows:
*>
*> TRANSB = 'N' or 'n', op( B ) = B.
*>
*> TRANSB = 'T' or 't', op( B ) = B**T.
*>
*> TRANSB = 'C' or 'c', op( B ) = B**T.
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> On entry, M specifies the number of rows of the matrix
*> op( A ) and of the matrix C. M must be at least zero.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> On entry, N specifies the number of columns of the matrix
*> op( B ) and the number of columns of the matrix C. N must be
*> at least zero.
*> \endverbatim
*>
*> \param[in] K
*> \verbatim
*> K is INTEGER
*> On entry, K specifies the number of columns of the matrix
*> op( A ) and the number of rows of the matrix op( B ). K must
*> be at least zero.
*> \endverbatim
*>
*> \param[in] ALPHA
*> \verbatim
*> ALPHA is DOUBLE PRECISION.
*> On entry, ALPHA specifies the scalar alpha.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is DOUBLE PRECISION array, dimension ( LDA, ka ), where ka is
*> k when TRANSA = 'N' or 'n', and is m otherwise.
*> Before entry with TRANSA = 'N' or 'n', the leading m by k
*> part of the array A must contain the matrix A, otherwise
*> the leading k by m part of the array A must contain the
*> matrix A.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> On entry, LDA specifies the first dimension of A as declared
*> in the calling (sub) program. When TRANSA = 'N' or 'n' then
*> LDA must be at least max( 1, m ), otherwise LDA must be at
*> least max( 1, k ).
*> \endverbatim
*>
*> \param[in] B
*> \verbatim
*> B is DOUBLE PRECISION array, dimension ( LDB, kb ), where kb is
*> n when TRANSB = 'N' or 'n', and is k otherwise.
*> Before entry with TRANSB = 'N' or 'n', the leading k by n
*> part of the array B must contain the matrix B, otherwise
*> the leading n by k part of the array B must contain the
*> matrix B.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> On entry, LDB specifies the first dimension of B as declared
*> in the calling (sub) program. When TRANSB = 'N' or 'n' then
*> LDB must be at least max( 1, k ), otherwise LDB must be at
*> least max( 1, n ).
*> \endverbatim
*>
*> \param[in] BETA
*> \verbatim
*> BETA is DOUBLE PRECISION.
*> On entry, BETA specifies the scalar beta. When BETA is
*> supplied as zero then C need not be set on input.
*> \endverbatim
*>
*> \param[in,out] C
*> \verbatim
*> C is DOUBLE PRECISION array, dimension ( LDC, N )
*> Before entry, the leading m by n part of the array C must
*> contain the matrix C, except when beta is zero, in which
*> case C need not be set on entry.
*> On exit, the array C is overwritten by the m by n matrix
*> ( alpha*op( A )*op( B ) + beta*C ).
*> \endverbatim
*>
*> \param[in] LDC
*> \verbatim
*> LDC is INTEGER
*> On entry, LDC specifies the first dimension of C as declared
*> in the calling (sub) program. LDC must be at least
*> max( 1, m ).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date December 2016
*
*> \ingroup double_blas_level3
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Level 3 Blas routine.
*>
*> -- Written on 8-February-1989.
*> Jack Dongarra, Argonne National Laboratory.
*> Iain Duff, AERE Harwell.
*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
*> Sven Hammarling, Numerical Algorithms Group Ltd.
*> \endverbatim
*>
* =====================================================================
SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* -- Reference BLAS level3 routine (version 3.7.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* December 2016
*
* .. Scalar Arguments ..
DOUBLE PRECISION ALPHA,BETA
INTEGER K,LDA,LDB,LDC,M,N
CHARACTER TRANSA,TRANSB
* ..
* .. Array Arguments ..
DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
* ..
*
* =====================================================================
*
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX
* ..
* .. Local Scalars ..
DOUBLE PRECISION TEMP
INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
LOGICAL NOTA,NOTB
* ..
* .. Parameters ..
DOUBLE PRECISION ONE,ZERO
PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
* ..
*
* Set NOTA and NOTB as true if A and B respectively are not
* transposed and set NROWA, NCOLA and NROWB as the number of rows
* and columns of A and the number of rows of B respectively.
*
NOTA = LSAME(TRANSA,'N')
NOTB = LSAME(TRANSB,'N')
IF (NOTA) THEN
NROWA = M
NCOLA = K
ELSE
NROWA = K
NCOLA = M
END IF
IF (NOTB) THEN
NROWB = K
ELSE
NROWB = N
END IF
*
* Test the input parameters.
*
INFO = 0
IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND.
+ (.NOT.LSAME(TRANSA,'T'))) THEN
INFO = 1
ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND.
+ (.NOT.LSAME(TRANSB,'T'))) THEN
INFO = 2
ELSE IF (M.LT.0) THEN
INFO = 3
ELSE IF (N.LT.0) THEN
INFO = 4
ELSE IF (K.LT.0) THEN
INFO = 5
ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
INFO = 8
ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
INFO = 10
ELSE IF (LDC.LT.MAX(1,M)) THEN
INFO = 13
END IF
IF (INFO.NE.0) THEN
CALL XERBLA('DGEMM ',INFO)
RETURN
END IF
*
* Quick return if possible.
*
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
+ (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
*
* And if alpha.eq.zero.
*
IF (ALPHA.EQ.ZERO) THEN
IF (BETA.EQ.ZERO) THEN
DO 20 J = 1,N
DO 10 I = 1,M
C(I,J) = ZERO
10 CONTINUE
20 CONTINUE
ELSE
DO 40 J = 1,N
DO 30 I = 1,M
C(I,J) = BETA*C(I,J)
30 CONTINUE
40 CONTINUE
END IF
RETURN
END IF
*
* Start the operations.
*
IF (NOTB) THEN
IF (NOTA) THEN
*
* Form C := alpha*A*B + beta*C.
*
DO 90 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 50 I = 1,M
C(I,J) = ZERO
50 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 60 I = 1,M
C(I,J) = BETA*C(I,J)
60 CONTINUE
END IF
DO 80 L = 1,K
TEMP = ALPHA*B(L,J)
DO 70 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
70 CONTINUE
80 CONTINUE
90 CONTINUE
ELSE
*
* Form C := alpha*A**T*B + beta*C
*
DO 120 J = 1,N
DO 110 I = 1,M
TEMP = ZERO
DO 100 L = 1,K
TEMP = TEMP + A(L,I)*B(L,J)
100 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
110 CONTINUE
120 CONTINUE
END IF
ELSE
IF (NOTA) THEN
*
* Form C := alpha*A*B**T + beta*C
*
DO 170 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 130 I = 1,M
C(I,J) = ZERO
130 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 140 I = 1,M
C(I,J) = BETA*C(I,J)
140 CONTINUE
END IF
DO 160 L = 1,K
TEMP = ALPHA*B(J,L)
DO 150 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
150 CONTINUE
160 CONTINUE
170 CONTINUE
ELSE
*
* Form C := alpha*A**T*B**T + beta*C
*
DO 200 J = 1,N
DO 190 I = 1,M
TEMP = ZERO
DO 180 L = 1,K
TEMP = TEMP + A(L,I)*B(J,L)
180 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
190 CONTINUE
200 CONTINUE
END IF
END IF
*
RETURN
*
* End of DGEMM .
*
END
*> \brief \b ZGEMM
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
* Definition:
* ===========
*
* SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* .. Scalar Arguments ..
* COMPLEX*16 ALPHA,BETA
* INTEGER K,LDA,LDB,LDC,M,N
* CHARACTER TRANSA,TRANSB
* ..
* .. Array Arguments ..
* COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> ZGEMM performs one of the matrix-matrix operations
*>
*> C := alpha*op( A )*op( B ) + beta*C,
*>
*> where op( X ) is one of
*>
*> op( X ) = X or op( X ) = X**T or op( X ) = X**H,
*>
*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
*> an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] TRANSA
*> \verbatim
*> TRANSA is CHARACTER*1
*> On entry, TRANSA specifies the form of op( A ) to be used in
*> the matrix multiplication as follows:
*>
*> TRANSA = 'N' or 'n', op( A ) = A.
*>
*> TRANSA = 'T' or 't', op( A ) = A**T.
*>
*> TRANSA = 'C' or 'c', op( A ) = A**H.
*> \endverbatim
*>
*> \param[in] TRANSB
*> \verbatim
*> TRANSB is CHARACTER*1
*> On entry, TRANSB specifies the form of op( B ) to be used in
*> the matrix multiplication as follows:
*>
*> TRANSB = 'N' or 'n', op( B ) = B.
*>
*> TRANSB = 'T' or 't', op( B ) = B**T.
*>
*> TRANSB = 'C' or 'c', op( B ) = B**H.
*> \endverbatim
*>
*> \param[in] M
*> \verbatim
*> M is INTEGER
*> On entry, M specifies the number of rows of the matrix
*> op( A ) and of the matrix C. M must be at least zero.
*> \endverbatim
*>
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> On entry, N specifies the number of columns of the matrix
*> op( B ) and the number of columns of the matrix C. N must be
*> at least zero.
*> \endverbatim
*>
*> \param[in] K
*> \verbatim
*> K is INTEGER
*> On entry, K specifies the number of columns of the matrix
*> op( A ) and the number of rows of the matrix op( B ). K must
*> be at least zero.
*> \endverbatim
*>
*> \param[in] ALPHA
*> \verbatim
*> ALPHA is COMPLEX*16
*> On entry, ALPHA specifies the scalar alpha.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is COMPLEX*16 array, dimension ( LDA, ka ), where ka is
*> k when TRANSA = 'N' or 'n', and is m otherwise.
*> Before entry with TRANSA = 'N' or 'n', the leading m by k
*> part of the array A must contain the matrix A, otherwise
*> the leading k by m part of the array A must contain the
*> matrix A.
*> \endverbatim
*>
*> \param[in] LDA
*> \verbatim
*> LDA is INTEGER
*> On entry, LDA specifies the first dimension of A as declared
*> in the calling (sub) program. When TRANSA = 'N' or 'n' then
*> LDA must be at least max( 1, m ), otherwise LDA must be at
*> least max( 1, k ).
*> \endverbatim
*>
*> \param[in] B
*> \verbatim
*> B is COMPLEX*16 array, dimension ( LDB, kb ), where kb is
*> n when TRANSB = 'N' or 'n', and is k otherwise.
*> Before entry with TRANSB = 'N' or 'n', the leading k by n
*> part of the array B must contain the matrix B, otherwise
*> the leading n by k part of the array B must contain the
*> matrix B.
*> \endverbatim
*>
*> \param[in] LDB
*> \verbatim
*> LDB is INTEGER
*> On entry, LDB specifies the first dimension of B as declared
*> in the calling (sub) program. When TRANSB = 'N' or 'n' then
*> LDB must be at least max( 1, k ), otherwise LDB must be at
*> least max( 1, n ).
*> \endverbatim
*>
*> \param[in] BETA
*> \verbatim
*> BETA is COMPLEX*16
*> On entry, BETA specifies the scalar beta. When BETA is
*> supplied as zero then C need not be set on input.
*> \endverbatim
*>
*> \param[in,out] C
*> \verbatim
*> C is COMPLEX*16 array, dimension ( LDC, N )
*> Before entry, the leading m by n part of the array C must
*> contain the matrix C, except when beta is zero, in which
*> case C need not be set on entry.
*> On exit, the array C is overwritten by the m by n matrix
*> ( alpha*op( A )*op( B ) + beta*C ).
*> \endverbatim
*>
*> \param[in] LDC
*> \verbatim
*> LDC is INTEGER
*> On entry, LDC specifies the first dimension of C as declared
*> in the calling (sub) program. LDC must be at least
*> max( 1, m ).
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date December 2016
*
*> \ingroup complex16_blas_level3
*
*> \par Further Details:
* =====================
*>
*> \verbatim
*>
*> Level 3 Blas routine.
*>
*> -- Written on 8-February-1989.
*> Jack Dongarra, Argonne National Laboratory.
*> Iain Duff, AERE Harwell.
*> Jeremy Du Croz, Numerical Algorithms Group Ltd.
*> Sven Hammarling, Numerical Algorithms Group Ltd.
*> \endverbatim
*>
* =====================================================================
SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* -- Reference BLAS level3 routine (version 3.7.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* December 2016
*
* .. Scalar Arguments ..
COMPLEX*16 ALPHA,BETA
INTEGER K,LDA,LDB,LDC,M,N
CHARACTER TRANSA,TRANSB
* ..
* .. Array Arguments ..
COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
* ..
*
* =====================================================================
*
* .. External Functions ..
LOGICAL LSAME
EXTERNAL LSAME
* ..
* .. External Subroutines ..
EXTERNAL XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC DCONJG,MAX
* ..
* .. Local Scalars ..
COMPLEX*16 TEMP
INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
LOGICAL CONJA,CONJB,NOTA,NOTB
* ..
* .. Parameters ..
COMPLEX*16 ONE
PARAMETER (ONE= (1.0D+0,0.0D+0))
COMPLEX*16 ZERO
PARAMETER (ZERO= (0.0D+0,0.0D+0))
* ..
*
* Set NOTA and NOTB as true if A and B respectively are not
* conjugated or transposed, set CONJA and CONJB as true if A and
* B respectively are to be transposed but not conjugated and set
* NROWA, NCOLA and NROWB as the number of rows and columns of A
* and the number of rows of B respectively.
*
NOTA = LSAME(TRANSA,'N')
NOTB = LSAME(TRANSB,'N')
CONJA = LSAME(TRANSA,'C')
CONJB = LSAME(TRANSB,'C')
IF (NOTA) THEN
NROWA = M
NCOLA = K
ELSE
NROWA = K
NCOLA = M
END IF
IF (NOTB) THEN
NROWB = K
ELSE
NROWB = N
END IF
*
* Test the input parameters.
*
INFO = 0
IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND.
+ (.NOT.LSAME(TRANSA,'T'))) THEN
INFO = 1
ELSE IF ((.NOT.NOTB) .AND. (.NOT.CONJB) .AND.
+ (.NOT.LSAME(TRANSB,'T'))) THEN
INFO = 2
ELSE IF (M.LT.0) THEN
INFO = 3
ELSE IF (N.LT.0) THEN
INFO = 4
ELSE IF (K.LT.0) THEN
INFO = 5
ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
INFO = 8
ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
INFO = 10
ELSE IF (LDC.LT.MAX(1,M)) THEN
INFO = 13
END IF
IF (INFO.NE.0) THEN
CALL XERBLA('ZGEMM ',INFO)
RETURN
END IF
*
* Quick return if possible.
*
IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
+ (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
*
* And when alpha.eq.zero.
*
IF (ALPHA.EQ.ZERO) THEN
IF (BETA.EQ.ZERO) THEN
DO 20 J = 1,N
DO 10 I = 1,M
C(I,J) = ZERO
10 CONTINUE
20 CONTINUE
ELSE
DO 40 J = 1,N
DO 30 I = 1,M
C(I,J) = BETA*C(I,J)
30 CONTINUE
40 CONTINUE
END IF
RETURN
END IF
*
* Start the operations.
*
IF (NOTB) THEN
IF (NOTA) THEN
*
* Form C := alpha*A*B + beta*C.
*
DO 90 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 50 I = 1,M
C(I,J) = ZERO
50 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 60 I = 1,M
C(I,J) = BETA*C(I,J)
60 CONTINUE
END IF
DO 80 L = 1,K
TEMP = ALPHA*B(L,J)
DO 70 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
70 CONTINUE
80 CONTINUE
90 CONTINUE
ELSE IF (CONJA) THEN
*
* Form C := alpha*A**H*B + beta*C.
*
DO 120 J = 1,N
DO 110 I = 1,M
TEMP = ZERO
DO 100 L = 1,K
TEMP = TEMP + DCONJG(A(L,I))*B(L,J)
100 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
110 CONTINUE
120 CONTINUE
ELSE
*
* Form C := alpha*A**T*B + beta*C
*
DO 150 J = 1,N
DO 140 I = 1,M
TEMP = ZERO
DO 130 L = 1,K
TEMP = TEMP + A(L,I)*B(L,J)
130 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
140 CONTINUE
150 CONTINUE
END IF
ELSE IF (NOTA) THEN
IF (CONJB) THEN
*
* Form C := alpha*A*B**H + beta*C.
*
DO 200 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 160 I = 1,M
C(I,J) = ZERO
160 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 170 I = 1,M
C(I,J) = BETA*C(I,J)
170 CONTINUE
END IF
DO 190 L = 1,K
TEMP = ALPHA*DCONJG(B(J,L))
DO 180 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
180 CONTINUE
190 CONTINUE
200 CONTINUE
ELSE
*
* Form C := alpha*A*B**T + beta*C
*
DO 250 J = 1,N
IF (BETA.EQ.ZERO) THEN
DO 210 I = 1,M
C(I,J) = ZERO
210 CONTINUE
ELSE IF (BETA.NE.ONE) THEN
DO 220 I = 1,M
C(I,J) = BETA*C(I,J)
220 CONTINUE
END IF
DO 240 L = 1,K
TEMP = ALPHA*B(J,L)
DO 230 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
230 CONTINUE
240 CONTINUE
250 CONTINUE
END IF
ELSE IF (CONJA) THEN
IF (CONJB) THEN
*
* Form C := alpha*A**H*B**H + beta*C.
*
DO 280 J = 1,N
DO 270 I = 1,M
TEMP = ZERO
DO 260 L = 1,K
TEMP = TEMP + DCONJG(A(L,I))*DCONJG(B(J,L))
260 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
270 CONTINUE
280 CONTINUE
ELSE
*
* Form C := alpha*A**H*B**T + beta*C
*
DO 310 J = 1,N
DO 300 I = 1,M
TEMP = ZERO
DO 290 L = 1,K
TEMP = TEMP + DCONJG(A(L,I))*B(J,L)
290 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
300 CONTINUE
310 CONTINUE
END IF
ELSE
IF (CONJB) THEN
*
* Form C := alpha*A**T*B**H + beta*C
*
DO 340 J = 1,N
DO 330 I = 1,M
TEMP = ZERO
DO 320 L = 1,K
TEMP = TEMP + A(L,I)*DCONJG(B(J,L))
320 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
330 CONTINUE
340 CONTINUE
ELSE
*
* Form C := alpha*A**T*B**T + beta*C
*
DO 370 J = 1,N
DO 360 I = 1,M
TEMP = ZERO
DO 350 L = 1,K
TEMP = TEMP + A(L,I)*B(J,L)
350 CONTINUE
IF (BETA.EQ.ZERO) THEN
C(I,J) = ALPHA*TEMP
ELSE
C(I,J) = ALPHA*TEMP + BETA*C(I,J)
END IF
360 CONTINUE
370 CONTINUE
END IF
END IF
*
RETURN
*
* End of ZGEMM .
*
END
...@@ -44,4 +44,4 @@ program main ...@@ -44,4 +44,4 @@ program main
deallocate(calloc) deallocate(calloc)
end program main end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "original" } } ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 1 "original" } }
...@@ -58,4 +58,4 @@ program main ...@@ -58,4 +58,4 @@ program main
end do end do
end do end do
end program main end program main
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } } ! { dg-final { scan-tree-dump-times "_gfortran_matmul" 1 "optimized" } }
C { dg-do run }
C { dg-options "-fcheck=bounds -fdump-tree-optimized -fblas-matmul-limit=1 -O -fexternal-blas" }
C { dg-additional-sources blas_gemm_routines.f }
C Test calling of BLAS routines
program main
call sub_s
call sub_d
call sub_c
call sub_z
end
subroutine sub_d
implicit none
real(8), dimension(3,2) :: a
real(8), dimension(2,3) :: at
real(8), dimension(2,4) :: b
real(8), dimension(4,2) :: bt
real(8), dimension(3,4) :: c
real(8), dimension(3,4) :: cres
real(8), dimension(:,:), allocatable :: c_alloc
data a / 2., -3., 5., -7., 11., -13./
data b /17., -23., 29., -31., 37., -39., 41., -47./
data cres /195., -304., 384., 275., -428., 548., 347., -540.,
& 692., 411., -640., 816./
c = matmul(a,b)
if (any (c /= cres)) stop 31
at = transpose(a)
c = (1.2,-2.2)
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 32
bt = transpose(b)
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 33
c_alloc = matmul(a,b)
if (any (c /= cres)) stop 34
at = transpose(a)
deallocate (c_alloc)
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 35
bt = transpose(b)
allocate (c_alloc(20,20))
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 36
end
subroutine sub_s
implicit none
real, dimension(3,2) :: a
real, dimension(2,3) :: at
real, dimension(2,4) :: b
real, dimension(4,2) :: bt
real, dimension(3,4) :: c
real, dimension(3,4) :: cres
real, dimension(:,:), allocatable :: c_alloc
data a / 2., -3., 5., -7., 11., -13./
data b /17., -23., 29., -31., 37., -39., 41., -47./
data cres /195., -304., 384., 275., -428., 548., 347., -540.,
& 692., 411., -640., 816./
c = matmul(a,b)
if (any (c /= cres)) stop 21
at = transpose(a)
c = (1.2,-2.2)
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 22
bt = transpose(b)
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 23
c_alloc = matmul(a,b)
if (any (c /= cres)) stop 24
at = transpose(a)
deallocate (c_alloc)
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 25
bt = transpose(b)
allocate (c_alloc(20,20))
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 26
end
subroutine sub_c
implicit none
complex, dimension(3,2) :: a
complex, dimension(2,3) :: at, ah
complex, dimension(2,4) :: b
complex, dimension(4,2) :: bt, bh
complex, dimension(3,4) :: c
complex, dimension(3,4) :: cres
complex, dimension(:,:), allocatable :: c_alloc
data a / (2.,-3.), (-5.,7.), (11.,-13.), (17.,19), (-23., -29),
& (-31., 37.)/
data b / (-41., 43.), (-47., 53.), (-59.,-61.), (-67., 71),
& ( 73.,79. ), (83.,-89.), (97.,-101.), (-107.,-109.)/
data cres /(-1759.,217.), (2522.,-358.), (-396.,-2376.),
& (-2789.,-11.),
& (4322.,202.), (-1992.,-4584.), (3485.,3.), (-5408.,-244.),
& (2550.,5750.), (143.,-4379.), (-478.,6794.), (7104.,-2952.) /
c = matmul(a,b)
if (any (c /= cres)) stop 1
at = transpose(a)
c = (1.2,-2.2)
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 2
bt = transpose(b)
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 3
ah = transpose(conjg(a))
c = (1.2,-2.2)
c = matmul(conjg(transpose(ah)), b)
if (any (c /= cres)) stop 4
bh = transpose(conjg(b))
c = (1.2,-2.2)
c = matmul(a, transpose(conjg(bh)))
if (any (c /= cres)) stop 5
c_alloc = matmul(a,b)
if (any (c /= cres)) stop 6
at = transpose(a)
deallocate (c_alloc)
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 7
bt = transpose(b)
allocate (c_alloc(20,20))
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 8
ah = transpose(conjg(a))
c = (1.2,-2.2)
c = matmul(conjg(transpose(ah)), b)
if (any (c /= cres)) stop 9
deallocate (c_alloc)
allocate (c_alloc(0,0))
bh = transpose(conjg(b))
c = (1.2,-2.2)
c = matmul(a, transpose(conjg(bh)))
if (any (c /= cres)) stop 10
end
subroutine sub_z
implicit none
complex(8), dimension(3,2) :: a
complex(8), dimension(2,3) :: at, ah
complex(8), dimension(2,4) :: b
complex(8), dimension(4,2) :: bt, bh
complex(8), dimension(3,4) :: c
complex(8), dimension(3,4) :: cres
complex(8), dimension(:,:), allocatable :: c_alloc
data a / (2.,-3.), (-5._8,7.), (11.,-13.), (17.,19),
& (-23., -29), (-31., 37.)/
data b / (-41., 43.), (-47., 53.), (-59.,-61.), (-67., 71),
& ( 73.,79. ), (83.,-89.), (97.,-101.), (-107.,-109.)/
data cres /(-1759.,217.), (2522.,-358.), (-396.,-2376.),
& (-2789.,-11.),
& (4322.,202.), (-1992.,-4584.), (3485.,3.), (-5408.,-244.),
& (2550.,5750.), (143.,-4379.), (-478.,6794.), (7104.,-2952.) /
c = matmul(a,b)
if (any (c /= cres)) stop 11
at = transpose(a)
c = (1.2,-2.2)
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 12
bt = transpose(b)
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 13
ah = transpose(conjg(a))
c = (1.2,-2.2)
c = matmul(conjg(transpose(ah)), b)
if (any (c /= cres)) stop 14
bh = transpose(conjg(b))
c = (1.2,-2.2)
c = matmul(a, transpose(conjg(bh)))
if (any (c /= cres)) stop 15
c_alloc = matmul(a,b)
if (any (c /= cres)) stop 16
at = transpose(a)
deallocate (c_alloc)
c = matmul(transpose(at), b)
if (any (c /= cres)) stop 17
bt = transpose(b)
allocate (c_alloc(20,20))
c = (1.2,-2.1)
c = matmul(a, transpose(bt))
if (any (c /= cres)) stop 18
ah = transpose(conjg(a))
c = (1.2,-2.2)
c = matmul(conjg(transpose(ah)), b)
if (any (c /= cres)) stop 19
deallocate (c_alloc)
allocate (c_alloc(0,0))
bh = transpose(conjg(b))
c = (1.2,-2.2)
c = matmul(a, transpose(conjg(bh)))
if (any (c /= cres)) stop 20
end
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
C { dg-do run }
C { dg-options "-fno-realloc-lhs -fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" }
C { dg-shouldfail "Fortran runtime error: Array bound mismatch for dimension 2 of array." }
C { dg-additional-sources blas_gemm_routines.f }
program main
real, dimension(3,2) :: a
real, dimension(2,3) :: b
real, dimension(:,:), allocatable :: ret
a = 1.0
b = 2.3
allocate(ret(3,2))
ret = matmul(a,b) ! This should throw an error.
end
! { dg-output "Fortran runtime error: Array bound mismatch for dimension 2 of array.*" }
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
C { dg-do run }
C { dg-options "-fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" }
C { dg-shouldfail "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1.*" }
C { dg-additional-sources blas_gemm_routines.f }
program main
character(len=20) :: line
integer :: n, m
real, dimension(3,2) :: a
real, dimension(:,:), allocatable :: b
real, dimension(:,:), allocatable :: ret
a = 1.0
line = '3 3'
read (unit=line,fmt=*) n, m
allocate (b(n,m))
b = 2.3
ret = matmul(a,b) ! This should throw an error.
end
! { dg-output "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1.*" }
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
C { dg-do run }
C { dg-options "-fdump-tree-optimized -fcheck=bounds -fblas-matmul-limit=1 -O -fexternal-blas" }
C { dg-shouldfail "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1" }
C { dg-additional-sources blas_gemm_routines.f }
program main
character(len=20) :: line
integer :: n, m
real, dimension(3,2) :: a
real, dimension(:,:), allocatable :: b
real, dimension(:,:), allocatable :: ret
a = 1.0
line = '4 3'
read (unit=line,fmt=*) n, m
allocate (b(n,m))
b = 2.3
ret = matmul(transpose(a),b) ! This should throw an error.
end
! { dg-output "Fortran runtime error: Incorrect extent in argument B in MATMUL intrinsic in dimension 1.*" }
! { dg-final { scan-tree-dump-times "_gfortran_matmul" 0 "optimized" } }
! { dg-do compile } ! { dg-do compile }
! { dg-options "-fdefault-real-8 -fexternal-blas -fdump-tree-original -finline-matmul-limit=0" } ! { dg-options "-fdefault-real-8 -fexternal-blas -fblas-matmul-limit=1 -fdump-tree-original -finline-matmul-limit=0" }
! !
! PR fortran/54463 ! PR fortran/54463
! !
...@@ -8,8 +8,9 @@ ...@@ -8,8 +8,9 @@
program test program test
implicit none implicit none
real, dimension(3,3) :: A real, dimension(3,3) :: A
call random_number(a)
A = matmul(A,A) A = matmul(A,A)
end program test end program test
! { dg-final { scan-tree-dump-times "sgemm_" 0 "original" } } ! { dg-final { scan-tree-dump-times "sgemm" 0 "original" } }
! { dg-final { scan-tree-dump-times "dgemm_" 1 "original" } } ! { dg-final { scan-tree-dump-times "dgemm" 1 "original" } }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment