Commit a4a11197 by Paul Thomas

re PR fortran/25049 (TRANSPOSE not allowed in initialisation expression)

2006-06-20  Paul Thomas  <pault@gcc.gnu.org>

	PR fortran/25049
	PR fortran/25050
	* check.c (non_init_transformational): New function.
	(find_substring_ref): New function to signal use of disallowed
	transformational intrinsic in an initialization expression.
	(gfc_check_all_any): Call previous if initialization expr.
	(gfc_check_count): The same.
	(gfc_check_cshift): The same.
	(gfc_check_dot_product): The same.
	(gfc_check_eoshift): The same.
	(gfc_check_minloc_maxloc): The same.
	(gfc_check_minval_maxval): The same.
	(gfc_check_gfc_check_product_sum): The same.
	(gfc_check_pack): The same.
	(gfc_check_spread): The same.
	(gfc_check_transpose): The same.
	(gfc_check_unpack): The same.

	PR fortran/18769
	*intrinsic.c (add_functions): Add gfc_simplify_transfer.
	*intrinsic.h : Add prototype for gfc_simplify_transfer.
	*simplify.c (gfc_simplify_transfer) : New function to act as
	placeholder for eventual implementation.  Emit error for now.

	PR fortran/16206
	* expr.c (find_array_element): Eliminate condition on length of
	offset. Add bounds checking. Rearrange exit. Return try and
	put gfc_constructor result as an argument.
	(find_array_section): New function.
	(find_substring_ref): New function.
	(simplify_const_ref): Add calls to previous.
	(simplify_parameter_variable): Return on NULL expr.
	(gfc_simplify_expr): Only call gfc_expand_constructor for full
	arrays.

	PR fortran/20876
	* match.c (gfc_match_forall): Add missing locus to gfc_code.

2006-06-20  Paul Thomas  <pault@gcc.gnu.org>

	PR libfortran/28005
	* m4/matmul.m4: aystride = 1 does not uniquely detect the
	presence of a temporary transpose; an array element in the
	first dimension produces the same signature.  Detect this
	using the rank of a and add specific code.
	* generated/matmul_r4.c: Regenerate.
	* generated/matmul_r8.c: Regenerate.
	* generated/matmul_r10.c: Regenerate.
	* generated/matmul_r16.c: Regenerate.
	* generated/matmul_c4.c: Regenerate.
	* generated/matmul_c8.c: Regenerate.
	* generated/matmul_c10.c: Regenerate.
	* generated/matmul_c16.c: Regenerate.
	* generated/matmul_i4.c: Regenerate.
	* generated/matmul_i8.c: Regenerate.
	* generated/matmul_i16.c: Regenerate.

2006-06-20  Paul Thomas  <pault@gcc.gnu.org>

	PR fortran/16206
	* gfortran.dg/array_initializer_1.f90: New test.

	PR fortran/28005
	* gfortran.dg/matmul_3.f90: New test.

From-SVN: r114802
parent 73dab33b
2006-06-20 Paul Thomas <pault@gcc.gnu.org>
PR fortran/25049
PR fortran/25050
* check.c (non_init_transformational): New function.
(find_substring_ref): New function to signal use of disallowed
transformational intrinsic in an initialization expression.
(gfc_check_all_any): Call previous if initialization expr.
(gfc_check_count): The same.
(gfc_check_cshift): The same.
(gfc_check_dot_product): The same.
(gfc_check_eoshift): The same.
(gfc_check_minloc_maxloc): The same.
(gfc_check_minval_maxval): The same.
(gfc_check_gfc_check_product_sum): The same.
(gfc_check_pack): The same.
(gfc_check_spread): The same.
(gfc_check_transpose): The same.
(gfc_check_unpack): The same.
PR fortran/18769
*intrinsic.c (add_functions): Add gfc_simplify_transfer.
*intrinsic.h : Add prototype for gfc_simplify_transfer.
*simplify.c (gfc_simplify_transfer) : New function to act as
placeholder for eventual implementation. Emit error for now.
PR fortran/16206
* expr.c (find_array_element): Eliminate condition on length of
offset. Add bounds checking. Rearrange exit. Return try and
put gfc_constructor result as an argument.
(find_array_section): New function.
(find_substring_ref): New function.
(simplify_const_ref): Add calls to previous.
(simplify_parameter_variable): Return on NULL expr.
(gfc_simplify_expr): Only call gfc_expand_constructor for full
arrays.
PR fortran/20876
* match.c (gfc_match_forall): Add missing locus to gfc_code.
2006-06-18 Francois-Xavier Coudert <coudert@clipper.ens.fr>
PR fortran/26801
......
......@@ -378,6 +378,18 @@ identical_dimen_shape (gfc_expr *a, int ai, gfc_expr *b, int bi)
return ret;
}
/* Error return for transformational intrinsics not allowed in
initalization expressions. */
static try
non_init_transformational (void)
{
gfc_error ("transformational intrinsic '%s' at %L is not permitted "
"in an initialization expression", gfc_current_intrinsic,
gfc_current_intrinsic_where);
return FAILURE;
}
/***** Check functions *****/
/* Check subroutine suitable for intrinsics taking a real argument and
......@@ -439,6 +451,9 @@ gfc_check_all_any (gfc_expr * mask, gfc_expr * dim)
if (dim_check (dim, 1, 1) == FAILURE)
return FAILURE;
if (gfc_init_expr)
return non_init_transformational ();
return SUCCESS;
}
......@@ -724,6 +739,9 @@ gfc_check_count (gfc_expr * mask, gfc_expr * dim)
if (dim_check (dim, 1, 1) == FAILURE)
return FAILURE;
if (gfc_init_expr)
return non_init_transformational ();
return SUCCESS;
}
......@@ -747,6 +765,9 @@ gfc_check_cshift (gfc_expr * array, gfc_expr * shift, gfc_expr * dim)
if (dim_check (dim, 2, 1) == FAILURE)
return FAILURE;
if (gfc_init_expr)
return non_init_transformational ();
return SUCCESS;
}
......@@ -848,6 +869,9 @@ gfc_check_dot_product (gfc_expr * vector_a, gfc_expr * vector_b)
return FAILURE;
}
if (gfc_init_expr)
return non_init_transformational ();
return SUCCESS;
}
......@@ -883,6 +907,9 @@ gfc_check_eoshift (gfc_expr * array, gfc_expr * shift, gfc_expr * boundary,
if (dim_check (dim, 1, 1) == FAILURE)
return FAILURE;
if (gfc_init_expr)
return non_init_transformational ();
return SUCCESS;
}
......@@ -1545,6 +1572,9 @@ gfc_check_matmul (gfc_expr * matrix_a, gfc_expr * matrix_b)
return FAILURE;
}
if (gfc_init_expr)
return non_init_transformational ();
return SUCCESS;
}
......@@ -1605,6 +1635,9 @@ gfc_check_minloc_maxloc (gfc_actual_arglist * ap)
return FAILURE;
}
if (gfc_init_expr)
return non_init_transformational ();
return SUCCESS;
}
......@@ -1673,6 +1706,9 @@ gfc_check_minval_maxval (gfc_actual_arglist * ap)
|| array_check (ap->expr, 0) == FAILURE)
return FAILURE;
if (gfc_init_expr)
return non_init_transformational ();
return check_reduction (ap);
}
......@@ -1684,6 +1720,9 @@ gfc_check_product_sum (gfc_actual_arglist * ap)
|| array_check (ap->expr, 0) == FAILURE)
return FAILURE;
if (gfc_init_expr)
return non_init_transformational ();
return check_reduction (ap);
}
......@@ -1781,6 +1820,9 @@ gfc_check_pack (gfc_expr * array, gfc_expr * mask, gfc_expr * vector)
/* TODO: More constraints here. */
}
if (gfc_init_expr)
return non_init_transformational ();
return SUCCESS;
}
......@@ -2152,6 +2194,9 @@ gfc_check_spread (gfc_expr * source, gfc_expr * dim, gfc_expr * ncopies)
if (scalar_check (ncopies, 2) == FAILURE)
return FAILURE;
if (gfc_init_expr)
return non_init_transformational ();
return SUCCESS;
}
......@@ -2367,6 +2412,9 @@ gfc_check_transpose (gfc_expr * matrix)
if (rank_check (matrix, 0, 2) == FAILURE)
return FAILURE;
if (gfc_init_expr)
return non_init_transformational ();
return SUCCESS;
}
......@@ -2405,6 +2453,9 @@ gfc_check_unpack (gfc_expr * vector, gfc_expr * mask, gfc_expr * field)
if (same_type_check (vector, 0, field, 2) == FAILURE)
return FAILURE;
if (gfc_init_expr)
return non_init_transformational ();
return SUCCESS;
}
......
......@@ -2139,7 +2139,7 @@ add_functions (void)
make_generic ("tiny", GFC_ISYM_NONE, GFC_STD_F95);
add_sym_3 ("transfer", 0, 1, BT_REAL, dr, GFC_STD_F95,
gfc_check_transfer, NULL, gfc_resolve_transfer,
gfc_check_transfer, gfc_simplify_transfer, gfc_resolve_transfer,
src, BT_REAL, dr, REQUIRED, mo, BT_REAL, dr, REQUIRED,
sz, BT_INTEGER, di, OPTIONAL);
......
......@@ -276,6 +276,7 @@ gfc_expr *gfc_simplify_sqrt (gfc_expr *);
gfc_expr *gfc_simplify_tan (gfc_expr *);
gfc_expr *gfc_simplify_tanh (gfc_expr *);
gfc_expr *gfc_simplify_tiny (gfc_expr *);
gfc_expr *gfc_simplify_transfer (gfc_expr *, gfc_expr *, gfc_expr *);
gfc_expr *gfc_simplify_trim (gfc_expr *);
gfc_expr *gfc_simplify_ubound (gfc_expr *, gfc_expr *);
gfc_expr *gfc_simplify_verify (gfc_expr *, gfc_expr *, gfc_expr *);
......
......@@ -3578,6 +3578,7 @@ gfc_match_forall (gfc_statement * st)
c = gfc_get_code ();
*c = new_st;
c->loc = gfc_current_locus;
if (gfc_match_eos () != MATCH_YES)
goto syntax;
......
......@@ -3715,6 +3715,19 @@ gfc_simplify_tiny (gfc_expr * e)
gfc_expr *
gfc_simplify_transfer (gfc_expr * source, gfc_expr *mold, gfc_expr * size)
{
/* Reference mold and size to suppress warning. */
if (gfc_init_expr && (mold || size))
gfc_error ("TRANSFER intrinsic not implemented for initialization at %L",
&source->where);
return NULL;
}
gfc_expr *
gfc_simplify_trim (gfc_expr * e)
{
gfc_expr *result;
......
2006-06-20 Paul Thomas <pault@gcc.gnu.org>
PR fortran/16206
* gfortran.dg/array_initializer_1.f90: New test.
PR fortran/28005
* gfortran.dg/matmul_3.f90: New test.
2006-06-19 Andrew Pinski <pinskia@gmail.com>
PR middle-end/28075
! { dg-do run }
! Check the fix for PR16206, in which array sections would not work
! in array initializers. Use of implied do loop variables for indices
! and substrings, with and without implied do loops, were fixed at the
! same time.
!
! Contributed by Paul Thomas <pault@gcc.gnu.org>
! based on testcase from Harald Anlauf <anlauf@gmx.de>
!
real, parameter :: x(4,4) = reshape((/(i, i = 1, 16)/), (/4,4/))
real, parameter :: y(4) = (/ x(1:2, 2), x(3:4, 4)/)
real, parameter :: z(2) = x(2:3, 3) + 1
real, parameter :: r(6) = (/(x(i:i +1, i), i = 1,3)/)
real, parameter :: s(12) = (/((x(i, i:j-1:-1), i = 3,4), j = 2,3)/)
real, parameter :: t(8) = (/(z, &
real (i)**3, y(i), i = 2, 3)/) ! { dg-warning "nonstandard" }
integer, parameter :: ii = 4
character(4), parameter :: chr(4) = (/"abcd", "efgh", "ijkl", "mnop"/)
character(4), parameter :: chrs = chr(ii)(2:3)//chr(2)(ii-3:ii-2)
character(4), parameter :: chrt(2) = (/chr(2:2)(2:3), chr(ii-1)(3:ii)/)
character(2), parameter :: chrx(2) = (/(chr(i)(i:i+1), i=2,3)/)
if (any (y .ne. (/5., 6., 15., 16./))) call abort ()
if (any (z .ne. (/11., 12./))) call abort ()
if (any (r .ne. (/1., 2., 6., 7., 11., 12./))) call abort ()
if (any (s .ne. (/11., 7., 3., 16., 12., 8., 4., &
11., 7., 16., 12., 8. /))) call abort ()
if (any (t .ne. (/11., 12., 8., 6., 11., 12., 27., 15. /))) call abort ()
if (chrs .ne. "noef") call abort ()
if (any (chrt .ne. (/"fg", "kl"/))) call abort ()
if (any (chrx .ne. (/"fg", "kl"/))) call abort ()
end
! { dg-do run }
! Check the fix for PR28005, in which the mechanism for dealing
! with matmul (transpose (a), b) would cause wrong results for
! matmul (a(i, 1:n), b(1:n, 1:n)).
!
! Based on the original testcase contributed by
! Tobias Burnus <tobias.burnus@physik.fu-berlin.de>
!
implicit none
integer, parameter :: nmax = 3
integer :: i, n = 2
integer, dimension(nmax,nmax) :: iB=0 , iC=1
integer, dimension(nmax,nmax) :: iX1=99, iX2=99, iChk
iChk = reshape((/30,66,102,36,81,126,42,96,150/),(/3,3/))
! This would give 3, 3, 99
iB = reshape((/1 ,3 ,0 ,2 ,5 ,0 ,0 ,0 ,0 /),(/3,3/))
iX1(1:n,1) = matmul( iB(2,1:n),iC(1:n,1:n) )
! This would give 4, 4, 99
ib(3,1) = 1
iX2(1:n,1) = matmul( iB(2,1:n),iC(1:n,1:n) )
! Whereas, we should have 8, 8, 99
if (any (iX1(1:n,1) .ne. (/8, 8, 99/))) call abort ()
if (any (iX1 .ne. iX2)) call abort ()
! Make sure that the fix does not break transpose temporaries.
iB = reshape((/(i, i = 1, 9)/),(/3,3/))
ic = transpose (iB)
iX1 = transpose (iB)
iX1 = matmul (iX1, iC)
iX2 = matmul (transpose (iB), iC)
if (any (iX1 .ne. iX2)) call abort ()
if (any (iX1 .ne. iChk)) call abort ()
end
2006-06-20 Paul Thomas <pault@gcc.gnu.org>
PR libfortran/28005
* m4/matmul.m4: aystride = 1 does not uniquely detect the
presence of a temporary transpose; an array element in the
first dimension produces the same signature. Detect this
using the rank of a and add specific code.
* generated/matmul_r4.c: Regenerate.
* generated/matmul_r8.c: Regenerate.
* generated/matmul_r10.c: Regenerate.
* generated/matmul_r16.c: Regenerate.
* generated/matmul_c4.c: Regenerate.
* generated/matmul_c8.c: Regenerate.
* generated/matmul_c10.c: Regenerate.
* generated/matmul_c16.c: Regenerate.
* generated/matmul_i4.c: Regenerate.
* generated/matmul_i8.c: Regenerate.
* generated/matmul_i16.c: Regenerate.
2006-06-18 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
PR libgomp/27254
......
......@@ -210,22 +210,39 @@ matmul_c10 (gfc_array_c10 * const restrict retarray,
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
const GFC_COMPLEX_10 *restrict abase_x;
const GFC_COMPLEX_10 *restrict bbase_y;
GFC_COMPLEX_10 *restrict dest_y;
GFC_COMPLEX_10 s;
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
const GFC_COMPLEX_10 *restrict abase_x;
const GFC_COMPLEX_10 *restrict bbase_y;
GFC_COMPLEX_10 *restrict dest_y;
GFC_COMPLEX_10 s;
for (y = 0; y < ycount; y++)
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
s = (GFC_COMPLEX_10) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
}
}
}
else
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
const GFC_COMPLEX_10 *restrict bbase_y;
GFC_COMPLEX_10 s;
for (y = 0; y < ycount; y++)
{
abase_x = &abase[x*axstride];
bbase_y = &bbase[y*bystride];
s = (GFC_COMPLEX_10) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
}
}
}
......
......@@ -210,22 +210,39 @@ matmul_c16 (gfc_array_c16 * const restrict retarray,
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
const GFC_COMPLEX_16 *restrict abase_x;
const GFC_COMPLEX_16 *restrict bbase_y;
GFC_COMPLEX_16 *restrict dest_y;
GFC_COMPLEX_16 s;
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
const GFC_COMPLEX_16 *restrict abase_x;
const GFC_COMPLEX_16 *restrict bbase_y;
GFC_COMPLEX_16 *restrict dest_y;
GFC_COMPLEX_16 s;
for (y = 0; y < ycount; y++)
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
s = (GFC_COMPLEX_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
}
}
}
else
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
const GFC_COMPLEX_16 *restrict bbase_y;
GFC_COMPLEX_16 s;
for (y = 0; y < ycount; y++)
{
abase_x = &abase[x*axstride];
bbase_y = &bbase[y*bystride];
s = (GFC_COMPLEX_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
}
}
}
......
......@@ -210,22 +210,39 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
const GFC_COMPLEX_4 *restrict abase_x;
const GFC_COMPLEX_4 *restrict bbase_y;
GFC_COMPLEX_4 *restrict dest_y;
GFC_COMPLEX_4 s;
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
const GFC_COMPLEX_4 *restrict abase_x;
const GFC_COMPLEX_4 *restrict bbase_y;
GFC_COMPLEX_4 *restrict dest_y;
GFC_COMPLEX_4 s;
for (y = 0; y < ycount; y++)
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
s = (GFC_COMPLEX_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
}
}
}
else
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
const GFC_COMPLEX_4 *restrict bbase_y;
GFC_COMPLEX_4 s;
for (y = 0; y < ycount; y++)
{
abase_x = &abase[x*axstride];
bbase_y = &bbase[y*bystride];
s = (GFC_COMPLEX_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
}
}
}
......
......@@ -210,22 +210,39 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
const GFC_COMPLEX_8 *restrict abase_x;
const GFC_COMPLEX_8 *restrict bbase_y;
GFC_COMPLEX_8 *restrict dest_y;
GFC_COMPLEX_8 s;
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
const GFC_COMPLEX_8 *restrict abase_x;
const GFC_COMPLEX_8 *restrict bbase_y;
GFC_COMPLEX_8 *restrict dest_y;
GFC_COMPLEX_8 s;
for (y = 0; y < ycount; y++)
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
s = (GFC_COMPLEX_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
}
}
}
else
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
const GFC_COMPLEX_8 *restrict bbase_y;
GFC_COMPLEX_8 s;
for (y = 0; y < ycount; y++)
{
abase_x = &abase[x*axstride];
bbase_y = &bbase[y*bystride];
s = (GFC_COMPLEX_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
}
}
}
......
......@@ -210,22 +210,39 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
const GFC_INTEGER_16 *restrict abase_x;
const GFC_INTEGER_16 *restrict bbase_y;
GFC_INTEGER_16 *restrict dest_y;
GFC_INTEGER_16 s;
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
const GFC_INTEGER_16 *restrict abase_x;
const GFC_INTEGER_16 *restrict bbase_y;
GFC_INTEGER_16 *restrict dest_y;
GFC_INTEGER_16 s;
for (y = 0; y < ycount; y++)
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
s = (GFC_INTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
}
}
}
else
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
const GFC_INTEGER_16 *restrict bbase_y;
GFC_INTEGER_16 s;
for (y = 0; y < ycount; y++)
{
abase_x = &abase[x*axstride];
bbase_y = &bbase[y*bystride];
s = (GFC_INTEGER_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
}
}
}
......
......@@ -210,22 +210,39 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
const GFC_INTEGER_4 *restrict abase_x;
const GFC_INTEGER_4 *restrict bbase_y;
GFC_INTEGER_4 *restrict dest_y;
GFC_INTEGER_4 s;
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
const GFC_INTEGER_4 *restrict abase_x;
const GFC_INTEGER_4 *restrict bbase_y;
GFC_INTEGER_4 *restrict dest_y;
GFC_INTEGER_4 s;
for (y = 0; y < ycount; y++)
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
s = (GFC_INTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
}
}
}
else
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
const GFC_INTEGER_4 *restrict bbase_y;
GFC_INTEGER_4 s;
for (y = 0; y < ycount; y++)
{
abase_x = &abase[x*axstride];
bbase_y = &bbase[y*bystride];
s = (GFC_INTEGER_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
}
}
}
......
......@@ -210,22 +210,39 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
const GFC_INTEGER_8 *restrict abase_x;
const GFC_INTEGER_8 *restrict bbase_y;
GFC_INTEGER_8 *restrict dest_y;
GFC_INTEGER_8 s;
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
const GFC_INTEGER_8 *restrict abase_x;
const GFC_INTEGER_8 *restrict bbase_y;
GFC_INTEGER_8 *restrict dest_y;
GFC_INTEGER_8 s;
for (y = 0; y < ycount; y++)
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
s = (GFC_INTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
}
}
}
else
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
const GFC_INTEGER_8 *restrict bbase_y;
GFC_INTEGER_8 s;
for (y = 0; y < ycount; y++)
{
abase_x = &abase[x*axstride];
bbase_y = &bbase[y*bystride];
s = (GFC_INTEGER_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
}
}
}
......
......@@ -210,22 +210,39 @@ matmul_r10 (gfc_array_r10 * const restrict retarray,
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
const GFC_REAL_10 *restrict abase_x;
const GFC_REAL_10 *restrict bbase_y;
GFC_REAL_10 *restrict dest_y;
GFC_REAL_10 s;
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
const GFC_REAL_10 *restrict abase_x;
const GFC_REAL_10 *restrict bbase_y;
GFC_REAL_10 *restrict dest_y;
GFC_REAL_10 s;
for (y = 0; y < ycount; y++)
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
s = (GFC_REAL_10) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
}
}
}
else
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
const GFC_REAL_10 *restrict bbase_y;
GFC_REAL_10 s;
for (y = 0; y < ycount; y++)
{
abase_x = &abase[x*axstride];
bbase_y = &bbase[y*bystride];
s = (GFC_REAL_10) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
}
}
}
......
......@@ -210,22 +210,39 @@ matmul_r16 (gfc_array_r16 * const restrict retarray,
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
const GFC_REAL_16 *restrict abase_x;
const GFC_REAL_16 *restrict bbase_y;
GFC_REAL_16 *restrict dest_y;
GFC_REAL_16 s;
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
const GFC_REAL_16 *restrict abase_x;
const GFC_REAL_16 *restrict bbase_y;
GFC_REAL_16 *restrict dest_y;
GFC_REAL_16 s;
for (y = 0; y < ycount; y++)
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
s = (GFC_REAL_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
}
}
}
else
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
const GFC_REAL_16 *restrict bbase_y;
GFC_REAL_16 s;
for (y = 0; y < ycount; y++)
{
abase_x = &abase[x*axstride];
bbase_y = &bbase[y*bystride];
s = (GFC_REAL_16) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
}
}
}
......
......@@ -210,22 +210,39 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
const GFC_REAL_4 *restrict abase_x;
const GFC_REAL_4 *restrict bbase_y;
GFC_REAL_4 *restrict dest_y;
GFC_REAL_4 s;
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
const GFC_REAL_4 *restrict abase_x;
const GFC_REAL_4 *restrict bbase_y;
GFC_REAL_4 *restrict dest_y;
GFC_REAL_4 s;
for (y = 0; y < ycount; y++)
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
s = (GFC_REAL_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
}
}
}
else
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
const GFC_REAL_4 *restrict bbase_y;
GFC_REAL_4 s;
for (y = 0; y < ycount; y++)
{
abase_x = &abase[x*axstride];
bbase_y = &bbase[y*bystride];
s = (GFC_REAL_4) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
}
}
}
......
......@@ -210,22 +210,39 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
const GFC_REAL_8 *restrict abase_x;
const GFC_REAL_8 *restrict bbase_y;
GFC_REAL_8 *restrict dest_y;
GFC_REAL_8 s;
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
const GFC_REAL_8 *restrict abase_x;
const GFC_REAL_8 *restrict bbase_y;
GFC_REAL_8 *restrict dest_y;
GFC_REAL_8 s;
for (y = 0; y < ycount; y++)
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
s = (GFC_REAL_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
}
}
}
else
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
const GFC_REAL_8 *restrict bbase_y;
GFC_REAL_8 s;
for (y = 0; y < ycount; y++)
{
abase_x = &abase[x*axstride];
bbase_y = &bbase[y*bystride];
s = (GFC_REAL_8) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
}
}
}
......
......@@ -212,22 +212,39 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
}
else if (rxstride == 1 && aystride == 1 && bxstride == 1)
{
const rtype_name *restrict abase_x;
const rtype_name *restrict bbase_y;
rtype_name *restrict dest_y;
rtype_name s;
if (GFC_DESCRIPTOR_RANK (a) != 1)
{
const rtype_name *restrict abase_x;
const rtype_name *restrict bbase_y;
rtype_name *restrict dest_y;
rtype_name s;
for (y = 0; y < ycount; y++)
for (y = 0; y < ycount; y++)
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
{
abase_x = &abase[x*axstride];
s = (rtype_name) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
}
}
}
else
{
bbase_y = &bbase[y*bystride];
dest_y = &dest[y*rystride];
for (x = 0; x < xcount; x++)
const rtype_name *restrict bbase_y;
rtype_name s;
for (y = 0; y < ycount; y++)
{
abase_x = &abase[x*axstride];
bbase_y = &bbase[y*bystride];
s = (rtype_name) 0;
for (n = 0; n < count; n++)
s += abase_x[n] * bbase_y[n];
dest_y[x] = s;
s += abase[n*axstride] * bbase_y[n];
dest[y*rystride] = s;
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment