Commit d88412fc by Thomas Koenig

re PR fortran/82471 (Reorder loop for unfavorable index ordering in DO CONCURRENT and FORALL)

2017-11-05  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/82471
	* lang.opt (ffrontend-loop-interchange): New option.
	(Wfrontend-loop-interchange): New option.
	* options.c (gfc_post_options): Handle ffrontend-loop-interchange.
	* frontend-passes.c (gfc_run_passes): Run
	optimize_namespace if flag_frontend_optimize or
	flag_frontend_loop_interchange are set.
	(optimize_namespace): Run functions according to flags set;
	also call index_interchange.
	(ind_type): New function.
	(has_var): New function.
	(index_cost): New function.
	(loop_comp): New function.

2017-11-05  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/82471
	* gfortran.dg/loop_interchange_1.f90: New test.

From-SVN: r254430
parent 5233d455
2017-11-05 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/82471
* lang.opt (ffrontend-loop-interchange): New option.
(Wfrontend-loop-interchange): New option.
* options.c (gfc_post_options): Handle ffrontend-loop-interchange.
* frontend-passes.c (gfc_run_passes): Run
optimize_namespace if flag_frontend_optimize or
flag_frontend_loop_interchange are set.
(optimize_namespace): Run functions according to flags set;
also call index_interchange.
(ind_type): New function.
(has_var): New function.
(index_cost): New function.
(loop_comp): New function.
2017-11-05 Paul Thomas <pault@gcc.gnu.org>
PR fortran/78641
......
......@@ -55,6 +55,7 @@ static gfc_expr* check_conjg_transpose_variable (gfc_expr *, bool *,
bool *);
static bool has_dimen_vector_ref (gfc_expr *);
static int matmul_temp_args (gfc_code **, int *,void *data);
static int index_interchange (gfc_code **, int*, void *);
#ifdef CHECKING_P
static void check_locus (gfc_namespace *);
......@@ -155,9 +156,11 @@ gfc_run_passes (gfc_namespace *ns)
check_locus (ns);
#endif
if (flag_frontend_optimize || flag_frontend_loop_interchange)
optimize_namespace (ns);
if (flag_frontend_optimize)
{
optimize_namespace (ns);
optimize_reduction (ns);
if (flag_dump_fortran_optimized)
gfc_dump_parse_tree (ns, stdout);
......@@ -1350,7 +1353,9 @@ simplify_io_impl_do (gfc_code **code, int *walk_subtrees,
return 0;
}
/* Optimize a namespace, including all contained namespaces. */
/* Optimize a namespace, including all contained namespaces.
flag_frontend_optimize and flag_fronend_loop_interchange are
handled separately. */
static void
optimize_namespace (gfc_namespace *ns)
......@@ -1363,6 +1368,8 @@ optimize_namespace (gfc_namespace *ns)
in_assoc_list = false;
in_omp_workshare = false;
if (flag_frontend_optimize)
{
gfc_code_walker (&ns->code, simplify_io_impl_do, dummy_expr_callback, NULL);
gfc_code_walker (&ns->code, convert_do_while, dummy_expr_callback, NULL);
gfc_code_walker (&ns->code, convert_elseif, dummy_expr_callback, NULL);
......@@ -1384,6 +1391,11 @@ optimize_namespace (gfc_namespace *ns)
gfc_code_walker (&ns->code, inline_matmul_assign, dummy_expr_callback,
NULL);
}
}
if (flag_frontend_loop_interchange)
gfc_code_walker (&ns->code, index_interchange, dummy_expr_callback,
NULL);
/* BLOCKs are handled in the expression walker below. */
for (ns = ns->contained; ns; ns = ns->sibling)
......@@ -4225,6 +4237,170 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
return 0;
}
/* Code for index interchange for loops which are grouped together in DO
CONCURRENT or FORALL statements. This is currently only applied if the
iterations are grouped together in a single statement.
For this transformation, it is assumed that memory access in strides is
expensive, and that loops which access later indices (which access memory
in bigger strides) should be moved to the first loops.
For this, a loop over all the statements is executed, counting the times
that the loop iteration values are accessed in each index. The loop
indices are then sorted to minimize access to later indices from inner
loops. */
/* Type for holding index information. */
typedef struct {
gfc_symbol *sym;
gfc_forall_iterator *fa;
int num;
int n[GFC_MAX_DIMENSIONS];
} ind_type;
/* Callback function to determine if an expression is the
corresponding variable. */
static int
has_var (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED, void *data)
{
gfc_expr *expr = *e;
gfc_symbol *sym;
if (expr->expr_type != EXPR_VARIABLE)
return 0;
sym = (gfc_symbol *) data;
return sym == expr->symtree->n.sym;
}
/* Callback function to calculate the cost of a certain index. */
static int
index_cost (gfc_expr **e, int *walk_subtrees ATTRIBUTE_UNUSED,
void *data)
{
ind_type *ind;
gfc_expr *expr;
gfc_array_ref *ar;
gfc_ref *ref;
int i,j;
expr = *e;
if (expr->expr_type != EXPR_VARIABLE)
return 0;
ar = NULL;
for (ref = expr->ref; ref; ref = ref->next)
{
if (ref->type == REF_ARRAY)
{
ar = &ref->u.ar;
break;
}
}
if (ar == NULL || ar->type != AR_ELEMENT)
return 0;
ind = (ind_type *) data;
for (i = 0; i < ar->dimen; i++)
{
for (j=0; ind[j].sym != NULL; j++)
{
if (gfc_expr_walker (&ar->start[i], has_var, (void *) (ind[j].sym)))
ind[j].n[i]++;
}
}
return 0;
}
/* Callback function for qsort, to sort the loop indices. */
static int
loop_comp (const void *e1, const void *e2)
{
const ind_type *i1 = (const ind_type *) e1;
const ind_type *i2 = (const ind_type *) e2;
int i;
for (i=GFC_MAX_DIMENSIONS-1; i >= 0; i--)
{
if (i1->n[i] != i2->n[i])
return i1->n[i] - i2->n[i];
}
/* All other things being equal, let's not change the ordering. */
return i2->num - i1->num;
}
/* Main function to do the index interchange. */
static int
index_interchange (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
void *data ATTRIBUTE_UNUSED)
{
gfc_code *co;
co = *c;
int n_iter;
gfc_forall_iterator *fa;
ind_type *ind;
int i, j;
if (co->op != EXEC_FORALL && co->op != EXEC_DO_CONCURRENT)
return 0;
n_iter = 0;
for (fa = co->ext.forall_iterator; fa; fa = fa->next)
n_iter ++;
/* Nothing to reorder. */
if (n_iter < 2)
return 0;
ind = XALLOCAVEC (ind_type, n_iter + 1);
i = 0;
for (fa = co->ext.forall_iterator; fa; fa = fa->next)
{
ind[i].sym = fa->var->symtree->n.sym;
ind[i].fa = fa;
for (j=0; j<GFC_MAX_DIMENSIONS; j++)
ind[i].n[j] = 0;
ind[i].num = i;
i++;
}
ind[n_iter].sym = NULL;
ind[n_iter].fa = NULL;
gfc_code_walker (c, gfc_dummy_code_callback, index_cost, (void *) ind);
qsort ((void *) ind, n_iter, sizeof (ind_type), loop_comp);
/* Do the actual index interchange. */
co->ext.forall_iterator = fa = ind[0].fa;
for (i=1; i<n_iter; i++)
{
fa->next = ind[i].fa;
fa = fa->next;
}
fa->next = NULL;
if (flag_warn_frontend_loop_interchange)
{
for (i=1; i<n_iter; i++)
{
if (ind[i-1].num > ind[i].num)
{
gfc_warning (OPT_Wfrontend_loop_interchange,
"Interchanging loops at %L", &co->loc);
break;
}
}
}
return 0;
}
#define WALK_SUBEXPR(NODE) \
do \
{ \
......
......@@ -149,8 +149,9 @@ and warnings}.
-Wdo-subscript -Wfunction-elimination -Wimplicit-interface @gol
-Wimplicit-procedure -Wintrinsic-shadow -Wuse-without-only -Wintrinsics-std @gol
-Wline-truncation -Wno-align-commons -Wno-tabs -Wreal-q-constant @gol
-Wsurprising -Wunderflow -Wunused-parameter -Wrealloc-lhs -Wrealloc-lhs-all @gol
-Wtarget-lifetime -fmax-errors=@var{n} -fsyntax-only -pedantic -pedantic-errors
-Wsurprising -Wunderflow -Wunused-parameter -Wrealloc-lhs @gol
-Wrealloc-lhs-all -Wfrontend-loop-interchange -Wtarget-lifetime @gol
-fmax-errors=@var{n} -fsyntax-only -pedantic -pedantic-errors @gol
}
@item Debugging Options
......@@ -183,6 +184,7 @@ and warnings}.
-fbounds-check -fcheck-array-temporaries @gol
-fcheck=@var{<all|array-temps|bounds|do|mem|pointer|recursion>} @gol
-fcoarray=@var{<none|single|lib>} -fexternal-blas -ff2c
-ffrontend-loop-interchange @gol
-ffrontend-optimize @gol
-finit-character=@var{n} -finit-integer=@var{n} -finit-local-zero @gol
-finit-derived @gol
......@@ -910,6 +912,13 @@ Enables some warning options for usages of language features which
may be problematic. This currently includes @option{-Wcompare-reals},
@option{-Wunused-parameter} and @option{-Wdo-subscript}.
@item -Wfrontend-loop-interchange
@opindex @code{Wfrontend-loop-interchange}
@cindex warnings, loop interchange
@cindex loop interchange, warning
Enable warning for loop interchanges performed by the
@option{-ffrontend-loop-interchange} option.
@item -Wimplicit-interface
@opindex @code{Wimplicit-interface}
@cindex warnings, implicit interface
......@@ -1782,6 +1791,14 @@ expressions, removing unnecessary calls to @code{TRIM} in comparisons
and assignments and replacing @code{TRIM(a)} with
@code{a(1:LEN_TRIM(a))}. It can be deselected by specifying
@option{-fno-frontend-optimize}.
@item -ffrontend-loop-interchange
@opindex @code{frontend-loop-interchange}
@cindex loop interchange, Fortran
Attempt to interchange loops in the Fortran front end where
profitable. Enabled by default by any @option{-O} option.
At the moment, this option only affects @code{FORALL} and
@code{DO CONCURRENT} statements with several forall triplets.
@end table
@xref{Code Gen Options,,Options for Code Generation Conventions,
......
......@@ -245,6 +245,10 @@ Wextra
Fortran Warning
; Documented in common
Wfrontend-loop-interchange
Fortran Var(flag_warn_frontend_loop_interchange)
Warn if loops have been interchanged.
Wfunction-elimination
Fortran Warning Var(warn_function_elimination)
Warn about function call elimination.
......@@ -548,6 +552,10 @@ ffree-line-length-
Fortran RejectNegative Joined UInteger Var(flag_free_line_length) Init(132)
-ffree-line-length-<n> Use n as character line width in free mode.
ffrontend-loop-interchange
Fortran Var(flag_frontend_loop_interchange) Init(-1)
Try to interchange loops if profitable.
ffrontend-optimize
Fortran Var(flag_frontend_optimize) Init(-1)
Enable front end optimization.
......
......@@ -417,6 +417,11 @@ gfc_post_options (const char **pfilename)
if (flag_frontend_optimize == -1)
flag_frontend_optimize = optimize;
/* Same for front end loop interchange. */
if (flag_frontend_loop_interchange == -1)
flag_frontend_loop_interchange = optimize;
if (flag_max_array_constructor < 65535)
flag_max_array_constructor = 65535;
......
2017-11-05 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/82471
* gfortran.dg/loop_interchange_1.f90: New test.
2017-11-05 Paul Thomas <pault@gcc.gnu.org>
PR fortran/78641
......
! { dg-do compile }
! { dg-additional-options "-O -Wfrontend-loop-interchange" }
PROGRAM TEST_DO_SPEED
IMPLICIT NONE
REAL, ALLOCATABLE :: A(:,:,:), B(:,:,:), C(:,:,:)
REAL :: TIC
INTEGER :: T0, T1, T2
INTEGER :: I, J, K
INTEGER, PARAMETER :: L = 512, M = 512, N = 512
ALLOCATE( A(L,M,N), B(L,M,N), C(L,M,N) )
CALL RANDOM_NUMBER(A)
CALL RANDOM_NUMBER(B)
CALL SYSTEM_CLOCK( T0, TIC)
DO CONCURRENT( K=1:N, J=1:M, I=1:L) ! { dg-warning "Interchanging loops" }
C(I,J,K) = A(I,J,K) +B(I,J,K)
END DO
END
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment