Commit 1585b483 by Thomas Koenig

re PR fortran/90539 (481.wrf slowdown by 25% on Intel Kaby with -Ofast…

re PR fortran/90539 (481.wrf slowdown by 25% on Intel Kaby with -Ofast -march=native starting with r271377)

2019-05-29  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/90539
	* gfortran.h (gfc_has_dimen_vector_ref): Add prototype.
	* trans.h (gfc_conv_subref_array_arg): Add argument check_contiguous.
	(gfc_conv_is_contiguous_expr): Add prototype.
	* frontend-passes.c (has_dimen_vector_ref): Remove prototype,
	rename to
	(gfc_has_dimen_vector_ref): New function name.
	(matmul_temp_args): Use gfc_has_dimen_vector_ref.
	(inline_matmul_assign): Likewise.
	* trans-array.c (gfc_conv_array_parameter): Also check for absence
	of a vector subscript before calling gfc_conv_subref_array_arg.
	Pass additional argument to gfc_conv_subref_array_arg.
	* trans-expr.c (gfc_conv_subref_array_arg): Add argument
	check_contiguous. If that is true, check if the argument
	is contiguous and do not repack in that case.
	* trans-intrinsic.c (gfc_conv_intrinsic_is_contiguous): Split
	away most of the work into, and call
	(gfc_conv_intrinsic_is_coniguous_expr): New function.

2019-05-29  Thomas Koenig  <tkoenig@gcc.gnu.org>

	PR fortran/90539
	* gfortran.dg/internal_pack_21.f90: Adjust scan patterns.
	* gfortran.dg/internal_pack_22.f90: New test.
	* gfortran.dg/internal_pack_23.f90: New test.

From-SVN: r271751
parent 987c9fc5
2019-05-29 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/90539
* gfortran.h (gfc_has_dimen_vector_ref): Add prototype.
* trans.h (gfc_conv_subref_array_arg): Add argument check_contiguous.
(gfc_conv_is_contiguous_expr): Add prototype.
* frontend-passes.c (has_dimen_vector_ref): Remove prototype,
rename to
(gfc_has_dimen_vector_ref): New function name.
(matmul_temp_args): Use gfc_has_dimen_vector_ref.
(inline_matmul_assign): Likewise.
* trans-array.c (gfc_conv_array_parameter): Also check for absence
of a vector subscript before calling gfc_conv_subref_array_arg.
Pass additional argument to gfc_conv_subref_array_arg.
* trans-expr.c (gfc_conv_subref_array_arg): Add argument
check_contiguous. If that is true, check if the argument
is contiguous and do not repack in that case.
* trans-intrinsic.c (gfc_conv_intrinsic_is_contiguous): Split
away most of the work into, and call
(gfc_conv_intrinsic_is_coniguous_expr): New function.
2019-05-29 Jakub Jelinek <jakub@redhat.com> 2019-05-29 Jakub Jelinek <jakub@redhat.com>
PR fortran/90329 PR fortran/90329
......
...@@ -54,7 +54,6 @@ static gfc_code * create_do_loop (gfc_expr *, gfc_expr *, gfc_expr *, ...@@ -54,7 +54,6 @@ static gfc_code * create_do_loop (gfc_expr *, gfc_expr *, gfc_expr *,
static gfc_expr* check_conjg_transpose_variable (gfc_expr *, bool *, static gfc_expr* check_conjg_transpose_variable (gfc_expr *, bool *,
bool *); bool *);
static int call_external_blas (gfc_code **, int *, void *); static int call_external_blas (gfc_code **, int *, void *);
static bool has_dimen_vector_ref (gfc_expr *);
static int matmul_temp_args (gfc_code **, int *,void *data); static int matmul_temp_args (gfc_code **, int *,void *data);
static int index_interchange (gfc_code **, int*, void *); static int index_interchange (gfc_code **, int*, void *);
...@@ -2868,7 +2867,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, ...@@ -2868,7 +2867,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
{ {
if (matrix_a->expr_type == EXPR_VARIABLE if (matrix_a->expr_type == EXPR_VARIABLE
&& (gfc_check_dependency (matrix_a, expr1, true) && (gfc_check_dependency (matrix_a, expr1, true)
|| has_dimen_vector_ref (matrix_a))) || gfc_has_dimen_vector_ref (matrix_a)))
a_tmp = true; a_tmp = true;
} }
else else
...@@ -2881,7 +2880,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED, ...@@ -2881,7 +2880,7 @@ matmul_temp_args (gfc_code **c, int *walk_subtrees ATTRIBUTE_UNUSED,
{ {
if (matrix_b->expr_type == EXPR_VARIABLE if (matrix_b->expr_type == EXPR_VARIABLE
&& (gfc_check_dependency (matrix_b, expr1, true) && (gfc_check_dependency (matrix_b, expr1, true)
|| has_dimen_vector_ref (matrix_b))) || gfc_has_dimen_vector_ref (matrix_b)))
b_tmp = true; b_tmp = true;
} }
else else
...@@ -3681,8 +3680,8 @@ scalarized_expr (gfc_expr *e_in, gfc_expr **index, int count_index) ...@@ -3681,8 +3680,8 @@ scalarized_expr (gfc_expr *e_in, gfc_expr **index, int count_index)
/* Helper function to check for a dimen vector as subscript. */ /* Helper function to check for a dimen vector as subscript. */
static bool bool
has_dimen_vector_ref (gfc_expr *e) gfc_has_dimen_vector_ref (gfc_expr *e)
{ {
gfc_array_ref *ar; gfc_array_ref *ar;
int i; int i;
...@@ -3838,8 +3837,8 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees, ...@@ -3838,8 +3837,8 @@ inline_matmul_assign (gfc_code **c, int *walk_subtrees,
if (matrix_b == NULL) if (matrix_b == NULL)
return 0; return 0;
if (has_dimen_vector_ref (expr1) || has_dimen_vector_ref (matrix_a) if (gfc_has_dimen_vector_ref (expr1) || gfc_has_dimen_vector_ref (matrix_a)
|| has_dimen_vector_ref (matrix_b)) || gfc_has_dimen_vector_ref (matrix_b))
return 0; return 0;
/* We do not handle data dependencies yet. */ /* We do not handle data dependencies yet. */
......
...@@ -3535,6 +3535,7 @@ typedef int (*walk_expr_fn_t) (gfc_expr **, int *, void *); ...@@ -3535,6 +3535,7 @@ typedef int (*walk_expr_fn_t) (gfc_expr **, int *, void *);
int gfc_dummy_code_callback (gfc_code **, int *, void *); int gfc_dummy_code_callback (gfc_code **, int *, void *);
int gfc_expr_walker (gfc_expr **, walk_expr_fn_t, void *); int gfc_expr_walker (gfc_expr **, walk_expr_fn_t, void *);
int gfc_code_walker (gfc_code **, walk_code_fn_t, walk_expr_fn_t, void *); int gfc_code_walker (gfc_code **, walk_code_fn_t, walk_expr_fn_t, void *);
bool gfc_has_dimen_vector_ref (gfc_expr *e);
/* simplify.c */ /* simplify.c */
......
...@@ -8139,12 +8139,12 @@ gfc_conv_array_parameter (gfc_se * se, gfc_expr * expr, bool g77, ...@@ -8139,12 +8139,12 @@ gfc_conv_array_parameter (gfc_se * se, gfc_expr * expr, bool g77,
optimizers. */ optimizers. */
if (g77 && optimize && !optimize_size && expr->expr_type == EXPR_VARIABLE if (g77 && optimize && !optimize_size && expr->expr_type == EXPR_VARIABLE
&& !is_pointer (expr) && (fsym == NULL && !is_pointer (expr) && ! gfc_has_dimen_vector_ref (expr)
|| fsym->ts.type != BT_ASSUMED)) && (fsym == NULL || fsym->ts.type != BT_ASSUMED))
{ {
gfc_conv_subref_array_arg (se, expr, g77, gfc_conv_subref_array_arg (se, expr, g77,
fsym ? fsym->attr.intent : INTENT_INOUT, fsym ? fsym->attr.intent : INTENT_INOUT,
false, fsym, proc_name, sym); false, fsym, proc_name, sym, true);
return; return;
} }
......
...@@ -4579,7 +4579,7 @@ void ...@@ -4579,7 +4579,7 @@ void
gfc_conv_subref_array_arg (gfc_se *se, gfc_expr * expr, int g77, gfc_conv_subref_array_arg (gfc_se *se, gfc_expr * expr, int g77,
sym_intent intent, bool formal_ptr, sym_intent intent, bool formal_ptr,
const gfc_symbol *fsym, const char *proc_name, const gfc_symbol *fsym, const char *proc_name,
gfc_symbol *sym) gfc_symbol *sym, bool check_contiguous)
{ {
gfc_se lse; gfc_se lse;
gfc_se rse; gfc_se rse;
...@@ -4602,7 +4602,7 @@ gfc_conv_subref_array_arg (gfc_se *se, gfc_expr * expr, int g77, ...@@ -4602,7 +4602,7 @@ gfc_conv_subref_array_arg (gfc_se *se, gfc_expr * expr, int g77,
pass_optional = fsym && fsym->attr.optional && sym && sym->attr.optional; pass_optional = fsym && fsym->attr.optional && sym && sym->attr.optional;
if (pass_optional) if (pass_optional || check_contiguous)
{ {
gfc_init_se (&work_se, NULL); gfc_init_se (&work_se, NULL);
parmse = &work_se; parmse = &work_se;
...@@ -4880,50 +4880,136 @@ class_array_fcn: ...@@ -4880,50 +4880,136 @@ class_array_fcn:
else else
parmse->expr = gfc_build_addr_expr (NULL_TREE, parmse->expr); parmse->expr = gfc_build_addr_expr (NULL_TREE, parmse->expr);
if (pass_optional) /* Basically make this into
if (present)
{
if (contiguous)
{
pointer = a;
}
else
{
parmse->pre();
pointer = parmse->expr;
}
}
else
pointer = NULL;
foo (pointer);
if (present && !contiguous)
se->post();
*/
if (pass_optional || check_contiguous)
{ {
tree present;
tree type; tree type;
stmtblock_t else_block; stmtblock_t else_block;
tree pre_stmts, post_stmts; tree pre_stmts, post_stmts;
tree pointer; tree pointer;
tree else_stmt; tree else_stmt;
tree present_var = NULL_TREE;
tree cont_var = NULL_TREE;
tree post_cond;
/* Make this into type = TREE_TYPE (parmse->expr);
pointer = gfc_create_var (type, "arg_ptr");
if (check_contiguous)
{
gfc_se cont_se, array_se;
stmtblock_t if_block, else_block;
tree if_stmt, else_stmt;
cont_var = gfc_create_var (boolean_type_node, "contiguous");
/* cont_var = is_contiguous (expr); . */
gfc_init_se (&cont_se, parmse);
gfc_conv_is_contiguous_expr (&cont_se, expr);
gfc_add_block_to_block (&se->pre, &(&cont_se)->pre);
gfc_add_modify (&se->pre, cont_var, cont_se.expr);
gfc_add_block_to_block (&se->pre, &(&cont_se)->post);
/* arrayse->expr = descriptor of a. */
gfc_init_se (&array_se, se);
gfc_conv_expr_descriptor (&array_se, expr);
gfc_add_block_to_block (&se->pre, &(&array_se)->pre);
gfc_add_block_to_block (&se->pre, &(&array_se)->post);
/* if_stmt = { pointer = &a[0]; } . */
gfc_init_block (&if_block);
tmp = gfc_conv_array_data (array_se.expr);
tmp = fold_convert (type, tmp);
gfc_add_modify (&if_block, pointer, tmp);
if_stmt = gfc_finish_block (&if_block);
/* else_stmt = { parmse->pre(); pointer = parmse->expr; } . */
gfc_init_block (&else_block);
gfc_add_block_to_block (&else_block, &parmse->pre);
gfc_add_modify (&else_block, pointer, parmse->expr);
else_stmt = gfc_finish_block (&else_block);
/* And put the above into an if statement. */
pre_stmts = fold_build3_loc (input_location, COND_EXPR, void_type_node,
cont_var, if_stmt, else_stmt);
}
else
{
/* pointer = pramse->expr; . */
gfc_add_modify (&parmse->pre, pointer, parmse->expr);
pre_stmts = gfc_finish_block (&parmse->pre);
}
if (present (a)) if (pass_optional)
{ {
parmse->pre; present_var = gfc_create_var (boolean_type_node, "present");
optional = parse->expr;
}
else
optional = NULL;
call foo (optional);
if (present (a))
parmse->post;
*/ /* present_var = present(sym); . */
tmp = gfc_conv_expr_present (sym);
tmp = fold_convert (boolean_type_node, tmp);
gfc_add_modify (&se->pre, present_var, tmp);
type = TREE_TYPE (parmse->expr); /* else_stmt = { pointer = NULL; } . */
pointer = gfc_create_var (type, "optional"); gfc_init_block (&else_block);
tmp = gfc_conv_expr_present (sym); gfc_add_modify (&else_block, pointer, build_int_cst (type, 0));
present = gfc_evaluate_now (tmp, &se->pre); else_stmt = gfc_finish_block (&else_block);
gfc_add_modify (&parmse->pre, pointer, parmse->expr);
pre_stmts = gfc_finish_block (&parmse->pre); tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present_var,
pre_stmts, else_stmt);
gfc_init_block (&else_block); gfc_add_expr_to_block (&se->pre, tmp);
gfc_add_modify (&else_block, pointer, build_int_cst (type, 0));
else_stmt = gfc_finish_block (&else_block);
}
tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present, else
pre_stmts, else_stmt); gfc_add_expr_to_block (&se->pre, pre_stmts);
gfc_add_expr_to_block (&se->pre, tmp);
post_stmts = gfc_finish_block (&parmse->post); post_stmts = gfc_finish_block (&parmse->post);
tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, present,
/* Put together the post stuff, plus the optional
deallocation. */
if (check_contiguous)
{
/* !cont_var. */
tmp = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node,
cont_var,
build_zero_cst (boolean_type_node));
if (pass_optional)
post_cond = fold_build2_loc (input_location, TRUTH_ANDIF_EXPR,
boolean_type_node, present_var, tmp);
else
post_cond = tmp;
}
else
{
gcc_assert (pass_optional);
post_cond = present_var;
}
tmp = fold_build3_loc (input_location, COND_EXPR, void_type_node, post_cond,
post_stmts, build_empty_stmt (input_location)); post_stmts, build_empty_stmt (input_location));
gfc_add_expr_to_block (&se->post, tmp); gfc_add_expr_to_block (&se->post, tmp);
se->expr = pointer; se->expr = pointer;
} }
......
...@@ -2832,6 +2832,17 @@ static void ...@@ -2832,6 +2832,17 @@ static void
gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr) gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr)
{ {
gfc_expr *arg; gfc_expr *arg;
arg = expr->value.function.actual->expr;
gfc_conv_is_contiguous_expr (se, arg);
se->expr = fold_convert (gfc_typenode_for_spec (&expr->ts), se->expr);
}
/* This function does the work for gfc_conv_intrinsic_is_contiguous,
plus it can be called directly. */
void
gfc_conv_is_contiguous_expr (gfc_se *se, gfc_expr *arg)
{
gfc_ss *ss; gfc_ss *ss;
gfc_se argse; gfc_se argse;
tree desc, tmp, stride, extent, cond; tree desc, tmp, stride, extent, cond;
...@@ -2839,8 +2850,6 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr) ...@@ -2839,8 +2850,6 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr)
tree fncall0; tree fncall0;
gfc_array_spec *as; gfc_array_spec *as;
arg = expr->value.function.actual->expr;
if (arg->ts.type == BT_CLASS) if (arg->ts.type == BT_CLASS)
gfc_add_class_array_ref (arg); gfc_add_class_array_ref (arg);
...@@ -2878,7 +2887,7 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr) ...@@ -2878,7 +2887,7 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr)
cond = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node, cond = fold_build2_loc (input_location, EQ_EXPR, boolean_type_node,
stride, build_int_cst (TREE_TYPE (stride), 1)); stride, build_int_cst (TREE_TYPE (stride), 1));
for (i = 0; i < expr->value.function.actual->expr->rank - 1; i++) for (i = 0; i < arg->rank - 1; i++)
{ {
tmp = gfc_conv_descriptor_lbound_get (desc, gfc_rank_cst[i]); tmp = gfc_conv_descriptor_lbound_get (desc, gfc_rank_cst[i]);
extent = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[i]); extent = gfc_conv_descriptor_ubound_get (desc, gfc_rank_cst[i]);
...@@ -2896,7 +2905,7 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr) ...@@ -2896,7 +2905,7 @@ gfc_conv_intrinsic_is_contiguous (gfc_se * se, gfc_expr * expr)
cond = fold_build2_loc (input_location, TRUTH_AND_EXPR, cond = fold_build2_loc (input_location, TRUTH_AND_EXPR,
boolean_type_node, cond, tmp); boolean_type_node, cond, tmp);
} }
se->expr = convert (gfc_typenode_for_spec (&expr->ts), cond); se->expr = cond;
} }
} }
......
...@@ -535,7 +535,10 @@ int gfc_conv_procedure_call (gfc_se *, gfc_symbol *, gfc_actual_arglist *, ...@@ -535,7 +535,10 @@ int gfc_conv_procedure_call (gfc_se *, gfc_symbol *, gfc_actual_arglist *,
void gfc_conv_subref_array_arg (gfc_se *, gfc_expr *, int, sym_intent, bool, void gfc_conv_subref_array_arg (gfc_se *, gfc_expr *, int, sym_intent, bool,
const gfc_symbol *fsym = NULL, const gfc_symbol *fsym = NULL,
const char *proc_name = NULL, const char *proc_name = NULL,
gfc_symbol *sym = NULL); gfc_symbol *sym = NULL,
bool check_contiguous = false);
void gfc_conv_is_contiguous_expr (gfc_se *, gfc_expr *);
/* Generate code for a scalar assignment. */ /* Generate code for a scalar assignment. */
tree gfc_trans_scalar_assign (gfc_se *, gfc_se *, gfc_typespec, bool, bool, tree gfc_trans_scalar_assign (gfc_se *, gfc_se *, gfc_typespec, bool, bool,
......
2019-05-29 Thomas Koenig <tkoenig@gcc.gnu.org>
PR fortran/90539
* gfortran.dg/internal_pack_21.f90: Adjust scan patterns.
* gfortran.dg/internal_pack_22.f90: New test.
* gfortran.dg/internal_pack_23.f90: New test.
2019-05-29 Jan Hubicka <hubicka@ucw.cz> 2019-05-29 Jan Hubicka <hubicka@ucw.cz>
* tree-ssa/alias-access-spath-1.c: new testcase. * tree-ssa/alias-access-spath-1.c: new testcase.
......
...@@ -20,5 +20,5 @@ END MODULE M1 ...@@ -20,5 +20,5 @@ END MODULE M1
USE M1 USE M1
CALL S2() CALL S2()
END END
! { dg-final { scan-tree-dump-times "optional" 4 "original" } } ! { dg-final { scan-tree-dump-times "arg_ptr" 5 "original" } }
! { dg-final { scan-tree-dump-not "_gfortran_internal_unpack" "original" } } ! { dg-final { scan-tree-dump-not "_gfortran_internal_unpack" "original" } }
! { dg-do run }
! { dg-additional-options "-fdump-tree-original -O" }
! Check that absent and present dummy arguments work with
! packing when handing them down to an old-fashioned argument.
module x
implicit none
contains
subroutine foo (a,b)
real, dimension(:), intent(inout), optional :: a, b
if (present(a)) stop 1
if (.not. present(b)) stop 2
call bar (a, b)
end subroutine foo
subroutine bar (a,b)
real, dimension(2), intent(inout), optional :: a, b
real :: tmp
if (present(a)) stop 3
if (.not. present(b)) stop 4
tmp = b(2)
b(2) = b(1)
b(1) = tmp
end subroutine bar
end module x
program main
use x
implicit none
real, dimension(2) :: b
b(1) = 1.
b(2) = 42.
call foo(b=b)
if (b(1) /= 42. .or. b(2) /= 1.) stop 5
end program main
! { dg-final { scan-tree-dump-not "_gfortran_internal_unpack" "original" } }
! { dg-do run }
! PR fortran/90539 - this used to cause an ICE.
module t2
implicit none
contains
subroutine foo(a)
real, dimension(*) :: a
if (a(1) /= 1.0 .or. a(2) /= 2.0) stop 1
end subroutine foo
end module t2
module t1
use t2
implicit none
contains
subroutine bar(a)
real, dimension(:) :: a
if (a(1) /= 1.0 .or. a(2) /= 2.0) stop 1
call foo(a)
end subroutine bar
end module t1
program main
use t1
call bar([1.0, 2.0])
end program main
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment