Commit 48f65795 by Jiufu Guo Committed by Jiufu Guo

rs6000: Refine small loop unroll in loop_unroll_adjust hook

This patch introduces the loop unroll adjust hook for powerpc, so that
target-specific heuristic adjustments can be made in this hook.  With this
patch, -funroll-loops is enabled for small loops at -O2 and above, with the
option -munroll-only-small-loops guarding the small loop unrolling, and it
works fine with -flto.


gcc/
2019-11-11  Jiufu Guo  <guojiufu@linux.ibm.com>

	PR tree-optimization/88760
	* gcc/config/rs6000/rs6000.opt (-munroll-only-small-loops): New option.
	* gcc/common/config/rs6000/rs6000-common.c
	(rs6000_option_optimization_table) [OPT_LEVELS_2_PLUS_SPEED_ONLY]:
	Turn on -funroll-loops and -munroll-only-small-loops.
	[OPT_LEVELS_ALL]: Turn off -fweb and -frename-registers.
	* config/rs6000/rs6000.c (rs6000_option_override_internal): Remove
	set of PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS.
	Turn off -munroll-only-small-loops for explicit -funroll-loops.
	(TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook.
	(rs6000_loop_unroll_adjust): Define it.  Use -munroll-only-small-loops.

gcc.testsuite/
2019-11-11  Jiufu Guo  <guojiufu@linux.ibm.com>

	PR tree-optimization/88760
	* gcc.dg/pr59643.c: Update back to r277550.

From-SVN: r278034
parent 103cba80
2019-11-11 Jiufu Guo <guojiufu@linux.ibm.com>
PR tree-optimization/88760
* gcc/config/rs6000/rs6000.opt (-munroll-only-small-loops): New option.
* gcc/common/config/rs6000/rs6000-common.c
(rs6000_option_optimization_table) [OPT_LEVELS_2_PLUS_SPEED_ONLY]:
Turn on -funroll-loops and -munroll-only-small-loops.
[OPT_LEVELS_ALL]: Turn off -fweb and -frename-registers.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Remove
set of PARAM_MAX_UNROLL_TIMES and PARAM_MAX_UNROLLED_INSNS.
Turn off -munroll-only-small-loops for explicit -funroll-loops.
(TARGET_LOOP_UNROLL_ADJUST): Add loop unroll adjust hook.
(rs6000_loop_unroll_adjust): Define it. Use -munroll-only-small-loops.
2019-11-11 Kewen Lin <linkw@gcc.gnu.org> 2019-11-11 Kewen Lin <linkw@gcc.gnu.org>
* config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost):
...@@ -35,7 +35,14 @@ static const struct default_options rs6000_option_optimization_table[] = ...@@ -35,7 +35,14 @@ static const struct default_options rs6000_option_optimization_table[] =
{ OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 }, { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
/* Enable -fsched-pressure for first pass instruction scheduling. */ /* Enable -fsched-pressure for first pass instruction scheduling. */
{ OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 }, { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_funroll_loops, NULL, 1 }, /* Enable -munroll-only-small-loops with -funroll-loops to unroll small
loops at -O2 and above by default. */
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 },
/* -fweb and -frename-registers are useless in general for rs6000,
turn them off. */
{ OPT_LEVELS_ALL, OPT_fweb, NULL, 0 },
{ OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 },
{ OPT_LEVELS_NONE, 0, NULL, 0 } { OPT_LEVELS_NONE, 0, NULL, 0 }
}; };
......
...@@ -1428,6 +1428,9 @@ static const struct attribute_spec rs6000_attribute_table[] = ...@@ -1428,6 +1428,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_VECTORIZE_DESTROY_COST_DATA #undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
#undef TARGET_INIT_BUILTINS #undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins #define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL #undef TARGET_BUILTIN_DECL
...@@ -4540,25 +4543,12 @@ rs6000_option_override_internal (bool global_init_p) ...@@ -4540,25 +4543,12 @@ rs6000_option_override_internal (bool global_init_p)
global_options.x_param_values, global_options.x_param_values,
global_options_set.x_param_values); global_options_set.x_param_values);
/* unroll very small loops 2 time if no -funroll-loops. */ /* Explicit -funroll-loops turns -munroll-only-small-loops off. */
if (!global_options_set.x_flag_unroll_loops if (((global_options_set.x_flag_unroll_loops && flag_unroll_loops)
&& !global_options_set.x_flag_unroll_all_loops) || (global_options_set.x_flag_unroll_all_loops
{ && flag_unroll_all_loops))
maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2, && !global_options_set.x_unroll_only_small_loops)
global_options.x_param_values, unroll_only_small_loops = 0;
global_options_set.x_param_values);
maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
global_options.x_param_values,
global_options_set.x_param_values);
/* If fweb or frename-registers are not specificed in command-line,
do not turn them on implicitly. */
if (!global_options_set.x_flag_web)
global_options.x_flag_web = 0;
if (!global_options_set.x_flag_rename_registers)
global_options.x_flag_rename_registers = 0;
}
/* If using typedef char *va_list, signal that /* If using typedef char *va_list, signal that
__builtin_va_start (&ap, 0) can be optimized to __builtin_va_start (&ap, 0) can be optimized to
...@@ -5105,6 +5095,25 @@ rs6000_destroy_cost_data (void *data) ...@@ -5105,6 +5095,25 @@ rs6000_destroy_cost_data (void *data)
free (data); free (data);
} }
/* Target hook for targetm.loop_unroll_adjust.

   NUNROLL is the incoming unroll count and LOOP is the loop being
   considered.  When unroll_only_small_loops is zero, NUNROLL is returned
   unchanged.  Otherwise a loop of at most 10 insns gets the smaller of 2
   and NUNROLL, and any larger loop gets 0.  */

static unsigned
rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  /* Small-loop-only mode is off: keep the requested count.  */
  if (!unroll_only_small_loops)
    return nunroll;

  /* TODO: The size limit of 10 is hardcoded for now.  It can be refined;
     for example, very small loops might be unrolled more times (4 perhaps).
     A PARAM should also be used for this.  */
  if (loop->ninsns > 10)
    return 0;

  return MIN (2, nunroll);
}
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
library with vectorized intrinsics. */ library with vectorized intrinsics. */
......
...@@ -501,6 +501,10 @@ moptimize-swaps ...@@ -501,6 +501,10 @@ moptimize-swaps
Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save Target Undocumented Var(rs6000_optimize_swaps) Init(1) Save
Analyze and remove doubleword swaps from VSX computations. Analyze and remove doubleword swaps from VSX computations.
munroll-only-small-loops
Target Undocumented Var(unroll_only_small_loops) Init(0) Save
; Use conservative small loop unrolling.
mpower9-misc mpower9-misc
Target Undocumented Report Mask(P9_MISC) Var(rs6000_isa_flags) Target Undocumented Report Mask(P9_MISC) Var(rs6000_isa_flags)
Use certain scalar instructions added in ISA 3.0. Use certain scalar instructions added in ISA 3.0.
......
2019-11-11 Jiufu Guo <guojiufu@linux.ibm.com>
PR tree-optimization/88760
* gcc.dg/pr59643.c: Update back to r277550.
2019-11-10 Paul Thomas <pault@gcc.gnu.org> 2019-11-10 Paul Thomas <pault@gcc.gnu.org>
PR fortran/92123 PR fortran/92123
......
/* PR tree-optimization/59643 */ /* PR tree-optimization/59643 */
/* { dg-do compile } */ /* { dg-do compile } */
/* { dg-options "-O3 -fdump-tree-pcom-details" } */ /* { dg-options "-O3 -fdump-tree-pcom-details" } */
/* { dg-additional-options "--param max-unrolled-insns=400" { target { powerpc*-*-* } } } */
/* Implicit threshold of max-unrolled-insns on ppc at O3 is too small for the
loop of this case. */
void void
foo (double *a, double *b, double *c, double d, double e, int n) foo (double *a, double *b, double *c, double d, double e, int n)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment