Commit 8a8330b7 by Vladimir Makarov Committed by Vladimir Makarov

re PR rtl-optimization/69847 (Spec 2006 403.gcc slows down with -mlra vs. reload on PowerPC)

2016-08-02  Vladimir Makarov  <vmakarov@redhat.com>

	PR rtl-optimization/69847
	* lra-int.h (struct lra_reg): Use restore_rtx instead of
	restore_regno.
	(lra_rtx_hash): New.
	* lra.c (initialize_lra_reg_info_element): Use restore_rtx instead
	of restore_regno.
	(lra_rtx_hash): Rename and move lra-remat.c::rtx_hash.
	* lra-remat.c (rtx_hash): Rename to lra_rtx_hash and move to lra.c.
	* lra-spills.c (lra_final_code_change): Don't delete insn when the
	next insn is USE with the same reg as the current insn source.
	* lra-constraints.c (curr_insn_transform): Use restore_rtx instead
	of restore_regno.
	(lra_constraints_init): Call initiate_invariants.
	(lra_constraints_finish): Call finish_invariants.
	(struct invariant, invariant_t, invariant_ptr_t): New.
	(const_invariant_ptr_t, invariants, invariants_pool): New.
	(invariant_table, invariant_hash, invariant_eq_p): New.
	(insert_invariant, initiate_invariants, finish_invariants): New.
	(clear_invariants, invalid_invariant_regs): New.
	(inherit_reload_reg, split_reg, fix_bb_live_info): Use restore_rtx
	instead of restore_regno.
	(invariant_p, process_invariant_for_inheritance): New.
	(inherit_in_ebb): Implement invariant inheritance.
	(lra_inheritance): Initialize and finalize invalid_invariant_regs.
	(remove_inheritance_pseudos): Implement undoing invariant
	inheritance.
	(undo_optional_reloads, lra_undo_inheritance): Use restore_rtx
	instead of restore_regno.
	* lra-assigns.c (regno_live_length): New.
	(reload_pseudo_compare_func): Use regno_live_length.
	(assign_by_spills): Use restore_rtx instead of restore_regno.
	(lra_assign): Ditto.  Initialize regno_live_length.

From-SVN: r238991
parent 354c5470
2016-08-02 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimization/69847
* lra-int.h (struct lra_reg): Use restore_rtx instead of
restore_regno.
(lra_rtx_hash): New.
* lra.c (initialize_lra_reg_info_element): Use restore_rtx instead
of restore_regno.
(lra_rtx_hash): Rename and move lra-remat.c::rtx_hash.
* lra-remat.c (rtx_hash): Rename to lra_rtx_hash and move to lra.c.
* lra-spills.c (lra_final_code_change): Don't delete insn when the
next insn is USE with the same reg as the current insn source.
* lra-constraints.c (curr_insn_transform): Use restore_rtx instead
of restore_regno.
(lra_constraints_init): Call initiate_invariants.
(lra_constraints_finish): Call finish_invariants.
(struct invariant, invariant_t, invariant_ptr_t): New.
(const_invariant_ptr_t, invariants, invariants_pool): New.
(invariant_table, invariant_hash, invariant_eq_p): New.
(insert_invariant, initiate_invariants, finish_invariants): New.
(clear_invariants, invalid_invariant_regs): New.
(inherit_reload_reg, split_reg, fix_bb_live_info): Use restore_rtx
instead of restore_regno.
(invariant_p, process_invariant_for_inheritance): New.
(inherit_in_ebb): Implement invariant inheritance.
(lra_inheritance): Initialize and finalize invalid_invariant_regs.
(remove_inheritance_pseudos): Implement undoing invariant
inheritance.
(undo_optional_reloads, lra_undo_inheritance): Use restore_rtx
instead of restore_regno.
* lra-assigns.c (regno_live_length): New.
(reload_pseudo_compare_func): Use regno_live_length.
(assign_by_spills): Use restore_rtx instead of restore_regno.
(lra_assign): Ditto. Initialize regno_live_length.
2016-02-08 James Greenhalgh <james.greenhalgh@arm.com> 2016-02-08 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/arm_neon.h (vminnm_f64): Add back missing 'f' from * config/aarch64/arm_neon.h (vminnm_f64): Add back missing 'f' from
......
...@@ -107,6 +107,10 @@ static bool former_reload_pseudo_spill_p; ...@@ -107,6 +107,10 @@ static bool former_reload_pseudo_spill_p;
lra_get_allocno_class. It is used to speed up the code. */ lra_get_allocno_class. It is used to speed up the code. */
static enum reg_class *regno_allocno_class_array; static enum reg_class *regno_allocno_class_array;
/* Array containing lengths of pseudo live ranges. It is used to
speed up the code. */
static int *regno_live_length;
/* Information about the thread to which a pseudo belongs. Threads are /* Information about the thread to which a pseudo belongs. Threads are
a set of connected reload and inheritance pseudos with the same set of a set of connected reload and inheritance pseudos with the same set of
available hard registers. Lone registers belong to their own threads. */ available hard registers. Lone registers belong to their own threads. */
...@@ -227,6 +231,11 @@ reload_pseudo_compare_func (const void *v1p, const void *v2p) ...@@ -227,6 +231,11 @@ reload_pseudo_compare_func (const void *v1p, const void *v2p)
/* Put pseudos from the thread nearby. */ /* Put pseudos from the thread nearby. */
if ((diff = regno_assign_info[r1].first - regno_assign_info[r2].first) != 0) if ((diff = regno_assign_info[r1].first - regno_assign_info[r2].first) != 0)
return diff; return diff;
/* Prefer pseudos with longer live ranges. It sets up better
preferred hard registers for the thread pseudos and decreases
register-register moves between the thread pseudos. */
if ((diff = regno_live_length[r2] - regno_live_length[r1]) != 0)
return diff;
/* If regs are equally good, sort by their numbers, so that the /* If regs are equally good, sort by their numbers, so that the
results of qsort leave nothing to chance. */ results of qsort leave nothing to chance. */
return r1 - r2; return r1 - r2;
...@@ -1300,7 +1309,8 @@ find_all_spills_for (int regno) ...@@ -1300,7 +1309,8 @@ find_all_spills_for (int regno)
static void static void
assign_by_spills (void) assign_by_spills (void)
{ {
int i, n, nfails, iter, regno, hard_regno, cost, restore_regno; int i, n, nfails, iter, regno, hard_regno, cost;
rtx restore_rtx;
rtx_insn *insn; rtx_insn *insn;
bitmap_head changed_insns, do_not_assign_nonreload_pseudos; bitmap_head changed_insns, do_not_assign_nonreload_pseudos;
unsigned int u, conflict_regno; unsigned int u, conflict_regno;
...@@ -1333,6 +1343,8 @@ assign_by_spills (void) ...@@ -1333,6 +1343,8 @@ assign_by_spills (void)
for (i = 0; i < n; i++) for (i = 0; i < n; i++)
{ {
regno = sorted_pseudos[i]; regno = sorted_pseudos[i];
if (reg_renumber[regno] >= 0)
continue;
if (lra_dump_file != NULL) if (lra_dump_file != NULL)
fprintf (lra_dump_file, " Assigning to %d " fprintf (lra_dump_file, " Assigning to %d "
"(cl=%s, orig=%d, freq=%d, tfirst=%d, tfreq=%d)...\n", "(cl=%s, orig=%d, freq=%d, tfirst=%d, tfreq=%d)...\n",
...@@ -1471,21 +1483,25 @@ assign_by_spills (void) ...@@ -1471,21 +1483,25 @@ assign_by_spills (void)
such inheritance or split pseudos. */ such inheritance or split pseudos. */
bitmap_initialize (&do_not_assign_nonreload_pseudos, &reg_obstack); bitmap_initialize (&do_not_assign_nonreload_pseudos, &reg_obstack);
EXECUTE_IF_SET_IN_BITMAP (&lra_inheritance_pseudos, 0, u, bi) EXECUTE_IF_SET_IN_BITMAP (&lra_inheritance_pseudos, 0, u, bi)
if ((restore_regno = lra_reg_info[u].restore_regno) >= 0 if ((restore_rtx = lra_reg_info[u].restore_rtx) != NULL_RTX
&& REG_P (restore_rtx)
&& reg_renumber[u] < 0 && reg_renumber[u] < 0
&& bitmap_bit_p (&lra_inheritance_pseudos, u)) && bitmap_bit_p (&lra_inheritance_pseudos, u))
bitmap_set_bit (&do_not_assign_nonreload_pseudos, restore_regno); bitmap_set_bit (&do_not_assign_nonreload_pseudos, REGNO (restore_rtx));
EXECUTE_IF_SET_IN_BITMAP (&lra_split_regs, 0, u, bi) EXECUTE_IF_SET_IN_BITMAP (&lra_split_regs, 0, u, bi)
if ((restore_regno = lra_reg_info[u].restore_regno) >= 0 if ((restore_rtx = lra_reg_info[u].restore_rtx) != NULL_RTX
&& reg_renumber[u] >= 0) && reg_renumber[u] >= 0)
bitmap_set_bit (&do_not_assign_nonreload_pseudos, restore_regno); {
lra_assert (REG_P (restore_rtx));
bitmap_set_bit (&do_not_assign_nonreload_pseudos, REGNO (restore_rtx));
}
for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
if (((i < lra_constraint_new_regno_start if (((i < lra_constraint_new_regno_start
&& ! bitmap_bit_p (&do_not_assign_nonreload_pseudos, i)) && ! bitmap_bit_p (&do_not_assign_nonreload_pseudos, i))
|| (bitmap_bit_p (&lra_inheritance_pseudos, i) || (bitmap_bit_p (&lra_inheritance_pseudos, i)
&& lra_reg_info[i].restore_regno >= 0) && lra_reg_info[i].restore_rtx != NULL_RTX)
|| (bitmap_bit_p (&lra_split_regs, i) || (bitmap_bit_p (&lra_split_regs, i)
&& lra_reg_info[i].restore_regno >= 0) && lra_reg_info[i].restore_rtx != NULL_RTX)
|| bitmap_bit_p (&lra_subreg_reload_pseudos, i) || bitmap_bit_p (&lra_subreg_reload_pseudos, i)
|| bitmap_bit_p (&lra_optional_reload_pseudos, i)) || bitmap_bit_p (&lra_optional_reload_pseudos, i))
&& reg_renumber[i] < 0 && lra_reg_info[i].nrefs != 0 && reg_renumber[i] < 0 && lra_reg_info[i].nrefs != 0
...@@ -1513,7 +1529,7 @@ assign_by_spills (void) ...@@ -1513,7 +1529,7 @@ assign_by_spills (void)
enum reg_class spill_class; enum reg_class spill_class;
if (targetm.spill_class == NULL if (targetm.spill_class == NULL
|| lra_reg_info[regno].restore_regno < 0 || lra_reg_info[regno].restore_rtx == NULL_RTX
|| ! bitmap_bit_p (&lra_inheritance_pseudos, regno) || ! bitmap_bit_p (&lra_inheritance_pseudos, regno)
|| (spill_class || (spill_class
= ((enum reg_class) = ((enum reg_class)
...@@ -1570,8 +1586,17 @@ lra_assign (void) ...@@ -1570,8 +1586,17 @@ lra_assign (void)
sorted_pseudos = XNEWVEC (int, max_regno); sorted_pseudos = XNEWVEC (int, max_regno);
sorted_reload_pseudos = XNEWVEC (int, max_regno); sorted_reload_pseudos = XNEWVEC (int, max_regno);
regno_allocno_class_array = XNEWVEC (enum reg_class, max_regno); regno_allocno_class_array = XNEWVEC (enum reg_class, max_regno);
regno_live_length = XNEWVEC (int, max_regno);
for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
regno_allocno_class_array[i] = lra_get_allocno_class (i); {
int l;
lra_live_range_t r;
regno_allocno_class_array[i] = lra_get_allocno_class (i);
for (l = 0, r = lra_reg_info[i].live_ranges; r != NULL; r = r->next)
l += r->finish - r->start + 1;
regno_live_length[i] = l;
}
former_reload_pseudo_spill_p = false; former_reload_pseudo_spill_p = false;
init_regno_assign_info (); init_regno_assign_info ();
bitmap_initialize (&all_spilled_pseudos, &reg_obstack); bitmap_initialize (&all_spilled_pseudos, &reg_obstack);
...@@ -1594,7 +1619,7 @@ lra_assign (void) ...@@ -1594,7 +1619,7 @@ lra_assign (void)
EXECUTE_IF_SET_IN_BITMAP (&all_spilled_pseudos, 0, u, bi) EXECUTE_IF_SET_IN_BITMAP (&all_spilled_pseudos, 0, u, bi)
/* We ignore spilled pseudos created on last inheritance pass /* We ignore spilled pseudos created on last inheritance pass
because they will be removed. */ because they will be removed. */
if (lra_reg_info[u].restore_regno < 0) if (lra_reg_info[u].restore_rtx == NULL_RTX)
{ {
no_spills_p = false; no_spills_p = false;
break; break;
...@@ -1613,6 +1638,7 @@ lra_assign (void) ...@@ -1613,6 +1638,7 @@ lra_assign (void)
} }
bitmap_clear (&insns_to_process); bitmap_clear (&insns_to_process);
finish_regno_assign_info (); finish_regno_assign_info ();
free (regno_live_length);
free (regno_allocno_class_array); free (regno_allocno_class_array);
free (sorted_pseudos); free (sorted_pseudos);
free (sorted_reload_pseudos); free (sorted_reload_pseudos);
...@@ -1631,3 +1657,4 @@ lra_assign (void) ...@@ -1631,3 +1657,4 @@ lra_assign (void)
LRA_MAX_ASSIGNMENT_ITERATION_NUMBER); LRA_MAX_ASSIGNMENT_ITERATION_NUMBER);
return no_spills_p; return no_spills_p;
} }
...@@ -99,9 +99,9 @@ struct lra_reg ...@@ -99,9 +99,9 @@ struct lra_reg
*non-debug* insns. */ *non-debug* insns. */
int nrefs, freq; int nrefs, freq;
int last_reload; int last_reload;
/* Regno used to undo the inheritance. It can be non-zero only /* rtx used to undo the inheritance. It can be non-null only
between couple of inheritance and undo inheritance passes. */ between subsequent inheritance and undo inheritance passes. */
int restore_regno; rtx restore_rtx;
/* Value holding by register. If the pseudos have the same value /* Value holding by register. If the pseudos have the same value
they do not conflict. */ they do not conflict. */
int val; int val;
...@@ -285,6 +285,7 @@ extern lra_insn_recog_data_t *lra_insn_recog_data; ...@@ -285,6 +285,7 @@ extern lra_insn_recog_data_t *lra_insn_recog_data;
extern int lra_curr_reload_num; extern int lra_curr_reload_num;
extern void lra_dump_bitmap_with_title (const char *, bitmap, int); extern void lra_dump_bitmap_with_title (const char *, bitmap, int);
extern hashval_t lra_rtx_hash (rtx x);
extern void lra_push_insn (rtx_insn *); extern void lra_push_insn (rtx_insn *);
extern void lra_push_insn_by_uid (unsigned int); extern void lra_push_insn_by_uid (unsigned int);
extern void lra_push_insn_and_update_insn_regno_info (rtx_insn *); extern void lra_push_insn_and_update_insn_regno_info (rtx_insn *);
......
...@@ -167,92 +167,6 @@ get_remat_bb_data_by_index (int index) ...@@ -167,92 +167,6 @@ get_remat_bb_data_by_index (int index)
/* Return a hash value for RTL X, computed by recursively walking its
   operands.  A null X hashes to zero.  */
static hashval_t
rtx_hash (rtx x)
{
  if (x == 0)
    return 0;

  enum rtx_code code = GET_CODE (x);
  hashval_t hash = (int) code + 4095;

  /* Some codes are hashed directly, without walking the operand
     format string.  */
  if (code == REG)
    return hash + REGNO (x);
  if (code == LABEL_REF)
    return iterative_hash_object (XEXP (x, 0), hash);
  if (code == SYMBOL_REF)
    return iterative_hash_object (XSTR (x, 0), hash);
  /* These codes contribute nothing beyond the code itself.  */
  if (code == SCRATCH || code == CONST_DOUBLE || code == CONST_INT
      || code == CONST_VECTOR)
    return hash;

  /* Accumulate the hash of every operand, last operand first.  */
  const char *format = GET_RTX_FORMAT (code);
  int op = GET_RTX_LENGTH (code);

  while (--op >= 0)
    {
      switch (format[op])
        {
        case 'w':
          hash += XWINT (x, op);
          break;

        case 'n':
        case 'i':
          hash += XINT (x, op);
          break;

        case 'V':
        case 'E':
          {
            /* Mix in the vector length, then each element in turn.  */
            int len = XVECLEN (x, op);

            hash += len;
            for (int elt = 0; elt < len; elt++)
              hash += rtx_hash (XVECEXP (x, op, elt));
          }
          break;

        case 'e':
          hash += rtx_hash (XEXP (x, op));
          break;

        case 'S':
        case 's':
          hash += htab_hash_string (XSTR (x, op));
          break;

        /* Operands of these kinds do not take part in the hash.  */
        case 'u':
        case '0':
        case 't':
          break;

        /* It is believed that rtx's at this level will never
           contain anything but integers and other rtx's, except for
           within LABEL_REFs and SYMBOL_REFs.  */
        default:
          abort ();
        }
    }

  return hash;
}
/* Hash table for the candidates. Different insns (e.g. structurally /* Hash table for the candidates. Different insns (e.g. structurally
the same insns or even insns with different unused output regs) can the same insns or even insns with different unused output regs) can
be represented by the same candidate in the table. */ be represented by the same candidate in the table. */
......
...@@ -693,7 +693,7 @@ lra_final_code_change (void) ...@@ -693,7 +693,7 @@ lra_final_code_change (void)
{ {
int i, hard_regno; int i, hard_regno;
basic_block bb; basic_block bb;
rtx_insn *insn, *curr; rtx_insn *insn, *curr, *next_insn;
int max_regno = max_reg_num (); int max_regno = max_reg_num ();
for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++) for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
...@@ -727,7 +727,12 @@ lra_final_code_change (void) ...@@ -727,7 +727,12 @@ lra_final_code_change (void)
if (NONJUMP_INSN_P (insn) && GET_CODE (pat) == SET if (NONJUMP_INSN_P (insn) && GET_CODE (pat) == SET
&& REG_P (SET_SRC (pat)) && REG_P (SET_DEST (pat)) && REG_P (SET_SRC (pat)) && REG_P (SET_DEST (pat))
&& REGNO (SET_SRC (pat)) == REGNO (SET_DEST (pat)) && REGNO (SET_SRC (pat)) == REGNO (SET_DEST (pat))
&& ! return_regno_p (REGNO (SET_SRC (pat)))) && (! return_regno_p (REGNO (SET_SRC (pat)))
|| (next_insn = next_nondebug_insn (insn)) == NULL_RTX
|| ! INSN_P (next_insn)
|| GET_CODE (PATTERN (next_insn)) != USE
|| ! REG_P (XEXP (PATTERN (next_insn), 0))
|| REGNO (SET_SRC (pat)) != REGNO (XEXP (PATTERN (next_insn), 0))))
{ {
lra_invalidate_insn_data (insn); lra_invalidate_insn_data (insn);
delete_insn (insn); delete_insn (insn);
......
...@@ -1286,7 +1286,7 @@ initialize_lra_reg_info_element (int i) ...@@ -1286,7 +1286,7 @@ initialize_lra_reg_info_element (int i)
lra_reg_info[i].live_ranges = NULL; lra_reg_info[i].live_ranges = NULL;
lra_reg_info[i].nrefs = lra_reg_info[i].freq = 0; lra_reg_info[i].nrefs = lra_reg_info[i].freq = 0;
lra_reg_info[i].last_reload = 0; lra_reg_info[i].last_reload = 0;
lra_reg_info[i].restore_regno = -1; lra_reg_info[i].restore_rtx = NULL_RTX;
lra_reg_info[i].val = get_new_reg_value (); lra_reg_info[i].val = get_new_reg_value ();
lra_reg_info[i].offset = 0; lra_reg_info[i].offset = 0;
lra_reg_info[i].copies = NULL; lra_reg_info[i].copies = NULL;
...@@ -1621,6 +1621,92 @@ lra_get_insn_regs (int uid) ...@@ -1621,6 +1621,92 @@ lra_get_insn_regs (int uid)
/* Return a hash value for RTL X, computed by recursively walking its
   operands.  A null X hashes to zero.  */
hashval_t
lra_rtx_hash (rtx x)
{
  if (x == 0)
    return 0;

  enum rtx_code code = GET_CODE (x);
  hashval_t hash = (int) code + 4095;

  /* Some codes are hashed directly, without walking the operand
     format string.  */
  if (code == REG)
    return hash + REGNO (x);
  if (code == LABEL_REF)
    return iterative_hash_object (XEXP (x, 0), hash);
  if (code == SYMBOL_REF)
    return iterative_hash_object (XSTR (x, 0), hash);
  /* These codes contribute nothing beyond the code itself.  */
  if (code == SCRATCH || code == CONST_DOUBLE || code == CONST_INT
      || code == CONST_VECTOR)
    return hash;

  /* Accumulate the hash of every operand, last operand first.  */
  const char *format = GET_RTX_FORMAT (code);
  int op = GET_RTX_LENGTH (code);

  while (--op >= 0)
    {
      switch (format[op])
        {
        case 'w':
          hash += XWINT (x, op);
          break;

        case 'n':
        case 'i':
          hash += XINT (x, op);
          break;

        case 'V':
        case 'E':
          {
            /* Mix in the vector length, then each element in turn.  */
            int len = XVECLEN (x, op);

            hash += len;
            for (int elt = 0; elt < len; elt++)
              hash += lra_rtx_hash (XVECEXP (x, op, elt));
          }
          break;

        case 'e':
          hash += lra_rtx_hash (XEXP (x, op));
          break;

        case 'S':
        case 's':
          hash += htab_hash_string (XSTR (x, op));
          break;

        /* Operands of these kinds do not take part in the hash.  */
        case 'u':
        case '0':
        case 't':
          break;

        /* It is believed that rtx's at this level will never
           contain anything but integers and other rtx's, except for
           within LABEL_REFs and SYMBOL_REFs.  */
        default:
          abort ();
        }
    }

  return hash;
}
/* This page contains code dealing with stack of the insns which /* This page contains code dealing with stack of the insns which
should be processed by the next constraint pass. */ should be processed by the next constraint pass. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment