Commit 8a8330b7 by Vladimir Makarov Committed by Vladimir Makarov

re PR rtl-optimization/69847 (Spec 2006 403.gcc slows down with -mlra vs. reload on PowerPC)

2016-08-02  Vladimir Makarov  <vmakarov@redhat.com>

	PR rtl-optimization/69847
	* lra-int.h (struct lra_reg): Use restore_rtx instead of
	restore_regno.
	(lra_rtx_hash): New.
	* lra.c (initialize_lra_reg_info_element): Use restore_rtx instead
	of restore_regno.
	(lra_rtx_hash): Rename and move lra-remat.c::rtx_hash.
	* lra-remat.c (rtx_hash): Rename and move to lra.c.
	* lra-spills.c (lra_final_code_change): Don't delete insn when the
	next insn is USE with the same reg as the current insn source.
	* lra-constraints.c (curr_insn_transform): Use restore_rtx instead
	of restore_regno.
	(lra_constraints_init): Call initiate_invariants.
	(lra_constraints_finish): Call finish_invariants.
	(struct invariant, invariant_t, invariant_ptr_t): New.
	(const_invariant_ptr_t, invariants, invariants_pool): New.
	(invariant_table, invariant_hash, invariant_eq_p): New.
	(insert_invariant, initiate_invariants, finish_invariants): New.
	(clear_invariants, invalid_invariant_regs): New.
	(inherit_reload_reg, split_reg, fix_bb_live_info): Use restore_rtx
	instead of restore_regno.
	(invariant_p, process_invariant_for_inheritance): New.
	(inherit_in_ebb): Implement invariant inheritance.
	(lra_inheritance): Initialize and finalize invalid_invariant_regs.
	(remove_inheritance_pseudos): Implement undoing invariant
	inheritance.
	(undo_optional_reloads, lra_undo_inheritance): Use restore_rtx
	instead of restore_regno.
	* lra-assigns.c (regno_live_length): New.
	(reload_pseudo_compare_func): Use regno_live_length.
	(assign_by_spills): Use restore_rtx instead of restore_regno.
	(lra_assign): Ditto.  Initiate regno_live_length.

From-SVN: r238991
parent 354c5470
2016-08-02 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimization/69847
* lra-int.h (struct lra_reg): Use restore_rtx instead of
restore_regno.
(lra_rtx_hash): New.
* lra.c (initialize_lra_reg_info_element): Use restore_rtx instead
of restore_regno.
(lra_rtx_hash): Rename and move lra-remat.c::rtx_hash.
* lra-remat.c (rtx_hash): Rename and move to lra.c.
* lra-spills.c (lra_final_code_change): Don't delete insn when the
next insn is USE with the same reg as the current insn source.
* lra-constraints.c (curr_insn_transform): Use restore_rtx instead
of restore_regno.
(lra_constraints_init): Call initiate_invariants.
(lra_constraints_finish): Call finish_invariants.
(struct invariant, invariant_t, invariant_ptr_t): New.
(const_invariant_ptr_t, invariants, invariants_pool): New.
(invariant_table, invariant_hash, invariant_eq_p): New.
(insert_invariant, initiate_invariants, finish_invariants): New.
(clear_invariants, invalid_invariant_regs): New.
(inherit_reload_reg, split_reg, fix_bb_live_info): Use restore_rtx
instead of restore_regno.
(invariant_p, process_invariant_for_inheritance): New.
(inherit_in_ebb): Implement invariant inheritance.
(lra_inheritance): Initialize and finalize invalid_invariant_regs.
(remove_inheritance_pseudos): Implement undoing invariant
inheritance.
(undo_optional_reloads, lra_undo_inheritance): Use restore_rtx
instead of restore_regno.
* lra-assigns.c (regno_live_length): New.
(reload_pseudo_compare_func): Use regno_live_length.
(assign_by_spills): Use restore_rtx instead of restore_regno.
(lra_assign): Ditto. Initiate regno_live_length.
2016-02-08 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/arm_neon.h (vminnm_f64): Add back missing 'f' from
......
......@@ -107,6 +107,10 @@ static bool former_reload_pseudo_spill_p;
lra_get_allocno_class. It is used to speed up the code. */
static enum reg_class *regno_allocno_class_array;
/* Array containing lengths of pseudo live ranges. It is used to
speed up the code. */
static int *regno_live_length;
/* Information about the thread to which a pseudo belongs. Threads are
a set of connected reload and inheritance pseudos with the same set of
available hard registers. Lone registers belong to their own threads. */
......@@ -227,6 +231,11 @@ reload_pseudo_compare_func (const void *v1p, const void *v2p)
/* Put pseudos from the thread nearby. */
if ((diff = regno_assign_info[r1].first - regno_assign_info[r2].first) != 0)
return diff;
/* Prefer pseudos with longer live ranges. It sets up better
preferred hard registers for the thread pseudos and decreases
register-register moves between the thread pseudos. */
if ((diff = regno_live_length[r2] - regno_live_length[r1]) != 0)
return diff;
/* If regs are equally good, sort by their numbers, so that the
results of qsort leave nothing to chance. */
return r1 - r2;
......@@ -1300,7 +1309,8 @@ find_all_spills_for (int regno)
static void
assign_by_spills (void)
{
int i, n, nfails, iter, regno, hard_regno, cost, restore_regno;
int i, n, nfails, iter, regno, hard_regno, cost;
rtx restore_rtx;
rtx_insn *insn;
bitmap_head changed_insns, do_not_assign_nonreload_pseudos;
unsigned int u, conflict_regno;
......@@ -1333,6 +1343,8 @@ assign_by_spills (void)
for (i = 0; i < n; i++)
{
regno = sorted_pseudos[i];
if (reg_renumber[regno] >= 0)
continue;
if (lra_dump_file != NULL)
fprintf (lra_dump_file, " Assigning to %d "
"(cl=%s, orig=%d, freq=%d, tfirst=%d, tfreq=%d)...\n",
......@@ -1471,21 +1483,25 @@ assign_by_spills (void)
such inheritance or split pseudos. */
bitmap_initialize (&do_not_assign_nonreload_pseudos, &reg_obstack);
EXECUTE_IF_SET_IN_BITMAP (&lra_inheritance_pseudos, 0, u, bi)
if ((restore_regno = lra_reg_info[u].restore_regno) >= 0
if ((restore_rtx = lra_reg_info[u].restore_rtx) != NULL_RTX
&& REG_P (restore_rtx)
&& reg_renumber[u] < 0
&& bitmap_bit_p (&lra_inheritance_pseudos, u))
bitmap_set_bit (&do_not_assign_nonreload_pseudos, restore_regno);
bitmap_set_bit (&do_not_assign_nonreload_pseudos, REGNO (restore_rtx));
EXECUTE_IF_SET_IN_BITMAP (&lra_split_regs, 0, u, bi)
if ((restore_regno = lra_reg_info[u].restore_regno) >= 0
if ((restore_rtx = lra_reg_info[u].restore_rtx) != NULL_RTX
&& reg_renumber[u] >= 0)
bitmap_set_bit (&do_not_assign_nonreload_pseudos, restore_regno);
{
lra_assert (REG_P (restore_rtx));
bitmap_set_bit (&do_not_assign_nonreload_pseudos, REGNO (restore_rtx));
}
for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
if (((i < lra_constraint_new_regno_start
&& ! bitmap_bit_p (&do_not_assign_nonreload_pseudos, i))
|| (bitmap_bit_p (&lra_inheritance_pseudos, i)
&& lra_reg_info[i].restore_regno >= 0)
&& lra_reg_info[i].restore_rtx != NULL_RTX)
|| (bitmap_bit_p (&lra_split_regs, i)
&& lra_reg_info[i].restore_regno >= 0)
&& lra_reg_info[i].restore_rtx != NULL_RTX)
|| bitmap_bit_p (&lra_subreg_reload_pseudos, i)
|| bitmap_bit_p (&lra_optional_reload_pseudos, i))
&& reg_renumber[i] < 0 && lra_reg_info[i].nrefs != 0
......@@ -1513,7 +1529,7 @@ assign_by_spills (void)
enum reg_class spill_class;
if (targetm.spill_class == NULL
|| lra_reg_info[regno].restore_regno < 0
|| lra_reg_info[regno].restore_rtx == NULL_RTX
|| ! bitmap_bit_p (&lra_inheritance_pseudos, regno)
|| (spill_class
= ((enum reg_class)
......@@ -1570,8 +1586,17 @@ lra_assign (void)
sorted_pseudos = XNEWVEC (int, max_regno);
sorted_reload_pseudos = XNEWVEC (int, max_regno);
regno_allocno_class_array = XNEWVEC (enum reg_class, max_regno);
regno_live_length = XNEWVEC (int, max_regno);
for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
regno_allocno_class_array[i] = lra_get_allocno_class (i);
{
int l;
lra_live_range_t r;
regno_allocno_class_array[i] = lra_get_allocno_class (i);
for (l = 0, r = lra_reg_info[i].live_ranges; r != NULL; r = r->next)
l += r->finish - r->start + 1;
regno_live_length[i] = l;
}
former_reload_pseudo_spill_p = false;
init_regno_assign_info ();
bitmap_initialize (&all_spilled_pseudos, &reg_obstack);
......@@ -1594,7 +1619,7 @@ lra_assign (void)
EXECUTE_IF_SET_IN_BITMAP (&all_spilled_pseudos, 0, u, bi)
/* We ignore spilled pseudos created on last inheritance pass
because they will be removed. */
if (lra_reg_info[u].restore_regno < 0)
if (lra_reg_info[u].restore_rtx == NULL_RTX)
{
no_spills_p = false;
break;
......@@ -1613,6 +1638,7 @@ lra_assign (void)
}
bitmap_clear (&insns_to_process);
finish_regno_assign_info ();
free (regno_live_length);
free (regno_allocno_class_array);
free (sorted_pseudos);
free (sorted_reload_pseudos);
......@@ -1631,3 +1657,4 @@ lra_assign (void)
LRA_MAX_ASSIGNMENT_ITERATION_NUMBER);
return no_spills_p;
}
......@@ -99,9 +99,9 @@ struct lra_reg
*non-debug* insns. */
int nrefs, freq;
int last_reload;
/* Regno used to undo the inheritance. It can be non-zero only
between couple of inheritance and undo inheritance passes. */
int restore_regno;
/* rtx used to undo the inheritance. It can be non-null only
between subsequent inheritance and undo inheritance passes. */
rtx restore_rtx;
/* Value held by the register. If the pseudos have the same value
they do not conflict. */
int val;
......@@ -285,6 +285,7 @@ extern lra_insn_recog_data_t *lra_insn_recog_data;
extern int lra_curr_reload_num;
extern void lra_dump_bitmap_with_title (const char *, bitmap, int);
extern hashval_t lra_rtx_hash (rtx x);
extern void lra_push_insn (rtx_insn *);
extern void lra_push_insn_by_uid (unsigned int);
extern void lra_push_insn_and_update_insn_regno_info (rtx_insn *);
......
......@@ -167,92 +167,6 @@ get_remat_bb_data_by_index (int index)
/* Return a hash value for RTL X, computed recursively over its
   operands.  A null X hashes to zero.  */
static hashval_t
rtx_hash (rtx x)
{
  enum rtx_code code;
  const char *fmt;
  hashval_t result;
  int idx, elt;

  if (x == 0)
    return 0;

  /* Seed the hash with the rtx code itself.  */
  code = GET_CODE (x);
  result = (int) code + 4095;

  /* These codes are hashed directly, without walking the operand
     format string.  */
  switch (code)
    {
    case SCRATCH:
    case CONST_DOUBLE:
    case CONST_INT:
    case CONST_VECTOR:
      /* Only the code contributes to the hash for these.  */
      return result;

    case REG:
      return result + REGNO (x);

    case LABEL_REF:
      return iterative_hash_object (XEXP (x, 0), result);

    case SYMBOL_REF:
      return iterative_hash_object (XSTR (x, 0), result);

    default:
      break;
    }

  /* Accumulate the contribution of every operand, walking from the
     last operand down to the first.  */
  fmt = GET_RTX_FORMAT (code);
  for (idx = GET_RTX_LENGTH (code); idx-- > 0;)
    {
      const char kind = fmt[idx];

      if (kind == 'w')
        result += XWINT (x, idx);
      else if (kind == 'n' || kind == 'i')
        result += XINT (x, idx);
      else if (kind == 'V' || kind == 'E')
        {
          /* A vector adds its length and then each element's hash.  */
          result += XVECLEN (x, idx);
          for (elt = 0; elt < XVECLEN (x, idx); elt++)
            result += rtx_hash (XVECEXP (x, idx, elt));
        }
      else if (kind == 'e')
        result += rtx_hash (XEXP (x, idx));
      else if (kind == 'S' || kind == 's')
        result += htab_hash_string (XSTR (x, idx));
      else if (kind != 'u' && kind != '0' && kind != 't')
        /* It is believed that rtx's at this level will never
           contain anything but integers and other rtx's, except for
           within LABEL_REFs and SYMBOL_REFs.  */
        abort ();
      /* Operands of kind 'u', '0' and 't' add nothing to the hash.  */
    }

  return result;
}
/* Hash table for the candidates. Different insns (e.g. structurally
the same insns or even insns with different unused output regs) can
be represented by the same candidate in the table. */
......
......@@ -693,7 +693,7 @@ lra_final_code_change (void)
{
int i, hard_regno;
basic_block bb;
rtx_insn *insn, *curr;
rtx_insn *insn, *curr, *next_insn;
int max_regno = max_reg_num ();
for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
......@@ -727,7 +727,12 @@ lra_final_code_change (void)
if (NONJUMP_INSN_P (insn) && GET_CODE (pat) == SET
&& REG_P (SET_SRC (pat)) && REG_P (SET_DEST (pat))
&& REGNO (SET_SRC (pat)) == REGNO (SET_DEST (pat))
&& ! return_regno_p (REGNO (SET_SRC (pat))))
&& (! return_regno_p (REGNO (SET_SRC (pat)))
|| (next_insn = next_nondebug_insn (insn)) == NULL_RTX
|| ! INSN_P (next_insn)
|| GET_CODE (PATTERN (next_insn)) != USE
|| ! REG_P (XEXP (PATTERN (next_insn), 0))
|| REGNO (SET_SRC (pat)) != REGNO (XEXP (PATTERN (next_insn), 0))))
{
lra_invalidate_insn_data (insn);
delete_insn (insn);
......
......@@ -1286,7 +1286,7 @@ initialize_lra_reg_info_element (int i)
lra_reg_info[i].live_ranges = NULL;
lra_reg_info[i].nrefs = lra_reg_info[i].freq = 0;
lra_reg_info[i].last_reload = 0;
lra_reg_info[i].restore_regno = -1;
lra_reg_info[i].restore_rtx = NULL_RTX;
lra_reg_info[i].val = get_new_reg_value ();
lra_reg_info[i].offset = 0;
lra_reg_info[i].copies = NULL;
......@@ -1621,6 +1621,92 @@ lra_get_insn_regs (int uid)
/* Compute a hash value for RTL expression X by walking it
   recursively.  A null X hashes to zero.  */
hashval_t
lra_rtx_hash (rtx x)
{
  if (x == 0)
    return 0;

  enum rtx_code code = GET_CODE (x);
  /* The rtx code seeds the hash.  */
  hashval_t hash = (int) code + 4095;

  /* Some codes are hashed without descending into the operand
     format.  */
  if (code == REG)
    return hash + REGNO (x);
  if (code == LABEL_REF)
    return iterative_hash_object (XEXP (x, 0), hash);
  if (code == SYMBOL_REF)
    return iterative_hash_object (XSTR (x, 0), hash);
  if (code == SCRATCH || code == CONST_DOUBLE || code == CONST_INT
      || code == CONST_VECTOR)
    /* Only the code contributes to the hash for these.  */
    return hash;

  /* Fold in every operand, from the last one down to the first.  */
  const char *format = GET_RTX_FORMAT (code);
  int op = GET_RTX_LENGTH (code);

  while (--op >= 0)
    {
      switch (format[op])
        {
        case 'e':
          hash += lra_rtx_hash (XEXP (x, op));
          break;

        case 'E':
        case 'V':
          {
            /* A vector adds its length and then each element's
               hash.  */
            int len = XVECLEN (x, op);

            hash += len;
            for (int k = 0; k < len; k++)
              hash += lra_rtx_hash (XVECEXP (x, op, k));
          }
          break;

        case 'i':
        case 'n':
          hash += XINT (x, op);
          break;

        case 'w':
          hash += XWINT (x, op);
          break;

        case 's':
        case 'S':
          hash += htab_hash_string (XSTR (x, op));
          break;

        case '0':
        case 't':
        case 'u':
          /* These operand kinds add nothing to the hash.  */
          break;

        default:
          /* It is believed that rtx's at this level will never
             contain anything but integers and other rtx's, except for
             within LABEL_REFs and SYMBOL_REFs.  */
          abort ();
        }
    }

  return hash;
}
/* This page contains code dealing with stack of the insns which
should be processed by the next constraint pass. */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment