Commit 2b778c9d by Vladimir Makarov Committed by Vladimir Makarov

re PR rtl-optimization/55342 ([LRA,x86] Non-optimal code for simple loop with LRA)

2013-07-05  Vladimir Makarov  <vmakarov@redhat.com>

	PR rtl-optimization/55342
	* lra-int.h (lra_subreg_reload_pseudos): New.
	* lra.c: Add undoing optional reloads to the block diagram.
	(lra_subreg_reload_pseudos): New.
	(lra_optional_reload_pseudos): Change comments.
	(lra): Init and clear lra_subreg_reload_pseudos.  Clear
	lra_optional_reload_pseudos after undo transformations.
	* lra-assigns.c (pseudo_prefix_title): New.
	(lra_setup_reg_renumber): Use it.
	(spill_for): Ditto.  Check subreg reload pseudos too.
	(assign_by_spills): Consider subreg reload pseudos too.
	* lra-constraints.c (simplify_operand_subreg): Use
	lra_subreg_reload_pseudos instead of lra_optional_reload_pseudos.
	(curr_insn_transform): Recognize and do optional reloads.
	(undo_optional_reloads): New.
	(lra_undo_inheritance): Call undo_optional_reloads.

2013-07-05  Vladimir Makarov  <vmakarov@redhat.com>

	PR rtl-optimization/55342
	* gcc.target/i386/pr55342.c: New.

From-SVN: r200723
parent 0ba6aff3
2013-07-05 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimization/55342
* lra-int.h (lra_subreg_reload_pseudos): New.
* lra.c: Add undoing optional reloads to the block diagram.
(lra_subreg_reload_pseudos): New.
(lra_optional_reload_pseudos): Change comments.
(lra): Init and clear lra_subreg_reload_pseudos. Clear
lra_optional_reload_pseudos after undo transformations.
* lra-assigns.c (pseudo_prefix_title): New.
(lra_setup_reg_renumber): Use it.
(spill_for): Ditto. Check subreg reload pseudos too.
(assign_by_spills): Consider subreg reload pseudos too.
* lra-constraints.c (simplify_operand_subreg): Use
lra_subreg_reload_pseudos instead of lra_optional_reload_pseudos.
(curr_insn_transform): Recognize and do optional reloads.
(undo_optional_reloads): New.
(lra_undo_inheritance): Call undo_optional_reloads.
2013-07-05 Thomas Quinot <quinot@adacore.com> 2013-07-05 Thomas Quinot <quinot@adacore.com>
* tree-complex.c (expand_complex_operations_1): Fix typo. * tree-complex.c (expand_complex_operations_1): Fix typo.
......
...@@ -672,6 +672,19 @@ update_hard_regno_preference (int regno, int hard_regno, int div) ...@@ -672,6 +672,19 @@ update_hard_regno_preference (int regno, int hard_regno, int div)
} }
} }
/* Return the word(s) put in front of "rN" in dump messages to tell
   what kind of pseudo REGNO is.  Pseudos numbered below
   lra_constraint_new_regno_start get an empty prefix.  For the others
   the sets are tested in this order: inheritance pseudos, split regs,
   optional reload pseudos, subreg reload pseudos; a pseudo in none of
   them is titled as a plain reload.  Every non-empty title ends with
   a space so it can be printed directly before the register.  */
static const char *
pseudo_prefix_title (int regno)
{
  if (regno < lra_constraint_new_regno_start)
    return "";
  if (bitmap_bit_p (&lra_inheritance_pseudos, regno))
    return "inheritance ";
  if (bitmap_bit_p (&lra_split_regs, regno))
    return "split ";
  if (bitmap_bit_p (&lra_optional_reload_pseudos, regno))
    return "optional reload ";
  if (bitmap_bit_p (&lra_subreg_reload_pseudos, regno))
    return "subreg reload ";
  return "reload ";
}
/* Update REG_RENUMBER and other pseudo preferences by assignment of /* Update REG_RENUMBER and other pseudo preferences by assignment of
HARD_REGNO to pseudo REGNO and print about it if PRINT_P. */ HARD_REGNO to pseudo REGNO and print about it if PRINT_P. */
void void
...@@ -692,13 +705,7 @@ lra_setup_reg_renumber (int regno, int hard_regno, bool print_p) ...@@ -692,13 +705,7 @@ lra_setup_reg_renumber (int regno, int hard_regno, bool print_p)
lra_hard_reg_usage[hr + i] += lra_reg_info[regno].freq; lra_hard_reg_usage[hr + i] += lra_reg_info[regno].freq;
if (print_p && lra_dump_file != NULL) if (print_p && lra_dump_file != NULL)
fprintf (lra_dump_file, " Assign %d to %sr%d (freq=%d)\n", fprintf (lra_dump_file, " Assign %d to %sr%d (freq=%d)\n",
reg_renumber[regno], reg_renumber[regno], pseudo_prefix_title (regno),
regno < lra_constraint_new_regno_start
? ""
: bitmap_bit_p (&lra_inheritance_pseudos, regno) ? "inheritance "
: bitmap_bit_p (&lra_split_regs, regno) ? "split "
: bitmap_bit_p (&lra_optional_reload_pseudos, regno)
? "optional reload ": "reload ",
regno, lra_reg_info[regno].freq); regno, lra_reg_info[regno].freq);
if (hard_regno >= 0) if (hard_regno >= 0)
{ {
...@@ -844,6 +851,7 @@ spill_for (int regno, bitmap spilled_pseudo_bitmap) ...@@ -844,6 +851,7 @@ spill_for (int regno, bitmap spilled_pseudo_bitmap)
if ((int) spill_regno >= lra_constraint_new_regno_start if ((int) spill_regno >= lra_constraint_new_regno_start
&& ! bitmap_bit_p (&lra_inheritance_pseudos, spill_regno) && ! bitmap_bit_p (&lra_inheritance_pseudos, spill_regno)
&& ! bitmap_bit_p (&lra_split_regs, spill_regno) && ! bitmap_bit_p (&lra_split_regs, spill_regno)
&& ! bitmap_bit_p (&lra_subreg_reload_pseudos, spill_regno)
&& ! bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno)) && ! bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno))
goto fail; goto fail;
insn_pseudos_num = 0; insn_pseudos_num = 0;
...@@ -953,14 +961,7 @@ spill_for (int regno, bitmap spilled_pseudo_bitmap) ...@@ -953,14 +961,7 @@ spill_for (int regno, bitmap spilled_pseudo_bitmap)
{ {
if (lra_dump_file != NULL) if (lra_dump_file != NULL)
fprintf (lra_dump_file, " Spill %sr%d(hr=%d, freq=%d) for r%d\n", fprintf (lra_dump_file, " Spill %sr%d(hr=%d, freq=%d) for r%d\n",
((int) spill_regno < lra_constraint_new_regno_start pseudo_prefix_title (spill_regno),
? ""
: bitmap_bit_p (&lra_inheritance_pseudos, spill_regno)
? "inheritance "
: bitmap_bit_p (&lra_split_regs, spill_regno)
? "split "
: bitmap_bit_p (&lra_optional_reload_pseudos, spill_regno)
? "optional reload " : "reload "),
spill_regno, reg_renumber[spill_regno], spill_regno, reg_renumber[spill_regno],
lra_reg_info[spill_regno].freq, regno); lra_reg_info[spill_regno].freq, regno);
update_lives (spill_regno, true); update_lives (spill_regno, true);
...@@ -1176,6 +1177,7 @@ assign_by_spills (void) ...@@ -1176,6 +1177,7 @@ assign_by_spills (void)
bitmap_initialize (&changed_insns, &reg_obstack); bitmap_initialize (&changed_insns, &reg_obstack);
bitmap_initialize (&non_reload_pseudos, &reg_obstack); bitmap_initialize (&non_reload_pseudos, &reg_obstack);
bitmap_ior (&non_reload_pseudos, &lra_inheritance_pseudos, &lra_split_regs); bitmap_ior (&non_reload_pseudos, &lra_inheritance_pseudos, &lra_split_regs);
bitmap_ior_into (&non_reload_pseudos, &lra_subreg_reload_pseudos);
bitmap_ior_into (&non_reload_pseudos, &lra_optional_reload_pseudos); bitmap_ior_into (&non_reload_pseudos, &lra_optional_reload_pseudos);
for (iter = 0; iter <= 1; iter++) for (iter = 0; iter <= 1; iter++)
{ {
...@@ -1350,6 +1352,7 @@ assign_by_spills (void) ...@@ -1350,6 +1352,7 @@ assign_by_spills (void)
&& lra_reg_info[i].restore_regno >= 0) && lra_reg_info[i].restore_regno >= 0)
|| (bitmap_bit_p (&lra_split_regs, i) || (bitmap_bit_p (&lra_split_regs, i)
&& lra_reg_info[i].restore_regno >= 0) && lra_reg_info[i].restore_regno >= 0)
|| bitmap_bit_p (&lra_subreg_reload_pseudos, i)
|| bitmap_bit_p (&lra_optional_reload_pseudos, i)) || bitmap_bit_p (&lra_optional_reload_pseudos, i))
&& reg_renumber[i] < 0 && lra_reg_info[i].nrefs != 0 && reg_renumber[i] < 0 && lra_reg_info[i].nrefs != 0
&& regno_allocno_class_array[i] != NO_REGS) && regno_allocno_class_array[i] != NO_REGS)
......
...@@ -1228,7 +1228,7 @@ simplify_operand_subreg (int nop, enum machine_mode reg_mode) ...@@ -1228,7 +1228,7 @@ simplify_operand_subreg (int nop, enum machine_mode reg_mode)
if (get_reload_reg (curr_static_id->operand[nop].type, reg_mode, reg, if (get_reload_reg (curr_static_id->operand[nop].type, reg_mode, reg,
rclass, "subreg reg", &new_reg)) rclass, "subreg reg", &new_reg))
{ {
bitmap_set_bit (&lra_optional_reload_pseudos, REGNO (new_reg)); bitmap_set_bit (&lra_subreg_reload_pseudos, REGNO (new_reg));
if (type != OP_OUT if (type != OP_OUT
|| GET_MODE_SIZE (GET_MODE (reg)) > GET_MODE_SIZE (mode)) || GET_MODE_SIZE (GET_MODE (reg)) > GET_MODE_SIZE (mode))
{ {
...@@ -3183,6 +3183,8 @@ curr_insn_transform (void) ...@@ -3183,6 +3183,8 @@ curr_insn_transform (void)
for (i = 0; i < n_operands; i++) for (i = 0; i < n_operands; i++)
{ {
int regno;
bool optional_p = false;
rtx old, new_reg; rtx old, new_reg;
rtx op = *curr_id->operand_loc[i]; rtx op = *curr_id->operand_loc[i];
...@@ -3205,6 +3207,21 @@ curr_insn_transform (void) ...@@ -3205,6 +3207,21 @@ curr_insn_transform (void)
current one. */ current one. */
reg_renumber[regno] = -1; reg_renumber[regno] = -1;
} }
/* We can do an optional reload. If the pseudo got a hard
reg, we might improve the code through inheritance. If
it does not get a hard register we coalesce memory/memory
moves later. Ignore move insns to avoid cycling. */
if (! lra_simple_p
&& lra_undo_inheritance_iter < LRA_MAX_INHERITANCE_PASSES
&& goal_alt[i] != NO_REGS && REG_P (op)
&& (regno = REGNO (op)) >= FIRST_PSEUDO_REGISTER
&& reg_renumber[regno] < 0
&& (curr_insn_set == NULL_RTX
|| !(REG_P (SET_SRC (curr_insn_set))
|| MEM_P (SET_SRC (curr_insn_set))
|| GET_CODE (SET_SRC (curr_insn_set)) == SUBREG)))
optional_p = true;
else
continue; continue;
} }
...@@ -3328,6 +3345,21 @@ curr_insn_transform (void) ...@@ -3328,6 +3345,21 @@ curr_insn_transform (void)
/* We must generate code in any case when function /* We must generate code in any case when function
process_alt_operands decides that it is possible. */ process_alt_operands decides that it is possible. */
gcc_unreachable (); gcc_unreachable ();
if (optional_p)
{
lra_assert (REG_P (op));
regno = REGNO (op);
op = *curr_id->operand_loc[i]; /* Substitution. */
if (GET_CODE (op) == SUBREG)
op = SUBREG_REG (op);
gcc_assert (REG_P (op) && (int) REGNO (op) >= new_regno_start);
bitmap_set_bit (&lra_optional_reload_pseudos, REGNO (op));
lra_reg_info[REGNO (op)].restore_regno = regno;
if (lra_dump_file != NULL)
fprintf (lra_dump_file,
" Making reload reg %d for reg %d optional\n",
REGNO (op), regno);
}
} }
if (before != NULL_RTX || after != NULL_RTX if (before != NULL_RTX || after != NULL_RTX
|| max_regno_before != max_reg_num ()) || max_regno_before != max_reg_num ())
...@@ -5273,6 +5305,100 @@ remove_inheritance_pseudos (bitmap remove_pseudos) ...@@ -5273,6 +5305,100 @@ remove_inheritance_pseudos (bitmap remove_pseudos)
return change_p; return change_p;
} }
/* If an optional reload pseudo failed to get a hard register or was
not inherited, it is better to remove the optional reload. We do
this transformation after undoing inheritance because that makes it
easier to figure out whether an optional reload must be removed.

Every pseudo in lra_optional_reload_pseudos starts as a removal
candidate. A candidate is kept only if it got a hard register and
takes part in a register-register move whose other side is not its
restore_regno. For each removed pseudo, moves between the pseudo
and its restore_regno are deleted and substitute_pseudo is applied
to the remaining insns that refer to it. At the end restore_regno
is reset to -1 for all optional reload pseudos.

Return true if at least one optional reload pseudo was removed. */
static bool
undo_optional_reloads (void)
{
bool change_p;
unsigned int regno, uid;
bitmap_iterator bi, bi2;
rtx insn, set, src, dest;
bitmap_head removed_optional_reload_pseudos, insn_bitmap;
/* Start with all optional reload pseudos marked for removal; the
loop below clears the bits of the pseudos worth keeping. */
bitmap_initialize (&removed_optional_reload_pseudos, &reg_obstack);
bitmap_copy (&removed_optional_reload_pseudos, &lra_optional_reload_pseudos);
EXECUTE_IF_SET_IN_BITMAP (&lra_optional_reload_pseudos, 0, regno, bi)
/* A pseudo without a hard register is never kept, so its insns
need not be examined. */
if (reg_renumber[regno] >= 0)
EXECUTE_IF_SET_IN_BITMAP (&lra_reg_info[regno].insn_bitmap, 0, uid, bi2)
{
insn = lra_insn_recog_data[uid]->insn;
/* Only single-set register-register moves are of interest. */
if ((set = single_set (insn)) == NULL_RTX)
continue;
src = SET_SRC (set);
dest = SET_DEST (set);
if (! REG_P (src) || ! REG_P (dest))
continue;
/* A move between REGNO and a register other than its
restore_regno is taken as evidence of inheritance. */
if ((REGNO (src) == regno
&& lra_reg_info[regno].restore_regno != (int) REGNO (dest))
|| (REGNO (dest) == regno
&& lra_reg_info[regno].restore_regno != (int) REGNO (src)))
{
/* Optional reload was inherited. Keep it. */
bitmap_clear_bit (&removed_optional_reload_pseudos, regno);
if (lra_dump_file != NULL)
fprintf (lra_dump_file, "Keep optional reload reg %d\n", regno);
}
}
/* Anything still marked is going to be removed, which is a change. */
change_p = ! bitmap_empty_p (&removed_optional_reload_pseudos);
bitmap_initialize (&insn_bitmap, &reg_obstack);
EXECUTE_IF_SET_IN_BITMAP (&removed_optional_reload_pseudos, 0, regno, bi)
{
if (lra_dump_file != NULL)
fprintf (lra_dump_file, "Remove optional reload reg %d\n", regno);
/* Iterate over a copy of the insn set of REGNO. NOTE(review):
presumably because the calls below can modify
lra_reg_info[regno].insn_bitmap during the walk -- confirm. */
bitmap_copy (&insn_bitmap, &lra_reg_info[regno].insn_bitmap);
EXECUTE_IF_SET_IN_BITMAP (&insn_bitmap, 0, uid, bi2)
{
insn = lra_insn_recog_data[uid]->insn;
if ((set = single_set (insn)) != NULL_RTX)
{
src = SET_SRC (set);
dest = SET_DEST (set);
/* A move between REGNO and its restore_regno (in either
direction) is the reload move itself: delete it. */
if (REG_P (src) && REG_P (dest)
&& ((REGNO (src) == regno
&& (lra_reg_info[regno].restore_regno
== (int) REGNO (dest)))
|| (REGNO (dest) == regno
&& (lra_reg_info[regno].restore_regno
== (int) REGNO (src)))))
{
if (lra_dump_file != NULL)
{
fprintf (lra_dump_file, " Deleting move %u\n",
INSN_UID (insn));
dump_insn_slim (lra_dump_file, insn);
}
lra_set_insn_deleted (insn);
continue;
}
/* We need not worry about generating memory-memory moves
here: if the corresponding inheritance did not work
(the inheritance pseudo did not get a hard reg), we
remove both the inheritance pseudo and the optional
reload. */
}
/* Any other insn referring to REGNO: presumably
substitute_pseudo puts the restore_regno register back in
place of REGNO (the dump message below calls this restoring
the original insn); the insn's regno info is then
refreshed. */
substitute_pseudo (&insn, regno,
regno_reg_rtx[lra_reg_info[regno].restore_regno]);
lra_update_insn_regno_info (insn);
if (lra_dump_file != NULL)
{
fprintf (lra_dump_file,
" Restoring original insn:\n");
dump_insn_slim (lra_dump_file, insn);
}
}
}
/* Clear restore_regnos. This is done for every optional reload
pseudo, kept or removed. */
EXECUTE_IF_SET_IN_BITMAP (&lra_optional_reload_pseudos, 0, regno, bi)
lra_reg_info[regno].restore_regno = -1;
/* Release the two local bitmaps. */
bitmap_clear (&insn_bitmap);
bitmap_clear (&removed_optional_reload_pseudos);
return change_p;
}
/* Entry function for undoing inheritance/split transformation. Return true /* Entry function for undoing inheritance/split transformation. Return true
if we did any RTL change in this pass. */ if we did any RTL change in this pass. */
bool bool
...@@ -5335,5 +5461,6 @@ lra_undo_inheritance (void) ...@@ -5335,5 +5461,6 @@ lra_undo_inheritance (void)
lra_reg_info[regno].restore_regno = -1; lra_reg_info[regno].restore_regno = -1;
EXECUTE_IF_SET_IN_BITMAP (&lra_split_regs, 0, regno, bi) EXECUTE_IF_SET_IN_BITMAP (&lra_split_regs, 0, regno, bi)
lra_reg_info[regno].restore_regno = -1; lra_reg_info[regno].restore_regno = -1;
change_p = undo_optional_reloads () || change_p;
return change_p; return change_p;
} }
...@@ -321,6 +321,7 @@ extern int lra_new_regno_start; ...@@ -321,6 +321,7 @@ extern int lra_new_regno_start;
extern int lra_constraint_new_regno_start; extern int lra_constraint_new_regno_start;
extern bitmap_head lra_inheritance_pseudos; extern bitmap_head lra_inheritance_pseudos;
extern bitmap_head lra_split_regs; extern bitmap_head lra_split_regs;
extern bitmap_head lra_subreg_reload_pseudos;
extern bitmap_head lra_optional_reload_pseudos; extern bitmap_head lra_optional_reload_pseudos;
extern int lra_constraint_new_insn_uid_start; extern int lra_constraint_new_insn_uid_start;
......
...@@ -43,13 +43,13 @@ along with GCC; see the file COPYING3. If not see ...@@ -43,13 +43,13 @@ along with GCC; see the file COPYING3. If not see
Here is block diagram of LRA passes: Here is block diagram of LRA passes:
--------------------- ------------------------
--------------- | Undo inheritance | --------------- --------------- | Undo inheritance for | ---------------
| Memory-memory | | for spilled pseudos)| | New (and old) | | Memory-memory | | spilled pseudos, | | New (and old) |
| move coalesce |<---| and splits (for |<-- | pseudos | | move coalesce |<---| splits for pseudos got |<-- | pseudos |
--------------- | pseudos got the | | assignment | --------------- | the same hard regs, | | assignment |
Start | | same hard regs) | --------------- Start | | and optional reloads | ---------------
| | --------------------- ^ | | ------------------------ ^
V | ---------------- | V | ---------------- |
----------- V | Update virtual | | ----------- V | Update virtual | |
| Remove |----> ------------>| register | | | Remove |----> ------------>| register | |
...@@ -2187,10 +2187,16 @@ bitmap_head lra_inheritance_pseudos; ...@@ -2187,10 +2187,16 @@ bitmap_head lra_inheritance_pseudos;
/* Split regnos before the new spill pass. */ /* Split regnos before the new spill pass. */
bitmap_head lra_split_regs; bitmap_head lra_split_regs;
/* Reload pseudo regnos before the new assign pass which still can be /* Reload pseudo regnos before the new assignment pass which still can
spilled after the assinment pass. */ be spilled after the assignment pass as memory is also accepted in
insns for the reload pseudos. */
bitmap_head lra_optional_reload_pseudos; bitmap_head lra_optional_reload_pseudos;
/* Pseudo regnos used for subreg reloads before the new assignment
pass. Such pseudos still can be spilled after the assignment
pass. */
bitmap_head lra_subreg_reload_pseudos;
/* First UID of insns generated before a new spill pass. */ /* First UID of insns generated before a new spill pass. */
int lra_constraint_new_insn_uid_start; int lra_constraint_new_insn_uid_start;
...@@ -2296,6 +2302,7 @@ lra (FILE *f) ...@@ -2296,6 +2302,7 @@ lra (FILE *f)
bitmap_initialize (&lra_inheritance_pseudos, &reg_obstack); bitmap_initialize (&lra_inheritance_pseudos, &reg_obstack);
bitmap_initialize (&lra_split_regs, &reg_obstack); bitmap_initialize (&lra_split_regs, &reg_obstack);
bitmap_initialize (&lra_optional_reload_pseudos, &reg_obstack); bitmap_initialize (&lra_optional_reload_pseudos, &reg_obstack);
bitmap_initialize (&lra_subreg_reload_pseudos, &reg_obstack);
live_p = false; live_p = false;
if (get_frame_size () != 0 && crtl->stack_alignment_needed) if (get_frame_size () != 0 && crtl->stack_alignment_needed)
/* If we have a stack frame, we must align it now. The stack size /* If we have a stack frame, we must align it now. The stack size
...@@ -2356,8 +2363,9 @@ lra (FILE *f) ...@@ -2356,8 +2363,9 @@ lra (FILE *f)
if (! live_p) if (! live_p)
lra_clear_live_ranges (); lra_clear_live_ranges ();
} }
}
bitmap_clear (&lra_optional_reload_pseudos); bitmap_clear (&lra_optional_reload_pseudos);
}
bitmap_clear (&lra_subreg_reload_pseudos);
bitmap_clear (&lra_inheritance_pseudos); bitmap_clear (&lra_inheritance_pseudos);
bitmap_clear (&lra_split_regs); bitmap_clear (&lra_split_regs);
if (! lra_need_for_spills_p ()) if (! lra_need_for_spills_p ())
......
2013-07-05 Vladimir Makarov <vmakarov@redhat.com>
PR rtl-optimization/55342
* gcc.target/i386/pr55342.c: New.
2013-07-05 Marcus Shawcroft <marcus.shawcroft@arm.com> 2013-07-05 Marcus Shawcroft <marcus.shawcroft@arm.com>
* gcc.dg/pr57518.c: Adjust scan-rtl-dump-not pattern. * gcc.dg/pr57518.c: Adjust scan-rtl-dump-not pattern.
......
/* PR rtl-optimization/55342 */
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-not "notb" } } */
/* Test body: for each of SIZE iterations read three bytes from IN,
   write four bytes to OUT.  Each input byte is subtracted from 255;
   K is the smallest of the three results, and the outputs are the
   three results minus K followed by K itself (presumably an
   RGB -> CMYK conversion, judging by the variable names).
   The exact statement shape matters: the dg-final directive above
   requires that no "notb" appears in the generated assembly, so do
   not simplify or restructure this code.  */
void convert_image(unsigned char *in, unsigned char *out, int size) {
int i;
unsigned char * read = in,
* write = out;
for(i = 0; i < size; i++) {
unsigned char r = *read++;
unsigned char g = *read++;
unsigned char b = *read++;
unsigned char c, m, y, k, tmp;
/* Complement each input byte.  */
c = 255 - r;
m = 255 - g;
y = 255 - b;
/* k = min (c, m, y), written as in the original PR testcase.  */
if (c < m)
k = ((c) > (y)?(y):(c));
else
k = ((m) > (y)?(y):(m));
*write++ = c - k;
*write++ = m - k;
*write++ = y - k;
*write++ = k;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment