Commit 078134e5 by Georg-Johann Lay Committed by Georg-Johann Lay

New avt target pass to work around performance loss by PR fix.

gcc/
	New avt target pass to work around performance loss by PR fix.
	PR target/71676
	PR target/71678
	* config/avr/avr.md (casesi_<mode>_sequence) [qi,hi]: New insn.
	(*cmp<mode>) [qi,qq,uqq,hi,hq,uhq,ha,uha]: Rename to cmp<mode>3.
	* config/avr/predicates.md (extend_operator): New.
	* config/avr/avr-passes.def (avr_pass_casesi): Register new pass.
	* config/avr/avr-protos.h (avr_casei_sequence_check_operands)
	(make_avr_pass_casesi): New prototypes.
	* config/avr/avr.c (print-rtl.h): Include it.
	(pass_data avr_pass_data_casesi): Data for new pass.
	(avr_pass_casesi): New class implementing rtl_opt_pass .avr-casesi.
	(make_avr_pass_casesi, avr_parallel_insn_from_insns)
	(avr_is_casesi_sequence, avr_casei_sequence_check_operands)
	(avr_optimize_casesi): New functions.
gcc/testsuite/
	PR target/71676
	PR target/71678
	* gcc.target/avr/pr71676-2.c: New test.

From-SVN: r241504
parent c7212a15
2016-10-25 Georg-Johann Lay <avr@gjlay.de>
New avt target pass to work around performance loss by PR fix.
PR target/71676
PR target/71678
* config/avr/avr.md (casesi_<mode>_sequence) [qi,hi]: New insn.
(*cmp<mode>) [qi,qq,uqq,hi,hq,uhq,ha,uha]: Rename to cmp<mode>3.
* config/avr/predicates.md (extend_operator): New.
* config/avr/avr-passes.def (avr_pass_casesi): Register new pass.
* config/avr/avr-protos.h (avr_casei_sequence_check_operands)
(make_avr_pass_casesi): New prototypes.
* config/avr/avr.c (print-rtl.h): Include it.
(pass_data avr_pass_data_casesi): Data for new pass.
(avr_pass_casesi): New class implementing rtl_opt_pass .avr-casesi.
(make_avr_pass_casesi, avr_parallel_insn_from_insns)
(avr_is_casesi_sequence, avr_casei_sequence_check_operands)
(avr_optimize_casesi): New functions.
2016-10-25 Georg-Johann Lay <avr@gjlay.de>
Pitchumani Sivanupandi <pitchumani.sivanupandi@microchip.com>
PR target/71676
......
......@@ -17,6 +17,17 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
/* casesi uses a SImode switch index which is quite costly as most code will
work on HImode or QImode. The following pass runs right after .expand and
tries to fix such situations by operating on the original mode. This
reduces code size and register pressure.
The assertion is that the code generated by casesi is unaltered and a
a sign-extend or zero-extend from QImode or HImode precedes the casesi
insns withaout any insns in between. */
INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi);
/* This avr-specific pass (re)computes insn notes, in particular REG_DEAD
notes which are used by `avr.c::reg_unused_after' and branch offset
computations. These notes must be correct, i.e. there must be no
......
......@@ -129,6 +129,7 @@ extern bool avr_mem_memx_p (rtx);
extern bool avr_load_libgcc_p (rtx);
extern bool avr_xload_libgcc_p (machine_mode);
extern rtx avr_eval_addr_attrib (rtx x);
extern bool avr_casei_sequence_check_operands (rtx *xop);
static inline unsigned
regmask (machine_mode mode, unsigned regno)
......@@ -158,6 +159,7 @@ namespace gcc { class context; }
class rtl_opt_pass;
extern rtl_opt_pass *make_avr_pass_recompute_notes (gcc::context *);
extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
/* From avr-log.c */
......
......@@ -48,6 +48,7 @@
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "print-rtl.h"
/* This file should be included last. */
#include "target-def.h"
......@@ -334,6 +335,41 @@ public:
}
}; // avr_pass_recompute_notes
static const pass_data avr_pass_data_casesi =
{
RTL_PASS, // type
"", // name (will be patched)
OPTGROUP_NONE, // optinfo_flags
TV_DF_SCAN, // tv_id
0, // properties_required
0, // properties_provided
0, // properties_destroyed
0, // todo_flags_start
0 // todo_flags_finish
};
class avr_pass_casesi : public rtl_opt_pass
{
public:
avr_pass_casesi (gcc::context *ctxt, const char *name)
: rtl_opt_pass (avr_pass_data_casesi, ctxt)
{
this->name = name;
}
void avr_rest_of_handle_casesi (function*);
virtual bool gate (function*) { return optimize > 0; }
virtual unsigned int execute (function *func)
{
avr_rest_of_handle_casesi (func);
return 0;
}
}; // avr_pass_casesi
} // anon namespace
rtl_opt_pass*
......@@ -342,6 +378,320 @@ make_avr_pass_recompute_notes (gcc::context *ctxt)
return new avr_pass_recompute_notes (ctxt, "avr-notes-free-cfg");
}
rtl_opt_pass*
make_avr_pass_casesi (gcc::context *ctxt)
{
return new avr_pass_casesi (ctxt, "avr-casesi");
}
/* Make one parallel insn with all the patterns from insns i[0]..i[5]. */
static rtx_insn*
avr_parallel_insn_from_insns (rtx_insn *i[6])
{
rtvec vec = gen_rtvec (6, PATTERN (i[0]), PATTERN (i[1]), PATTERN (i[2]),
PATTERN (i[3]), PATTERN (i[4]), PATTERN (i[5]));
start_sequence();
emit (gen_rtx_PARALLEL (VOIDmode, vec));
rtx_insn *insn = get_insns();
end_sequence();
return insn;
}
/* Return true if we see an insn stream generated by casesi expander together
with an extension to SImode of the switch value.
If this is the case, fill in the insns from casesi to INSNS[1..5] and
the SImode extension to INSNS[0]. Moreover, extract the operands of
pattern casesi_<mode>_sequence forged from the sequence to recog_data. */
static bool
avr_is_casesi_sequence (basic_block bb, rtx_insn *insn, rtx_insn *insns[6])
{
rtx set_5, set_0;
/* A first and quick test for a casesi sequences. As a side effect of
the test, harvest respective insns to INSNS[0..5]. */
if (!(JUMP_P (insns[5] = insn)
// casesi is the only insn that comes up with UNSPEC_INDEX_JMP,
// hence the following test ensures that we are actually dealing
// with code from casesi.
&& (set_5 = single_set (insns[5]))
&& UNSPEC == GET_CODE (SET_SRC (set_5))
&& UNSPEC_INDEX_JMP == XINT (SET_SRC (set_5), 1)
&& (insns[4] = prev_real_insn (insns[5]))
&& (insns[3] = prev_real_insn (insns[4]))
&& (insns[2] = prev_real_insn (insns[3]))
&& (insns[1] = prev_real_insn (insns[2]))
// Insn prior to casesi.
&& (insns[0] = prev_real_insn (insns[1]))
&& (set_0 = single_set (insns[0]))
&& extend_operator (SET_SRC (set_0), SImode)))
{
return false;
}
if (dump_file)
{
fprintf (dump_file, ";; Sequence from casesi in "
"[bb %d]:\n\n", bb->index);
for (int i = 0; i < 6; i++)
print_rtl_single (dump_file, insns[i]);
}
/* We have to deal with quite some operands. Extracting them by hand
would be tedious, therefore wrap the insn patterns into a parallel,
run recog against it and then use insn extract to get the operands. */
rtx_insn *xinsn = avr_parallel_insn_from_insns (insns);
INSN_CODE (xinsn) = recog (PATTERN (xinsn), xinsn, NULL /* num_clobbers */);
/* Failing to recognize means that someone changed the casesi expander or
that some passes prior to this one performed some unexpected changes.
Gracefully drop such situations instead of aborting. */
if (INSN_CODE (xinsn) < 0)
{
if (dump_file)
fprintf (dump_file, ";; Sequence not recognized, giving up.\n\n");
return false;
}
gcc_assert (CODE_FOR_casesi_qi_sequence == INSN_CODE (xinsn)
|| CODE_FOR_casesi_hi_sequence == INSN_CODE (xinsn));
extract_insn (xinsn);
// Assert on the anatomy of xinsn's operands we are going to work with.
gcc_assert (11 == recog_data.n_operands);
gcc_assert (4 == recog_data.n_dups);
if (dump_file)
{
fprintf (dump_file, ";; Operands extracted:\n");
for (int i = 0; i < recog_data.n_operands; i++)
avr_fdump (dump_file, ";; $%d = %r\n", i, recog_data.operand[i]);
fprintf (dump_file, "\n");
}
return true;
}
/* Perform some extra checks on operands of casesi_<mode>_sequence.
Not all operand dependencies can be described by means of predicates.
This function performs left over checks and should always return true.
Returning false means that someone changed the casesi expander but did
not adjust casesi_<mode>_sequence. */
bool
avr_casei_sequence_check_operands (rtx *xop)
{
rtx sub_5 = NULL_RTX;
if (AVR_HAVE_EIJMP_EICALL
// The last clobber op of the tablejump.
&& xop[8] == all_regs_rtx[24])
{
// $6 is: (subreg:SI ($5) 0)
sub_5 = xop[6];
}
if (!AVR_HAVE_EIJMP_EICALL
// $6 is: (plus:HI (subreg:SI ($5) 0)
// (label_ref ($3)))
&& PLUS == GET_CODE (xop[6])
&& LABEL_REF == GET_CODE (XEXP (xop[6], 1))
&& rtx_equal_p (xop[3], XEXP (XEXP (xop[6], 1), 0))
// The last clobber op of the tablejump.
&& xop[8] == const0_rtx)
{
sub_5 = XEXP (xop[6], 0);
}
if (sub_5
&& SUBREG_P (sub_5)
&& 0 == SUBREG_BYTE (sub_5)
&& rtx_equal_p (xop[5], SUBREG_REG (sub_5)))
return true;
if (dump_file)
fprintf (dump_file, "\n;; Failed condition for casesi_<mode>_sequence\n\n");
return false;
}
/* INSNS[1..5] is a sequence as generated by casesi and INSNS[0] is an
extension of an 8-bit or 16-bit integer to SImode. XOP contains the
operands of INSNS as extracted by insn_extract from pattern
casesi_<mode>_sequence:
$0: SImode reg switch value as result of $9.
$1: Negative of smallest index in switch.
$2: Number of entries in switch.
$3: Label to table.
$4: Label if out-of-bounds.
$5: $0 + $1.
$6: 3-byte PC: subreg:HI ($5) + label_ref ($3)
2-byte PC: subreg:HI ($5)
$7: HI reg index into table (Z or pseudo)
$8: R24 or const0_rtx (to be clobbered)
$9: Extension to SImode of an 8-bit or 16-bit integer register $10.
$10: QImode or HImode register input of $9.
Try to optimize this sequence, i.e. use the original HImode / QImode
switch value instead of SImode. */
static void
avr_optimize_casesi (rtx_insn *insns[6], rtx *xop)
{
// Original mode of the switch value; this is QImode or HImode.
machine_mode mode = GET_MODE (xop[10]);
// How the original switch value was extended to SImode; this is
// SIGN_EXTEND or ZERO_EXTEND.
enum rtx_code code = GET_CODE (xop[9]);
// Lower index, upper index (plus one) and range of case calues.
HOST_WIDE_INT low_idx = -INTVAL (xop[1]);
HOST_WIDE_INT num_idx = INTVAL (xop[2]);
HOST_WIDE_INT hig_idx = low_idx + num_idx;
// Maximum ranges of (un)signed QImode resp. HImode.
int imin = QImode == mode ? INT8_MIN : INT16_MIN;
int imax = QImode == mode ? INT8_MAX : INT16_MAX;
unsigned umax = QImode == mode ? UINT8_MAX : UINT16_MAX;
// Testing the case range and whether it fits into the range of the
// (un)signed mode. This test should actually always pass because it
// makes no sense to have case values outside the mode range. Notice
// that case labels which are unreachable because they are outside the
// mode of the switch value (e.g. "case -1" for uint8_t) have already
// been thrown away by the middle-end.
if (SIGN_EXTEND == code
&& low_idx >= imin
&& hig_idx <= imax)
{
// ok
}
else if (ZERO_EXTEND == code
&& low_idx >= 0
&& (unsigned) hig_idx <= umax)
{
// ok
}
else
{
if (dump_file)
fprintf (dump_file, ";; Case ranges too big, giving up.\n\n");
return;
}
// Do normalization of switch value $10 and out-of-bound check in its
// original mode instead of in SImode. Use a newly created pseudo.
// This will replace insns[1..2].
start_sequence();
rtx_insn *seq1, *seq2, *last1, *last2;
rtx reg = copy_to_mode_reg (mode, xop[10]);
rtx (*gen_add)(rtx,rtx,rtx) = QImode == mode ? gen_addqi3 : gen_addhi3;
rtx (*gen_cmp)(rtx,rtx) = QImode == mode ? gen_cmpqi3 : gen_cmphi3;
emit_insn (gen_add (reg, reg, gen_int_mode (-low_idx, mode)));
emit_insn (gen_cmp (reg, gen_int_mode (num_idx, mode)));
seq1 = get_insns();
last1 = get_last_insn();
end_sequence();
emit_insn_before (seq1, insns[1]);
// After the out-of-bounds test and corresponding branch, use a
// 16-bit index. If QImode is used, extend it to HImode first.
// This will replace insns[4].
start_sequence();
if (QImode == mode)
reg = force_reg (HImode, gen_rtx_fmt_e (code, HImode, reg));
rtx pat_4 = AVR_3_BYTE_PC
? gen_movhi (xop[7], reg)
: gen_addhi3 (xop[7], reg, gen_rtx_LABEL_REF (VOIDmode, xop[3]));
emit_insn (pat_4);
seq2 = get_insns();
last2 = get_last_insn();
end_sequence();
emit_insn_after (seq2, insns[4]);
if (dump_file)
{
fprintf (dump_file, ";; New insns: ");
for (rtx_insn *insn = seq1; ; insn = NEXT_INSN (insn))
{
fprintf (dump_file, "%d, ", INSN_UID (insn));
if (insn == last1)
break;
}
for (rtx_insn *insn = seq2; ; insn = NEXT_INSN (insn))
{
fprintf (dump_file, "%d%s", INSN_UID (insn),
insn == last2 ? ".\n\n" : ", ");
if (insn == last2)
break;
}
fprintf (dump_file, ";; Deleting insns: %d, %d, %d.\n\n",
INSN_UID (insns[1]), INSN_UID (insns[2]), INSN_UID (insns[4]));
}
// Pseudodelete the SImode and subreg of SImode insns. We don't care
// about the extension insns[0]: Its result is now unused and other
// passes will clean it up.
SET_INSN_DELETED (insns[1]);
SET_INSN_DELETED (insns[2]);
SET_INSN_DELETED (insns[4]);
}
void
avr_pass_casesi::avr_rest_of_handle_casesi (function *func)
{
basic_block bb;
FOR_EACH_BB_FN (bb, func)
{
rtx_insn *insn, *insns[6];
FOR_BB_INSNS (bb, insn)
{
if (avr_is_casesi_sequence (bb, insn, insns))
{
avr_optimize_casesi (insns, recog_data.operand);
}
}
}
}
/* Set `avr_arch' as specified by `-mmcu='.
Return true on success. */
......
......@@ -4598,9 +4598,9 @@
(set_attr "length" "4")])
;; "*cmpqi"
;; "*cmpqq" "*cmpuqq"
(define_insn "*cmp<mode>"
;; "cmpqi3"
;; "cmpqq3" "cmpuqq3"
(define_insn "cmp<mode>3"
[(set (cc0)
(compare (match_operand:ALL1 0 "register_operand" "r ,r,d")
(match_operand:ALL1 1 "nonmemory_operand" "Y00,r,i")))]
......@@ -4640,10 +4640,10 @@
[(set_attr "cc" "compare")
(set_attr "length" "2")])
;; "*cmphi"
;; "*cmphq" "*cmpuhq"
;; "*cmpha" "*cmpuha"
(define_insn "*cmp<mode>"
;; "cmphi3"
;; "cmphq3" "cmpuhq3"
;; "cmpha3" "cmpuha3"
(define_insn "cmp<mode>3"
[(set (cc0)
(compare (match_operand:ALL2 0 "register_operand" "!w ,r ,r,d ,r ,d,r")
(match_operand:ALL2 1 "nonmemory_operand" "Y00,Y00,r,s ,s ,M,n Ynn")))
......@@ -5153,6 +5153,27 @@
(set_attr "cc" "clobber")])
;; FIXME: casesi comes up with an SImode switch value $0 which
;; is quite some overhead because most code would use HI or
;; even QI. We add an AVR specific pass .avr-casesi which
;; tries to recover from the superfluous extension to SImode.
;;
;; Using "tablejump" could be a way out, but this also does
;; not perform in a satisfying manner as the middle end will
;; already multiply the table index by 2. Note that this
;; multiplication is performed by libgcc's __tablejump2__.
;; The multiplication there, however, runs *after* the table
;; start (a byte address) has been added, not before it like
;; "tablejump" will do.
;;
;; The preferred solution would be to let the middle ends pass
;; down information on the index as an additional casesi operand.
;;
;; If this expander is changed, you'll likely have to go through
;; "casesi_<mode>_sequence" (used to recog + extract casesi
;; sequences in pass .avr-casesi) and propagate all adjustments
;; also to that pattern and the code of the extra pass.
(define_expand "casesi"
[(parallel [(set (match_dup 5)
(plus:SI (match_operand:SI 0 "register_operand")
......@@ -5198,6 +5219,49 @@
})
;; This insn is used only for easy operand extraction.
;; The elements must match an extension to SImode plus
;; a sequence generated by casesi above.
;; "casesi_qi_sequence"
;; "casesi_hi_sequence"
(define_insn "casesi_<mode>_sequence"
[(set (match_operand:SI 0 "register_operand")
(match_operator:SI 9 "extend_operator"
[(match_operand:QIHI 10 "register_operand")]))
;; What follows is a matcher for code from casesi.
;; We keep the same operand numbering (except for $9 and $10
;; which don't appear in casesi).
(parallel [(set (match_operand:SI 5 "register_operand")
(plus:SI (match_dup 0)
(match_operand:SI 1 "const_int_operand")))
(clobber (scratch:QI))])
(parallel [(set (cc0)
(compare (match_dup 5)
(match_operand:SI 2 "const_int_operand")))
(clobber (scratch:QI))])
(set (pc)
(if_then_else (gtu (cc0)
(const_int 0))
(label_ref (match_operand 4))
(pc)))
(set (match_operand:HI 7 "register_operand")
(match_operand:HI 6))
(parallel [(set (pc)
(unspec:HI [(match_dup 7)] UNSPEC_INDEX_JMP))
(use (label_ref (match_operand 3)))
(clobber (match_dup 7))
(clobber (match_operand:QI 8))])]
"optimize
&& avr_casei_sequence_check_operands (operands)"
{ gcc_unreachable(); }
)
;; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
;; This instruction sets Z flag
......@@ -5550,7 +5614,7 @@
;; This peephole avoids code like
;;
;; TST Rn ; *cmpqi
;; TST Rn ; cmpqi3
;; BREQ .+2 ; branch
;; RJMP .Lm
;;
......
......@@ -178,6 +178,10 @@
(and (match_operand 0 "comparison_operator")
(not (match_code "gt,gtu,le,leu"))))
;; True for SIGN_EXTEND, ZERO_EXTEND.
(define_predicate "extend_operator"
(match_code "sign_extend,zero_extend"))
;; Return true if OP is a valid call operand.
(define_predicate "call_insn_operand"
(and (match_code "mem")
......
2016-10-25 Georg-Johann Lay <avr@gjlay.de>
PR target/71676
PR target/71678
* gcc.target/avr/pr71676-2.c: New test.
2016-10-25 Georg-Johann Lay <avr@gjlay.de>
Pitchumani Sivanupandi <pitchumani.sivanupandi@microchip.com>
PR target/71676
......
/* { dg-do compile } */
/* { dg-options "-dp -w -Os -fno-tree-switch-conversion" } */
#define MK_FUN(NAME, TYP, V) \
unsigned char __attribute__((noinline,noclone)) \
select_## NAME (TYP x, unsigned char y) \
{ \
switch (x) \
{ \
case V + 0: return 0 + y; \
case V + 1: return 1; \
case V + 2: return 2 + y; \
case V + 3: return 3; \
case V + 4: return 4 + y; \
case V + 5: return 5; \
case V + 6: return 6 + y; \
case V + 7: return 7; \
case V + 8: return 8 + y; \
case V + 9: return 9; \
case V + 10: return 10 + y; \
case V + 11: return 11; \
case V + 12: return 12 + y; \
case V + 13: return 13; \
case V + 14: return 14 + y; \
case V + 15: return 15; \
} \
return x; \
}
MK_FUN (0_s8, signed char, 0)
MK_FUN (0_u8, unsigned char, 0)
MK_FUN (0_s16, signed int, 0)
MK_FUN (0_u16, unsigned int, 0)
MK_FUN (m4_s8, signed char, -4)
MK_FUN (m4_u8, unsigned char, -4)
MK_FUN (m4_s16, signed int, -4)
MK_FUN (m4_u16, unsigned int, -4)
MK_FUN (4_s8, signed char, 4)
MK_FUN (4_u8, unsigned char, 4)
MK_FUN (4_s16, signed int, 4)
MK_FUN (4_u16, unsigned int, 4)
/* { dg-final { scan-assembler-not "extendqisi" } } */
/* { dg-final { scan-assembler-not "extendhisi" } } */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment