Commit b4eca9c8 by Oleg Endo

re PR target/51244 ([SH] Inefficient conditional branch and code around T bit)

	PR target/51244
	* config/sh/sh.md (negsi_cond, negdi_cond, stack_protect_test): Remove
	get_t_reg_rtx when invoking gen_branch_true or gen_branch_false.
	(*zero_extend<mode>si2_compact): Convert to insn_and_split.  Convert
	zero extensions of T bit stores to reg moves in splitter.  Remove
	obsolete unnamed peephole2 that caught zero extensions after negc T bit
	stores.
	(*branch_true_eq, *branch_false_ne): Delete.
	(branch_true, branch_false): Convert insn to expander.  Move actual
	insn logic to...
	(*cbranch_t): ...this new insn_and_split.  Try to find preceding
	redundant T bit stores and tests and combine them with the conditional
	branch if possible in the splitter.
	(movrt_xor, *movt_movrt): New insn_and_split.
	* config/sh/predicates.md (cbranch_treg_value): New predicate.
	* config/sh/sh-protos.h (sh_eval_treg_value): Forward declare...
	* config/sh/sh.c (sh_eval_treg_value): ...this new function.
	(expand_cbranchsi4, expand_cbranchdi4): Remove get_t_reg_rtx
	when invoking gen_branch_true or gen_branch_false.

	PR target/51244
	* gcc.target/sh/pr51244-13.c: New.
	* gcc.target/sh/pr51244-14.c: New.
	* gcc.target/sh/pr51244-15.c: New.
	* gcc.target/sh/pr51244-16.c: New.

From-SVN: r192387
parent 76a2a3f7
2012-10-12 Oleg Endo <olegendo@gcc.gnu.org>
PR target/51244
* config/sh/sh.md (negsi_cond, negdi_cond, stack_protect_test): Remove
get_t_reg_rtx when invoking gen_branch_true or gen_branch_false.
(*zero_extend<mode>si2_compact): Convert to insn_and_split. Convert
zero extensions of T bit stores to reg moves in splitter. Remove
obsolete unnamed peephole2 that caught zero extensions after negc T bit
stores.
(*branch_true_eq, *branch_false_ne): Delete.
(branch_true, branch_false): Convert insn to expander. Move actual
insn logic to...
(*cbranch_t): ...this new insn_and_split. Try to find preceding
redundant T bit stores and tests and combine them with the conditional
branch if possible in the splitter.
(movrt_xor, *movt_movrt): New insn_and_split.
* config/sh/predicates.md (cbranch_treg_value): New predicate.
* config/sh/sh-protos.h (sh_eval_treg_value): Forward declare...
* config/sh/sh.c (sh_eval_treg_value): ...this new function.
(expand_cbranchsi4, expand_cbranchdi4): Remove get_t_reg_rtx
when invoking gen_branch_true or gen_branch_false.
2012-10-11 Uros Bizjak <ubizjak@gmail.com>
* config/alpha/alpha.md (IMODE): New mode iterator.
......
......@@ -1048,6 +1048,14 @@
}
})
;; A predicate that returns true if OP is a valid construct around the T bit
;; that can be used as an operand for conditional branches.
;; OP has to have one of the rtx codes listed in the match_code below, and
;; sh_eval_treg_value has to return a non-negative value for it (that
;; function returns -1 for operands it does not recognize).
(define_predicate "cbranch_treg_value"
(match_code "eq,ne,reg,subreg,xor,sign_extend,zero_extend")
{
return sh_eval_treg_value (op) >= 0;
})
;; Returns true if OP is arith_reg_operand or t_reg_operand.
(define_predicate "arith_reg_or_t_reg_operand"
(ior (match_operand 0 "arith_reg_operand")
......
......@@ -162,6 +162,7 @@ extern bool sh_cfun_trap_exit_p (void);
extern void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
enum machine_mode mode = VOIDmode);
extern rtx sh_find_equiv_gbr_addr (rtx cur_insn, rtx mem);
/* Returns 1 if OP used as a branch condition means branch_true, 0 if it
   means branch_false, and -1 if OP is neither.  Defined in sh.c.  */
extern int sh_eval_treg_value (rtx op);
#endif /* RTX_CODE */
extern void sh_cpu_cpp_builtins (cpp_reader* pfile);
......
......@@ -2059,7 +2059,7 @@ prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
void
expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
{
rtx (*branch_expander) (rtx, rtx) = gen_branch_true;
rtx (*branch_expander) (rtx) = gen_branch_true;
comparison = prepare_cbranch_operands (operands, SImode, comparison);
switch (comparison)
{
......@@ -2071,7 +2071,7 @@ expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
gen_rtx_fmt_ee (comparison, SImode,
operands[1], operands[2])));
rtx jump = emit_jump_insn (branch_expander (operands[3], get_t_reg_rtx ()));
rtx jump = emit_jump_insn (branch_expander (operands[3]));
if (probability >= 0)
add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
}
......@@ -2123,7 +2123,7 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
if (TARGET_CMPEQDI_T)
{
emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
emit_jump_insn (gen_branch_true (operands[3], get_t_reg_rtx ()));
emit_jump_insn (gen_branch_true (operands[3]));
return true;
}
msw_skip = NE;
......@@ -2150,7 +2150,7 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
if (TARGET_CMPEQDI_T)
{
emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
emit_jump_insn (gen_branch_false (operands[3], get_t_reg_rtx ()));
emit_jump_insn (gen_branch_false (operands[3]));
return true;
}
msw_taken = NE;
......@@ -2281,6 +2281,43 @@ expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
return true;
}
/* Given an operand, return 1 if the evaluated operand plugged into an
   if_then_else will result in a branch_true, 0 if branch_false, or
   -1 if neither applies.

   OP is only recognized if it is an EQ or NE comparison whose first
   operand satisfies t_reg_operand or negt_reg_operand and whose second
   operand is the constant 0 or 1.  Everything else yields -1.

   The truth table goes like this:

       op   | cmpval |   code  | result
   ---------+--------+---------+--------------------
      T (0) |   0    |  EQ (1) |  0 = 0 ^ (0 == 1)
      T (0) |   1    |  EQ (1) |  1 = 0 ^ (1 == 1)
      T (0) |   0    |  NE (0) |  1 = 0 ^ (0 == 0)
      T (0) |   1    |  NE (0) |  0 = 0 ^ (1 == 0)
     !T (1) |   0    |  EQ (1) |  1 = 1 ^ (0 == 1)
     !T (1) |   1    |  EQ (1) |  0 = 1 ^ (1 == 1)
     !T (1) |   0    |  NE (0) |  0 = 1 ^ (0 == 0)
     !T (1) |   1    |  NE (0) |  1 = 1 ^ (1 == 0)  */
int
sh_eval_treg_value (rtx op)
{
  enum rtx_code code = GET_CODE (op);

  /* Only look at operand 1 once we know OP is a comparison.  */
  if (code != EQ && code != NE)
    return -1;

  rtx cmp = XEXP (op, 1);
  if (!CONST_INT_P (cmp))
    return -1;

  /* Range-check the constant at its full width.  Narrowing it to int
     first could turn an out-of-range value into 0 or 1 by dropping its
     upper bits.  */
  if (INTVAL (cmp) != 0 && INTVAL (cmp) != 1)
    return -1;

  int cmpop = code == EQ ? 1 : 0;
  int cmpval = INTVAL (cmp) == 1 ? 1 : 0;

  /* T is 0 for the plain T bit and 1 for the negated T bit, matching
     the 'op' column of the truth table above.  */
  int t;
  if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
    t = 0;
  else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
    t = 1;
  else
    return -1;

  return t ^ (cmpval == cmpop);
}
/* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
static void
......@@ -2485,9 +2522,9 @@ sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
if (branch_code == code)
emit_jump_insn (gen_branch_true (operands[3], get_t_reg_rtx ()));
emit_jump_insn (gen_branch_true (operands[3]));
else
emit_jump_insn (gen_branch_false (operands[3], get_t_reg_rtx ()));
emit_jump_insn (gen_branch_false (operands[3]));
}
void
......@@ -2521,7 +2558,7 @@ sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
{
lab = gen_label_rtx ();
sh_emit_scc_to_t (EQ, op0, op1);
emit_jump_insn (gen_branch_true (lab, get_t_reg_rtx ()));
emit_jump_insn (gen_branch_true (lab));
code = GT;
}
else
......
2012-10-12 Oleg Endo <olegendo@gcc.gnu.org>
PR target/51244
* gcc.target/sh/pr51244-13.c: New.
* gcc.target/sh/pr51244-14.c: New.
* gcc.target/sh/pr51244-15.c: New.
* gcc.target/sh/pr51244-16.c: New.
2012-10-11 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/51878
......
/* This is a case extracted from CSiBE which contained the following
sequence:
shll r0
movt r0
tst r0,r0
bf .L11
where the 'tst r0,r0' before the branch can be omitted by inverting the
branch condition. The tested function contains two other tst insns. If
everything goes as expected we will be seeing only those other two tst
insns. */
/* { dg-do compile { target "sh*-*-*" } } */
/* { dg-options "-O2" } */
/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */
/* { dg-final { scan-assembler-times "tst" 2 } } */
/* Stand-in for the non-constant bit test.  Neither NR nor ADDR is used;
   the result is bit 0 of the local OLDBIT, which is never assigned.  */
static __inline__ int
__test_bit (unsigned long nr, volatile void * addr)
{
/* This is on purpose. */
int oldbit;
return oldbit & 1;
}
/* Return 1 if bit NR of the byte array at ADDR is set, 0 otherwise.
   The byte is selected with index (NR >> 3) ^ 7 and read through a
   volatile char pointer; the bit within that byte is NR & 7.  */
static __inline__ int
__constant_test_bit (unsigned long nr, volatile void * addr)
{
return (((volatile char *) addr)[(nr>>3)^7] & (1<<(nr&7))) != 0;
}
/* List link: pointers to the following and the preceding list_head.  */
struct list_head
{
struct list_head *next, *prev;
};
/* Make PREV and NEXT point at each other: NEXT->prev is set to PREV and
   PREV->next is set to NEXT.  */
static inline void
__list_del (struct list_head *prev, struct list_head *next)
{
next->prev = prev;
prev->next = next;
}
/* Take ENTRY out of its list: join ENTRY's two neighbours with
   __list_del, then clear both of ENTRY's own link pointers.  */
static inline void
list_del (struct list_head *entry)
{
__list_del(entry->prev, entry->next);
entry->next = 0;
entry->prev = 0;
}
/* Objects defined outside this test.  activate_page_nolock increments
   nr_active_pages, decrements nr_inactive_pages and passes the address
   of active_list to list_add.  */
extern int nr_active_pages;
extern int nr_inactive_pages;
extern struct list_head active_list;
/* Page descriptor reduced to what this test touches: a flags word that
   the bit tests read, and a list link.  mem_map_t is an alias for it.  */
typedef struct page
{
unsigned long flags;
struct list_head lru;
} mem_map_t;
/* Function under test.  If bit 6 of PAGE->flags tests as set and bit 7
   tests as clear, remove PAGE->lru from its list, decrement
   nr_inactive_pages, re-test both bits (calling printk when bit 6 is now
   clear or bit 7 is now set), set bit 7, pass PAGE->lru and &active_list
   to list_add and increment nr_active_pages.  Otherwise do nothing.

   Every bit operation is written as a __builtin_constant_p selection
   between a __constant_* and a non-constant helper.  printk,
   __constant_set_bit, __set_bit and list_add have no declaration in the
   visible part of this test.  */
void
activate_page_nolock (struct page * page)
{
if ((__builtin_constant_p((6))
? __constant_test_bit((6),(&(page)->flags))
: __test_bit((6),(&(page)->flags)) )
&& !(__builtin_constant_p((7))
? __constant_test_bit((7),(&(page)->flags))
: __test_bit((7),(&(page)->flags)) ))
{
list_del(&(page)->lru);
nr_inactive_pages--;
/* Second test of bit 6, after the list removal.  */
if (!(__builtin_constant_p(6) ? __constant_test_bit((6),(&(page)->flags))
: __test_bit((6),(&(page)->flags))))
printk("", "", 43);
/* Second test of bit 7.  */
if ((__builtin_constant_p(7) ? __constant_test_bit((7),(&(page)->flags))
: __test_bit((7),(&(page)->flags))))
printk("", "", 43);
/* Set bit 7; the value of the expression is discarded.  */
(__builtin_constant_p(7) ? __constant_set_bit((7),(&(page)->flags))
: __set_bit((7),(&(page)->flags)) );
list_add(&(page)->lru, &active_list);
nr_active_pages++;
}
}
/* This is a case extracted from CSiBE which would sometimes contain the
following sequence:
cmp/eq r12,r13
movt r0
xor #1,r0
extu.b r0,r0
movt r3
tst r0,r0
bf/s .L35
where the negated T bit store did not combine properly. Since there are
other movt insns we only check for the xor and the extu. */
/* { dg-do compile { target "sh*-*-*" } } */
/* { dg-options "-O2" } */
/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" } { "" } } */
/* { dg-final { scan-assembler-not "xor|extu" } } */
/* transaction_t is declared before struct transaction_s is defined so
   that struct journal_head can hold a pointer to it.  */
typedef struct transaction_s transaction_t;
/* Per-buffer record: a pointer to a transaction plus next/previous
   links to other journal_head objects.  */
struct journal_head
{
transaction_t * b_transaction;
struct journal_head *b_cpnext, *b_cpprev;
};
/* Transaction record: a pointer to a journal_head (the entry point that
   log_do_checkpoint walks from) plus next/previous links to other
   transactions.  */
struct transaction_s
{
struct journal_head * t_checkpoint_list;
transaction_t *t_cpnext, *t_cpprev;
};
/* Journal record: a pointer to a transaction, read by log_do_checkpoint,
   and two unsigned long fields that the visible code does not use.  */
struct journal_s
{
transaction_t * j_checkpoint_transactions;
unsigned long j_first, j_last;
};
/* Alias for struct journal_s.  */
typedef struct journal_s journal_t;
/* Helpers defined outside this test.  The visible code calls
   __try_to_free_cp_buf, __cleanup_transaction and jh2bh; __flush_batch
   is declared but not called in the visible code.  */
extern int __try_to_free_cp_buf (struct journal_head *jh);
extern int __cleanup_transaction (journal_t *journal, transaction_t *transaction);
extern void __flush_batch (void **bhs, int *batch_count);
extern void* jh2bh (void*);
/* Handle one journal_head JH.  JOURNAL is not used.

   If jh2bh (JH) returns a non-null pointer, it is stored at
   BHS[*BATCH_COUNT], *BATCH_COUNT is incremented, and the result is 1
   when the incremented count equals 64, otherwise 0.

   If jh2bh returns null, __try_to_free_cp_buf (JH) is called.  When
   that returns non-zero, *DROP_COUNT is incremented and the result is 1
   if JH->b_cpnext was JH itself before the call, otherwise 0.  When it
   returns zero the result is 0.  */
static int
__flush_buffer (journal_t *journal, struct journal_head *jh,
void **bhs, int *batch_count, int *drop_count)
{
void *bh = jh2bh (jh);
int ret = 0;
if (bh)
{
bhs[*batch_count] = bh;
(*batch_count)++;
if (*batch_count == 64)
ret = 1;
}
else
{
/* Sampled before __try_to_free_cp_buf is called.  */
int last_buffer = 0;
if (jh->b_cpnext == jh)
last_buffer = 1;
if (__try_to_free_cp_buf (jh))
{
(*drop_count)++;
ret = last_buffer;
}
}
return ret;
}
/* Function under test.  NBLOCKS is not used.

   Starting at the `repeat' label, read JOURNAL->j_checkpoint_transactions
   and return 0 if it is null.  Otherwise walk the journal_head chain of
   the transaction with __flush_buffer until the chain's last element has
   been handled or __flush_buffer returns non-zero.

   Both paths after the inner loop jump back to `repeat', so the outer
   loop condition is never evaluated and `return 0' is the only way out
   of the function.  */
int
log_do_checkpoint (journal_t *journal, int nblocks)
{
transaction_t *transaction, *last_transaction, *next_transaction;
int batch_count = 0;
void *bhs[64];
repeat:
transaction = journal->j_checkpoint_transactions;
if (transaction == ((void *)0))
return 0;
last_transaction = transaction->t_cpprev;
next_transaction = transaction;
do
{
struct journal_head *jh, *last_jh, *next_jh;
int drop_count = 0;
int cleanup_ret, retry = 0;
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
jh = transaction->t_checkpoint_list;
last_jh = jh->b_cpprev;
next_jh = jh;
do
{
jh = next_jh;
/* The successor is fetched before __flush_buffer is called on JH.  */
next_jh = jh->b_cpnext;
retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
} while (jh != last_jh && !retry);
if (retry)
goto repeat;
/* The result is stored but never read.  */
cleanup_ret = __cleanup_transaction(journal, transaction);
goto repeat;
} while (transaction != last_transaction);
}
/* Check that the redundant test removal code in the *cbranch_t split works
as expected on non-SH2A targets. Because on SH2A the movrt instruction
is used, this test is re-used and checked differently in pr51244-16.c. */
/* { dg-do compile { target "sh*-*-*" } } */
/* { dg-options "-O2" } */
/* { dg-skip-if "" { "sh*-*-*" } { "-m5*" "-m2a*" } { "" } } */
/* { dg-final { scan-assembler-times "tst" 6 } } */
/* { dg-final { scan-assembler-times "movt" 6 } } */
/* { dg-final { scan-assembler-times "xor" 3 } } */
/* { dg-final { scan-assembler-not "extu|exts|negc" } } */
/* In this test `bool' is plain char, so the `x' locals in the test
   functions are chars holding the result of a comparison.  */
typedef char bool;
/* x = (a == 0).  Stores !x into the int d[2] and returns b when x is
   non-zero, c otherwise.  */
int
test_0 (int a, int b, int c, int* d)
{
/* non SH2A: 1x tst, 1x movt, 1x xor
SH2A: 1x tst, 1x movrt */
bool x = a == 0;
d[2] = !x;
return x ? b : c;
}
/* x = (a != 0).  Stores !x into the int d[2] and returns b when x is
   non-zero, c otherwise.  */
int
test_1 (int a, int b, int c, int* d)
{
/* 1x tst, 1x movt */
bool x = a != 0;
d[2] = !x;
return x ? b : c;
}
/* Same as test_0 except that D points to char, so !x is stored into a
   char element.  */
int
test_2 (int a, int b, int c, char* d)
{
/* Check that there is no sign/zero-extension before the store.
non SH2A: 1x tst, 1x movt, 1x xor
SH2A: 1x tst, 1x movrt */
bool x = a == 0;
d[2] = !x;
return x ? b : c;
}
/* Same as test_1 except that D points to char, so !x is stored into a
   char element.  */
int
test_3 (int a, int b, int c, char* d)
{
/* Check that there is no sign/zero-extension before the store.
1x tst, 1x movt */
bool x = a != 0;
d[2] = !x;
return x ? b : c;
}
/* x = (a != 0).  Stores !x into the char d[2] and selects on the negated
   value: returns b when x is zero, c otherwise.  */
int
test_4 (int a, int b, int c, char* d)
{
/* 1x tst, 1x movt */
bool x = a != 0;
d[2] = !x;
return !x ? b : c;
}
/* x = (a == 0).  Stores !x into the char d[2] and selects on the negated
   value: returns b when x is zero, c otherwise.  */
int
test_5 (int a, int b, int c, char* d)
{
/* non SH2A: 1x tst, 1x movt, 1x xor
SH2A: 1x tst, 1x movrt */
bool x = a == 0;
d[2] = !x;
return !x ? b : c;
}
/* Check that the redundant test removal code in the *cbranch_t split works
as expected on SH2A targets. */
/* { dg-do compile { target "sh*-*-*" } } */
/* { dg-options "-O2" } */
/* { dg-skip-if "" { "sh*-*-*" } { "*" } { "-m2a*" } } */
/* { dg-final { scan-assembler-times "tst" 6 } } */
/* { dg-final { scan-assembler-times "movt" 3 } } */
/* { dg-final { scan-assembler-times "movrt" 3 } } */
/* { dg-final { scan-assembler-not "extu|exts|negc" } } */
#include "pr51244-15.c"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment