Commit d400fda3 by Richard Henderson Committed by Richard Henderson

aarch64: Improve cas generation

Do not zero-extend the input to the cas for subword operations;
instead, use the appropriate zero-extending compare insns.
Correct the predicates and constraints for immediate expected operand.

	* config/aarch64/aarch64.c (aarch64_gen_compare_reg_maybe_ze): New.
	(aarch64_split_compare_and_swap): Use it.
	(aarch64_expand_compare_and_swap): Likewise.  Remove convert_modes;
	test oldval against the proper predicate.
	* config/aarch64/atomics.md (@atomic_compare_and_swap<ALLI>):
	Use nonmemory_operand for expected.
	(cas_short_expected_pred): New.
	(@aarch64_compare_and_swap<SHORT>): Use it; use "rn" not "rI" to match.
	(@aarch64_compare_and_swap<GPI>): Use "rn" not "rI" for expected.
	* config/aarch64/predicates.md (aarch64_plushi_immediate): New.
	(aarch64_plushi_operand): New.

From-SVN: r265657
parent 77f33f44
2018-10-31  Richard Henderson  <richard.henderson@linaro.org>
* config/aarch64/aarch64.c (aarch64_gen_compare_reg_maybe_ze): New.
(aarch64_split_compare_and_swap): Use it.
(aarch64_expand_compare_and_swap): Likewise. Remove convert_modes;
test oldval against the proper predicate.
* config/aarch64/atomics.md (@atomic_compare_and_swap<ALLI>):
Use nonmemory_operand for expected.
(cas_short_expected_pred): New.
(@aarch64_compare_and_swap<SHORT>): Use it; use "rn" not "rI" to match.
(@aarch64_compare_and_swap<GPI>): Use "rn" not "rI" for expected.
* config/aarch64/predicates.md (aarch64_plushi_immediate): New.
(aarch64_plushi_operand): New.
	* config/aarch64/aarch64.c (aarch64_expand_compare_and_swap):
	Force oldval into the rval register for TARGET_LSE; emit the compare
	during initial expansion so that it may be deleted if unused.
...@@ -1614,6 +1614,33 @@ aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y) ...@@ -1614,6 +1614,33 @@ aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
return cc_reg; return cc_reg;
} }
/* Similarly, but maybe zero-extend Y if Y_MODE < SImode.  */

static rtx
aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
				  machine_mode y_mode)
{
  /* For subword (QI/HI) operands the comparison is done in SImode,
     so Y must be widened first.  */
  if (y_mode == E_QImode || y_mode == E_HImode)
    {
      if (CONST_INT_P (y))
	/* Zero-extend a constant by masking it to the narrow mode.  */
	y = GEN_INT (INTVAL (y) & GET_MODE_MASK (y_mode));
      else
	{
	  /* Emit a compare of X against the zero-extension of Y.  The
	     operands are swapped relative to aarch64_gen_compare_reg,
	     hence the CC_SWPmode flags mode.  */
	  rtx cc_reg = gen_rtx_REG (CC_SWPmode, CC_REGNUM);
	  rtx cmp = gen_rtx_COMPARE (CC_SWPmode,
				     gen_rtx_ZERO_EXTEND (SImode, y),
				     x);
	  emit_set_insn (cc_reg, cmp);
	  return cc_reg;
	}
    }

  /* Word-sized operands need no extension; use the normal path.  */
  return aarch64_gen_compare_reg (code, x, y);
}
/* Build the SYMBOL_REF for __tls_get_addr. */ /* Build the SYMBOL_REF for __tls_get_addr. */
static GTY(()) rtx tls_get_addr_libfunc; static GTY(()) rtx tls_get_addr_libfunc;
...@@ -14575,8 +14602,8 @@ aarch64_emit_unlikely_jump (rtx insn) ...@@ -14575,8 +14602,8 @@ aarch64_emit_unlikely_jump (rtx insn)
void void
aarch64_expand_compare_and_swap (rtx operands[]) aarch64_expand_compare_and_swap (rtx operands[])
{ {
rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x; rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x, cc_reg;
machine_mode mode, cmp_mode; machine_mode mode, r_mode;
bval = operands[0]; bval = operands[0];
rval = operands[1]; rval = operands[1];
...@@ -14587,36 +14614,19 @@ aarch64_expand_compare_and_swap (rtx operands[]) ...@@ -14587,36 +14614,19 @@ aarch64_expand_compare_and_swap (rtx operands[])
mod_s = operands[6]; mod_s = operands[6];
mod_f = operands[7]; mod_f = operands[7];
mode = GET_MODE (mem); mode = GET_MODE (mem);
cmp_mode = mode;
/* Normally the succ memory model must be stronger than fail, but in the /* Normally the succ memory model must be stronger than fail, but in the
unlikely event of fail being ACQUIRE and succ being RELEASE we need to unlikely event of fail being ACQUIRE and succ being RELEASE we need to
promote succ to ACQ_REL so that we don't lose the acquire semantics. */ promote succ to ACQ_REL so that we don't lose the acquire semantics. */
if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f))) if (is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
&& is_mm_release (memmodel_from_int (INTVAL (mod_s)))) && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
mod_s = GEN_INT (MEMMODEL_ACQ_REL); mod_s = GEN_INT (MEMMODEL_ACQ_REL);
switch (mode) r_mode = mode;
if (mode == QImode || mode == HImode)
{ {
case E_QImode: r_mode = SImode;
case E_HImode: rval = gen_reg_rtx (r_mode);
/* For short modes, we're going to perform the comparison in SImode,
so do the zero-extension now. */
cmp_mode = SImode;
rval = gen_reg_rtx (SImode);
oldval = convert_modes (SImode, mode, oldval, true);
/* Fall through. */
case E_SImode:
case E_DImode:
/* Force the value into a register if needed. */
if (!aarch64_plus_operand (oldval, mode))
oldval = force_reg (cmp_mode, oldval);
break;
default:
gcc_unreachable ();
} }
if (TARGET_LSE) if (TARGET_LSE)
...@@ -14624,26 +14634,32 @@ aarch64_expand_compare_and_swap (rtx operands[]) ...@@ -14624,26 +14634,32 @@ aarch64_expand_compare_and_swap (rtx operands[])
/* The CAS insn requires oldval and rval overlap, but we need to /* The CAS insn requires oldval and rval overlap, but we need to
have a copy of oldval saved across the operation to tell if have a copy of oldval saved across the operation to tell if
the operation is successful. */ the operation is successful. */
if (mode == QImode || mode == HImode) if (reg_overlap_mentioned_p (rval, oldval))
rval = copy_to_mode_reg (SImode, gen_lowpart (SImode, oldval)); rval = copy_to_mode_reg (r_mode, oldval);
else if (reg_overlap_mentioned_p (rval, oldval))
rval = copy_to_mode_reg (mode, oldval);
else else
emit_move_insn (rval, oldval); emit_move_insn (rval, gen_lowpart (r_mode, oldval));
emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem, emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem,
newval, mod_s)); newval, mod_s));
aarch64_gen_compare_reg (EQ, rval, oldval); cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
} }
else else
emit_insn (gen_aarch64_compare_and_swap (mode, rval, mem, oldval, newval, {
is_weak, mod_s, mod_f)); /* The oldval predicate varies by mode. Test it and force to reg. */
insn_code code = code_for_aarch64_compare_and_swap (mode);
if (!insn_data[code].operand[2].predicate (oldval, mode))
oldval = force_reg (mode, oldval);
if (mode == QImode || mode == HImode) emit_insn (GEN_FCN (code) (rval, mem, oldval, newval,
is_weak, mod_s, mod_f));
cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
}
if (r_mode != mode)
rval = gen_lowpart (mode, rval); rval = gen_lowpart (mode, rval);
emit_move_insn (operands[1], rval); emit_move_insn (operands[1], rval);
x = gen_rtx_REG (CCmode, CC_REGNUM); x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
x = gen_rtx_EQ (SImode, x, const0_rtx);
emit_insn (gen_rtx_SET (bval, x)); emit_insn (gen_rtx_SET (bval, x));
} }
...@@ -14758,10 +14774,10 @@ aarch64_split_compare_and_swap (rtx operands[]) ...@@ -14758,10 +14774,10 @@ aarch64_split_compare_and_swap (rtx operands[])
} }
else else
{ {
cond = aarch64_gen_compare_reg (NE, rval, oldval); cond = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
x = gen_rtx_NE (VOIDmode, cond, const0_rtx); x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
gen_rtx_LABEL_REF (Pmode, label2), pc_rtx); gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
} }
......
...@@ -24,8 +24,8 @@ ...@@ -24,8 +24,8 @@
[(match_operand:SI 0 "register_operand" "") ;; bool out [(match_operand:SI 0 "register_operand" "") ;; bool out
(match_operand:ALLI 1 "register_operand" "") ;; val out (match_operand:ALLI 1 "register_operand" "") ;; val out
(match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory (match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory
(match_operand:ALLI 3 "general_operand" "") ;; expected (match_operand:ALLI 3 "nonmemory_operand" "") ;; expected
(match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired
(match_operand:SI 5 "const_int_operand") ;; is_weak (match_operand:SI 5 "const_int_operand") ;; is_weak
(match_operand:SI 6 "const_int_operand") ;; mod_s (match_operand:SI 6 "const_int_operand") ;; mod_s
(match_operand:SI 7 "const_int_operand")] ;; mod_f (match_operand:SI 7 "const_int_operand")] ;; mod_f
...@@ -36,19 +36,22 @@ ...@@ -36,19 +36,22 @@
} }
) )
;; The predicate for the "expected" operand of a subword CAS varies by
;; element size: any QImode immediate is usable, while an HImode immediate
;; must zero-extend to a valid SImode plus-operand (aarch64_plushi_operand).
(define_mode_attr cas_short_expected_pred
  [(QI "aarch64_reg_or_imm") (HI "aarch64_plushi_operand")])
(define_insn_and_split "@aarch64_compare_and_swap<mode>" (define_insn_and_split "@aarch64_compare_and_swap<mode>"
[(set (reg:CC CC_REGNUM) ;; bool out [(set (reg:CC CC_REGNUM) ;; bool out
(unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
(set (match_operand:SI 0 "register_operand" "=&r") ;; val out (set (match_operand:SI 0 "register_operand" "=&r") ;; val out
(zero_extend:SI (zero_extend:SI
(match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
(set (match_dup 1) (set (match_dup 1)
(unspec_volatile:SHORT (unspec_volatile:SHORT
[(match_operand:SI 2 "aarch64_plus_operand" "rI") ;; expected [(match_operand:SHORT 2 "<cas_short_expected_pred>" "rn") ;; expected
(match_operand:SHORT 3 "aarch64_reg_or_zero" "rZ") ;; desired (match_operand:SHORT 3 "aarch64_reg_or_zero" "rZ") ;; desired
(match_operand:SI 4 "const_int_operand") ;; is_weak (match_operand:SI 4 "const_int_operand") ;; is_weak
(match_operand:SI 5 "const_int_operand") ;; mod_s (match_operand:SI 5 "const_int_operand") ;; mod_s
(match_operand:SI 6 "const_int_operand")] ;; mod_f (match_operand:SI 6 "const_int_operand")] ;; mod_f
UNSPECV_ATOMIC_CMPSW)) UNSPECV_ATOMIC_CMPSW))
(clobber (match_scratch:SI 7 "=&r"))] (clobber (match_scratch:SI 7 "=&r"))]
"" ""
...@@ -68,7 +71,7 @@ ...@@ -68,7 +71,7 @@
(match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
(set (match_dup 1) (set (match_dup 1)
(unspec_volatile:GPI (unspec_volatile:GPI
[(match_operand:GPI 2 "aarch64_plus_operand" "rI") ;; expect [(match_operand:GPI 2 "aarch64_plus_operand" "rn") ;; expect
(match_operand:GPI 3 "aarch64_reg_or_zero" "rZ") ;; desired (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ") ;; desired
(match_operand:SI 4 "const_int_operand") ;; is_weak (match_operand:SI 4 "const_int_operand") ;; is_weak
(match_operand:SI 5 "const_int_operand") ;; mod_s (match_operand:SI 5 "const_int_operand") ;; mod_s
......
...@@ -114,6 +114,18 @@ ...@@ -114,6 +114,18 @@
(ior (match_operand 0 "register_operand") (ior (match_operand 0 "register_operand")
(match_operand 0 "aarch64_plus_immediate"))) (match_operand 0 "aarch64_plus_immediate")))
;; True for an HImode constant that, once zero-extended to SImode, is a
;; valid addition immediate (used for the subword CAS expected operand).
(define_predicate "aarch64_plushi_immediate"
  (match_code "const_int")
{
  HOST_WIDE_INT val = INTVAL (op);
  /* The HImode value must be zero-extendable to an SImode plus_operand.
     Accept either a plain 12-bit immediate, or a value whose low 12 bits
     are clear (so only bits [15:12] are set, matching imm12 << 12).  */
  return ((val & 0xfff) == val || sext_hwi (val & 0xf000, 16) == val);
})
;; A register, or an HImode immediate acceptable to aarch64_plushi_immediate.
(define_predicate "aarch64_plushi_operand"
  (ior (match_operand 0 "register_operand")
       (match_operand 0 "aarch64_plushi_immediate")))
(define_predicate "aarch64_pluslong_immediate" (define_predicate "aarch64_pluslong_immediate"
(and (match_code "const_int") (and (match_code "const_int")
(match_test "(INTVAL (op) < 0xffffff && INTVAL (op) > -0xffffff)"))) (match_test "(INTVAL (op) < 0xffffff && INTVAL (op) > -0xffffff)")))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment