Commit 7803ec5e by Richard Henderson, committed by Richard Henderson

aarch64: Improve atomic-op lse generation

Fix constraints; avoid unnecessary split.  Drop the use of the atomic_op
iterator in favor of the ATOMIC_LDOP iterator; this is simpler and more
logical for ldclr aka bic.
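
For context, a minimal illustration (not part of the commit itself) of how the four LSE load-operate instructions cover the five operations handled below. There is no atomic-AND instruction; `ldclr` computes `mem &= ~value` (i.e. BIC), so AND is emitted as a complement plus `ldclr`, and SUB as a negate plus `ldadd`:

```c
/* Sketch only: expected LSE instruction selection for GCC with
   -march=armv8.1-a.  Exact register allocation and scheduling vary.  */
#include <stdint.h>

void lse_ops (uint32_t *p, uint32_t v)
{
  __atomic_fetch_add (p, v, __ATOMIC_RELAXED);  /* ldadd        */
  __atomic_fetch_sub (p, v, __ATOMIC_RELAXED);  /* neg + ldadd  */
  __atomic_fetch_or  (p, v, __ATOMIC_RELAXED);  /* ldset        */
  __atomic_fetch_xor (p, v, __ATOMIC_RELAXED);  /* ldeor        */
  __atomic_fetch_and (p, v, __ATOMIC_RELAXED);  /* mvn + ldclr  */
}
```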

	* config/aarch64/aarch64.c (aarch64_emit_bic): Remove.
	(aarch64_atomic_ldop_supported_p): Remove.
	(aarch64_gen_atomic_ldop): Remove.
	* config/aarch64/atomics.md (atomic_<atomic_optab><ALLI>):
	Fully expand LSE operations here.
	(atomic_fetch_<atomic_optab><ALLI>): Likewise.
	(atomic_<atomic_optab>_fetch<ALLI>): Likewise.
	(aarch64_atomic_<ATOMIC_LDOP><ALLI>_lse): Drop atomic_op iterator
	and use ATOMIC_LDOP instead; use register_operand for the input;
	drop the split and emit insns directly.
	(aarch64_atomic_fetch_<ATOMIC_LDOP><ALLI>_lse): Likewise.
	(aarch64_atomic_<atomic_op>_fetch<ALLI>_lse): Remove.
	(@aarch64_atomic_load<ATOMIC_LDOP><ALLI>): Remove.

From-SVN: r265660
parent 8f5603d3
gcc/ChangeLog
+2018-10-31  Richard Henderson  <richard.henderson@linaro.org>
+
+	* config/aarch64/aarch64.c (aarch64_emit_bic): Remove.
+	(aarch64_atomic_ldop_supported_p): Remove.
+	(aarch64_gen_atomic_ldop): Remove.
+	* config/aarch64/atomics.md (atomic_<atomic_optab><ALLI>):
+	Fully expand LSE operations here.
+	(atomic_fetch_<atomic_optab><ALLI>): Likewise.
+	(atomic_<atomic_optab>_fetch<ALLI>): Likewise.
+	(aarch64_atomic_<ATOMIC_LDOP><ALLI>_lse): Drop atomic_op iterator
+	and use ATOMIC_LDOP instead; use register_operand for the input;
+	drop the split and emit insns directly.
+	(aarch64_atomic_fetch_<ATOMIC_LDOP><ALLI>_lse): Likewise.
+	(aarch64_atomic_<atomic_op>_fetch<ALLI>_lse): Remove.
+	(@aarch64_atomic_load<ATOMIC_LDOP><ALLI>): Remove.
+
 2018-10-31  Richard Henderson  <richard.henderson@linaro.org>
 
 	* config/aarch64/aarch64.c (aarch64_emit_atomic_swap): Remove.
 	(aarch64_gen_atomic_ldop): Don't call it.
 	* config/aarch64/atomics.md (atomic_exchange<ALLI>):
gcc/config/aarch64/aarch64-protos.h
@@ -564,8 +564,6 @@ rtx aarch64_load_tp (rtx);
 void aarch64_expand_compare_and_swap (rtx op[]);
 void aarch64_split_compare_and_swap (rtx op[]);
-bool aarch64_atomic_ldop_supported_p (enum rtx_code);
-void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
 void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
 bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE);
gcc/config/aarch64/aarch64.c
@@ -14663,32 +14663,6 @@ aarch64_expand_compare_and_swap (rtx operands[])
   emit_insn (gen_rtx_SET (bval, x));
 }
 
-/* Test whether the target supports using an atomic load-operate instruction.
-   CODE is the operation and AFTER is TRUE if the data in memory after the
-   operation should be returned and FALSE if the data before the operation
-   should be returned.  Returns FALSE if the operation isn't supported by the
-   architecture.  */
-
-bool
-aarch64_atomic_ldop_supported_p (enum rtx_code code)
-{
-  if (!TARGET_LSE)
-    return false;
-
-  switch (code)
-    {
-    case SET:
-    case AND:
-    case IOR:
-    case XOR:
-    case MINUS:
-    case PLUS:
-      return true;
-    default:
-      return false;
-    }
-}
-
 /* Emit a barrier, that is appropriate for memory model MODEL, at the end of a
    sequence implementing an atomic operation.  */
@@ -14821,156 +14795,6 @@ aarch64_split_compare_and_swap (rtx operands[])
   aarch64_emit_post_barrier (model);
 }
 
-/* Emit a BIC instruction.  */
-
-static void
-aarch64_emit_bic (machine_mode mode, rtx dst, rtx s1, rtx s2, int shift)
-{
-  rtx shift_rtx = GEN_INT (shift);
-  rtx (*gen) (rtx, rtx, rtx, rtx);
-
-  switch (mode)
-    {
-    case E_SImode: gen = gen_and_one_cmpl_lshrsi3; break;
-    case E_DImode: gen = gen_and_one_cmpl_lshrdi3; break;
-    default:
-      gcc_unreachable ();
-    }
-
-  emit_insn (gen (dst, s2, shift_rtx, s1));
-}
-
-/* Emit an atomic load+operate.  CODE is the operation.  OUT_DATA is the
-   location to store the data read from memory.  OUT_RESULT is the location to
-   store the result of the operation.  MEM is the memory location to read and
-   modify.  MODEL_RTX is the memory ordering to use.  VALUE is the second
-   operand for the operation.  Either OUT_DATA or OUT_RESULT, but not both, can
-   be NULL.  */
-
-void
-aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data, rtx out_result,
-                         rtx mem, rtx value, rtx model_rtx)
-{
-  machine_mode mode = GET_MODE (mem);
-  machine_mode wmode = (mode == DImode ? DImode : SImode);
-  const bool short_mode = (mode < SImode);
-  int ldop_code;
-  rtx src;
-  rtx x;
-
-  if (out_data)
-    out_data = gen_lowpart (mode, out_data);
-
-  if (out_result)
-    out_result = gen_lowpart (mode, out_result);
-
-  /* Make sure the value is in a register, putting it into a destination
-     register if it needs to be manipulated.  */
-  if (!register_operand (value, mode)
-      || code == AND || code == MINUS)
-    {
-      src = out_result ? out_result : out_data;
-      emit_move_insn (src, gen_lowpart (mode, value));
-    }
-  else
-    src = value;
-  gcc_assert (register_operand (src, mode));
-
-  /* Preprocess the data for the operation as necessary.  If the operation is
-     a SET then emit a swap instruction and finish.  */
-  switch (code)
-    {
-    case MINUS:
-      /* Negate the value and treat it as a PLUS.  */
-      {
-        rtx neg_src;
-
-        /* Resize the value if necessary.  */
-        if (short_mode)
-          src = gen_lowpart (wmode, src);
-
-        neg_src = gen_rtx_NEG (wmode, src);
-        emit_insn (gen_rtx_SET (src, neg_src));
-
-        if (short_mode)
-          src = gen_lowpart (mode, src);
-      }
-      /* Fall-through.  */
-    case PLUS:
-      ldop_code = UNSPECV_ATOMIC_LDOP_PLUS;
-      break;
-
-    case IOR:
-      ldop_code = UNSPECV_ATOMIC_LDOP_OR;
-      break;
-
-    case XOR:
-      ldop_code = UNSPECV_ATOMIC_LDOP_XOR;
-      break;
-
-    case AND:
-      {
-        rtx not_src;
-
-        /* Resize the value if necessary.  */
-        if (short_mode)
-          src = gen_lowpart (wmode, src);
-
-        not_src = gen_rtx_NOT (wmode, src);
-        emit_insn (gen_rtx_SET (src, not_src));
-
-        if (short_mode)
-          src = gen_lowpart (mode, src);
-      }
-      ldop_code = UNSPECV_ATOMIC_LDOP_BIC;
-      break;
-
-    default:
-      /* The operation can't be done with atomic instructions.  */
-      gcc_unreachable ();
-    }
-
-  emit_insn (gen_aarch64_atomic_load (ldop_code, mode,
-                                      out_data, mem, src, model_rtx));
-
-  /* If necessary, calculate the data in memory after the update by redoing the
-     operation from values in registers.  */
-  if (!out_result)
-    return;
-
-  if (short_mode)
-    {
-      src = gen_lowpart (wmode, src);
-      out_data = gen_lowpart (wmode, out_data);
-      out_result = gen_lowpart (wmode, out_result);
-    }
-
-  x = NULL_RTX;
-  switch (code)
-    {
-    case MINUS:
-    case PLUS:
-      x = gen_rtx_PLUS (wmode, out_data, src);
-      break;
-    case IOR:
-      x = gen_rtx_IOR (wmode, out_data, src);
-      break;
-    case XOR:
-      x = gen_rtx_XOR (wmode, out_data, src);
-      break;
-    case AND:
-      aarch64_emit_bic (wmode, out_result, out_data, src, 0);
-      return;
-    default:
-      gcc_unreachable ();
-    }
-
-  emit_set_insn (out_result, x);
-  return;
-}
-
 /* Split an atomic operation.  */
 
 void
gcc/config/aarch64/atomics.md
@@ -207,13 +207,37 @@
     rtx (*gen) (rtx, rtx, rtx);
 
     /* Use an atomic load-operate instruction when possible.  */
-    if (aarch64_atomic_ldop_supported_p (<CODE>))
-      gen = gen_aarch64_atomic_<atomic_optab><mode>_lse;
+    if (TARGET_LSE)
+      {
+        switch (<CODE>)
+          {
+          case MINUS:
+            operands[1] = expand_simple_unop (<MODE>mode, NEG, operands[1],
+                                              NULL, 1);
+            /* fallthru */
+          case PLUS:
+            gen = gen_aarch64_atomic_add<mode>_lse;
+            break;
+          case IOR:
+            gen = gen_aarch64_atomic_ior<mode>_lse;
+            break;
+          case XOR:
+            gen = gen_aarch64_atomic_xor<mode>_lse;
+            break;
+          case AND:
+            operands[1] = expand_simple_unop (<MODE>mode, NOT, operands[1],
+                                              NULL, 1);
+            gen = gen_aarch64_atomic_bic<mode>_lse;
+            break;
+          default:
+            gcc_unreachable ();
+          }
+        operands[1] = force_reg (<MODE>mode, operands[1]);
+      }
     else
       gen = gen_aarch64_atomic_<atomic_optab><mode>;
 
     emit_insn (gen (operands[0], operands[1], operands[2]));
     DONE;
   }
 )
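
As a concrete example of what this expander now produces (a sketch, assuming -march=armv8.1-a; the scratch register and exact sequence may differ), an atomic operation whose result is unused becomes a single load-operate instruction instead of an LL/SC retry loop:

```c
#include <stdint.h>

/* Matches the atomic_<atomic_optab><mode> expander above: the mask is
   complemented up front, then the bic (ldclr) pattern is used.  One
   would expect roughly:
       mvn   w1, w1
       ldclr w1, w2, [x0]    // w2 is the scratch from the clobber
   rather than a ldxr/stxr loop.  */
void clear_flags (uint32_t *flags, uint32_t mask)
{
  (void) __atomic_fetch_and (flags, mask, __ATOMIC_RELAXED);
}
```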
@@ -239,22 +263,25 @@
   }
 )
 
-(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>_lse"
+(define_insn "aarch64_atomic_<atomic_ldoptab><mode>_lse"
   [(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
-        (unspec_volatile:ALLI
-          [(atomic_op:ALLI (match_dup 0)
-             (match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
-           (match_operand:SI 2 "const_int_operand")]
-          UNSPECV_ATOMIC_OP))
+        (unspec_volatile:ALLI
+          [(match_dup 0)
+           (match_operand:ALLI 1 "register_operand" "r")
+           (match_operand:SI 2 "const_int_operand")]
+          ATOMIC_LDOP))
    (clobber (match_scratch:ALLI 3 "=&r"))]
   "TARGET_LSE"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
   {
-    aarch64_gen_atomic_ldop (<CODE>, operands[3], NULL, operands[0],
-                             operands[1], operands[2]);
-    DONE;
+    enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+    if (is_mm_relaxed (model))
+      return "ld<atomic_ldop><atomic_sfx>\t%<w>1, %<w>3, %0";
+    else if (is_mm_release (model))
+      return "ld<atomic_ldop>l<atomic_sfx>\t%<w>1, %<w>3, %0";
+    else if (is_mm_acquire (model) || is_mm_consume (model))
+      return "ld<atomic_ldop>a<atomic_sfx>\t%<w>1, %<w>3, %0";
+    else
+      return "ld<atomic_ldop>al<atomic_sfx>\t%<w>1, %<w>3, %0";
   }
 )
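
The four assembler templates above select the acquire/release flavour of the instruction from the memory model at compile time. The mapping to the C11 memory orders, sketched with `ldadd`:

```c
#include <stdint.h>

/* Expected mnemonics per memory order (consume is treated as acquire,
   matching the is_mm_acquire || is_mm_consume test above):
     __ATOMIC_RELAXED                      ->  ldadd
     __ATOMIC_ACQUIRE / __ATOMIC_CONSUME   ->  ldadda
     __ATOMIC_RELEASE                      ->  ldaddl
     __ATOMIC_ACQ_REL / __ATOMIC_SEQ_CST   ->  ldaddal  */
void orders (uint64_t *p, uint64_t v)
{
  __atomic_fetch_add (p, v, __ATOMIC_RELAXED);
  __atomic_fetch_add (p, v, __ATOMIC_ACQUIRE);
  __atomic_fetch_add (p, v, __ATOMIC_RELEASE);
  __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST);
}
```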
@@ -280,7 +307,7 @@
   }
 )
 
-;; Load-operate-store, returning the updated memory data.
+;; Load-operate-store, returning the original memory data.
 
 (define_expand "atomic_fetch_<atomic_optab><mode>"
  [(match_operand:ALLI 0 "register_operand" "")
@@ -293,13 +320,37 @@
   rtx (*gen) (rtx, rtx, rtx, rtx);
 
   /* Use an atomic load-operate instruction when possible.  */
-  if (aarch64_atomic_ldop_supported_p (<CODE>))
-    gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>_lse;
+  if (TARGET_LSE)
+    {
+      switch (<CODE>)
+        {
+        case MINUS:
+          operands[2] = expand_simple_unop (<MODE>mode, NEG, operands[2],
+                                            NULL, 1);
+          /* fallthru */
+        case PLUS:
+          gen = gen_aarch64_atomic_fetch_add<mode>_lse;
+          break;
+        case IOR:
+          gen = gen_aarch64_atomic_fetch_ior<mode>_lse;
+          break;
+        case XOR:
+          gen = gen_aarch64_atomic_fetch_xor<mode>_lse;
+          break;
+        case AND:
+          operands[2] = expand_simple_unop (<MODE>mode, NOT, operands[2],
+                                            NULL, 1);
+          gen = gen_aarch64_atomic_fetch_bic<mode>_lse;
+          break;
+        default:
+          gcc_unreachable ();
+        }
+      operands[2] = force_reg (<MODE>mode, operands[2]);
+    }
   else
     gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>;
 
   emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
   DONE;
 })
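
For the fetch forms, the same pre-transformation applies to the value operand. For example (a sketch, assuming -march=armv8.1-a), a fetch-and-subtract negates once outside the atomic operation:

```c
#include <stdint.h>

/* Goes through the atomic_fetch_<atomic_optab><mode> expander above:
   MINUS is rewritten as a negate plus the fetch_add pattern, e.g.
       neg     w1, w1
       ldaddal w1, w0, [x0]
   returning the value that was in memory before the update.  */
uint32_t fetch_sub (uint32_t *p, uint32_t v)
{
  return __atomic_fetch_sub (p, v, __ATOMIC_SEQ_CST);
}
```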
@@ -326,23 +377,26 @@
   }
 )
 
-(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>_lse"
-  [(set (match_operand:ALLI 0 "register_operand" "=&r")
-        (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
+(define_insn "aarch64_atomic_fetch_<atomic_ldoptab><mode>_lse"
+  [(set (match_operand:ALLI 0 "register_operand" "=r")
+        (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
    (set (match_dup 1)
-        (unspec_volatile:ALLI
-          [(atomic_op:ALLI (match_dup 1)
-             (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>"))
-           (match_operand:SI 3 "const_int_operand")]
-          UNSPECV_ATOMIC_LDOP))]
+        (unspec_volatile:ALLI
+          [(match_dup 1)
+           (match_operand:ALLI 2 "register_operand" "r")
+           (match_operand:SI 3 "const_int_operand")]
+          ATOMIC_LDOP))]
   "TARGET_LSE"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
   {
-    aarch64_gen_atomic_ldop (<CODE>, operands[0], NULL, operands[1],
-                             operands[2], operands[3]);
-    DONE;
+    enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+    if (is_mm_relaxed (model))
+      return "ld<atomic_ldop><atomic_sfx>\t%<w>2, %<w>0, %1";
+    else if (is_mm_acquire (model) || is_mm_consume (model))
+      return "ld<atomic_ldop>a<atomic_sfx>\t%<w>2, %<w>0, %1";
+    else if (is_mm_release (model))
+      return "ld<atomic_ldop>l<atomic_sfx>\t%<w>2, %<w>0, %1";
+    else
+      return "ld<atomic_ldop>al<atomic_sfx>\t%<w>2, %<w>0, %1";
   }
 )
@@ -370,7 +424,7 @@
   }
 )
 
-;; Load-operate-store, returning the original memory data.
+;; Load-operate-store, returning the updated memory data.
 
 (define_expand "atomic_<atomic_optab>_fetch<mode>"
  [(match_operand:ALLI 0 "register_operand" "")
@@ -380,17 +434,23 @@
   (match_operand:SI 3 "const_int_operand")]
   ""
 {
-  rtx (*gen) (rtx, rtx, rtx, rtx);
-  rtx value = operands[2];
-
-  /* Use an atomic load-operate instruction when possible.  */
-  if (aarch64_atomic_ldop_supported_p (<CODE>))
-    gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>_lse;
+  /* Use an atomic load-operate instruction when possible.  In this case
+     we will re-compute the result from the original mem value.  */
+  if (TARGET_LSE)
+    {
+      rtx tmp = gen_reg_rtx (<MODE>mode);
+      operands[2] = force_reg (<MODE>mode, operands[2]);
+      emit_insn (gen_atomic_fetch_<atomic_optab><mode>
+                 (tmp, operands[1], operands[2], operands[3]));
+      tmp = expand_simple_binop (<MODE>mode, <CODE>, tmp, operands[2],
+                                 operands[0], 1, OPTAB_WIDEN);
+      emit_move_insn (operands[0], tmp);
+    }
   else
-    gen = gen_aarch64_atomic_<atomic_optab>_fetch<mode>;
-
-  emit_insn (gen (operands[0], operands[1], value, operands[3]));
+    {
+      emit_insn (gen_aarch64_atomic_<atomic_optab>_fetch<mode>
+                 (operands[0], operands[1], operands[2], operands[3]));
+    }
   DONE;
 })
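
No LSE instruction returns the updated value, so this expander issues the fetch form and recomputes the result in registers. Sketched (assuming -march=armv8.1-a):

```c
#include <stdint.h>

/* Matches the atomic_<atomic_optab>_fetch<mode> expander above: fetch
   the old value with the load-operate instruction, then redo the
   operation on the result, e.g. roughly:
       ldaddal w1, w2, [x0]
       add     w0, w2, w1  */
uint32_t add_fetch (uint32_t *p, uint32_t v)
{
  return __atomic_add_fetch (p, v, __ATOMIC_SEQ_CST);
}
```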
@@ -417,29 +477,6 @@
   }
 )
 
-(define_insn_and_split "aarch64_atomic_<atomic_optab>_fetch<mode>_lse"
-  [(set (match_operand:ALLI 0 "register_operand" "=&r")
-        (atomic_op:ALLI
-          (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")
-          (match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>")))
-   (set (match_dup 1)
-        (unspec_volatile:ALLI
-          [(match_dup 1)
-           (match_dup 2)
-           (match_operand:SI 3 "const_int_operand")]
-          UNSPECV_ATOMIC_LDOP))
-   (clobber (match_scratch:ALLI 4 "=&r"))]
-  "TARGET_LSE"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
-  {
-    aarch64_gen_atomic_ldop (<CODE>, operands[4], operands[0], operands[1],
-                             operands[2], operands[3]);
-    DONE;
-  }
-)
-
 (define_insn_and_split "atomic_nand_fetch<mode>"
   [(set (match_operand:ALLI 0 "register_operand" "=&r")
         (not:ALLI
@@ -585,29 +622,3 @@
     return "dmb\\tish";
   }
 )
-
-;; ARMv8.1-A LSE instructions.
-
-;; Atomic load-op: Load data, operate, store result, keep data.
-
-(define_insn "@aarch64_atomic_load<atomic_ldop><mode>"
-  [(set (match_operand:ALLI 0 "register_operand" "=r")
-        (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
-   (set (match_dup 1)
-        (unspec_volatile:ALLI
-          [(match_dup 1)
-           (match_operand:ALLI 2 "register_operand")
-           (match_operand:SI 3 "const_int_operand")]
-          ATOMIC_LDOP))]
-  "TARGET_LSE && reload_completed"
-  {
-    enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
-    if (is_mm_relaxed (model))
-      return "ld<atomic_ldop><atomic_sfx>\t%<w>2, %<w>0, %1";
-    else if (is_mm_acquire (model) || is_mm_consume (model))
-      return "ld<atomic_ldop>a<atomic_sfx>\t%<w>2, %<w>0, %1";
-    else if (is_mm_release (model))
-      return "ld<atomic_ldop>l<atomic_sfx>\t%<w>2, %<w>0, %1";
-    else
-      return "ld<atomic_ldop>al<atomic_sfx>\t%<w>2, %<w>0, %1";
-  })
gcc/config/aarch64/iterators.md
@@ -503,7 +503,6 @@
     UNSPECV_ATOMIC_CAS      ; Represent an atomic CAS.
     UNSPECV_ATOMIC_SWP      ; Represent an atomic SWP.
     UNSPECV_ATOMIC_OP       ; Represent an atomic operation.
-    UNSPECV_ATOMIC_LDOP     ; Represent an atomic load-operation
     UNSPECV_ATOMIC_LDOP_OR  ; Represent an atomic load-or
     UNSPECV_ATOMIC_LDOP_BIC ; Represent an atomic load-bic
     UNSPECV_ATOMIC_LDOP_XOR ; Represent an atomic load-xor
@@ -1591,6 +1590,10 @@
   [(UNSPECV_ATOMIC_LDOP_OR "set") (UNSPECV_ATOMIC_LDOP_BIC "clr")
    (UNSPECV_ATOMIC_LDOP_XOR "eor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
 
+(define_int_attr atomic_ldoptab
+  [(UNSPECV_ATOMIC_LDOP_OR "ior") (UNSPECV_ATOMIC_LDOP_BIC "bic")
+   (UNSPECV_ATOMIC_LDOP_XOR "xor") (UNSPECV_ATOMIC_LDOP_PLUS "add")])
+
 ;; -------------------------------------------------------------------
 ;; Int Iterators Attributes.
 ;; -------------------------------------------------------------------