Commit 641c2f8b by Matthew Wahab, committed by Matthew Wahab

[AArch64] Use atomic load-operate instructions for fetch-update patterns.

gcc/
2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* config/aarch64/aarch64-protos.h
	(aarch64_atomic_ldop_supported_p): Declare.
	* config/aarch64/aarch64.c (aarch64_atomic_ldop_supported_p): New.
	(enum aarch64_atomic_load_op_code): New.
	(aarch64_emit_atomic_load_op): New.
	(aarch64_gen_atomic_ldop): Update to support load-operate
	patterns.
	* config/aarch64/atomics.md (atomic_<atomic_optab><mode>): Change
	to an expander.
	(aarch64_atomic_<atomic_optab><mode>): New.
	(aarch64_atomic_<atomic_optab><mode>_lse): New.
	(atomic_fetch_<atomic_optab><mode>): Change to an expander.
	(aarch64_atomic_fetch_<atomic_optab><mode>): New.
	(aarch64_atomic_fetch_<atomic_optab><mode>_lse): New.

gcc/testsuite/
2015-09-22  Matthew Wahab  <matthew.wahab@arm.com>

	* gcc.target/aarch64/atomic-inst-ldadd.c: New.
	* gcc.target/aarch64/atomic-inst-ldlogic.c: New.

From-SVN: r228001
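As background (an illustrative sketch, not part of the patch): a fetch-update builtin such as the one below previously expanded to a load-exclusive/store-exclusive retry loop on AArch64; with this change and -march=armv8-a+lse it can expand to a single atomic load-operate instruction (LDADD in this case). The function name is hypothetical.

long
fetch_add_relaxed (long *mem, long value)
{
  /* Atomically: tmp = *mem; *mem = tmp + value; return tmp.  */
  return __atomic_fetch_add (mem, value, __ATOMIC_RELAXED);
}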
gcc/config/aarch64/aarch64-protos.h
@@ -378,6 +378,8 @@ rtx aarch64_load_tp (rtx);
void aarch64_expand_compare_and_swap (rtx op[]);
void aarch64_split_compare_and_swap (rtx op[]);
void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
bool aarch64_atomic_ldop_supported_p (enum rtx_code);
void aarch64_gen_atomic_ldop (enum rtx_code, rtx, rtx, rtx, rtx);
void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
gcc/config/aarch64/aarch64.c
@@ -10871,6 +10871,32 @@ aarch64_expand_compare_and_swap (rtx operands[])
emit_insn (gen_rtx_SET (bval, x));
}
/* Test whether the target supports using an atomic load-operate instruction.
CODE is the operation.  Returns FALSE if the operation isn't supported by
the architecture.  */
bool
aarch64_atomic_ldop_supported_p (enum rtx_code code)
{
if (!TARGET_LSE)
return false;
switch (code)
{
case SET:
case AND:
case IOR:
case XOR:
case MINUS:
case PLUS:
return true;
default:
return false;
}
}
/* Emit a barrier that is appropriate for memory model MODEL, at the end of a
sequence implementing an atomic operation.  */
@@ -11013,26 +11039,169 @@ aarch64_emit_atomic_swap (machine_mode mode, rtx dst, rtx value,
emit_insn (gen (dst, mem, value, model));
}
/* Operations supported by aarch64_emit_atomic_load_op.  */
enum aarch64_atomic_load_op_code
{
AARCH64_LDOP_PLUS, /* A + B  (LDADD)  */
AARCH64_LDOP_XOR, /* A ^ B  (LDEOR)  */
AARCH64_LDOP_OR, /* A | B  (LDSET)  */
AARCH64_LDOP_BIC /* A & ~B (LDCLR)  */
};
/* Emit an atomic load-operate. */
static void
aarch64_emit_atomic_load_op (enum aarch64_atomic_load_op_code code,
machine_mode mode, rtx dst, rtx src,
rtx mem, rtx model)
{
typedef rtx (*aarch64_atomic_load_op_fn) (rtx, rtx, rtx, rtx);
const aarch64_atomic_load_op_fn plus[] =
{
gen_aarch64_atomic_loadaddqi,
gen_aarch64_atomic_loadaddhi,
gen_aarch64_atomic_loadaddsi,
gen_aarch64_atomic_loadadddi
};
const aarch64_atomic_load_op_fn eor[] =
{
gen_aarch64_atomic_loadeorqi,
gen_aarch64_atomic_loadeorhi,
gen_aarch64_atomic_loadeorsi,
gen_aarch64_atomic_loadeordi
};
const aarch64_atomic_load_op_fn ior[] =
{
gen_aarch64_atomic_loadsetqi,
gen_aarch64_atomic_loadsethi,
gen_aarch64_atomic_loadsetsi,
gen_aarch64_atomic_loadsetdi
};
const aarch64_atomic_load_op_fn bic[] =
{
gen_aarch64_atomic_loadclrqi,
gen_aarch64_atomic_loadclrhi,
gen_aarch64_atomic_loadclrsi,
gen_aarch64_atomic_loadclrdi
};
aarch64_atomic_load_op_fn gen;
int idx = 0;
switch (mode)
{
case QImode: idx = 0; break;
case HImode: idx = 1; break;
case SImode: idx = 2; break;
case DImode: idx = 3; break;
default:
gcc_unreachable ();
}
switch (code)
{
case AARCH64_LDOP_PLUS: gen = plus[idx]; break;
case AARCH64_LDOP_XOR: gen = eor[idx]; break;
case AARCH64_LDOP_OR: gen = ior[idx]; break;
case AARCH64_LDOP_BIC: gen = bic[idx]; break;
default:
gcc_unreachable ();
}
emit_insn (gen (dst, mem, src, model));
}
/* Emit an atomic load-operate.  CODE is the operation.  OUT_DATA is the
location to store the data read from memory.  MEM is the memory location to
read and modify.  MODEL_RTX is the memory ordering to use.  VALUE is the
second operand for the operation.  OUT_DATA may be NULL.  */
void
aarch64_gen_atomic_ldop (enum rtx_code code, rtx out_data,
rtx mem, rtx value, rtx model_rtx)
{
machine_mode mode = GET_MODE (mem);
machine_mode wmode = (mode == DImode ? DImode : SImode);
const bool short_mode = (mode < SImode);
aarch64_atomic_load_op_code ldop_code;
rtx src;
rtx x;
if (out_data)
out_data = gen_lowpart (mode, out_data);
/* Make sure the value is in a register, putting it into a destination
register if it needs to be manipulated. */
if (!register_operand (value, mode)
|| code == AND || code == MINUS)
{
src = out_data;
emit_move_insn (src, gen_lowpart (mode, value));
}
else
src = value;
gcc_assert (register_operand (src, mode));
/* Preprocess the data for the operation as necessary. If the operation is
a SET then emit a swap instruction and finish. */
switch (code)
{
case SET:
aarch64_emit_atomic_swap (mode, out_data, src, mem, model_rtx);
return;
case MINUS:
/* Negate the value and treat it as a PLUS. */
{
rtx neg_src;
/* Resize the value if necessary. */
if (short_mode)
src = gen_lowpart (wmode, src);
neg_src = gen_rtx_NEG (wmode, src);
emit_insn (gen_rtx_SET (src, neg_src));
if (short_mode)
src = gen_lowpart (mode, src);
}
/* Fall-through. */
case PLUS:
ldop_code = AARCH64_LDOP_PLUS;
break;
case IOR:
ldop_code = AARCH64_LDOP_OR;
break;
case XOR:
ldop_code = AARCH64_LDOP_XOR;
break;
case AND:
{
rtx not_src;
/* Resize the value if necessary. */
if (short_mode)
src = gen_lowpart (wmode, src);
not_src = gen_rtx_NOT (wmode, src);
emit_insn (gen_rtx_SET (src, not_src));
if (short_mode)
src = gen_lowpart (mode, src);
}
ldop_code = AARCH64_LDOP_BIC;
break;
default:
/* The operation can't be done with atomic instructions. */
gcc_unreachable ();
}
aarch64_emit_atomic_load_op (ldop_code, mode, out_data, src, mem, model_rtx);
}
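/* Illustrative check (not part of the patch) of the two identities the
   MINUS and AND cases above rely on: subtraction becomes addition of the
   negated operand (so LDADD can be used), and AND becomes a bit-clear (BIC)
   of the inverted operand (so LDCLR can be used).  */
#include <assert.h>
#include <stdint.h>
int
main (void)
{
  uint32_t a = 0x12345678u, b = 0x0000ff00u;
  assert (a - b == a + (-b));        /* MINUS handled as PLUS of -b.  */
  assert ((a & b) == (a & ~(~b)));   /* AND handled as BIC of ~b.     */
  return 0;
}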
/* Split an atomic operation. */
gcc/config/aarch64/atomics.md
@@ -225,23 +225,63 @@
}
)
(define_insn_and_split "atomic_<atomic_optab><mode>"
(define_expand "atomic_<atomic_optab><mode>"
[(match_operand:ALLI 0 "aarch64_sync_memory_operand" "")
(atomic_op:ALLI
(match_operand:ALLI 1 "<atomic_op_operand>" "")
(match_operand:SI 2 "const_int_operand"))]
""
{
rtx (*gen) (rtx, rtx, rtx);
/* Use an atomic load-operate instruction when possible. */
if (aarch64_atomic_ldop_supported_p (<CODE>))
gen = gen_aarch64_atomic_<atomic_optab><mode>_lse;
else
gen = gen_aarch64_atomic_<atomic_optab><mode>;
emit_insn (gen (operands[0], operands[1], operands[2]));
DONE;
}
)
(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
(unspec_volatile:ALLI
[(atomic_op:ALLI (match_dup 0)
(match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
(match_operand:SI 2 "const_int_operand")]
UNSPECV_ATOMIC_OP))
(clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:ALLI 3 "=&r"))
(clobber (match_scratch:SI 4 "=&r"))]
""
"#"
"&& reload_completed"
[(const_int 0)]
{
aarch64_split_atomic_op (<CODE>, NULL, operands[3], operands[0],
operands[1], operands[2], operands[4]);
DONE;
}
)
(define_insn_and_split "aarch64_atomic_<atomic_optab><mode>_lse"
[(set (match_operand:ALLI 0 "aarch64_sync_memory_operand" "+Q")
(unspec_volatile:ALLI
[(atomic_op:ALLI (match_dup 0)
(match_operand:ALLI 1 "<atomic_op_operand>" "r<const_atomic>"))
(match_operand:SI 2 "const_int_operand")] ;; model
(match_operand:SI 2 "const_int_operand")]
UNSPECV_ATOMIC_OP))
(clobber (reg:CC CC_REGNUM))
(clobber (match_scratch:ALLI 3 "=&r"))
(clobber (match_scratch:SI 4 "=&r"))]
""
(clobber (match_scratch:ALLI 3 "=&r"))]
"TARGET_LSE"
"#"
"&& reload_completed"
[(const_int 0)]
{
aarch64_split_atomic_op (<CODE>, NULL, operands[3], operands[0],
operands[1], operands[2], operands[4]);
aarch64_gen_atomic_ldop (<CODE>, operands[3], operands[0],
operands[1], operands[2]);
DONE;
}
)
@@ -268,7 +308,30 @@
}
)
(define_insn_and_split "atomic_fetch_<atomic_optab><mode>"
;; Load-operate-store, returning the updated memory data.
(define_expand "atomic_fetch_<atomic_optab><mode>"
[(match_operand:ALLI 0 "register_operand" "")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "")
(atomic_op:ALLI
(match_operand:ALLI 2 "<atomic_op_operand>" "")
(match_operand:SI 3 "const_int_operand"))]
""
{
rtx (*gen) (rtx, rtx, rtx, rtx);
/* Use an atomic load-operate instruction when possible. */
if (aarch64_atomic_ldop_supported_p (<CODE>))
gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>_lse;
else
gen = gen_aarch64_atomic_fetch_<atomic_optab><mode>;
emit_insn (gen (operands[0], operands[1], operands[2], operands[3]));
DONE;
})
(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
(set (match_dup 1)
@@ -291,6 +354,26 @@
}
)
(define_insn_and_split "aarch64_atomic_fetch_<atomic_optab><mode>_lse"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
(set (match_dup 1)
(unspec_volatile:ALLI
[(atomic_op:ALLI (match_dup 1)
(match_operand:ALLI 2 "<atomic_op_operand>" "r<const_atomic>"))
(match_operand:SI 3 "const_int_operand")]
UNSPECV_ATOMIC_LDOP))]
"TARGET_LSE"
"#"
"&& reload_completed"
[(const_int 0)]
{
aarch64_gen_atomic_ldop (<CODE>, operands[0], operands[1],
operands[2], operands[3]);
DONE;
}
)
(define_insn_and_split "atomic_fetch_nand<mode>"
[(set (match_operand:ALLI 0 "register_operand" "=&r")
(match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q"))
gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+lse" } */
/* Test ARMv8.1-A Load-ADD instruction. */
#include "atomic-inst-ops.inc"
#define TEST TEST_ONE
#define LOAD_ADD(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_add (val, foo, MODEL); \
}
#define LOAD_ADD_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_add (val, foo, MODEL); \
}
#define LOAD_SUB(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_sub (val, foo, MODEL); \
}
#define LOAD_SUB_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_sub (val, foo, MODEL); \
}
TEST (load_add, LOAD_ADD)
TEST (load_add_notreturn, LOAD_ADD_NORETURN)
TEST (load_sub, LOAD_SUB)
TEST (load_sub_notreturn, LOAD_SUB_NORETURN)
/* { dg-final { scan-assembler-times "ldaddb\t" 8} } */
/* { dg-final { scan-assembler-times "ldaddab\t" 16} } */
/* { dg-final { scan-assembler-times "ldaddlb\t" 8} } */
/* { dg-final { scan-assembler-times "ldaddalb\t" 16} } */
/* { dg-final { scan-assembler-times "ldaddh\t" 8} } */
/* { dg-final { scan-assembler-times "ldaddah\t" 16} } */
/* { dg-final { scan-assembler-times "ldaddlh\t" 8} } */
/* { dg-final { scan-assembler-times "ldaddalh\t" 16} } */
/* { dg-final { scan-assembler-times "ldadd\t" 16} } */
/* { dg-final { scan-assembler-times "ldadda\t" 32} } */
/* { dg-final { scan-assembler-times "ldaddl\t" 16} } */
/* { dg-final { scan-assembler-times "ldaddal\t" 32} } */
/* { dg-final { scan-assembler-not "ldaxr\t" } } */
/* { dg-final { scan-assembler-not "stlxr\t" } } */
/* { dg-final { scan-assembler-not "dmb" } } */
gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c
/* { dg-do compile } */
/* { dg-options "-O2 -march=armv8-a+lse" } */
/* Test ARMv8.1-A LD<logic-op> instruction. */
#include "atomic-inst-ops.inc"
#define TEST TEST_ONE
#define LOAD_OR(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_or (val, foo, MODEL); \
}
#define LOAD_OR_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_or (val, foo, MODEL); \
}
#define LOAD_AND(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_and (val, foo, MODEL); \
}
#define LOAD_AND_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_and (val, foo, MODEL); \
}
#define LOAD_XOR(FN, TY, MODEL) \
TY FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
return __atomic_fetch_xor (val, foo, MODEL); \
}
#define LOAD_XOR_NORETURN(FN, TY, MODEL) \
void FNNAME (FN, TY) (TY* val, TY* foo) \
{ \
__atomic_fetch_xor (val, foo, MODEL); \
}
TEST (load_or, LOAD_OR)
TEST (load_or_notreturn, LOAD_OR_NORETURN)
TEST (load_and, LOAD_AND)
TEST (load_and_notreturn, LOAD_AND_NORETURN)
TEST (load_xor, LOAD_XOR)
TEST (load_xor_notreturn, LOAD_XOR_NORETURN)
/* Load-OR. */
/* { dg-final { scan-assembler-times "ldsetb\t" 4} } */
/* { dg-final { scan-assembler-times "ldsetab\t" 8} } */
/* { dg-final { scan-assembler-times "ldsetlb\t" 4} } */
/* { dg-final { scan-assembler-times "ldsetalb\t" 8} } */
/* { dg-final { scan-assembler-times "ldseth\t" 4} } */
/* { dg-final { scan-assembler-times "ldsetah\t" 8} } */
/* { dg-final { scan-assembler-times "ldsetlh\t" 4} } */
/* { dg-final { scan-assembler-times "ldsetalh\t" 8} } */
/* { dg-final { scan-assembler-times "ldset\t" 8} } */
/* { dg-final { scan-assembler-times "ldseta\t" 16} } */
/* { dg-final { scan-assembler-times "ldsetl\t" 8} } */
/* { dg-final { scan-assembler-times "ldsetal\t" 16} } */
/* Load-AND. */
/* { dg-final { scan-assembler-times "ldclrb\t" 4} } */
/* { dg-final { scan-assembler-times "ldclrab\t" 8} } */
/* { dg-final { scan-assembler-times "ldclrlb\t" 4} } */
/* { dg-final { scan-assembler-times "ldclralb\t" 8} } */
/* { dg-final { scan-assembler-times "ldclrh\t" 4} } */
/* { dg-final { scan-assembler-times "ldclrah\t" 8} } */
/* { dg-final { scan-assembler-times "ldclrlh\t" 4} } */
/* { dg-final { scan-assembler-times "ldclralh\t" 8} } */
/* { dg-final { scan-assembler-times "ldclr\t" 8} */
/* { dg-final { scan-assembler-times "ldclra\t" 16} } */
/* { dg-final { scan-assembler-times "ldclrl\t" 8} } */
/* { dg-final { scan-assembler-times "ldclral\t" 16} } */
/* Load-XOR. */
/* { dg-final { scan-assembler-times "ldeorb\t" 4} } */
/* { dg-final { scan-assembler-times "ldeorab\t" 8} } */
/* { dg-final { scan-assembler-times "ldeorlb\t" 4} } */
/* { dg-final { scan-assembler-times "ldeoralb\t" 8} } */
/* { dg-final { scan-assembler-times "ldeorh\t" 4} } */
/* { dg-final { scan-assembler-times "ldeorah\t" 8} } */
/* { dg-final { scan-assembler-times "ldeorlh\t" 4} } */
/* { dg-final { scan-assembler-times "ldeoralh\t" 8} } */
/* { dg-final { scan-assembler-times "ldeor\t" 8} */
/* { dg-final { scan-assembler-times "ldeora\t" 16} } */
/* { dg-final { scan-assembler-times "ldeorl\t" 8} } */
/* { dg-final { scan-assembler-times "ldeoral\t" 16} } */
/* { dg-final { scan-assembler-not "ldaxr\t" } } */
/* { dg-final { scan-assembler-not "stlxr\t" } } */
/* { dg-final { scan-assembler-not "dmb" } } */