Commit 78ce265b by Richard Henderson Committed by Richard Henderson

s390: Convert from sync to atomic optabs

        * config/s390/s390.c (s390_emit_compare_and_swap): Update for
        atomic_compare_and_swapsi_internal name change.
        (s390_expand_insv): Try RISBG last, not requiring operands in
        registers but forcing them there.  Try a limited form of ICM.
        (s390_two_part_insv): New.
        (s390_expand_cs_hqi): Use it.  Update for atomic optabs.
        * config/s390/s390.md (UNSPEC_MOVA): New.
        (TDSI): Remove mode iterator.
        (DGPR, DINT, TDI): New mode iterators.
        (td): New mode attribute.
        (mem_signal_fence): New.
        (mem_thread_fence): Rename from memory_barrier; update for model arg.
        (mem_thread_fence_1): Rename from *memory_barrier.
        (atomic_load<DINT>, atomic_store<DINT>): New expanders.
        (atomic_loaddi_1, atomic_loadti_1): New.
        (atomic_storedi_1, atomic_storeti_1): New.
        (sync_compare_and_swap<TDSI>, sync_compare_and_swap<HQI>): Remove.
        (atomic_compare_and_swap<DGPR>, atomic_compare_and_swap<HQI>): New.
        (atomic_compare_and_swap<DGPR>_internal): New.
        (*sync_compare_and_swap<DW>, *sync_compare_and_swap<GPR>): Remove.
        (*atomic_compare_and_swap<TDI>_1): New.
        (*atomic_compare_and_swapdi_2, *atomic_compare_and_swapsi_3): New.
        (sync_<ATOMIC_Z196><GPR>, sync_old_<ATOMIC_Z196><GPR>): Remove.
        (atomic_fetch_<ATOMIC_Z196><GPR>): New.
        (atomic_fetch_<ATOMIC_Z196><GPR>_iaf): New.
        (sync_<ATOMIC><HQI>, sync_old_<ATOMIC><HQI>): Remove.
        (sync_new_<ATOMIC><HQI>): Remove.
        (atomic_<ATOMIC><HQI>, atomic_fetch_<ATOMIC><HQI>): New.
        (atomic_<ATOMIC>_fetch<HQI>): New.
        (sync_lock_test_and_set<HQI>): Remove.
        (atomic_exchange<HQI>): New.

From-SVN: r190209
parent 3e6011cf
2012-08-07 Richard Henderson <rth@redhat.com>
* config/s390/s390.c (s390_emit_compare_and_swap): Update for
atomic_compare_and_swapsi_internal name change.
(s390_expand_insv): Try RISBG last, not requiring operands in
registers but forcing them there. Try a limited form of ICM.
(s390_two_part_insv): New.
(s390_expand_cs_hqi): Use it. Update for atomic optabs.
* config/s390/s390.md (UNSPEC_MOVA): New.
(TDSI): Remove mode iterator.
(DGPR, DINT, TDI): New mode iterators.
(td): New mode attribute.
(mem_signal_fence): New.
(mem_thread_fence): Rename from memory_barrier; update for model arg.
(mem_thread_fence_1): Rename from *memory_barrier.
(atomic_load<DINT>, atomic_store<DINT>): New expanders.
(atomic_loaddi_1, atomic_loadti_1): New.
(atomic_storedi_1, atomic_storeti_1): New.
(sync_compare_and_swap<TDSI>, sync_compare_and_swap<HQI>): Remove.
(atomic_compare_and_swap<DGPR>, atomic_compare_and_swap<HQI>): New.
(atomic_compare_and_swap<DGPR>_internal): New.
(*sync_compare_and_swap<DW>, *sync_compare_and_swap<GPR>): Remove.
(*atomic_compare_and_swap<TDI>_1): New.
(*atomic_compare_and_swapdi_2, *atomic_compare_and_swapsi_3): New.
(sync_<ATOMIC_Z196><GPR>, sync_old_<ATOMIC_Z196><GPR>): Remove.
(atomic_fetch_<ATOMIC_Z196><GPR>): New.
(atomic_fetch_<ATOMIC_Z196><GPR>_iaf): New.
(sync_<ATOMIC><HQI>, sync_old_<ATOMIC><HQI>): Remove.
(sync_new_<ATOMIC><HQI>): Remove.
(atomic_<ATOMIC><HQI>, atomic_fetch_<ATOMIC><HQI>): New.
(atomic_<ATOMIC>_fetch<HQI>): New.
(sync_lock_test_and_set<HQI>): Remove.
(atomic_exchange<HQI>): New.
2012-08-07 Paul Brook <paul@codesourcery.com> 2012-08-07 Paul Brook <paul@codesourcery.com>
Sandra Loosemore <sandra@codesourcery.com> Sandra Loosemore <sandra@codesourcery.com>
......
...@@ -85,7 +85,8 @@ extern void s390_expand_setmem (rtx, rtx, rtx); ...@@ -85,7 +85,8 @@ extern void s390_expand_setmem (rtx, rtx, rtx);
extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx); extern bool s390_expand_cmpmem (rtx, rtx, rtx, rtx);
extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx); extern bool s390_expand_addcc (enum rtx_code, rtx, rtx, rtx, rtx, rtx);
extern bool s390_expand_insv (rtx, rtx, rtx, rtx); extern bool s390_expand_insv (rtx, rtx, rtx, rtx);
extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx, rtx); extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx,
rtx, rtx, bool);
extern void s390_expand_atomic (enum machine_mode, enum rtx_code, extern void s390_expand_atomic (enum machine_mode, enum rtx_code,
rtx, rtx, rtx, bool); rtx, rtx, rtx, bool);
extern rtx s390_return_addr_rtx (int, rtx); extern rtx s390_return_addr_rtx (int, rtx);
......
...@@ -896,10 +896,12 @@ s390_emit_compare (enum rtx_code code, rtx op0, rtx op1) ...@@ -896,10 +896,12 @@ s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
conditional branch testing the result. */ conditional branch testing the result. */
static rtx static rtx
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem, rtx cmp, rtx new_rtx) s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
rtx cmp, rtx new_rtx)
{ {
emit_insn (gen_sync_compare_and_swapsi (old, mem, cmp, new_rtx)); emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM), const0_rtx); return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
const0_rtx);
} }
/* Emit a jump instruction to TARGET. If COND is NULL_RTX, emit an /* Emit a jump instruction to TARGET. If COND is NULL_RTX, emit an
...@@ -4548,106 +4550,146 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src) ...@@ -4548,106 +4550,146 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
{ {
int bitsize = INTVAL (op1); int bitsize = INTVAL (op1);
int bitpos = INTVAL (op2); int bitpos = INTVAL (op2);
enum machine_mode mode = GET_MODE (dest);
enum machine_mode smode;
int smode_bsize, mode_bsize;
rtx op, clobber;
/* On z10 we can use the risbg instruction to implement insv. */ /* Generate INSERT IMMEDIATE (IILL et al). */
if (TARGET_Z10 /* (set (ze (reg)) (const_int)). */
&& ((GET_MODE (dest) == DImode && GET_MODE (src) == DImode) if (TARGET_ZARCH
|| (GET_MODE (dest) == SImode && GET_MODE (src) == SImode))) && register_operand (dest, word_mode)
&& (bitpos % 16) == 0
&& (bitsize % 16) == 0
&& const_int_operand (src, VOIDmode))
{ {
rtx op; HOST_WIDE_INT val = INTVAL (src);
rtx clobber; int regpos = bitpos + bitsize;
op = gen_rtx_SET (GET_MODE(src), while (regpos > bitpos)
gen_rtx_ZERO_EXTRACT (GET_MODE (dest), dest, op1, op2), {
src); enum machine_mode putmode;
clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); int putsize;
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
putmode = SImode;
else
putmode = HImode;
putsize = GET_MODE_BITSIZE (putmode);
regpos -= putsize;
emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
GEN_INT (putsize),
GEN_INT (regpos)),
gen_int_mode (val, putmode));
val >>= putsize;
}
gcc_assert (regpos == bitpos);
return true; return true;
} }
/* We need byte alignment. */ smode = smallest_mode_for_size (bitsize, MODE_INT);
if (bitsize % BITS_PER_UNIT) smode_bsize = GET_MODE_BITSIZE (smode);
return false; mode_bsize = GET_MODE_BITSIZE (mode);
/* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
if (bitpos == 0 if (bitpos == 0
&& memory_operand (dest, VOIDmode) && (bitsize % BITS_PER_UNIT) == 0
&& MEM_P (dest)
&& (register_operand (src, word_mode) && (register_operand (src, word_mode)
|| const_int_operand (src, VOIDmode))) || const_int_operand (src, VOIDmode)))
{ {
/* Emit standard pattern if possible. */ /* Emit standard pattern if possible. */
enum machine_mode mode = smallest_mode_for_size (bitsize, MODE_INT); if (smode_bsize == bitsize)
if (GET_MODE_BITSIZE (mode) == bitsize) {
emit_move_insn (adjust_address (dest, mode, 0), gen_lowpart (mode, src)); emit_move_insn (adjust_address (dest, smode, 0),
gen_lowpart (smode, src));
return true;
}
/* (set (ze (mem)) (const_int)). */ /* (set (ze (mem)) (const_int)). */
else if (const_int_operand (src, VOIDmode)) else if (const_int_operand (src, VOIDmode))
{ {
int size = bitsize / BITS_PER_UNIT; int size = bitsize / BITS_PER_UNIT;
rtx src_mem = adjust_address (force_const_mem (word_mode, src), BLKmode, rtx src_mem = adjust_address (force_const_mem (word_mode, src),
GET_MODE_SIZE (word_mode) - size); BLKmode,
UNITS_PER_WORD - size);
dest = adjust_address (dest, BLKmode, 0); dest = adjust_address (dest, BLKmode, 0);
set_mem_size (dest, size); set_mem_size (dest, size);
s390_expand_movmem (dest, src_mem, GEN_INT (size)); s390_expand_movmem (dest, src_mem, GEN_INT (size));
return true;
} }
/* (set (ze (mem)) (reg)). */ /* (set (ze (mem)) (reg)). */
else if (register_operand (src, word_mode)) else if (register_operand (src, word_mode))
{ {
if (bitsize <= GET_MODE_BITSIZE (SImode)) if (bitsize <= 32)
emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1, emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
const0_rtx), src); const0_rtx), src);
else else
{ {
/* Emit st,stcmh sequence. */ /* Emit st,stcmh sequence. */
int stcmh_width = bitsize - GET_MODE_BITSIZE (SImode); int stcmh_width = bitsize - 32;
int size = stcmh_width / BITS_PER_UNIT; int size = stcmh_width / BITS_PER_UNIT;
emit_move_insn (adjust_address (dest, SImode, size), emit_move_insn (adjust_address (dest, SImode, size),
gen_lowpart (SImode, src)); gen_lowpart (SImode, src));
set_mem_size (dest, size); set_mem_size (dest, size);
emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, GEN_INT emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
(stcmh_width), const0_rtx), GEN_INT (stcmh_width),
gen_rtx_LSHIFTRT (word_mode, src, GEN_INT const0_rtx),
(GET_MODE_BITSIZE (SImode)))); gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
}
return true;
} }
} }
else
return false;
/* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
if ((bitpos % BITS_PER_UNIT) == 0
&& (bitsize % BITS_PER_UNIT) == 0
&& (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
&& MEM_P (src)
&& (mode == DImode || mode == SImode)
&& register_operand (dest, mode))
{
/* Emit a strict_low_part pattern if possible. */
if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
{
op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
return true; return true;
} }
/* (set (ze (reg)) (const_int)). */ /* ??? There are more powerful versions of ICM that are not
if (TARGET_ZARCH completely represented in the md file. */
&& register_operand (dest, word_mode) }
&& (bitpos % 16) == 0
&& (bitsize % 16) == 0 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
&& const_int_operand (src, VOIDmode)) if (TARGET_Z10 && (mode == DImode || mode == SImode))
{ {
HOST_WIDE_INT val = INTVAL (src); enum machine_mode mode_s = GET_MODE (src);
int regpos = bitpos + bitsize;
while (regpos > bitpos) if (mode_s == VOIDmode)
{ {
enum machine_mode putmode; /* Assume const_int etc already in the proper mode. */
int putsize; src = force_reg (mode, src);
}
else if (mode_s != mode)
{
gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
src = force_reg (mode_s, src);
src = gen_lowpart (mode, src);
}
if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32)) op = gen_rtx_SET (mode,
putmode = SImode; gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
else src);
putmode = HImode; clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
putsize = GET_MODE_BITSIZE (putmode);
regpos -= putsize;
emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
GEN_INT (putsize),
GEN_INT (regpos)),
gen_int_mode (val, putmode));
val >>= putsize;
}
gcc_assert (regpos == bitpos);
return true; return true;
} }
...@@ -4717,91 +4759,133 @@ init_alignment_context (struct alignment_context *ac, rtx mem, ...@@ -4717,91 +4759,133 @@ init_alignment_context (struct alignment_context *ac, rtx mem,
/* As we already have some offset, evaluate the remaining distance. */ /* As we already have some offset, evaluate the remaining distance. */
ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset, ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
NULL_RTX, 1, OPTAB_DIRECT); NULL_RTX, 1, OPTAB_DIRECT);
} }
/* Shift is the byte count, but we need the bitcount. */ /* Shift is the byte count, but we need the bitcount. */
ac->shift = expand_simple_binop (SImode, MULT, ac->shift, GEN_INT (BITS_PER_UNIT), ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
NULL_RTX, 1, OPTAB_DIRECT); NULL_RTX, 1, OPTAB_DIRECT);
/* Calculate masks. */ /* Calculate masks. */
ac->modemask = expand_simple_binop (SImode, ASHIFT, ac->modemask = expand_simple_binop (SImode, ASHIFT,
GEN_INT (GET_MODE_MASK (mode)), ac->shift, GEN_INT (GET_MODE_MASK (mode)),
NULL_RTX, 1, OPTAB_DIRECT); ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask, NULL_RTX, 1); ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
NULL_RTX, 1);
}
/* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
perform the merge in SEQ2. */
static rtx
s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
enum machine_mode mode, rtx val, rtx ins)
{
rtx tmp;
if (ac->aligned)
{
start_sequence ();
tmp = copy_to_mode_reg (SImode, val);
if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
const0_rtx, ins))
{
*seq1 = NULL;
*seq2 = get_insns ();
end_sequence ();
return tmp;
}
end_sequence ();
}
/* Failed to use insv. Generate a two part shift and mask. */
start_sequence ();
tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
*seq1 = get_insns ();
end_sequence ();
start_sequence ();
tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
*seq2 = get_insns ();
end_sequence ();
return tmp;
} }
/* Expand an atomic compare and swap operation for HImode and QImode. MEM is /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
the memory location, CMP the old value to compare MEM with and NEW_RTX the value the memory location, CMP the old value to compare MEM with and NEW_RTX the
to set if CMP == MEM. value to set if CMP == MEM. */
CMP is never in memory for compare_and_swap_cc because
expand_bool_compare_and_swap puts it into a register for later compare. */
void void
s390_expand_cs_hqi (enum machine_mode mode, rtx target, rtx mem, rtx cmp, rtx new_rtx) s390_expand_cs_hqi (enum machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
rtx cmp, rtx new_rtx, bool is_weak)
{ {
struct alignment_context ac; struct alignment_context ac;
rtx cmpv, newv, val, resv, cc; rtx cmpv, newv, val, resv, cc, seq0, seq1, seq2, seq3;
rtx res = gen_reg_rtx (SImode); rtx res = gen_reg_rtx (SImode);
rtx csloop = gen_label_rtx (); rtx csloop = NULL, csend = NULL;
rtx csend = gen_label_rtx ();
gcc_assert (register_operand (target, VOIDmode)); gcc_assert (register_operand (vtarget, VOIDmode));
gcc_assert (MEM_P (mem)); gcc_assert (MEM_P (mem));
init_alignment_context (&ac, mem, mode); init_alignment_context (&ac, mem, mode);
/* Shift the values to the correct bit positions. */
if (!(ac.aligned && MEM_P (cmp)))
cmp = s390_expand_mask_and_shift (cmp, mode, ac.shift);
if (!(ac.aligned && MEM_P (new_rtx)))
new_rtx = s390_expand_mask_and_shift (new_rtx, mode, ac.shift);
/* Load full word. Subsequent loads are performed by CS. */ /* Load full word. Subsequent loads are performed by CS. */
val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski, val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
NULL_RTX, 1, OPTAB_DIRECT); NULL_RTX, 1, OPTAB_DIRECT);
/* Prepare insertions of cmp and new_rtx into the loaded value. When
possible, we try to use insv to make this happen efficiently. If
that fails we'll generate code both inside and outside the loop. */
cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
if (seq0)
emit_insn (seq0);
if (seq1)
emit_insn (seq1);
/* Start CS loop. */ /* Start CS loop. */
if (!is_weak)
{
/* Begin assuming success. */
emit_move_insn (btarget, const1_rtx);
csloop = gen_label_rtx ();
csend = gen_label_rtx ();
emit_label (csloop); emit_label (csloop);
}
/* val = "<mem>00..0<mem>" /* val = "<mem>00..0<mem>"
* cmp = "00..0<cmp>00..0" * cmp = "00..0<cmp>00..0"
* new = "00..0<new>00..0" * new = "00..0<new>00..0"
*/ */
/* Patch cmp and new with val at correct position. */ emit_insn (seq2);
if (ac.aligned && MEM_P (cmp)) emit_insn (seq3);
{
cmpv = force_reg (SImode, val); cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
store_bit_field (cmpv, GET_MODE_BITSIZE (mode), 0, if (is_weak)
0, 0, SImode, cmp); emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
}
else else
cmpv = force_reg (SImode, expand_simple_binop (SImode, IOR, cmp, val,
NULL_RTX, 1, OPTAB_DIRECT));
if (ac.aligned && MEM_P (new_rtx))
{ {
newv = force_reg (SImode, val);
store_bit_field (newv, GET_MODE_BITSIZE (mode), 0,
0, 0, SImode, new_rtx);
}
else
newv = force_reg (SImode, expand_simple_binop (SImode, IOR, new_rtx, val,
NULL_RTX, 1, OPTAB_DIRECT));
/* Jump to end if we're done (likely?). */ /* Jump to end if we're done (likely?). */
s390_emit_jump (csend, s390_emit_compare_and_swap (EQ, res, ac.memsi, s390_emit_jump (csend, cc);
cmpv, newv));
/* Check for changes outside mode. */ /* Check for changes outside mode, and loop internal if so. */
resv = expand_simple_binop (SImode, AND, res, ac.modemaski, resv = expand_simple_binop (SImode, AND, res, ac.modemaski,
NULL_RTX, 1, OPTAB_DIRECT); NULL_RTX, 1, OPTAB_DIRECT);
cc = s390_emit_compare (NE, resv, val); cc = s390_emit_compare (NE, resv, val);
emit_move_insn (val, resv); emit_move_insn (val, resv);
/* Loop internal if so. */
s390_emit_jump (csloop, cc); s390_emit_jump (csloop, cc);
/* Failed. */
emit_move_insn (btarget, const0_rtx);
emit_label (csend); emit_label (csend);
}
/* Return the correct part of the bitfield. */ /* Return the correct part of the bitfield. */
convert_move (target, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift, convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
NULL_RTX, 1, OPTAB_DIRECT), 1); NULL_RTX, 1, OPTAB_DIRECT), 1);
} }
......
...@@ -84,6 +84,7 @@ ...@@ -84,6 +84,7 @@
; Atomic Support ; Atomic Support
UNSPEC_MB UNSPEC_MB
UNSPEC_MOVA
; TLS relocation specifiers ; TLS relocation specifiers
UNSPEC_TLSGD UNSPEC_TLSGD
...@@ -349,21 +350,19 @@ ...@@ -349,21 +350,19 @@
(define_mode_iterator DD_DF [DF DD]) (define_mode_iterator DD_DF [DF DD])
(define_mode_iterator TD_TF [TF TD]) (define_mode_iterator TD_TF [TF TD])
;; This mode iterator allows 31-bit and 64-bit TDSI patterns to be generated
;; from the same template.
(define_mode_iterator TDSI [(TI "TARGET_64BIT") DI SI])
;; These mode iterators allow 31-bit and 64-bit GPR patterns to be generated ;; These mode iterators allow 31-bit and 64-bit GPR patterns to be generated
;; from the same template. ;; from the same template.
(define_mode_iterator GPR [(DI "TARGET_ZARCH") SI]) (define_mode_iterator GPR [(DI "TARGET_ZARCH") SI])
(define_mode_iterator DGPR [(TI "TARGET_ZARCH") DI SI])
(define_mode_iterator DSI [DI SI]) (define_mode_iterator DSI [DI SI])
(define_mode_iterator TDI [TI DI])
;; These mode iterators allow :P to be used for patterns that operate on ;; These mode iterators allow :P to be used for patterns that operate on
;; pointer-sized quantities. Exactly one of the two alternatives will match. ;; pointer-sized quantities. Exactly one of the two alternatives will match.
(define_mode_iterator P [(DI "TARGET_64BIT") (SI "!TARGET_64BIT")]) (define_mode_iterator P [(DI "TARGET_64BIT") (SI "!TARGET_64BIT")])
;; These macros refer to the actual word_mode of the configuration. This is equal ;; These macros refer to the actual word_mode of the configuration.
;; to Pmode except on 31-bit machines in zarch mode. ;; This is equal to Pmode except on 31-bit machines in zarch mode.
(define_mode_iterator DW [(TI "TARGET_ZARCH") (DI "!TARGET_ZARCH")]) (define_mode_iterator DW [(TI "TARGET_ZARCH") (DI "!TARGET_ZARCH")])
(define_mode_iterator W [(DI "TARGET_ZARCH") (SI "!TARGET_ZARCH")]) (define_mode_iterator W [(DI "TARGET_ZARCH") (SI "!TARGET_ZARCH")])
...@@ -379,6 +378,7 @@ ...@@ -379,6 +378,7 @@
;; same template. ;; same template.
(define_mode_iterator INT [(DI "TARGET_ZARCH") SI HI QI]) (define_mode_iterator INT [(DI "TARGET_ZARCH") SI HI QI])
(define_mode_iterator INTALL [TI DI SI HI QI]) (define_mode_iterator INTALL [TI DI SI HI QI])
(define_mode_iterator DINT [(TI "TARGET_ZARCH") DI SI HI QI])
;; This iterator allows some 'ashift' and 'lshiftrt' pattern to be defined from ;; This iterator allows some 'ashift' and 'lshiftrt' pattern to be defined from
;; the same template. ;; the same template.
...@@ -487,6 +487,9 @@ ...@@ -487,6 +487,9 @@
;; and "cds" in DImode. ;; and "cds" in DImode.
(define_mode_attr tg [(TI "g") (DI "")]) (define_mode_attr tg [(TI "g") (DI "")])
;; In TDI templates, a string like "c<d>sg".
(define_mode_attr td [(TI "d") (DI "")])
;; In GPR templates, a string like "c<gf>dbr" will expand to "cgdbr" in DImode ;; In GPR templates, a string like "c<gf>dbr" will expand to "cgdbr" in DImode
;; and "cfdbr" in SImode. ;; and "cfdbr" in SImode.
(define_mode_attr gf [(DI "g") (SI "f")]) (define_mode_attr gf [(DI "g") (SI "f")])
...@@ -8739,126 +8742,258 @@ ...@@ -8739,126 +8742,258 @@
;; ;;
; ;
; memory barrier pattern. ; memory barrier patterns.
; ;
(define_expand "memory_barrier" (define_expand "mem_signal_fence"
[(set (match_dup 0) [(match_operand:SI 0 "const_int_operand")] ;; model
(unspec:BLK [(match_dup 0)] UNSPEC_MB))]
"" ""
{ {
operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); /* The s390 memory model is strong enough not to require any
MEM_VOLATILE_P (operands[0]) = 1; barrier in order to synchronize a thread with itself. */
DONE;
})
(define_expand "mem_thread_fence"
[(match_operand:SI 0 "const_int_operand")] ;; model
""
{
/* Unless this is a SEQ_CST fence, the s390 memory model is strong
enough not to require barriers of any kind. */
if (INTVAL (operands[0]) == MEMMODEL_SEQ_CST)
{
rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
MEM_VOLATILE_P (mem) = 1;
emit_insn (gen_mem_thread_fence_1 (mem));
}
DONE;
}) })
(define_insn "*memory_barrier" ; Although bcr is superscalar on Z10, this variant will never
; become part of an execution group.
(define_insn "mem_thread_fence_1"
[(set (match_operand:BLK 0 "" "") [(set (match_operand:BLK 0 "" "")
(unspec:BLK [(match_dup 0)] UNSPEC_MB))] (unspec:BLK [(match_dup 0)] UNSPEC_MB))]
"" ""
"bcr\t15,0" "bcr\t15,0"
[(set_attr "op_type" "RR")]) [(set_attr "op_type" "RR")])
; Although bcr is superscalar on Z10, this variant will never become part of ;
; an execution group. ; atomic load/store operations
;
; Atomic loads need not examine the memory model at all.
(define_expand "atomic_load<mode>"
[(match_operand:DINT 0 "register_operand") ;; output
(match_operand:DINT 1 "memory_operand") ;; memory
(match_operand:SI 2 "const_int_operand")] ;; model
""
{
if (<MODE>mode == TImode)
emit_insn (gen_atomic_loadti_1 (operands[0], operands[1]));
else if (<MODE>mode == DImode && !TARGET_ZARCH)
emit_insn (gen_atomic_loaddi_1 (operands[0], operands[1]));
else
emit_move_insn (operands[0], operands[1]);
DONE;
})
; Different from movdi_31 in that we want no splitters.
(define_insn "atomic_loaddi_1"
[(set (match_operand:DI 0 "register_operand" "=d,d,!*f,!*f")
(unspec:DI [(match_operand:DI 1 "memory_operand" "Q,S,R,T")]
UNSPEC_MOVA))]
"!TARGET_ZARCH"
"@
lm\t%0,%M0,%S1
lmy\t%0,%M0,%S1
ld\t%0,%1
ldy\t%0,%1"
[(set_attr "op_type" "RS,RSY,RS,RSY")
(set_attr "type" "lm,lm,floaddf,floaddf")])
(define_insn "atomic_loadti_1"
[(set (match_operand:TI 0 "register_operand" "=r")
(unspec:TI [(match_operand:TI 1 "memory_operand" "RT")]
UNSPEC_MOVA))]
"TARGET_ZARCH"
"lpq\t%0,%1"
[(set_attr "op_type" "RXY")
(set_attr "type" "other")])
; Atomic stores must(?) enforce sequential consistency.
(define_expand "atomic_store<mode>"
[(match_operand:DINT 0 "memory_operand") ;; memory
(match_operand:DINT 1 "register_operand") ;; input
(match_operand:SI 2 "const_int_operand")] ;; model
""
{
enum memmodel model = (enum memmodel) INTVAL (operands[2]);
if (<MODE>mode == TImode)
emit_insn (gen_atomic_storeti_1 (operands[0], operands[1]));
else if (<MODE>mode == DImode && !TARGET_ZARCH)
emit_insn (gen_atomic_storedi_1 (operands[0], operands[1]));
else
emit_move_insn (operands[0], operands[1]);
if (model == MEMMODEL_SEQ_CST)
emit_insn (gen_mem_thread_fence (operands[2]));
DONE;
})
; Different from movdi_31 in that we want no splitters.
(define_insn "atomic_storedi_1"
[(set (match_operand:DI 0 "memory_operand" "=Q,S,R,T")
(unspec:DI [(match_operand:DI 1 "register_operand" "d,d,!*f,!*f")]
UNSPEC_MOVA))]
"!TARGET_ZARCH"
"@
stm\t%1,%N1,%S0
stmy\t%1,%N1,%S0
std %1,%0
stdy %1,%0"
[(set_attr "op_type" "RS,RSY,RS,RSY")
(set_attr "type" "stm,stm,fstoredf,fstoredf")])
(define_insn "atomic_storeti_1"
[(set (match_operand:TI 0 "memory_operand" "=RT")
(unspec:TI [(match_operand:TI 1 "register_operand" "r")]
UNSPEC_MOVA))]
"TARGET_ZARCH"
"stpq\t%1,%0"
[(set_attr "op_type" "RXY")
(set_attr "type" "other")])
; ;
; compare and swap patterns. ; compare and swap patterns.
; ;
(define_expand "sync_compare_and_swap<mode>" (define_expand "atomic_compare_and_swap<mode>"
[(match_operand:SI 0 "register_operand") ;; bool success output
(match_operand:DGPR 1 "register_operand") ;; oldval output
(match_operand:DGPR 2 "memory_operand") ;; memory
(match_operand:DGPR 3 "register_operand") ;; expected intput
(match_operand:DGPR 4 "register_operand") ;; newval intput
(match_operand:SI 5 "const_int_operand") ;; is_weak
(match_operand:SI 6 "const_int_operand") ;; success model
(match_operand:SI 7 "const_int_operand")] ;; failure model
""
{
rtx cc, cmp;
emit_insn (gen_atomic_compare_and_swap<mode>_internal
(operands[1], operands[2], operands[3], operands[4]));
cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
cmp = gen_rtx_EQ (SImode, cc, const0_rtx);
emit_insn (gen_cstorecc4 (operands[0], cmp, cc, const0_rtx));
DONE;
})
(define_expand "atomic_compare_and_swap<mode>"
[(match_operand:SI 0 "register_operand") ;; bool success output
(match_operand:HQI 1 "register_operand") ;; oldval output
(match_operand:HQI 2 "memory_operand") ;; memory
(match_operand:HQI 3 "general_operand") ;; expected intput
(match_operand:HQI 4 "general_operand") ;; newval intput
(match_operand:SI 5 "const_int_operand") ;; is_weak
(match_operand:SI 6 "const_int_operand") ;; success model
(match_operand:SI 7 "const_int_operand")] ;; failure model
""
{
s390_expand_cs_hqi (<MODE>mode, operands[0], operands[1], operands[2],
operands[3], operands[4], INTVAL (operands[5]));
DONE;
})
(define_expand "atomic_compare_and_swap<mode>_internal"
[(parallel [(parallel
[(set (match_operand:TDSI 0 "register_operand" "") [(set (match_operand:DGPR 0 "register_operand")
(match_operand:TDSI 1 "memory_operand" "")) (match_operand:DGPR 1 "memory_operand"))
(set (match_dup 1) (set (match_dup 1)
(unspec_volatile:TDSI (unspec_volatile:DGPR
[(match_dup 1) [(match_dup 1)
(match_operand:TDSI 2 "register_operand" "") (match_operand:DGPR 2 "register_operand")
(match_operand:TDSI 3 "register_operand" "")] (match_operand:DGPR 3 "register_operand")]
UNSPECV_CAS)) UNSPECV_CAS))
(set (reg:CCZ1 CC_REGNUM) (set (reg:CCZ1 CC_REGNUM)
(compare:CCZ1 (match_dup 1) (match_dup 2)))])] (compare:CCZ1 (match_dup 1) (match_dup 2)))])]
"") "")
(define_expand "sync_compare_and_swap<mode>" ; cdsg, csg
[(parallel (define_insn "*atomic_compare_and_swap<mode>_1"
[(set (match_operand:HQI 0 "register_operand" "") [(set (match_operand:TDI 0 "register_operand" "=r")
(match_operand:HQI 1 "memory_operand" "")) (match_operand:TDI 1 "memory_operand" "+QS"))
(set (match_dup 1) (set (match_dup 1)
(unspec_volatile:HQI (unspec_volatile:TDI
[(match_dup 1) [(match_dup 1)
(match_operand:HQI 2 "general_operand" "") (match_operand:TDI 2 "register_operand" "0")
(match_operand:HQI 3 "general_operand" "")] (match_operand:TDI 3 "register_operand" "r")]
UNSPECV_CAS)) UNSPECV_CAS))
(clobber (reg:CC CC_REGNUM))])] (set (reg:CCZ1 CC_REGNUM)
"" (compare:CCZ1 (match_dup 1) (match_dup 2)))]
"s390_expand_cs_hqi (<MODE>mode, operands[0], operands[1], "TARGET_ZARCH"
operands[2], operands[3]); DONE;") "c<td>sg\t%0,%3,%S1"
[(set_attr "op_type" "RSY")
(set_attr "type" "sem")])
; cds, cdsg ; cds, cdsy
(define_insn "*sync_compare_and_swap<mode>" (define_insn "*atomic_compare_and_swapdi_2"
[(set (match_operand:DW 0 "register_operand" "=r") [(set (match_operand:DI 0 "register_operand" "=r,r")
(match_operand:DW 1 "memory_operand" "+Q")) (match_operand:DI 1 "memory_operand" "+Q,S"))
(set (match_dup 1) (set (match_dup 1)
(unspec_volatile:DW (unspec_volatile:DI
[(match_dup 1) [(match_dup 1)
(match_operand:DW 2 "register_operand" "0") (match_operand:DI 2 "register_operand" "0,0")
(match_operand:DW 3 "register_operand" "r")] (match_operand:DI 3 "register_operand" "r,r")]
UNSPECV_CAS)) UNSPECV_CAS))
(set (reg:CCZ1 CC_REGNUM) (set (reg:CCZ1 CC_REGNUM)
(compare:CCZ1 (match_dup 1) (match_dup 2)))] (compare:CCZ1 (match_dup 1) (match_dup 2)))]
"" "!TARGET_ZARCH"
"cds<tg>\t%0,%3,%S1" "@
[(set_attr "op_type" "RS<TE>") cds\t%0,%3,%S1
cdsy\t%0,%3,%S1"
[(set_attr "op_type" "RS,RSY")
(set_attr "type" "sem")]) (set_attr "type" "sem")])
; cs, csg ; cs, csy
(define_insn "*sync_compare_and_swap<mode>" (define_insn "*atomic_compare_and_swapsi_3"
[(set (match_operand:GPR 0 "register_operand" "=r") [(set (match_operand:SI 0 "register_operand" "=r,r")
(match_operand:GPR 1 "memory_operand" "+Q")) (match_operand:SI 1 "memory_operand" "+Q,S"))
(set (match_dup 1) (set (match_dup 1)
(unspec_volatile:GPR (unspec_volatile:SI
[(match_dup 1) [(match_dup 1)
(match_operand:GPR 2 "register_operand" "0") (match_operand:SI 2 "register_operand" "0,0")
(match_operand:GPR 3 "register_operand" "r")] (match_operand:SI 3 "register_operand" "r,r")]
UNSPECV_CAS)) UNSPECV_CAS))
(set (reg:CCZ1 CC_REGNUM) (set (reg:CCZ1 CC_REGNUM)
(compare:CCZ1 (match_dup 1) (match_dup 2)))] (compare:CCZ1 (match_dup 1) (match_dup 2)))]
"" ""
"cs<g>\t%0,%3,%S1" "@
[(set_attr "op_type" "RS<E>") cs\t%0,%3,%S1
csy\t%0,%3,%S1"
[(set_attr "op_type" "RS,RSY")
(set_attr "type" "sem")]) (set_attr "type" "sem")])
; ;
; Other atomic instruction patterns. ; Other atomic instruction patterns.
; ;
(define_expand "sync_lock_test_and_set<mode>"
[(match_operand:HQI 0 "register_operand")
(match_operand:HQI 1 "memory_operand")
(match_operand:HQI 2 "general_operand")]
""
"s390_expand_atomic (<MODE>mode, SET, operands[0], operands[1],
operands[2], false); DONE;")
; z196 load and add, xor, or and and instructions ; z196 load and add, xor, or and and instructions
; lan, lang, lao, laog, lax, laxg, laa, laag (define_expand "atomic_fetch_<atomic><mode>"
(define_insn "sync_<atomic><mode>" [(match_operand:GPR 0 "register_operand") ;; val out
[(parallel (ATOMIC_Z196:GPR
[(set (match_operand:GPR 0 "memory_operand" "+QS") (match_operand:GPR 1 "memory_operand") ;; memory
(unspec_volatile:GPR (match_operand:GPR 2 "register_operand")) ;; val in
[(ATOMIC_Z196:GPR (match_dup 0) (match_operand:SI 3 "const_int_operand")] ;; model
(match_operand:GPR 1 "general_operand" "d"))]
UNSPECV_ATOMIC_OP))
(clobber (match_scratch:GPR 2 "=d"))
(clobber (reg:CC CC_REGNUM))])]
"TARGET_Z196" "TARGET_Z196"
"la<noxa><g>\t%2,%1,%0") {
emit_insn (gen_atomic_fetch_<atomic><mode>_iaf
(operands[0], operands[1], operands[2]));
DONE;
})
; lan, lang, lao, laog, lax, laxg, laa, laag ; lan, lang, lao, laog, lax, laxg, laa, laag
(define_insn "sync_old_<atomic><mode>" (define_insn "atomic_fetch_<atomic><mode>_iaf"
[(parallel
[(set (match_operand:GPR 0 "register_operand" "=d") [(set (match_operand:GPR 0 "register_operand" "=d")
(match_operand:GPR 1 "memory_operand" "+QS")) (match_operand:GPR 1 "memory_operand" "+QS"))
(set (match_dup 1) (set (match_dup 1)
...@@ -8866,37 +9001,65 @@ ...@@ -8866,37 +9001,65 @@
[(ATOMIC_Z196:GPR (match_dup 1) [(ATOMIC_Z196:GPR (match_dup 1)
(match_operand:GPR 2 "general_operand" "d"))] (match_operand:GPR 2 "general_operand" "d"))]
UNSPECV_ATOMIC_OP)) UNSPECV_ATOMIC_OP))
(clobber (reg:CC CC_REGNUM))])] (clobber (reg:CC CC_REGNUM))]
"TARGET_Z196" "TARGET_Z196"
"la<noxa><g>\t%0,%2,%1") "la<noxa><g>\t%0,%2,%1"
[(set_attr "op_type" "RSY")
(set_attr "type" "sem")])
;; For SImode and larger, the optabs.c code will do just fine in
;; expanding a compare-and-swap loop. For QI/HImode, we can do
;; better by expanding our own loop.
(define_expand "sync_<atomic><mode>" (define_expand "atomic_<atomic><mode>"
[(set (match_operand:HQI 0 "memory_operand") [(ATOMIC:HQI
(ATOMIC:HQI (match_dup 0) (match_operand:HQI 0 "memory_operand") ;; memory
(match_operand:HQI 1 "general_operand")))] (match_operand:HQI 1 "general_operand")) ;; val in
(match_operand:SI 2 "const_int_operand")] ;; model
"" ""
"s390_expand_atomic (<MODE>mode, <CODE>, NULL_RTX, operands[0], {
operands[1], false); DONE;") s390_expand_atomic (<MODE>mode, <CODE>, NULL_RTX, operands[0],
operands[1], false);
DONE;
})
(define_expand "sync_old_<atomic><mode>" (define_expand "atomic_fetch_<atomic><mode>"
[(set (match_operand:HQI 0 "register_operand") [(match_operand:HQI 0 "register_operand") ;; val out
(match_operand:HQI 1 "memory_operand")) (ATOMIC:HQI
(set (match_dup 1) (match_operand:HQI 1 "memory_operand") ;; memory
(ATOMIC:HQI (match_dup 1) (match_operand:HQI 2 "general_operand")) ;; val in
(match_operand:HQI 2 "general_operand")))] (match_operand:SI 3 "const_int_operand")] ;; model
"" ""
"s390_expand_atomic (<MODE>mode, <CODE>, operands[0], operands[1], {
operands[2], false); DONE;") s390_expand_atomic (<MODE>mode, <CODE>, operands[0], operands[1],
operands[2], false);
(define_expand "sync_new_<atomic><mode>" DONE;
[(set (match_operand:HQI 0 "register_operand") })
(ATOMIC:HQI (match_operand:HQI 1 "memory_operand")
(match_operand:HQI 2 "general_operand"))) (define_expand "atomic_<atomic>_fetch<mode>"
(set (match_dup 1) (ATOMIC:HQI (match_dup 1) (match_dup 2)))] [(match_operand:HQI 0 "register_operand") ;; val out
(ATOMIC:HQI
(match_operand:HQI 1 "memory_operand") ;; memory
(match_operand:HQI 2 "general_operand")) ;; val in
(match_operand:SI 3 "const_int_operand")] ;; model
""
{
s390_expand_atomic (<MODE>mode, <CODE>, operands[0], operands[1],
operands[2], true);
DONE;
})
(define_expand "atomic_exchange<mode>"
[(match_operand:HQI 0 "register_operand") ;; val out
(match_operand:HQI 1 "memory_operand") ;; memory
(match_operand:HQI 2 "general_operand") ;; val in
(match_operand:SI 3 "const_int_operand")] ;; model
"" ""
"s390_expand_atomic (<MODE>mode, <CODE>, operands[0], operands[1], {
operands[2], true); DONE;") s390_expand_atomic (<MODE>mode, SET, operands[0], operands[1],
operands[2], false);
DONE;
})
;; ;;
;;- Miscellaneous instructions. ;;- Miscellaneous instructions.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment