Commit adedd5c1, authored and committed by Jakub Jelinek

re PR target/49244 (__sync or __atomic builtins will not emit 'lock bts/btr/btc')

	PR target/49244
	* tree-ssa-ccp.c: Include stor-layout.h and optabs-query.h.
	(optimize_atomic_bit_test_and): New function.
	(pass_fold_builtins::execute): Use it.
	* optabs.def (atomic_bit_test_and_set_optab,
	atomic_bit_test_and_complement_optab,
	atomic_bit_test_and_reset_optab): New optabs.
	* internal-fn.def (ATOMIC_BIT_TEST_AND_SET,
	ATOMIC_BIT_TEST_AND_COMPLEMENT, ATOMIC_BIT_TEST_AND_RESET): New ifns.
	* builtins.h (expand_ifn_atomic_bit_test_and): New prototype.
	* builtins.c (expand_ifn_atomic_bit_test_and): New function.
	* internal-fn.c (expand_ATOMIC_BIT_TEST_AND_SET,
	expand_ATOMIC_BIT_TEST_AND_COMPLEMENT,
	expand_ATOMIC_BIT_TEST_AND_RESET): New functions.
	* doc/md.texi (atomic_bit_test_and_set@var{mode},
	atomic_bit_test_and_complement@var{mode},
	atomic_bit_test_and_reset@var{mode}): Document.
	* config/i386/sync.md (atomic_bit_test_and_set<mode>,
	atomic_bit_test_and_complement<mode>,
	atomic_bit_test_and_reset<mode>): New expanders.
	(atomic_bit_test_and_set<mode>_1,
	atomic_bit_test_and_complement<mode>_1,
	atomic_bit_test_and_reset<mode>_1): New insns.

	* gcc.target/i386/pr49244-1.c: New test.
	* gcc.target/i386/pr49244-2.c: New test.

From-SVN: r235813

gcc/builtins.c
@@ -5310,6 +5310,90 @@ expand_builtin_atomic_fetch_op (machine_mode mode, tree exp, rtx target,
  return ret;
}

/* Expand IFN_ATOMIC_BIT_TEST_AND_* internal function.  */

void
expand_ifn_atomic_bit_test_and (gcall *call)
{
  tree ptr = gimple_call_arg (call, 0);
  tree bit = gimple_call_arg (call, 1);
  tree flag = gimple_call_arg (call, 2);
  tree lhs = gimple_call_lhs (call);
  enum memmodel model = MEMMODEL_SYNC_SEQ_CST;
  machine_mode mode = TYPE_MODE (TREE_TYPE (flag));
  enum rtx_code code;
  optab optab;
  struct expand_operand ops[5];

  gcc_assert (flag_inline_atomics);

  if (gimple_call_num_args (call) == 4)
    model = get_memmodel (gimple_call_arg (call, 3));

  rtx mem = get_builtin_sync_mem (ptr, mode);
  rtx val = expand_expr_force_mode (bit, mode);

  switch (gimple_call_internal_fn (call))
    {
    case IFN_ATOMIC_BIT_TEST_AND_SET:
      code = IOR;
      optab = atomic_bit_test_and_set_optab;
      break;
    case IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT:
      code = XOR;
      optab = atomic_bit_test_and_complement_optab;
      break;
    case IFN_ATOMIC_BIT_TEST_AND_RESET:
      code = AND;
      optab = atomic_bit_test_and_reset_optab;
      break;
    default:
      gcc_unreachable ();
    }

  if (lhs == NULL_TREE)
    {
      val = expand_simple_binop (mode, ASHIFT, const1_rtx,
				 val, NULL_RTX, true, OPTAB_DIRECT);
      if (code == AND)
	val = expand_simple_unop (mode, NOT, val, NULL_RTX, true);
      expand_atomic_fetch_op (const0_rtx, mem, val, code, model, false);
      return;
    }

  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
  enum insn_code icode = direct_optab_handler (optab, mode);
  gcc_assert (icode != CODE_FOR_nothing);
  create_output_operand (&ops[0], target, mode);
  create_fixed_operand (&ops[1], mem);
  create_convert_operand_to (&ops[2], val, mode, true);
  create_integer_operand (&ops[3], model);
  create_integer_operand (&ops[4], integer_onep (flag));
  if (maybe_expand_insn (icode, 5, ops))
    return;

  rtx bitval = val;
  val = expand_simple_binop (mode, ASHIFT, const1_rtx,
			     val, NULL_RTX, true, OPTAB_DIRECT);
  rtx maskval = val;
  if (code == AND)
    val = expand_simple_unop (mode, NOT, val, NULL_RTX, true);
  rtx result = expand_atomic_fetch_op (gen_reg_rtx (mode), mem, val,
				       code, model, false);
  if (integer_onep (flag))
    {
      result = expand_simple_binop (mode, ASHIFTRT, result, bitval,
				    NULL_RTX, true, OPTAB_DIRECT);
      result = expand_simple_binop (mode, AND, result, const1_rtx, target,
				    true, OPTAB_DIRECT);
    }
  else
    result = expand_simple_binop (mode, AND, result, maskval, target, true,
				  OPTAB_DIRECT);
  if (result != target)
    emit_move_insn (target, result);
}

/* Expand an atomic clear operation.
	void _atomic_clear (BOOL *obj, enum memmodel)
   EXP is the call expression.  */
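
A C-level sketch of the fallback path above (my paraphrase, not code from the
patch): when the direct optab is unavailable but the result is used, the
expansion behaves like

  /* 'fetch_op' stands for the __atomic_fetch_{or,xor,and} operation chosen
     by the internal function; for the AND (reset) case the mask passed to it
     is complemented first.  */
  unsigned int old = fetch_op (ptr, 1u << bit, model);
  lhs = flag ? (old >> bit) & 1     /* bit as a 0/1 value */
	     : old & (1u << bit);   /* bit in its original position */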

gcc/builtins.h
@@ -71,6 +71,7 @@ extern tree std_fn_abi_va_list (tree);
extern tree std_canonical_va_list_type (tree);
extern void std_expand_builtin_va_start (tree, rtx);
extern void expand_builtin_trap (void);
extern void expand_ifn_atomic_bit_test_and (gcall *);
extern rtx expand_builtin (tree, rtx, rtx, machine_mode, int);
extern rtx expand_builtin_with_bounds (tree, rtx, rtx, machine_mode, int);
extern enum built_in_function builtin_mathfn_code (const_tree);

gcc/config/i386/sync.md
@@ -605,3 +605,114 @@
   (clobber (reg:CC FLAGS_REG))]
  ""
  "lock{%;} %K2<logic>{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_set<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_set<mode>_1 (operands[1], operands[2],
						  operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_DIRECT);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

(define_insn "atomic_bit_test_and_set<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")] ;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(const_int 1))]
  ""
  "lock{%;} %K2bts{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_complement<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_complement<mode>_1 (operands[1],
							 operands[2],
							 operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_DIRECT);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

(define_insn "atomic_bit_test_and_complement<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")] ;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(not:SWI248 (zero_extract:SWI248 (match_dup 0)
					 (const_int 1)
					 (match_dup 1))))]
  ""
  "lock{%;} %K2btc{<imodesuffix>}\t{%1, %0|%0, %1}")

(define_expand "atomic_bit_test_and_reset<mode>"
  [(match_operand:SWI248 0 "register_operand")
   (match_operand:SWI248 1 "memory_operand")
   (match_operand:SWI248 2 "nonmemory_operand")
   (match_operand:SI 3 "const_int_operand") ;; model
   (match_operand:SI 4 "const_int_operand")]
  ""
{
  emit_insn (gen_atomic_bit_test_and_reset<mode>_1 (operands[1], operands[2],
						    operands[3]));
  rtx tem = gen_reg_rtx (QImode);
  ix86_expand_setcc (tem, EQ, gen_rtx_REG (CCCmode, FLAGS_REG), const0_rtx);
  rtx result = convert_modes (<MODE>mode, QImode, tem, 1);
  if (operands[4] == const0_rtx)
    result = expand_simple_binop (<MODE>mode, ASHIFT, result,
				  operands[2], operands[0], 0, OPTAB_DIRECT);
  if (result != operands[0])
    emit_move_insn (operands[0], result);
  DONE;
})

(define_insn "atomic_bit_test_and_reset<mode>_1"
  [(set (reg:CCC FLAGS_REG)
	(compare:CCC
	  (unspec_volatile:SWI248
	    [(match_operand:SWI248 0 "memory_operand" "+m")
	     (match_operand:SI 2 "const_int_operand")] ;; model
	    UNSPECV_XCHG)
	  (const_int 0)))
   (set (zero_extract:SWI248 (match_dup 0)
			     (const_int 1)
			     (match_operand:SWI248 1 "nonmemory_operand" "rN"))
	(const_int 0))]
  ""
  "lock{%;} %K2btr{<imodesuffix>}\t{%1, %0|%0, %1}")
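
For reference (my annotation, not part of the commit): with these expanders,
f1 from the new pr49244-1.c test below compiles at -O2 on x86_64 to roughly
the following; exact register allocation may differ.

	lock btsl	%esi, (%rdi)
	setc	%al
	movzbl	%al, %eax
	ret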

gcc/doc/md.texi
@@ -6909,6 +6909,33 @@ The specific value that defines "set" is implementation defined, and
is normally based on what is performed by the native atomic test and set
instruction.

@cindex @code{atomic_bit_test_and_set@var{mode}} instruction pattern
@cindex @code{atomic_bit_test_and_complement@var{mode}} instruction pattern
@cindex @code{atomic_bit_test_and_reset@var{mode}} instruction pattern
@item @samp{atomic_bit_test_and_set@var{mode}}
@itemx @samp{atomic_bit_test_and_complement@var{mode}}
@itemx @samp{atomic_bit_test_and_reset@var{mode}}
These patterns emit code for an atomic bitwise operation on memory with memory
model semantics, and return the original value of the specified bit.
Operand 0 is an output operand which contains the value of the specified bit
from the memory location before the operation was performed.  Operand 1 is the
memory on which the atomic operation is performed.  Operand 2 is the bit within
the operand, starting with the least significant bit.  Operand 3 is the memory
model to be used by the operation.  Operand 4 is a flag: it is @code{const1_rtx}
if operand 0 should contain the original value of the specified bit in the
least significant bit of the operand, and @code{const0_rtx} if the bit should
be in its original position in the operand.
@code{atomic_bit_test_and_set@var{mode}} atomically sets the specified bit after
remembering its original value, @code{atomic_bit_test_and_complement@var{mode}}
inverts the specified bit and @code{atomic_bit_test_and_reset@var{mode}} clears
the specified bit.

If these patterns are not defined, attempts will be made to use
@code{atomic_fetch_or@var{mode}}, @code{atomic_fetch_xor@var{mode}} or
@code{atomic_fetch_and@var{mode}} instruction patterns, or their @code{sync}
counterparts.  If none of these are available a compare-and-swap
loop will be used.
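
(An illustration, not part of the committed documentation: the canonical
source idiom that the new tree-ssa-ccp.c folding routes to these patterns is

@smallexample
unsigned int mask = 1u << bit;
if (__atomic_fetch_or (ptr, mask, __ATOMIC_SEQ_CST) & mask)
  ...
@end smallexample

which on x86 becomes a single @code{lock bts} rather than a @code{lock or}
plus a shift/mask sequence, or a compare-and-swap loop.)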
@cindex @code{mem_thread_fence@var{mode}} instruction pattern
@item @samp{mem_thread_fence@var{mode}}
This pattern emits code required to implement a thread fence with

gcc/internal-fn.c
@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.  If not see
#include "expr.h"
#include "ubsan.h"
#include "recog.h"
#include "builtins.h"
/* The names of each internal function, indexed by function number. */
const char *const internal_fn_name_array[] = {

@@ -2118,6 +2119,30 @@ expand_SET_EDOM (internal_fn, gcall *)
#endif
}

/* Expand atomic bit test and set.  */

static void
expand_ATOMIC_BIT_TEST_AND_SET (internal_fn, gcall *call)
{
  expand_ifn_atomic_bit_test_and (call);
}

/* Expand atomic bit test and complement.  */

static void
expand_ATOMIC_BIT_TEST_AND_COMPLEMENT (internal_fn, gcall *call)
{
  expand_ifn_atomic_bit_test_and (call);
}

/* Expand atomic bit test and reset.  */

static void
expand_ATOMIC_BIT_TEST_AND_RESET (internal_fn, gcall *call)
{
  expand_ifn_atomic_bit_test_and (call);
}

/* Expand a call to FN using the operands in STMT.  FN has a single
   output operand and NARGS input operands.  */

gcc/internal-fn.def
@@ -189,6 +189,11 @@ DEF_INTERNAL_FN (GOACC_REDUCTION, ECF_NOTHROW | ECF_LEAF, NULL)
   current target.  */
DEF_INTERNAL_FN (SET_EDOM, ECF_LEAF | ECF_NOTHROW, NULL)

/* Atomic functions.  */
DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF | ECF_NOTHROW, NULL)
#undef DEF_INTERNAL_INT_FN
#undef DEF_INTERNAL_FLT_FN
#undef DEF_INTERNAL_OPTAB_FN

gcc/optabs.def
@@ -337,6 +337,9 @@ OPTAB_D (atomic_add_fetch_optab, "atomic_add_fetch$I$a")
OPTAB_D (atomic_add_optab, "atomic_add$I$a")
OPTAB_D (atomic_and_fetch_optab, "atomic_and_fetch$I$a")
OPTAB_D (atomic_and_optab, "atomic_and$I$a")
OPTAB_D (atomic_bit_test_and_set_optab, "atomic_bit_test_and_set$I$a")
OPTAB_D (atomic_bit_test_and_complement_optab, "atomic_bit_test_and_complement$I$a")
OPTAB_D (atomic_bit_test_and_reset_optab, "atomic_bit_test_and_reset$I$a")
OPTAB_D (atomic_compare_and_swap_optab, "atomic_compare_and_swap$I$a")
OPTAB_D (atomic_exchange_optab, "atomic_exchange$I$a")
OPTAB_D (atomic_fetch_add_optab, "atomic_fetch_add$I$a")

gcc/testsuite/gcc.target/i386/pr49244-1.c (new file)
/* PR target/49244 */
/* { dg-do compile } */
/* { dg-options "-O2" } */

void bar (void);

__attribute__((noinline, noclone)) int
f1 (int *a, int bit)
{
  unsigned int mask = (1u << bit);
  return (__sync_fetch_and_or (a, mask) & mask) != 0;
}

__attribute__((noinline, noclone)) int
f2 (int *a, int bit)
{
  unsigned int mask = (1u << bit);
  unsigned int t1 = __atomic_fetch_or (a, mask, __ATOMIC_RELAXED);
  unsigned int t2 = t1 & mask;
  return t2 != 0;
}

__attribute__((noinline, noclone)) long int
f3 (long int *a, int bit)
{
  unsigned long int mask = (1ul << bit);
  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
}

__attribute__((noinline, noclone)) int
f4 (int *a)
{
  unsigned int mask = (1u << 7);
  return (__sync_fetch_and_or (a, mask) & mask) != 0;
}

__attribute__((noinline, noclone)) int
f5 (int *a)
{
  unsigned int mask = (1u << 13);
  return (__atomic_fetch_or (a, mask, __ATOMIC_RELAXED) & mask) != 0;
}

__attribute__((noinline, noclone)) int
f6 (int *a)
{
  unsigned int mask = (1u << 0);
  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
}

__attribute__((noinline, noclone)) void
f7 (int *a, int bit)
{
  unsigned int mask = (1u << bit);
  if ((__sync_fetch_and_xor (a, mask) & mask) != 0)
    bar ();
}

__attribute__((noinline, noclone)) void
f8 (int *a, int bit)
{
  unsigned int mask = (1u << bit);
  if ((__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) == 0)
    bar ();
}

__attribute__((noinline, noclone)) int
f9 (int *a, int bit)
{
  unsigned int mask = (1u << bit);
  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
}

__attribute__((noinline, noclone)) int
f10 (int *a)
{
  unsigned int mask = (1u << 7);
  return (__sync_fetch_and_xor (a, mask) & mask) != 0;
}

__attribute__((noinline, noclone)) int
f11 (int *a)
{
  unsigned int mask = (1u << 13);
  return (__atomic_fetch_xor (a, mask, __ATOMIC_RELAXED) & mask) != 0;
}

__attribute__((noinline, noclone)) int
f12 (int *a)
{
  unsigned int mask = (1u << 0);
  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
}

__attribute__((noinline, noclone)) int
f13 (int *a, int bit)
{
  unsigned int mask = (1u << bit);
  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
}

__attribute__((noinline, noclone)) int
f14 (int *a, int bit)
{
  unsigned int mask = (1u << bit);
  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
}

__attribute__((noinline, noclone)) int
f15 (int *a, int bit)
{
  unsigned int mask = (1u << bit);
  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
}

__attribute__((noinline, noclone)) int
f16 (int *a)
{
  unsigned int mask = (1u << 7);
  return (__sync_fetch_and_and (a, ~mask) & mask) != 0;
}

__attribute__((noinline, noclone)) int
f17 (int *a)
{
  unsigned int mask = (1u << 13);
  return (__atomic_fetch_and (a, ~mask, __ATOMIC_RELAXED) & mask) != 0;
}

__attribute__((noinline, noclone)) int
f18 (int *a)
{
  unsigned int mask = (1u << 0);
  return (__atomic_fetch_and (a, ~mask, __ATOMIC_SEQ_CST) & mask) != 0;
}

__attribute__((noinline, noclone)) unsigned long int
f19 (unsigned long int *a, int bit)
{
  unsigned long int mask = (1ul << bit);
  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
}

__attribute__((noinline, noclone)) unsigned long int
f20 (unsigned long int *a)
{
  unsigned long int mask = (1ul << 7);
  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
}

__attribute__((noinline, noclone)) int
f21 (int *a, int bit)
{
  unsigned int mask = (1u << bit);
  return (__sync_fetch_and_or (a, mask) & mask);
}

__attribute__((noinline, noclone)) unsigned long int
f22 (unsigned long int *a)
{
  unsigned long int mask = (1ul << 7);
  return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask);
}

__attribute__((noinline, noclone)) unsigned long int
f23 (unsigned long int *a)
{
  unsigned long int mask = (1ul << 7);
  return (__atomic_fetch_xor (a, mask, __ATOMIC_SEQ_CST) & mask);
}

__attribute__((noinline, noclone)) unsigned short int
f24 (unsigned short int *a)
{
  unsigned short int mask = (1u << 7);
  return (__sync_fetch_and_or (a, mask) & mask) != 0;
}

__attribute__((noinline, noclone)) unsigned short int
f25 (unsigned short int *a)
{
  unsigned short int mask = (1u << 7);
  return (__atomic_fetch_or (a, mask, __ATOMIC_SEQ_CST) & mask) != 0;
}

/* { dg-final { scan-assembler-times "lock;?\[ \t\]*bts" 9 } } */
/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btc" 10 } } */
/* { dg-final { scan-assembler-times "lock;?\[ \t\]*btr" 6 } } */

gcc/testsuite/gcc.target/i386/pr49244-2.c (new file)
/* PR target/49244 */
/* { dg-do run } */
/* { dg-options "-O2 -g" } */

int cnt;

__attribute__((noinline, noclone)) void
bar (void)
{
  cnt++;
}

#include "pr49244-1.c"

int a;
long int b;
unsigned long int c;
unsigned short int d;

int
main ()
{
  __atomic_store_n (&a, 15, __ATOMIC_RELAXED);
  if (f1 (&a, 2) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 15
      || f1 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31)
    __builtin_abort ();
  if (f2 (&a, 1) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 31
      || f2 (&a, 5) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 63)
    __builtin_abort ();
  __atomic_store_n (&b, 24, __ATOMIC_RELAXED);
  if (f3 (&b, 2) != 1 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28
      || f3 (&b, 3) != 0 || __atomic_load_n (&b, __ATOMIC_RELAXED) != 28)
    __builtin_abort ();
  __atomic_store_n (&a, 0, __ATOMIC_RELAXED);
  if (f4 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128
      || f4 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 128)
    __builtin_abort ();
  if (f5 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
      || f5 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320)
    __builtin_abort ();
  if (f6 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321
      || f6 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
    __builtin_abort ();
  if (cnt != 0
      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
      || (f7 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
    __builtin_abort ();
  if ((f8 (&a, 7), cnt) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
      || (f8 (&a, 7), cnt) != 2 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
    __builtin_abort ();
  if (f9 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
      || f9 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
    __builtin_abort ();
  if (f10 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
      || f10 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
    __builtin_abort ();
  if (f11 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 129
      || f11 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
    __builtin_abort ();
  if (f12 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8320
      || f12 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8321)
    __builtin_abort ();
  if (f13 (&a, 7) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
      || f13 (&a, 7) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
    __builtin_abort ();
  if (f14 (&a, 13) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
      || f14 (&a, 13) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
    __builtin_abort ();
  if (f15 (&a, 0) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
      || f15 (&a, 0) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
    __builtin_abort ();
  __atomic_store_n (&a, 8321, __ATOMIC_RELAXED);
  if (f16 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193
      || f16 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 8193)
    __builtin_abort ();
  if (f17 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1
      || f17 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 1)
    __builtin_abort ();
  if (f18 (&a) != 1 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0
      || f18 (&a) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 0)
    __builtin_abort ();
  if (f19 (&c, 7) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
      || f19 (&c, 7) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
    __builtin_abort ();
  if (f20 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 128
      || f20 (&c) != 1 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 0)
    __builtin_abort ();
  __atomic_store_n (&a, 128, __ATOMIC_RELAXED);
  if (f21 (&a, 4) != 0 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144
      || f21 (&a, 4) != 16 || __atomic_load_n (&a, __ATOMIC_RELAXED) != 144)
    __builtin_abort ();
  __atomic_store_n (&c, 1, __ATOMIC_RELAXED);
  if (f22 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
      || f22 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
    __builtin_abort ();
  if (f23 (&c) != 0 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 129
      || f23 (&c) != 128 || __atomic_load_n (&c, __ATOMIC_RELAXED) != 1)
    __builtin_abort ();
  if (f24 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128
      || f24 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 128)
    __builtin_abort ();
  __atomic_store_n (&d, 1, __ATOMIC_RELAXED);
  if (f25 (&d) != 0 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
      || f25 (&d) != 1 || __atomic_load_n (&d, __ATOMIC_RELAXED) != 129
      || cnt != 2)
    __builtin_abort ();
  return 0;
}
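
To exercise just these two tests in a GCC build tree, a standard DejaGnu
invocation (my note, not part of the commit) is:

  make check-gcc RUNTESTFLAGS="i386.exp=pr49244*"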

gcc/tree-ssa-ccp.c
@@ -140,6 +140,8 @@ along with GCC; see the file COPYING3.  If not see
#include "builtins.h"
#include "tree-chkp.h"
#include "cfgloop.h"
#include "stor-layout.h"
#include "optabs-query.h"
/* Possible lattice values. */

@@ -2697,6 +2699,224 @@ optimize_unreachable (gimple_stmt_iterator i)
  return ret;
}

/* Optimize
      mask_2 = 1 << cnt_1;
      _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
      _5 = _4 & mask_2;
   to
      _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
      _5 = _4;
   If _5 is only used in _5 != 0 or _5 == 0 comparisons, 1
   is passed instead of 0, and the builtin just returns a zero
   or 1 value instead of the actual bit.
   Similarly for __sync_fetch_and_or_* (without the ", _3" part
   in there), and/or if mask_2 is a power of 2 constant.
   Similarly for xor instead of or, use ATOMIC_BIT_TEST_AND_COMPLEMENT
   in that case.  And similarly for and instead of or, except that
   the second argument to the builtin needs to be one's complement
   of the mask instead of mask.  */

static void
optimize_atomic_bit_test_and (gimple_stmt_iterator *gsip,
			      enum internal_fn fn, bool has_model_arg,
			      bool after)
{
  gimple *call = gsi_stmt (*gsip);
  tree lhs = gimple_call_lhs (call);
  use_operand_p use_p;
  gimple *use_stmt;
  tree mask, bit;
  optab optab;

  if (!flag_inline_atomics
      || optimize_debug
      || !gimple_call_builtin_p (call, BUILT_IN_NORMAL)
      || !lhs
      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
      || !single_imm_use (lhs, &use_p, &use_stmt)
      || !is_gimple_assign (use_stmt)
      || gimple_assign_rhs_code (use_stmt) != BIT_AND_EXPR
      || !gimple_vdef (call))
    return;

  switch (fn)
    {
    case IFN_ATOMIC_BIT_TEST_AND_SET:
      optab = atomic_bit_test_and_set_optab;
      break;
    case IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT:
      optab = atomic_bit_test_and_complement_optab;
      break;
    case IFN_ATOMIC_BIT_TEST_AND_RESET:
      optab = atomic_bit_test_and_reset_optab;
      break;
    default:
      return;
    }

  if (optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs))) == CODE_FOR_nothing)
    return;

  mask = gimple_call_arg (call, 1);
  tree use_lhs = gimple_assign_lhs (use_stmt);
  if (!use_lhs)
    return;

  if (TREE_CODE (mask) == INTEGER_CST)
    {
      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
	mask = const_unop (BIT_NOT_EXPR, TREE_TYPE (mask), mask);
      mask = fold_convert (TREE_TYPE (lhs), mask);
      int ibit = tree_log2 (mask);
      if (ibit < 0)
	return;
      bit = build_int_cst (TREE_TYPE (lhs), ibit);
    }
  else if (TREE_CODE (mask) == SSA_NAME)
    {
      gimple *g = SSA_NAME_DEF_STMT (mask);
      if (fn == IFN_ATOMIC_BIT_TEST_AND_RESET)
	{
	  if (!is_gimple_assign (g)
	      || gimple_assign_rhs_code (g) != BIT_NOT_EXPR)
	    return;
	  mask = gimple_assign_rhs1 (g);
	  if (TREE_CODE (mask) != SSA_NAME)
	    return;
	  g = SSA_NAME_DEF_STMT (mask);
	}
      if (!is_gimple_assign (g)
	  || gimple_assign_rhs_code (g) != LSHIFT_EXPR
	  || !integer_onep (gimple_assign_rhs1 (g)))
	return;
      bit = gimple_assign_rhs2 (g);
    }
  else
    return;

  if (gimple_assign_rhs1 (use_stmt) == lhs)
    {
      if (!operand_equal_p (gimple_assign_rhs2 (use_stmt), mask, 0))
	return;
    }
  else if (gimple_assign_rhs2 (use_stmt) != lhs
	   || !operand_equal_p (gimple_assign_rhs1 (use_stmt), mask, 0))
    return;

  bool use_bool = true;
  bool has_debug_uses = false;
  imm_use_iterator iter;
  gimple *g;

  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs))
    use_bool = false;
  FOR_EACH_IMM_USE_STMT (g, iter, use_lhs)
    {
      enum tree_code code = ERROR_MARK;
      tree op0, op1;
      if (is_gimple_debug (g))
	{
	  has_debug_uses = true;
	  continue;
	}
      else if (is_gimple_assign (g))
	switch (gimple_assign_rhs_code (g))
	  {
	  case COND_EXPR:
	    op1 = gimple_assign_rhs1 (g);
	    code = TREE_CODE (op1);
	    op0 = TREE_OPERAND (op1, 0);
	    op1 = TREE_OPERAND (op1, 1);
	    break;
	  case EQ_EXPR:
	  case NE_EXPR:
	    code = gimple_assign_rhs_code (g);
	    op0 = gimple_assign_rhs1 (g);
	    op1 = gimple_assign_rhs2 (g);
	    break;
	  default:
	    break;
	  }
      else if (gimple_code (g) == GIMPLE_COND)
	{
	  code = gimple_cond_code (g);
	  op0 = gimple_cond_lhs (g);
	  op1 = gimple_cond_rhs (g);
	}

      if ((code == EQ_EXPR || code == NE_EXPR)
	  && op0 == use_lhs
	  && integer_zerop (op1))
	{
	  use_operand_p use_p;
	  int n = 0;
	  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
	    n++;
	  if (n == 1)
	    continue;
	}

      use_bool = false;
      BREAK_FROM_IMM_USE_STMT (iter);
    }

  tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
  tree flag = build_int_cst (TREE_TYPE (lhs), use_bool);
  if (has_model_arg)
    g = gimple_build_call_internal (fn, 4, gimple_call_arg (call, 0),
				    bit, flag, gimple_call_arg (call, 2));
  else
    g = gimple_build_call_internal (fn, 3, gimple_call_arg (call, 0),
				    bit, flag);
  gimple_call_set_lhs (g, new_lhs);
  gimple_set_location (g, gimple_location (call));
  gimple_set_vuse (g, gimple_vuse (call));
  gimple_set_vdef (g, gimple_vdef (call));
  SSA_NAME_DEF_STMT (gimple_vdef (call)) = g;
  gimple_stmt_iterator gsi = *gsip;
  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
  if (after)
    {
      /* The internal function returns the value of the specified bit
	 before the atomic operation.  If we are interested in the value
	 of the specified bit after the atomic operation (this only makes
	 sense for xor, otherwise the bit content is compile time known),
	 we need to invert the bit.  */
      g = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
			       BIT_XOR_EXPR, new_lhs,
			       use_bool ? build_int_cst (TREE_TYPE (lhs), 1)
					: mask);
      new_lhs = gimple_assign_lhs (g);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
    }
  if (use_bool && has_debug_uses)
    {
      tree temp = make_node (DEBUG_EXPR_DECL);
      DECL_ARTIFICIAL (temp) = 1;
      TREE_TYPE (temp) = TREE_TYPE (lhs);
      DECL_MODE (temp) = TYPE_MODE (TREE_TYPE (lhs));
      tree t = build2 (LSHIFT_EXPR, TREE_TYPE (lhs), new_lhs, bit);
      g = gimple_build_debug_bind (temp, t, g);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);
      FOR_EACH_IMM_USE_STMT (g, iter, use_lhs)
	if (is_gimple_debug (g))
	  {
	    use_operand_p use_p;
	    FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
	      SET_USE (use_p, temp);
	    update_stmt (g);
	  }
    }
  SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_lhs)
    = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (use_lhs);
  replace_uses_by (use_lhs, new_lhs);
  gsi = gsi_for_stmt (use_stmt);
  gsi_remove (&gsi, true);
  release_defs (use_stmt);
  gsi_remove (gsip, true);
  release_ssa_name (lhs);
}
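
A note on the "after" variants (mine, not from the commit):
__atomic_xor_fetch and __sync_xor_and_fetch return the post-operation value,
while the internal function returns the pre-operation bit, hence the
BIT_XOR_EXPR emitted above.  For example, f20 from the new tests

  __attribute__((noinline, noclone)) unsigned long int
  f20 (unsigned long int *a)
  {
    unsigned long int mask = (1ul << 7);
    return (__atomic_xor_fetch (a, mask, __ATOMIC_SEQ_CST) & mask) == 0;
  }

still folds to a single lock btc; the bit returned by
IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT is inverted afterwards to recover the
xor-fetch semantics.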

/* A simple pass that attempts to fold all builtin functions.  This pass
   is run after we've propagated as many constants as we can.  */

@@ -2806,6 +3026,78 @@ pass_fold_builtins::execute (function *fun)
	      cfg_changed = true;
	      break;

	    case BUILT_IN_ATOMIC_FETCH_OR_1:
	    case BUILT_IN_ATOMIC_FETCH_OR_2:
	    case BUILT_IN_ATOMIC_FETCH_OR_4:
	    case BUILT_IN_ATOMIC_FETCH_OR_8:
	    case BUILT_IN_ATOMIC_FETCH_OR_16:
	      optimize_atomic_bit_test_and (&i, IFN_ATOMIC_BIT_TEST_AND_SET,
					    true, false);
	      break;
	    case BUILT_IN_SYNC_FETCH_AND_OR_1:
	    case BUILT_IN_SYNC_FETCH_AND_OR_2:
	    case BUILT_IN_SYNC_FETCH_AND_OR_4:
	    case BUILT_IN_SYNC_FETCH_AND_OR_8:
	    case BUILT_IN_SYNC_FETCH_AND_OR_16:
	      optimize_atomic_bit_test_and (&i, IFN_ATOMIC_BIT_TEST_AND_SET,
					    false, false);
	      break;

	    case BUILT_IN_ATOMIC_FETCH_XOR_1:
	    case BUILT_IN_ATOMIC_FETCH_XOR_2:
	    case BUILT_IN_ATOMIC_FETCH_XOR_4:
	    case BUILT_IN_ATOMIC_FETCH_XOR_8:
	    case BUILT_IN_ATOMIC_FETCH_XOR_16:
	      optimize_atomic_bit_test_and
		(&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, false);
	      break;
	    case BUILT_IN_SYNC_FETCH_AND_XOR_1:
	    case BUILT_IN_SYNC_FETCH_AND_XOR_2:
	    case BUILT_IN_SYNC_FETCH_AND_XOR_4:
	    case BUILT_IN_SYNC_FETCH_AND_XOR_8:
	    case BUILT_IN_SYNC_FETCH_AND_XOR_16:
	      optimize_atomic_bit_test_and
		(&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, false);
	      break;

	    case BUILT_IN_ATOMIC_XOR_FETCH_1:
	    case BUILT_IN_ATOMIC_XOR_FETCH_2:
	    case BUILT_IN_ATOMIC_XOR_FETCH_4:
	    case BUILT_IN_ATOMIC_XOR_FETCH_8:
	    case BUILT_IN_ATOMIC_XOR_FETCH_16:
	      optimize_atomic_bit_test_and
		(&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, true, true);
	      break;
	    case BUILT_IN_SYNC_XOR_AND_FETCH_1:
	    case BUILT_IN_SYNC_XOR_AND_FETCH_2:
	    case BUILT_IN_SYNC_XOR_AND_FETCH_4:
	    case BUILT_IN_SYNC_XOR_AND_FETCH_8:
	    case BUILT_IN_SYNC_XOR_AND_FETCH_16:
	      optimize_atomic_bit_test_and
		(&i, IFN_ATOMIC_BIT_TEST_AND_COMPLEMENT, false, true);
	      break;

	    case BUILT_IN_ATOMIC_FETCH_AND_1:
	    case BUILT_IN_ATOMIC_FETCH_AND_2:
	    case BUILT_IN_ATOMIC_FETCH_AND_4:
	    case BUILT_IN_ATOMIC_FETCH_AND_8:
	    case BUILT_IN_ATOMIC_FETCH_AND_16:
	      optimize_atomic_bit_test_and (&i, IFN_ATOMIC_BIT_TEST_AND_RESET,
					    true, false);
	      break;
	    case BUILT_IN_SYNC_FETCH_AND_AND_1:
	    case BUILT_IN_SYNC_FETCH_AND_AND_2:
	    case BUILT_IN_SYNC_FETCH_AND_AND_4:
	    case BUILT_IN_SYNC_FETCH_AND_AND_8:
	    case BUILT_IN_SYNC_FETCH_AND_AND_16:
	      optimize_atomic_bit_test_and (&i, IFN_ATOMIC_BIT_TEST_AND_RESET,
					    false, false);
	      break;

	    case BUILT_IN_VA_START:
	    case BUILT_IN_VA_END:
	    case BUILT_IN_VA_COPY: