Commit 2fe0a208 authored by Marek Polacek, committed by Marek Polacek

re PR c/68908 (inefficient code for _Atomic operations)

	PR c/68908
	* c-typeck.c (build_atomic_assign): Improve commentary.  Add
	optimization to use __atomic_fetch_* built-in if possible.

	* gcc.dg/atomic/c11-atomic-exec-6.c: New test.
	* gcc.dg/atomic/c11-atomic-exec-7.c: New test.
	* gcc.dg/atomic/stdatomic-op-5.c: New test.

From-SVN: r232052
parent 8db883aa
gcc/c/ChangeLog

2016-01-04  Marek Polacek  <polacek@redhat.com>

        PR c/68908
        * c-typeck.c (build_atomic_assign): Improve commentary.  Add
        optimization to use __atomic_fetch_* built-in if possible.

2015-12-23  Thomas Schwinge  <thomas@codesourcery.com>

        * c-parser.c (c_parser_oacc_clause_use_device): Merge function
......
gcc/c/c-typeck.c

@@ -3698,9 +3698,9 @@ pointer_diff (location_t loc, tree op0, tree op1)
   return convert (restype, result);
 }
 
-/* Expand atomic compound assignments into an approriate sequence as
-   specified by the C11 standard section 6.5.16.2.
-   given
+/* Expand atomic compound assignments into an appropriate sequence as
+   specified by the C11 standard section 6.5.16.2.
+
        _Atomic T1 E1
        T2 E2
        E1 op= E2
@@ -3732,26 +3732,25 @@ loop:
    done:
      feupdateenv (&fenv);
 
-   Also note that the compiler is simply issuing the generic form of
-   the atomic operations.  This requires temp(s) and has their address
-   taken.  The atomic processing is smart enough to figure out when the
-   size of an object can utilize a lock-free version, and convert the
-   built-in call to the appropriate lock-free routine.  The optimizers
-   will then dispose of any temps that are no longer required, and
-   lock-free implementations are utilized as long as there is target
-   support for the required size.
+   The compiler will issue the __atomic_fetch_* built-in when possible,
+   otherwise it will generate the generic form of the atomic operations.
+   This requires temp(s) and has their address taken.  The atomic processing
+   is smart enough to figure out when the size of an object can utilize
+   a lock-free version, and convert the built-in call to the appropriate
+   lock-free routine.  The optimizers will then dispose of any temps that
+   are no longer required, and lock-free implementations are utilized as
+   long as there is target support for the required size.
 
    If the operator is NOP_EXPR, then this is a simple assignment, and
    an __atomic_store is issued to perform the assignment rather than
-   the above loop.
-*/
+   the above loop.  */
 
 /* Build an atomic assignment at LOC, expanding into the proper
    sequence to store LHS MODIFYCODE= RHS.  Return a value representing
-   the result of the operation, unless RETURN_OLD_P in which case
+   the result of the operation, unless RETURN_OLD_P, in which case
    return the old value of LHS (this is only for postincrement and
    postdecrement).  */
 
 static tree
 build_atomic_assign (location_t loc, tree lhs, enum tree_code modifycode,
                      tree rhs, bool return_old_p)
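Most of the pseudo-code that the comment above refers to is collapsed in this view, so the following is a rough, hand-written sketch of the generic compare-exchange form it describes. It is an illustration only, not the compiler's literal expansion; the helper name is made up, it uses a multiplication (which has no __atomic_fetch_* counterpart, so such an operation keeps this shape even after this patch), and the floating-point environment bookkeeping from C11 6.5.16.2p3 is omitted for brevity.

#include <stdatomic.h>

/* Illustrative only: roughly what "x *= 3" on an _Atomic int expands to
   when no __atomic_fetch_* built-in applies.  */
static int
atomic_mul3 (_Atomic int *p)
{
  int oldval = atomic_load (p);
  int newval;
  do
    /* Recompute from the most recently observed value; on failure the
       compare-exchange refreshes OLDVAL with the current contents.  */
    newval = oldval * 3;
  while (!atomic_compare_exchange_strong (p, &oldval, newval));
  return newval;  /* A compound assignment yields the new value.  */
}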
@@ -3818,6 +3817,93 @@ build_atomic_assign (location_t loc, tree lhs, enum tree_code modifycode,
       return build2 (COMPOUND_EXPR, nonatomic_lhs_type, compound_stmt, val);
     }
 
+  /* Attempt to implement the atomic operation as an __atomic_fetch_* or
+     __atomic_*_fetch built-in rather than a CAS loop.  atomic_bool type
+     isn't applicable for such builtins.  ??? Do we want to handle enums?  */
+  if ((TREE_CODE (lhs_type) == INTEGER_TYPE || POINTER_TYPE_P (lhs_type))
+      && TREE_CODE (rhs_type) == INTEGER_TYPE)
+    {
+      built_in_function fncode;
+      switch (modifycode)
+        {
+        case PLUS_EXPR:
+        case POINTER_PLUS_EXPR:
+          fncode = (return_old_p
+                    ? BUILT_IN_ATOMIC_FETCH_ADD_N
+                    : BUILT_IN_ATOMIC_ADD_FETCH_N);
+          break;
+        case MINUS_EXPR:
+          fncode = (return_old_p
+                    ? BUILT_IN_ATOMIC_FETCH_SUB_N
+                    : BUILT_IN_ATOMIC_SUB_FETCH_N);
+          break;
+        case BIT_AND_EXPR:
+          fncode = (return_old_p
+                    ? BUILT_IN_ATOMIC_FETCH_AND_N
+                    : BUILT_IN_ATOMIC_AND_FETCH_N);
+          break;
+        case BIT_IOR_EXPR:
+          fncode = (return_old_p
+                    ? BUILT_IN_ATOMIC_FETCH_OR_N
+                    : BUILT_IN_ATOMIC_OR_FETCH_N);
+          break;
+        case BIT_XOR_EXPR:
+          fncode = (return_old_p
+                    ? BUILT_IN_ATOMIC_FETCH_XOR_N
+                    : BUILT_IN_ATOMIC_XOR_FETCH_N);
+          break;
+        default:
+          goto cas_loop;
+        }
+
+      /* We can only use "_1" through "_16" variants of the atomic fetch
+         built-ins.  */
+      unsigned HOST_WIDE_INT size = tree_to_uhwi (TYPE_SIZE_UNIT (lhs_type));
+      if (size != 1 && size != 2 && size != 4 && size != 8 && size != 16)
+        goto cas_loop;
+
+      /* If this is a pointer type, we need to multiply by the size of
+         the pointer target type.  */
+      if (POINTER_TYPE_P (lhs_type))
+        {
+          if (!COMPLETE_TYPE_P (TREE_TYPE (lhs_type))
+              /* ??? This would introduce -Wdiscarded-qualifiers
+                 warning: __atomic_fetch_* expect volatile void *
+                 type as the first argument.  (Assignments between
+                 atomic and non-atomic objects are OK.)  */
+              || TYPE_RESTRICT (lhs_type))
+            goto cas_loop;
+          tree sz = TYPE_SIZE_UNIT (TREE_TYPE (lhs_type));
+          rhs = fold_build2_loc (loc, MULT_EXPR, ptrdiff_type_node,
+                                 convert (ptrdiff_type_node, rhs),
+                                 convert (ptrdiff_type_node, sz));
+        }
+
+      /* Build __atomic_fetch_* (&lhs, &val, SEQ_CST), or
+         __atomic_*_fetch (&lhs, &val, SEQ_CST).  */
+      fndecl = builtin_decl_explicit (fncode);
+      params->quick_push (lhs_addr);
+      params->quick_push (rhs);
+      params->quick_push (seq_cst);
+      func_call = c_build_function_call_vec (loc, vNULL, fndecl, params, NULL);
+      newval = create_tmp_var_raw (nonatomic_lhs_type);
+      TREE_ADDRESSABLE (newval) = 1;
+      TREE_NO_WARNING (newval) = 1;
+      rhs = build4 (TARGET_EXPR, nonatomic_lhs_type, newval, func_call,
+                    NULL_TREE, NULL_TREE);
+      SET_EXPR_LOCATION (rhs, loc);
+      add_stmt (rhs);
+
+      /* Finish the compound statement.  */
+      compound_stmt = c_end_compound_stmt (loc, compound_stmt, false);
+
+      /* NEWVAL is the value which was stored, return a COMPOUND_STMT of
+         the statement and that value.  */
+      return build2 (COMPOUND_EXPR, nonatomic_lhs_type, compound_stmt, newval);
+    }
+
+cas_loop:
   /* Create the variables and labels required for the op= form.  */
   old = create_tmp_var_raw (nonatomic_lhs_type);
   old_addr = build_unary_op (loc, ADDR_EXPR, old, 0);
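To make the mapping in the new block concrete, the sketch below is a hand-written approximation of what the added path builds for an atomic int; the helper name demo_lowering and the chosen operands are made up for illustration. Post-increment and post-decrement ask for the old value and therefore map to the __atomic_fetch_* built-ins, while every other compound assignment maps to __atomic_*_fetch and yields the new value; for atomic pointers the right-hand side is first converted to ptrdiff_t and multiplied by the size of the pointer target type, since the built-ins do not scale the addend.

/* Illustration only: roughly the calls built for an atomic int object
   (written against a plain int, since the built-ins themselves supply
   the atomicity once the front end has taken the object's address).  */
static int
demo_lowering (int *i)
{
  int old = __atomic_fetch_add (i, 1, __ATOMIC_SEQ_CST);  /* i++: old value */
  int cur = __atomic_add_fetch (i, 5, __ATOMIC_SEQ_CST);  /* i += 5: new value */
  __atomic_and_fetch (i, 4, __ATOMIC_SEQ_CST);            /* i &= 4 */
  return old + cur;
}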
......
gcc/testsuite/ChangeLog

2016-01-04  Marek Polacek  <polacek@redhat.com>

        PR c/68908
        * gcc.dg/atomic/c11-atomic-exec-6.c: New test.
        * gcc.dg/atomic/c11-atomic-exec-7.c: New test.
        * gcc.dg/atomic/stdatomic-op-5.c: New test.

2016-01-04  Eric Botcazou  <ebotcazou@adacore.com>

        * gcc.target/sparc/20160104-2.c: New test.
......
gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-6.c (new file):

/* Test we do the correct thing for adding to / subtracting from a pointer,
   i.e. that the multiplication by the size of the pointer target type
   still occurs.  */
/* { dg-do run } */
/* { dg-options "-std=c11 -pedantic-errors" } */

#define TEST_POINTER_ADD_SUB(TYPE)              \
  do                                            \
    {                                           \
      TYPE a[3][3];                             \
      TYPE (*_Atomic q)[3] = &a[0];             \
      ++q;                                      \
      if (q != &a[1])                           \
        __builtin_abort ();                     \
      q++;                                      \
      if (q != &a[2])                           \
        __builtin_abort ();                     \
      --q;                                      \
      if (q != &a[1])                           \
        __builtin_abort ();                     \
      q--;                                      \
      if (q != &a[0])                           \
        __builtin_abort ();                     \
      q += 2;                                   \
      if (q != &a[2])                           \
        __builtin_abort ();                     \
      q -= 2;                                   \
      if (q != &a[0])                           \
        __builtin_abort ();                     \
    }                                           \
  while (0)

int
main (void)
{
  TEST_POINTER_ADD_SUB (_Bool);
  TEST_POINTER_ADD_SUB (char);
  TEST_POINTER_ADD_SUB (signed char);
  TEST_POINTER_ADD_SUB (unsigned char);
  TEST_POINTER_ADD_SUB (signed short);
  TEST_POINTER_ADD_SUB (unsigned short);
  TEST_POINTER_ADD_SUB (signed int);
  TEST_POINTER_ADD_SUB (unsigned int);
  TEST_POINTER_ADD_SUB (signed long);
  TEST_POINTER_ADD_SUB (unsigned long);
  TEST_POINTER_ADD_SUB (signed long long);
  TEST_POINTER_ADD_SUB (unsigned long long);
  TEST_POINTER_ADD_SUB (float);
  TEST_POINTER_ADD_SUB (double);
  TEST_POINTER_ADD_SUB (long double);
  TEST_POINTER_ADD_SUB (_Complex float);
  TEST_POINTER_ADD_SUB (_Complex double);
  TEST_POINTER_ADD_SUB (_Complex long double);
}
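For one concrete instantiation of the macro above (TYPE = int, shown here only as an illustration and not part of the test), the checks reduce to something like the following; every step has to move the atomic pointer by whole sizeof (int[3]) elements, which is exactly the multiplication by the pointer target size that the test comment mentions.

/* Illustration only: a reduced TYPE = int instantiation of
   TEST_POINTER_ADD_SUB.  */
static void
pointer_add_sub_int (void)
{
  int a[3][3];
  int (*_Atomic q)[3] = &a[0];
  ++q;                          /* advances by sizeof (int[3]) bytes */
  if (q != &a[1])
    __builtin_abort ();
  q -= 1;                       /* back to &a[0] */
  if (q != &a[0])
    __builtin_abort ();
  q += 2;                       /* forward two whole rows */
  if (q != &a[2])
    __builtin_abort ();
}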
gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-7.c (new file):

/* Test we're able to use __atomic_fetch_* where possible and verify
   we generate correct code.  */
/* { dg-do run } */
/* { dg-options "-std=c11 -pedantic-errors -fdump-tree-original" } */

#include <stdatomic.h>
#include <limits.h>

extern void abort (void);

#define TEST_TYPE(TYPE, MAX)                    \
  do                                            \
    {                                           \
      struct S { char a[(MAX) + 1]; };          \
      TYPE t = 1;                               \
      struct S a[2][2];                         \
      struct S (*_Atomic p)[2] = &a[0];         \
      p += t;                                   \
      if (p != &a[1])                           \
        abort ();                               \
      p -= t;                                   \
      if (p != &a[0])                           \
        abort ();                               \
    }                                           \
  while (0)

int
main (void)
{
  TEST_TYPE (signed char, UCHAR_MAX);
  TEST_TYPE (signed short, USHRT_MAX);
}

/* { dg-final { scan-tree-dump-not "__atomic_compare_exchange" "original" } } */
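The struct size in the test above is not arbitrary: with MAX set to UCHAR_MAX or USHRT_MAX, the element that p steps over is far larger than the maximum value of the narrow TYPE used as the offset, so the byte offset cannot be computed in that type. A hedged sketch of the arithmetic involved (the helper below is illustrative, not code from the compiler):

#include <stddef.h>

/* Illustration only.  For TYPE = signed char and MAX = UCHAR_MAX, each
   element P steps over is a struct S[2] of 2 * (UCHAR_MAX + 1) = 512
   bytes, so "p += t" is only correct if the offset is widened before
   the multiplication; build_atomic_assign does this by converting both
   operands to ptrdiff_t.  */
static ptrdiff_t
byte_offset (signed char t, size_t elem_size)  /* elem_size == 512 here */
{
  return (ptrdiff_t) t * (ptrdiff_t) elem_size;
}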
gcc/testsuite/gcc.dg/atomic/stdatomic-op-5.c (new file):

/* Test we're able to use __atomic_fetch_* where possible and verify
   we generate correct code.  */
/* { dg-do run } */
/* { dg-options "-std=c11 -pedantic-errors -fdump-tree-original" } */

#include <stdatomic.h>

extern void abort (void);

static void
test_inc_dec (void)
{
  atomic_int i = ATOMIC_VAR_INIT (1);

  i++;
  if (i != 2)
    abort ();
  i--;
  if (i != 1)
    abort ();
  ++i;
  if (i != 2)
    abort ();
  --i;
  if (i != 1)
    abort ();
  if (++i != 2)
    abort ();
  if (i++ != 2)
    abort ();
  if (i != 3)
    abort ();
  if (i-- != 3)
    abort ();
  if (i != 2)
    abort ();
}

static void
test_add_sub (void)
{
  atomic_int i = ATOMIC_VAR_INIT (1);

  i += 2;
  if (i != 3)
    abort ();
  i -= 2;
  if (i != 1)
    abort ();
  if ((i += 2) != 3)
    abort ();
  if ((i -= 2) != 1)
    abort ();
}

static void
test_and (void)
{
  atomic_int i = ATOMIC_VAR_INIT (5);

  i &= 4;
  if (i != 4)
    abort ();
  if ((i &= 4) != 4)
    abort ();
}

static void
test_xor (void)
{
  atomic_int i = ATOMIC_VAR_INIT (5);

  i ^= 2;
  if (i != 7)
    abort ();
  if ((i ^= 4) != 3)
    abort ();
}

static void
test_or (void)
{
  atomic_int i = ATOMIC_VAR_INIT (5);

  i |= 2;
  if (i != 7)
    abort ();
  if ((i |= 8) != 15)
    abort ();
}

static void
test_ptr (atomic_int *p)
{
  ++*p;
  if (*p != 2)
    abort ();
  *p += 2;
  if (*p != 4)
    abort ();
  (*p)++;
  if (*p != 5)
    abort ();
  --*p;
  if (*p != 4)
    abort ();
  (*p)--;
  if (*p != 3)
    abort ();
  *p -= 2;
  if (*p != 1)
    abort ();

  atomic_int j = ATOMIC_VAR_INIT (0);
  j += *p;
  if (j != 1)
    abort ();
  j -= *p;
  if (j != 0)
    abort ();
}

int
main (void)
{
  atomic_int i = ATOMIC_VAR_INIT (1);

  test_inc_dec ();
  test_add_sub ();
  test_and ();
  test_xor ();
  test_or ();
  test_ptr (&i);
}

/* { dg-final { scan-tree-dump-not "__atomic_compare_exchange" "original" } } */