Commit 849a76a5 by Jakub Jelinek (committed by Jakub Jelinek)

re PR middle-end/66867 (Suboptimal code generation for atomic_compare_exchange)

	PR middle-end/66867
	* builtins.c (expand_ifn_atomic_compare_exchange_into_call,
	expand_ifn_atomic_compare_exchange): New functions.
	* internal-fn.c (expand_ATOMIC_COMPARE_EXCHANGE): New function.
	* tree.h (build_call_expr_internal_loc): Rename to ...
	(build_call_expr_internal_loc_array): ... this.  Fix up type of
	last argument.
	* internal-fn.def (ATOMIC_COMPARE_EXCHANGE): New internal fn.
	* predict.c (expr_expected_value_1): Handle IMAGPART_EXPR of
	ATOMIC_COMPARE_EXCHANGE result.
	* builtins.h (expand_ifn_atomic_compare_exchange): New prototype.
	* gimple-fold.h (optimize_atomic_compare_exchange_p,
	fold_builtin_atomic_compare_exchange): New prototypes.
	* gimple-fold.c (optimize_atomic_compare_exchange_p,
	fold_builtin_atomic_compare_exchange): New functions.
	* tree-ssa.c (execute_update_addresses_taken): If
	optimize_atomic_compare_exchange_p, ignore &var in 2nd argument
	of call when finding addressable vars, and if such var becomes
	non-addressable, call fold_builtin_atomic_compare_exchange.

From-SVN: r237814
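
To see the pattern this change targets, here is a minimal C11 example (illustrative only, not part of the commit). Before this change, the local "expected" below had to live in a stack slot merely because the builtin takes its address:

    #include <stdatomic.h>
    #include <stdbool.h>

    bool
    try_update (atomic_int *p, int desired)
    {
      int expected = 0;
      /* On typical targets this expands to a 4-byte __atomic_compare_exchange;
         taking the address of EXPECTED is what used to force it into memory.
         With this change, when EXPECTED is addressable only because of such
         calls, the call is rewritten into the internal ATOMIC_COMPARE_EXCHANGE
         function and EXPECTED can stay in a register.  */
      return atomic_compare_exchange_strong (p, &expected, desired);
    }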

gcc/builtins.c
@@ -5158,6 +5158,123 @@ expand_builtin_atomic_compare_exchange (machine_mode mode, tree exp,
  return target;
}

/* Helper function for expand_ifn_atomic_compare_exchange - expand
   internal ATOMIC_COMPARE_EXCHANGE call into __atomic_compare_exchange_N
   call.  The weak parameter must be dropped to match the expected parameter
   list and the expected argument changed from value to pointer to memory
   slot.  */

static void
expand_ifn_atomic_compare_exchange_into_call (gcall *call, machine_mode mode)
{
  unsigned int z;
  vec<tree, va_gc> *vec;

  vec_alloc (vec, 5);
  vec->quick_push (gimple_call_arg (call, 0));
  tree expected = gimple_call_arg (call, 1);
  rtx x = assign_stack_temp_for_type (mode, GET_MODE_SIZE (mode),
                                      TREE_TYPE (expected));
  rtx expd = expand_expr (expected, x, mode, EXPAND_NORMAL);
  if (expd != x)
    emit_move_insn (x, expd);
  tree v = make_tree (TREE_TYPE (expected), x);
  vec->quick_push (build1 (ADDR_EXPR,
                           build_pointer_type (TREE_TYPE (expected)), v));
  vec->quick_push (gimple_call_arg (call, 2));
  /* Skip the boolean weak parameter.  */
  for (z = 4; z < 6; z++)
    vec->quick_push (gimple_call_arg (call, z));
  built_in_function fncode
    = (built_in_function) ((int) BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1
                           + exact_log2 (GET_MODE_SIZE (mode)));
  tree fndecl = builtin_decl_explicit (fncode);
  tree fn = build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (fndecl)),
                    fndecl);
  tree exp = build_call_vec (boolean_type_node, fn, vec);
  tree lhs = gimple_call_lhs (call);
  rtx boolret = expand_call (exp, NULL_RTX, lhs == NULL_TREE);
  if (lhs)
    {
      rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
      if (GET_MODE (boolret) != mode)
        boolret = convert_modes (mode, GET_MODE (boolret), boolret, 1);
      x = force_reg (mode, x);
      write_complex_part (target, boolret, true);
      write_complex_part (target, x, false);
    }
}
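/* Editor's note, illustrative only (not part of this commit): with this
   fallback, an internal call along the lines of
     t = .ATOMIC_COMPARE_EXCHANGE (p, expected, desired, 4, 5, 5);
   ends up roughly as
     tmp = expected;                              // stack temporary
     ok = __atomic_compare_exchange_4 (p, &tmp, desired, 5, 5);
     REALPART (t) = tmp;  IMAGPART (t) = ok;
   where "tmp" stands for the stack slot created by assign_stack_temp_for_type
   above, and the boolean weak argument has been dropped to match the library
   function's parameter list.  */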
/* Expand IFN_ATOMIC_COMPARE_EXCHANGE internal function.  */

void
expand_ifn_atomic_compare_exchange (gcall *call)
{
  int size = tree_to_shwi (gimple_call_arg (call, 3)) & 255;
  gcc_assert (size == 1 || size == 2 || size == 4 || size == 8 || size == 16);
  machine_mode mode = mode_for_size (BITS_PER_UNIT * size, MODE_INT, 0);
  rtx expect, desired, mem, oldval, boolret;
  enum memmodel success, failure;
  tree lhs;
  bool is_weak;
  source_location loc
    = expansion_point_location_if_in_system_header (gimple_location (call));

  success = get_memmodel (gimple_call_arg (call, 4));
  failure = get_memmodel (gimple_call_arg (call, 5));

  if (failure > success)
    {
      warning_at (loc, OPT_Winvalid_memory_model,
                  "failure memory model cannot be stronger than success "
                  "memory model for %<__atomic_compare_exchange%>");
      success = MEMMODEL_SEQ_CST;
    }

  if (is_mm_release (failure) || is_mm_acq_rel (failure))
    {
      warning_at (loc, OPT_Winvalid_memory_model,
                  "invalid failure memory model for "
                  "%<__atomic_compare_exchange%>");
      failure = MEMMODEL_SEQ_CST;
      success = MEMMODEL_SEQ_CST;
    }

  if (!flag_inline_atomics)
    {
      expand_ifn_atomic_compare_exchange_into_call (call, mode);
      return;
    }

  /* Expand the operands.  */
  mem = get_builtin_sync_mem (gimple_call_arg (call, 0), mode);
  expect = expand_expr_force_mode (gimple_call_arg (call, 1), mode);
  desired = expand_expr_force_mode (gimple_call_arg (call, 2), mode);

  is_weak = (tree_to_shwi (gimple_call_arg (call, 3)) & 256) != 0;

  boolret = NULL;
  oldval = NULL;

  if (!expand_atomic_compare_and_swap (&boolret, &oldval, mem, expect, desired,
                                       is_weak, success, failure))
    {
      expand_ifn_atomic_compare_exchange_into_call (call, mode);
      return;
    }

  lhs = gimple_call_lhs (call);
  if (lhs)
    {
      rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
      if (GET_MODE (boolret) != mode)
        boolret = convert_modes (mode, GET_MODE (boolret), boolret, 1);
      write_complex_part (target, boolret, true);
      write_complex_part (target, oldval, false);
    }
}
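/* Editor's note, illustrative only (not part of this commit): argument 3 of
   the internal call packs both operands of the original builtin as
   weak * 256 + size.  For example, a weak 4-byte CAS passes 260:
   size = 260 & 255 = 4 and is_weak = (260 & 256) != 0, matching the decoding
   at the top of expand_ifn_atomic_compare_exchange above.  */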
/* Expand the __atomic_load intrinsic:
     TYPE __atomic_load (TYPE *object, enum memmodel)
   EXP is the CALL_EXPR.

gcc/builtins.h
@@ -72,6 +72,7 @@ extern tree std_canonical_va_list_type (tree);
extern void std_expand_builtin_va_start (tree, rtx);
extern void expand_builtin_trap (void);
extern void expand_ifn_atomic_bit_test_and (gcall *);
extern void expand_ifn_atomic_compare_exchange (gcall *);
extern rtx expand_builtin (tree, rtx, rtx, machine_mode, int);
extern rtx expand_builtin_with_bounds (tree, rtx, rtx, machine_mode, int);
extern enum built_in_function builtin_mathfn_code (const_tree);

gcc/gimple-fold.c
@@ -2953,6 +2953,133 @@ fold_internal_goacc_dim (const gimple *call)
  return result;
}

/* Return true if stmt is __atomic_compare_exchange_N call which is suitable
   for conversion into ATOMIC_COMPARE_EXCHANGE if the second argument is
   &var where var is only addressable because of such calls.  */

bool
optimize_atomic_compare_exchange_p (gimple *stmt)
{
  if (gimple_call_num_args (stmt) != 6
      || !flag_inline_atomics
      || !optimize
      || (flag_sanitize & (SANITIZE_THREAD | SANITIZE_ADDRESS)) != 0
      || !gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)
      || !gimple_vdef (stmt)
      || !gimple_vuse (stmt))
    return false;

  tree fndecl = gimple_call_fndecl (stmt);
  switch (DECL_FUNCTION_CODE (fndecl))
    {
    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_1:
    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_2:
    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_4:
    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_8:
    case BUILT_IN_ATOMIC_COMPARE_EXCHANGE_16:
      break;
    default:
      return false;
    }

  tree expected = gimple_call_arg (stmt, 1);
  if (TREE_CODE (expected) != ADDR_EXPR
      || !SSA_VAR_P (TREE_OPERAND (expected, 0))
      || !is_gimple_reg_type (TREE_TYPE (TREE_OPERAND (expected, 0)))
      || !auto_var_in_fn_p (TREE_OPERAND (expected, 0), current_function_decl)
      || TREE_THIS_VOLATILE (TREE_TYPE (TREE_OPERAND (expected, 0)))
      || TREE_CODE (TREE_TYPE (TREE_OPERAND (expected, 0))) == VECTOR_TYPE
      || TREE_CODE (TREE_TYPE (TREE_OPERAND (expected, 0))) == COMPLEX_TYPE)
    return false;

  tree weak = gimple_call_arg (stmt, 3);
  if (!integer_zerop (weak) && !integer_onep (weak))
    return false;

  tree parmt = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
  tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
  machine_mode mode = TYPE_MODE (itype);

  if (direct_optab_handler (atomic_compare_and_swap_optab, mode)
      == CODE_FOR_nothing
      && optab_handler (sync_compare_and_swap_optab, mode) == CODE_FOR_nothing)
    return false;

  if (int_size_in_bytes (TREE_TYPE (TREE_OPERAND (expected, 0)))
      != GET_MODE_SIZE (mode))
    return false;

  return true;
}
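/* Editor's note, illustrative only (not part of this commit): the predicate
   accepts e.g.
     r_1 = __atomic_compare_exchange_4 (p_2, &e, d_3, 0, 5, 5);
   when E is a non-volatile automatic variable of scalar (non-vector,
   non-complex) register type whose size matches the 4-byte mode and the
   target has a compare-and-swap pattern for that mode.  It rejects calls
   whose second argument is anything other than &var, weak arguments that are
   not literal 0 or 1, and code compiled with -fsanitize=thread or
   -fsanitize=address.  */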
/* Fold
     r = __atomic_compare_exchange_N (p, &e, d, w, s, f);
   into
     _Complex uintN_t t = ATOMIC_COMPARE_EXCHANGE (p, e, d, w * 256 + N, s, f);
     i = IMAGPART_EXPR <t>;
     r = (_Bool) i;
     e = REALPART_EXPR <t>;  */

void
fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);
  tree fndecl = gimple_call_fndecl (stmt);
  tree parmt = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
  tree itype = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (parmt)));
  tree ctype = build_complex_type (itype);
  tree expected = TREE_OPERAND (gimple_call_arg (stmt, 1), 0);
  gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
                                   expected);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  gimple_stmt_iterator gsiret = gsi_for_stmt (g);
  if (!useless_type_conversion_p (itype, TREE_TYPE (expected)))
    {
      g = gimple_build_assign (make_ssa_name (itype), VIEW_CONVERT_EXPR,
                               build1 (VIEW_CONVERT_EXPR, itype,
                                       gimple_assign_lhs (g)));
      gsi_insert_before (gsi, g, GSI_SAME_STMT);
    }
  int flag = (integer_onep (gimple_call_arg (stmt, 3)) ? 256 : 0)
             + int_size_in_bytes (itype);
  g = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6,
                                  gimple_call_arg (stmt, 0),
                                  gimple_assign_lhs (g),
                                  gimple_call_arg (stmt, 2),
                                  build_int_cst (integer_type_node, flag),
                                  gimple_call_arg (stmt, 4),
                                  gimple_call_arg (stmt, 5));
  tree lhs = make_ssa_name (ctype);
  gimple_call_set_lhs (g, lhs);
  gimple_set_vdef (g, gimple_vdef (stmt));
  gimple_set_vuse (g, gimple_vuse (stmt));
  SSA_NAME_DEF_STMT (gimple_vdef (g)) = g;
  if (gimple_call_lhs (stmt))
    {
      gsi_insert_before (gsi, g, GSI_SAME_STMT);
      g = gimple_build_assign (make_ssa_name (itype), IMAGPART_EXPR,
                               build1 (IMAGPART_EXPR, itype, lhs));
      gsi_insert_before (gsi, g, GSI_SAME_STMT);
      g = gimple_build_assign (gimple_call_lhs (stmt), NOP_EXPR,
                               gimple_assign_lhs (g));
    }
  gsi_replace (gsi, g, true);
  g = gimple_build_assign (make_ssa_name (itype), REALPART_EXPR,
                           build1 (REALPART_EXPR, itype, lhs));
  gsi_insert_after (gsi, g, GSI_NEW_STMT);
  if (!useless_type_conversion_p (TREE_TYPE (expected), itype))
    {
      g = gimple_build_assign (make_ssa_name (TREE_TYPE (expected)),
                               VIEW_CONVERT_EXPR,
                               build1 (VIEW_CONVERT_EXPR, TREE_TYPE (expected),
                                       gimple_assign_lhs (g)));
      gsi_insert_after (gsi, g, GSI_NEW_STMT);
    }
  g = gimple_build_assign (expected, SSA_NAME, gimple_assign_lhs (g));
  gsi_insert_after (gsi, g, GSI_NEW_STMT);
  *gsi = gsiret;
}
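/* Editor's note, illustrative only (not part of this commit; SSA names are
   made up): for
     r_1 = __atomic_compare_exchange_4 (p_2, &e, d_3, 0, 5, 5);
   the fold above emits roughly
     e.0_4 = e;
     t_5 = .ATOMIC_COMPARE_EXCHANGE (p_2, e.0_4, d_3, 4, 5, 5);
     i_6 = IMAGPART_EXPR <t_5>;
     r_1 = (_Bool) i_6;
     e.1_7 = REALPART_EXPR <t_5>;
     e = e.1_7;
   so E's address is no longer taken and it can be rewritten into SSA form.  */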
/* Return true if ARG0 CODE ARG1 in infinite signed precision operation
   doesn't fit into TYPE.  The test for overflow should be regardless of
   -fwrapv, and even for unsigned types.  */

gcc/gimple-fold.h
@@ -32,6 +32,8 @@ extern tree maybe_fold_and_comparisons (enum tree_code, tree, tree,
                                        enum tree_code, tree, tree);
extern tree maybe_fold_or_comparisons (enum tree_code, tree, tree,
                                       enum tree_code, tree, tree);
extern bool optimize_atomic_compare_exchange_p (gimple *);
extern void fold_builtin_atomic_compare_exchange (gimple_stmt_iterator *);
extern bool arith_overflowed_p (enum tree_code, const_tree, const_tree,
                                const_tree);
extern tree no_follow_ssa_edges (tree);

gcc/internal-fn.c
@@ -2185,6 +2185,14 @@ expand_ATOMIC_BIT_TEST_AND_RESET (internal_fn, gcall *call)
  expand_ifn_atomic_bit_test_and (call);
}

/* Expand atomic compare and exchange.  */

static void
expand_ATOMIC_COMPARE_EXCHANGE (internal_fn, gcall *call)
{
  expand_ifn_atomic_compare_exchange (call);
}
/* Expand a call to FN using the operands in STMT.  FN has a single
   output operand and NARGS input operands.  */

gcc/internal-fn.def
@@ -193,6 +193,7 @@ DEF_INTERNAL_FN (SET_EDOM, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_SET, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_COMPLEMENT, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (ATOMIC_BIT_TEST_AND_RESET, ECF_LEAF | ECF_NOTHROW, NULL)
DEF_INTERNAL_FN (ATOMIC_COMPARE_EXCHANGE, ECF_LEAF | ECF_NOTHROW, NULL)
#undef DEF_INTERNAL_INT_FN
#undef DEF_INTERNAL_FLT_FN

gcc/predict.c
@@ -2134,6 +2134,25 @@ expr_expected_value_1 (tree type, tree op0, enum tree_code code,
      if (TREE_CONSTANT (op0))
        return op0;

      if (code == IMAGPART_EXPR)
        {
          if (TREE_CODE (TREE_OPERAND (op0, 0)) == SSA_NAME)
            {
              def = SSA_NAME_DEF_STMT (TREE_OPERAND (op0, 0));
              if (is_gimple_call (def)
                  && gimple_call_internal_p (def)
                  && (gimple_call_internal_fn (def)
                      == IFN_ATOMIC_COMPARE_EXCHANGE))
                {
                  /* Assume that any given atomic operation has low contention,
                     and thus the compare-and-swap operation succeeds.  */
                  if (predictor)
                    *predictor = PRED_COMPARE_AND_SWAP;
                  return build_one_cst (TREE_TYPE (op0));
                }
            }
        }

      if (code != SSA_NAME)
        return NULL_TREE;

gcc/tree-ssa.c
@@ -1414,8 +1414,21 @@ execute_update_addresses_taken (void)
          enum gimple_code code = gimple_code (stmt);
          tree decl;

-         /* Note all addresses taken by the stmt.  */
-         gimple_ior_addresses_taken (addresses_taken, stmt);
+         if (code == GIMPLE_CALL
+             && optimize_atomic_compare_exchange_p (stmt))
+           {
+             /* For __atomic_compare_exchange_N if the second argument
+                is &var, don't mark var addressable;
+                if it becomes non-addressable, we'll rewrite it into
+                ATOMIC_COMPARE_EXCHANGE call.  */
+             tree arg = gimple_call_arg (stmt, 1);
+             gimple_call_set_arg (stmt, 1, null_pointer_node);
+             gimple_ior_addresses_taken (addresses_taken, stmt);
+             gimple_call_set_arg (stmt, 1, arg);
+           }
+         else
+           /* Note all addresses taken by the stmt.  */
+           gimple_ior_addresses_taken (addresses_taken, stmt);
          /* If we have a call or an assignment, see if the lhs contains
             a local decl that requires not to be a gimple register.  */
@@ -1657,6 +1670,16 @@ execute_update_addresses_taken (void)
          else if (gimple_code (stmt) == GIMPLE_CALL)
            {
              unsigned i;
              if (optimize_atomic_compare_exchange_p (stmt))
                {
                  tree expected = gimple_call_arg (stmt, 1);
                  if (bitmap_bit_p (suitable_for_renaming,
                                    DECL_UID (TREE_OPERAND (expected, 0))))
                    {
                      fold_builtin_atomic_compare_exchange (&gsi);
                      continue;
                    }
                }
              for (i = 0; i < gimple_call_num_args (stmt); ++i)
                {
                  tree *argp = gimple_call_arg_ptr (stmt, i);

gcc/tree.h
@@ -3985,8 +3985,8 @@ extern tree build_call_expr_loc (location_t, tree, int, ...);
extern tree build_call_expr (tree, int, ...);
extern tree build_call_expr_internal_loc (location_t, enum internal_fn,
                                          tree, int, ...);
-extern tree build_call_expr_internal_loc (location_t, enum internal_fn,
-                                          tree, int, tree *);
+extern tree build_call_expr_internal_loc_array (location_t, enum internal_fn,
+                                                tree, int, const tree *);
extern tree maybe_build_call_expr_loc (location_t, combined_fn, tree,
                                       int, ...);
extern tree build_string_literal (int, const char *);