Commit 86951993 by Andrew MacLeod

Check in patch/merge from cxx-mem-model Branch

From-SVN: r181031
parent a8a058f6
2011-11-06 Andrew MacLeod <amacleod@redhat.com>
Richard Henderson <rth@redhat.com>
Aldy Hernandez <aldyh@redhat.com>
Merged from cxx-mem-model.
* cppbuiltin.c (define__GNUC__): Define __ATOMIC memory models.
* coretypes.h (enum memmodel): New. Enumerated memory model type.
* Makefile.in (cppbuiltin.o): Add missing dependency on $(TREE_H).
* genopinit.c (optabs): Add atomic direct optab handlers.
* sync-builtins.def (BUILT_IN_ATOMIC_*): New builtins.
* builtin-types.def (BT_CONST_VOLATILE_PTR,
BT_FN_I{1,2,4,8,16}_CONST_VPTR_INT, BT_FN_VOID_VPTR_INT,
BT_FN_BOOL_VPTR_INT, BT_FN_BOOL_SIZE_CONST_VPTR,
BT_FN_I{1,2,4,8,16}_VPTR_I{1,2,4,8,16}_INT,
BT_FN_VOID_VPTR_I{1,2,4,8,16}_INT, BT_FN_VOID_SIZE_VPTR_PTR_INT,
BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT,
BT_FN_BOOL_VPTR_PTR_I{1,2,4,8,16}_BOOL_INT_INT): New builtin types.
* expr.h (expand_atomic_*): Add prototypes.
(expand_{bool,val}_compare_and_swap): Remove prototypes.
* c-typeck.c (build_function_call_vec): Don't reprocess __atomic
parameters.
* common.opt (Winvalid-memory-model): New warning flag.
(finline-atomics): New. Flag to disable atomic inlining.
* params.h (ALLOW_LOAD_DATA_RACES): New.
(ALLOW_PACKED_LOAD_DATA_RACES): New.
(ALLOW_PACKED_STORE_DATA_RACES): New.
* params.def (PARAM_ALLOW_LOAD_DATA_RACES): New.
(PARAM_ALLOW_PACKED_LOAD_DATA_RACES): New.
(PARAM_ALLOW_PACKED_STORE_DATA_RACES): New.
* builtins.c (is_builtin_name): Handle __atomic.
(get_memmodel): New. Extract memory model.
(expand_expr_force_mode): New. Factor out common code for ensuring an
integer argument is in the proper mode.
(expand_builtin_sync_operation): Remove ignore param. Always call
expand_atomic_fetch_op instead of the old expanders.
(expand_builtin_compare_and_swap,
expand_builtin_sync_lock_test_and_set): Use expand_expr_force_mode,
call atomic expanders instead of sync expanders.
(expand_builtin_sync_lock_release): Call atomic_store expander.
(expand_builtin_atomic_compare_exchange, expand_builtin_atomic_load,
expand_builtin_atomic_store, expand_builtin_atomic_fetch_op): New.
(expand_builtin_atomic_exchange): New.
(fold_builtin_atomic_always_lock_free,
expand_builtin_atomic_always_lock_free,
fold_builtin_atomic_is_lock_free, expand_builtin_atomic_is_lock_free):
New.
(expand_builtin_mem_thread_fence, expand_builtin_atomic_thread_fence,
expand_builtin_atomic_signal_fence): New.
(expand_builtin_mem_signal_fence): New.
(expand_builtin): Add cases for BUILT_IN_ATOMIC_*.
(fold_builtin_2): Add cases for BUILT_IN_ATOMIC_{IS,ALWAYS}_LOCK_FREE.
* optabs.h (DOI_atomic_*): Define new atomics.
(atomic_*_optab): Define.
(can_compare_and_swap_p, expand_atomic_compare_and_swap): New
prototypes.
* optabs.c (expand_sync_operation, expand_sync_fetch_operation): Remove.
(expand_sync_lock_test_and_set): Remove.
(expand_atomic_load, expand_atomic_store): New.
(expand_atomic_exchange): New.
(expand_atomic_compare_and_swap): New. Implements
atomic_compare_exchange via compare and swap.
(struct atomic_op_functions): Opcode table struct for fetch ops.
(get_atomic_op_for_code): New. Return an opcode table entry.
(maybe_emit_op): New. Try to emit a fetch op.
(expand_atomic_fetch_op): New.
(expand_val_compare_and_swap_1): Remove.
(expand_val_compare_and_swap, expand_bool_compare_and_swap): Remove.
(expand_atomic_compare_and_swap): Rename from
expand_atomic_compare_exchange. Rewrite to return both success and
oldval return values; expand via both atomic and sync optabs.
(can_compare_and_swap_p): New.
(expand_compare_and_swap_loop): Use expand_atomic_compare_and_swap.
(maybe_gen_insn): Handle 7 and 8 operands.
* omp-low.c (expand_omp_atomic_fetch_op): Don't test individual
fetch_op optabs, only test can_compare_and_swap_p. Use __atomic
builtins instead of __sync builtins.
(expand_omp_atomic_pipeline): Use can_compare_and_swap_p.
* doc/extend.texi: Document __atomic built-in functions.
* doc/invoke.texi: Document data race parameters.
* doc/md.texi: Document atomic patterns.
* config/i386/i386.md (UNSPEC_MOVA): New.
(UNSPECV_CMPXCHG): Split into ...
(UNSPECV_CMPXCHG_1, UNSPECV_CMPXCHG_2,
UNSPECV_CMPXCHG_3, UNSPECV_CMPXCHG_4): New.
* config/i386/sync.md (ATOMIC): New mode iterator.
(atomic_load<ATOMIC>, atomic_store<ATOMIC>): New.
(atomic_loaddi_fpu, atomic_storedi_fpu, movdi_via_fpu): New.
(mem_thread_fence): Rename from memory_barrier.
Handle the added memory model parameter.
(mfence_nosse): Rename from memory_barrier_nosse.
(sync_compare_and_swap<CASMODE>): Split into ...
(atomic_compare_and_swap<SWI124>): this and ...
(atomic_compare_and_swap<CASMODE>): this. Handle the new parameters.
(atomic_compare_and_swap_single<SWI>): Rename from
sync_compare_and_swap<SWI>; rewrite to use split unspecs.
(atomic_compare_and_swap_double<DCASMODE>): Rename from
sync_double_compare_and_swap<DCASMODE>; rewrite to use split unspecs.
(*atomic_compare_and_swap_doubledi_pic): Rename from
sync_double_compare_and_swapdi_pic; rewrite to use split unspecs.
(atomic_fetch_add<SWI>): Rename from sync_old_add<SWI>; add memory
model parameter.
(*atomic_fetch_add_cmp<SWI>): Similarly.
(atomic_add<SWI>, atomic<any_logic><SWI>): Similarly.
(atomic_sub<SWI>): Similarly. Use x86_maybe_negate_const_int.
(sync_lock_test_and_set<SWI>): Merge with ...
(atomic_exchange<SWI>): ... this.
2011-11-06 Richard Guenther <rguenther@suse.de>
* ipa-prop.c (ipa_modify_call_arguments): Re-compute
......@@ -3978,7 +3978,7 @@ PREPROCESSOR_DEFINES = \
CFLAGS-cppbuiltin.o += $(PREPROCESSOR_DEFINES) -DBASEVER=$(BASEVER_s)
cppbuiltin.o: cppbuiltin.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
cppbuiltin.h Makefile
$(TREE_H) cppbuiltin.h Makefile
CFLAGS-cppdefault.o += $(PREPROCESSOR_DEFINES)
cppdefault.o: cppdefault.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
......
......@@ -95,6 +95,10 @@ DEF_PRIMITIVE_TYPE (BT_VOLATILE_PTR,
build_pointer_type
(build_qualified_type (void_type_node,
TYPE_QUAL_VOLATILE)))
DEF_PRIMITIVE_TYPE (BT_CONST_VOLATILE_PTR,
build_pointer_type
(build_qualified_type (void_type_node,
TYPE_QUAL_VOLATILE|TYPE_QUAL_CONST)))
DEF_PRIMITIVE_TYPE (BT_PTRMODE, (*lang_hooks.types.type_for_mode)(ptr_mode, 0))
DEF_PRIMITIVE_TYPE (BT_INT_PTR, integer_ptr_type_node)
DEF_PRIMITIVE_TYPE (BT_FLOAT_PTR, float_ptr_type_node)
......@@ -315,6 +319,20 @@ DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_LONGPTR_LONGPTR,
BT_BOOL, BT_PTR_LONG, BT_PTR_LONG)
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_ULONGLONGPTR_ULONGLONGPTR,
BT_BOOL, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG)
DEF_FUNCTION_TYPE_2 (BT_FN_I1_CONST_VPTR_INT, BT_I1, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I2_CONST_VPTR_INT, BT_I2, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I4_CONST_VPTR_INT, BT_I4, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I8_CONST_VPTR_INT, BT_I8, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I16_CONST_VPTR_INT, BT_I16, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_VPTR_INT, BT_VOID, BT_VOLATILE_PTR, BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_VPTR_INT, BT_BOOL, BT_VOLATILE_PTR, BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE,
BT_CONST_VOLATILE_PTR)
DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR_PTR, BT_FN_VOID_PTR_PTR)
......@@ -383,6 +401,16 @@ DEF_FUNCTION_TYPE_3 (BT_FN_VOID_OMPFN_PTR_UINT, BT_VOID, BT_PTR_FN_VOID_PTR,
BT_PTR, BT_UINT)
DEF_FUNCTION_TYPE_3 (BT_FN_PTR_CONST_PTR_INT_SIZE, BT_PTR,
BT_CONST_PTR, BT_INT, BT_SIZE)
DEF_FUNCTION_TYPE_3 (BT_FN_I1_VPTR_I1_INT, BT_I1, BT_VOLATILE_PTR, BT_I1, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I2_VPTR_I2_INT, BT_I2, BT_VOLATILE_PTR, BT_I2, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I4_VPTR_I4_INT, BT_I4, BT_VOLATILE_PTR, BT_I4, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I8_VPTR_I8_INT, BT_I8, BT_VOLATILE_PTR, BT_I8, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I16_VPTR_I16_INT, BT_I16, BT_VOLATILE_PTR, BT_I16, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I1_INT, BT_VOID, BT_VOLATILE_PTR, BT_I1, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I2_INT, BT_VOID, BT_VOLATILE_PTR, BT_I2, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I4_INT, BT_VOID, BT_VOLATILE_PTR, BT_I4, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I8_INT, BT_VOID, BT_VOLATILE_PTR, BT_I8, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, BT_I16, BT_INT)
DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_FILEPTR,
BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_FILEPTR)
......@@ -402,6 +430,10 @@ DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_WORD_WORD_PTR,
BT_VOID, BT_PTR, BT_WORD, BT_WORD, BT_PTR)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_VPTR_PTR_INT, BT_VOID, BT_SIZE,
BT_VOLATILE_PTR, BT_PTR, BT_INT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, BT_VOID, BT_SIZE,
BT_CONST_VOLATILE_PTR, BT_PTR, BT_INT)
DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG,
BT_INT, BT_STRING, BT_INT, BT_SIZE, BT_CONST_STRING,
......@@ -409,6 +441,9 @@ DEF_FUNCTION_TYPE_5 (BT_FN_INT_STRING_INT_SIZE_CONST_STRING_VALIST_ARG,
DEF_FUNCTION_TYPE_5 (BT_FN_BOOL_LONG_LONG_LONG_LONGPTR_LONGPTR,
BT_BOOL, BT_LONG, BT_LONG, BT_LONG,
BT_PTR_LONG, BT_PTR_LONG)
DEF_FUNCTION_TYPE_5 (BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT, BT_VOID, BT_SIZE,
BT_VOLATILE_PTR, BT_PTR, BT_PTR, BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_INT_STRING_SIZE_INT_SIZE_CONST_STRING_VALIST_ARG,
BT_INT, BT_STRING, BT_SIZE, BT_INT, BT_SIZE,
......@@ -422,6 +457,24 @@ DEF_FUNCTION_TYPE_6 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG,
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR,
BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
BT_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I1_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I1, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I2_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I2, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I4_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I4, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I8_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I8, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I16_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I16, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_SIZE_VPTR_PTR_PTR_INT_INT, BT_BOOL, BT_SIZE,
BT_VOLATILE_PTR, BT_PTR, BT_PTR, BT_INT, BT_INT)
DEF_FUNCTION_TYPE_7 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,
......
2011-11-06 Andrew MacLeod <amacleod@redhat.com>
Richard Henderson <rth@redhat.com>
Merged from cxx-mem-model.
* c-cppbuiltin.c (c_cpp_builtins): Test both atomic and sync patterns.
* c-common.c (sync_resolve_params, sync_resolve_return): Only tweak
parameters that are the same type size.
(get_atomic_generic_size): New. Find size of generic
atomic function parameters and do typechecking.
(add_atomic_size_parameter): New. Insert size into parameter list.
(resolve_overloaded_atomic_exchange): Restructure __atomic_exchange to
either __atomic_exchange_n or external library call.
(resolve_overloaded_atomic_compare_exchange): Restructure
__atomic_compare_exchange to either _n variant or external library call.
(resolve_overloaded_atomic_load): Restructure __atomic_load to either
__atomic_load_n or an external library call.
(resolve_overloaded_atomic_store): Restructure __atomic_store to either
__atomic_store_n or an external library call.
(resolve_overloaded_builtin): Handle new __atomic builtins.
2011-11-04 Eric Botcazou <ebotcazou@adacore.com>
PR c++/50608
......
......@@ -758,30 +758,50 @@ c_cpp_builtins (cpp_reader *pfile)
/* Tell source code if the compiler makes sync_compare_and_swap
builtins available. */
#ifdef HAVE_sync_compare_and_swapqi
if (HAVE_sync_compare_and_swapqi)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
#ifndef HAVE_sync_compare_and_swapqi
#define HAVE_sync_compare_and_swapqi 0
#endif
#ifndef HAVE_atomic_compare_and_swapqi
#define HAVE_atomic_compare_and_swapqi 0
#endif
if (HAVE_sync_compare_and_swapqi || HAVE_atomic_compare_and_swapqi)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
#ifdef HAVE_sync_compare_and_swaphi
if (HAVE_sync_compare_and_swaphi)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
#ifndef HAVE_sync_compare_and_swaphi
#define HAVE_sync_compare_and_swaphi 0
#endif
#ifndef HAVE_atomic_compare_and_swaphi
#define HAVE_atomic_compare_and_swaphi 0
#endif
if (HAVE_sync_compare_and_swaphi || HAVE_atomic_compare_and_swaphi)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
#ifdef HAVE_sync_compare_and_swapsi
if (HAVE_sync_compare_and_swapsi)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
#ifndef HAVE_sync_compare_and_swapsi
#define HAVE_sync_compare_and_swapsi 0
#endif
#ifndef HAVE_atomic_compare_and_swapsi
#define HAVE_atomic_compare_and_swapsi 0
#endif
if (HAVE_sync_compare_and_swapsi || HAVE_atomic_compare_and_swapsi)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4");
#ifdef HAVE_sync_compare_and_swapdi
if (HAVE_sync_compare_and_swapdi)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
#ifndef HAVE_sync_compare_and_swapdi
#define HAVE_sync_compare_and_swapdi 0
#endif
#ifndef HAVE_atomic_compare_and_swapdi
#define HAVE_atomic_compare_and_swapdi 0
#endif
if (HAVE_sync_compare_and_swapdi || HAVE_atomic_compare_and_swapdi)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8");
#ifdef HAVE_sync_compare_and_swapti
if (HAVE_sync_compare_and_swapti)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
#ifndef HAVE_sync_compare_and_swapti
#define HAVE_sync_compare_and_swapti 0
#endif
#ifndef HAVE_atomic_compare_and_swapti
#define HAVE_atomic_compare_and_swapti 0
#endif
if (HAVE_sync_compare_and_swapti || HAVE_atomic_compare_and_swapti)
cpp_define (pfile, "__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16");
#ifdef DWARF2_UNWIND_INFO
if (dwarf2out_do_cfi_asm ())
......
......@@ -2717,6 +2717,10 @@ build_function_call_vec (location_t loc, tree function, VEC(tree,gc) *params,
name = DECL_NAME (function);
fundecl = function;
/* Atomic functions have type checking/casting already done. They are
often rewritten and don't match the original parameter list. */
if (name && !strncmp (IDENTIFIER_POINTER (name), "__atomic_", 9))
origtypes = NULL;
}
if (TREE_CODE (TREE_TYPE (function)) == FUNCTION_TYPE)
function = function_to_pointer_conversion (loc, function);
......
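Why origtypes is dropped above: by the time build_function_call_vec sees one of these calls, c-common.c may already have rewritten a generic __atomic_* call into its fixed-size _n form (see the resolve_overloaded_* entries in the ChangeLog), so the arguments no longer match the user-visible prototype. A minimal illustration of the two user-level forms involved; the builtins are the ones this merge documents, the variable names are mine:

#include <stdint.h>

uint32_t val = 1, newval = 2, ret;

void
demo (void)
{
  /* Generic form: all operands passed by pointer, works for any size.  */
  __atomic_exchange (&val, &newval, &ret, __ATOMIC_SEQ_CST);

  /* Fixed-size form the front end rewrites the above into when the
     object is 1, 2, 4, 8 or 16 bytes wide.  */
  ret = __atomic_exchange_n (&val, newval, __ATOMIC_SEQ_CST);
}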
......@@ -551,6 +551,10 @@ Winline
Common Var(warn_inline) Warning
Warn when an inlined function cannot be inlined
Winvalid-memory-model
Common Var(warn_invalid_memory_model) Init(1) Warning
Warn when an atomic memory model parameter is known to be outside the valid range.
Wlarger-than-
Common RejectNegative Joined Warning Undocumented Alias(Wlarger-than=)
......@@ -1266,6 +1270,10 @@ finline-limit=
Common RejectNegative Joined UInteger
-finline-limit=<number> Limit the size of inlined functions to <number>
finline-atomics
Common Report Var(flag_inline_atomics) Init(1) Optimization
Inline __atomic operations when a lock free instruction sequence is available.
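A sketch of what these two options control, using the builtins added in this merge (the literal 42 is just an arbitrary out-of-range constant):

int x;

int
bad_model (void)
{
  /* Valid models are the constants 0..5 (__ATOMIC_RELAXED through
     __ATOMIC_SEQ_CST).  A visible constant outside that range draws
     -Winvalid-memory-model, and the expanders substitute a
     conservative model instead.  */
  return __atomic_load_n (&x, 42);
}

Compiling the same file with -fno-inline-atomics forces out-of-line __atomic library calls even where a lock-free inline sequence exists.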
finstrument-functions
Common Report Var(flag_instrument_function_entry_exit)
Instrument function entry and exit with profiling calls
......
......@@ -248,6 +248,9 @@
;; For BMI2 support
UNSPEC_PDEP
UNSPEC_PEXT
;; For __atomic support
UNSPEC_MOVA
])
(define_c_enum "unspecv" [
......@@ -262,7 +265,10 @@
UNSPECV_ALIGN
UNSPECV_MONITOR
UNSPECV_MWAIT
UNSPECV_CMPXCHG
UNSPECV_CMPXCHG_1
UNSPECV_CMPXCHG_2
UNSPECV_CMPXCHG_3
UNSPECV_CMPXCHG_4
UNSPECV_XCHG
UNSPECV_LOCK
UNSPECV_PROLOGUE_USE
......
......@@ -181,5 +181,18 @@ union _dont_use_tree_here_;
#endif
/* Memory model types for the __atomic* builtins.
This must match the order in libstdc++-v3/include/bits/atomic_base.h. */
enum memmodel
{
MEMMODEL_RELAXED = 0,
MEMMODEL_CONSUME = 1,
MEMMODEL_ACQUIRE = 2,
MEMMODEL_RELEASE = 3,
MEMMODEL_ACQ_REL = 4,
MEMMODEL_SEQ_CST = 5,
MEMMODEL_LAST = 6
};
#endif /* coretypes.h */
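builtins.c's new get_memmodel reduces the user-supplied model argument to this enum. A standalone sketch of that extraction with the diagnostics simplified; the fallback choice mirrors the conservative behaviour described for -Winvalid-memory-model rather than quoting the actual implementation:

#include <stdio.h>

enum memmodel { MEMMODEL_RELAXED, MEMMODEL_CONSUME, MEMMODEL_ACQUIRE,
                MEMMODEL_RELEASE, MEMMODEL_ACQ_REL, MEMMODEL_SEQ_CST,
                MEMMODEL_LAST };

/* Map a user-supplied model argument onto the enum, warning and
   falling back to the strongest model when it is out of range.  */
static enum memmodel
get_model (long arg)
{
  if (arg < MEMMODEL_RELAXED || arg >= MEMMODEL_LAST)
    {
      fprintf (stderr, "warning: invalid memory model argument\n");
      return MEMMODEL_SEQ_CST;
    }
  return (enum memmodel) arg;
}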
......@@ -66,6 +66,12 @@ define__GNUC__ (cpp_reader *pfile)
cpp_define_formatted (pfile, "__GNUC_MINOR__=%d", minor);
cpp_define_formatted (pfile, "__GNUC_PATCHLEVEL__=%d", patchlevel);
cpp_define_formatted (pfile, "__VERSION__=\"%s\"", version_string);
cpp_define_formatted (pfile, "__ATOMIC_RELAXED=%d", MEMMODEL_RELAXED);
cpp_define_formatted (pfile, "__ATOMIC_SEQ_CST=%d", MEMMODEL_SEQ_CST);
cpp_define_formatted (pfile, "__ATOMIC_ACQUIRE=%d", MEMMODEL_ACQUIRE);
cpp_define_formatted (pfile, "__ATOMIC_RELEASE=%d", MEMMODEL_RELEASE);
cpp_define_formatted (pfile, "__ATOMIC_ACQ_REL=%d", MEMMODEL_ACQ_REL);
cpp_define_formatted (pfile, "__ATOMIC_CONSUME=%d", MEMMODEL_CONSUME);
}
......
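With those predefines in place, user code can name memory models symbolically and hand them straight to the new builtins; a small example (the variable and function names are illustrative):

#include <stdint.h>

uint64_t counter;

uint64_t
bump (void)
{
  /* The __ATOMIC_* macros expand to the memmodel enum values, so
     they pass through to the builtins unchanged.  */
  __atomic_store_n (&counter, 0, __ATOMIC_RELEASE);
  return __atomic_fetch_add (&counter, 1, __ATOMIC_SEQ_CST);
}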
......@@ -9163,11 +9163,26 @@ The maximum number of conditional store pairs that can be sunk. Set to 0
if either vectorization (@option{-ftree-vectorize}) or if-conversion
(@option{-ftree-loop-if-convert}) is disabled. The default is 2.
@item allow-load-data-races
Allow optimizers to introduce new data races on loads.
Set to 1 to allow, otherwise to 0. This option is enabled by default
unless implicitly set by the @option{-fmemory-model=} option.
@item allow-store-data-races
Allow optimizers to introduce new data races on stores.
Set to 1 to allow, otherwise to 0. This option is enabled by default
unless implicitly set by the @option{-fmemory-model=} option.
@item allow-packed-load-data-races
Allow optimizers to introduce new data races on packed data loads.
Set to 1 to allow, otherwise to 0. This option is enabled by default
unless implicitly set by the @option{-fmemory-model=} option.
@item allow-packed-store-data-races
Allow optimizers to introduce new data races on packed data stores.
Set to 1 to allow, otherwise to 0. This option is enabled by default
unless implicitly set by the @option{-fmemory-model=} option.
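A sketch of the kind of store these parameters govern; the struct layout is illustrative:

@smallexample
struct @{ char a; char b; @} s;

void
set_a (void)
@{
  /* Implemented as a wider read-modify-write covering both bytes,
     this assignment would also rewrite s.b and so introduce a data
     race on stores; with allow-store-data-races=0 the compiler must
     use a narrow (or atomic) store instead.  */
  s.a = 1;
@}
@end smallexample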
@item case-values-threshold
The smallest number of different values for which it is best to use a
jump-table instead of a tree of conditional branches. If the value is
......@@ -13180,7 +13195,8 @@ This option will enable GCC to use CMPXCHG16B instruction in generated code.
CMPXCHG16B allows for atomic operations on 128-bit double quadword (or oword)
data types. This is useful for high resolution counters that could be updated
by multiple processors (or cores). This instruction is generated as part of
atomic built-in functions: see @ref{Atomic Builtins} for details.
atomic built-in functions: see @ref{__sync Builtins} or
@ref{__atomic Builtins} for details.
@item -msahf
@opindex msahf
......
......@@ -5699,6 +5699,155 @@ released only after all previous memory operations have completed.
If this pattern is not defined, then a @code{memory_barrier} pattern
will be emitted, followed by a store of the value to the memory operand.
@cindex @code{atomic_compare_and_swap@var{mode}} instruction pattern
@item @samp{atomic_compare_and_swap@var{mode}}
This pattern, if defined, emits code for an atomic compare-and-swap
operation with memory model semantics. Operand 2 is the memory on which
the atomic operation is performed. Operand 0 is an output operand which
is set to true or false based on whether the operation succeeded. Operand
1 is an output operand which is set to the contents of the memory before
the operation was attempted. Operand 3 is the value that is expected to
be in memory. Operand 4 is the value to put in memory if the expected
value is found there. Operand 5 is set to 1 if this compare and swap is to
be treated as a weak operation. Operand 6 is the memory model to be used
if the operation is a success. Operand 7 is the memory model to be used
if the operation fails.
If memory referred to in operand 2 contains the value in operand 3, then
operand 4 is stored in memory pointed to by operand 2 and fencing based on
the memory model in operand 6 is issued.
If memory referred to in operand 2 does not contain the value in operand 3,
then fencing based on the memory model in operand 7 is issued.
If a target does not support weak compare-and-swap operations, or the port
elects not to implement weak operations, the argument in operand 5 can be
ignored. Note a strong implementation must be provided.
If this pattern is not provided, the @code{__atomic_compare_exchange}
built-in functions will utilize the legacy @code{sync_compare_and_swap}
pattern with an @code{__ATOMIC_SEQ_CST} memory model.
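At the source level these operands line up with the arguments of the new @code{__atomic_compare_exchange_n} builtin; a usage sketch of the semantics just described:

@smallexample
int
cas_set (int *mem, int expect, int desired)
@{
  /* 'old' plays the role of operands 1 and 3: it supplies the
     expected value and, on failure, receives the observed contents.
     The 0 requests a strong operation (operand 5); the two models
     are operands 6 and 7.  */
  int old = expect;
  return __atomic_compare_exchange_n (mem, &old, desired, 0,
                                      __ATOMIC_SEQ_CST,
                                      __ATOMIC_RELAXED);
@}
@end smallexample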
@cindex @code{atomic_load@var{mode}} instruction pattern
@item @samp{atomic_load@var{mode}}
This pattern implements an atomic load operation with memory model
semantics. Operand 1 is the memory address being loaded from. Operand 0
is the result of the load. Operand 2 is the memory model to be used for
the load operation.
If not present, the @code{__atomic_load} built-in function will either
resort to a normal load with memory barriers, or a compare-and-swap
operation if a normal load would not be atomic.
@cindex @code{atomic_store@var{mode}} instruction pattern
@item @samp{atomic_store@var{mode}}
This pattern implements an atomic store operation with memory model
semantics. Operand 0 is the memory address being stored to. Operand 1
is the value to be written. Operand 2 is the memory model to be used for
the operation.
If not present, the @code{__atomic_store} built-in function will attempt to
perform a normal store and surround it with any required memory fences. If
the store would not be atomic, then an @code{__atomic_exchange} is
attempted with the result being ignored.
@cindex @code{atomic_exchange@var{mode}} instruction pattern
@item @samp{atomic_exchange@var{mode}}
This pattern implements an atomic exchange operation with memory model
semantics. Operand 1 is the memory location the operation is performed on.
Operand 0 is an output operand which is set to the original value contained
in the memory pointed to by operand 1. Operand 2 is the value to be
stored. Operand 3 is the memory model to be used.
If this pattern is not present, the built-in function
@code{__atomic_exchange} will attempt to perform the operation with a
compare and swap loop.
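The fallback is the usual retry loop; in source-level terms (a sketch of the strategy, not the optabs.c implementation):

@smallexample
int
exchange_via_cas (int *mem, int newval)
@{
  int old = __atomic_load_n (mem, __ATOMIC_RELAXED);
  /* On failure 'old' is refreshed with the current contents, so the
     loop retries until the swap succeeds.  */
  while (!__atomic_compare_exchange_n (mem, &old, newval, 1 /* weak */,
                                       __ATOMIC_SEQ_CST,
                                       __ATOMIC_RELAXED))
    ;
  return old;  /* the value the exchange displaced */
@}
@end smallexample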
@cindex @code{atomic_add@var{mode}} instruction pattern
@cindex @code{atomic_sub@var{mode}} instruction pattern
@cindex @code{atomic_or@var{mode}} instruction pattern
@cindex @code{atomic_and@var{mode}} instruction pattern
@cindex @code{atomic_xor@var{mode}} instruction pattern
@cindex @code{atomic_nand@var{mode}} instruction pattern
@item @samp{atomic_add@var{mode}}, @samp{atomic_sub@var{mode}}
@itemx @samp{atomic_or@var{mode}}, @samp{atomic_and@var{mode}}
@itemx @samp{atomic_xor@var{mode}}, @samp{atomic_nand@var{mode}}
These patterns emit code for an atomic operation on memory with memory
model semantics. Operand 0 is the memory on which the atomic operation is
performed. Operand 1 is the second operand to the binary operator.
Operand 2 is the memory model to be used by the operation.
If these patterns are not defined, attempts will be made to use legacy
@code{sync} patterns, or equivalent patterns which return a result. If
none of these are available a compare-and-swap loop will be used.
@cindex @code{atomic_fetch_add@var{mode}} instruction pattern
@cindex @code{atomic_fetch_sub@var{mode}} instruction pattern
@cindex @code{atomic_fetch_or@var{mode}} instruction pattern
@cindex @code{atomic_fetch_and@var{mode}} instruction pattern
@cindex @code{atomic_fetch_xor@var{mode}} instruction pattern
@cindex @code{atomic_fetch_nand@var{mode}} instruction pattern
@item @samp{atomic_fetch_add@var{mode}}, @samp{atomic_fetch_sub@var{mode}}
@itemx @samp{atomic_fetch_or@var{mode}}, @samp{atomic_fetch_and@var{mode}}
@itemx @samp{atomic_fetch_xor@var{mode}}, @samp{atomic_fetch_nand@var{mode}}
These patterns emit code for an atomic operation on memory with memory
model semantics, and return the original value. Operand 0 is an output
operand which contains the value of the memory location before the
operation was performed. Operand 1 is the memory on which the atomic
operation is performed. Operand 2 is the second operand to the binary
operator. Operand 3 is the memory model to be used by the operation.
If these patterns are not defined, attempts will be made to use legacy
@code{sync} patterns. If none of these are available a compare-and-swap
loop will be used.
@cindex @code{atomic_add_fetch@var{mode}} instruction pattern
@cindex @code{atomic_sub_fetch@var{mode}} instruction pattern
@cindex @code{atomic_or_fetch@var{mode}} instruction pattern
@cindex @code{atomic_and_fetch@var{mode}} instruction pattern
@cindex @code{atomic_xor_fetch@var{mode}} instruction pattern
@cindex @code{atomic_nand_fetch@var{mode}} instruction pattern
@item @samp{atomic_add_fetch@var{mode}}, @samp{atomic_sub_fetch@var{mode}}
@itemx @samp{atomic_or_fetch@var{mode}}, @samp{atomic_and_fetch@var{mode}}
@itemx @samp{atomic_xor_fetch@var{mode}}, @samp{atomic_nand_fetch@var{mode}}
These patterns emit code for an atomic operation on memory with memory
model semantics and return the result after the operation is performed.
Operand 0 is an output operand which contains the value after the
operation. Operand 1 is the memory on which the atomic operation is
performed. Operand 2 is the second operand to the binary operator.
Operand 3 is the memory model to be used by the operation.
If these patterns are not defined, attempts will be made to use legacy
@code{sync} patterns, or equivalent patterns which return the result before
the operation followed by the arithmetic operation required to produce the
result. If none of these are available a compare-and-swap loop will be
used.
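The compensation step mentioned above is plain arithmetic: reapplying the operation to the fetched (original) value yields the op_fetch result. For example:

@smallexample
int
add_fetch_from_fetch_add (int *mem, int val)
@{
  /* atomic_add_fetch == atomic_fetch_add, then redo the addition
     on the value returned.  */
  return __atomic_fetch_add (mem, val, __ATOMIC_SEQ_CST) + val;
@}
@end smallexample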
@cindex @code{mem_thread_fence@var{mode}} instruction pattern
@item @samp{mem_thread_fence@var{mode}}
This pattern emits code required to implement a thread fence with
memory model semantics. Operand 0 is the memory model to be used.
If this pattern is not specified, all memory models except
@code{__ATOMIC_RELAXED} will result in issuing a @code{sync_synchronize}
barrier pattern.
@cindex @code{mem_signal_fence@var{mode}} instruction pattern
@item @samp{mem_signal_fence@var{mode}}
This pattern emits code required to implement a signal fence with
memory model semantics. Operand 0 is the memory model to be used.
This pattern should impact the compiler optimizers the same way that
@code{mem_thread_fence} does, but it does not need to issue any barrier
instructions.
If this pattern is not specified, all memory models except
@code{__ATOMIC_RELAXED} will result in issuing a @code{sync_synchronize}
barrier pattern.
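In source terms these two patterns back the new fence builtins: a thread fence orders memory against other threads and may emit a barrier instruction, while a signal fence only constrains the optimizers, as noted above. A brief sketch:

@smallexample
int data;
volatile int flag;

void
publish (int v)
@{
  data = v;
  __atomic_thread_fence (__ATOMIC_RELEASE);  /* may emit a barrier */
  flag = 1;
@}

void
publish_to_signal_handler (int v)
@{
  data = v;
  __atomic_signal_fence (__ATOMIC_RELEASE);  /* compiler barrier only */
  flag = 1;
@}
@end smallexample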
@cindex @code{stack_protect_set} instruction pattern
@item @samp{stack_protect_set}
......
......@@ -212,11 +212,17 @@ int can_conditionally_move_p (enum machine_mode mode);
rtx emit_conditional_add (rtx, enum rtx_code, rtx, rtx, enum machine_mode,
rtx, rtx, enum machine_mode, int);
rtx expand_val_compare_and_swap (rtx, rtx, rtx, rtx);
rtx expand_bool_compare_and_swap (rtx, rtx, rtx, rtx);
rtx expand_sync_operation (rtx, rtx, enum rtx_code);
rtx expand_sync_fetch_operation (rtx, rtx, enum rtx_code, bool, rtx);
rtx expand_sync_lock_test_and_set (rtx, rtx, rtx);
rtx expand_atomic_exchange (rtx, rtx, rtx, enum memmodel);
rtx expand_atomic_load (rtx, rtx, enum memmodel);
rtx expand_atomic_store (rtx, rtx, enum memmodel);
rtx expand_atomic_fetch_op (rtx, rtx, rtx, enum rtx_code, enum memmodel,
bool);
void expand_atomic_thread_fence (enum memmodel);
void expand_atomic_signal_fence (enum memmodel);
/* Functions from expmed.c: */
......@@ -248,6 +254,7 @@ extern void expand_builtin_setjmp_receiver (rtx);
extern rtx expand_builtin_saveregs (void);
extern void expand_builtin_trap (void);
extern rtx builtin_strncpy_read_str (void *, HOST_WIDE_INT, enum machine_mode);
extern void expand_builtin_mem_thread_fence (enum memmodel);
/* Functions from expr.c: */
......
2011-11-06 Andrew MacLeod <amacleod@redhat.com>
Aldy Hernandez <aldyh@redhat.com>
Merged from cxx-mem-model.
* types.def (BT_SIZE, BT_CONST_VOLATILE_PTR, BT_FN_VOID_INT,
BT_FN_I{1,2,4,8,16}_CONST_VPTR_INT, BT_FN_VOID_VPTR_INT,
BT_FN_BOOL_VPTR_INT, BT_FN_BOOL_SIZE_CONST_VPTR,
BT_FN_VOID_VPTR_I{1,2,4,8,16}_INT, BT_FN_VOID_SIZE_VPTR_PTR_INT,
BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT,
BT_FN_BOOL_VPTR_PTR_I{1,2,4,8,16}_BOOL_INT_INT,
BT_FN_I{1,2,4,8,16}_VPTR_I{1,2,4,8,16}_INT): New types.
2011-11-04 Mikael Morin <mikael@gcc.gnu.org>
PR fortran/43829
......
......@@ -57,6 +57,7 @@ DEF_PRIMITIVE_TYPE (BT_UINT, unsigned_type_node)
DEF_PRIMITIVE_TYPE (BT_LONG, long_integer_type_node)
DEF_PRIMITIVE_TYPE (BT_ULONGLONG, long_long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (BT_WORD, (*lang_hooks.types.type_for_mode) (word_mode, 1))
DEF_PRIMITIVE_TYPE (BT_SIZE, size_type_node)
DEF_PRIMITIVE_TYPE (BT_I1, builtin_type_for_size (BITS_PER_UNIT*1, 1))
DEF_PRIMITIVE_TYPE (BT_I2, builtin_type_for_size (BITS_PER_UNIT*2, 1))
......@@ -70,7 +71,10 @@ DEF_PRIMITIVE_TYPE (BT_VOLATILE_PTR,
build_pointer_type
(build_qualified_type (void_type_node,
TYPE_QUAL_VOLATILE)))
DEF_PRIMITIVE_TYPE (BT_CONST_VOLATILE_PTR,
build_pointer_type
(build_qualified_type (void_type_node,
TYPE_QUAL_VOLATILE|TYPE_QUAL_CONST)))
DEF_POINTER_TYPE (BT_PTR_LONG, BT_LONG)
DEF_POINTER_TYPE (BT_PTR_ULONGLONG, BT_ULONGLONG)
DEF_POINTER_TYPE (BT_PTR_PTR, BT_PTR)
......@@ -85,6 +89,8 @@ DEF_FUNCTION_TYPE_1 (BT_FN_VOID_PTRPTR, BT_VOID, BT_PTR_PTR)
DEF_FUNCTION_TYPE_1 (BT_FN_VOID_VPTR, BT_VOID, BT_VOLATILE_PTR)
DEF_FUNCTION_TYPE_1 (BT_FN_UINT_UINT, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_1 (BT_FN_PTR_PTR, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_1 (BT_FN_VOID_INT, BT_VOID, BT_INT)
DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR, BT_FN_VOID_PTR)
......@@ -98,6 +104,21 @@ DEF_FUNCTION_TYPE_2 (BT_FN_I4_VPTR_I4, BT_I4, BT_VOLATILE_PTR, BT_I4)
DEF_FUNCTION_TYPE_2 (BT_FN_I8_VPTR_I8, BT_I8, BT_VOLATILE_PTR, BT_I8)
DEF_FUNCTION_TYPE_2 (BT_FN_I16_VPTR_I16, BT_I16, BT_VOLATILE_PTR, BT_I16)
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_PTR_PTR, BT_VOID, BT_PTR, BT_PTR)
DEF_FUNCTION_TYPE_2 (BT_FN_I1_CONST_VPTR_INT, BT_I1, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I2_CONST_VPTR_INT, BT_I2, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I4_CONST_VPTR_INT, BT_I4, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I8_CONST_VPTR_INT, BT_I8, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_I16_CONST_VPTR_INT, BT_I16, BT_CONST_VOLATILE_PTR,
BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_VOID_VPTR_INT, BT_VOID, BT_VOLATILE_PTR, BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_VPTR_INT, BT_BOOL, BT_VOLATILE_PTR, BT_INT)
DEF_FUNCTION_TYPE_2 (BT_FN_BOOL_SIZE_CONST_VPTR, BT_BOOL, BT_SIZE,
BT_CONST_VOLATILE_PTR)
DEF_POINTER_TYPE (BT_PTR_FN_VOID_PTR_PTR, BT_FN_VOID_PTR_PTR)
......@@ -119,15 +140,31 @@ DEF_FUNCTION_TYPE_3 (BT_FN_I16_VPTR_I16_I16, BT_I16, BT_VOLATILE_PTR,
BT_I16, BT_I16)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_OMPFN_PTR_UINT, BT_VOID, BT_PTR_FN_VOID_PTR,
BT_PTR, BT_UINT)
DEF_FUNCTION_TYPE_3 (BT_FN_I1_VPTR_I1_INT, BT_I1, BT_VOLATILE_PTR, BT_I1, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I2_VPTR_I2_INT, BT_I2, BT_VOLATILE_PTR, BT_I2, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I4_VPTR_I4_INT, BT_I4, BT_VOLATILE_PTR, BT_I4, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I8_VPTR_I8_INT, BT_I8, BT_VOLATILE_PTR, BT_I8, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_I16_VPTR_I16_INT, BT_I16, BT_VOLATILE_PTR, BT_I16, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I1_INT, BT_VOID, BT_VOLATILE_PTR, BT_I1, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I2_INT, BT_VOID, BT_VOLATILE_PTR, BT_I2, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I4_INT, BT_VOID, BT_VOLATILE_PTR, BT_I4, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I8_INT, BT_VOID, BT_VOLATILE_PTR, BT_I8, BT_INT)
DEF_FUNCTION_TYPE_3 (BT_FN_VOID_VPTR_I16_INT, BT_VOID, BT_VOLATILE_PTR, BT_I16, BT_INT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_OMPFN_PTR_UINT_UINT,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT, BT_UINT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_PTR_WORD_WORD_PTR,
BT_VOID, BT_PTR, BT_WORD, BT_WORD, BT_PTR)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_VPTR_PTR_INT, BT_VOID, BT_SIZE,
BT_VOLATILE_PTR, BT_PTR, BT_INT)
DEF_FUNCTION_TYPE_4 (BT_FN_VOID_SIZE_CONST_VPTR_PTR_INT, BT_VOID, BT_SIZE,
BT_CONST_VOLATILE_PTR, BT_PTR, BT_INT)
DEF_FUNCTION_TYPE_5 (BT_FN_BOOL_LONG_LONG_LONG_LONGPTR_LONGPTR,
BT_BOOL, BT_LONG, BT_LONG, BT_LONG,
BT_PTR_LONG, BT_PTR_LONG)
DEF_FUNCTION_TYPE_5 (BT_FN_VOID_SIZE_VPTR_PTR_PTR_INT, BT_VOID, BT_SIZE,
BT_VOLATILE_PTR, BT_PTR, BT_PTR, BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_LONG_LONG_LONG_LONG_LONGPTR_LONGPTR,
BT_BOOL, BT_LONG, BT_LONG, BT_LONG, BT_LONG,
......@@ -138,6 +175,23 @@ DEF_FUNCTION_TYPE_6 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG,
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_BOOL_ULL_ULL_ULL_ULLPTR_ULLPTR,
BT_BOOL, BT_BOOL, BT_ULONGLONG, BT_ULONGLONG,
BT_ULONGLONG, BT_PTR_ULONGLONG, BT_PTR_ULONGLONG)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I1_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I1, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I2_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I2, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I4_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I4, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I8_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I8, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_VPTR_PTR_I16_BOOL_INT_INT,
BT_BOOL, BT_VOLATILE_PTR, BT_PTR, BT_I16, BT_BOOL, BT_INT,
BT_INT)
DEF_FUNCTION_TYPE_6 (BT_FN_BOOL_SIZE_VPTR_PTR_PTR_INT_INT, BT_BOOL, BT_SIZE,
BT_VOLATILE_PTR, BT_PTR, BT_PTR, BT_INT, BT_INT)
DEF_FUNCTION_TYPE_7 (BT_FN_VOID_OMPFN_PTR_UINT_LONG_LONG_LONG_LONG,
BT_VOID, BT_PTR_FN_VOID_PTR, BT_PTR, BT_UINT,
......
......@@ -243,6 +243,28 @@ static const char * const optabs[] =
"set_direct_optab_handler (sync_compare_and_swap_optab, $A, CODE_FOR_$(sync_compare_and_swap$I$a$))",
"set_direct_optab_handler (sync_lock_test_and_set_optab, $A, CODE_FOR_$(sync_lock_test_and_set$I$a$))",
"set_direct_optab_handler (sync_lock_release_optab, $A, CODE_FOR_$(sync_lock_release$I$a$))",
"set_direct_optab_handler (atomic_exchange_optab, $A, CODE_FOR_$(atomic_exchange$I$a$))",
"set_direct_optab_handler (atomic_compare_and_swap_optab, $A, CODE_FOR_$(atomic_compare_and_swap$I$a$))",
"set_direct_optab_handler (atomic_load_optab, $A, CODE_FOR_$(atomic_load$I$a$))",
"set_direct_optab_handler (atomic_store_optab, $A, CODE_FOR_$(atomic_store$I$a$))",
"set_direct_optab_handler (atomic_add_fetch_optab, $A, CODE_FOR_$(atomic_add_fetch$I$a$))",
"set_direct_optab_handler (atomic_sub_fetch_optab, $A, CODE_FOR_$(atomic_sub_fetch$I$a$))",
"set_direct_optab_handler (atomic_and_fetch_optab, $A, CODE_FOR_$(atomic_and_fetch$I$a$))",
"set_direct_optab_handler (atomic_nand_fetch_optab, $A, CODE_FOR_$(atomic_nand_fetch$I$a$))",
"set_direct_optab_handler (atomic_xor_fetch_optab, $A, CODE_FOR_$(atomic_xor_fetch$I$a$))",
"set_direct_optab_handler (atomic_or_fetch_optab, $A, CODE_FOR_$(atomic_or_fetch$I$a$))",
"set_direct_optab_handler (atomic_fetch_add_optab, $A, CODE_FOR_$(atomic_fetch_add$I$a$))",
"set_direct_optab_handler (atomic_fetch_sub_optab, $A, CODE_FOR_$(atomic_fetch_sub$I$a$))",
"set_direct_optab_handler (atomic_fetch_and_optab, $A, CODE_FOR_$(atomic_fetch_and$I$a$))",
"set_direct_optab_handler (atomic_fetch_nand_optab, $A, CODE_FOR_$(atomic_fetch_nand$I$a$))",
"set_direct_optab_handler (atomic_fetch_xor_optab, $A, CODE_FOR_$(atomic_fetch_xor$I$a$))",
"set_direct_optab_handler (atomic_fetch_or_optab, $A, CODE_FOR_$(atomic_fetch_or$I$a$))",
"set_direct_optab_handler (atomic_add_optab, $A, CODE_FOR_$(atomic_add$I$a$))",
"set_direct_optab_handler (atomic_sub_optab, $A, CODE_FOR_$(atomic_sub$I$a$))",
"set_direct_optab_handler (atomic_and_optab, $A, CODE_FOR_$(atomic_and$I$a$))",
"set_direct_optab_handler (atomic_nand_optab, $A, CODE_FOR_$(atomic_nand$I$a$))",
"set_direct_optab_handler (atomic_xor_optab, $A, CODE_FOR_$(atomic_xor$I$a$))",
"set_direct_optab_handler (atomic_or_optab, $A, CODE_FOR_$(atomic_or$I$a$))",
"set_optab_handler (vec_set_optab, $A, CODE_FOR_$(vec_set$a$))",
"set_optab_handler (vec_extract_optab, $A, CODE_FOR_$(vec_extract$a$))",
"set_optab_handler (vec_extract_even_optab, $A, CODE_FOR_$(vec_extract_even$a$))",
......
......@@ -4998,7 +4998,7 @@ expand_omp_atomic_store (basic_block load_bb, tree addr)
}
/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
operation as a __sync_fetch_and_op builtin. INDEX is log2 of the
operation as a __atomic_fetch_op builtin. INDEX is log2 of the
size of the data type, and thus usable to find the index of the builtin
decl. Returns false if the expression is not of the proper form. */
......@@ -5009,13 +5009,14 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
{
enum built_in_function oldbase, newbase, tmpbase;
tree decl, itype, call;
direct_optab optab, oldoptab, newoptab;
tree lhs, rhs;
basic_block store_bb = single_succ (load_bb);
gimple_stmt_iterator gsi;
gimple stmt;
location_t loc;
enum tree_code code;
bool need_old, need_new;
enum machine_mode imode;
/* We expect to find the following sequences:
......@@ -5047,47 +5048,34 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
return false;
/* Check for one of the supported fetch-op operations. */
switch (gimple_assign_rhs_code (stmt))
code = gimple_assign_rhs_code (stmt);
switch (code)
{
case PLUS_EXPR:
case POINTER_PLUS_EXPR:
oldbase = BUILT_IN_SYNC_FETCH_AND_ADD_N;
newbase = BUILT_IN_SYNC_ADD_AND_FETCH_N;
optab = sync_add_optab;
oldoptab = sync_old_add_optab;
newoptab = sync_new_add_optab;
oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
break;
case MINUS_EXPR:
oldbase = BUILT_IN_SYNC_FETCH_AND_SUB_N;
newbase = BUILT_IN_SYNC_SUB_AND_FETCH_N;
optab = sync_add_optab;
oldoptab = sync_old_add_optab;
newoptab = sync_new_add_optab;
oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
break;
case BIT_AND_EXPR:
oldbase = BUILT_IN_SYNC_FETCH_AND_AND_N;
newbase = BUILT_IN_SYNC_AND_AND_FETCH_N;
optab = sync_and_optab;
oldoptab = sync_old_and_optab;
newoptab = sync_new_and_optab;
oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
break;
case BIT_IOR_EXPR:
oldbase = BUILT_IN_SYNC_FETCH_AND_OR_N;
newbase = BUILT_IN_SYNC_OR_AND_FETCH_N;
optab = sync_ior_optab;
oldoptab = sync_old_ior_optab;
newoptab = sync_new_ior_optab;
oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
break;
case BIT_XOR_EXPR:
oldbase = BUILT_IN_SYNC_FETCH_AND_XOR_N;
newbase = BUILT_IN_SYNC_XOR_AND_FETCH_N;
optab = sync_xor_optab;
oldoptab = sync_old_xor_optab;
newoptab = sync_new_xor_optab;
oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
break;
default:
return false;
}
/* Make sure the expression is of the proper form. */
if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
rhs = gimple_assign_rhs2 (stmt);
......@@ -5103,37 +5091,25 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
if (decl == NULL_TREE)
return false;
itype = TREE_TYPE (TREE_TYPE (decl));
imode = TYPE_MODE (itype);
if (need_new)
{
/* expand_sync_fetch_operation can always compensate when interested
in the new value. */
if (direct_optab_handler (newoptab, TYPE_MODE (itype))
== CODE_FOR_nothing
&& direct_optab_handler (oldoptab, TYPE_MODE (itype))
== CODE_FOR_nothing)
return false;
}
else if (need_old)
{
/* When interested in the old value, expand_sync_fetch_operation
can compensate only if the operation is reversible. AND and OR
are not reversible. */
if (direct_optab_handler (oldoptab, TYPE_MODE (itype))
== CODE_FOR_nothing
&& (oldbase == BUILT_IN_SYNC_FETCH_AND_AND_N
|| oldbase == BUILT_IN_SYNC_FETCH_AND_OR_N
|| direct_optab_handler (newoptab, TYPE_MODE (itype))
== CODE_FOR_nothing))
return false;
}
else if (direct_optab_handler (optab, TYPE_MODE (itype)) == CODE_FOR_nothing)
/* We could test all of the various optabs involved, but the fact of the
matter is that (with the exception of i486 vs i586 and xadd) all targets
that support any atomic operation optab also implement compare-and-swap.
Let optabs.c take care of expanding any compare-and-swap loop. */
if (!can_compare_and_swap_p (imode))
return false;
gsi = gsi_last_bb (load_bb);
gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
call = build_call_expr_loc (loc, decl, 2, addr,
fold_convert_loc (loc, itype, rhs));
/* OpenMP does not imply any barrier-like semantics on its atomic ops.
It only requires that the operation happen atomically. Thus we can
use the RELAXED memory model. */
call = build_call_expr_loc (loc, decl, 3, addr,
fold_convert_loc (loc, itype, rhs),
build_int_cst (NULL, MEMMODEL_RELAXED));
if (need_old || need_new)
{
lhs = need_old ? loaded_val : stored_val;
......@@ -5182,6 +5158,8 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
edge e;
enum built_in_function fncode;
/* ??? We need a non-pointer interface to __atomic_compare_exchange in
order to use the RELAXED memory model effectively. */
fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
+ index + 1);
cmpxchg = builtin_decl_explicit (fncode);
......@@ -5190,8 +5168,7 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
itype = TREE_TYPE (TREE_TYPE (cmpxchg));
if (direct_optab_handler (sync_compare_and_swap_optab, TYPE_MODE (itype))
== CODE_FOR_nothing)
if (!can_compare_and_swap_p (TYPE_MODE (itype)))
return false;
/* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
......
......@@ -695,6 +695,34 @@ enum direct_optab_index
/* Atomic clear with release semantics. */
DOI_sync_lock_release,
/* Atomic operations with memory model parameters. */
DOI_atomic_exchange,
DOI_atomic_compare_and_swap,
DOI_atomic_load,
DOI_atomic_store,
DOI_atomic_add_fetch,
DOI_atomic_sub_fetch,
DOI_atomic_and_fetch,
DOI_atomic_nand_fetch,
DOI_atomic_xor_fetch,
DOI_atomic_or_fetch,
DOI_atomic_fetch_add,
DOI_atomic_fetch_sub,
DOI_atomic_fetch_and,
DOI_atomic_fetch_nand,
DOI_atomic_fetch_xor,
DOI_atomic_fetch_or,
DOI_atomic_add,
DOI_atomic_sub,
DOI_atomic_and,
DOI_atomic_nand,
DOI_atomic_xor,
DOI_atomic_or,
DOI_atomic_always_lock_free,
DOI_atomic_is_lock_free,
DOI_atomic_thread_fence,
DOI_atomic_signal_fence,
/* Vector permutation. */
DOI_vec_perm,
DOI_vec_perm_const,
......@@ -744,6 +772,60 @@ typedef struct direct_optab_d *direct_optab;
(&direct_optab_table[(int) DOI_sync_lock_test_and_set])
#define sync_lock_release_optab \
(&direct_optab_table[(int) DOI_sync_lock_release])
#define atomic_exchange_optab \
(&direct_optab_table[(int) DOI_atomic_exchange])
#define atomic_compare_and_swap_optab \
(&direct_optab_table[(int) DOI_atomic_compare_and_swap])
#define atomic_load_optab \
(&direct_optab_table[(int) DOI_atomic_load])
#define atomic_store_optab \
(&direct_optab_table[(int) DOI_atomic_store])
#define atomic_add_fetch_optab \
(&direct_optab_table[(int) DOI_atomic_add_fetch])
#define atomic_sub_fetch_optab \
(&direct_optab_table[(int) DOI_atomic_sub_fetch])
#define atomic_and_fetch_optab \
(&direct_optab_table[(int) DOI_atomic_and_fetch])
#define atomic_nand_fetch_optab \
(&direct_optab_table[(int) DOI_atomic_nand_fetch])
#define atomic_xor_fetch_optab \
(&direct_optab_table[(int) DOI_atomic_xor_fetch])
#define atomic_or_fetch_optab \
(&direct_optab_table[(int) DOI_atomic_or_fetch])
#define atomic_fetch_add_optab \
(&direct_optab_table[(int) DOI_atomic_fetch_add])
#define atomic_fetch_sub_optab \
(&direct_optab_table[(int) DOI_atomic_fetch_sub])
#define atomic_fetch_and_optab \
(&direct_optab_table[(int) DOI_atomic_fetch_and])
#define atomic_fetch_nand_optab \
(&direct_optab_table[(int) DOI_atomic_fetch_nand])
#define atomic_fetch_xor_optab \
(&direct_optab_table[(int) DOI_atomic_fetch_xor])
#define atomic_fetch_or_optab \
(&direct_optab_table[(int) DOI_atomic_fetch_or])
#define atomic_add_optab \
(&direct_optab_table[(int) DOI_atomic_add])
#define atomic_sub_optab \
(&direct_optab_table[(int) DOI_atomic_sub])
#define atomic_and_optab \
(&direct_optab_table[(int) DOI_atomic_and])
#define atomic_nand_optab \
(&direct_optab_table[(int) DOI_atomic_nand])
#define atomic_xor_optab \
(&direct_optab_table[(int) DOI_atomic_xor])
#define atomic_or_optab \
(&direct_optab_table[(int) DOI_atomic_or])
#define atomic_always_lock_free_optab \
(&direct_optab_table[(int) DOI_atomic_always_lock_free])
#define atomic_is_lock_free_optab \
(&direct_optab_table[(int) DOI_atomic_is_lock_free])
#define atomic_thread_fence_optab \
(&direct_optab_table[(int) DOI_atomic_thread_fence])
#define atomic_signal_fence_optab \
(&direct_optab_table[(int) DOI_atomic_signal_fence])
#define vec_perm_optab (&direct_optab_table[DOI_vec_perm])
#define vec_perm_const_optab (&direct_optab_table[(int) DOI_vec_perm_const])
......@@ -883,6 +965,13 @@ extern void expand_float (rtx, rtx, int);
/* Return the insn_code for a FLOAT_EXPR. */
enum insn_code can_float_p (enum machine_mode, enum machine_mode, int);
/* Return true if there is an inline compare and swap pattern. */
extern bool can_compare_and_swap_p (enum machine_mode);
/* Generate code for a compare and swap. */
extern bool expand_atomic_compare_and_swap (rtx *, rtx *, rtx, rtx, rtx, bool,
enum memmodel, enum memmodel);
/* Check whether an operation represented by the code CODE is a
convert operation that is supported by the target platform in
vector form */
......
......@@ -921,11 +921,26 @@ DEFPARAM (PARAM_CASE_VALUES_THRESHOLD,
0, 0, 0)
/* Data race flags for C++0x memory model compliance. */
DEFPARAM (PARAM_ALLOW_LOAD_DATA_RACES,
"allow-load-data-races",
"Allow new data races on loads to be introduced",
1, 0, 1)
DEFPARAM (PARAM_ALLOW_STORE_DATA_RACES,
"allow-store-data-races",
"Allow new data races on stores to be introduced",
1, 0, 1)
DEFPARAM (PARAM_ALLOW_PACKED_LOAD_DATA_RACES,
"allow-packed-load-data-races",
"Allow new data races on packed data loads to be introduced",
1, 0, 1)
DEFPARAM (PARAM_ALLOW_PACKED_STORE_DATA_RACES,
"allow-packed-store-data-races",
"Allow new data races on packed data stores to be introduced",
1, 0, 1)
/* Reassociation width to be used by tree reassoc optimization. */
DEFPARAM (PARAM_TREE_REASSOC_WIDTH,
"tree-reassoc-width",
......
......@@ -211,6 +211,13 @@ extern void init_param_values (int *params);
PARAM_VALUE (PARAM_MIN_NONDEBUG_INSN_UID)
#define MAX_STORES_TO_SINK \
PARAM_VALUE (PARAM_MAX_STORES_TO_SINK)
#define ALLOW_LOAD_DATA_RACES \
PARAM_VALUE (PARAM_ALLOW_LOAD_DATA_RACES)
#define ALLOW_STORE_DATA_RACES \
PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES)
#define ALLOW_PACKED_LOAD_DATA_RACES \
PARAM_VALUE (PARAM_ALLOW_PACKED_LOAD_DATA_RACES)
#define ALLOW_PACKED_STORE_DATA_RACES \
PARAM_VALUE (PARAM_ALLOW_PACKED_STORE_DATA_RACES)
#endif /* ! GCC_PARAMS_H */
2011-11-06 Andrew MacLeod <amacleod@redhat.com>
Richard Henderson <rth@redhat.com>
Aldy Hernandez <aldyh@redhat.com>
Merged from cxx-mem-model.
* lib/target-supports.exp (check_effective_target_sync_int_128,
check_effective_target_sync_long_long): Check whether the target
supports 64 and 128 bit __sync builtins.
(check_effective_target_cas_char): New.
(check_effective_target_cas_int): New.
* gcc.dg/dg.exp: Exclude simulate-thread tests.
* gcc.dg/atomic-noinline[-aux].c: New. Make a variety of atomics calls.
* gcc.dg/atomic-generic[-aux].c: New. Test that generic functions
produce the expected library calls.
* gcc.dg/atomic-fence.c: New functional tests.
* gcc.dg/atomic-param.c: New. Check for illegal number of parameters.
* gcc.dg/atomic-invalid.c: New. Test invalid parameters.
* gcc.dg/atomic-lockfree[-aux].c: New tests.
* gcc.dg/atomic-compare-exchange-{1-5}.c: New functional tests.
* gcc.dg/atomic-op-[1-5].c: New. Test atomic fetch functionality.
* gcc.dg/atomic-exchange-{1-5}.c: New functional tests.
* gcc.dg/atomic-load-{1-5}.c: New functional tests.
* gcc.dg/atomic-store-{1-5}.c: New functional tests.
* gcc.dg/simulate-thread/atomic-load-int128.c: New. Verify int128 loads
are atomic.
* gcc.dg/simulate-thread/atomic-load-longlong.c: New. Verify 8 byte
loads are atomic.
* gcc.dg/simulate-thread/atomic-load-int.c: New. Verify 4 byte loads
are atomic.
* gcc.dg/simulate-thread/atomic-load-short.c: New. Verify 2 byte loads
are atomic.
* gcc.dg/simulate-thread/atomic-other-int128.c: New. Verify other
int128 operations are atomic.
* gcc.dg/simulate-thread/atomic-other-int.c: New. Verify other 4 byte
operations are atomic.
* gcc.dg/simulate-thread/atomic-other-longlong.c: New. Verify 8 byte
operations are atomic.
* gcc.dg/simulate-thread/atomic-other-short.c: New. Verify other 2 byte
operations are atomic.
* gcc.dg/simulate-thread/speculative-store.c: New. Verify speculative
stores aren't moved out of a loop.
* gcc.dg/simulate-thread/strict-align-global.c: New. Verify small
globals don't overwrite neighbouring globals.
* gcc.dg/simulate-thread/subfields.c: New. Verify struct component
writes don't overwrite neighbouring components.
* c-c++-common/gomp/atomic-10.c: Use cas_int; match __atomic builtin.
* c-c++-common/gomp/atomic-3.c: Likewise.
* c-c++-common/gomp/atomic-9.c: Likewise.
* gcc.dg/gomp/atomic-1.c, gcc.dg/gomp/atomic-2.c,
gcc.dg/gomp/atomic-3.c, gcc.dg/gomp/atomic-4.c, gcc.dg/gomp/atomic-7.c,
gcc.dg/gomp/atomic-8.c, gcc.dg/gomp/atomic-9.c,
gcc.dg/gomp/atomic-10.c, gcc.dg/gomp/atomic-12.c,
gcc.dg/gomp/atomic-13.c, gcc.dg/gomp/atomic-14.c,
gcc.dg/gomp/atomic-15.c: Move to c-c++-common/gomp/.
* g++.dg/gomp/atomic-1.C, g++.dg/gomp/atomic-2.C,
g++.dg/gomp/atomic-3.C, g++.dg/gomp/atomic-4.C, g++.dg/gomp/atomic-7.C,
g++.dg/gomp/atomic-8.C, g++.dg/gomp/atomic-9.C,
g++.dg/gomp/atomic-10.C, g++.dg/gomp/atomic-11.C,
g++.dg/gomp/atomic-12.C, g++.dg/gomp/atomic-13.C,
g++.dg/gomp/atomic-15.C: Remove.
* gcc.dg/gomp/gomp.exp, g++.dg/gomp/gomp.exp: Run c-c++-common tests.
* gcc.dg/gomp/atomic-11.c: Remove test.
2011-11-06 Ira Rosen <ira.rosen@linaro.org>
* gcc.dg/vect/bb-slp-cond-1.c: New test.
......
/* PR middle-end/28046 */
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-ompexp" } */
/* { dg-require-effective-target cas_int } */
int a[3], b;
struct C { int x; int y; } c;
......@@ -20,5 +21,5 @@ foo (void)
*baz () += bar ();
}
/* { dg-final { scan-tree-dump-times "__sync_fetch_and_add" 4 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */
/* { dg-final { scan-tree-dump-times "__atomic_fetch_add" 4 "ompexp" } } */
/* { dg-final { cleanup-tree-dump "ompexp" } } */
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-ompexp" } */
/* { dg-require-effective-target cas_int } */
int *xyzzy;
......@@ -9,5 +10,5 @@ void f1(void)
xyzzy++;
}
/* { dg-final { scan-tree-dump-times "xyzzy, 4" 1 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */
/* { dg-final { scan-tree-dump-times "xyzzy, 4" 1 "ompexp" } } */
/* { dg-final { cleanup-tree-dump "ompexp" } } */
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-ompexp" } */
/* { dg-require-effective-target cas_int } */
volatile int *bar(void);
......@@ -9,5 +10,5 @@ void f1(void)
*bar() += 1;
}
/* { dg-final { scan-tree-dump-times "__sync_fetch_and_add" 1 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */
/* { dg-final { scan-tree-dump-times "__atomic_fetch_add" 1 "ompexp" } } */
/* { dg-final { cleanup-tree-dump "ompexp" } } */
......@@ -48,6 +48,7 @@ set tests [prune $tests $srcdir/$subdir/tree-prof/*]
set tests [prune $tests $srcdir/$subdir/torture/*]
set tests [prune $tests $srcdir/$subdir/graphite/*]
set tests [prune $tests $srcdir/$subdir/guality/*]
set tests [prune $tests $srcdir/$subdir/simulate-thread/*]
# Main loop.
dg-runtest $tests "" $DEFAULT_CXXFLAGS
......
// PR middle-end/28046
// { dg-do compile }
// { dg-options "-fopenmp -fdump-tree-ompexp" }
int a[3], b;
struct C { int x; int y; } c;
int bar (void), *baz (void);
void
foo (void)
{
#pragma omp atomic
a[2] += bar ();
#pragma omp atomic
b += bar ();
#pragma omp atomic
c.y += bar ();
#pragma omp atomic
*baz () += bar ();
}
// { dg-final { scan-tree-dump-times "__sync_fetch_and_add" 4 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } }
// { dg-final { cleanup-tree-dump "ompexp" } }
/* PR middle-end/45423 */
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-gimple -g0 -O2" } */
/* atomicvar should never be referenced between the barrier and the
following #pragma omp atomic_load. */
/* { dg-final { scan-tree-dump-not "barrier\[^#\]*atomicvar" "gimple" } } */
/* { dg-final { cleanup-tree-dump "gimple" } } */
#include "atomic-11.C"
// { dg-do compile }
// { dg-options "-fopenmp" }
int x = 6;
int
main ()
{
int v;
#pragma omp atomic
x = x * 7 + 6; // { dg-error "expected" }
#pragma omp atomic
x = x * 7 ^ 6; // { dg-error "expected" }
#pragma omp atomic update
x = x - 8 + 6; // { dg-error "expected" }
#pragma omp atomic
x = x ^ 7 | 2; // { dg-error "expected" }
#pragma omp atomic
x = x / 7 * 2; // { dg-error "expected" }
#pragma omp atomic
x = x / 7 / 2; // { dg-error "expected" }
#pragma omp atomic capture
v = x = x | 6; // { dg-error "invalid operator" }
#pragma omp atomic capture
{ v = x; x = x * 7 + 6; } // { dg-error "expected" }
#pragma omp atomic capture
{ v = x; x = x * 7 ^ 6; } // { dg-error "expected" }
#pragma omp atomic capture
{ v = x; x = x - 8 + 6; } // { dg-error "expected" }
#pragma omp atomic capture
{ v = x; x = x ^ 7 | 2; } // { dg-error "expected" }
#pragma omp atomic capture
{ v = x; x = x / 7 * 2; } // { dg-error "expected" }
#pragma omp atomic capture
{ v = x; x = x / 7 / 2; } // { dg-error "expected" }
#pragma omp atomic capture
{ x = x * 7 + 6; v = x; } // { dg-error "expected" }
#pragma omp atomic capture
{ x = x * 7 ^ 6; v = x; } // { dg-error "expected" }
#pragma omp atomic capture
{ x = x - 8 + 6; v = x; } // { dg-error "expected" }
#pragma omp atomic capture
{ x = x ^ 7 | 2; v = x; } // { dg-error "expected" }
(void) v;
return 0;
}
......@@ -27,7 +27,7 @@ if ![check_effective_target_fopenmp] {
dg-init
# Main loop.
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.C]] "" "-fopenmp"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.C $srcdir/c-c++-common/gomp/*.c]] "" "-fopenmp"
# All done.
dg-finish
/* { dg-do link } */
/* { dg-options "-std=c++0x" } */
/* { dg-final { simulate-thread } } */
/* Test that atomic int and atomic char work properly. */
using namespace std;
#include <atomic>
#include <limits.h>
#include <stdio.h>
#include "simulate-thread.h"
atomic<int> atomi;
atomic<char> atomc;
/* No need for parallel threads to do anything */
void simulate_thread_other_threads()
{
}
/* Verify, after every instruction is executed, that the atomic int and
char have one of the two legitimate values. */
int simulate_thread_step_verify()
{
if (atomi != 0 && atomi != INT_MAX)
{
printf ("FAIL: invalid intermediate result for atomi (%d).\n",
(int)atomi);
return 1;
}
if (atomc != 0 && atomc != CHAR_MAX)
{
printf ("FAIL: invalid intermediate result for atomc (%d).\n",
(int)atomc);
return 1;
}
return 0;
}
/* Verify that both atomics have the correct value. */
int simulate_thread_final_verify()
{
if (atomi != INT_MAX)
{
printf ("FAIL: invalid final result for atomi (%d).\n",
(int)atomi);
return 1;
}
if (atomc != CHAR_MAX)
{
printf ("FAIL: invalid final result for atomc (%d).\n",
(int)atomc);
return 1;
}
return 0;
}
/* Test a store to an atomic int and an atomic char. */
__attribute__((noinline))
void simulate_thread_main()
{
atomi = INT_MAX;
atomc = CHAR_MAX;
}
int main ()
{
simulate_thread_main();
simulate_thread_done();
return 0;
}
/* { dg-do link } */
/* { dg-options "-std=c++0x" } */
/* { dg-final { simulate-thread } } */
using namespace std;
#include <atomic>
#include <limits.h>
#include <stdio.h>
#include "simulate-thread.h"
atomic_int atomi;
/* Non-atomic. Use a type wide enough to possibly coerce GCC into
moving things around. */
long double j;
/* Test that an atomic store synchronizes with an atomic load.
In this case, test that the store to <j> happens-before the atomic
store to <atomi>. Make sure the compiler does not reorder the
stores. */
__attribute__((noinline))
void simulate_thread_main()
{
j = 13.0;
atomi.store(1);
}
int main ()
{
simulate_thread_main();
simulate_thread_done();
return 0;
}
void simulate_thread_other_threads()
{
}
/* Verify that side-effects before an atomic store are correctly
synchronized with an atomic load from the same location. */
int simulate_thread_step_verify()
{
if (atomi.load() == 1 && j != 13.0)
{
printf ("FAIL: invalid synchronization for atomic load/store.\n");
return 1;
}
return 0;
}
int simulate_thread_final_verify()
{
return simulate_thread_step_verify();
}
/* { dg-do link } */
/* { dg-options "--param allow-load-data-races=0 --param allow-store-data-races=0" } */
/* { dg-final { simulate-thread } } */
/* Test that setting <var.a> does not touch either <var.b> or <var.c>.
In the C++ memory model, non-contiguous bitfields ("a" and "c"
here) should be considered as distinct memory locations, so we
can't use bit twiddling to set either one. */
#include <stdio.h>
#include "simulate-thread.h"
#define CONSTA 12
static int global;
struct S
{
unsigned int a : 4;
unsigned char b;
unsigned int c : 6;
} var;
__attribute__((noinline))
void set_a()
{
var.a = CONSTA;
}
void simulate_thread_other_threads()
{
++global;
var.b = global;
var.c = global;
}
int simulate_thread_step_verify()
{
int ret = 0;
if (var.b != global)
{
printf ("FAIL: Unexpected value: var.b is %d, should be %d\n",
var.b, global);
ret = 1;
}
if (var.c != global)
{
printf ("FAIL: Unexpected value: var.c is %d, should be %d\n",
var.c, global);
ret = 1;
}
return ret;
}
int simulate_thread_final_verify()
{
int ret = simulate_thread_step_verify();
if (var.a != CONSTA)
{
printf ("FAIL: Unexpected value: var.a is %d, should be %d\n",
var.a, CONSTA);
ret = 1;
}
return ret;
}
__attribute__((noinline))
void simulate_thread_main()
{
set_a();
}
int main()
{
simulate_thread_main();
simulate_thread_done();
return 0;
}
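/* Illustrative sketch, not part of the patch: a compiler that implemented
   the 4-bit store in set_a as a read-modify-write of the containing 32-bit
   word would also rewrite <b> and <c>, racing with
   simulate_thread_other_threads.  Schematically (hypothetical helper,
   assuming <a> occupies the low 4 bits of the first word):  */
static void set_a_forbidden (unsigned int *word)
{
  unsigned int tmp = *word;              /* Reads the bytes holding b and c too.  */
  tmp = (tmp & ~0xfu) | (CONSTA & 0xfu); /* Update only the <a> bits.  */
  *word = tmp;                           /* Non-atomically writes b and c back.  */
}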
/* { dg-do link } */
/* { dg-options "--param allow-load-data-races=0 --param allow-store-data-races=0" } */
/* { dg-final { simulate-thread } } */
/* Test that setting <var.a> does not touch either <var.b> or <var.c>.
In the C++ memory model, non-contiguous bitfields ("a" and "c"
here) should be considered as distinct memory locations, so we
can't use bit twiddling to set either one. */
#include <stdio.h>
#include "simulate-thread.h"
#define CONSTA 12
static int global;
struct S
{
/* On x86-64, the volatile causes us to access <a> with a 32-bit
access, and thus trigger this test. */
volatile unsigned int a : 4;
unsigned char b;
unsigned int c : 6;
} var;
__attribute__((noinline))
void set_a()
{
var.a = CONSTA;
}
void simulate_thread_other_threads()
{
++global;
var.b = global;
var.c = global;
}
int simulate_thread_step_verify()
{
int ret = 0;
if (var.b != global)
{
printf ("FAIL: Unexpected value: var.b is %d, should be %d\n",
var.b, global);
ret = 1;
}
if (var.c != global)
{
printf ("FAIL: Unexpected value: var.c is %d, should be %d\n",
var.c, global);
ret = 1;
}
return ret;
}
int simulate_thread_final_verify()
{
int ret = simulate_thread_step_verify();
if (var.a != CONSTA)
{
printf ("FAIL: Unexpected value: var.a is %d, should be %d\n",
var.a, CONSTA);
ret = 1;
}
return ret;
}
__attribute__((noinline))
void simulate_thread_main()
{
set_a();
}
int main ()
{
simulate_thread_main();
simulate_thread_done();
return 0;
}
/* Test __atomic routines for existence and proper execution on 1 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_compare_exchange_n builtin for a char. */
extern void abort(void);
char v = 0;
char expected = 0;
char max = ~0;
char desired = ~0;
char zero = 0;
#define STRONG 0
#define WEAK 1
main ()
{
if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
/* Now test the generic version. */
v = 0;
if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
return 0;
}
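/* For reference, not part of the patch: a WEAK compare-exchange is permitted
   to fail spuriously even when the comparison succeeds, so the tests here
   only use WEAK on calls whose comparison is expected to fail anyway; a
   spurious failure cannot change the outcome being checked.  */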
/* Test __atomic routines for existence and proper execution on 2 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_compare_exchange_n builtin for a short. */
extern void abort(void);
short v = 0;
short expected = 0;
short max = ~0;
short desired = ~0;
short zero = 0;
#define STRONG 0
#define WEAK 1
main ()
{
if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
/* Now test the generic version. */
v = 0;
if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
return 0;
}
/* Test __atomic routines for existence and proper execution on 4 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_long } */
/* Test the execution of the __atomic_compare_exchange_n builtin for an int. */
extern void abort(void);
int v = 0;
int expected = 0;
int max = ~0;
int desired = ~0;
int zero = 0;
#define STRONG 0
#define WEAK 1
main ()
{
if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
/* Now test the generic version. */
v = 0;
if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
return 0;
}
/* Test __atomic routines for existence and proper execution on 8 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_long_long } */
/* { dg-options "" } */
/* Test the execution of __atomic_compare_exchange_n builtin for a long_long. */
extern void abort(void);
long long v = 0;
long long expected = 0;
long long max = ~0;
long long desired = ~0;
long long zero = 0;
#define STRONG 0
#define WEAK 1
main ()
{
if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
/* Now test the generic version. */
v = 0;
if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
return 0;
}
/* Test __atomic routines for existence and proper execution on 16 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* } } } */
/* Test the execution of __atomic_compare_exchange_n builtin for an int_128. */
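/* (-mcx16 enables the cmpxchg16b instruction, which the 16-byte
   compare-and-swap expands to on x86-64.)  */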
extern void abort(void);
__int128_t v = 0;
__int128_t expected = 0;
__int128_t max = ~0;
__int128_t desired = ~0;
__int128_t zero = 0;
#define STRONG 0
#define WEAK 1
main ()
{
if (!__atomic_compare_exchange_n (&v, &expected, max, STRONG , __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, 0, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange_n (&v, &expected, desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange_n (&v, &expected, desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
/* Now test the generic version. */
v = 0;
if (!__atomic_compare_exchange (&v, &expected, &max, STRONG, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort ();
if (expected != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
abort ();
if (expected != max)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &zero, STRONG , __ATOMIC_RELEASE, __ATOMIC_ACQUIRE))
abort ();
if (expected != max)
abort ();
if (v != 0)
abort ();
if (__atomic_compare_exchange (&v, &expected, &desired, WEAK, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE))
abort ();
if (expected != 0)
abort ();
if (!__atomic_compare_exchange (&v, &expected, &desired, STRONG , __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
abort ();
if (expected != 0)
abort ();
if (v != max)
abort ();
return 0;
}
/* Test __atomic routines for existence and proper execution on 1 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_exchange_n builtin for a char. */
extern void abort(void);
char v, count, ret;
main ()
{
v = 0;
count = 0;
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++)
abort ();
/* Now test the generic version. */
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
if (ret != count - 1 || v != count)
abort ();
count++;
return 0;
}
/* Test __atomic routines for existence and proper execution on 2 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_exchange_n builtin for a short. */
extern void abort(void);
short v, count, ret;
main ()
{
v = 0;
count = 0;
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++)
abort ();
/* Now test the generic version. */
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
if (ret != count - 1 || v != count)
abort ();
count++;
return 0;
}
/* Test __atomic routines for existence and proper execution on 4 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_long } */
/* Test the execution of the __atomic_exchange_n builtin for an int. */
extern void abort(void);
int v, count, ret;
main ()
{
v = 0;
count = 0;
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++)
abort ();
/* Now test the generic version. */
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
if (ret != count - 1 || v != count)
abort ();
count++;
return 0;
}
/* Test __atomic routines for existence and proper execution on 8 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_long_long } */
/* { dg-options "" } */
/* Test the execution of the __atomic_exchange_n builtin for a long long. */
extern void abort(void);
long long v, count, ret;
main ()
{
v = 0;
count = 0;
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++)
abort ();
/* Now test the generic version. */
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
if (ret != count - 1 || v != count)
abort ();
count++;
return 0;
}
/* Test __atomic routines for existence and proper execution on 16 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* } } } */
/* Test the execution of the __atomic_exchange_n builtin for a 16 byte value. */
extern void abort(void);
__int128_t v, count, ret;
main ()
{
v = 0;
count = 0;
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELAXED) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQUIRE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_RELEASE) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_ACQ_REL) != count++)
abort ();
if (__atomic_exchange_n (&v, count + 1, __ATOMIC_SEQ_CST) != count++)
abort ();
/* Now test the generic version. */
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELAXED);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQUIRE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_RELEASE);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_ACQ_REL);
if (ret != count - 1 || v != count)
abort ();
count++;
__atomic_exchange (&v, &count, &ret, __ATOMIC_SEQ_CST);
if (ret != count - 1 || v != count)
abort ();
count++;
return 0;
}
/* Test __atomic routines for existence and execution with each valid
memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test that __atomic_{thread,signal}_fence builtins execute. */
main ()
{
__atomic_thread_fence (__ATOMIC_RELAXED);
__atomic_thread_fence (__ATOMIC_CONSUME);
__atomic_thread_fence (__ATOMIC_ACQUIRE);
__atomic_thread_fence (__ATOMIC_RELEASE);
__atomic_thread_fence (__ATOMIC_ACQ_REL);
__atomic_thread_fence (__ATOMIC_SEQ_CST);
__atomic_signal_fence (__ATOMIC_RELAXED);
__atomic_signal_fence (__ATOMIC_CONSUME);
__atomic_signal_fence (__ATOMIC_ACQUIRE);
__atomic_signal_fence (__ATOMIC_RELEASE);
__atomic_signal_fence (__ATOMIC_ACQ_REL);
__atomic_signal_fence (__ATOMIC_SEQ_CST);
return 0;
}
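/* Illustrative sketch, not part of the patch: a typical pairing of the fence
   builtins.  __atomic_thread_fence orders this thread's memory accesses
   against other threads; __atomic_signal_fence only restrains the compiler,
   for synchronizing with a signal handler in the same thread.  The names
   below are hypothetical.  */
int fence_payload;
int fence_ready;
void fence_producer (void)
{
  fence_payload = 42;
  /* Release fence: orders the payload store before the flag store.  */
  __atomic_thread_fence (__ATOMIC_RELEASE);
  __atomic_store_n (&fence_ready, 1, __ATOMIC_RELAXED);
}
int fence_consumer (void)
{
  if (__atomic_load_n (&fence_ready, __ATOMIC_RELAXED))
    {
      /* Acquire fence: pairs with the release fence above.  */
      __atomic_thread_fence (__ATOMIC_ACQUIRE);
      return fence_payload;  /* Guaranteed to read 42.  */
    }
  return 0;
}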
/* Supply a set of generic atomic functions so we can test that the compiler
makes the calls properly. */
/* { dg-do compile } */
/* { dg-options "-w" } */
/* Test that the generic builtins make calls as expected. */
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
void
__atomic_exchange (size_t size, void *obj, void *val, void *ret, int model)
{
/* Copy old value into *ret. */
memcpy (ret, obj, size);
/* Copy val into object. */
memcpy (obj, val, size);
}
bool
__atomic_compare_exchange (size_t size, void *obj, void *expected,
void *desired, int model1, int model2)
{
if (!memcmp (obj, expected, size))
{
memcpy (obj, desired, size);
return true;
}
memcpy (expected, obj, size);
return false;
}
void __atomic_load (size_t size, void *obj, void *ret, int model)
{
memcpy (ret, obj, size);
}
void __atomic_store (size_t size, void *obj, void *val, int model)
{
memcpy (obj, val, size);
}
/* Test generic __atomic routines for proper function calling. */
/* { dg-options "-w" } */
/* { dg-do run } */
/* { dg-additional-sources "atomic-generic-aux.c" } */
/* Test that the generic atomic builtins execute as expected.
atomic-generic-aux.c supplies a functional external entry point for
the 4 generic functions. */
#include <stdlib.h>
#include <stdbool.h>
extern void abort();
typedef struct test {
int array[10];
} test_struct;
test_struct zero = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
test_struct ones = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
test_struct a,b;
int size = sizeof (test_struct);
/* Test the generic routines on an object larger than any lock-free size. */
main ()
{
test_struct c;
__atomic_store (&a, &zero, __ATOMIC_RELAXED);
if (memcmp (&a, &zero, size))
abort ();
__atomic_exchange (&a, &ones, &c, __ATOMIC_SEQ_CST);
if (memcmp (&c, &zero, size))
abort ();
if (memcmp (&a, &ones, size))
abort ();
__atomic_load (&a, &b, __ATOMIC_RELAXED);
if (memcmp (&b, &ones, size))
abort ();
if (!__atomic_compare_exchange (&a, &b, &zero, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort();
if (memcmp (&a, &zero, size))
abort ();
if (__atomic_compare_exchange (&a, &b, &ones, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
abort();
if (memcmp (&b, &zero, size))
abort ();
return 0;
}
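/* For reference, not part of the patch: because test_struct is 40 bytes,
   none of the optimized __atomic_*_{1,2,4,8,16} forms apply, so each builtin
   above is emitted as a call to the size-generic routine supplied by
   atomic-generic-aux.c, which takes the object size as its first argument.  */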
/* Test __atomic routines for invalid memory model errors. This only needs
to be tested on a single size. */
/* { dg-do compile } */
/* { dg-require-effective-target sync_int_long } */
#include <stddef.h>
int i, e, b;
size_t s;
main ()
{
__atomic_compare_exchange_n (&i, &e, 1, 0, __ATOMIC_RELAXED, __ATOMIC_SEQ_CST); /* { dg-error "failure memory model cannot be stronger" } */
__atomic_compare_exchange_n (&i, &e, 1, 0, __ATOMIC_SEQ_CST, __ATOMIC_RELEASE); /* { dg-error "invalid failure memory" } */
__atomic_compare_exchange_n (&i, &e, 1, 1, __ATOMIC_SEQ_CST, __ATOMIC_ACQ_REL); /* { dg-error "invalid failure memory" } */
__atomic_exchange_n (&i, 1, __ATOMIC_CONSUME); /* { dg-error "invalid memory model" } */
__atomic_load_n (&i, __ATOMIC_RELEASE); /* { dg-error "invalid memory model" } */
__atomic_load_n (&i, __ATOMIC_ACQ_REL); /* { dg-error "invalid memory model" } */
__atomic_store_n (&i, 1, __ATOMIC_ACQUIRE); /* { dg-error "invalid memory model" } */
__atomic_store_n (&i, 1, __ATOMIC_CONSUME); /* { dg-error "invalid memory model" } */
__atomic_store_n (&i, 1, __ATOMIC_ACQ_REL); /* { dg-error "invalid memory model" } */
i = __atomic_always_lock_free (s, NULL); /* { dg-error "non-constant argument" } */
__atomic_load_n (&i, 44); /* { dg-warning "invalid memory model" } */
}
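/* For reference, not part of the patch: loads accept RELAXED, CONSUME,
   ACQUIRE and SEQ_CST; stores accept RELAXED, RELEASE and SEQ_CST; exchange
   accepts every model except CONSUME; and a compare-exchange failure model
   may not be RELEASE or ACQ_REL, nor stronger than the success model.
   A few valid combinations (hypothetical variables):  */
int j, e2;
void valid_models (void)
{
  __atomic_load_n (&j, __ATOMIC_ACQUIRE);          /* Valid load model.  */
  __atomic_store_n (&j, 1, __ATOMIC_RELEASE);      /* Valid store model.  */
  __atomic_compare_exchange_n (&j, &e2, 1, 0, __ATOMIC_ACQ_REL,
			       __ATOMIC_ACQUIRE);  /* Failure <= success.  */
}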
/* Test __atomic routines for existence and proper execution on 1 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_load_n builtin for a char. */
extern void abort(void);
char v, count;
main ()
{
v = 0;
count = 0;
if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++)
abort();
else
v++;
/* Now test the generic variants. */
__atomic_load (&v, &count, __ATOMIC_RELAXED);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_ACQUIRE);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_CONSUME);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_SEQ_CST);
if (count != v)
abort();
else
v++;
return 0;
}
/* Test __atomic routines for existence and proper execution on 2 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_load_n builtin for a short. */
extern void abort(void);
short v, count;
main ()
{
v = 0;
count = 0;
if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++)
abort();
else
v++;
/* Now test the generic variants. */
__atomic_load (&v, &count, __ATOMIC_RELAXED);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_ACQUIRE);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_CONSUME);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_SEQ_CST);
if (count != v)
abort();
else
v++;
return 0;
}
/* Test __atomic routines for existence and proper execution on 4 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_long } */
extern void abort(void);
int v, count;
main ()
{
v = 0;
count = 0;
if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++)
abort();
else
v++;
/* Now test the generic variants. */
__atomic_load (&v, &count, __ATOMIC_RELAXED);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_ACQUIRE);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_CONSUME);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_SEQ_CST);
if (count != v)
abort();
else
v++;
return 0;
}
/* Test __atomic routines for existence and proper execution on 8 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_long_long } */
/* { dg-options "" } */
extern void abort(void);
long long v, count;
main ()
{
v = 0;
count = 0;
if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++)
abort();
else
v++;
/* Now test the generic variants. */
__atomic_load (&v, &count, __ATOMIC_RELAXED);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_ACQUIRE);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_CONSUME);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_SEQ_CST);
if (count != v)
abort();
else
v++;
return 0;
}
/* Test __atomic routines for existence and proper execution on 16 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* } } } */
extern void abort(void);
__int128_t v, count;
main ()
{
v = 0;
count = 0;
if (__atomic_load_n (&v, __ATOMIC_RELAXED) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_ACQUIRE) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_CONSUME) != count++)
abort();
else
v++;
if (__atomic_load_n (&v, __ATOMIC_SEQ_CST) != count++)
abort();
else
v++;
/* Now test the generic variants. */
__atomic_load (&v, &count, __ATOMIC_RELAXED);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_ACQUIRE);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_CONSUME);
if (count != v)
abort();
else
v++;
__atomic_load (&v, &count, __ATOMIC_SEQ_CST);
if (count != v)
abort();
else
v++;
return 0;
}
/* Supply a __atomic_is_lock_free routine for the lock-free tests. */
/* Just compile it on its own. */
/* { dg-do compile } */
/* { dg-options "-w" } */
/* Test that __atomic_{is,always}_lock_free builtins execute. */
#include <stdlib.h>
/* Supply an external version of the builtin which returns a non-standard
value so we can detect that it was called. */
int
__atomic_is_lock_free (size_t s, void *p)
{
return 2;
}
/* Test __atomic routines for existence and execution with each valid
memory model. */
/* { dg-options "-w" } */
/* { dg-do run } */
/* { dg-additional-sources "atomic-lockfree-aux.c" } */
/* Test that __atomic_{is,always}_lock_free builtins execute.
atomic-lockfree-aux.c supplies an external entry point for
__atomic_is_lock_free which always returns 2.  We can detect that the
external routine was called if 2 is returned, since that is not a valid
result normally. */
#include <stdlib.h>
extern void abort();
int r1, r2;
/* Test for consistency on sizes 1, 2, 4, 8, 16 and 32. */
main ()
{
r1 = __atomic_always_lock_free (sizeof(char), 0);
r2 = __atomic_is_lock_free (sizeof(char), 0);
/* If always lock free, then is_lock_free must also be true. */
if (r1)
{
if (r2 != 1)
abort ();
}
else
{
/* If it is not lock free, then the external routine must be called. */
if (r2 != 2)
abort ();
}
r1 = __atomic_always_lock_free (2, 0);
r2 = __atomic_is_lock_free (2, 0);
/* If always lock free, then is_lock_free must also be true. */
if (r1)
{
if (r2 != 1)
abort ();
}
else
{
/* If it is not lock free, then the external routine must be called. */
if (r2 != 2)
abort ();
}
r1 = __atomic_always_lock_free (4, 0);
r2 = __atomic_is_lock_free (4, 0); /* Try passing in a variable. */
/* If always lock free, then is_lock_free must also be true. */
if (r1)
{
if (r2 != 1)
abort ();
}
else
{
/* If it is not lock free, then the external routine must be called. */
if (r2 != 2)
abort ();
}
r1 = __atomic_always_lock_free (8, 0);
r2 = __atomic_is_lock_free (8, 0);
/* If always lock free, then is_lock_free must also be true. */
if (r1)
{
if (r2 != 1)
abort ();
}
else
{
/* If it is not lock free, then the external routine must be called. */
if (r2 != 2)
abort ();
}
r1 = __atomic_always_lock_free (16, 0);
r2 = __atomic_is_lock_free (16, 0);
/* If always lock free, then is_lock_free must also be true. */
if (r1)
{
if (r2 != 1)
abort ();
}
else
{
/* If it is not lock free, then the external routine must be called. */
if (r2 != 2)
abort ();
}
r1 = __atomic_always_lock_free (32, 0);
r2 = __atomic_is_lock_free (32, 0);
/* If always lock free, then is_lock_free must also be true. */
if (r1)
{
if (r2 != 1)
abort ();
}
else
{
/* If it is not lock free, then the external routine must be called. */
if (r2 != 2)
abort ();
}
return 0;
}
/* Supply a set of generic atomic functions so we can test that the compiler
makes the calls properly. */
/* { dg-do compile } */
/* { dg-options "-w" } */
/* Test that the generic builtins make calls as expected. This file provides
the exact entry points the test file will require. All these routines
simply set the first parameter to 1, and the caller will test for that. */
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
char
__atomic_exchange_1 (char *p, char t, int i)
{
*p = 1;
}
short
__atomic_load_2 (short *p, int i)
{
*p = 1;
}
void
__atomic_store_1 (char *p, char v, int i)
{
*p = 1;
}
int __atomic_compare_exchange_2 (short *p, short *a, short b, int x, int y, int z)
{
*p = 1;
}
char __atomic_fetch_add_1 (char *p, char v, int i)
{
*p = 1;
}
short __atomic_fetch_add_2 (short *p, short v, short i)
{
*p = 1;
}
int __atomic_is_lock_free (int i, void *p)
{
return 10;
}
/* Test generic __atomic routines for proper function calling. */
/* { dg-options "-w -fno-inline-atomics" } */
/* { dg-do run } */
/* { dg-additional-sources "atomic-noinline-aux.c" } */
/* Test that -fno-inline-atomics works as expected.
atomic-noinline-aux.c provides the expected routines, which simply set
the value of the first parameter to 1. */
#include <stdlib.h>
#include <stdbool.h>
extern void abort();
short as,bs,cs;
char ac,bc,cc;
main ()
{
ac = __atomic_exchange_n (&bc, cc, __ATOMIC_RELAXED);
if (bc != 1)
abort ();
as = __atomic_load_n (&bs, __ATOMIC_SEQ_CST);
if (bs != 1)
abort ();
__atomic_store_n (&ac, bc, __ATOMIC_RELAXED);
if (ac != 1)
abort ();
__atomic_compare_exchange_n (&as, &bs, cs, 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
if (as != 1)
abort ();
ac = __atomic_fetch_add (&cc, 15, __ATOMIC_SEQ_CST);
if (cc != 1)
abort ();
/* This should be translated to __atomic_fetch_add for the library */
as = __atomic_add_fetch (&cs, 10, __ATOMIC_RELAXED);
if (cs != 1)
abort ();
/* The fake external function should return 10. */
if (__atomic_is_lock_free (4, 0) != 10)
abort ();
return 0;
}
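/* Illustrative sketch, not part of the patch: the check above relies on GCC
   translating __atomic_add_fetch into a call to __atomic_fetch_add for the
   library, re-applying the operand to recover the post-operation value.
   Roughly (hypothetical helper):  */
short add_fetch_lowered (short *p, short v)
{
  /* fetch_add returns the pre-op value; adding v back gives the post-op one.  */
  return __atomic_fetch_add (p, v, __ATOMIC_RELAXED) + v;
}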
/* Test __atomic routines for invalid memory model errors. This only needs
to be tested on a single size. */
/* { dg-do compile } */
/* { dg-require-effective-target sync_int_long } */
int i;
main ()
{
__atomic_exchange_n (&i, 1); /* { dg-error "too few arguments" } */
__atomic_exchange_n (&i, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); /* { dg-error "too many arguments" } */
}
/* Test __atomic routines for existence and proper execution on 1 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_store_n builtin for a char. */
extern void abort(void);
char v, count;
main ()
{
v = 0;
count = 0;
__atomic_store_n (&v, count + 1, __ATOMIC_RELAXED);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_RELEASE);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != ++count)
abort ();
/* Now test the generic variant. */
count++;
__atomic_store (&v, &count, __ATOMIC_RELAXED);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_RELEASE);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_SEQ_CST);
if (v != count)
abort ();
return 0;
}
/* Test __atomic routines for existence and proper execution on 2 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_char_short } */
/* Test the execution of the __atomic_store_n builtin for a short. */
extern void abort(void);
short v, count;
main ()
{
v = 0;
count = 0;
__atomic_store_n (&v, count + 1, __ATOMIC_RELAXED);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_RELEASE);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != ++count)
abort ();
/* Now test the generic variant. */
count++;
__atomic_store (&v, &count, __ATOMIC_RELAXED);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_RELEASE);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_SEQ_CST);
if (v != count)
abort ();
return 0;
}
/* Test __atomic routines for existence and proper execution on 4 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_long } */
/* Test the execution of the __atomic_store_n builtin for an int. */
extern void abort(void);
int v, count;
main ()
{
v = 0;
count = 0;
__atomic_store_n (&v, count + 1, __ATOMIC_RELAXED);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_RELEASE);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != ++count)
abort ();
/* Now test the generic variant. */
count++;
__atomic_store (&v, &count, __ATOMIC_RELAXED);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_RELEASE);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_SEQ_CST);
if (v != count)
abort ();
return 0;
}
/* Test __atomic routines for existence and proper execution on 8 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_long_long } */
/* { dg-options "" } */
/* Test the execution of the __atomic_store_n builtin for a long long. */
extern void abort(void);
long long v, count;
main ()
{
v = 0;
count = 0;
__atomic_store_n (&v, count + 1, __ATOMIC_RELAXED);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_RELEASE);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != ++count)
abort ();
/* Now test the generic variant. */
count++;
__atomic_store (&v, &count, __ATOMIC_RELAXED);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_RELEASE);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_SEQ_CST);
if (v != count)
abort ();
return 0;
}
/* Test __atomic routines for existence and proper execution on 16 byte
values with each valid memory model. */
/* { dg-do run } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* } } } */
/* Test the execution of the __atomic_store_n builtin for a 16 byte value. */
extern void abort(void);
__int128_t v, count;
main ()
{
v = 0;
count = 0;
__atomic_store_n (&v, count + 1, __ATOMIC_RELAXED);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_RELEASE);
if (v != ++count)
abort ();
__atomic_store_n (&v, count + 1, __ATOMIC_SEQ_CST);
if (v != ++count)
abort ();
/* Now test the generic variant. */
count++;
__atomic_store (&v, &count, __ATOMIC_RELAXED);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_RELEASE);
if (v != count++)
abort ();
__atomic_store (&v, &count, __ATOMIC_SEQ_CST);
if (v != count)
abort ();
return 0;
}
/* { dg-do compile } */
int x;
volatile int y;
volatile unsigned char z;
void f1(void)
{
#pragma omp atomic
x++;
#pragma omp atomic
x--;
#pragma omp atomic
++x;
#pragma omp atomic
--x;
#pragma omp atomic
x += 1;
#pragma omp atomic
x -= y;
#pragma omp atomic
x |= 1;
#pragma omp atomic
x &= 1;
#pragma omp atomic
x ^= 1;
#pragma omp atomic
x *= 3;
#pragma omp atomic
x /= 3;
#pragma omp atomic
x /= 3;
#pragma omp atomic
x <<= 3;
#pragma omp atomic
x >>= 3;
}
void f2(void)
{
#pragma omp atomic
y++;
#pragma omp atomic
y--;
#pragma omp atomic
++y;
#pragma omp atomic
--y;
#pragma omp atomic
y += 1;
#pragma omp atomic
y -= x;
#pragma omp atomic
y |= 1;
#pragma omp atomic
y &= 1;
#pragma omp atomic
y ^= 1;
#pragma omp atomic
y *= 3;
#pragma omp atomic
y /= 3;
#pragma omp atomic
y /= 3;
#pragma omp atomic
y <<= 3;
#pragma omp atomic
y >>= 3;
}
void f3(void)
{
#pragma omp atomic
z++;
#pragma omp atomic
z--;
#pragma omp atomic
++z;
#pragma omp atomic
--z;
#pragma omp atomic
z += 1;
#pragma omp atomic
z |= 1;
#pragma omp atomic
z &= 1;
#pragma omp atomic
z ^= 1;
#pragma omp atomic
z *= 3;
#pragma omp atomic
z /= 3;
#pragma omp atomic
z /= 3;
#pragma omp atomic
z <<= 3;
#pragma omp atomic
z >>= 3;
}
/* PR middle-end/36877 */
/* { dg-do compile } */
/* { dg-options "-fopenmp" } */
/* { dg-options "-fopenmp -march=i386" { target { { i?86-*-* x86_64-*-* } && ia32 } } } */
int i;
float f;
void foo (void)
{
#pragma omp atomic
i++;
#pragma omp atomic
f += 1.0;
}
/* { dg-final { scan-assembler-not "__sync_(fetch|add|bool|val)" { target i?86-*-* x86_64-*-* powerpc*-*-* ia64-*-* s390*-*-* sparc*-*-* } } } */
/* PR middle-end/45423 */
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-gimple -g0" } */
/* atomicvar should never be referenced in between the barrier and the
following #pragma omp atomic load. */
/* { dg-final { scan-tree-dump-not "barrier\[^#\]*atomicvar" "gimple" } } */
/* { dg-final { cleanup-tree-dump "gimple" } } */
#ifdef __cplusplus
bool atomicvar, c;
#else
_Bool atomicvar, c;
#endif
int i, atomicvar2, c2;
int
foo (void)
{
#pragma omp barrier
#pragma omp atomic
atomicvar |= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar |= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar |= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar |= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar |= c;
#pragma omp barrier
#pragma omp atomic
atomicvar ^= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar ^= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar ^= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar ^= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar ^= c;
#pragma omp barrier
#pragma omp atomic
atomicvar &= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar &= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar &= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar &= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar &= c;
#pragma omp barrier
#pragma omp atomic
atomicvar += -1;
#pragma omp barrier
#pragma omp atomic
atomicvar += 0;
#pragma omp barrier
#pragma omp atomic
atomicvar += 1;
#pragma omp barrier
#pragma omp atomic
atomicvar += 2;
#pragma omp barrier
#pragma omp atomic
atomicvar += c;
#pragma omp barrier
#pragma omp atomic
atomicvar -= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar -= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar -= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar -= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar -= c;
#pragma omp barrier
#pragma omp atomic
atomicvar *= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar *= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar *= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar *= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar *= c;
#pragma omp barrier
#pragma omp atomic
atomicvar /= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar /= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar /= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar /= c;
#pragma omp barrier
#pragma omp atomic
atomicvar <<= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar <<= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar <<= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar <<= i;
#pragma omp barrier
#pragma omp atomic
atomicvar >>= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar >>= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar >>= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar >>= i;
#pragma omp barrier
#pragma omp atomic
atomicvar++;
#pragma omp barrier
#pragma omp atomic
++atomicvar;
#pragma omp barrier
#ifndef __cplusplus
#pragma omp atomic
atomicvar--;
#pragma omp barrier
#pragma omp atomic
--atomicvar;
#pragma omp barrier
#endif
return 0;
}
int
bar (void)
{
#pragma omp barrier
#pragma omp atomic
atomicvar2 |= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 |= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 |= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 |= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 |= c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 ^= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 ^= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 ^= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 ^= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 ^= c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 &= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 &= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 &= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 &= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 &= c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 += -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 += 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 += 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 += 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 += c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 -= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 -= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 -= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 -= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 -= c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 *= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 *= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 *= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 *= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 *= c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 /= -1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 /= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 /= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 /= c2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 <<= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 <<= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 <<= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 <<= i;
#pragma omp barrier
#pragma omp atomic
atomicvar2 >>= 0;
#pragma omp barrier
#pragma omp atomic
atomicvar2 >>= 1;
#pragma omp barrier
#pragma omp atomic
atomicvar2 >>= 2;
#pragma omp barrier
#pragma omp atomic
atomicvar2 >>= i;
#pragma omp barrier
#pragma omp atomic
atomicvar2++;
#pragma omp barrier
#pragma omp atomic
++atomicvar2;
#pragma omp barrier
#pragma omp atomic
atomicvar2--;
#pragma omp barrier
#pragma omp atomic
--atomicvar2;
#pragma omp barrier
return 0;
}
/* PR middle-end/45423 */
/* { dg-do compile } */
/* { dg-options "-fopenmp" } */
#ifdef __cplusplus
bool *baz ();
#else
_Bool *baz ();
#endif
int *bar ();
int
foo (void)
{
#pragma omp barrier
#pragma omp atomic
(*bar ())++;
#pragma omp barrier
#pragma omp atomic
++(*bar ());
#pragma omp barrier
#pragma omp atomic
(*bar ())--;
#pragma omp barrier
#pragma omp atomic
--(*bar ());
#pragma omp barrier
#pragma omp atomic
(*baz ())++;
#pragma omp barrier
#pragma omp atomic
++(*baz ());
#ifndef __cplusplus
#pragma omp barrier
#pragma omp atomic
(*baz ())--;
#pragma omp barrier
#pragma omp atomic
--(*baz ());
#pragma omp barrier
#endif
return 0;
}
/* { dg-do compile } */
float x, y;
void f1(void)
{
#pragma omp atomic
x++;
#pragma omp atomic
x--;
#pragma omp atomic
++x;
#pragma omp atomic
--x;
#pragma omp atomic
x += 1;
#pragma omp atomic
x -= y;
#pragma omp atomic
x *= 3;
#pragma omp atomic
x /= 3;
}
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-ompexp" } */
int *xyzzy;
void f1(void)
{
#pragma omp atomic
xyzzy++;
}
/* { dg-final { scan-tree-dump-times "xyzzy, 4" 1 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */
/* { dg-final { cleanup-tree-dump "ompexp" } } */
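/* For reference, not part of the patch: the pointer increment above is
   scaled by sizeof *xyzzy, so on the listed targets ompexp lowers it to
   roughly __sync_fetch_and_add (&xyzzy, 4), which is what the "xyzzy, 4"
   dump pattern matches.  */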
/* { dg-do compile } */
int a[4];
int *p;
struct S { int x; int y[4]; } s;
int *bar(void);
void f1(void)
{
#pragma omp atomic
a[4] += 1;
#pragma omp atomic
*p += 1;
#pragma omp atomic
s.x += 1;
#pragma omp atomic
s.y[*p] += 1;
#pragma omp atomic
s.y[*p] *= 42;
#pragma omp atomic
*bar() += 1;
#pragma omp atomic
*bar() *= 42;
}
/* { dg-do compile } */
double x, y;
void f2(void)
{
#pragma omp atomic
y++;
#pragma omp atomic
y--;
#pragma omp atomic
++y;
#pragma omp atomic
--y;
#pragma omp atomic
y += 1;
#pragma omp atomic
y -= x;
#pragma omp atomic
y *= 3;
#pragma omp atomic
y /= 3;
}
/* { dg-do compile } */
long double z;
void f3(void)
{
#pragma omp atomic
z++;
#pragma omp atomic
z--;
#pragma omp atomic
++z;
#pragma omp atomic
--z;
#pragma omp atomic
z += 1;
#pragma omp atomic
z *= 3;
#pragma omp atomic
z /= 3;
}
/* { dg-do compile } */
/* { dg-options "-fopenmp -fdump-tree-ompexp" } */
volatile int *bar(void);
void f1(void)
{
#pragma omp atomic
*bar() += 1;
}
/* { dg-final { scan-tree-dump-times "__sync_fetch_and_add" 1 "ompexp" { target i?86-*-* x86_64-*-* ia64-*-* powerpc*-*-* alpha*-*-* } } } */
/* { dg-final { cleanup-tree-dump "ompexp" } } */
@@ -29,8 +29,7 @@ if ![check_effective_target_fopenmp] {
dg-init
# Main loop.
dg-runtest [lsort [find $srcdir/$subdir *.c]] \
"" "-fopenmp"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c $srcdir/c-c++-common/gomp/*.c]] "" "-fopenmp"
# All done.
dg-finish
/* { dg-do link } */
/* { dg-require-effective-target sync_int_long } */
/* { dg-final { simulate-thread } } */
#include <stdio.h>
#include "simulate-thread.h"
/* Testing load for atomicity is a little trickier.
Set up the atomic value so that it changes value after every instruction
is executed.
Simply alternating between 2 values wouldn't be sufficient since a load of
one part, followed by the load of the second part 2 instructions later would
appear to be valid.
Set up a table of 16 values, each changing a bit in every byte of the
value; this gives a 16 instruction cycle before repetition kicks in,
which should be sufficient to detect any issues. Just to be sure,
we also change the table cycle size during execution.
The end result is that all loads should always get one of the values from
the table. Any other pattern means the load failed. */
unsigned int ret;
unsigned int value = 0;
unsigned int result = 0;
unsigned int table[16] = {
0x00000000,
0x11111111,
0x22222222,
0x33333333,
0x44444444,
0x55555555,
0x66666666,
0x77777777,
0x88888888,
0x99999999,
0xAAAAAAAA,
0xBBBBBBBB,
0xCCCCCCCC,
0xDDDDDDDD,
0xEEEEEEEE,
0xFFFFFFFF
};
int table_cycle_size = 16;
/* Return 0 if 'result' is a valid value to have loaded. */
int verify_result ()
{
int x;
int found = 0;
/* Check entire table for valid values. */
for (x = 0; x < 16 ; x++)
if (result == table[x])
{
found = 1;
break;
}
if (!found)
printf("FAIL: Invalid result returned from fetch\n");
return !found;
}
/* Iterate VALUE through the different valid values. */
void simulate_thread_other_threads ()
{
static int current = 0;
if (++current >= table_cycle_size)
current = 0;
value = table[current];
}
int simulate_thread_step_verify ()
{
return verify_result ();
}
int simulate_thread_final_verify ()
{
return verify_result ();
}
__attribute__((noinline))
void simulate_thread_main()
{
int x;
/* Execute loads with value changing at various cyclic values. */
for (table_cycle_size = 16; table_cycle_size > 4 ; table_cycle_size--)
{
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
/* In order to verify the returned value (which is not atomic), it needs
to be atomically stored into another variable and checked there. */
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
/* Execute the fetch/store a couple of times just to ensure the cycles
have a chance to be interesting. */
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
}
}
main()
{
simulate_thread_main ();
simulate_thread_done ();
return 0;
}
/* { dg-do link } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* i?86-*-* } } } */
/* { dg-final { simulate-thread } } */
#include <stdio.h>
#include "simulate-thread.h"
/* Testing load for atomicity is a little trickier.
Set up the atomic value so that it changes value after every instruction
is executed.
Simply alternating between 2 values wouldn't be sufficient since a load of
one part, followed by the load of the second part 2 instructions later would
appear to be valid.
Set up a table of 16 values, each changing a bit in every byte of the
value; this gives a 16 instruction cycle before repetition kicks in,
which should be sufficient to detect any issues. Just to be sure,
we also change the table cycle size during execution.
The end result is that all loads should always get one of the values from
the table. Any other pattern means the load failed. */
__int128_t ret;
__int128_t value = 0;
__int128_t result = 0;
__int128_t table[16] = {
0x0000000000000000,
0x1111111111111111,
0x2222222222222222,
0x3333333333333333,
0x4444444444444444,
0x5555555555555555,
0x6666666666666666,
0x7777777777777777,
0x8888888888888888,
0x9999999999999999,
0xAAAAAAAAAAAAAAAA,
0xBBBBBBBBBBBBBBBB,
0xCCCCCCCCCCCCCCCC,
0xDDDDDDDDDDDDDDDD,
0xEEEEEEEEEEEEEEEE,
0xFFFFFFFFFFFFFFFF
};
int table_cycle_size = 16;
/* Since we don't have 128 bit constants, we have to properly pad the table. */
void fill_table()
{
int x;
for (x = 0; x < 16; x++)
{
ret = table[x];
ret = (ret << 64) | ret;
table[x] = ret;
}
}
/* Return 0 if 'result' is a valid value to have loaded. */
int verify_result ()
{
int x;
int found = 0;
/* Check entire table for valid values. */
for (x = 0; x < 16; x++)
if (result == table[x])
{
found = 1;
break;
}
if (!found)
printf("FAIL: Invalid result returned from fetch\n");
return !found;
}
/* Iterate VALUE through the different valid values. */
void simulate_thread_other_threads ()
{
static int current = 0;
if (++current >= table_cycle_size)
current = 0;
value = table[current];
}
int simulate_thread_step_verify ()
{
return verify_result ();
}
int simulate_thread_final_verify ()
{
return verify_result ();
}
__attribute__((noinline))
void simulate_thread_main()
{
int x;
/* Make sure value starts with an atomic value now. */
__atomic_store_n (&value, ret, __ATOMIC_SEQ_CST);
/* Execute loads with value changing at various cyclic values. */
for (table_cycle_size = 16; table_cycle_size > 4 ; table_cycle_size--)
{
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
/* In order to verify the returned value (which is not atomic), it needs
to be atomically stored into another variable and checked there. */
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
/* Execute the fetch/store a couple of times just to ensure the cycles
have a chance to be interesting. */
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
}
}
main()
{
fill_table ();
simulate_thread_main ();
simulate_thread_done ();
return 0;
}
/* { dg-do link } */
/* { dg-require-effective-target sync_long_long } */
/* { dg-options "" } */
/* { dg-final { simulate-thread } } */
#include <stdio.h>
#include "simulate-thread.h"
/* Testing load for atomicity is a little trickier.
Set up the atomic value so that it changes value after every instruction
is executed.
Simply alternating between 2 values wouldn't be sufficient since a load of
one part, followed by the load of the second part 2 instructions later would
appear to be valid.
Set up a table of 16 values, each changing a bit in every byte of the
value; this gives a 16 instruction cycle before repetition kicks in,
which should be sufficient to detect any issues. Just to be sure,
we also change the table cycle size during execution.
The end result is that all loads should always get one of the values from
the table. Any other pattern means the load failed. */
unsigned long long ret;
unsigned long long value = 0;
unsigned long long result = 0;
unsigned long long table[16] = {
0x0000000000000000,
0x1111111111111111,
0x2222222222222222,
0x3333333333333333,
0x4444444444444444,
0x5555555555555555,
0x6666666666666666,
0x7777777777777777,
0x8888888888888888,
0x9999999999999999,
0xAAAAAAAAAAAAAAAA,
0xBBBBBBBBBBBBBBBB,
0xCCCCCCCCCCCCCCCC,
0xDDDDDDDDDDDDDDDD,
0xEEEEEEEEEEEEEEEE,
0xFFFFFFFFFFFFFFFF
};
int table_cycle_size = 16;
/* Return 0 if 'result' is a valid value to have loaded. */
int verify_result ()
{
int x;
int found = 0;
/* Check entire table for valid values. */
for (x = 0; x < 16 ; x++)
if (result == table[x])
{
found = 1;
break;
}
if (!found)
printf("FAIL: Invalid result returned from fetch\n");
return !found;
}
/* Iterate VALUE through the different valid values. */
void simulate_thread_other_threads ()
{
static int current = 0;
if (++current >= table_cycle_size)
current = 0;
value = table[current];
}
int simulate_thread_step_verify ()
{
return verify_result ();
}
int simulate_thread_final_verify ()
{
return verify_result ();
}
__attribute__((noinline))
void simulate_thread_main()
{
int x;
/* Execute loads with value changing at various cyclic values. */
for (table_cycle_size = 16; table_cycle_size > 4 ; table_cycle_size--)
{
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
/* In order to verify the returned value (which is not atomic), it needs
to be atomically stored into another variable and checked there. */
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
/* Execute the fetch/store a couple of times just to ensure the cycles
have a chance to be interesting. */
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
}
}
main()
{
simulate_thread_main ();
simulate_thread_done ();
return 0;
}
/* { dg-do link } */
/* { dg-require-effective-target sync_char_short } */
/* { dg-final { simulate-thread } } */
#include <stdio.h>
#include "simulate-thread.h"
/* Testing load for atomicity is a little trickier.
Set up the atomic value so that it changes value after every instruction
is executed.
Simply alternating between 2 values wouldn't be sufficient since a load of
one part, followed by the load of the second part 2 instructions later would
appear to be valid.
Set up a table of 16 values, each changing a bit in every byte of the
value; this gives a 16 instruction cycle before repetition kicks in,
which should be sufficient to detect any issues. Just to be sure,
we also change the table cycle size during execution.
The end result is that all loads should always get one of the values from
the table. Any other pattern means the load failed. */
unsigned short ret;
unsigned short value = 0;
unsigned short result = 0;
unsigned short table[16] = {
0x0000,
0x1111,
0x2222,
0x3333,
0x4444,
0x5555,
0x6666,
0x7777,
0x8888,
0x9999,
0xAAAA,
0xBBBB,
0xCCCC,
0xDDDD,
0xEEEE,
0xFFFF
};
int table_cycle_size = 16;
/* Return 0 if 'result' is a valid value to have loaded. */
int verify_result ()
{
int x;
int found = 0;
/* Check entire table for valid values. */
for (x = 0; x < 16 ; x++)
if (result == table[x])
{
found = 1;
break;
}
if (!found)
printf("FAIL: Invalid result returned from fetch\n");
return !found;
}
/* Iterate VALUE through the different valid values. */
void simulate_thread_other_threads ()
{
static int current = 0;
if (++current >= table_cycle_size)
current = 0;
value = table[current];
}
int simulate_thread_step_verify ()
{
return verify_result ();
}
int simulate_thread_final_verify ()
{
return verify_result ();
}
__attribute__((noinline))
void simulate_thread_main()
{
int x;
/* Execute loads with value changing at various cyclic values. */
for (table_cycle_size = 16; table_cycle_size > 4 ; table_cycle_size--)
{
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
/* The plain store into RET is not atomic, so in order to verify the
loaded value it is atomically copied into RESULT, which the verify
routines then check. */
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
/* Execute the load/store pair a couple of times so each cycle length
has a chance to be interesting. */
ret = __atomic_load_n (&value, __ATOMIC_SEQ_CST);
__atomic_store_n (&result, ret, __ATOMIC_SEQ_CST);
}
}
int main ()
{
simulate_thread_main ();
simulate_thread_done ();
return 0;
}
/* { dg-do link } */
/* { dg-require-effective-target sync_int_long } */
/* { dg-final { simulate-thread } } */
#include <stdio.h>
#include "simulate-thread.h"
/* Test all the __atomic routines for proper atomicity on 4-byte values. */
unsigned int zero = 0;
unsigned int max = ~0;
unsigned int changing_value = 0;
unsigned int value = 0;
unsigned int ret;
void test_abort()
{
static int reported = 0;
if (!reported)
{
printf ("FAIL: improper execution of __sync builtin.\n");
reported = 1;
}
}
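/* Nothing to do here; the __atomic operations in simulate_thread_main
provide all of the state changes to be checked. */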
void simulate_thread_other_threads ()
{
}
int simulate_thread_step_verify ()
{
if (value != zero && value != max)
{
printf ("FAIL: invalid intermediate result for value.\n");
return 1;
}
return 0;
}
int simulate_thread_final_verify ()
{
if (value != 0)
{
printf ("FAIL: invalid final result for value.\n");
return 1;
}
return 0;
}
/* All values written to 'value' alternate between 'zero' and
'max'. Any other value detected by simulate_thread_step_verify()
between instructions would indicate that the value was only
partially written, and would thus fail this atomicity test.
This function tests each different __atomic routine once, with
the exception of the load instruction which requires special
testing. */
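/* For example, if a 4-byte store were split into two 2-byte stores,
simulate_thread_step_verify () could observe a value such as
0x0000FFFF, which is neither 'zero' nor 'max'. */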
__attribute__((noinline))
void simulate_thread_main()
{
ret = __atomic_exchange_n (&value, max, __ATOMIC_SEQ_CST);
if (ret != zero || value != max)
test_abort();
__atomic_store_n (&value, zero, __ATOMIC_SEQ_CST);
if (value != zero)
test_abort();
ret = __atomic_fetch_add (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != zero)
test_abort ();
ret = __atomic_fetch_sub (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != max)
test_abort ();
ret = __atomic_fetch_or (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != zero)
test_abort ();
ret = __atomic_fetch_and (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_fetch_xor (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != max)
test_abort ();
ret = __atomic_add_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_sub_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != zero)
test_abort ();
ret = __atomic_or_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_and_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_xor_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != zero)
test_abort ();
}
int main ()
{
simulate_thread_main ();
simulate_thread_done ();
return 0;
}
/* { dg-do link } */
/* { dg-require-effective-target sync_int_128 } */
/* { dg-options "-mcx16" { target { x86_64-*-* i?86-*-* } } } */
/* { dg-final { simulate-thread } } */
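/* -mcx16 enables the x86 cmpxchg16b instruction, which the 16-byte
__atomic operations are expected to expand to on these targets. */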
#include <stdio.h>
#include "simulate-thread.h"
/* Test all the __atomic routines for proper atomicity on 16-byte values. */
__int128_t zero = 0;
__int128_t max = ~0;
__int128_t changing_value = 0;
__int128_t value = 0;
__int128_t ret;
void test_abort()
{
static int reported = 0;
if (!reported)
{
printf ("FAIL: improper execution of __sync builtin.\n");
reported = 1;
}
}
void simulate_thread_other_threads ()
{
}
int simulate_thread_step_verify ()
{
if (value != zero && value != max)
{
printf ("FAIL: invalid intermediate result for value.\n");
return 1;
}
return 0;
}
int simulate_thread_final_verify ()
{
if (value != 0)
{
printf ("FAIL: invalid final result for value.\n");
return 1;
}
return 0;
}
/* All values written to 'value' alternate between 'zero' and 'max'. Any other
value detected by simulate_thread_step_verify() between instructions would indicate
that the value was only partially written, and would thus fail this
atomicity test.
This function tests each different __atomic routine once, with the
exception of the load instruction which requires special testing. */
__attribute__((noinline))
void simulate_thread_main()
{
ret = __atomic_exchange_n (&value, max, __ATOMIC_SEQ_CST);
if (ret != zero || value != max)
test_abort();
__atomic_store_n (&value, zero, __ATOMIC_SEQ_CST);
if (value != zero)
test_abort();
ret = __atomic_fetch_add (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != zero)
test_abort ();
ret = __atomic_fetch_sub (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != max)
test_abort ();
ret = __atomic_fetch_or (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != zero)
test_abort ();
ret = __atomic_fetch_and (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_fetch_xor (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != max)
test_abort ();
ret = __atomic_add_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_sub_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != zero)
test_abort ();
ret = __atomic_or_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_and_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_xor_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != zero)
test_abort ();
}
int main()
{
simulate_thread_main ();
simulate_thread_done ();
return 0;
}
/* { dg-do link } */
/* { dg-require-effective-target sync_long_long } */
/* { dg-options "" } */
/* { dg-final { simulate-thread } } */
#include <stdio.h>
#include "simulate-thread.h"
/* Test all the __atomic routines for proper atomicity on 8-byte values. */
unsigned long long zero = 0;
unsigned long long max = ~0;
unsigned long long changing_value = 0;
unsigned long long value = 0;
unsigned long long ret;
void test_abort()
{
static int reported = 0;
if (!reported)
{
printf ("FAIL: improper execution of __sync builtin.\n");
reported = 1;
}
}
void simulate_thread_other_threads ()
{
}
int simulate_thread_step_verify ()
{
if (value != zero && value != max)
{
printf ("FAIL: invalid intermediate result for value.\n");
return 1;
}
return 0;
}
int simulate_thread_final_verify ()
{
if (value != 0)
{
printf ("FAIL: invalid final result for value.\n");
return 1;
}
return 0;
}
/* All values written to 'value' alternate between 'zero' and 'max'. Any other
value detected by simulate_thread_step_verify() between instructions would indicate
that the value was only partially written, and would thus fail this
atomicity test.
This function tests each different __atomic routine once, with the
exception of the load instruction which requires special testing. */
__attribute__((noinline))
void simulate_thread_main()
{
ret = __atomic_exchange_n (&value, max, __ATOMIC_SEQ_CST);
if (ret != zero || value != max)
test_abort();
__atomic_store_n (&value, zero, __ATOMIC_SEQ_CST);
if (value != zero)
test_abort();
ret = __atomic_fetch_add (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != zero)
test_abort ();
ret = __atomic_fetch_sub (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != max)
test_abort ();
ret = __atomic_fetch_or (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != zero)
test_abort ();
ret = __atomic_fetch_and (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_fetch_xor (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != max)
test_abort ();
ret = __atomic_add_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_sub_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != zero)
test_abort ();
ret = __atomic_or_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_and_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != max || ret != max)
test_abort ();
ret = __atomic_xor_fetch (&value, max, __ATOMIC_SEQ_CST);
if (value != zero || ret != zero)
test_abort ();
}
int main ()
{
simulate_thread_main ();
simulate_thread_done ();
return 0;
}