jatomic.hpp

/*##############################################################################
    HPCC SYSTEMS software Copyright (C) 2012 HPCC Systems®.

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
############################################################################## */
#ifndef JATOMIC_HPP
#define JATOMIC_HPP

#include "platform.h"

#include <atomic>

#ifdef _WIN32
inline static void spinPause() { YieldProcessor(); }
#elif defined(_ARCH_X86_64_) || defined(_ARCH_X86_)
# include "x86intrin.h"
# if defined(_ARCH_X86_)
inline static void spinPause() { __pause(); }
// or could use
// __asm__ __volatile__ ("rep; nop" ::: "memory");
// __asm__ __volatile__ ("pause" ::: "memory");
# else
inline static void spinPause() { _mm_pause(); }
# endif
#elif defined(_ARCH_PPC64EL_)
inline static void spinPause() { } // MORE: Is there an equivalent?
#elif defined(_ARCH_ARM64_)
inline static void spinPause() { } // MORE: Is there an equivalent?
#else
inline static void spinPause() { }
#endif
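
// Illustrative usage sketch (not part of the original header, hypothetical names):
// spinPause() is intended to sit inside a busy-wait loop, e.g. a simple
// test-and-set spin lock:
//
//   std::atomic<bool> locked{false};
//   void lock()
//   {
//       while (locked.exchange(true, std::memory_order_acquire))
//           spinPause();          // hint to the core that we are spinning
//   }
//   void unlock() { locked.store(false, std::memory_order_release); }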
template <typename T>
auto add_fetch(T & value, decltype(value.load()) delta, std::memory_order order = std::memory_order_seq_cst) -> decltype(value.load()) { return value.fetch_add(delta, order) + delta; }
template <typename T>
auto sub_fetch(T & value, decltype(value.load()) delta, std::memory_order order = std::memory_order_seq_cst) -> decltype(value.load()) { return value.fetch_sub(delta, order) - delta; }
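
// Illustrative sketch (assumed usage, not from the original source): unlike
// fetch_add/fetch_sub, these helpers return the value *after* the update:
//
//   std::atomic<unsigned> counter{10};
//   unsigned after1 = add_fetch(counter, 5);   // counter == 15, after1 == 15
//   unsigned after2 = sub_fetch(counter, 3);   // counter == 12, after2 == 12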
//Use this class for stats which are gathered, but where the values read from other threads do not need to be synchronized
//NOTE: Counts will never be lost, but the values read from another thread may be inconsistent.
//E.g., if thread 1 updates x then y, thread 2 may read an updated value of y, but an old value of x.
template <typename T>
class RelaxedAtomic : public std::atomic<T>
{
public:
    typedef std::atomic<T> BASE;
    RelaxedAtomic() noexcept = default;
    inline constexpr RelaxedAtomic(T _value) noexcept : BASE(_value) { }
    ~RelaxedAtomic() noexcept = default;
    RelaxedAtomic(const RelaxedAtomic& _value) { BASE::store(_value.load()); }
    RelaxedAtomic& operator=(const RelaxedAtomic&) = delete;
    inline operator T() const noexcept { return load(); }
    inline T operator=(T _value) noexcept { store(_value); return _value; }
    inline T operator++() noexcept { return BASE::fetch_add(1, std::memory_order_relaxed)+1; } // ++x
    inline T operator--() noexcept { return BASE::fetch_sub(1, std::memory_order_relaxed)-1; } // --x
    inline T operator++(int) noexcept { return BASE::fetch_add(1, std::memory_order_relaxed); } // x++
    inline T operator--(int) noexcept { return BASE::fetch_sub(1, std::memory_order_relaxed); } // x--
    inline T operator+=(int v) noexcept { return BASE::fetch_add(v, std::memory_order_relaxed)+v; }
    inline T operator-=(int v) noexcept { return BASE::fetch_sub(v, std::memory_order_relaxed)-v; }
    inline void store(T _value, std::memory_order order = std::memory_order_relaxed) noexcept { BASE::store(_value, order); }
    inline T load(std::memory_order order = std::memory_order_relaxed) const noexcept { return BASE::load(order); }
    inline T exchange(T _value, std::memory_order order = std::memory_order_relaxed) noexcept { return BASE::exchange(_value, order); }
    inline T fetch_add(T _value, std::memory_order order = std::memory_order_relaxed) noexcept { return BASE::fetch_add(_value, order); }
    inline T fetch_sub(T _value, std::memory_order order = std::memory_order_relaxed) noexcept { return BASE::fetch_sub(_value, order); }
    inline T add_fetch(T _value, std::memory_order order = std::memory_order_relaxed) noexcept { return ::add_fetch(*this, _value, order); }
    inline T sub_fetch(T _value, std::memory_order order = std::memory_order_relaxed) noexcept { return ::sub_fetch(*this, _value, order); }
    inline void store_max(T _value) noexcept { while (_value > load()) _value = BASE::exchange(_value, std::memory_order_acq_rel); }
    inline void store_min(T _value) noexcept { while (_value < load()) _value = BASE::exchange(_value, std::memory_order_acq_rel); }
};
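
// Illustrative sketch (hypothetical names, not part of the original header): a typical
// use is a statistics counter updated from many threads and read occasionally, where
// relaxed ordering is acceptable because no other data is published via the counter:
//
//   static RelaxedAtomic<unsigned> numCacheHits{0};
//   void noteHit() { numCacheHits++; }              // relaxed fetch_add, never loses a count
//   unsigned sampleHits() { return numCacheHits; }  // relaxed load; may be slightly stale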
// Class to accumulate values locally and only add atomically once
template <typename T>
class ScopedAtomic
{
public:
    inline ScopedAtomic(RelaxedAtomic<T> &_gval) : lval(0), gval(_gval) {}
    inline ~ScopedAtomic() { if (lval) gval.fetch_add(lval); }
    ScopedAtomic(const ScopedAtomic&) = delete;
    ScopedAtomic& operator=(const ScopedAtomic&) = delete;
    inline operator T() const noexcept { return lval; }
    inline T operator=(T _value) noexcept { lval = _value; return _value; }
    inline T operator++() noexcept { return ++lval; }
    inline T operator--() noexcept { return --lval; }
    inline T operator++(int) noexcept { return lval++; }
    inline T operator--(int) noexcept { return lval--; }
    inline T operator+=(int v) noexcept { return lval += v; }
    inline T operator-=(int v) noexcept { return lval -= v; }
private:
    T lval;
    RelaxedAtomic<T> &gval;
};
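
// Illustrative sketch (hypothetical names): ScopedAtomic batches updates so the shared
// RelaxedAtomic is only touched once, when the scope ends:
//
//   RelaxedAtomic<unsigned> globalRowsProcessed{0};
//   void processChunk(unsigned numRows)
//   {
//       ScopedAtomic<unsigned> local(globalRowsProcessed);
//       for (unsigned i = 0; i < numRows; i++)
//       {
//           // ... process one row ...
//           local++;              // cheap, purely local increment
//       }
//   }   // destructor adds the batched total with a single fetch_add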
//Currently compare_exchange_weak in gcc forces a write to memory, which is painful in highly contended situations.
//See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66867 for some details. Marked as fixed for gcc 7.
//The symbol HAS_EFFICIENT_CAS should be defined if this bug is fixed, and/or there is no fallback implementation (e.g., windows)
//MCK verified gcc 7.1+ and all recent clang are ok
#if defined(_WIN32)
# define HAS_EFFICIENT_CAS
#elif defined(__GNUC__) && (__GNUC__ > 7 || (__GNUC__ == 7 && __GNUC_MINOR__ >= 1))
# define HAS_EFFICIENT_CAS
#elif defined(__clang__)
# define HAS_EFFICIENT_CAS
#endif
#if defined(HAS_EFFICIENT_CAS)
//If HAS_EFFICIENT_CAS is defined, expected is a reference and is updated if the compare fails
template <typename x>
bool compare_exchange_efficient(x & value, decltype(value.load()) & expected, decltype(value.load()) desired, std::memory_order order = std::memory_order_seq_cst)
{
    return value.compare_exchange_weak(expected, desired, order);
}
template <typename x>
bool compare_exchange_efficient(x & value, decltype(value.load()) & expected, decltype(value.load()) desired, std::memory_order successOrder, std::memory_order failureOrder)
{
    return value.compare_exchange_weak(expected, desired, successOrder, failureOrder);
}
#else
//If HAS_EFFICIENT_CAS is not defined, the expected value is not updated => expected is not a reference
template <typename x>
bool compare_exchange_efficient(x & value, decltype(value.load()) expected, decltype(value.load()) desired, std::memory_order order = std::memory_order_seq_cst)
{
    decltype(value.load()) * nastyCast = reinterpret_cast<decltype(value.load()) *>(&value);
    return __sync_bool_compare_and_swap(nastyCast, expected, desired);
}
template <typename x>
bool compare_exchange_efficient(x & value, decltype(value.load()) expected, decltype(value.load()) desired, std::memory_order successOrder, std::memory_order failureOrder)
{
    decltype(value.load()) * nastyCast = reinterpret_cast<decltype(value.load()) *>(&value);
    return __sync_bool_compare_and_swap(nastyCast, expected, desired);
}
#endif
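
// Illustrative sketch (assumed usage, not from the original source): a lock-free
// "store maximum" loop that works with either branch above. When HAS_EFFICIENT_CAS is
// not defined, expected is passed by value and is not refreshed on failure, so the
// loop re-reads it explicitly each iteration (harmless but redundant otherwise):
//
//   void noteMax(std::atomic<unsigned> & highWater, unsigned seen)
//   {
//       unsigned current = highWater.load();
//       while (seen > current)
//       {
//           if (compare_exchange_efficient(highWater, current, seen, std::memory_order_relaxed))
//               break;
//           current = highWater.load();   // refresh and retry
//       }
//   }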
#ifdef _WIN32

#include <intrin.h>

extern "C"
{
    LONG __cdecl _InterlockedIncrement(LONG volatile *Addend);
    LONG __cdecl _InterlockedDecrement(LONG volatile *Addend);
    LONG __cdecl _InterlockedCompareExchange(LONG volatile * Dest, LONG Exchange, LONG Comp);
}

#pragma intrinsic (_InterlockedCompareExchange)
#define InterlockedCompareExchange _InterlockedCompareExchange
#pragma intrinsic (_InterlockedIncrement)
#define InterlockedIncrement _InterlockedIncrement
#pragma intrinsic (_InterlockedDecrement)
#define InterlockedDecrement _InterlockedDecrement
#pragma intrinsic (_InterlockedExchangeAdd)
#define InterlockedExchangeAdd _InterlockedExchangeAdd

typedef volatile long atomic_t;
#define ATOMIC_INIT(i) (i)
#define atomic_inc(v) InterlockedIncrement(v)
#define atomic_inc_and_test(v) (InterlockedIncrement(v) == 0)
#define atomic_dec(v) InterlockedDecrement(v)
#define atomic_dec_and_test(v) (InterlockedDecrement(v) == 0)
#define atomic_dec_and_read(v) InterlockedDecrement(v)
#define atomic_read(v) (*v)
#define atomic_set(v,i) ((*v) = (i))
#define atomic_xchg(i, v) InterlockedExchange(v, i)
#define atomic_add(v,i) InterlockedExchangeAdd(v,i)
#define atomic_add_and_read(v,i) InterlockedAdd(v,i)
#define atomic_add_exchange(v, i) InterlockedExchangeAdd(v,i)
#define atomic_xchg_ptr(p, v) InterlockedExchangePointer(v,p)
#if defined (_MSC_VER) && (_MSC_VER <= 1200)
#define atomic_cas(v,newvalue,expectedvalue) (InterlockedCompareExchange((PVOID *)(v),(PVOID)(long)(newvalue),(PVOID)(long)(expectedvalue))==(PVOID)(long)(expectedvalue))
#define atomic_cas_ptr(v, newvalue,expectedvalue) atomic_cas(v,(long)newvalue,(long)expectedvalue)
#else
#define atomic_cas(v,newvalue,expectedvalue) (InterlockedCompareExchange(v,newvalue,expectedvalue)==expectedvalue)
#define atomic_cas_ptr(v, newvalue,expectedvalue) (InterlockedCompareExchangePointer(v,newvalue,expectedvalue)==expectedvalue)
#endif

//Used to prevent the compiler reordering volatile and non-volatile loads/stores
#define compiler_memory_barrier() _ReadWriteBarrier()

#define atomic_acquire(v) atomic_cas(v, 1, 0)
#define atomic_release(v) { compiler_memory_barrier(); atomic_set(v, 0); }
#elif defined(__GNUC__)

typedef struct { volatile int counter; } atomic_t;
#define ATOMIC_INIT(i) { (i) }
#define atomic_read(v) ((v)->counter)
#define atomic_set(v,i) (((v)->counter) = (i))

static __inline__ bool atomic_dec_and_test(atomic_t *v)
{
    // returns (--*v==0)
    return (__sync_add_and_fetch(&v->counter,-1)==0);
}

static __inline__ bool atomic_inc_and_test(atomic_t *v)
{
    // returns (++*v==0)
    return (__sync_add_and_fetch(&v->counter,1)==0);
}

static __inline__ void atomic_inc(atomic_t *v)
{
    // (*v)++
    __sync_add_and_fetch(&v->counter,1);
}

static __inline__ void atomic_dec(atomic_t *v)
{
    // (*v)--
    __sync_add_and_fetch(&v->counter,-1);
}

static __inline__ int atomic_dec_and_read(atomic_t *v)
{
    // (*v)--, return *v;
    return __sync_add_and_fetch(&v->counter,-1);
}
static __inline__ int atomic_xchg(int i, atomic_t *v)
{
    // int ret = *v; *v = i; return ret;
    return __sync_lock_test_and_set(&v->counter,i); // actually an xchg
}
static __inline__ void atomic_add(atomic_t *v,int i)
{
    // (*v) += i;
    __sync_add_and_fetch(&v->counter,i);
}

static __inline__ int atomic_add_and_read(atomic_t *v,int i)
{
    // (*v) += i; return *v;
    return __sync_add_and_fetch(&v->counter,i);
}

static __inline__ int atomic_add_exchange(atomic_t *v,int i)
{
    // int ret = *v; (*v) += i; return ret;
    return __sync_fetch_and_add(&v->counter,i);
}

static __inline__ bool atomic_cas(atomic_t *v,int newvalue, int expectedvalue)
{
    // bool ret = (*v==expectedvalue); if (ret) *v = newvalue; return ret;
    return __sync_bool_compare_and_swap(&v->counter, expectedvalue, newvalue);
}

static __inline__ void * atomic_xchg_ptr(void *p, void **v)
{
    // void * ret = *v; (*v) = p; return ret;
    return (void *)__sync_lock_test_and_set((memsize_t *)v,(memsize_t)p);
}

static __inline__ bool atomic_cas_ptr(void **v,void *newvalue, void *expectedvalue)
{
    // bool ret = (*v==expectedvalue); if (ret) *v = newvalue; return ret;
    return __sync_bool_compare_and_swap((memsize_t *)v, (memsize_t)expectedvalue, (memsize_t)newvalue);
}

#define compiler_memory_barrier() asm volatile("": : :"memory")

static __inline__ bool atomic_acquire(atomic_t *v)
{
#if defined(_ARCH_X86_64_) || defined(_ARCH_X86_)
    //For some reason gcc targeting x86 generates code for atomic_cas() that requires fewer registers
    return atomic_cas(v, 1, 0);
#else
    return __sync_lock_test_and_set(&v->counter, 1) == 0;
#endif
}
static __inline__ void atomic_release(atomic_t *v)
{
#if defined(_ARCH_X86_64_) || defined(_ARCH_X86_)
    //x86 has a strong memory model, so a compiler barrier followed by a plain store is sufficient.
    //Some older gcc compilers generate an unnecessary mfence instruction for __sync_lock_release,
    //so this path generates better code on x86.
    compiler_memory_barrier();
    atomic_set(v, 0);
#else
    __sync_lock_release(&v->counter);
#endif
}
#else // other unix

//Truly awful implementations of atomic operations...
typedef volatile int atomic_t;
int jlib_decl poor_atomic_dec_and_read(atomic_t * v);
bool jlib_decl poor_atomic_inc_and_test(atomic_t * v);
int jlib_decl poor_atomic_xchg(int i, atomic_t * v);
void jlib_decl poor_atomic_add(atomic_t * v, int i);
int jlib_decl poor_atomic_add_and_read(atomic_t * v, int i);
int jlib_decl poor_atomic_add_exchange(atomic_t * v, int i);
bool jlib_decl poor_atomic_cas(atomic_t * v, int newvalue, int expectedvalue);
void jlib_decl *poor_atomic_xchg_ptr(void *p, void **v);
bool jlib_decl poor_atomic_cas_ptr(void ** v, void *newvalue, void *expectedvalue);
void jlib_decl poor_compiler_memory_barrier();

#define ATOMIC_INIT(i) (i)
#define atomic_inc(v) (void)poor_atomic_inc_and_test(v)
#define atomic_inc_and_test(v) poor_atomic_inc_and_test(v)
#define atomic_dec(v) (void)poor_atomic_dec_and_read(v)
#define atomic_dec_and_read(v) poor_atomic_dec_and_read(v)
#define atomic_dec_and_test(v) (poor_atomic_dec_and_read(v)==0)
#define atomic_read(v) (*v)
#define atomic_set(v,i) ((*v) = (i))
#define atomic_xchg(i, v) poor_atomic_xchg(i, v)
#define atomic_add(v,i) poor_atomic_add(v, i)
#define atomic_add_and_read(v,i) poor_atomic_add_and_read(v, i)
#define atomic_add_exchange(v, i) poor_atomic_add_exchange(v, i)
#define atomic_cas(v,newvalue,expectedvalue) poor_atomic_cas(v,newvalue,expectedvalue)
#define atomic_xchg_ptr(p, v) poor_atomic_xchg_ptr(p, v)
#define atomic_cas_ptr(v,newvalue,expectedvalue) poor_atomic_cas_ptr(v,newvalue,expectedvalue)
#define compiler_memory_barrier() poor_compiler_memory_barrier()
#define atomic_acquire(v) atomic_cas(v, 1, 0)
#define atomic_release(v) { compiler_memory_barrier(); atomic_set(v, 0); }

#endif
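
// Illustrative sketch (assumed usage of the legacy atomic_t API above, hypothetical names):
// atomic_acquire/atomic_release form a minimal spin lock on every platform branch:
//
//   static atomic_t sectionLock = ATOMIC_INIT(0);
//   void enterSection() { while (!atomic_acquire(&sectionLock)) {} }  // spin until the flag flips 0 -> 1
//   void leaveSection() { atomic_release(&sectionLock); }             // barrier, then reset to 0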
#endif