// -*- C++ -*-

/** @file parallel/compatibility.h
 *  @brief Compatibility layer, mostly concerned with atomic operations.
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Felix Putze.
#ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H
#define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1

#include <parallel/types.h>
#include <parallel/base.h>

#if defined(__SUNPRO_CC) && defined(__sparc)
#include <sys/atomic.h>
#endif

#if !defined(_WIN32) || defined(__CYGWIN__)
#include <sched.h>
#endif

#if defined(_MSC_VER)
#include <Windows.h>
#include <intrin.h>
#undef max
#undef min
#endif

#ifdef __MINGW32__
// Including <windows.h> here would drag in all the Win32 API names, which
// can break user code, so just declare the one function that is needed.
extern "C"
__attribute__((dllimport)) void __attribute__((stdcall)) Sleep(unsigned long);
#endif

namespace __gnu_parallel
{
#if defined(__ICC)
  template<typename _MustBeInt = int>
    int32_t
    __faa32(int32_t* __x, int32_t __inc)
    {
      asm volatile("lock xadd %0,%1"
                   : "=r" (__inc), "=m" (*__x)
                   : "0" (__inc)
                   : "memory");
      return __inc;
    }

#if defined(__x86_64)
  template<typename _MustBeInt = int>
    int64_t
    __faa64(int64_t* __x, int64_t __inc)
    {
      asm volatile("lock xadd %0,%1"
                   : "=r" (__inc), "=m" (*__x)
                   : "0" (__inc)
                   : "memory");
      return __inc;
    }
#endif
#endif

  // The atomic primitives below only work on integral types.

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 32-bit signed integer.
   *  @param __addend Value to add.
   */
  inline int32_t
  __fetch_and_add_32(volatile int32_t* __ptr, int32_t __addend)
  {
#if defined(__ICC)      //x86 version
    return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ECC)    //IA-64 version
    return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(__ptr),
                                   __addend);
#elif defined(__GNUC__)
    return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int32_t __before, __after;
    do
      {
        __before = *__ptr;
        __after = __before + __addend;
      }
    while (atomic_cas_32((volatile unsigned int*)__ptr, __before,
                         __after) != __before);
    return __before;
#else   //fallback, slow
#pragma message("slow __fetch_and_add_32")
    int32_t __res;
#pragma omp critical
    {
      __res = *__ptr;
      *__ptr += __addend;
    }
    return __res;
#endif
  }
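
  // Illustrative usage sketch (hypothetical variables, not part of the
  // header): __fetch_and_add_32 behaves like an atomic post-increment by
  // __addend.  If two threads each run the line below once on a counter
  // starting at 0, one observes 0, the other observes 1, and the counter
  // ends at 2.
  //
  //   volatile int32_t __counter = 0;
  //   int32_t __old = __fetch_and_add_32(&__counter, 1);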

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 64-bit signed integer.
   *  @param __addend Value to add.
   */
  inline int64_t
  __fetch_and_add_64(volatile int64_t* __ptr, int64_t __addend)
  {
#if defined(__ICC) && defined(__x86_64)        //x86 version
    return __faa64<int>((int64_t*)__ptr, __addend);
#elif defined(__ECC)    //IA-64 version
    return _InterlockedExchangeAdd64((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    // Not available as an intrinsic on 32-bit Windows.
    _GLIBCXX_PARALLEL_ASSERT(false);
    return 0;
#else
    return _InterlockedExchangeAdd64(__ptr, __addend);
#endif
#elif defined(__GNUC__) && defined(__x86_64)
    return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__GNUC__) && defined(__i386) &&                   \
  (defined(__i686) || defined(__pentium4) || defined(__athlon)  \
   || defined(__k8) || defined(__core2))
    return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int64_t __before, __after;
    do
      {
        __before = *__ptr;
        __after = __before + __addend;
      }
    while (atomic_cas_64((volatile unsigned long long*)__ptr, __before,
                         __after) != __before);
    return __before;
#else   //fallback, slow
#if defined(__GNUC__) && defined(__i386)
    // On plain i386 a 64-bit __sync_fetch_and_add is not reliably
    // available, so fall through to the locked version below.
#endif
#pragma message("slow __fetch_and_add_64")
    int64_t __res;
#pragma omp critical
    {
      __res = *__ptr;
      *__ptr += __addend;
    }
    return __res;
#endif
  }


  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a signed integer.
   *  @param __addend Value to add.
   */
  template<typename _Tp>
    inline _Tp
    __fetch_and_add(volatile _Tp* __ptr, _Tp __addend)
    {
      if (sizeof(_Tp) == sizeof(int32_t))
        return
          (_Tp)__fetch_and_add_32((volatile int32_t*) __ptr,
                                  (int32_t)__addend);
      else if (sizeof(_Tp) == sizeof(int64_t))
        return
          (_Tp)__fetch_and_add_64((volatile int64_t*) __ptr,
                                  (int64_t)__addend);
      else
        _GLIBCXX_PARALLEL_ASSERT(false);
    }
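
  // Minimal usage sketch for the generic wrapper (hypothetical variables):
  // claim distinct work-item indices from a shared counter.  Only types of
  // 32-bit or 64-bit size are dispatched; any other size trips the
  // assertion above.
  //
  //   volatile int64_t __next = 0;
  //   int64_t __mine = __fetch_and_add(&__next, (int64_t)1);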

#if defined(__ICC)

  template<typename _MustBeInt = int>
    inline int32_t
    __cas32(volatile int32_t* __ptr, int32_t __old, int32_t __nw)
    {
      int32_t __before;
      __asm__ __volatile__("lock; cmpxchgl %1,%2"
                           : "=a"(__before)
                           : "q"(__nw), "m"(*__ptr), "0"(__old)
                           : "memory");
      return __before;
    }

#if defined(__x86_64)
  template<typename _MustBeInt = int>
    inline int64_t
    __cas64(volatile int64_t* __ptr, int64_t __old, int64_t __nw)
    {
      int64_t __before;
      __asm__ __volatile__("lock; cmpxchgq %1,%2"
                           : "=a"(__before)
                           : "q"(__nw), "m"(*__ptr), "0"(__old)
                           : "memory");
      return __before;
    }
#endif

#endif

  /** @brief Compare @c *__ptr and @c __comparand.  If equal, let
   *  @c *__ptr=__replacement and return @c true, return @c false otherwise.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 32-bit signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value.
   */
  inline bool
  __compare_and_swap_32(volatile int32_t* __ptr, int32_t __comparand,
                        int32_t __replacement)
  {
#if defined(__ICC)      //x86 version
    return _InterlockedCompareExchange((void*)__ptr, __replacement,
                                       __comparand) == __comparand;
#elif defined(__ECC)    //IA-64 version
    return _InterlockedCompareExchange((void*)__ptr, __replacement,
                                       __comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(__ptr),
                                       __replacement, __comparand)
           == __comparand;
#elif defined(__GNUC__)
    return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_32((volatile unsigned int*)__ptr, __comparand,
                         __replacement) == __comparand;
#else
#pragma message("slow __compare_and_swap_32")
    bool __res = false;
#pragma omp critical
    {
      if (*__ptr == __comparand)
        {
          *__ptr = __replacement;
          __res = true;
        }
    }
    return __res;
#endif
  }
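
  // Illustrative sketch (hypothetical helper, not part of the header): the
  // usual CAS retry loop, here atomically storing the maximum of the
  // current value and __val on top of __compare_and_swap_32.
  //
  //   inline void
  //   __atomic_max_32(volatile int32_t* __ptr, int32_t __val)
  //   {
  //     int32_t __old;
  //     do
  //       __old = *__ptr;
  //     while (__old < __val
  //            && !__compare_and_swap_32(__ptr, __old, __val));
  //   }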

  /** @brief Compare @c *__ptr and @c __comparand.  If equal, let
   *  @c *__ptr=__replacement and return @c true, return @c false otherwise.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 64-bit signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value.
   */
  inline bool
  __compare_and_swap_64(volatile int64_t* __ptr, int64_t __comparand,
                        int64_t __replacement)
  {
#if defined(__ICC) && defined(__x86_64)        //x86 version
    return __cas64<int>(__ptr, __comparand, __replacement) == __comparand;
#elif defined(__ECC)    //IA-64 version
    return _InterlockedCompareExchange64((void*)__ptr, __replacement,
                                         __comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    // Not available as an intrinsic on 32-bit Windows.
    _GLIBCXX_PARALLEL_ASSERT(false);
    return false;
#else
    return _InterlockedCompareExchange64(__ptr, __replacement,
                                         __comparand) == __comparand;
#endif
#elif defined(__GNUC__) && defined(__x86_64)
    return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__GNUC__) && defined(__i386) &&                   \
  (defined(__i686) || defined(__pentium4) || defined(__athlon)  \
   || defined(__k8) || defined(__core2))
    return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_64((volatile unsigned long long*)__ptr,
                         __comparand, __replacement) == __comparand;
#else
#if defined(__GNUC__) && defined(__i386)
    // On plain i386 a 64-bit __sync_bool_compare_and_swap is not reliably
    // available, so fall through to the locked version below.
#endif
#pragma message("slow __compare_and_swap_64")
    bool __res = false;
#pragma omp critical
    {
      if (*__ptr == __comparand)
        {
          *__ptr = __replacement;
          __res = true;
        }
    }
    return __res;
#endif
  }

  /** @brief Compare @c *__ptr and @c __comparand.  If equal, let
   *  @c *__ptr=__replacement and return @c true, return @c false otherwise.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value.
   */
  template<typename _Tp>
    inline bool
    __compare_and_swap(volatile _Tp* __ptr, _Tp __comparand,
                       _Tp __replacement)
    {
      if (sizeof(_Tp) == sizeof(int32_t))
        return __compare_and_swap_32((volatile int32_t*) __ptr,
                                     (int32_t)__comparand,
                                     (int32_t)__replacement);
      else if (sizeof(_Tp) == sizeof(int64_t))
        return __compare_and_swap_64((volatile int64_t*) __ptr,
                                     (int64_t)__comparand,
                                     (int64_t)__replacement);
      else
        _GLIBCXX_PARALLEL_ASSERT(false);
    }
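
  // Usage sketch for the generic wrapper (hypothetical variables): flip a
  // shared flag exactly once; only the thread whose CAS succeeds performs
  // the one-time initialization.
  //
  //   volatile int32_t __flag = 0;
  //   if (__compare_and_swap(&__flag, (int32_t)0, (int32_t)1))
  //     { /* winner performs one-time setup */ }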

  /** @brief Yield control to another thread, without waiting for
   *  the end of the time slice.
   */
  inline void
  __yield()
  {
#if defined(_WIN32) && !defined(__CYGWIN__)
    Sleep(0);
#else
    sched_yield();
#endif
  }
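
  // Illustrative spin-wait sketch (hypothetical helper): busy-wait on a
  // flag, yielding the time slice after every failed probe so the writer
  // can make progress even on a single hardware context.
  //
  //   inline void
  //   __spin_until_set(volatile int32_t* __flag)
  //   {
  //     while (*__flag == 0)
  //       __yield();
  //   }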
}

#endif /* _GLIBCXX_PARALLEL_COMPATIBILITY_H */