00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
00034 #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
00035
00036 #include <omp.h>
00037 #include <new>
00038 #include <bits/stl_algobase.h>
00039 #include <parallel/parallel.h>
00040 #include <parallel/numericfwd.h>
00041
00042 namespace __gnu_parallel
00043 {
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054 template<typename _IIter,
00055 typename _OutputIterator,
00056 typename _BinaryOperation>
00057 _OutputIterator
00058 __parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
00059 _OutputIterator __result,
00060 _BinaryOperation __bin_op,
00061 typename std::iterator_traits <_IIter>::value_type __value)
00062 {
00063 if (__begin == __end)
00064 return __result;
00065
00066 while (__begin != __end)
00067 {
00068 __value = __bin_op(__value, *__begin);
00069 *__result = __value;
00070 ++__result;
00071 ++__begin;
00072 }
00073 return __result;
00074 }
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086 template<typename _IIter,
00087 typename _OutputIterator,
00088 typename _BinaryOperation>
00089 _OutputIterator
00090 __parallel_partial_sum_linear(_IIter __begin, _IIter __end,
00091 _OutputIterator __result,
00092 _BinaryOperation __bin_op,
00093 typename std::iterator_traits<_IIter>::difference_type __n)
00094 {
00095 typedef std::iterator_traits<_IIter> _TraitsType;
00096 typedef typename _TraitsType::value_type _ValueType;
00097 typedef typename _TraitsType::difference_type _DifferenceType;
00098
00099 if (__begin == __end)
00100 return __result;
00101
00102 _ThreadIndex __num_threads =
00103 std::min<_DifferenceType>(__get_max_threads(), __n - 1);
00104
00105 if (__num_threads < 2)
00106 {
00107 *__result = *__begin;
00108 return __parallel_partial_sum_basecase(__begin + 1, __end,
00109 __result + 1, __bin_op,
00110 *__begin);
00111 }
00112
00113 _DifferenceType* __borders;
00114 _ValueType* __sums;
00115
00116 const _Settings& __s = _Settings::get();
00117
00118 # pragma omp parallel num_threads(__num_threads)
00119 {
00120 # pragma omp single
00121 {
00122 __num_threads = omp_get_num_threads();
00123
00124 __borders = new _DifferenceType[__num_threads + 2];
00125
00126 if (__s.partial_sum_dilation == 1.0f)
00127 equally_split(__n, __num_threads + 1, __borders);
00128 else
00129 {
00130 _DifferenceType __chunk_length =
00131 ((double)__n
00132 / ((double)__num_threads + __s.partial_sum_dilation)),
00133 __borderstart = __n - __num_threads * __chunk_length;
00134 __borders[0] = 0;
00135 for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i)
00136 {
00137 __borders[__i] = __borderstart;
00138 __borderstart += __chunk_length;
00139 }
00140 __borders[__num_threads + 1] = __n;
00141 }
00142
00143 __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
00144 * __num_threads));
00145 _OutputIterator __target_end;
00146 }
00147
00148 _ThreadIndex __iam = omp_get_thread_num();
00149 if (__iam == 0)
00150 {
00151 *__result = *__begin;
00152 __parallel_partial_sum_basecase(__begin + 1,
00153 __begin + __borders[1],
00154 __result + 1,
00155 __bin_op, *__begin);
00156 ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
00157 }
00158 else
00159 {
00160 ::new(&(__sums[__iam]))
00161 _ValueType(__gnu_parallel::accumulate(
00162 __begin + __borders[__iam] + 1,
00163 __begin + __borders[__iam + 1],
00164 *(__begin + __borders[__iam]),
00165 __bin_op,
00166 __gnu_parallel::sequential_tag()));
00167 }
00168
00169 # pragma omp barrier
00170
00171 # pragma omp single
00172 __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
00173 __sums + 1, __bin_op, __sums[0]);
00174
00175 # pragma omp barrier
00176
00177
00178 __parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
00179 __begin + __borders[__iam + 2],
00180 __result + __borders[__iam + 1],
00181 __bin_op, __sums[__iam]);
00182 }
00183
00184 ::operator delete(__sums);
00185 delete[] __borders;
00186
00187 return __result + __n;
00188 }
00189
00190
00191
00192
00193
00194
00195
00196 template<typename _IIter,
00197 typename _OutputIterator,
00198 typename _BinaryOperation>
00199 _OutputIterator
00200 __parallel_partial_sum(_IIter __begin, _IIter __end,
00201 _OutputIterator __result, _BinaryOperation __bin_op)
00202 {
00203 _GLIBCXX_CALL(__begin - __end)
00204
00205 typedef std::iterator_traits<_IIter> _TraitsType;
00206 typedef typename _TraitsType::value_type _ValueType;
00207 typedef typename _TraitsType::difference_type _DifferenceType;
00208
00209 _DifferenceType __n = __end - __begin;
00210
00211 switch (_Settings::get().partial_sum_algorithm)
00212 {
00213 case LINEAR:
00214
00215 return __parallel_partial_sum_linear(__begin, __end, __result,
00216 __bin_op, __n);
00217 default:
00218
00219 _GLIBCXX_PARALLEL_ASSERT(0);
00220 return __result + __n;
00221 }
00222 }
00223 }
00224
00225 #endif