partial_sum.h

Go to the documentation of this file.
00001 // -*- C++ -*-
00002 
00003 // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the terms
00007 // of the GNU General Public License as published by the Free Software
00008 // Foundation; either version 3, or (at your option) any later
00009 // version.
00010 
00011 // This library is distributed in the hope that it will be useful, but
00012 // WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014 // General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /** @file parallel/partial_sum.h
00026  *  @brief Parallel implementation of std::partial_sum(), i.e. prefix
00027 *  sums.
00028  *  This file is a GNU parallel extension to the Standard C++ Library.
00029  */
00030 
00031 // Written by Johannes Singler.
00032 
00033 #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
00034 #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
00035 
00036 #include <omp.h>
00037 #include <new>
00038 #include <bits/stl_algobase.h>
00039 #include <parallel/parallel.h>
00040 #include <parallel/numericfwd.h>
00041 
00042 namespace __gnu_parallel
00043 {
00044   // Problem: there is no 0-element given.
00045 
00046   /** @brief Base case prefix sum routine.
00047    *  @param __begin Begin iterator of input sequence.
00048    *  @param __end End iterator of input sequence.
00049    *  @param __result Begin iterator of output sequence.
00050    *  @param __bin_op Associative binary function.
00051    *  @param __value Start value. Must be passed since the neutral
00052    *  element is unknown in general.
00053    *  @return End iterator of output sequence. */
00054   template<typename _IIter,
00055        typename _OutputIterator,
00056        typename _BinaryOperation>
00057     _OutputIterator
00058     __parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
00059                     _OutputIterator __result,
00060                     _BinaryOperation __bin_op,
00061       typename std::iterator_traits <_IIter>::value_type __value)
00062     {
00063       if (__begin == __end)
00064     return __result;
00065 
00066       while (__begin != __end)
00067     {
00068       __value = __bin_op(__value, *__begin);
00069       *__result = __value;
00070       ++__result;
00071       ++__begin;
00072     }
00073       return __result;
00074     }
00075 
00076   /** @brief Parallel partial sum implementation, two-phase approach,
00077       no recursion.
00078       *  @param __begin Begin iterator of input sequence.
00079       *  @param __end End iterator of input sequence.
00080       *  @param __result Begin iterator of output sequence.
00081       *  @param __bin_op Associative binary function.
00082       *  @param __n Length of sequence.
00083       *  @param __num_threads Number of threads to use.
00084       *  @return End iterator of output sequence.
00085       */
00086   template<typename _IIter,
00087        typename _OutputIterator,
00088        typename _BinaryOperation>
00089     _OutputIterator
00090     __parallel_partial_sum_linear(_IIter __begin, _IIter __end,
00091                   _OutputIterator __result,
00092                   _BinaryOperation __bin_op,
00093       typename std::iterator_traits<_IIter>::difference_type __n)
00094     {
00095       typedef std::iterator_traits<_IIter> _TraitsType;
00096       typedef typename _TraitsType::value_type _ValueType;
00097       typedef typename _TraitsType::difference_type _DifferenceType;
00098 
00099       if (__begin == __end)
00100     return __result;
00101 
00102       _ThreadIndex __num_threads =
00103         std::min<_DifferenceType>(__get_max_threads(), __n - 1);
00104 
00105       if (__num_threads < 2)
00106     {
00107       *__result = *__begin;
00108       return __parallel_partial_sum_basecase(__begin + 1, __end,
00109                          __result + 1, __bin_op,
00110                          *__begin);
00111     }
00112 
00113       _DifferenceType* __borders;
00114       _ValueType* __sums;
00115 
00116       const _Settings& __s = _Settings::get();
00117 
00118 #     pragma omp parallel num_threads(__num_threads)
00119       {
00120 #       pragma omp single
00121     {
00122       __num_threads = omp_get_num_threads();
00123         
00124       __borders = new _DifferenceType[__num_threads + 2];
00125 
00126       if (__s.partial_sum_dilation == 1.0f)
00127         equally_split(__n, __num_threads + 1, __borders);
00128       else
00129         {
00130           _DifferenceType __chunk_length =
00131         ((double)__n
00132          / ((double)__num_threads + __s.partial_sum_dilation)),
00133         __borderstart = __n - __num_threads * __chunk_length;
00134           __borders[0] = 0;
00135           for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i)
00136         {
00137           __borders[__i] = __borderstart;
00138           __borderstart += __chunk_length;
00139         }
00140           __borders[__num_threads + 1] = __n;
00141         }
00142 
00143       __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
00144                                                            * __num_threads));
00145       _OutputIterator __target_end;
00146     } //single
00147 
00148         _ThreadIndex __iam = omp_get_thread_num();
00149         if (__iam == 0)
00150           {
00151             *__result = *__begin;
00152             __parallel_partial_sum_basecase(__begin + 1,
00153                         __begin + __borders[1],
00154                         __result + 1,
00155                         __bin_op, *__begin);
00156             ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
00157           }
00158         else
00159           {
00160             ::new(&(__sums[__iam]))
00161               _ValueType(__gnu_parallel::accumulate(
00162                                          __begin + __borders[__iam] + 1,
00163                                          __begin + __borders[__iam + 1],
00164                                          *(__begin + __borders[__iam]),
00165                                          __bin_op,
00166                                          __gnu_parallel::sequential_tag()));
00167           }
00168 
00169 #       pragma omp barrier
00170 
00171 #       pragma omp single
00172     __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
00173                     __sums + 1, __bin_op, __sums[0]);
00174 
00175 #       pragma omp barrier
00176 
00177     // Still same team.
00178         __parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
00179                     __begin + __borders[__iam + 2],
00180                     __result + __borders[__iam + 1],
00181                     __bin_op, __sums[__iam]);
00182       } //parallel
00183 
00184       ::operator delete(__sums);
00185       delete[] __borders;
00186 
00187       return __result + __n;
00188     }
00189 
00190   /** @brief Parallel partial sum front-__end.
00191    *  @param __begin Begin iterator of input sequence.
00192    *  @param __end End iterator of input sequence.
00193    *  @param __result Begin iterator of output sequence.
00194    *  @param __bin_op Associative binary function.
00195    *  @return End iterator of output sequence. */
00196   template<typename _IIter,
00197        typename _OutputIterator,
00198        typename _BinaryOperation>
00199     _OutputIterator
00200     __parallel_partial_sum(_IIter __begin, _IIter __end,
00201                _OutputIterator __result, _BinaryOperation __bin_op)
00202     {
00203       _GLIBCXX_CALL(__begin - __end)
00204 
00205       typedef std::iterator_traits<_IIter> _TraitsType;
00206       typedef typename _TraitsType::value_type _ValueType;
00207       typedef typename _TraitsType::difference_type _DifferenceType;
00208 
00209       _DifferenceType __n = __end - __begin;
00210 
00211       switch (_Settings::get().partial_sum_algorithm)
00212     {
00213     case LINEAR:
00214       // Need an initial offset.
00215       return __parallel_partial_sum_linear(__begin, __end, __result,
00216                            __bin_op, __n);
00217     default:
00218       // Partial_sum algorithm not implemented.
00219       _GLIBCXX_PARALLEL_ASSERT(0);
00220       return __result + __n;
00221     }
00222     }
00223 }
00224 
00225 #endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */