00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
00034 #define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
00035
00036 #include <omp.h>
00037 #include <new>
00038 #include <bits/stl_algobase.h>
00039 #include <parallel/parallel.h>
00040 #include <parallel/numericfwd.h>
00041
00042 namespace __gnu_parallel
00043 {
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054 template<typename _IIter,
00055 typename _OutputIterator,
00056 typename _BinaryOperation>
00057 _OutputIterator
00058 __parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
00059 _OutputIterator __result,
00060 _BinaryOperation __bin_op,
00061 typename std::iterator_traits <_IIter>::value_type __value)
00062 {
00063 if (__begin == __end)
00064 return __result;
00065
00066 while (__begin != __end)
00067 {
00068 __value = __bin_op(__value, *__begin);
00069 *__result = __value;
00070 ++__result;
00071 ++__begin;
00072 }
00073 return __result;
00074 }
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086 template<typename _IIter,
00087 typename _OutputIterator,
00088 typename _BinaryOperation>
00089 _OutputIterator
00090 __parallel_partial_sum_linear(_IIter __begin, _IIter __end,
00091 _OutputIterator __result,
00092 _BinaryOperation __bin_op,
00093 typename std::iterator_traits<_IIter>::difference_type __n)
00094 {
00095 typedef std::iterator_traits<_IIter> _TraitsType;
00096 typedef typename _TraitsType::value_type _ValueType;
00097 typedef typename _TraitsType::difference_type _DifferenceType;
00098
00099 if (__begin == __end)
00100 return __result;
00101
00102 _ThreadIndex __num_threads =
00103 std::min<_DifferenceType>(__get_max_threads(), __n - 1);
00104
00105 if (__num_threads < 2)
00106 {
00107 *__result = *__begin;
00108 return __parallel_partial_sum_basecase(__begin + 1, __end,
00109 __result + 1, __bin_op,
00110 *__begin);
00111 }
00112
00113 _DifferenceType* __borders;
00114 _ValueType* __sums;
00115
00116 const _Settings& __s = _Settings::get();
00117
00118 # pragma omp parallel num_threads(__num_threads)
00119 {
00120 # pragma omp single
00121 {
00122 __num_threads = omp_get_num_threads();
00123
00124 __borders = new _DifferenceType[__num_threads + 2];
00125
00126 if (__s.partial_sum_dilation == 1.0f)
00127 equally_split(__n, __num_threads + 1, __borders);
00128 else
00129 {
00130 _DifferenceType __chunk_length =
00131 ((double)__n
00132 / ((double)__num_threads + __s.partial_sum_dilation)),
00133 __borderstart = __n - __num_threads * __chunk_length;
00134 __borders[0] = 0;
00135 for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i)
00136 {
00137 __borders[__i] = __borderstart;
00138 __borderstart += __chunk_length;
00139 }
00140 __borders[__num_threads + 1] = __n;
00141 }
00142
00143 __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
00144 * __num_threads));
00145 _OutputIterator __target_end;
00146 }
00147
00148 _ThreadIndex __iam = omp_get_thread_num();
00149 if (__iam == 0)
00150 {
00151 *__result = *__begin;
00152 __parallel_partial_sum_basecase(__begin + 1,
00153 __begin + __borders[1],
00154 __result + 1,
00155 __bin_op, *__begin);
00156 ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
00157 }
00158 else
00159 {
00160 ::new(&(__sums[__iam]))
00161 _ValueType(std::accumulate(__begin + __borders[__iam] + 1,
00162 __begin + __borders[__iam + 1],
00163 *(__begin + __borders[__iam]),
00164 __bin_op,
00165 __gnu_parallel::sequential_tag()));
00166 }
00167
00168 # pragma omp barrier
00169
00170 # pragma omp single
00171 __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
00172 __sums + 1, __bin_op, __sums[0]);
00173
00174 # pragma omp barrier
00175
00176
00177 __parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
00178 __begin + __borders[__iam + 2],
00179 __result + __borders[__iam + 1],
00180 __bin_op, __sums[__iam]);
00181 }
00182
00183 ::operator delete(__sums);
00184 delete[] __borders;
00185
00186 return __result + __n;
00187 }
00188
00189
00190
00191
00192
00193
00194
00195 template<typename _IIter,
00196 typename _OutputIterator,
00197 typename _BinaryOperation>
00198 _OutputIterator
00199 __parallel_partial_sum(_IIter __begin, _IIter __end,
00200 _OutputIterator __result, _BinaryOperation __bin_op)
00201 {
00202 _GLIBCXX_CALL(__begin - __end)
00203
00204 typedef std::iterator_traits<_IIter> _TraitsType;
00205 typedef typename _TraitsType::value_type _ValueType;
00206 typedef typename _TraitsType::difference_type _DifferenceType;
00207
00208 _DifferenceType __n = __end - __begin;
00209
00210 switch (_Settings::get().partial_sum_algorithm)
00211 {
00212 case LINEAR:
00213
00214 return __parallel_partial_sum_linear(__begin, __end, __result,
00215 __bin_op, __n);
00216 default:
00217
00218 _GLIBCXX_PARALLEL_ASSERT(0);
00219 return __result + __n;
00220 }
00221 }
00222 }
00223
00224 #endif