Commit 15ac3c72 by Johannes Singler Committed by Johannes Singler

algobase.h: Replace tabs by spaces; correct line breaks.

2009-09-17  Johannes Singler  <singler@ira.uka.de>

        * include/parallel/algobase.h: Replace tabs by spaces;
        correct line breaks.
        * include/parallel/algorithmfwd.h: Likewise.
        * include/parallel/balanced_quicksort.h: Likewise.
        * include/parallel/base.h: Likewise.
        * include/parallel/checkers.h: Likewise.
        * include/parallel/compatibility.h: Likewise.
        * include/parallel/equally_split.h: Likewise.
        * include/parallel/find.h: Likewise.
        * include/parallel/for_each.h: Likewise.
        * include/parallel/for_each_selectors.h: Likewise.
        * include/parallel/iterator.h: Likewise.
        * include/parallel/list_partition.h: Likewise.
        * include/parallel/losertree.h: Likewise.
        * include/parallel/merge.h: Likewise.
        * include/parallel/multiseq_selection.h: Likewise.
        * include/parallel/multiway_merge.h: Likewise.
        * include/parallel/multiway_mergesort.h: Likewise.
        * include/parallel/numeric: Likewise.
        * include/parallel/numericfwd.h: Likewise.
        * include/parallel/omp_loop.h: Likewise.
        * include/parallel/omp_loop_static.h: Likewise.
        * include/parallel/par_loop.h: Likewise.
        * include/parallel/partial_sum.h: Likewise.
        * include/parallel/partition.h: Likewise.
        * include/parallel/queue.h: Likewise.
        * include/parallel/quicksort.h: Likewise.
        * include/parallel/random_number.h: Likewise.
        * include/parallel/random_shuffle.h: Likewise.
        * include/parallel/search.h: Likewise.
        * include/parallel/set_operations.h: Likewise.
        * include/parallel/settings.h: Likewise.
        * include/parallel/sort.h: Likewise.
        * include/parallel/types.h: Likewise.
        * include/parallel/unique_copy.h: Likewise.
        * include/parallel/workstealing.h: Likewise.
        * include/parallel/algo.h: Likewise;
        shorten _ForwardIterator to _FIterator.
        * include/parallel/find_selectors.h: Likewise.

From-SVN: r151791
parent 10e154df
2009-09-17 Johannes Singler <singler@ira.uka.de>
* include/parallel/algobase.h: Replace tabs by spaces;
correct line breaks.
* include/parallel/algorithmfwd.h: Likewise.
* include/parallel/balanced_quicksort.h: Likewise.
* include/parallel/base.h: Likewise.
* include/parallel/checkers.h: Likewise.
* include/parallel/compatibility.h: Likewise.
* include/parallel/equally_split.h: Likewise.
* include/parallel/find.h: Likewise.
* include/parallel/for_each.h: Likewise.
* include/parallel/for_each_selectors.h: Likewise.
* include/parallel/iterator.h: Likewise.
* include/parallel/list_partition.h: Likewise.
* include/parallel/losertree.h: Likewise.
* include/parallel/merge.h: Likewise.
* include/parallel/multiseq_selection.h: Likewise.
* include/parallel/multiway_merge.h: Likewise.
* include/parallel/multiway_mergesort.h: Likewise.
* include/parallel/numeric: Likewise.
* include/parallel/numericfwd.h: Likewise.
* include/parallel/omp_loop.h: Likewise.
* include/parallel/omp_loop_static.h: Likewise.
* include/parallel/par_loop.h: Likewise.
* include/parallel/partial_sum.h: Likewise.
* include/parallel/partition.h: Likewise.
* include/parallel/queue.h: Likewise.
* include/parallel/quicksort.h: Likewise.
* include/parallel/random_number.h: Likewise.
* include/parallel/random_shuffle.h: Likewise.
* include/parallel/search.h: Likewise.
* include/parallel/set_operations.h: Likewise.
* include/parallel/settings.h: Likewise.
* include/parallel/sort.h: Likewise.
* include/parallel/types.h: Likewise.
* include/parallel/unique_copy.h: Likewise.
* include/parallel/workstealing.h: Likewise.
* include/parallel/algo.h: Likewise;
shorten _ForwardIterator to _FIterator.
* include/parallel/find_selectors.h: Likewise.
2009-09-16 Johannes Singler <singler@ira.uka.de> 2009-09-16 Johannes Singler <singler@ira.uka.de>
* include/parallel/base.h: Correct some comments accidentally changed * include/parallel/base.h: Correct some comments accidentally changed
......
...@@ -108,19 +108,25 @@ template<typename _RAIter, typename _Compare> ...@@ -108,19 +108,25 @@ template<typename _RAIter, typename _Compare>
_RAIter __pivot_pos = _RAIter __pivot_pos =
__median_of_three_iterators(__begin, __begin + (__end - __begin) / 2, __median_of_three_iterators(__begin, __begin + (__end - __begin) / 2,
__end - 1, __comp); __end - 1, __comp);
#if defined(_GLIBCXX_ASSERTIONS) #if defined(_GLIBCXX_ASSERTIONS)
// Must be in between somewhere. // Must be in between somewhere.
_DifferenceType __n = __end - __begin; _DifferenceType __n = __end - __begin;
_GLIBCXX_PARALLEL_ASSERT( _GLIBCXX_PARALLEL_ASSERT(
(!__comp(*__pivot_pos, *__begin) && !__comp(*(__begin + __n / 2), *__pivot_pos)) (!__comp(*__pivot_pos, *__begin) &&
|| (!__comp(*__pivot_pos, *__begin) && !__comp(*(__end - 1), *__pivot_pos)) !__comp(*(__begin + __n / 2), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) && !__comp(*__begin, *__pivot_pos)) || (!__comp(*__pivot_pos, *__begin) &&
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) && !__comp(*(__end - 1), *__pivot_pos)) !__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1)) && !__comp(*__begin, *__pivot_pos)) || (!__comp(*__pivot_pos, *(__begin + __n / 2)) &&
|| (!__comp(*__pivot_pos, *(__end - 1)) && !__comp(*(__begin + __n / 2), *__pivot_pos))); !__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) &&
!__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1)) &&
!__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1)) &&
!__comp(*(__begin + __n / 2), *__pivot_pos)));
#endif #endif
// Swap pivot value to end. // Swap pivot value to end.
...@@ -183,15 +189,17 @@ template<typename _RAIter, typename _Compare> ...@@ -183,15 +189,17 @@ template<typename _RAIter, typename _Compare>
} }
// Divide step. // Divide step.
_DifferenceType __split_pos = __qsb_divide(__begin, __end, __comp, __num_threads); _DifferenceType __split_pos =
__qsb_divide(__begin, __end, __comp, __num_threads);
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos && __split_pos < (__end - __begin)); _GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos &&
__split_pos < (__end - __begin));
#endif #endif
_ThreadIndex __num_threads_leftside = _ThreadIndex __num_threads_leftside =
std::max<_ThreadIndex>(1, std::min<_ThreadIndex>( std::max<_ThreadIndex>(1, std::min<_ThreadIndex>(
__num_threads - 1, __split_pos * __num_threads / __n)); __num_threads - 1, __split_pos * __num_threads / __n));
# pragma omp atomic # pragma omp atomic
*__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1; *__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1;
...@@ -284,11 +292,13 @@ template<typename _RAIter, typename _Compare> ...@@ -284,11 +292,13 @@ template<typename _RAIter, typename _Compare>
// Divide, leave pivot unchanged in last place. // Divide, leave pivot unchanged in last place.
_RAIter __split_pos1, __split_pos2; _RAIter __split_pos1, __split_pos2;
__split_pos1 = __gnu_sequential::partition(__begin, __end - 1, __pred); __split_pos1 =
__gnu_sequential::partition(__begin, __end - 1, __pred);
// Left side: < __pivot_pos; right side: >= __pivot_pos. // Left side: < __pivot_pos; right side: >= __pivot_pos.
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1 && __split_pos1 < __end); _GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1
&& __split_pos1 < __end);
#endif #endif
// Swap pivot back to middle. // Swap pivot back to middle.
if (__split_pos1 != __pivot_pos) if (__split_pos1 != __pivot_pos)
...@@ -302,14 +312,14 @@ template<typename _RAIter, typename _Compare> ...@@ -302,14 +312,14 @@ template<typename _RAIter, typename _Compare>
// Very unequal split, one part smaller than one 128th // Very unequal split, one part smaller than one 128th
// elements not strictly larger than the pivot. // elements not strictly larger than the pivot.
__gnu_parallel::__unary_negate<__gnu_parallel::__binder1st __gnu_parallel::__unary_negate<__gnu_parallel::__binder1st
<_Compare, _ValueType, _ValueType, bool>, _ValueType> <_Compare, _ValueType, _ValueType, bool>, _ValueType>
__pred(__gnu_parallel::__binder1st __pred(__gnu_parallel::__binder1st
<_Compare, _ValueType, _ValueType, bool>(__comp, <_Compare, _ValueType, _ValueType, bool>(
*__pivot_pos)); __comp, *__pivot_pos));
// Find other end of pivot-equal range. // Find other end of pivot-equal range.
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
__end, __pred); __end, __pred);
} }
else else
// Only skip the pivot. // Only skip the pivot.
...@@ -325,10 +335,10 @@ template<typename _RAIter, typename _Compare> ...@@ -325,10 +335,10 @@ template<typename _RAIter, typename _Compare>
{ {
// Right side larger. // Right side larger.
if ((__split_pos2) != __end) if ((__split_pos2) != __end)
__tl._M_leftover_parts.push_front(std::make_pair(__split_pos2, __tl._M_leftover_parts.push_front(
__end)); std::make_pair(__split_pos2, __end));
//__current.first = __begin; //already set anyway //__current.first = __begin; //already set anyway
__current.second = __split_pos1; __current.second = __split_pos1;
continue; continue;
} }
...@@ -337,10 +347,10 @@ template<typename _RAIter, typename _Compare> ...@@ -337,10 +347,10 @@ template<typename _RAIter, typename _Compare>
// Left side larger. // Left side larger.
if (__begin != __split_pos1) if (__begin != __split_pos1)
__tl._M_leftover_parts.push_front(std::make_pair(__begin, __tl._M_leftover_parts.push_front(std::make_pair(__begin,
__split_pos1)); __split_pos1));
__current.first = __split_pos2; __current.first = __split_pos2;
//__current.second = __end; //already set anyway //__current.second = __end; //already set anyway
continue; continue;
} }
} }
...@@ -367,10 +377,11 @@ template<typename _RAIter, typename _Compare> ...@@ -367,10 +377,11 @@ template<typename _RAIter, typename _Compare>
// Look for new work. // Look for new work.
bool __successfully_stolen = false; bool __successfully_stolen = false;
while (__wait && *__tl._M_elements_leftover > 0 && !__successfully_stolen while (__wait && *__tl._M_elements_leftover > 0
&& !__successfully_stolen
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// Possible dead-lock. // Possible dead-lock.
&& (omp_get_wtime() < (__search_start + 1.0)) && (omp_get_wtime() < (__search_start + 1.0))
#endif #endif
) )
{ {
...@@ -392,7 +403,7 @@ template<typename _RAIter, typename _Compare> ...@@ -392,7 +403,7 @@ template<typename _RAIter, typename _Compare>
{ {
sleep(1); sleep(1);
_GLIBCXX_PARALLEL_ASSERT(omp_get_wtime() _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
< (__search_start + 1.0)); < (__search_start + 1.0));
} }
#endif #endif
if (!__successfully_stolen) if (!__successfully_stolen)
...@@ -439,11 +450,13 @@ template<typename _RAIter, typename _Compare> ...@@ -439,11 +450,13 @@ template<typename _RAIter, typename _Compare>
// Initialize thread local storage // Initialize thread local storage
_TLSType** __tls = new _TLSType*[__num_threads]; _TLSType** __tls = new _TLSType*[__num_threads];
_DifferenceType __queue_size = __num_threads * (_ThreadIndex)(log2(__n) + 1); _DifferenceType __queue_size =
__num_threads * (_ThreadIndex)(log2(__n) + 1);
for (_ThreadIndex __t = 0; __t < __num_threads; ++__t) for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
__tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size); __tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
// There can never be more than ceil(log2(__n)) ranges on the stack, because // There can never be more than ceil(log2(__n)) ranges on the stack,
// because
// 1. Only one processor pushes onto the stack // 1. Only one processor pushes onto the stack
// 2. The largest range has at most length __n // 2. The largest range has at most length __n
// 3. Each range is larger than half of the range remaining // 3. Each range is larger than half of the range remaining
...@@ -459,13 +472,15 @@ template<typename _RAIter, typename _Compare> ...@@ -459,13 +472,15 @@ template<typename _RAIter, typename _Compare>
} }
// Main recursion call. // Main recursion call.
__qsb_conquer(__tls, __begin, __begin + __n, __comp, 0, __num_threads, true); __qsb_conquer(
__tls, __begin, __begin + __n, __comp, 0, __num_threads, true);
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// All stacks must be empty. // All stacks must be empty.
_Piece __dummy; _Piece __dummy;
for (int __i = 1; __i < __num_threads; ++__i) for (int __i = 1; __i < __num_threads; ++__i)
_GLIBCXX_PARALLEL_ASSERT(!__tls[__i]->_M_leftover_parts.pop_back(__dummy)); _GLIBCXX_PARALLEL_ASSERT(
!__tls[__i]->_M_leftover_parts.pop_back(__dummy));
#endif #endif
for (int __i = 0; __i < __num_threads; ++__i) for (int __i = 0; __i < __num_threads; ++__i)
......
...@@ -119,7 +119,7 @@ template<typename _Size> ...@@ -119,7 +119,7 @@ template<typename _Size>
* @see decode2 * @see decode2
*/ */
inline _CASable inline _CASable
__encode2(int __a, int __b) //must all be non-negative, actually __encode2(int __a, int __b) //must all be non-negative, actually
{ {
return (((_CASable)__a) << (_CASable_bits / 2)) | (((_CASable)__b) << 0); return (((_CASable)__a) << (_CASable_bits / 2)) | (((_CASable)__b) << 0);
} }
...@@ -192,7 +192,7 @@ template<typename _Predicate, typename argument_type> ...@@ -192,7 +192,7 @@ template<typename _Predicate, typename argument_type>
/** @brief Similar to std::__binder1st, /** @brief Similar to std::__binder1st,
* but giving the argument types explicitly. */ * but giving the argument types explicitly. */
template<typename _Operation, typename _FirstArgumentType, template<typename _Operation, typename _FirstArgumentType,
typename _SecondArgumentType, typename _ResultType> typename _SecondArgumentType, typename _ResultType>
class __binder1st class __binder1st
: public std::unary_function<_SecondArgumentType, _ResultType> : public std::unary_function<_SecondArgumentType, _ResultType>
{ {
...@@ -221,7 +221,7 @@ template<typename _Operation, typename _FirstArgumentType, ...@@ -221,7 +221,7 @@ template<typename _Operation, typename _FirstArgumentType,
* explicitly. * explicitly.
*/ */
template<typename _Operation, typename _FirstArgumentType, template<typename _Operation, typename _FirstArgumentType,
typename _SecondArgumentType, typename _ResultType> typename _SecondArgumentType, typename _ResultType>
class binder2nd class binder2nd
: public std::unary_function<_FirstArgumentType, _ResultType> : public std::unary_function<_FirstArgumentType, _ResultType>
{ {
...@@ -281,7 +281,7 @@ template<typename _Tp1, typename _Tp2> ...@@ -281,7 +281,7 @@ template<typename _Tp1, typename _Tp2>
struct _Plus : public std::binary_function<_Tp1, _Tp2, _Tp1> struct _Plus : public std::binary_function<_Tp1, _Tp2, _Tp1>
{ {
typedef __typeof__(*static_cast<_Tp1*>(NULL) typedef __typeof__(*static_cast<_Tp1*>(NULL)
+ *static_cast<_Tp2*>(NULL)) __result; + *static_cast<_Tp2*>(NULL)) __result;
__result __result
operator()(const _Tp1& __x, const _Tp2& __y) const operator()(const _Tp1& __x, const _Tp2& __y) const
...@@ -293,7 +293,7 @@ template<typename _Tp> ...@@ -293,7 +293,7 @@ template<typename _Tp>
struct _Plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> struct _Plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
{ {
typedef __typeof__(*static_cast<_Tp*>(NULL) typedef __typeof__(*static_cast<_Tp*>(NULL)
+ *static_cast<_Tp*>(NULL)) __result; + *static_cast<_Tp*>(NULL)) __result;
__result __result
operator()(const _Tp& __x, const _Tp& __y) const operator()(const _Tp& __x, const _Tp& __y) const
...@@ -306,7 +306,7 @@ template<typename _Tp1, typename _Tp2> ...@@ -306,7 +306,7 @@ template<typename _Tp1, typename _Tp2>
struct _Multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1> struct _Multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1>
{ {
typedef __typeof__(*static_cast<_Tp1*>(NULL) typedef __typeof__(*static_cast<_Tp1*>(NULL)
* *static_cast<_Tp2*>(NULL)) __result; * *static_cast<_Tp2*>(NULL)) __result;
__result __result
operator()(const _Tp1& __x, const _Tp2& __y) const operator()(const _Tp1& __x, const _Tp2& __y) const
...@@ -318,7 +318,7 @@ template<typename _Tp> ...@@ -318,7 +318,7 @@ template<typename _Tp>
struct _Multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp> struct _Multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
{ {
typedef __typeof__(*static_cast<_Tp*>(NULL) typedef __typeof__(*static_cast<_Tp*>(NULL)
* *static_cast<_Tp*>(NULL)) __result; * *static_cast<_Tp*>(NULL)) __result;
__result __result
operator()(const _Tp& __x, const _Tp& __y) const operator()(const _Tp& __x, const _Tp& __y) const
......
...@@ -39,7 +39,8 @@ ...@@ -39,7 +39,8 @@
namespace __gnu_parallel namespace __gnu_parallel
{ {
/** /**
* @brief Check whether @__c [__begin, @__c __end) is sorted according to @__c __comp. * @brief Check whether @__c [__begin, @__c __end) is sorted according
* to @__c __comp.
* @param __begin Begin iterator of sequence. * @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence. * @param __end End iterator of sequence.
* @param __comp Comparator. * @param __comp Comparator.
...@@ -49,33 +50,34 @@ namespace __gnu_parallel ...@@ -49,33 +50,34 @@ namespace __gnu_parallel
template<typename _IIter, typename _Compare> template<typename _IIter, typename _Compare>
bool bool
__is_sorted(_IIter __begin, _IIter __end, __is_sorted(_IIter __begin, _IIter __end,
_Compare __comp _Compare __comp
= std::less<typename std::iterator_traits<_IIter>:: = std::less<typename std::iterator_traits<_IIter>::
_ValueType>()) _ValueType>())
{ {
if (__begin == __end) if (__begin == __end)
return true; return true;
_IIter __current(__begin), __recent(__begin); _IIter __current(__begin), __recent(__begin);
unsigned long long __position = 1; unsigned long long __position = 1;
for (__current++; __current != __end; __current++) for (__current++; __current != __end; __current++)
{ {
if (__comp(*__current, *__recent)) if (__comp(*__current, *__recent))
{ {
printf("__is_sorted: check failed before position %__i.\n", printf("__is_sorted: check failed before position %__i.\n",
__position); __position);
return false; return false;
} }
__recent = __current; __recent = __current;
__position++; __position++;
} }
return true; return true;
} }
/** /**
* @brief Check whether @__c [__begin, @__c __end) is sorted according to @__c __comp. * @brief Check whether @__c [__begin, @__c __end) is sorted according to
* @__c __comp.
* Prints the position in case an unordered pair is found. * Prints the position in case an unordered pair is found.
* @param __begin Begin iterator of sequence. * @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence. * @param __end End iterator of sequence.
...@@ -87,36 +89,37 @@ namespace __gnu_parallel ...@@ -87,36 +89,37 @@ namespace __gnu_parallel
template<typename _IIter, typename _Compare> template<typename _IIter, typename _Compare>
bool bool
is_sorted_failure(_IIter __begin, _IIter __end, is_sorted_failure(_IIter __begin, _IIter __end,
_IIter& __first_failure, _IIter& __first_failure,
_Compare __comp _Compare __comp
= std::less<typename std::iterator_traits<_IIter>:: = std::less<typename std::iterator_traits<_IIter>::
_ValueType>()) _ValueType>())
{ {
if (__begin == __end) if (__begin == __end)
return true; return true;
_IIter __current(__begin), __recent(__begin); _IIter __current(__begin), __recent(__begin);
unsigned long long __position = 1; unsigned long long __position = 1;
for (__current++; __current != __end; __current++) for (__current++; __current != __end; __current++)
{ {
if (__comp(*__current, *__recent)) if (__comp(*__current, *__recent))
{ {
__first_failure = __current; __first_failure = __current;
printf("__is_sorted: check failed before position %lld.\n", printf("__is_sorted: check failed before position %lld.\n",
__position); __position);
return false; return false;
} }
__recent = __current; __recent = __current;
__position++; __position++;
} }
__first_failure = __end; __first_failure = __end;
return true; return true;
} }
/** /**
* @brief Check whether @__c [__begin, @__c __end) is sorted according to @__c __comp. * @brief Check whether @__c [__begin, @__c __end) is sorted according to
* @__c __comp.
* Prints all unordered pairs, including the surrounding two elements. * Prints all unordered pairs, including the surrounding two elements.
* @param __begin Begin iterator of sequence. * @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence. * @param __end End iterator of sequence.
...@@ -127,26 +130,26 @@ namespace __gnu_parallel ...@@ -127,26 +130,26 @@ namespace __gnu_parallel
bool bool
// XXX Compare default template argument // XXX Compare default template argument
is_sorted_print_failures(_IIter __begin, _IIter __end, is_sorted_print_failures(_IIter __begin, _IIter __end,
_Compare __comp _Compare __comp
= std::less<typename std::iterator_traits = std::less<typename std::iterator_traits
<_IIter>::value_type>()) <_IIter>::value_type>())
{ {
if (__begin == __end) if (__begin == __end)
return true; return true;
_IIter __recent(__begin); _IIter __recent(__begin);
bool __ok = true; bool __ok = true;
for (_IIter __pos(__begin + 1); __pos != __end; __pos++) for (_IIter __pos(__begin + 1); __pos != __end; __pos++)
{ {
if (__comp(*__pos, *__recent)) if (__comp(*__pos, *__recent))
{ {
printf("%ld: %d %d %d %d\n", __pos - __begin, *(__pos - 2), printf("%ld: %d %d %d %d\n", __pos - __begin, *(__pos - 2),
*(__pos- 1), *__pos, *(__pos + 1)); *(__pos- 1), *__pos, *(__pos + 1));
__ok = false; __ok = false;
} }
__recent = __pos; __recent = __pos;
} }
return __ok; return __ok;
} }
} }
......
...@@ -65,9 +65,9 @@ namespace __gnu_parallel ...@@ -65,9 +65,9 @@ namespace __gnu_parallel
int32 __faa32(int32* __x, int32 __inc) int32 __faa32(int32* __x, int32 __inc)
{ {
asm volatile("lock xadd %0,%1" asm volatile("lock xadd %0,%1"
: "=__r" (__inc), "=__m" (*__x) : "=__r" (__inc), "=__m" (*__x)
: "0" (__inc) : "0" (__inc)
: "memory"); : "memory");
return __inc; return __inc;
} }
#if defined(__x86_64) #if defined(__x86_64)
...@@ -75,9 +75,9 @@ namespace __gnu_parallel ...@@ -75,9 +75,9 @@ namespace __gnu_parallel
int64 __faa64(int64* __x, int64 __inc) int64 __faa64(int64* __x, int64 __inc)
{ {
asm volatile("lock xadd %0,%1" asm volatile("lock xadd %0,%1"
: "=__r" (__inc), "=__m" (*__x) : "=__r" (__inc), "=__m" (*__x)
: "0" (__inc) : "0" (__inc)
: "memory"); : "memory");
return __inc; return __inc;
} }
#endif #endif
...@@ -94,25 +94,25 @@ namespace __gnu_parallel ...@@ -94,25 +94,25 @@ namespace __gnu_parallel
inline int32 inline int32
__fetch_and_add_32(volatile int32* __ptr, int32 __addend) __fetch_and_add_32(volatile int32* __ptr, int32 __addend)
{ {
#if defined(__ICC) //x86 version #if defined(__ICC) //x86 version
return _InterlockedExchangeAdd((void*)__ptr, __addend); return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ECC) //IA-64 version #elif defined(__ECC) //IA-64 version
return _InterlockedExchangeAdd((void*)__ptr, __addend); return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER) #elif defined(__ICL) || defined(_MSC_VER)
return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(__ptr), return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(__ptr),
__addend); __addend);
#elif defined(__GNUC__) #elif defined(__GNUC__)
return __sync_fetch_and_add(__ptr, __addend); return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__SUNPRO_CC) && defined(__sparc) #elif defined(__SUNPRO_CC) && defined(__sparc)
volatile int32 __before, __after; volatile int32 __before, __after;
do do
{ {
__before = *__ptr; __before = *__ptr;
__after = __before + __addend; __after = __before + __addend;
} while (atomic_cas_32((volatile unsigned int*)__ptr, __before, } while (atomic_cas_32((volatile unsigned int*)__ptr, __before,
__after) != __before); __after) != __before);
return __before; return __before;
#else //fallback, slow #else //fallback, slow
#pragma message("slow __fetch_and_add_32") #pragma message("slow __fetch_and_add_32")
int32 __res; int32 __res;
#pragma omp critical #pragma omp critical
...@@ -133,32 +133,32 @@ namespace __gnu_parallel ...@@ -133,32 +133,32 @@ namespace __gnu_parallel
inline int64 inline int64
__fetch_and_add_64(volatile int64* __ptr, int64 __addend) __fetch_and_add_64(volatile int64* __ptr, int64 __addend)
{ {
#if defined(__ICC) && defined(__x86_64) //x86 version #if defined(__ICC) && defined(__x86_64) //x86 version
return __faa64<int>((int64*)__ptr, __addend); return __faa64<int>((int64*)__ptr, __addend);
#elif defined(__ECC) //IA-64 version #elif defined(__ECC) //IA-64 version
return _InterlockedExchangeAdd64((void*)__ptr, __addend); return _InterlockedExchangeAdd64((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER) #elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64 #ifndef _WIN64
_GLIBCXX_PARALLEL_ASSERT(false); //not available in this case _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case
return 0; return 0;
#else #else
return _InterlockedExchangeAdd64(__ptr, __addend); return _InterlockedExchangeAdd64(__ptr, __addend);
#endif #endif
#elif defined(__GNUC__) && defined(__x86_64) #elif defined(__GNUC__) && defined(__x86_64)
return __sync_fetch_and_add(__ptr, __addend); return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__GNUC__) && defined(__i386) && \ #elif defined(__GNUC__) && defined(__i386) && \
(defined(__i686) || defined(__pentium4) || defined(__athlon)) (defined(__i686) || defined(__pentium4) || defined(__athlon))
return __sync_fetch_and_add(__ptr, __addend); return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__SUNPRO_CC) && defined(__sparc) #elif defined(__SUNPRO_CC) && defined(__sparc)
volatile int64 __before, __after; volatile int64 __before, __after;
do do
{ {
__before = *__ptr; __before = *__ptr;
__after = __before + __addend; __after = __before + __addend;
} while (atomic_cas_64((volatile unsigned long long*)__ptr, __before, } while (atomic_cas_64((volatile unsigned long long*)__ptr, __before,
__after) != __before); __after) != __before);
return __before; return __before;
#else //fallback, slow #else //fallback, slow
#if defined(__GNUC__) && defined(__i386) #if defined(__GNUC__) && defined(__i386)
// XXX doesn't work with -march=native // XXX doesn't work with -march=native
//#warning "please compile with -march=i686 or better" //#warning "please compile with -march=i686 or better"
...@@ -201,9 +201,10 @@ namespace __gnu_parallel ...@@ -201,9 +201,10 @@ namespace __gnu_parallel
{ {
int32 __before; int32 __before;
__asm__ __volatile__("lock; cmpxchgl %1,%2" __asm__ __volatile__("lock; cmpxchgl %1,%2"
: "=a"(__before) : "=a"(__before)
: "q"(__nw), "__m"(*(volatile long long*)(__ptr)), "0"(__old) : "q"(__nw), "__m"(*(volatile long long*)(__ptr)),
: "memory"); "0"(__old)
: "memory");
return __before; return __before;
} }
...@@ -214,9 +215,10 @@ namespace __gnu_parallel ...@@ -214,9 +215,10 @@ namespace __gnu_parallel
{ {
int64 __before; int64 __before;
__asm__ __volatile__("lock; cmpxchgq %1,%2" __asm__ __volatile__("lock; cmpxchgq %1,%2"
: "=a"(__before) : "=a"(__before)
: "q"(__nw), "__m"(*(volatile long long*)(__ptr)), "0"(__old) : "q"(__nw), "__m"(*(volatile long long*)(__ptr)),
: "memory"); "0"(__old)
: "memory");
return __before; return __before;
} }
#endif #endif
...@@ -232,32 +234,35 @@ namespace __gnu_parallel ...@@ -232,32 +234,35 @@ namespace __gnu_parallel
* @param __replacement Replacement value. * @param __replacement Replacement value.
*/ */
inline bool inline bool
__compare_and_swap_32(volatile int32* __ptr, int32 __comparand, int32 __replacement) __compare_and_swap_32(volatile int32* __ptr, int32 __comparand,
int32 __replacement)
{ {
#if defined(__ICC) //x86 version #if defined(__ICC) //x86 version
return _InterlockedCompareExchange((void*)__ptr, __replacement, return _InterlockedCompareExchange((void*)__ptr, __replacement,
__comparand) == __comparand; __comparand) == __comparand;
#elif defined(__ECC) //IA-64 version #elif defined(__ECC) //IA-64 version
return _InterlockedCompareExchange((void*)__ptr, __replacement, return _InterlockedCompareExchange((void*)__ptr, __replacement,
__comparand) == __comparand; __comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER) #elif defined(__ICL) || defined(_MSC_VER)
return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(__ptr), return _InterlockedCompareExchange(
__replacement, __comparand) == __comparand; reinterpret_cast<volatile long*>(__ptr),
__replacement, __comparand)
== __comparand;
#elif defined(__GNUC__) #elif defined(__GNUC__)
return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement); return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc) #elif defined(__SUNPRO_CC) && defined(__sparc)
return atomic_cas_32((volatile unsigned int*)__ptr, __comparand, return atomic_cas_32((volatile unsigned int*)__ptr, __comparand,
__replacement) == __comparand; __replacement) == __comparand;
#else #else
#pragma message("slow __compare_and_swap_32") #pragma message("slow __compare_and_swap_32")
bool __res = false; bool __res = false;
#pragma omp critical #pragma omp critical
{ {
if (*__ptr == __comparand) if (*__ptr == __comparand)
{ {
*__ptr = __replacement; *__ptr = __replacement;
__res = true; __res = true;
} }
} }
return __res; return __res;
#endif #endif
...@@ -272,30 +277,31 @@ namespace __gnu_parallel ...@@ -272,30 +277,31 @@ namespace __gnu_parallel
* @param __replacement Replacement value. * @param __replacement Replacement value.
*/ */
inline bool inline bool
__compare_and_swap_64(volatile int64* __ptr, int64 __comparand, int64 __replacement) __compare_and_swap_64(volatile int64* __ptr, int64 __comparand,
int64 __replacement)
{ {
#if defined(__ICC) && defined(__x86_64) //x86 version #if defined(__ICC) && defined(__x86_64) //x86 version
return __cas64<int>(__ptr, __comparand, __replacement) == __comparand; return __cas64<int>(__ptr, __comparand, __replacement) == __comparand;
#elif defined(__ECC) //IA-64 version #elif defined(__ECC) //IA-64 version
return _InterlockedCompareExchange64((void*)__ptr, __replacement, return _InterlockedCompareExchange64((void*)__ptr, __replacement,
__comparand) == __comparand; __comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER) #elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64 #ifndef _WIN64
_GLIBCXX_PARALLEL_ASSERT(false); //not available in this case _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case
return 0; return 0;
#else #else
return _InterlockedCompareExchange64(__ptr, __replacement, return _InterlockedCompareExchange64(__ptr, __replacement,
__comparand) == __comparand; __comparand) == __comparand;
#endif #endif
#elif defined(__GNUC__) && defined(__x86_64) #elif defined(__GNUC__) && defined(__x86_64)
return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement); return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__GNUC__) && defined(__i386) && \ #elif defined(__GNUC__) && defined(__i386) && \
(defined(__i686) || defined(__pentium4) || defined(__athlon)) (defined(__i686) || defined(__pentium4) || defined(__athlon))
return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement); return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc) #elif defined(__SUNPRO_CC) && defined(__sparc)
return atomic_cas_64((volatile unsigned long long*)__ptr, return atomic_cas_64((volatile unsigned long long*)__ptr,
__comparand, __replacement) == __comparand; __comparand, __replacement) == __comparand;
#else #else
#if defined(__GNUC__) && defined(__i386) #if defined(__GNUC__) && defined(__i386)
// XXX -march=native // XXX -march=native
...@@ -306,10 +312,10 @@ namespace __gnu_parallel ...@@ -306,10 +312,10 @@ namespace __gnu_parallel
#pragma omp critical #pragma omp critical
{ {
if (*__ptr == __comparand) if (*__ptr == __comparand)
{ {
*__ptr = __replacement; *__ptr = __replacement;
__res = true; __res = true;
} }
} }
return __res; return __res;
#endif #endif
...@@ -327,9 +333,11 @@ namespace __gnu_parallel ...@@ -327,9 +333,11 @@ namespace __gnu_parallel
__compare_and_swap(volatile _Tp* __ptr, _Tp __comparand, _Tp __replacement) __compare_and_swap(volatile _Tp* __ptr, _Tp __comparand, _Tp __replacement)
{ {
if (sizeof(_Tp) == sizeof(int32)) if (sizeof(_Tp) == sizeof(int32))
return __compare_and_swap_32((volatile int32*) __ptr, (int32)__comparand, (int32)__replacement); return __compare_and_swap_32((volatile int32*) __ptr, (int32)__comparand,
(int32)__replacement);
else if (sizeof(_Tp) == sizeof(int64)) else if (sizeof(_Tp) == sizeof(int64))
return __compare_and_swap_64((volatile int64*) __ptr, (int64)__comparand, (int64)__replacement); return __compare_and_swap_64((volatile int64*) __ptr, (int64)__comparand,
(int64)__replacement);
else else
_GLIBCXX_PARALLEL_ASSERT(false); _GLIBCXX_PARALLEL_ASSERT(false);
} }
......
...@@ -45,7 +45,8 @@ namespace __gnu_parallel ...@@ -45,7 +45,8 @@ namespace __gnu_parallel
* @returns End of splitter sequence, i.e. @c __s+__num_threads+1 */
template<typename _DifferenceType, typename _OutputIterator> template<typename _DifferenceType, typename _OutputIterator>
_OutputIterator _OutputIterator
equally_split(_DifferenceType __n, _ThreadIndex __num_threads, _OutputIterator __s) equally_split(_DifferenceType __n, _ThreadIndex __num_threads,
_OutputIterator __s)
{ {
_DifferenceType __chunk_length = __n / __num_threads; _DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __num_longer_chunks = __n % __num_threads; _DifferenceType __num_longer_chunks = __n % __num_threads;
...@@ -53,7 +54,8 @@ template<typename _DifferenceType, typename _OutputIterator> ...@@ -53,7 +54,8 @@ template<typename _DifferenceType, typename _OutputIterator>
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
{ {
*__s++ = __pos; *__s++ = __pos;
__pos += (__i < __num_longer_chunks) ? (__chunk_length + 1) : __chunk_length; __pos += (__i < __num_longer_chunks) ?
(__chunk_length + 1) : __chunk_length;
} }
*__s++ = __n; *__s++ = __n;
return __s; return __s;
......
...@@ -53,9 +53,9 @@ namespace __gnu_parallel ...@@ -53,9 +53,9 @@ namespace __gnu_parallel
* @return Place of finding in both sequences. * @return Place of finding in both sequences.
*/ */
template<typename _RAIter1, template<typename _RAIter1,
typename _RAIter2, typename _RAIter2,
typename _Pred, typename _Pred,
typename _Selector> typename _Selector>
inline std::pair<_RAIter1, _RAIter2> inline std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, _RAIter1 __end1, __find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred, _Selector __selector) _RAIter2 __begin2, _Pred __pred, _Selector __selector)
...@@ -64,13 +64,13 @@ template<typename _RAIter1, ...@@ -64,13 +64,13 @@ template<typename _RAIter1,
{ {
case GROWING_BLOCKS: case GROWING_BLOCKS:
return __find_template(__begin1, __end1, __begin2, __pred, __selector, return __find_template(__begin1, __end1, __begin2, __pred, __selector,
growing_blocks_tag()); growing_blocks_tag());
case CONSTANT_SIZE_BLOCKS: case CONSTANT_SIZE_BLOCKS:
return __find_template(__begin1, __end1, __begin2, __pred, __selector, return __find_template(__begin1, __end1, __begin2, __pred, __selector,
constant_size_blocks_tag()); constant_size_blocks_tag());
case EQUAL_SPLIT: case EQUAL_SPLIT:
return __find_template(__begin1, __end1, __begin2, __pred, __selector, return __find_template(__begin1, __end1, __begin2, __pred, __selector,
equal_split_tag()); equal_split_tag());
default: default:
_GLIBCXX_PARALLEL_ASSERT(false); _GLIBCXX_PARALLEL_ASSERT(false);
return std::make_pair(__begin1, __begin2); return std::make_pair(__begin1, __begin2);
...@@ -90,9 +90,9 @@ template<typename _RAIter1, ...@@ -90,9 +90,9 @@ template<typename _RAIter1,
* @return Place of finding in both sequences. * @return Place of finding in both sequences.
*/ */
template<typename _RAIter1, template<typename _RAIter1,
typename _RAIter2, typename _RAIter2,
typename _Pred, typename _Pred,
typename _Selector> typename _Selector>
std::pair<_RAIter1, _RAIter2> std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, __find_template(_RAIter1 __begin1,
_RAIter1 __end1, _RAIter1 __end1,
...@@ -125,7 +125,8 @@ template<typename _RAIter1, ...@@ -125,7 +125,8 @@ template<typename _RAIter1,
} //single } //single
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
_DifferenceType __start = __borders[__iam], __stop = __borders[__iam + 1]; _DifferenceType __start = __borders[__iam],
__stop = __borders[__iam + 1];
_RAIter1 __i1 = __begin1 + __start; _RAIter1 __i1 = __begin1 + __start;
_RAIter2 __i2 = __begin2 + __start; _RAIter2 __i2 = __begin2 + __start;
...@@ -153,8 +154,7 @@ template<typename _RAIter1, ...@@ -153,8 +154,7 @@ template<typename _RAIter1,
delete[] __borders; delete[] __borders;
return return
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result);
__begin2 + __result);
} }
#endif #endif
...@@ -178,15 +178,14 @@ template<typename _RAIter1, ...@@ -178,15 +178,14 @@ template<typename _RAIter1,
* There are two main differences between the growing blocks and * There are two main differences between the growing blocks and
* the constant-size blocks variants. * the constant-size blocks variants.
* 1. For GB, the block size grows; for CSB, the block size is fixed. * 1. For GB, the block size grows; for CSB, the block size is fixed.
* 2. For GB, the blocks are allocated dynamically; * 2. For GB, the blocks are allocated dynamically;
* for CSB, the blocks are allocated in a predetermined manner, * for CSB, the blocks are allocated in a predetermined manner,
* namely spatial round-robin.
*/ */
template<typename _RAIter1, template<typename _RAIter1,
typename _RAIter2, typename _RAIter2,
typename _Pred, typename _Pred,
typename _Selector> typename _Selector>
std::pair<_RAIter1, _RAIter2> std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, _RAIter1 __end1, __find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred, _Selector __selector, _RAIter2 __begin2, _Pred __pred, _Selector __selector,
...@@ -231,7 +230,7 @@ template<typename _RAIter1, ...@@ -231,7 +230,7 @@ template<typename _RAIter1,
_DifferenceType __block_size = __s.find_initial_block_size; _DifferenceType __block_size = __s.find_initial_block_size;
_DifferenceType __start = _DifferenceType __start =
__fetch_and_add<_DifferenceType>(&__next_block_start, __block_size); __fetch_and_add<_DifferenceType>(&__next_block_start, __block_size);
// Get new block, update pointer to next block. // Get new block, update pointer to next block.
_DifferenceType __stop = _DifferenceType __stop =
...@@ -250,7 +249,8 @@ template<typename _RAIter1, ...@@ -250,7 +249,8 @@ template<typename _RAIter1,
} }
__local_result = __selector._M_sequential_algorithm( __local_result = __selector._M_sequential_algorithm(
__begin1 + __start, __begin1 + __stop, __begin2 + __start, __pred); __begin1 + __start, __begin1 + __stop,
__begin2 + __start, __pred);
if (__local_result.first != (__begin1 + __stop)) if (__local_result.first != (__begin1 + __stop))
{ {
omp_set_lock(&__result_lock); omp_set_lock(&__result_lock);
...@@ -259,20 +259,22 @@ template<typename _RAIter1, ...@@ -259,20 +259,22 @@ template<typename _RAIter1,
__result = __local_result.first - __begin1; __result = __local_result.first - __begin1;
// Result cannot be in future blocks, stop algorithm. // Result cannot be in future blocks, stop algorithm.
__fetch_and_add<_DifferenceType>(&__next_block_start, __length); __fetch_and_add<_DifferenceType>(
&__next_block_start, __length);
} }
omp_unset_lock(&__result_lock); omp_unset_lock(&__result_lock);
} }
__block_size = __block_size = std::min<_DifferenceType>(
std::min<_DifferenceType>(__block_size * __s.find_increasing_factor, __block_size * __s.find_increasing_factor,
__s.find_maximum_block_size); __s.find_maximum_block_size);
// Get new block, update pointer to next block. // Get new block, update pointer to next block.
__start = __start =
__fetch_and_add<_DifferenceType>(&__next_block_start, __block_size); __fetch_and_add<_DifferenceType>(
&__next_block_start, __block_size);
__stop = ((__length < (__start + __block_size)) __stop = ((__length < (__start + __block_size))
? __length : (__start + __block_size)); ? __length : (__start + __block_size));
} }
} //parallel } //parallel
...@@ -280,8 +282,7 @@ template<typename _RAIter1, ...@@ -280,8 +282,7 @@ template<typename _RAIter1,
// Return iterator on found element. // Return iterator on found element.
return return
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result);
__begin2 + __result);
} }
#endif #endif
...@@ -307,9 +308,9 @@ template<typename _RAIter1, ...@@ -307,9 +308,9 @@ template<typename _RAIter1,
* round-robin. * round-robin.
*/ */
template<typename _RAIter1, template<typename _RAIter1,
typename _RAIter2, typename _RAIter2,
typename _Pred, typename _Pred,
typename _Selector> typename _Selector>
std::pair<_RAIter1, _RAIter2> std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, _RAIter1 __end1, __find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred, _Selector __selector, _RAIter2 __begin2, _Pred __pred, _Selector __selector,
...@@ -329,8 +330,8 @@ template<typename _RAIter1, ...@@ -329,8 +330,8 @@ template<typename _RAIter1,
// Try it sequentially first. // Try it sequentially first.
std::pair<_RAIter1, _RAIter2> __find_seq_result = std::pair<_RAIter1, _RAIter2> __find_seq_result =
__selector._M_sequential_algorithm(__begin1, __begin1 + __sequential_search_size, __selector._M_sequential_algorithm(
__begin2, __pred); __begin1, __begin1 + __sequential_search_size, __begin2, __pred);
if (__find_seq_result.first != (__begin1 + __sequential_search_size)) if (__find_seq_result.first != (__begin1 + __sequential_search_size))
return __find_seq_result; return __find_seq_result;
...@@ -384,7 +385,8 @@ template<typename _RAIter1, ...@@ -384,7 +385,8 @@ template<typename _RAIter1,
// Where to work. // Where to work.
__start = __iteration_start + __iam * __block_size; __start = __iteration_start + __iam * __block_size;
__stop = std::min<_DifferenceType>(__length, __start + __block_size); __stop = std::min<_DifferenceType>(
__length, __start + __block_size);
} }
} //parallel } //parallel
...@@ -392,8 +394,7 @@ template<typename _RAIter1, ...@@ -392,8 +394,7 @@ template<typename _RAIter1,
// Return iterator on found element. // Return iterator on found element.
return return
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result);
__begin2 + __result);
} }
#endif #endif
} // end namespace } // end namespace
......
...@@ -55,7 +55,7 @@ namespace __gnu_parallel ...@@ -55,7 +55,7 @@ namespace __gnu_parallel
* @param __pred Find predicate. * @param __pred Find predicate.
*/ */
template<typename _RAIter1, typename _RAIter2, template<typename _RAIter1, typename _RAIter2,
typename _Pred> typename _Pred>
bool bool
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred) operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
{ return __pred(*__i1); } { return __pred(*__i1); }
...@@ -67,13 +67,13 @@ namespace __gnu_parallel ...@@ -67,13 +67,13 @@ namespace __gnu_parallel
* @param __pred Find predicate. * @param __pred Find predicate.
*/ */
template<typename _RAIter1, typename _RAIter2, template<typename _RAIter1, typename _RAIter2,
typename _Pred> typename _Pred>
std::pair<_RAIter1, _RAIter2> std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1, _M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred) _RAIter2 __begin2, _Pred __pred)
{ return std::make_pair(find_if(__begin1, __end1, __pred, { return std::make_pair(find_if(__begin1, __end1, __pred,
sequential_tag()), __begin2); } sequential_tag()), __begin2); }
}; };
/** @brief Test predicate on two adjacent elements. */ /** @brief Test predicate on two adjacent elements. */
...@@ -85,12 +85,12 @@ namespace __gnu_parallel ...@@ -85,12 +85,12 @@ namespace __gnu_parallel
* @param __pred Find predicate. * @param __pred Find predicate.
*/ */
template<typename _RAIter1, typename _RAIter2, template<typename _RAIter1, typename _RAIter2,
typename _Pred> typename _Pred>
bool bool
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred) operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
{ {
// Passed end iterator is one short. // Passed end iterator is one short.
return __pred(*__i1, *(__i1 + 1)); return __pred(*__i1, *(__i1 + 1));
} }
/** @brief Corresponding sequential algorithm on a sequence. /** @brief Corresponding sequential algorithm on a sequence.
...@@ -100,18 +100,18 @@ namespace __gnu_parallel ...@@ -100,18 +100,18 @@ namespace __gnu_parallel
* @param __pred Find predicate. * @param __pred Find predicate.
*/ */
template<typename _RAIter1, typename _RAIter2, template<typename _RAIter1, typename _RAIter2,
typename _Pred> typename _Pred>
std::pair<_RAIter1, _RAIter2> std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1, _M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred) _RAIter2 __begin2, _Pred __pred)
{ {
// Passed end iterator is one short. // Passed end iterator is one short.
_RAIter1 spot = adjacent_find(__begin1, __end1 + 1, _RAIter1 spot = adjacent_find(__begin1, __end1 + 1,
__pred, sequential_tag()); __pred, sequential_tag());
if (spot == (__end1 + 1)) if (spot == (__end1 + 1))
spot = __end1; spot = __end1;
return std::make_pair(spot, __begin2); return std::make_pair(spot, __begin2);
} }
}; };
...@@ -125,7 +125,7 @@ namespace __gnu_parallel ...@@ -125,7 +125,7 @@ namespace __gnu_parallel
* @param __pred Find predicate. * @param __pred Find predicate.
*/ */
template<typename _RAIter1, typename _RAIter2, template<typename _RAIter1, typename _RAIter2,
typename _Pred> typename _Pred>
bool bool
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred) operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
{ return !__pred(*__i1, *__i2); } { return !__pred(*__i1, *__i2); }
...@@ -138,23 +138,24 @@ namespace __gnu_parallel ...@@ -138,23 +138,24 @@ namespace __gnu_parallel
* @param __pred Find predicate. * @param __pred Find predicate.
*/ */
template<typename _RAIter1, typename _RAIter2, template<typename _RAIter1, typename _RAIter2,
typename _Pred> typename _Pred>
std::pair<_RAIter1, _RAIter2> std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1, _M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred) _RAIter2 __begin2, _Pred __pred)
{ return mismatch(__begin1, __end1, __begin2, __pred, sequential_tag()); } { return mismatch(__begin1, __end1, __begin2, __pred, sequential_tag());
}
}; };
/** @brief Test predicate on several elements. */ /** @brief Test predicate on several elements. */
template<typename _ForwardIterator> template<typename _FIterator>
struct __find_first_of_selector : public __generic_find_selector struct __find_first_of_selector : public __generic_find_selector
{ {
_ForwardIterator _M_begin; _FIterator _M_begin;
_ForwardIterator _M_end; _FIterator _M_end;
explicit __find_first_of_selector(_ForwardIterator __begin, _ForwardIterator __end) explicit __find_first_of_selector(_FIterator __begin, _FIterator __end)
: _M_begin(__begin), _M_end(__end) { } : _M_begin(__begin), _M_end(__end) { }
/** @brief Test on one position. /** @brief Test on one position.
...@@ -162,15 +163,15 @@ namespace __gnu_parallel ...@@ -162,15 +163,15 @@ namespace __gnu_parallel
* @param __i2 _Iterator on second sequence (unused). * @param __i2 _Iterator on second sequence (unused).
* @param __pred Find predicate. */ * @param __pred Find predicate. */
template<typename _RAIter1, typename _RAIter2, template<typename _RAIter1, typename _RAIter2,
typename _Pred> typename _Pred>
bool bool
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred) operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
{ {
for (_ForwardIterator __pos_in_candidates = _M_begin; for (_FIterator __pos_in_candidates = _M_begin;
__pos_in_candidates != _M_end; ++__pos_in_candidates) __pos_in_candidates != _M_end; ++__pos_in_candidates)
if (__pred(*__i1, *__pos_in_candidates)) if (__pred(*__i1, *__pos_in_candidates))
return true; return true;
return false; return false;
} }
/** @brief Corresponding sequential algorithm on a sequence. /** @brief Corresponding sequential algorithm on a sequence.
...@@ -179,13 +180,16 @@ namespace __gnu_parallel ...@@ -179,13 +180,16 @@ namespace __gnu_parallel
* @param __begin2 Begin iterator of second sequence. * @param __begin2 Begin iterator of second sequence.
* @param __pred Find predicate. */ * @param __pred Find predicate. */
template<typename _RAIter1, typename _RAIter2, template<typename _RAIter1, typename _RAIter2,
typename _Pred> typename _Pred>
std::pair<_RAIter1, _RAIter2> std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1, _M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred) _RAIter2 __begin2, _Pred __pred)
{ return std::make_pair(find_first_of(__begin1, __end1, _M_begin, _M_end, __pred, {
sequential_tag()), __begin2); } return std::make_pair(
find_first_of(__begin1, __end1, _M_begin, _M_end, __pred,
sequential_tag()), __begin2);
}
}; };
} }
......
...@@ -56,41 +56,44 @@ namespace __gnu_parallel ...@@ -56,41 +56,44 @@ namespace __gnu_parallel
* @param __bound Maximum number of elements processed. * @param __bound Maximum number of elements processed.
* @param __parallelism_tag Parallelization method */ * @param __parallelism_tag Parallelization method */
template<typename _IIter, typename _UserOp, template<typename _IIter, typename _UserOp,
typename _Functionality, typename _Red, typename _Result> typename _Functionality, typename _Red, typename _Result>
_UserOp _UserOp
__for_each_template_random_access(_IIter __begin, _IIter __end, __for_each_template_random_access(_IIter __begin, _IIter __end,
_UserOp __user_op, _UserOp __user_op,
_Functionality& __functionality, _Functionality& __functionality,
_Red __reduction, _Result __reduction_start, _Red __reduction,
_Result& __output, typename _Result __reduction_start,
std::iterator_traits<_IIter>:: _Result& __output, typename
difference_type __bound, std::iterator_traits<_IIter>::
_Parallelism __parallelism_tag) difference_type __bound,
_Parallelism __parallelism_tag)
{ {
if (__parallelism_tag == parallel_unbalanced) if (__parallelism_tag == parallel_unbalanced)
return for_each_template_random_access_ed(__begin, __end, __user_op, return for_each_template_random_access_ed(__begin, __end, __user_op,
__functionality, __reduction, __functionality, __reduction,
__reduction_start, __reduction_start,
__output, __bound); __output, __bound);
else if (__parallelism_tag == parallel_omp_loop) else if (__parallelism_tag == parallel_omp_loop)
return for_each_template_random_access_omp_loop(__begin, __end, __user_op, return for_each_template_random_access_omp_loop(
__functionality, __begin, __end, __user_op,
__reduction, __functionality,
__reduction_start, __reduction,
__output, __bound); __reduction_start,
__output, __bound);
else if (__parallelism_tag == parallel_omp_loop_static) else if (__parallelism_tag == parallel_omp_loop_static)
return for_each_template_random_access_omp_loop(__begin, __end, __user_op, return for_each_template_random_access_omp_loop(
__functionality, __begin, __end, __user_op,
__reduction, __functionality,
__reduction_start, __reduction,
__output, __bound); __reduction_start,
else //e. g. parallel_balanced __output, __bound);
return for_each_template_random_access_workstealing(__begin, __end, else //e. g. parallel_balanced
__user_op, return for_each_template_random_access_workstealing(__begin, __end,
__functionality, __user_op,
__reduction, __functionality,
__reduction_start, __reduction,
__output, __bound); __reduction_start,
__output, __bound);
} }
} }
......
...@@ -59,10 +59,10 @@ namespace __gnu_parallel ...@@ -59,10 +59,10 @@ namespace __gnu_parallel
template<typename _Op> template<typename _Op>
bool bool
operator()(_Op& __o, _It __i) operator()(_Op& __o, _It __i)
{ {
__o(*__i); __o(*__i);
return true; return true;
} }
}; };
/** @brief std::generate() selector. */ /** @brief std::generate() selector. */
...@@ -76,9 +76,9 @@ namespace __gnu_parallel ...@@ -76,9 +76,9 @@ namespace __gnu_parallel
bool bool
operator()(_Op& __o, _It __i) operator()(_Op& __o, _It __i)
{ {
*__i = __o(); *__i = __o();
return true; return true;
} }
}; };
/** @brief std::fill() selector. */ /** @brief std::fill() selector. */
...@@ -91,10 +91,10 @@ namespace __gnu_parallel ...@@ -91,10 +91,10 @@ namespace __gnu_parallel
template<typename Val> template<typename Val>
bool bool
operator()(Val& __v, _It __i) operator()(Val& __v, _It __i)
{ {
*__i = __v; *__i = __v;
return true; return true;
} }
}; };
/** @brief std::transform() selector, one input sequence variant. */
...@@ -107,10 +107,10 @@ namespace __gnu_parallel ...@@ -107,10 +107,10 @@ namespace __gnu_parallel
template<typename _Op> template<typename _Op>
bool bool
operator()(_Op& __o, _It __i) operator()(_Op& __o, _It __i)
{ {
*__i.second = __o(*__i.first); *__i.second = __o(*__i.first);
return true; return true;
} }
}; };
/** @brief std::transform() selector, two input sequences variant. */
...@@ -123,10 +123,10 @@ namespace __gnu_parallel ...@@ -123,10 +123,10 @@ namespace __gnu_parallel
template<typename _Op> template<typename _Op>
bool bool
operator()(_Op& __o, _It __i) operator()(_Op& __o, _It __i)
{ {
*__i._M_third = __o(*__i._M_first, *__i._M_second); *__i._M_third = __o(*__i._M_first, *__i._M_second);
return true; return true;
} }
}; };
/** @brief std::replace() selector. */ /** @brief std::replace() selector. */
...@@ -147,9 +147,9 @@ namespace __gnu_parallel ...@@ -147,9 +147,9 @@ namespace __gnu_parallel
bool bool
operator()(_Tp& __v, _It __i) operator()(_Tp& __v, _It __i)
{ {
if (*__i == __v) if (*__i == __v)
*__i = __new_val; *__i = __new_val;
return true; return true;
} }
}; };
...@@ -171,9 +171,9 @@ namespace __gnu_parallel ...@@ -171,9 +171,9 @@ namespace __gnu_parallel
bool bool
operator()(_Op& __o, _It __i) operator()(_Op& __o, _It __i)
{ {
if (__o(*__i)) if (__o(*__i))
*__i = __new_val; *__i = __new_val;
return true; return true;
} }
}; };
...@@ -188,7 +188,7 @@ namespace __gnu_parallel ...@@ -188,7 +188,7 @@ namespace __gnu_parallel
// The template parameter uses a reserved name (_ValueType instead of
// Val) so that a user-defined macro cannot interfere with this header.
template<typename _ValueType>
  _Diff
  operator()(_ValueType& __v, _It __i)
  {
    // Contribute 1 to the count when the referenced element equals
    // __v, 0 otherwise.
    return (__v == *__i) ? 1 : 0;
  }
}; };
/** @brief std::count_if () selector. */ /** @brief std::count_if () selector. */
...@@ -202,7 +202,7 @@ namespace __gnu_parallel ...@@ -202,7 +202,7 @@ namespace __gnu_parallel
template<typename _Op>
  _Diff
  operator()(_Op& __o, _It __i)
  {
    // Contribute 1 to the count when the predicate functor accepts
    // the referenced element, 0 otherwise.
    return (__o(*__i)) ? 1 : 0;
  }
}; };
/** @brief std::accumulate() selector. */ /** @brief std::accumulate() selector. */
...@@ -214,8 +214,9 @@ namespace __gnu_parallel ...@@ -214,8 +214,9 @@ namespace __gnu_parallel
* @param __i iterator referencing object. * @param __i iterator referencing object.
* @return The current value. */ * @return The current value. */
template<typename _Op> template<typename _Op>
typename std::iterator_traits<_It>::value_type operator()(_Op __o, _It __i) typename std::iterator_traits<_It>::value_type
{ return *__i; } operator()(_Op __o, _It __i)
{ return *__i; }
}; };
/** @brief std::inner_product() selector. */ /** @brief std::inner_product() selector. */
...@@ -242,11 +243,11 @@ namespace __gnu_parallel ...@@ -242,11 +243,11 @@ namespace __gnu_parallel
template<typename _Op>
  _Tp
  operator()(_Op __mult, _It __current)
  {
    typedef typename std::iterator_traits<_It>::difference_type _Distance;

    // Offset of the current element from the start of the first
    // sequence; the partner element sits at the same offset in the
    // second sequence.
    _Distance __offset = __current - __begin1_iterator;
    return __mult(*__current, *(begin2_iterator + __offset));
  }
}; };
/** @brief Selector that just returns the passed iterator. */ /** @brief Selector that just returns the passed iterator. */
...@@ -260,24 +261,25 @@ namespace __gnu_parallel ...@@ -260,24 +261,25 @@ namespace __gnu_parallel
template<typename _Op>
  _It
  operator()(_Op __o, _It __i)
  {
    // The functor is ignored; the iterator is returned as is.
    return __i;
  }
}; };
/** @brief Selector that returns the difference between two adjacent /** @brief Selector that returns the difference between two adjacent
* __elements. * __elements.
*/ */
template<typename _It> template<typename _It>
struct __adjacent_difference_selector : public __generic_for_each_selector<_It> struct __adjacent_difference_selector :
public __generic_for_each_selector<_It>
{ {
template<typename _Op> template<typename _Op>
bool bool
operator()(_Op& __o, _It __i) operator()(_Op& __o, _It __i)
{ {
typename _It::first_type __go_back_one = __i.first; typename _It::first_type __go_back_one = __i.first;
--__go_back_one; --__go_back_one;
*__i.__second = __o(*__i.__first, *__go_back_one); *__i.__second = __o(*__i.__first, *__go_back_one);
return true; return true;
} }
}; };
// XXX move into type_traits? // XXX move into type_traits?
...@@ -315,10 +317,10 @@ namespace __gnu_parallel ...@@ -315,10 +317,10 @@ namespace __gnu_parallel
_It
operator()(_It __x, _It __y)
{
  // Keep __x when the comparator ranks *__x before *__y, else __y.
  return __comp(*__x, *__y) ? __x : __y;
}
}; };
...@@ -334,10 +336,10 @@ namespace __gnu_parallel ...@@ -334,10 +336,10 @@ namespace __gnu_parallel
_It
operator()(_It __x, _It __y)
{
  // Keep __y when the comparator ranks *__x before *__y, else __x.
  return __comp(*__x, *__y) ? __y : __x;
}
}; };
...@@ -353,7 +355,7 @@ namespace __gnu_parallel ...@@ -353,7 +355,7 @@ namespace __gnu_parallel
template<typename _Result, typename _Addend> template<typename _Result, typename _Addend>
_Result _Result
operator()(const _Result& __x, const _Addend& __y) operator()(const _Result& __x, const _Addend& __y)
{ return __binop(__x, __y); } { return __binop(__x, __y); }
}; };
} }
......
...@@ -40,7 +40,8 @@ namespace __gnu_parallel ...@@ -40,7 +40,8 @@ namespace __gnu_parallel
/** @brief A pair of iterators. The usual iterator operations are /** @brief A pair of iterators. The usual iterator operations are
* applied to both child iterators. * applied to both child iterators.
*/ */
template<typename _Iterator1, typename _Iterator2, typename _IteratorCategory> template<typename _Iterator1, typename _Iterator2,
typename _IteratorCategory>
class _IteratorPair : public std::pair<_Iterator1, _Iterator2> class _IteratorPair : public std::pair<_Iterator1, _Iterator2>
{ {
private: private:
...@@ -57,16 +58,16 @@ namespace __gnu_parallel ...@@ -57,16 +58,16 @@ namespace __gnu_parallel
_IteratorPair() { } _IteratorPair() { }
_IteratorPair(const _Iterator1& __first, const _Iterator2& __second) _IteratorPair(const _Iterator1& __first, const _Iterator2& __second)
: _Base(__first, __second) { } : _Base(__first, __second) { }
// Pre-increment operator: advances both child iterators.
_IteratorPair&
operator++()
{
  ++(this->first);
  ++(this->second);
  return *this;
}
// Post-increment operator. // Post-increment operator.
...@@ -78,9 +79,9 @@ namespace __gnu_parallel ...@@ -78,9 +79,9 @@ namespace __gnu_parallel
_IteratorPair&
operator--()
{
  // Step both child iterators back by one.
  --(this->first);
  --(this->second);
  return *this;
}
// Post-decrement operator. // Post-decrement operator.
...@@ -95,14 +96,15 @@ namespace __gnu_parallel ...@@ -95,14 +96,15 @@ namespace __gnu_parallel
_IteratorPair&
operator=(const _IteratorPair& __other)
{
  // Member-wise copy of both child iterators.
  this->first = __other.first;
  this->second = __other.second;
  return *this;
}
_IteratorPair
operator+(difference_type __delta) const
{
  // New pair with both child iterators advanced by __delta.
  return _IteratorPair(this->first + __delta, this->second + __delta);
}
difference_type difference_type
operator-(const _IteratorPair& __other) const operator-(const _IteratorPair& __other) const
...@@ -114,7 +116,7 @@ namespace __gnu_parallel ...@@ -114,7 +116,7 @@ namespace __gnu_parallel
applied to all three child iterators. applied to all three child iterators.
*/ */
template<typename _Iterator1, typename _Iterator2, typename _Iterator3, template<typename _Iterator1, typename _Iterator2, typename _Iterator3,
typename _IteratorCategory> typename _IteratorCategory>
class _IteratorTriple class _IteratorTriple
{ {
public: public:
...@@ -132,21 +134,21 @@ namespace __gnu_parallel ...@@ -132,21 +134,21 @@ namespace __gnu_parallel
_IteratorTriple() { } _IteratorTriple() { }
_IteratorTriple(const _Iterator1& __first, const _Iterator2& __second, _IteratorTriple(const _Iterator1& __first, const _Iterator2& __second,
const _Iterator3& __third) const _Iterator3& __third)
{ {
_M_first = __first; _M_first = __first;
_M_second = __second; _M_second = __second;
_M_third = __third; _M_third = __third;
} }
// Pre-increment operator: advances all three child iterators.
_IteratorTriple&
operator++()
{
  ++(this->_M_first);
  ++(this->_M_second);
  ++(this->_M_third);
  return *this;
}
// Post-increment operator. // Post-increment operator.
...@@ -158,10 +160,10 @@ namespace __gnu_parallel ...@@ -158,10 +160,10 @@ namespace __gnu_parallel
_IteratorTriple&
operator--()
{
  // Step all three child iterators back by one.
  --(this->_M_first);
  --(this->_M_second);
  --(this->_M_third);
  return *this;
}
// Post-decrement operator. // Post-decrement operator.
...@@ -176,15 +178,16 @@ namespace __gnu_parallel ...@@ -176,15 +178,16 @@ namespace __gnu_parallel
_IteratorTriple&
operator=(const _IteratorTriple& __other)
{
  // Member-wise copy of all three child iterators.
  this->_M_first = __other._M_first;
  this->_M_second = __other._M_second;
  this->_M_third = __other._M_third;
  return *this;
}
_IteratorTriple
operator+(difference_type __delta) const
{
  // New triple with every child iterator advanced by __delta.
  return _IteratorTriple(this->_M_first + __delta,
                         this->_M_second + __delta,
                         this->_M_third + __delta);
}
difference_type difference_type
operator-(const _IteratorTriple& __other) const operator-(const _IteratorTriple& __other) const
......
...@@ -48,17 +48,17 @@ namespace __gnu_parallel ...@@ -48,17 +48,17 @@ namespace __gnu_parallel
template<typename _IIter>
  void
  __shrink_and_double(std::vector<_IIter>& __os_starts,
                      size_t& __count_to_two, size_t& __range_length,
                      const bool __make_twice)
  {
    // Every call counts as one step towards the next enlargement.
    ++__count_to_two;

    // Only when doubling is enabled and this is the second step in a row
    // is the storage enlarged and the step counter restarted.
    if (__make_twice && __count_to_two >= 2)
      {
        const typename std::vector<_IIter>::size_type __num_ranges
          = __os_starts.size() - 1;
        __os_starts.resize(__num_ranges * 2 + 1);
        __count_to_two = 0;
        return;
      }

    // In all other cases the existing entries are thinned out instead.
    __shrink(__os_starts, __count_to_two, __range_length);
  }
/** @brief Combines two ranges into one and thus halves the number of ranges. /** @brief Combines two ranges into one and thus halves the number of ranges.
...@@ -68,11 +68,11 @@ namespace __gnu_parallel ...@@ -68,11 +68,11 @@ namespace __gnu_parallel
template<typename _IIter>
  void
  __shrink(std::vector<_IIter>& __os_starts, size_t& __count_to_two,
           size_t& __range_length)
  {
    typedef typename std::vector<_IIter>::size_type _SizeType;

    // Keep every second entry: slot __dst receives the entry found at
    // slot 2 * __dst, for all __dst up to and including size() / 2.
    // The vector is not resized here, so the bound is fixed up front.
    const _SizeType __last = __os_starts.size() / 2;
    _SizeType __dst = 0;
    while (__dst <= __last)
      {
        __os_starts[__dst] = __os_starts[2 * __dst];
        ++__dst;
      }

    // Each remaining entry now spans twice as many elements.
    // (__count_to_two is not touched by this function.)
    __range_length *= 2;
  }
...@@ -98,17 +98,17 @@ namespace __gnu_parallel ...@@ -98,17 +98,17 @@ namespace __gnu_parallel
template<typename _IIter, typename _FunctorType> template<typename _IIter, typename _FunctorType>
size_t size_t
list_partition(const _IIter __begin, const _IIter __end, list_partition(const _IIter __begin, const _IIter __end,
_IIter* __starts, size_t* __lengths, const int __num_parts, _IIter* __starts, size_t* __lengths, const int __num_parts,
_FunctorType& __f, int __oversampling = 0) _FunctorType& __f, int __oversampling = 0)
{ {
bool __make_twice = false; bool __make_twice = false;
// The resizing algorithm is chosen according to the oversampling factor. // The resizing algorithm is chosen according to the oversampling factor.
if (__oversampling == 0) if (__oversampling == 0)
{ {
__make_twice = true; __make_twice = true;
__oversampling = 1; __oversampling = 1;
} }
std::vector<_IIter> __os_starts(2 * __oversampling * __num_parts + 1); std::vector<_IIter> __os_starts(2 * __oversampling * __num_parts + 1);
...@@ -119,27 +119,28 @@ namespace __gnu_parallel ...@@ -119,27 +119,28 @@ namespace __gnu_parallel
size_t __range_length = 1; size_t __range_length = 1;
size_t __count_to_two = 0; size_t __count_to_two = 0;
while (__it != __end) while (__it != __end)
{ {
__cur = __next; __cur = __next;
for (; __cur < __os_starts.size() and __it != __end; ++__cur) for (; __cur < __os_starts.size() and __it != __end; ++__cur)
{ {
for (__dist_limit += __range_length; for (__dist_limit += __range_length;
__dist < __dist_limit and __it != __end; ++__dist) __dist < __dist_limit and __it != __end; ++__dist)
{ {
__f(__it); __f(__it);
++__it; ++__it;
} }
__os_starts[__cur] = __it; __os_starts[__cur] = __it;
} }
// Must compare for end and not __cur < __os_starts.size() , because // Must compare for end and not __cur < __os_starts.size() , because
// __cur could be == __os_starts.size() as well // __cur could be == __os_starts.size() as well
if (__it == __end) if (__it == __end)
break; break;
__shrink_and_double(__os_starts, __count_to_two, __range_length, __make_twice); __shrink_and_double(__os_starts, __count_to_two, __range_length,
__next = __os_starts.size() / 2 + 1; __make_twice);
} __next = __os_starts.size() / 2 + 1;
}
// Calculation of the parts (one must be extracted from __current // Calculation of the parts (one must be extracted from __current
// because the partition beginning at __end, consists only of // because the partition beginning at __end, consists only of
...@@ -152,19 +153,20 @@ namespace __gnu_parallel ...@@ -152,19 +153,20 @@ namespace __gnu_parallel
// Smallest partitions. // Smallest partitions.
for (int __i = 1; __i < (__num_parts + 1 - __size_greater); ++__i) for (int __i = 1; __i < (__num_parts + 1 - __size_greater); ++__i)
{ {
__lengths[__i - 1] = __size_part * __range_length; __lengths[__i - 1] = __size_part * __range_length;
__index += __size_part; __index += __size_part;
__starts[__i] = __os_starts[__index]; __starts[__i] = __os_starts[__index];
} }
// Biggest partitions. // Biggest partitions.
for (int __i = __num_parts + 1 - __size_greater; __i <= __num_parts; ++__i) for (int __i = __num_parts + 1 - __size_greater; __i <= __num_parts;
{ ++__i)
__lengths[__i - 1] = (__size_part+1) * __range_length; {
__index += (__size_part+1); __lengths[__i - 1] = (__size_part+1) * __range_length;
__starts[__i] = __os_starts[__index]; __index += (__size_part+1);
} __starts[__i] = __os_starts[__index];
}
// Correction of the end size (the end iteration has not finished). // Correction of the end size (the end iteration has not finished).
__lengths[__num_parts - 1] -= (__dist_limit - __dist); __lengths[__num_parts - 1] -= (__dist_limit - __dist);
......
...@@ -106,7 +106,8 @@ public: ...@@ -106,7 +106,8 @@ public:
_M_offset = _M_k; _M_offset = _M_k;
// Avoid default-constructing _M_losers[]._M_key // Avoid default-constructing _M_losers[]._M_key
_M_losers = static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser))); _M_losers
= static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser)));
for (unsigned int __i = _M_ik - 1; __i < _M_k; ++__i) for (unsigned int __i = _M_ik - 1; __i < _M_k; ++__i)
_M_losers[__i + _M_k]._M_sup = true; _M_losers[__i + _M_k]._M_sup = true;
...@@ -187,7 +188,7 @@ public: ...@@ -187,7 +188,7 @@ public:
unsigned int __right = __init_winner (2 * __root + 1); unsigned int __right = __init_winner (2 * __root + 1);
if (_M_losers[__right]._M_sup if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup || (!_M_losers[__left]._M_sup
&& !_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key))) && !_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key)))
{ {
// Left one is less or equal. // Left one is less or equal.
_M_losers[__root] = _M_losers[__right]; _M_losers[__root] = _M_losers[__right];
...@@ -223,7 +224,8 @@ public: ...@@ -223,7 +224,8 @@ public:
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
{ {
// The smaller one gets promoted, ties are broken by _M_source. // The smaller one gets promoted, ties are broken by _M_source.
if ((_M_sup && (!_M_losers[__pos]._M_sup || _M_losers[__pos]._M_source < _M_source)) if ((_M_sup && (!_M_losers[__pos]._M_sup
|| _M_losers[__pos]._M_source < _M_source))
|| (!_M_sup && !_M_losers[__pos]._M_sup || (!_M_sup && !_M_losers[__pos]._M_sup
&& ((_M_comp(_M_losers[__pos]._M_key, _M_key)) && ((_M_comp(_M_losers[__pos]._M_key, _M_key))
|| (!_M_comp(_M_key, _M_losers[__pos]._M_key) || (!_M_comp(_M_key, _M_losers[__pos]._M_key)
...@@ -280,9 +282,9 @@ public: ...@@ -280,9 +282,9 @@ public:
{ {
unsigned int __left = __init_winner (2 * __root); unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1); unsigned int __right = __init_winner (2 * __root + 1);
if (_M_losers[__right]._M_sup || if (_M_losers[__right]._M_sup
(!_M_losers[__left]._M_sup || (!_M_losers[__left]._M_sup
&& !_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key))) && !_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key)))
{ {
// Left one is less or equal. // Left one is less or equal.
_M_losers[__root] = _M_losers[__right]; _M_losers[__root] = _M_losers[__right];
...@@ -320,7 +322,8 @@ public: ...@@ -320,7 +322,8 @@ public:
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
{ {
// The smaller one gets promoted. // The smaller one gets promoted.
if (_M_sup || (!_M_losers[__pos]._M_sup && _M_comp(_M_losers[__pos]._M_key, _M_key))) if (_M_sup || (!_M_losers[__pos]._M_sup
&& _M_comp(_M_losers[__pos]._M_key, _M_key)))
{ {
// The other one is smaller. // The other one is smaller.
std::swap(_M_losers[__pos]._M_sup, _M_sup); std::swap(_M_losers[__pos]._M_sup, _M_sup);
...@@ -414,8 +417,9 @@ public: ...@@ -414,8 +417,9 @@ public:
unsigned int __left = __init_winner (2 * __root); unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1); unsigned int __right = __init_winner (2 * __root + 1);
if (_M_losers[__right]._M_sup if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup && !_M_comp(*_M_losers[__right]._M_keyp, || (!_M_losers[__left]._M_sup
*_M_losers[__left]._M_keyp))) && !_M_comp(*_M_losers[__right]._M_keyp,
*_M_losers[__left]._M_keyp)))
{ {
// Left one is less or equal. // Left one is less or equal.
_M_losers[__root] = _M_losers[__right]; _M_losers[__root] = _M_losers[__right];
...@@ -445,7 +449,8 @@ public: ...@@ -445,7 +449,8 @@ public:
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
{ {
// The smaller one gets promoted, ties are broken by _M_source. // The smaller one gets promoted, ties are broken by _M_source.
if ((_M_sup && (!_M_losers[__pos]._M_sup || _M_losers[__pos]._M_source < _M_source)) || if ((_M_sup && (!_M_losers[__pos]._M_sup ||
_M_losers[__pos]._M_source < _M_source)) ||
(!_M_sup && !_M_losers[__pos]._M_sup && (!_M_sup && !_M_losers[__pos]._M_sup &&
((_M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)) || ((_M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)) ||
(!_M_comp(*_M_keyp, *_M_losers[__pos]._M_keyp) (!_M_comp(*_M_keyp, *_M_losers[__pos]._M_keyp)
...@@ -495,7 +500,8 @@ public: ...@@ -495,7 +500,8 @@ public:
unsigned int __right = __init_winner (2 * __root + 1); unsigned int __right = __init_winner (2 * __root + 1);
if (_M_losers[__right]._M_sup if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup || (!_M_losers[__left]._M_sup
&& !_M_comp(*_M_losers[__right]._M_keyp, *_M_losers[__left]._M_keyp))) && !_M_comp(*_M_losers[__right]._M_keyp,
*_M_losers[__left]._M_keyp)))
{ {
// Left one is less or equal. // Left one is less or equal.
_M_losers[__root] = _M_losers[__right]; _M_losers[__root] = _M_losers[__right];
...@@ -525,7 +531,8 @@ public: ...@@ -525,7 +531,8 @@ public:
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
{ {
// The smaller one gets promoted. // The smaller one gets promoted.
if (_M_sup || (!_M_losers[__pos]._M_sup && _M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp))) if (_M_sup || (!_M_losers[__pos]._M_sup
&& _M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)))
{ {
// The other one is smaller. // The other one is smaller.
std::swap(_M_losers[__pos]._M_sup, _M_sup); std::swap(_M_losers[__pos]._M_sup, _M_sup);
...@@ -576,7 +583,8 @@ public: ...@@ -576,7 +583,8 @@ public:
_M_k = 1 << (__log2(_M_ik - 1) + 1); _M_k = 1 << (__log2(_M_ik - 1) + 1);
_M_offset = _M_k; _M_offset = _M_k;
// Avoid default-constructing _M_losers[]._M_key // Avoid default-constructing _M_losers[]._M_key
_M_losers = static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser))); _M_losers
= static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser)));
for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i) for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i)
{ {
...@@ -677,7 +685,8 @@ public: ...@@ -677,7 +685,8 @@ public:
{ {
// The smaller one gets promoted, ties are broken by _M_source. // The smaller one gets promoted, ties are broken by _M_source.
if (_M_comp(_M_losers[__pos]._M_key, _M_key) if (_M_comp(_M_losers[__pos]._M_key, _M_key)
|| (!_M_comp(_M_key, _M_losers[__pos]._M_key) && _M_losers[__pos]._M_source < _M_source)) || (!_M_comp(_M_key, _M_losers[__pos]._M_key)
&& _M_losers[__pos]._M_source < _M_source))
{ {
// The other one is smaller. // The other one is smaller.
std::swap(_M_losers[__pos]._M_source, _M_source); std::swap(_M_losers[__pos]._M_source, _M_source);
...@@ -914,7 +923,8 @@ public: ...@@ -914,7 +923,8 @@ public:
{ {
// The smaller one gets promoted, ties are broken by _M_source. // The smaller one gets promoted, ties are broken by _M_source.
if (_M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp) if (_M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)
|| (!_M_comp(*_M_keyp, *_M_losers[__pos]._M_keyp) && _M_losers[__pos]._M_source < _M_source)) || (!_M_comp(*_M_keyp, *_M_losers[__pos]._M_keyp)
&& _M_losers[__pos]._M_source < _M_source))
{ {
// The other one is smaller. // The other one is smaller.
std::swap(_M_losers[__pos]._M_source, _M_source); std::swap(_M_losers[__pos]._M_source, _M_source);
......
...@@ -51,36 +51,36 @@ namespace __gnu_parallel ...@@ -51,36 +51,36 @@ namespace __gnu_parallel
* @param __comp Comparator. * @param __comp Comparator.
* @return Output end iterator. */ * @return Output end iterator. */
template<typename _RAIter1, typename _RAIter2,
         typename _OutputIterator, typename _DifferenceTp,
         typename _Compare>
  _OutputIterator
  __merge_advance_usual(_RAIter1& __begin1, _RAIter1 __end1,
                        _RAIter2& __begin2, _RAIter2 __end2,
                        _OutputIterator __target,
                        _DifferenceTp __max_length, _Compare __comp)
  {
    // Classic two-way merge.  Both begin iterators are taken by reference
    // and are advanced past every element that has been written out.
    while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0)
      {
        // Take from the second sequence only if its head is strictly
        // smaller; on ties the first sequence wins.
        if (__comp(*__begin2, *__begin1))
          *__target++ = *__begin2++;
        else
          *__target++ = *__begin1++;
        --__max_length;
      }

    // At most one sequence still has elements that may be emitted.  Copy
    // no more than what is actually left in it, so that a __max_length
    // larger than the remaining input cannot read past the end.
    if (__begin1 != __end1)
      {
        _DifferenceTp __rest = static_cast<_DifferenceTp>(__end1 - __begin1);
        if (__max_length < __rest)
          __rest = __max_length;
        if (__rest > 0)
          {
            __target = std::copy(__begin1, __begin1 + __rest, __target);
            __begin1 += __rest;
          }
      }
    else
      {
        _DifferenceTp __rest = static_cast<_DifferenceTp>(__end2 - __begin2);
        if (__max_length < __rest)
          __rest = __max_length;
        if (__rest > 0)
          {
            __target = std::copy(__begin2, __begin2 + __rest, __target);
            __begin2 += __rest;
          }
      }
    return __target;
  }
...@@ -100,56 +100,56 @@ namespace __gnu_parallel ...@@ -100,56 +100,56 @@ namespace __gnu_parallel
* @param __comp Comparator. * @param __comp Comparator.
* @return Output end iterator. */ * @return Output end iterator. */
template<typename _RAIter1, typename _RAIter2,
         typename _OutputIterator, typename _DifferenceTp,
         typename _Compare>
  _OutputIterator
  __merge_advance_movc(_RAIter1& __begin1, _RAIter1 __end1,
                       _RAIter2& __begin2, _RAIter2 __end2,
                       _OutputIterator __target,
                       _DifferenceTp __max_length, _Compare __comp)
  {
    typedef typename std::iterator_traits<_RAIter1>::value_type
      value_type1;
    typedef typename std::iterator_traits<_RAIter2>::value_type
      value_type2;

#if _GLIBCXX_ASSERTIONS
    _GLIBCXX_PARALLEL_ASSERT(__max_length >= 0);
#endif

    // Two-way merge in which both heads and both successor positions are
    // loaded before the comparison; the comparison then only selects
    // which value is written and which iterator moves on.
    // NOTE(review): the name suggests this shape is meant to allow
    // conditional moves instead of branches -- confirm.
    while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0)
      {
        _RAIter1 __next1 = __begin1 + 1;
        _RAIter2 __next2 = __begin2 + 1;
        value_type1 __element1 = *__begin1;
        value_type2 __element2 = *__begin2;

        // The second sequence wins only if its head is strictly smaller;
        // on ties the element of the first sequence is emitted.
        if (__comp(__element2, __element1))
          {
            __element1 = __element2;
            __begin2 = __next2;
          }
        else
          __begin1 = __next1;

        *__target = __element1;

        ++__target;
        --__max_length;
      }

    // At most one sequence still has elements that may be emitted.  Copy
    // no more than what is actually left in it, so that a __max_length
    // larger than the remaining input cannot read past the end.
    if (__begin1 != __end1)
      {
        _DifferenceTp __rest = static_cast<_DifferenceTp>(__end1 - __begin1);
        if (__max_length < __rest)
          __rest = __max_length;
        if (__rest > 0)
          {
            __target = std::copy(__begin1, __begin1 + __rest, __target);
            __begin1 += __rest;
          }
      }
    else
      {
        _DifferenceTp __rest = static_cast<_DifferenceTp>(__end2 - __begin2);
        if (__max_length < __rest)
          __rest = __max_length;
        if (__rest > 0)
          {
            __target = std::copy(__begin2, __begin2 + __rest, __target);
            __begin2 += __rest;
          }
      }
    return __target;
  }
...@@ -168,18 +168,18 @@ namespace __gnu_parallel ...@@ -168,18 +168,18 @@ namespace __gnu_parallel
* @param __comp Comparator. * @param __comp Comparator.
* @return Output end iterator. */ * @return Output end iterator. */
template<typename _RAIter1, typename _RAIter2, template<typename _RAIter1, typename _RAIter2,
typename _OutputIterator, typename _DifferenceTp, typename _OutputIterator, typename _DifferenceTp,
typename _Compare> typename _Compare>
inline _OutputIterator inline _OutputIterator
__merge_advance(_RAIter1& __begin1, _RAIter1 __end1, __merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
_RAIter2& __begin2, _RAIter2 __end2, _RAIter2& __begin2, _RAIter2 __end2,
_OutputIterator __target, _DifferenceTp __max_length, _OutputIterator __target, _DifferenceTp __max_length,
_Compare __comp) _Compare __comp)
{ {
_GLIBCXX_CALL(__max_length) _GLIBCXX_CALL(__max_length)
return __merge_advance_movc(__begin1, __end1, __begin2, __end2, __target, return __merge_advance_movc(__begin1, __end1, __begin2, __end2, __target,
__max_length, __comp); __max_length, __comp);
} }
/** @brief Merge routine fallback to sequential in case the /** @brief Merge routine fallback to sequential in case the
...@@ -193,19 +193,19 @@ namespace __gnu_parallel ...@@ -193,19 +193,19 @@ namespace __gnu_parallel
* @param __comp Comparator. * @param __comp Comparator.
* @return Output end iterator. */ * @return Output end iterator. */
template<typename _RAIter1, typename _RAIter2,
         typename _RAIter3, typename _Compare>
  inline _RAIter3
  __parallel_merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
                           _RAIter2& __begin2, _RAIter2 __end2,
                           _RAIter3 __target,
                           typename std::iterator_traits<_RAIter1>::
                             difference_type __max_length,
                           _Compare __comp)
  {
    // Different iterators, parallel implementation not available:
    // forward everything unchanged to __merge_advance.
    return __merge_advance(__begin1, __end1, __begin2, __end2, __target,
                           __max_length, __comp);
  }
/** @brief Parallel merge routine being able to merge only the @c
 * __max_length smallest elements.
...@@ -223,28 +223,28 @@ namespace __gnu_parallel ...@@ -223,28 +223,28 @@ namespace __gnu_parallel
* @return Output end iterator. * @return Output end iterator.
*/ */
template<typename _RAIter1, typename _RAIter3, template<typename _RAIter1, typename _RAIter3,
typename _Compare> typename _Compare>
inline _RAIter3 inline _RAIter3
__parallel_merge_advance(_RAIter1& __begin1, __parallel_merge_advance(_RAIter1& __begin1,
_RAIter1 __end1, _RAIter1 __end1,
_RAIter1& __begin2, _RAIter1& __begin2,
_RAIter1 __end2, _RAIter1 __end2,
_RAIter3 __target, typename _RAIter3 __target, typename
std::iterator_traits<_RAIter1>:: std::iterator_traits<_RAIter1>::
difference_type __max_length, _Compare __comp) difference_type __max_length, _Compare __comp)
{ {
typedef typename typedef typename
std::iterator_traits<_RAIter1>::value_type _ValueType; std::iterator_traits<_RAIter1>::value_type _ValueType;
typedef typename std::iterator_traits<_RAIter1>:: typedef typename std::iterator_traits<_RAIter1>::
difference_type _DifferenceType1 /* == difference_type2 */; difference_type _DifferenceType1 /* == difference_type2 */;
typedef typename std::iterator_traits<_RAIter3>:: typedef typename std::iterator_traits<_RAIter3>::
difference_type _DifferenceType3; difference_type _DifferenceType3;
typedef typename std::pair<_RAIter1, _RAIter1> typedef typename std::pair<_RAIter1, _RAIter1>
_IteratorPair; _IteratorPair;
_IteratorPair _IteratorPair
seqs[2] = { std::make_pair(__begin1, __end1), seqs[2] = { std::make_pair(__begin1, __end1),
std::make_pair(__begin2, __end2) }; std::make_pair(__begin2, __end2) };
_RAIter3 _RAIter3
__target_end = parallel_multiway_merge __target_end = parallel_multiway_merge
< /* __stable = */ true, /* __sentinels = */ false>( < /* __stable = */ true, /* __sentinels = */ false>(
...@@ -256,6 +256,6 @@ namespace __gnu_parallel ...@@ -256,6 +256,6 @@ namespace __gnu_parallel
return __target_end; return __target_end;
} }
} //namespace __gnu_parallel } //namespace __gnu_parallel
#endif /* _GLIBCXX_PARALLEL_MERGE_H */ #endif /* _GLIBCXX_PARALLEL_MERGE_H */
...@@ -111,7 +111,7 @@ template<typename _RAIter, typename _DifferenceTp> ...@@ -111,7 +111,7 @@ template<typename _RAIter, typename _DifferenceTp>
for (_DifferenceType __i = 0; __i < __num_samples; ++__i) for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
::new(&(__sd->_M_samples[__iam * __num_samples + __i])) ::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
_ValueType(__sd->_M_source[__sd->_M_starts[__iam] + __es[__i + 1]]); _ValueType(__sd->_M_source[__sd->_M_starts[__iam] + __es[__i + 1]]);
delete[] __es; delete[] __es;
} }
...@@ -144,25 +144,28 @@ template<typename _RAIter, typename _Compare, ...@@ -144,25 +144,28 @@ template<typename _RAIter, typename _Compare,
seqs(__sd->_M_num_threads); seqs(__sd->_M_num_threads);
for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
seqs[__s] = std::make_pair(__sd->_M_temporary[__s], seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
__sd->_M_temporary[__s] __sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s])); + (__sd->_M_starts[__s + 1]
- __sd->_M_starts[__s]));
std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads); std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads);
// if not last thread // if not last thread
if (__iam < __sd->_M_num_threads - 1) if (__iam < __sd->_M_num_threads - 1)
multiseq_partition(seqs.begin(), seqs.end(), multiseq_partition(seqs.begin(), seqs.end(),
__sd->_M_starts[__iam + 1], _M_offsets.begin(), __comp); __sd->_M_starts[__iam + 1], _M_offsets.begin(),
__comp);
for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++) for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++)
{ {
// for each sequence // for each sequence
if (__iam < (__sd->_M_num_threads - 1)) if (__iam < (__sd->_M_num_threads - 1))
__sd->_M_pieces[__iam][__seq]._M_end = _M_offsets[__seq] - seqs[__seq].first; __sd->_M_pieces[__iam][__seq]._M_end
= _M_offsets[__seq] - seqs[__seq].first;
else else
// very end of this sequence // very end of this sequence
__sd->_M_pieces[__iam][__seq]._M_end = __sd->_M_pieces[__iam][__seq]._M_end =
__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq]; __sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
} }
# pragma omp barrier # pragma omp barrier
...@@ -171,7 +174,8 @@ template<typename _RAIter, typename _Compare, ...@@ -171,7 +174,8 @@ template<typename _RAIter, typename _Compare,
{ {
// For each sequence. // For each sequence.
if (__iam > 0) if (__iam > 0)
__sd->_M_pieces[__iam][__seq]._M_begin = __sd->_M_pieces[__iam - 1][__seq]._M_end; __sd->_M_pieces[__iam][__seq]._M_begin =
__sd->_M_pieces[__iam - 1][__seq]._M_end;
else else
// Absolute beginning. // Absolute beginning.
__sd->_M_pieces[__iam][__seq]._M_begin = 0; __sd->_M_pieces[__iam][__seq]._M_begin = 0;
...@@ -204,7 +208,8 @@ template<typename _RAIter, typename _Compare, ...@@ -204,7 +208,8 @@ template<typename _RAIter, typename _Compare,
# pragma omp single # pragma omp single
__gnu_sequential::sort(__sd->_M_samples, __gnu_sequential::sort(__sd->_M_samples,
__sd->_M_samples + (__num_samples * __sd->_M_num_threads), __sd->_M_samples
+ (__num_samples * __sd->_M_num_threads),
__comp); __comp);
# pragma omp barrier # pragma omp barrier
...@@ -224,17 +229,19 @@ template<typename _RAIter, typename _Compare, ...@@ -224,17 +229,19 @@ template<typename _RAIter, typename _Compare,
// Absolute beginning. // Absolute beginning.
__sd->_M_pieces[__iam][__s]._M_begin = 0; __sd->_M_pieces[__iam][__s]._M_begin = 0;
if ((__num_samples * (__iam + 1)) < (__num_samples * __sd->_M_num_threads)) if ((__num_samples * (__iam + 1)) <
(__num_samples * __sd->_M_num_threads))
__sd->_M_pieces[__iam][__s]._M_end = __sd->_M_pieces[__iam][__s]._M_end =
std::lower_bound(__sd->_M_temporary[__s], std::lower_bound(__sd->_M_temporary[__s],
__sd->_M_temporary[__s] __sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]), + (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]),
__sd->_M_samples[__num_samples * (__iam + 1)], __sd->_M_samples[__num_samples * (__iam + 1)],
__comp) __comp)
- __sd->_M_temporary[__s]; - __sd->_M_temporary[__s];
else else
// Absolute end. // Absolute end.
__sd->_M_pieces[__iam][__s]._M_end = __sd->_M_starts[__s + 1] - __sd->_M_starts[__s]; __sd->_M_pieces[__iam][__s]._M_end = __sd->_M_starts[__s + 1]
- __sd->_M_starts[__s];
} }
} }
}; };
...@@ -283,8 +290,8 @@ template<typename Seq_RAIter, typename _RAIter, ...@@ -283,8 +290,8 @@ template<typename Seq_RAIter, typename _RAIter,
_Compare& __comp, _Compare& __comp,
DiffType __length_am) const DiffType __length_am) const
{ {
stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp, stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
sequential_tag()); __comp, sequential_tag());
} }
}; };
...@@ -322,7 +329,8 @@ template<bool __stable, bool __exact, typename _RAIter, ...@@ -322,7 +329,8 @@ template<bool __stable, bool __exact, typename _RAIter,
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
// Length of this thread's chunk, before merging. // Length of this thread's chunk, before merging.
_DifferenceType __length_local = __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam]; _DifferenceType __length_local
= __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
// Sort in temporary storage, leave space for sentinel. // Sort in temporary storage, leave space for sentinel.
...@@ -333,12 +341,15 @@ template<bool __stable, bool __exact, typename _RAIter, ...@@ -333,12 +341,15 @@ template<bool __stable, bool __exact, typename _RAIter,
::operator new(sizeof(_ValueType) * (__length_local + 1))); ::operator new(sizeof(_ValueType) * (__length_local + 1)));
// Copy there. // Copy there.
std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam], std::uninitialized_copy(
__sd->_M_source + __sd->_M_starts[__iam] + __length_local, __sd->_M_source + __sd->_M_starts[__iam],
__sd->_M_temporary[__iam]); __sd->_M_source + __sd->_M_starts[__iam] + __length_local,
__sd->_M_temporary[__iam]);
__possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>() __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
(__sd->_M_temporary[__iam], __sd->_M_temporary[__iam] + __length_local, __comp); (__sd->_M_temporary[__iam],
__sd->_M_temporary[__iam] + __length_local,
__comp);
// Invariant: locally sorted subsequence in sd->_M_temporary[__iam], // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
// __sd->_M_temporary[__iam] + __length_local. // __sd->_M_temporary[__iam] + __length_local.
...@@ -355,7 +366,8 @@ template<bool __stable, bool __exact, typename _RAIter, ...@@ -355,7 +366,8 @@ template<bool __stable, bool __exact, typename _RAIter,
_DifferenceType __offset = 0, __length_am = 0; _DifferenceType __offset = 0, __length_am = 0;
for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
{ {
__length_am += __sd->_M_pieces[__iam][__s]._M_end - __sd->_M_pieces[__iam][__s]._M_begin; __length_am += __sd->_M_pieces[__iam][__s]._M_end
- __sd->_M_pieces[__iam][__s]._M_begin;
__offset += __sd->_M_pieces[__iam][__s]._M_begin; __offset += __sd->_M_pieces[__iam][__s]._M_begin;
} }
...@@ -367,8 +379,9 @@ template<bool __stable, bool __exact, typename _RAIter, ...@@ -367,8 +379,9 @@ template<bool __stable, bool __exact, typename _RAIter,
for (int __s = 0; __s < __sd->_M_num_threads; ++__s) for (int __s = 0; __s < __sd->_M_num_threads; ++__s)
{ {
seqs[__s] = seqs[__s] =
std::make_pair(__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin, std::make_pair(
__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end); __sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin,
__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end);
} }
__possibly_stable_multiway_merge< __possibly_stable_multiway_merge<
...@@ -420,7 +433,7 @@ template<bool __stable, bool __exact, typename _RAIter, ...@@ -420,7 +433,7 @@ template<bool __stable, bool __exact, typename _RAIter,
# pragma omp parallel num_threads(__num_threads) # pragma omp parallel num_threads(__num_threads)
{ {
__num_threads = omp_get_num_threads(); //no more threads than requested __num_threads = omp_get_num_threads(); //no more threads than requested
# pragma omp single # pragma omp single
{ {
...@@ -432,7 +445,7 @@ template<bool __stable, bool __exact, typename _RAIter, ...@@ -432,7 +445,7 @@ template<bool __stable, bool __exact, typename _RAIter,
if (!__exact) if (!__exact)
{ {
_DifferenceType size = _DifferenceType size =
(_Settings::get().sort_mwms_oversampling * __num_threads - 1) (_Settings::get().sort_mwms_oversampling * __num_threads - 1)
* __num_threads; * __num_threads;
__sd._M_samples = static_cast<_ValueType*>( __sd._M_samples = static_cast<_ValueType*>(
::operator new(size * sizeof(_ValueType))); ::operator new(size * sizeof(_ValueType)));
...@@ -441,10 +454,12 @@ template<bool __stable, bool __exact, typename _RAIter, ...@@ -441,10 +454,12 @@ template<bool __stable, bool __exact, typename _RAIter,
__sd._M_samples = NULL; __sd._M_samples = NULL;
__sd._M_offsets = new _DifferenceType[__num_threads - 1]; __sd._M_offsets = new _DifferenceType[__num_threads - 1];
__sd._M_pieces = new std::vector<_Piece<_DifferenceType> >[__num_threads]; __sd._M_pieces
= new std::vector<_Piece<_DifferenceType> >[__num_threads];
for (int __s = 0; __s < __num_threads; ++__s) for (int __s = 0; __s < __num_threads; ++__s)
__sd._M_pieces[__s].resize(__num_threads); __sd._M_pieces[__s].resize(__num_threads);
_M_starts = __sd._M_starts = new _DifferenceType[__num_threads + 1]; _M_starts = __sd._M_starts
= new _DifferenceType[__num_threads + 1];
_DifferenceType __chunk_length = __n / __num_threads; _DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __split = __n % __num_threads; _DifferenceType __split = __n % __num_threads;
...@@ -452,7 +467,8 @@ template<bool __stable, bool __exact, typename _RAIter, ...@@ -452,7 +467,8 @@ template<bool __stable, bool __exact, typename _RAIter,
for (int __i = 0; __i < __num_threads; ++__i) for (int __i = 0; __i < __num_threads; ++__i)
{ {
_M_starts[__i] = __pos; _M_starts[__i] = __pos;
__pos += (__i < __split) ? (__chunk_length + 1) : __chunk_length; __pos += (__i < __split)
? (__chunk_length + 1) : __chunk_length;
} }
_M_starts[__num_threads] = __pos; _M_starts[__num_threads] = __pos;
} //single } //single
......
...@@ -61,24 +61,24 @@ namespace __parallel ...@@ -61,24 +61,24 @@ namespace __parallel
template<typename _IIter, typename _Tp, typename _BinaryOper> template<typename _IIter, typename _Tp, typename _BinaryOper>
_Tp _Tp
accumulate(_IIter, _IIter, _Tp, _BinaryOper, accumulate(_IIter, _IIter, _Tp, _BinaryOper,
__gnu_parallel::sequential_tag); __gnu_parallel::sequential_tag);
template<typename _IIter, typename _Tp, typename _BinaryOper> template<typename _IIter, typename _Tp, typename _BinaryOper>
_Tp _Tp
accumulate(_IIter, _IIter, _Tp, _BinaryOper, accumulate(_IIter, _IIter, _Tp, _BinaryOper,
__gnu_parallel::_Parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter, typename _Tp, typename _BinaryOper, template<typename _IIter, typename _Tp, typename _BinaryOper,
typename _Tag> typename _Tag>
_Tp _Tp
__accumulate_switch(_IIter, _IIter, _Tp, _BinaryOper, _Tag); __accumulate_switch(_IIter, _IIter, _Tp, _BinaryOper, _Tag);
template<typename _RAIter, typename _Tp, typename _BinaryOper> template<typename _RAIter, typename _Tp, typename _BinaryOper>
_Tp _Tp
__accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper, __accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper,
random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::_Parallelism __parallelism __gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_unbalanced); = __gnu_parallel::parallel_unbalanced);
template<typename _IIter, typename _OIter> template<typename _IIter, typename _OIter>
_OIter _OIter
...@@ -91,36 +91,36 @@ namespace __parallel ...@@ -91,36 +91,36 @@ namespace __parallel
template<typename _IIter, typename _OIter> template<typename _IIter, typename _OIter>
_OIter _OIter
adjacent_difference(_IIter, _IIter, _OIter, adjacent_difference(_IIter, _IIter, _OIter,
__gnu_parallel::sequential_tag); __gnu_parallel::sequential_tag);
template<typename _IIter, typename _OIter, typename _BinaryOper> template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter _OIter
adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper, adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper,
__gnu_parallel::sequential_tag); __gnu_parallel::sequential_tag);
template<typename _IIter, typename _OIter> template<typename _IIter, typename _OIter>
_OIter _OIter
adjacent_difference(_IIter, _IIter, _OIter, adjacent_difference(_IIter, _IIter, _OIter,
__gnu_parallel::_Parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter, typename _OIter, typename _BinaryOper> template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter _OIter
adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper, adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper,
__gnu_parallel::_Parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter, typename _OIter, typename _BinaryOper, template<typename _IIter, typename _OIter, typename _BinaryOper,
typename _Tag1, typename _Tag2> typename _Tag1, typename _Tag2>
_OIter _OIter
__adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper, __adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
_Tag1, _Tag2); _Tag1, _Tag2);
template<typename _IIter, typename _OIter, typename _BinaryOper> template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter _OIter
__adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper, __adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
random_access_iterator_tag, random_access_iterator_tag,
random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::_Parallelism __parallelism __gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_unbalanced); = __gnu_parallel::parallel_unbalanced);
template<typename _IIter1, typename _IIter2, typename _Tp> template<typename _IIter1, typename _IIter2, typename _Tp>
_Tp _Tp
...@@ -129,46 +129,46 @@ namespace __parallel ...@@ -129,46 +129,46 @@ namespace __parallel
template<typename _IIter1, typename _IIter2, typename _Tp> template<typename _IIter1, typename _IIter2, typename _Tp>
_Tp _Tp
inner_product(_IIter1, _IIter1, _IIter2, _Tp, inner_product(_IIter1, _IIter1, _IIter2, _Tp,
__gnu_parallel::sequential_tag); __gnu_parallel::sequential_tag);
template<typename _IIter1, typename _IIter2, typename _Tp> template<typename _IIter1, typename _IIter2, typename _Tp>
_Tp _Tp
inner_product(_IIter1, _IIter1, _IIter2, _Tp, inner_product(_IIter1, _IIter1, _IIter2, _Tp,
__gnu_parallel::_Parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter1, typename _IIter2, typename _Tp, template<typename _IIter1, typename _IIter2, typename _Tp,
typename _BinaryFunction1, typename _BinaryFunction2> typename _BinaryFunction1, typename _BinaryFunction2>
_Tp _Tp
inner_product(_IIter1, _IIter1, _IIter2, _Tp, inner_product(_IIter1, _IIter1, _IIter2, _Tp,
_BinaryFunction1, _BinaryFunction2); _BinaryFunction1, _BinaryFunction2);
template<typename _IIter1, typename _IIter2, typename _Tp, template<typename _IIter1, typename _IIter2, typename _Tp,
typename _BinaryFunction1, typename _BinaryFunction2> typename _BinaryFunction1, typename _BinaryFunction2>
_Tp _Tp
inner_product(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1, inner_product(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1,
_BinaryFunction2, __gnu_parallel::sequential_tag); _BinaryFunction2, __gnu_parallel::sequential_tag);
template<typename _IIter1, typename _IIter2, typename _Tp, template<typename _IIter1, typename _IIter2, typename _Tp,
typename BinaryFunction1, typename BinaryFunction2> typename BinaryFunction1, typename BinaryFunction2>
_Tp _Tp
inner_product(_IIter1, _IIter1, _IIter2, _Tp, BinaryFunction1, inner_product(_IIter1, _IIter1, _IIter2, _Tp, BinaryFunction1,
BinaryFunction2, __gnu_parallel::_Parallelism); BinaryFunction2, __gnu_parallel::_Parallelism);
template<typename _RAIter1, typename _RAIter2, typename _Tp, template<typename _RAIter1, typename _RAIter2, typename _Tp,
typename BinaryFunction1, typename BinaryFunction2> typename BinaryFunction1, typename BinaryFunction2>
_Tp _Tp
__inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1, __inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1,
BinaryFunction2, random_access_iterator_tag, BinaryFunction2, random_access_iterator_tag,
random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::_Parallelism __gnu_parallel::_Parallelism
= __gnu_parallel::parallel_unbalanced); = __gnu_parallel::parallel_unbalanced);
template<typename _IIter1, typename _IIter2, typename _Tp, template<typename _IIter1, typename _IIter2, typename _Tp,
typename _BinaryFunction1, typename _BinaryFunction2, typename _BinaryFunction1, typename _BinaryFunction2,
typename _Tag1, typename _Tag2> typename _Tag1, typename _Tag2>
_Tp _Tp
__inner_product_switch(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1, __inner_product_switch(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1,
_BinaryFunction2, _Tag1, _Tag2); _BinaryFunction2, _Tag1, _Tag2);
template<typename _IIter, typename _OIter> template<typename _IIter, typename _OIter>
...@@ -178,7 +178,7 @@ namespace __parallel ...@@ -178,7 +178,7 @@ namespace __parallel
template<typename _IIter, typename _OIter, typename _BinaryOper> template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter _OIter
partial_sum(_IIter, _IIter, _OIter, _BinaryOper, partial_sum(_IIter, _IIter, _OIter, _BinaryOper,
__gnu_parallel::sequential_tag); __gnu_parallel::sequential_tag);
template<typename _IIter, typename _OIter> template<typename _IIter, typename _OIter>
_OIter _OIter
...@@ -189,14 +189,14 @@ namespace __parallel ...@@ -189,14 +189,14 @@ namespace __parallel
partial_sum(_IIter, _IIter, _OIter, _BinaryOper); partial_sum(_IIter, _IIter, _OIter, _BinaryOper);
template<typename _IIter, typename _OIter, typename _BinaryOper, template<typename _IIter, typename _OIter, typename _BinaryOper,
typename _Tag1, typename _Tag2> typename _Tag1, typename _Tag2>
_OIter _OIter
__partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2); __partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2);
template<typename _IIter, typename _OIter, typename _BinaryOper> template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter _OIter
__partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, __partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper,
random_access_iterator_tag, random_access_iterator_tag); random_access_iterator_tag, random_access_iterator_tag);
} // end namespace } // end namespace
} // end namespace } // end namespace
......
...@@ -59,18 +59,15 @@ namespace __gnu_parallel ...@@ -59,18 +59,15 @@ namespace __gnu_parallel
* @return User-supplied functor (that may contain a part of the result). * @return User-supplied functor (that may contain a part of the result).
*/ */
template<typename _RAIter, template<typename _RAIter,
typename _Op, typename _Op,
typename _Fu, typename _Fu,
typename _Red, typename _Red,
typename _Result> typename _Result>
_Op _Op
for_each_template_random_access_omp_loop(_RAIter __begin, for_each_template_random_access_omp_loop(
_RAIter __end, _RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r,
_Op __o, _Fu& __f, _Red __r, _Result __base, _Result __base, _Result& __output,
_Result& __output, typename std::iterator_traits<_RAIter>::difference_type __bound)
typename std::iterator_traits
<_RAIter>::
difference_type __bound)
{ {
typedef typename typedef typename
std::iterator_traits<_RAIter>::difference_type std::iterator_traits<_RAIter>::difference_type
...@@ -95,7 +92,7 @@ template<typename _RAIter, ...@@ -95,7 +92,7 @@ template<typename _RAIter,
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
# pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size) #pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size)
for (_DifferenceType __pos = 0; __pos < __length; ++__pos) for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
__thread_results[__iam] = __thread_results[__iam] =
__r(__thread_results[__iam], __f(__o, __begin+__pos)); __r(__thread_results[__iam], __f(__o, __begin+__pos));
......
...@@ -59,18 +59,15 @@ namespace __gnu_parallel ...@@ -59,18 +59,15 @@ namespace __gnu_parallel
* @return User-supplied functor (that may contain a part of the result). * @return User-supplied functor (that may contain a part of the result).
*/ */
template<typename _RAIter, template<typename _RAIter,
typename _Op, typename _Op,
typename _Fu, typename _Fu,
typename _Red, typename _Red,
typename _Result> typename _Result>
_Op _Op
for_each_template_random_access_omp_loop_static(_RAIter __begin, for_each_template_random_access_omp_loop_static(
_RAIter __end, _RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r,
_Op __o, _Fu& __f, _Red __r, _Result __base, _Result& __output,
_Result __base, _Result& __output, typename std::iterator_traits<_RAIter>::difference_type __bound)
typename std::iterator_traits
<_RAIter>::
difference_type __bound)
{ {
typedef typename typedef typename
std::iterator_traits<_RAIter>::difference_type std::iterator_traits<_RAIter>::difference_type
...@@ -95,9 +92,10 @@ template<typename _RAIter, ...@@ -95,9 +92,10 @@ template<typename _RAIter,
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
# pragma omp for schedule(static, _Settings::get().workstealing_chunk_size) #pragma omp for schedule(static, _Settings::get().workstealing_chunk_size)
for (_DifferenceType __pos = 0; __pos < __length; ++__pos) for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
__thread_results[__iam] = __r(__thread_results[__iam], __f(__o, __begin+__pos)); __thread_results[__iam] = __r(__thread_results[__iam],
__f(__o, __begin+__pos));
} //parallel } //parallel
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
......
...@@ -60,18 +60,15 @@ namespace __gnu_parallel ...@@ -60,18 +60,15 @@ namespace __gnu_parallel
* @return User-supplied functor (that may contain a part of the result). * @return User-supplied functor (that may contain a part of the result).
*/ */
template<typename _RAIter, template<typename _RAIter,
typename _Op, typename _Op,
typename _Fu, typename _Fu,
typename _Red, typename _Red,
typename _Result> typename _Result>
_Op _Op
for_each_template_random_access_ed(_RAIter __begin, for_each_template_random_access_ed(
_RAIter __end, _RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r,
_Op __o, _Fu& __f, _Red __r, _Result __base, _Result __base, _Result& __output,
_Result& __output, typename std::iterator_traits<_RAIter>::difference_type __bound)
typename std::iterator_traits
<_RAIter>::
difference_type __bound)
{ {
typedef std::iterator_traits<_RAIter> _TraitsType; typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType; typedef typename _TraitsType::difference_type _DifferenceType;
...@@ -87,15 +84,17 @@ template<typename _RAIter, ...@@ -87,15 +84,17 @@ template<typename _RAIter,
# pragma omp single # pragma omp single
{ {
__num_threads = omp_get_num_threads(); __num_threads = omp_get_num_threads();
__thread_results = static_cast<_Result*>( __thread_results =
::operator new(__num_threads * sizeof(_Result))); static_cast<_Result*>(
::operator new(__num_threads * sizeof(_Result)));
__constructed = new bool[__num_threads]; __constructed = new bool[__num_threads];
} }
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
// Neutral element. // Neutral element.
_Result* __reduct = static_cast<_Result*>(::operator new(sizeof(_Result))); _Result* __reduct =
static_cast<_Result*>(::operator new(sizeof(_Result)));
_DifferenceType _DifferenceType
__start = equally_split_point(__length, __num_threads, __iam), __start = equally_split_point(__length, __num_threads, __iam),
......
...@@ -52,13 +52,13 @@ namespace __gnu_parallel ...@@ -52,13 +52,13 @@ namespace __gnu_parallel
* element is unknown in general. * element is unknown in general.
* @return End iterator of output sequence. */ * @return End iterator of output sequence. */
template<typename _IIter, template<typename _IIter,
typename _OutputIterator, typename _OutputIterator,
typename _BinaryOperation> typename _BinaryOperation>
_OutputIterator _OutputIterator
__parallel_partial_sum_basecase(_IIter __begin, _IIter __end, __parallel_partial_sum_basecase(
_OutputIterator __result, _BinaryOperation __bin_op, _IIter __begin, _IIter __end, _OutputIterator __result,
typename std::iterator_traits _BinaryOperation __bin_op,
<_IIter>::value_type __value) typename std::iterator_traits <_IIter>::value_type __value)
{ {
if (__begin == __end) if (__begin == __end)
return __result; return __result;
...@@ -84,13 +84,13 @@ template<typename _IIter, ...@@ -84,13 +84,13 @@ template<typename _IIter,
* @return End iterator of output sequence. * @return End iterator of output sequence.
*/ */
template<typename _IIter, template<typename _IIter,
typename _OutputIterator, typename _OutputIterator,
typename _BinaryOperation> typename _BinaryOperation>
_OutputIterator _OutputIterator
__parallel_partial_sum_linear(_IIter __begin, _IIter __end, __parallel_partial_sum_linear(
_OutputIterator __result, _BinaryOperation __bin_op, _IIter __begin, _IIter __end, _OutputIterator __result,
typename std::iterator_traits _BinaryOperation __bin_op,
<_IIter>::difference_type __n) typename std::iterator_traits<_IIter>::difference_type __n)
{ {
typedef std::iterator_traits<_IIter> _TraitsType; typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::value_type _ValueType;
...@@ -128,8 +128,8 @@ template<typename _IIter, ...@@ -128,8 +128,8 @@ template<typename _IIter,
{ {
_DifferenceType __chunk_length = _DifferenceType __chunk_length =
((double)__n ((double)__n
/ ((double)__num_threads + __s.partial_sum_dilation)), / ((double)__num_threads + __s.partial_sum_dilation)),
__borderstart = __n - __num_threads * __chunk_length; __borderstart = __n - __num_threads * __chunk_length;
__borders[0] = 0; __borders[0] = 0;
for (int __i = 1; __i < (__num_threads + 1); ++__i) for (int __i = 1; __i < (__num_threads + 1); ++__i)
{ {
...@@ -140,7 +140,7 @@ template<typename _IIter, ...@@ -140,7 +140,7 @@ template<typename _IIter,
} }
__sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType) __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
* __num_threads)); * __num_threads));
_OutputIterator __target_end; _OutputIterator __target_end;
} //single } //single
...@@ -148,33 +148,35 @@ template<typename _IIter, ...@@ -148,33 +148,35 @@ template<typename _IIter,
if (__iam == 0) if (__iam == 0)
{ {
*__result = *__begin; *__result = *__begin;
__parallel_partial_sum_basecase(__begin + 1, __begin + __borders[1], __parallel_partial_sum_basecase(
__result + 1, __bin_op, *__begin); __begin + 1, __begin + __borders[1], __result + 1,
__bin_op, *__begin);
::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1)); ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
} }
else else
{ {
::new(&(__sums[__iam])) ::new(&(__sums[__iam]))
_ValueType(std::accumulate(__begin + __borders[__iam] + 1, _ValueType(std::accumulate(__begin + __borders[__iam] + 1,
__begin + __borders[__iam + 1], __begin + __borders[__iam + 1],
*(__begin + __borders[__iam]), *(__begin + __borders[__iam]),
__bin_op, __bin_op,
__gnu_parallel::sequential_tag())); __gnu_parallel::sequential_tag()));
} }
# pragma omp barrier # pragma omp barrier
# pragma omp single # pragma omp single
__parallel_partial_sum_basecase( __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
__sums + 1, __sums + __num_threads, __sums + 1, __bin_op, __sums[0]); __sums + 1, __bin_op, __sums[0]);
# pragma omp barrier # pragma omp barrier
// Still same team. // Still same team.
__parallel_partial_sum_basecase(__begin + __borders[__iam + 1], __parallel_partial_sum_basecase(
__begin + __borders[__iam + 2], __begin + __borders[__iam + 1],
__result + __borders[__iam + 1], __bin_op, __begin + __borders[__iam + 2],
__sums[__iam]); __result + __borders[__iam + 1],
__bin_op, __sums[__iam]);
} //parallel } //parallel
::operator delete(__sums); ::operator delete(__sums);
...@@ -190,8 +192,8 @@ template<typename _IIter, ...@@ -190,8 +192,8 @@ template<typename _IIter,
* @param __bin_op Associative binary function. * @param __bin_op Associative binary function.
* @return End iterator of output sequence. */ * @return End iterator of output sequence. */
template<typename _IIter, template<typename _IIter,
typename _OutputIterator, typename _OutputIterator,
typename _BinaryOperation> typename _BinaryOperation>
_OutputIterator _OutputIterator
__parallel_partial_sum(_IIter __begin, _IIter __end, __parallel_partial_sum(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op) _OutputIterator __result, _BinaryOperation __bin_op)
...@@ -208,7 +210,8 @@ template<typename _IIter, ...@@ -208,7 +210,8 @@ template<typename _IIter,
{ {
case LINEAR: case LINEAR:
// Need an initial offset. // Need an initial offset.
return __parallel_partial_sum_linear(__begin, __end, __result, __bin_op, __n); return __parallel_partial_sum_linear(
__begin, __end, __result, __bin_op, __n);
default: default:
// Partial_sum algorithm not implemented. // Partial_sum algorithm not implemented.
_GLIBCXX_PARALLEL_ASSERT(0); _GLIBCXX_PARALLEL_ASSERT(0);
......
...@@ -88,9 +88,10 @@ template<typename _RAIter, typename _Predicate> ...@@ -88,9 +88,10 @@ template<typename _RAIter, typename _Predicate>
__reserved_right = new bool[__num_threads]; __reserved_right = new bool[__num_threads];
if (__s.partition_chunk_share > 0.0) if (__s.partition_chunk_share > 0.0)
__chunk_size = std::max<_DifferenceType>(__s.partition_chunk_size, __chunk_size = std::max<_DifferenceType>(
(double)__n * __s.partition_chunk_share __s.partition_chunk_size,
/ (double)__num_threads); (double)__n * __s.partition_chunk_share /
(double)__num_threads);
else else
__chunk_size = __s.partition_chunk_size; __chunk_size = __s.partition_chunk_size;
} }
...@@ -99,7 +100,8 @@ template<typename _RAIter, typename _Predicate> ...@@ -99,7 +100,8 @@ template<typename _RAIter, typename _Predicate>
{ {
# pragma omp single # pragma omp single
{ {
_DifferenceType __num_chunks = (__right - __left + 1) / __chunk_size; _DifferenceType __num_chunks
= (__right - __left + 1) / __chunk_size;
for (int __r = 0; __r < __num_threads; ++__r) for (int __r = 0; __r < __num_threads; ++__r)
{ {
...@@ -198,7 +200,8 @@ template<typename _RAIter, typename _Predicate> ...@@ -198,7 +200,8 @@ template<typename _RAIter, typename _Predicate>
&& __thread_left_border >= __leftnew) && __thread_left_border >= __leftnew)
{ {
// Chunk already in place, reserve spot. // Chunk already in place, reserve spot.
__reserved_left[(__left - (__thread_left_border + 1)) / __chunk_size] __reserved_left
[(__left - (__thread_left_border + 1)) / __chunk_size]
= true; = true;
} }
...@@ -208,7 +211,7 @@ template<typename _RAIter, typename _Predicate> ...@@ -208,7 +211,7 @@ template<typename _RAIter, typename _Predicate>
{ {
// Chunk already in place, reserve spot. // Chunk already in place, reserve spot.
__reserved_right[((__thread_right_border - 1) - __right) __reserved_right[((__thread_right_border - 1) - __right)
/ __chunk_size] = true; / __chunk_size] = true;
} }
# pragma omp barrier # pragma omp barrier
...@@ -233,9 +236,9 @@ template<typename _RAIter, typename _Predicate> ...@@ -233,9 +236,9 @@ template<typename _RAIter, typename _Predicate>
#endif #endif
std::swap_ranges(__begin + __thread_left_border std::swap_ranges(__begin + __thread_left_border
- (__chunk_size - 1), - (__chunk_size - 1),
__begin + __thread_left_border + 1, __begin + __thread_left_border + 1,
__begin + __swapstart); __begin + __swapstart);
} }
if (thread_right >= __thread_right_border if (thread_right >= __thread_right_border
...@@ -257,9 +260,10 @@ template<typename _RAIter, typename _Predicate> ...@@ -257,9 +260,10 @@ template<typename _RAIter, typename _Predicate>
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
#endif #endif
std::swap_ranges(__begin + __thread_right_border, std::swap_ranges(
__begin + __thread_right_border + __chunk_size, __begin + __thread_right_border,
__begin + __swapstart); __begin + __thread_right_border + __chunk_size,
__begin + __swapstart);
} }
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
# pragma omp barrier # pragma omp barrier
...@@ -328,7 +332,7 @@ template<typename _RAIter, typename _Predicate> ...@@ -328,7 +332,7 @@ template<typename _RAIter, typename _Predicate>
template<typename _RAIter, typename _Compare> template<typename _RAIter, typename _Compare>
void void
parallel_nth_element(_RAIter __begin, _RAIter __nth, parallel_nth_element(_RAIter __begin, _RAIter __nth,
_RAIter __end, _Compare __comp) _RAIter __end, _Compare __comp)
{ {
typedef std::iterator_traits<_RAIter> _TraitsType; typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::value_type _ValueType;
...@@ -355,18 +359,19 @@ template<typename _RAIter, typename _Compare> ...@@ -355,18 +359,19 @@ template<typename _RAIter, typename _Compare>
__pivot_pos = __end - 1; __pivot_pos = __end - 1;
// XXX _Compare must have first__ValueType, second__ValueType, // XXX _Compare must have first__ValueType, second__ValueType,
// _ResultType // _ResultType
// _Compare == __gnu_parallel::_Lexicographic<S, int, // _Compare == __gnu_parallel::_Lexicographic<S, int,
// __gnu_parallel::_Less<S, S> > // __gnu_parallel::_Less<S, S> >
// __pivot_pos == std::pair<S, int>* // __pivot_pos == std::pair<S, int>*
// XXX binder2nd only for _RAIters?? // XXX binder2nd only for _RAIters??
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool> __gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
__pred(__comp, *__pivot_pos); __pred(__comp, *__pivot_pos);
// Divide, leave pivot unchanged in last place. // Divide, leave pivot unchanged in last place.
_RAIter __split_pos1, __split_pos2; _RAIter __split_pos1, __split_pos2;
__split_pos1 = __begin + __parallel_partition(__begin, __end - 1, __pred, __split_pos1 = __begin
__get_max_threads()); + __parallel_partition(__begin, __end - 1, __pred,
__get_max_threads());
// Left side: < __pivot_pos; __right side: >= __pivot_pos // Left side: < __pivot_pos; __right side: >= __pivot_pos
...@@ -377,18 +382,18 @@ template<typename _RAIter, typename _Compare> ...@@ -377,18 +382,18 @@ template<typename _RAIter, typename _Compare>
// In case all elements are equal, __split_pos1 == 0 // In case all elements are equal, __split_pos1 == 0
if ((__split_pos1 + 1 - __begin) < (__n >> 7) if ((__split_pos1 + 1 - __begin) < (__n >> 7)
|| (__end - __split_pos1) < (__n >> 7)) || (__end - __split_pos1) < (__n >> 7))
{ {
// Very unequal split, one part smaller than one 128th // Very unequal split, one part smaller than one 128th
// elements not strictly larger than the pivot. // elements not strictly larger than the pivot.
__gnu_parallel::__unary_negate<__gnu_parallel:: __gnu_parallel::__unary_negate<__gnu_parallel::
__binder1st<_Compare, _ValueType, _ValueType, bool>, _ValueType> __binder1st<_Compare, _ValueType, _ValueType, bool>, _ValueType>
__pred(__gnu_parallel::__binder1st<_Compare, _ValueType, __pred(__gnu_parallel::__binder1st<_Compare, _ValueType,
_ValueType, bool>(__comp, *__pivot_pos)); _ValueType, bool>(__comp, *__pivot_pos));
// Find other end of pivot-equal range. // Find other end of pivot-equal range.
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
__end, __pred); __end, __pred);
} }
else else
// Only skip the pivot. // Only skip the pivot.
...@@ -415,8 +420,8 @@ template<typename _RAIter, typename _Compare> ...@@ -415,8 +420,8 @@ template<typename _RAIter, typename _Compare>
template<typename _RAIter, typename _Compare> template<typename _RAIter, typename _Compare>
void void
parallel_partial_sort(_RAIter __begin, parallel_partial_sort(_RAIter __begin,
_RAIter __middle, _RAIter __middle,
_RAIter __end, _Compare __comp) _RAIter __end, _Compare __comp)
{ {
parallel_nth_element(__begin, __middle, __end, __comp); parallel_nth_element(__begin, __middle, __end, __comp);
std::sort(__begin, __middle, __comp); std::sort(__begin, __middle, __comp);
......
...@@ -59,7 +59,7 @@ namespace __gnu_parallel ...@@ -59,7 +59,7 @@ namespace __gnu_parallel
_SequenceIndex _M_max_size; _SequenceIndex _M_max_size;
/** @brief Cyclic __begin and __end pointers contained in one /** @brief Cyclic __begin and __end pointers contained in one
atomically changeable value. */ atomically changeable value. */
_GLIBCXX_VOLATILE _CASable _M_borders; _GLIBCXX_VOLATILE _CASable _M_borders;
public: public:
...@@ -67,9 +67,9 @@ namespace __gnu_parallel ...@@ -67,9 +67,9 @@ namespace __gnu_parallel
* @param _M_max_size Maximal number of elements to be contained. */ * @param _M_max_size Maximal number of elements to be contained. */
_RestrictedBoundedConcurrentQueue(_SequenceIndex _M_max_size) _RestrictedBoundedConcurrentQueue(_SequenceIndex _M_max_size)
{ {
this->_M_max_size = _M_max_size; this->_M_max_size = _M_max_size;
_M_base = new _Tp[_M_max_size]; _M_base = new _Tp[_M_max_size];
_M_borders = __encode2(0, 0); _M_borders = __encode2(0, 0);
#pragma omp flush #pragma omp flush
} }
...@@ -82,16 +82,16 @@ namespace __gnu_parallel ...@@ -82,16 +82,16 @@ namespace __gnu_parallel
void void
push_front(const _Tp& __t) push_front(const _Tp& __t)
{ {
_CASable __former_borders = _M_borders; _CASable __former_borders = _M_borders;
int __former_front, __former_back; int __former_front, __former_back;
decode2(__former_borders, __former_front, __former_back); decode2(__former_borders, __former_front, __former_back);
*(_M_base + __former_front % _M_max_size) = __t; *(_M_base + __former_front % _M_max_size) = __t;
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// Otherwise: front - back > _M_max_size eventually. // Otherwise: front - back > _M_max_size eventually.
_GLIBCXX_PARALLEL_ASSERT(((__former_front + 1) - __former_back) _GLIBCXX_PARALLEL_ASSERT(((__former_front + 1) - __former_back)
<= _M_max_size); <= _M_max_size);
#endif #endif
__fetch_and_add(&_M_borders, __encode2(1, 0)); __fetch_and_add(&_M_borders, __encode2(1, 0));
} }
/** @brief Pops one element from the queue at the front end. /** @brief Pops one element from the queue at the front end.
...@@ -99,50 +99,56 @@ namespace __gnu_parallel ...@@ -99,50 +99,56 @@ namespace __gnu_parallel
bool bool
pop_front(_Tp& __t) pop_front(_Tp& __t)
{ {
int __former_front, __former_back; int __former_front, __former_back;
#pragma omp flush #pragma omp flush
decode2(_M_borders, __former_front, __former_back); decode2(_M_borders, __former_front, __former_back);
while (__former_front > __former_back) while (__former_front > __former_back)
{ {
// Chance. // Chance.
_CASable __former_borders = __encode2(__former_front, __former_back); _CASable
_CASable __new_borders = __encode2(__former_front - 1, __former_back); __former_borders = __encode2(__former_front, __former_back);
if (__compare_and_swap(&_M_borders, __former_borders, __new_borders)) _CASable
{ __new_borders = __encode2(__former_front - 1, __former_back);
__t = *(_M_base + (__former_front - 1) % _M_max_size); if (__compare_and_swap(
return true; &_M_borders, __former_borders, __new_borders))
} {
__t = *(_M_base + (__former_front - 1) % _M_max_size);
return true;
}
#pragma omp flush #pragma omp flush
decode2(_M_borders, __former_front, __former_back); decode2(_M_borders, __former_front, __former_back);
} }
return false; return false;
} }
/** @brief Pops one element from the queue at the front end. /** @brief Pops one element from the queue at the front end.
* Must not be called concurrently with pop_front(). */ * Must not be called concurrently with pop_front(). */
bool bool
pop_back(_Tp& __t) //queue behavior pop_back(_Tp& __t) //queue behavior
{ {
int __former_front, __former_back; int __former_front, __former_back;
#pragma omp flush #pragma omp flush
decode2(_M_borders, __former_front, __former_back); decode2(_M_borders, __former_front, __former_back);
while (__former_front > __former_back) while (__former_front > __former_back)
{ {
// Chance. // Chance.
_CASable __former_borders = __encode2(__former_front, __former_back); _CASable
_CASable __new_borders = __encode2(__former_front, __former_back + 1); __former_borders = __encode2(__former_front, __former_back);
if (__compare_and_swap(&_M_borders, __former_borders, __new_borders)) _CASable
{ __new_borders = __encode2(__former_front, __former_back + 1);
__t = *(_M_base + __former_back % _M_max_size); if (__compare_and_swap(
return true; &_M_borders, __former_borders, __new_borders))
} {
__t = *(_M_base + __former_back % _M_max_size);
return true;
}
#pragma omp flush #pragma omp flush
decode2(_M_borders, __former_front, __former_back); decode2(_M_borders, __former_front, __former_back);
} }
return false; return false;
} }
}; };
} //namespace __gnu_parallel } //namespace __gnu_parallel
#undef _GLIBCXX_VOLATILE #undef _GLIBCXX_VOLATILE
......
...@@ -49,12 +49,12 @@ namespace __gnu_parallel ...@@ -49,12 +49,12 @@ namespace __gnu_parallel
template<typename _RAIter, typename _Compare> template<typename _RAIter, typename _Compare>
typename std::iterator_traits<_RAIter>::difference_type typename std::iterator_traits<_RAIter>::difference_type
__parallel_sort_qs_divide(_RAIter __begin, __parallel_sort_qs_divide(_RAIter __begin,
_RAIter __end, _RAIter __end,
_Compare __comp, typename std::iterator_traits _Compare __comp, typename std::iterator_traits
<_RAIter>::difference_type __pivot_rank, <_RAIter>::difference_type __pivot_rank,
typename std::iterator_traits typename std::iterator_traits
<_RAIter>::difference_type <_RAIter>::difference_type
__num_samples, _ThreadIndex __num_threads) __num_samples, _ThreadIndex __num_threads)
{ {
typedef std::iterator_traits<_RAIter> _TraitsType; typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::value_type _ValueType;
...@@ -65,15 +65,15 @@ namespace __gnu_parallel ...@@ -65,15 +65,15 @@ namespace __gnu_parallel
// Allocate uninitialized, to avoid default constructor. // Allocate uninitialized, to avoid default constructor.
_ValueType* __samples = _ValueType* __samples =
static_cast<_ValueType*>(::operator new(__num_samples static_cast<_ValueType*>(::operator new(__num_samples
* sizeof(_ValueType))); * sizeof(_ValueType)));
for (_DifferenceType __s = 0; __s < __num_samples; ++__s) for (_DifferenceType __s = 0; __s < __num_samples; ++__s)
{ {
const unsigned long long __index = static_cast<unsigned long long>(__s) const unsigned long long __index
* __n / __num_samples; = static_cast<unsigned long long>(__s) * __n / __num_samples;
::new(&(__samples[__s])) _ValueType(__begin[__index]); ::new(&(__samples[__s])) _ValueType(__begin[__index]);
} }
__gnu_sequential::sort(__samples, __samples + __num_samples, __comp); __gnu_sequential::sort(__samples, __samples + __num_samples, __comp);
...@@ -99,47 +99,47 @@ namespace __gnu_parallel ...@@ -99,47 +99,47 @@ namespace __gnu_parallel
template<typename _RAIter, typename _Compare> template<typename _RAIter, typename _Compare>
void void
__parallel_sort_qs_conquer(_RAIter __begin, __parallel_sort_qs_conquer(_RAIter __begin,
_RAIter __end, _RAIter __end,
_Compare __comp, _Compare __comp,
_ThreadIndex __num_threads) _ThreadIndex __num_threads)
{ {
typedef std::iterator_traits<_RAIter> _TraitsType; typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType; typedef typename _TraitsType::difference_type _DifferenceType;
if (__num_threads <= 1) if (__num_threads <= 1)
{ {
__gnu_sequential::sort(__begin, __end, __comp); __gnu_sequential::sort(__begin, __end, __comp);
return; return;
} }
_DifferenceType __n = __end - __begin, __pivot_rank; _DifferenceType __n = __end - __begin, __pivot_rank;
if (__n <= 1) if (__n <= 1)
return; return;
_ThreadIndex __num_threads_left; _ThreadIndex __num_threads_left;
if ((__num_threads % 2) == 1) if ((__num_threads % 2) == 1)
__num_threads_left = __num_threads / 2 + 1; __num_threads_left = __num_threads / 2 + 1;
else else
__num_threads_left = __num_threads / 2; __num_threads_left = __num_threads / 2;
__pivot_rank = __n * __num_threads_left / __num_threads; __pivot_rank = __n * __num_threads_left / __num_threads;
_DifferenceType __split = _DifferenceType __split =
__parallel_sort_qs_divide(__begin, __end, __comp, __pivot_rank, __parallel_sort_qs_divide(__begin, __end, __comp, __pivot_rank,
_Settings::get().sort_qs_num_samples_preset, _Settings::get().sort_qs_num_samples_preset,
__num_threads); __num_threads);
#pragma omp parallel sections num_threads(2) #pragma omp parallel sections num_threads(2)
{ {
#pragma omp section #pragma omp section
__parallel_sort_qs_conquer(__begin, __begin + __split, __parallel_sort_qs_conquer(__begin, __begin + __split,
__comp, __num_threads_left); __comp, __num_threads_left);
#pragma omp section #pragma omp section
__parallel_sort_qs_conquer(__begin + __split, __end, __parallel_sort_qs_conquer(__begin + __split, __end,
__comp, __num_threads - __num_threads_left); __comp, __num_threads - __num_threads_left);
} }
} }
...@@ -155,9 +155,9 @@ namespace __gnu_parallel ...@@ -155,9 +155,9 @@ namespace __gnu_parallel
template<typename _RAIter, typename _Compare> template<typename _RAIter, typename _Compare>
void void
__parallel_sort_qs(_RAIter __begin, __parallel_sort_qs(_RAIter __begin,
_RAIter __end, _RAIter __end,
_Compare __comp, _Compare __comp,
_ThreadIndex __num_threads) _ThreadIndex __num_threads)
{ {
_GLIBCXX_CALL(__n) _GLIBCXX_CALL(__n)
...@@ -171,7 +171,8 @@ namespace __gnu_parallel ...@@ -171,7 +171,8 @@ namespace __gnu_parallel
if (__num_threads > __n) if (__num_threads > __n)
__num_threads = static_cast<_ThreadIndex>(__n); __num_threads = static_cast<_ThreadIndex>(__n);
__parallel_sort_qs_conquer(__begin, __begin + __n, __comp, __num_threads); __parallel_sort_qs_conquer(
__begin, __begin + __n, __comp, __num_threads);
} }
} //namespace __gnu_parallel } //namespace __gnu_parallel
......
...@@ -41,14 +41,14 @@ namespace __gnu_parallel ...@@ -41,14 +41,14 @@ namespace __gnu_parallel
class _RandomNumber class _RandomNumber
{ {
private: private:
std::tr1::mt19937 _M_mt; std::tr1::mt19937 _M_mt;
uint64 _M_supremum; uint64 _M_supremum;
uint64 _M_rand_sup; uint64 _M_rand_sup;
double _M_supremum_reciprocal; double _M_supremum_reciprocal;
double _M_rand_sup_reciprocal; double _M_rand_sup_reciprocal;
// Assumed to be twice as long as the usual random number. // Assumed to be twice as long as the usual random number.
uint64 __cache; uint64 __cache;
// Bit results. // Bit results.
int __bits_left; int __bits_left;
...@@ -56,17 +56,17 @@ namespace __gnu_parallel ...@@ -56,17 +56,17 @@ namespace __gnu_parallel
static uint32 static uint32
__scale_down(uint64 __x, __scale_down(uint64 __x,
#if _GLIBCXX_SCALE_DOWN_FPU #if _GLIBCXX_SCALE_DOWN_FPU
uint64 /*_M_supremum*/, double _M_supremum_reciprocal) uint64 /*_M_supremum*/, double _M_supremum_reciprocal)
#else #else
uint64 _M_supremum, double /*_M_supremum_reciprocal*/) uint64 _M_supremum, double /*_M_supremum_reciprocal*/)
#endif #endif
{ {
#if _GLIBCXX_SCALE_DOWN_FPU #if _GLIBCXX_SCALE_DOWN_FPU
return uint32(__x * _M_supremum_reciprocal); return uint32(__x * _M_supremum_reciprocal);
#else #else
return static_cast<uint32>(__x % _M_supremum); return static_cast<uint32>(__x % _M_supremum);
#endif #endif
} }
public: public:
/** @brief Default constructor. Seed with 0. */ /** @brief Default constructor. Seed with 0. */
...@@ -94,12 +94,12 @@ namespace __gnu_parallel ...@@ -94,12 +94,12 @@ namespace __gnu_parallel
{ return __scale_down(_M_mt(), _M_supremum, _M_supremum_reciprocal); } { return __scale_down(_M_mt(), _M_supremum, _M_supremum_reciprocal); }
/** @brief Generate unsigned random 32-bit integer in the /** @brief Generate unsigned random 32-bit integer in the
interval @__c [0,local_supremum). */ interval @__c [0,local_supremum). */
uint32 uint32
operator()(uint64 local_supremum) operator()(uint64 local_supremum)
{ {
return __scale_down(_M_mt(), local_supremum, return __scale_down(_M_mt(), local_supremum,
double(local_supremum * _M_rand_sup_reciprocal)); double(local_supremum * _M_rand_sup_reciprocal));
} }
/** @brief Generate a number of random bits, run-time parameter. /** @brief Generate a number of random bits, run-time parameter.
...@@ -111,10 +111,10 @@ namespace __gnu_parallel ...@@ -111,10 +111,10 @@ namespace __gnu_parallel
__cache = __cache >> __bits; __cache = __cache >> __bits;
__bits_left -= __bits; __bits_left -= __bits;
if (__bits_left < 32) if (__bits_left < 32)
{ {
__cache |= ((uint64(_M_mt())) << __bits_left); __cache |= ((uint64(_M_mt())) << __bits_left);
__bits_left += 32; __bits_left += 32;
} }
return __res; return __res;
} }
}; };
......
...@@ -70,7 +70,7 @@ template<typename _RAIter> ...@@ -70,7 +70,7 @@ template<typename _RAIter>
_DifferenceType* _M_starts; _DifferenceType* _M_starts;
/** @brief Number of the thread that will further process the /** @brief Number of the thread that will further process the
corresponding bin. */ corresponding bin. */
_ThreadIndex* _M_bin_proc; _ThreadIndex* _M_bin_proc;
/** @brief Number of bins to distribute to. */ /** @brief Number of bins to distribute to. */
...@@ -131,7 +131,8 @@ template<typename _RAIter, typename RandomNumberGenerator> ...@@ -131,7 +131,8 @@ template<typename _RAIter, typename RandomNumberGenerator>
_DRandomShufflingGlobalData<_RAIter>* _M_sd = d->_M_sd; _DRandomShufflingGlobalData<_RAIter>* _M_sd = d->_M_sd;
// Indexing: _M_dist[bin][processor] // Indexing: _M_dist[bin][processor]
_DifferenceType __length = _M_sd->_M_starts[__iam + 1] - _M_sd->_M_starts[__iam]; _DifferenceType __length = _M_sd->_M_starts[__iam + 1] -
_M_sd->_M_starts[__iam];
_BinIndex* __oracles = new _BinIndex[__length]; _BinIndex* __oracles = new _BinIndex[__length];
_DifferenceType* _M_dist = new _DifferenceType[_M_sd->_M_num_bins + 1]; _DifferenceType* _M_dist = new _DifferenceType[_M_sd->_M_num_bins + 1];
_BinIndex* _M_bin_proc = new _BinIndex[_M_sd->_M_num_bins]; _BinIndex* _M_bin_proc = new _BinIndex[_M_sd->_M_num_bins];
...@@ -161,12 +162,13 @@ template<typename _RAIter, typename RandomNumberGenerator> ...@@ -161,12 +162,13 @@ template<typename _RAIter, typename RandomNumberGenerator>
# pragma omp single # pragma omp single
{ {
// Sum up bins, _M_sd->_M_dist[__s + 1][d->_M_num_threads] now contains the // Sum up bins, _M_sd->_M_dist[__s + 1][d->_M_num_threads] now contains
// total number of items in bin __s // the total number of items in bin __s
for (_BinIndex __s = 0; __s < _M_sd->_M_num_bins; ++__s) for (_BinIndex __s = 0; __s < _M_sd->_M_num_bins; ++__s)
__gnu_sequential::partial_sum(_M_sd->_M_dist[__s + 1], __gnu_sequential::partial_sum(
_M_sd->_M_dist[__s + 1] + d->_M_num_threads + 1, _M_sd->_M_dist[__s + 1],
_M_sd->_M_dist[__s + 1]); _M_sd->_M_dist[__s + 1] + d->_M_num_threads + 1,
_M_sd->_M_dist[__s + 1]);
} }
# pragma omp barrier # pragma omp barrier
...@@ -179,9 +181,9 @@ template<typename _RAIter, typename RandomNumberGenerator> ...@@ -179,9 +181,9 @@ template<typename _RAIter, typename RandomNumberGenerator>
for (_BinIndex __s = d->_M_bins_begin; __s < d->__bins_end; ++__s) for (_BinIndex __s = d->_M_bins_begin; __s < d->__bins_end; ++__s)
{ {
for (int __t = 0; __t < d->_M_num_threads + 1; ++__t) for (int __t = 0; __t < d->_M_num_threads + 1; ++__t)
_M_sd->_M_dist[__s + 1][__t] += __offset; _M_sd->_M_dist[__s + 1][__t] += __offset;
__offset = _M_sd->_M_dist[__s + 1][d->_M_num_threads]; __offset = _M_sd->_M_dist[__s + 1][d->_M_num_threads];
} }
_M_sd->_M_temporaries[__iam] = static_cast<_ValueType*>( _M_sd->_M_temporaries[__iam] = static_cast<_ValueType*>(
...@@ -208,7 +210,7 @@ template<typename _RAIter, typename RandomNumberGenerator> ...@@ -208,7 +210,7 @@ template<typename _RAIter, typename RandomNumberGenerator>
// Last column [d->_M_num_threads] stays unchanged. // Last column [d->_M_num_threads] stays unchanged.
::new(&(_M_temporaries[target_p][_M_dist[target_bin + 1]++])) ::new(&(_M_temporaries[target_p][_M_dist[target_bin + 1]++]))
_ValueType(*(_M_source + __i + __start)); _ValueType(*(_M_source + __i + __start));
} }
delete[] __oracles; delete[] __oracles;
...@@ -223,12 +225,15 @@ template<typename _RAIter, typename RandomNumberGenerator> ...@@ -223,12 +225,15 @@ template<typename _RAIter, typename RandomNumberGenerator>
{ {
_ValueType* __begin = _ValueType* __begin =
_M_sd->_M_temporaries[__iam] + _M_sd->_M_temporaries[__iam] +
((__b == d->_M_bins_begin) ? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]), ((__b == d->_M_bins_begin)
? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]),
* __end = * __end =
_M_sd->_M_temporaries[__iam] + _M_sd->_M_dist[__b + 1][d->_M_num_threads]; _M_sd->_M_temporaries[__iam] +
_M_sd->_M_dist[__b + 1][d->_M_num_threads];
__sequential_random_shuffle(__begin, __end, __rng); __sequential_random_shuffle(__begin, __end, __rng);
std::copy(__begin, __end, _M_sd->_M_source + __global_offset + std::copy(__begin, __end, _M_sd->_M_source + __global_offset +
((__b == d->_M_bins_begin) ? 0 : _M_sd->_M_dist[__b][d->_M_num_threads])); ((__b == d->_M_bins_begin)
? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]));
} }
::operator delete(_M_sd->_M_temporaries[__iam]); ::operator delete(_M_sd->_M_temporaries[__iam]);
...@@ -256,11 +261,11 @@ template<typename _Tp> ...@@ -256,11 +261,11 @@ template<typename _Tp>
template<typename _RAIter, typename RandomNumberGenerator> template<typename _RAIter, typename RandomNumberGenerator>
void void
__parallel_random_shuffle_drs(_RAIter __begin, __parallel_random_shuffle_drs(_RAIter __begin,
_RAIter __end, _RAIter __end,
typename std::iterator_traits typename std::iterator_traits
<_RAIter>::difference_type __n, <_RAIter>::difference_type __n,
_ThreadIndex __num_threads, _ThreadIndex __num_threads,
RandomNumberGenerator& __rng) RandomNumberGenerator& __rng)
{ {
typedef std::iterator_traits<_RAIter> _TraitsType; typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::value_type _ValueType;
...@@ -343,7 +348,8 @@ template<typename _RAIter, typename RandomNumberGenerator> ...@@ -343,7 +348,8 @@ template<typename _RAIter, typename RandomNumberGenerator>
_M_sd._M_dist[0][0] = 0; _M_sd._M_dist[0][0] = 0;
_M_sd._M_dist[__b][0] = 0; _M_sd._M_dist[__b][0] = 0;
} }
_M_starts = _M_sd._M_starts = new _DifferenceType[__num_threads + 1]; _M_starts = _M_sd._M_starts
= new _DifferenceType[__num_threads + 1];
int bin_cursor = 0; int bin_cursor = 0;
_M_sd._M_num_bins = _M_num_bins; _M_sd._M_num_bins = _M_num_bins;
_M_sd._M_num_bits = __log2(_M_num_bins); _M_sd._M_num_bits = __log2(_M_num_bins);
...@@ -355,7 +361,8 @@ template<typename _RAIter, typename RandomNumberGenerator> ...@@ -355,7 +361,8 @@ template<typename _RAIter, typename RandomNumberGenerator>
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
{ {
_M_starts[__i] = __start; _M_starts[__i] = __start;
__start += (__i < __split) ? (__chunk_length + 1) : __chunk_length; __start += (__i < __split)
? (__chunk_length + 1) : __chunk_length;
int __j = __pus[__i]._M_bins_begin = bin_cursor; int __j = __pus[__i]._M_bins_begin = bin_cursor;
// Range of bins for this processor. // Range of bins for this processor.
...@@ -469,14 +476,16 @@ template<typename _RAIter, typename RandomNumberGenerator> ...@@ -469,14 +476,16 @@ template<typename _RAIter, typename RandomNumberGenerator>
} }
// Sum up bins. // Sum up bins.
__gnu_sequential::partial_sum(__dist0, __dist0 + _M_num_bins + 1, __dist0); __gnu_sequential::
partial_sum(__dist0, __dist0 + _M_num_bins + 1, __dist0);
for (int __b = 0; __b < _M_num_bins + 1; ++__b) for (int __b = 0; __b < _M_num_bins + 1; ++__b)
__dist1[__b] = __dist0[__b]; __dist1[__b] = __dist0[__b];
// Distribute according to oracles. // Distribute according to oracles.
for (_DifferenceType __i = 0; __i < __n; ++__i) for (_DifferenceType __i = 0; __i < __n; ++__i)
::new(&(__target[(__dist0[__oracles[__i]])++])) _ValueType(*(__begin + __i)); ::new(&(__target[(__dist0[__oracles[__i]])++]))
_ValueType(*(__begin + __i));
for (int __b = 0; __b < _M_num_bins; ++__b) for (int __b = 0; __b < _M_num_bins; ++__b)
{ {
...@@ -511,7 +520,8 @@ template<typename _RAIter, typename RandomNumberGenerator> ...@@ -511,7 +520,8 @@ template<typename _RAIter, typename RandomNumberGenerator>
typedef std::iterator_traits<_RAIter> _TraitsType; typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType; typedef typename _TraitsType::difference_type _DifferenceType;
_DifferenceType __n = __end - __begin; _DifferenceType __n = __end - __begin;
__parallel_random_shuffle_drs(__begin, __end, __n, __get_max_threads(), __rng) ; __parallel_random_shuffle_drs(
__begin, __end, __n, __get_max_threads(), __rng) ;
} }
} }
......
...@@ -76,8 +76,8 @@ template<typename _RAIter, typename _DifferenceTp> ...@@ -76,8 +76,8 @@ template<typename _RAIter, typename _DifferenceTp>
* @param __pred Find predicate. * @param __pred Find predicate.
* @return Place of finding in first sequences. */ * @return Place of finding in first sequences. */
template<typename __RAIter1, template<typename __RAIter1,
typename __RAIter2, typename __RAIter2,
typename _Pred> typename _Pred>
__RAIter1 __RAIter1
__search_template(__RAIter1 __begin1, __RAIter1 __end1, __search_template(__RAIter1 __begin1, __RAIter1 __end1,
__RAIter2 __begin2, __RAIter2 __end2, __RAIter2 __begin2, __RAIter2 __end2,
...@@ -126,7 +126,8 @@ template<typename __RAIter1, ...@@ -126,7 +126,8 @@ template<typename __RAIter1,
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
_DifferenceType __start = __splitters[__iam], __stop = __splitters[__iam + 1]; _DifferenceType __start = __splitters[__iam],
__stop = __splitters[__iam + 1];
_DifferenceType __pos_in_pattern = 0; _DifferenceType __pos_in_pattern = 0;
bool __found_pattern = false; bool __found_pattern = false;
...@@ -156,7 +157,8 @@ template<typename __RAIter1, ...@@ -156,7 +157,8 @@ template<typename __RAIter1,
// Make safe jump. // Make safe jump.
__start += (__pos_in_pattern - __advances[__pos_in_pattern]); __start += (__pos_in_pattern - __advances[__pos_in_pattern]);
__pos_in_pattern = __pos_in_pattern =
(__advances[__pos_in_pattern] < 0) ? 0 : __advances[__pos_in_pattern]; (__advances[__pos_in_pattern] < 0) ?
0 : __advances[__pos_in_pattern];
} }
} //parallel } //parallel
......
...@@ -63,8 +63,8 @@ template<typename _IIter, typename _OutputIterator> ...@@ -63,8 +63,8 @@ template<typename _IIter, typename _OutputIterator>
} }
template<typename _IIter, template<typename _IIter,
typename _OutputIterator, typename _OutputIterator,
typename _Compare> typename _Compare>
struct symmetric_difference_func struct symmetric_difference_func
{ {
typedef std::iterator_traits<_IIter> _TraitsType; typedef std::iterator_traits<_IIter> _TraitsType;
...@@ -77,8 +77,8 @@ template<typename _IIter, ...@@ -77,8 +77,8 @@ template<typename _IIter,
_OutputIterator _OutputIterator
_M_invoke(_IIter __a, _IIter __b, _M_invoke(_IIter __a, _IIter __b,
_IIter __c, _IIter d, _IIter __c, _IIter d,
_OutputIterator __r) const _OutputIterator __r) const
{ {
while (__a != __b && __c != d) while (__a != __b && __c != d)
{ {
...@@ -105,7 +105,7 @@ template<typename _IIter, ...@@ -105,7 +105,7 @@ template<typename _IIter,
_DifferenceType _DifferenceType
__count(_IIter __a, _IIter __b, __count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const _IIter __c, _IIter d) const
{ {
_DifferenceType __counter = 0; _DifferenceType __counter = 0;
...@@ -142,8 +142,8 @@ template<typename _IIter, ...@@ -142,8 +142,8 @@ template<typename _IIter,
template<typename _IIter, template<typename _IIter,
typename _OutputIterator, typename _OutputIterator,
typename _Compare> typename _Compare>
struct __difference_func struct __difference_func
{ {
typedef std::iterator_traits<_IIter> _TraitsType; typedef std::iterator_traits<_IIter> _TraitsType;
...@@ -179,7 +179,7 @@ template<typename _IIter, ...@@ -179,7 +179,7 @@ template<typename _IIter,
_DifferenceType _DifferenceType
__count(_IIter __a, _IIter __b, __count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const _IIter __c, _IIter d) const
{ {
_DifferenceType __counter = 0; _DifferenceType __counter = 0;
...@@ -210,8 +210,8 @@ template<typename _IIter, ...@@ -210,8 +210,8 @@ template<typename _IIter,
template<typename _IIter, template<typename _IIter,
typename _OutputIterator, typename _OutputIterator,
typename _Compare> typename _Compare>
struct __intersection_func struct __intersection_func
{ {
typedef std::iterator_traits<_IIter> _TraitsType; typedef std::iterator_traits<_IIter> _TraitsType;
...@@ -246,7 +246,7 @@ template<typename _IIter, ...@@ -246,7 +246,7 @@ template<typename _IIter,
_DifferenceType _DifferenceType
__count(_IIter __a, _IIter __b, __count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const _IIter __c, _IIter d) const
{ {
_DifferenceType __counter = 0; _DifferenceType __counter = 0;
...@@ -315,7 +315,7 @@ template<class _IIter, class _OutputIterator, class _Compare> ...@@ -315,7 +315,7 @@ template<class _IIter, class _OutputIterator, class _Compare>
_DifferenceType _DifferenceType
__count(_IIter __a, _IIter __b, __count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const _IIter __c, _IIter d) const
{ {
_DifferenceType __counter = 0; _DifferenceType __counter = 0;
...@@ -348,8 +348,8 @@ template<class _IIter, class _OutputIterator, class _Compare> ...@@ -348,8 +348,8 @@ template<class _IIter, class _OutputIterator, class _Compare>
}; };
template<typename _IIter, template<typename _IIter,
typename _OutputIterator, typename _OutputIterator,
typename Operation> typename Operation>
_OutputIterator _OutputIterator
__parallel_set_operation(_IIter __begin1, _IIter __end1, __parallel_set_operation(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2, _IIter __begin2, _IIter __end2,
...@@ -370,7 +370,7 @@ template<typename _IIter, ...@@ -370,7 +370,7 @@ template<typename _IIter,
const _DifferenceType size = (__end1 - __begin1) + (__end2 - __begin2); const _DifferenceType size = (__end1 - __begin1) + (__end2 - __begin2);
const _IteratorPair __sequence[ 2 ] = const _IteratorPair __sequence[ 2 ] =
{ std::make_pair(__begin1, __end1), std::make_pair(__begin2, __end2) } ; { std::make_pair(__begin1, __end1), std::make_pair(__begin2, __end2) };
_OutputIterator return_value = __result; _OutputIterator return_value = __result;
_DifferenceType *__borders; _DifferenceType *__borders;
_IteratorPair *__block_begins; _IteratorPair *__block_begins;
...@@ -400,7 +400,8 @@ template<typename _IIter, ...@@ -400,7 +400,8 @@ template<typename _IIter,
_IIter __offset[2]; _IIter __offset[2];
const _DifferenceType __rank = __borders[__iam + 1]; const _DifferenceType __rank = __borders[__iam + 1];
multiseq_partition(__sequence, __sequence + 2, __rank, __offset, __op._M_comp); multiseq_partition(__sequence, __sequence + 2,
__rank, __offset, __op._M_comp);
// allowed to read? // allowed to read?
// together // together
...@@ -427,15 +428,16 @@ template<typename _IIter, ...@@ -427,15 +428,16 @@ template<typename _IIter,
if (__iam == 0) if (__iam == 0)
{ {
// The first thread can copy already. // The first thread can copy already.
__lengths[ __iam ] = __op._M_invoke(__block_begin.first, block_end.first, __lengths[ __iam ] =
__block_begin.second, block_end.second, __op._M_invoke(__block_begin.first, block_end.first,
__result) __block_begin.second, block_end.second, __result)
- __result; - __result;
} }
else else
{ {
__lengths[ __iam ] = __op.__count(__block_begin.first, block_end.first, __lengths[ __iam ] =
__block_begin.second, block_end.second); __op.__count(__block_begin.first, block_end.first,
__block_begin.second, block_end.second);
} }
// Make sure everyone wrote their lengths. // Make sure everyone wrote their lengths.
...@@ -453,7 +455,7 @@ template<typename _IIter, ...@@ -453,7 +455,7 @@ template<typename _IIter,
// Return the result iterator of the last block. // Return the result iterator of the last block.
return_value = __op._M_invoke( return_value = __op._M_invoke(
__block_begin.first, __end1, __block_begin.second, __end2, __r); __block_begin.first, __end1, __block_begin.second, __end2, __r);
} }
else else
...@@ -471,52 +473,56 @@ template<typename _IIter, ...@@ -471,52 +473,56 @@ template<typename _IIter,
template<typename _IIter, template<typename _IIter,
typename _OutputIterator, typename _OutputIterator,
typename _Compare> typename _Compare>
inline _OutputIterator inline _OutputIterator
__parallel_set_union(_IIter __begin1, _IIter __end1, __parallel_set_union(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2, _IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp) _OutputIterator __result, _Compare _M_comp)
{ {
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result, return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
__union_func< _IIter, _OutputIterator, _Compare>(_M_comp)); __result, __union_func< _IIter, _OutputIterator, _Compare>(_M_comp));
} }
template<typename _IIter, template<typename _IIter,
typename _OutputIterator, typename _OutputIterator,
typename _Compare> typename _Compare>
inline _OutputIterator inline _OutputIterator
__parallel_set_intersection(_IIter __begin1, _IIter __end1, __parallel_set_intersection(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2, _IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp) _OutputIterator __result, _Compare _M_comp)
{ {
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result, return __parallel_set_operation(
__intersection_func<_IIter, _OutputIterator, _Compare>(_M_comp)); __begin1, __end1, __begin2, __end2, __result,
__intersection_func<_IIter, _OutputIterator, _Compare>(_M_comp));
} }
template<typename _IIter, template<typename _IIter,
typename _OutputIterator, typename _OutputIterator,
typename _Compare> typename _Compare>
inline _OutputIterator inline _OutputIterator
__parallel_set_difference(_IIter __begin1, _IIter __end1, __parallel_set_difference(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2, _IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp) _OutputIterator __result, _Compare _M_comp)
{ {
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result, return __parallel_set_operation(
__difference_func<_IIter, _OutputIterator, _Compare>(_M_comp)); __begin1, __end1, __begin2, __end2, __result,
__difference_func<_IIter, _OutputIterator, _Compare>(_M_comp));
} }
template<typename _IIter, template<typename _IIter,
typename _OutputIterator, typename _OutputIterator,
typename _Compare> typename _Compare>
inline _OutputIterator inline _OutputIterator
__parallel_set_symmetric_difference(_IIter __begin1, _IIter __end1, __parallel_set_symmetric_difference(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2, _IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp) _OutputIterator __result,
_Compare _M_comp)
{ {
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result, return __parallel_set_operation(
symmetric_difference_func<_IIter, _OutputIterator, _Compare> __begin1, __end1, __begin2, __end2, __result,
(_M_comp)); symmetric_difference_func<_IIter, _OutputIterator, _Compare>
(_M_comp));
} }
} }
......
...@@ -54,13 +54,13 @@ ...@@ -54,13 +54,13 @@
namespace __gnu_parallel namespace __gnu_parallel
{ {
//prototype //prototype
template<bool __stable, typename _RAIter, template<bool __stable, typename _RAIter,
typename _Compare, typename _Parallelism> typename _Compare, typename _Parallelism>
void void
parallel_sort(_RAIter __begin, _RAIter __end, parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, _Parallelism __parallelism); _Compare __comp, _Parallelism __parallelism);
/** /**
* @brief Choose multiway mergesort, splitting variant at run-time, * @brief Choose multiway mergesort, splitting variant at run-time,
* for parallel sorting. * for parallel sorting.
...@@ -138,7 +138,8 @@ namespace __gnu_parallel ...@@ -138,7 +138,8 @@ namespace __gnu_parallel
_GLIBCXX_PARALLEL_ASSERT(__stable == false); _GLIBCXX_PARALLEL_ASSERT(__stable == false);
__parallel_sort_qs(__begin, __end, __comp, __parallelism.__get_num_threads()); __parallel_sort_qs(__begin, __end, __comp,
__parallelism.__get_num_threads());
} }
/** /**
...@@ -158,7 +159,8 @@ namespace __gnu_parallel ...@@ -158,7 +159,8 @@ namespace __gnu_parallel
_GLIBCXX_PARALLEL_ASSERT(__stable == false); _GLIBCXX_PARALLEL_ASSERT(__stable == false);
__parallel_sort_qsb(__begin, __end, __comp, __parallelism.__get_num_threads()); __parallel_sort_qsb(__begin, __end, __comp,
__parallelism.__get_num_threads());
} }
...@@ -215,11 +217,13 @@ namespace __gnu_parallel ...@@ -215,11 +217,13 @@ namespace __gnu_parallel
#endif #endif
#if _GLIBCXX_QUICKSORT #if _GLIBCXX_QUICKSORT
else if (_Settings::get().sort_algorithm == QS) else if (_Settings::get().sort_algorithm == QS)
__parallel_sort_qs(__begin, __end, __comp, __parallelism.__get_num_threads()); __parallel_sort_qs(__begin, __end, __comp,
__parallelism.__get_num_threads());
#endif #endif
#if _GLIBCXX_BAL_QUICKSORT #if _GLIBCXX_BAL_QUICKSORT
else if (_Settings::get().sort_algorithm == QS_BALANCED) else if (_Settings::get().sort_algorithm == QS_BALANCED)
__parallel_sort_qsb(__begin, __end, __comp, __parallelism.__get_num_threads()); __parallel_sort_qsb(__begin, __end, __comp,
__parallelism.__get_num_threads());
#endif #endif
else else
__gnu_sequential::sort(__begin, __end, __comp); __gnu_sequential::sort(__begin, __end, __comp);
......
...@@ -149,7 +149,8 @@ namespace __gnu_parallel ...@@ -149,7 +149,8 @@ namespace __gnu_parallel
static const int _CASable_bits = sizeof(_CASable) * 8; static const int _CASable_bits = sizeof(_CASable) * 8;
/// ::_CASable with the right half of bits set to 1. /// ::_CASable with the right half of bits set to 1.
static const _CASable _CASable_mask = ((_CASable(1) << (_CASable_bits / 2)) - 1); static const _CASable _CASable_mask =
((_CASable(1) << (_CASable_bits / 2)) - 1);
} }
#endif /* _GLIBCXX_PARALLEL_TYPES_H */ #endif /* _GLIBCXX_PARALLEL_TYPES_H */
...@@ -45,11 +45,11 @@ namespace __gnu_parallel ...@@ -45,11 +45,11 @@ namespace __gnu_parallel
* @param __binary_pred Equality predicate. * @param __binary_pred Equality predicate.
* @return End iterator of result __sequence. */ * @return End iterator of result __sequence. */
template<typename _IIter, template<typename _IIter,
class _OutputIterator, class _OutputIterator,
class _BinaryPredicate> class _BinaryPredicate>
_OutputIterator _OutputIterator
__parallel_unique_copy(_IIter __first, _IIter __last, __parallel_unique_copy(_IIter __first, _IIter __last,
_OutputIterator __result, _BinaryPredicate __binary_pred) _OutputIterator __result, _BinaryPredicate __binary_pred)
{ {
_GLIBCXX_CALL(__last - __first) _GLIBCXX_CALL(__last - __first)
...@@ -72,10 +72,10 @@ template<typename _IIter, ...@@ -72,10 +72,10 @@ template<typename _IIter,
{ {
# pragma omp single # pragma omp single
{ {
__num_threads = omp_get_num_threads(); __num_threads = omp_get_num_threads();
__borders = new _DifferenceType[__num_threads + 2]; __borders = new _DifferenceType[__num_threads + 2];
equally_split(size, __num_threads + 1, __borders); equally_split(size, __num_threads + 1, __borders);
__counter = new _DifferenceType[__num_threads + 1]; __counter = new _DifferenceType[__num_threads + 1];
} }
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
...@@ -89,7 +89,7 @@ template<typename _IIter, ...@@ -89,7 +89,7 @@ template<typename _IIter,
if (__iam == 0) if (__iam == 0)
{ {
__begin = __borders[0] + 1; // == 1 __begin = __borders[0] + 1; // == 1
__end = __borders[__iam + 1]; __end = __borders[__iam + 1];
++__i; ++__i;
...@@ -112,8 +112,8 @@ template<typename _IIter, ...@@ -112,8 +112,8 @@ template<typename _IIter,
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter) for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{ {
if (!__binary_pred(*iter, *(iter - 1))) if (!__binary_pred(*iter, *(iter - 1)))
++__i; ++__i;
} }
} }
__counter[__iam] = __i; __counter[__iam] = __i;
...@@ -157,8 +157,8 @@ template<typename _IIter, ...@@ -157,8 +157,8 @@ template<typename _IIter,
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter) for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{ {
if (!__binary_pred(*iter, *(iter-1))) if (!__binary_pred(*iter, *(iter-1)))
*__iter_out++ = *iter; *__iter_out++ = *iter;
} }
} }
} }
...@@ -184,7 +184,7 @@ template<typename _IIter, class _OutputIterator> ...@@ -184,7 +184,7 @@ template<typename _IIter, class _OutputIterator>
typedef typename std::iterator_traits<_IIter>::value_type typedef typename std::iterator_traits<_IIter>::value_type
_ValueType; _ValueType;
return __parallel_unique_copy(__first, __last, __result, return __parallel_unique_copy(__first, __last, __result,
std::equal_to<_ValueType>()); std::equal_to<_ValueType>());
} }
}//namespace __gnu_parallel }//namespace __gnu_parallel
......
...@@ -91,18 +91,15 @@ template<typename _DifferenceTp> ...@@ -91,18 +91,15 @@ template<typename _DifferenceTp>
* @return User-supplied functor (that may contain a part of the result). * @return User-supplied functor (that may contain a part of the result).
*/ */
template<typename _RAIter, template<typename _RAIter,
typename _Op, typename _Op,
typename _Fu, typename _Fu,
typename _Red, typename _Red,
typename _Result> typename _Result>
_Op _Op
for_each_template_random_access_workstealing(_RAIter __begin, for_each_template_random_access_workstealing(
_RAIter __end, _RAIter __begin, _RAIter __end, _Op __op, _Fu& __f, _Red __r,
_Op __op, _Fu& __f, _Red __r, _Result __base, _Result& __output,
_Result __base, _Result& __output, typename std::iterator_traits<_RAIter>::difference_type __bound)
typename std::iterator_traits
<_RAIter>::
difference_type __bound)
{ {
_GLIBCXX_CALL(__end - __begin) _GLIBCXX_CALL(__end - __begin)
...@@ -111,13 +108,15 @@ template<typename _RAIter, ...@@ -111,13 +108,15 @@ template<typename _RAIter,
const _Settings& __s = _Settings::get(); const _Settings& __s = _Settings::get();
_DifferenceType __chunk_size = static_cast<_DifferenceType>(__s.workstealing_chunk_size); _DifferenceType __chunk_size =
static_cast<_DifferenceType>(__s.workstealing_chunk_size);
// How many jobs? // How many jobs?
_DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound; _DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound;
// To avoid false sharing in a cache line. // To avoid false sharing in a cache line.
const int __stride = __s.cache_line_size * 10 / sizeof(_Job<_DifferenceType>) + 1; const int __stride =
__s.cache_line_size * 10 / sizeof(_Job<_DifferenceType>) + 1;
// Total number of threads currently working. // Total number of threads currently working.
_ThreadIndex __busy = 0; _ThreadIndex __busy = 0;
...@@ -132,8 +131,8 @@ template<typename _RAIter, ...@@ -132,8 +131,8 @@ template<typename _RAIter,
// No more threads than jobs, at least one thread. // No more threads than jobs, at least one thread.
_ThreadIndex __num_threads = _ThreadIndex __num_threads =
__gnu_parallel::max<_ThreadIndex>(1, __gnu_parallel::max<_ThreadIndex>(1,
__gnu_parallel::min<_DifferenceType>(__length, __get_max_threads())); __gnu_parallel::min<_DifferenceType>(__length, __get_max_threads()));
# pragma omp parallel shared(__busy) num_threads(__num_threads) # pragma omp parallel shared(__busy) num_threads(__num_threads)
{ {
...@@ -184,7 +183,7 @@ template<typename _RAIter, ...@@ -184,7 +183,7 @@ template<typename _RAIter,
(__length - 1) : ((__iam + 1) * (__length / __num_threads) - 1); (__length - 1) : ((__iam + 1) * (__length / __num_threads) - 1);
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1; __my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
// Init result with _M_first __value (to have a base value for reduction). // Init result with _M_first value (to have a base value for reduction)
if (__my_job._M_first <= __my_job._M_last) if (__my_job._M_first <= __my_job._M_last)
{ {
// Cannot use volatile variable directly. // Cannot use volatile variable directly.
...@@ -211,13 +210,15 @@ template<typename _RAIter, ...@@ -211,13 +210,15 @@ template<typename _RAIter,
// fetch-and-add call // fetch-and-add call
// Reserve current job block (size __chunk_size) in my queue. // Reserve current job block (size __chunk_size) in my queue.
_DifferenceType current_job = _DifferenceType current_job =
__fetch_and_add<_DifferenceType>(&(__my_job._M_first), __chunk_size); __fetch_and_add<_DifferenceType>(
&(__my_job._M_first), __chunk_size);
// Update _M_load, to make the three values consistent, // Update _M_load, to make the three values consistent,
// _M_first might have been changed in the meantime // _M_first might have been changed in the meantime
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1; __my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
for (_DifferenceType job_counter = 0; for (_DifferenceType job_counter = 0;
job_counter < __chunk_size && current_job <= __my_job._M_last; job_counter < __chunk_size
&& current_job <= __my_job._M_last;
++job_counter) ++job_counter)
{ {
// Yes: process it! // Yes: process it!
...@@ -254,7 +255,8 @@ template<typename _RAIter, ...@@ -254,7 +255,8 @@ template<typename _RAIter,
} }
while (__busy > 0 while (__busy > 0
&& ((__supposed_load <= 0) && ((__supposed_load <= 0)
|| ((__supposed_first + __supposed_load - 1) != __supposed_last))); || ((__supposed_first + __supposed_load - 1)
!= __supposed_last)));
if (__busy == 0) if (__busy == 0)
break; break;
...@@ -273,7 +275,8 @@ template<typename _RAIter, ...@@ -273,7 +275,8 @@ template<typename _RAIter,
__stolen_first + __steal - _DifferenceType(1); __stolen_first + __steal - _DifferenceType(1);
__my_job._M_first = __stolen_first; __my_job._M_first = __stolen_first;
__my_job._M_last = __gnu_parallel::min(stolen_try, __supposed_last); __my_job._M_last =
__gnu_parallel::min(stolen_try, __supposed_last);
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1; __my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
// Has potential work again. // Has potential work again.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment