Commit 77d16198 by Paolo Carlini Committed by Paolo Carlini

multiway_merge.h: Simple formatting and uglification fixes.

2009-11-06  Paolo Carlini  <paolo.carlini@oracle.com>

	* include/parallel/multiway_merge.h: Simple formatting and
	uglification fixes.
	* include/parallel/find_selectors.h: Likewise.
	* include/parallel/losertree.h: Likewise.
	* include/parallel/list_partition.h: Likewise.
	* include/parallel/for_each.h: Likewise.
	* include/parallel/multiseq_selection.h: Likewise.
	* include/parallel/workstealing.h: Likewise.
	* include/parallel/par_loop.h: Likewise.
	* include/parallel/numeric: Likewise.
	* include/parallel/quicksort.h: Likewise.
	* include/parallel/equally_split.h: Likewise.
	* include/parallel/omp_loop_static.h: Likewise.
	* include/parallel/random_shuffle.h: Likewise.
	* include/parallel/balanced_quicksort.h: Likewise.
	* include/parallel/tags.h: Likewise.
	* include/parallel/set_operations.h: Likewise.
	* include/parallel/merge.h: Likewise.
	* include/parallel/unique_copy.h: Likewise.
	* include/parallel/multiway_mergesort.h: Likewise.
	* include/parallel/search.h: Likewise.
	* include/parallel/partition.h: Likewise.
	* include/parallel/partial_sum.h: Likewise.
	* include/parallel/find.h: Likewise.
	* include/parallel/queue.h: Likewise.
	* include/parallel/omp_loop.h: Likewise.
	* include/parallel/checkers.h: Likewise.
	* include/parallel/sort.h: Likewise.

From-SVN: r153966
parent b169fe9d
2009-11-06 Paolo Carlini <paolo.carlini@oracle.com>
* include/parallel/multiway_merge.h: Simple formatting and
uglification fixes.
* include/parallel/find_selectors.h: Likewise.
* include/parallel/losertree.h: Likewise.
* include/parallel/list_partition.h: Likewise.
* include/parallel/for_each.h: Likewise.
* include/parallel/multiseq_selection.h: Likewise.
* include/parallel/workstealing.h: Likewise.
* include/parallel/par_loop.h: Likewise.
* include/parallel/numeric: Likewise.
* include/parallel/quicksort.h: Likewise.
* include/parallel/equally_split.h: Likewise.
* include/parallel/omp_loop_static.h: Likewise.
* include/parallel/random_shuffle.h: Likewise.
* include/parallel/balanced_quicksort.h: Likewise.
* include/parallel/tags.h: Likewise.
* include/parallel/set_operations.h: Likewise.
* include/parallel/merge.h: Likewise.
* include/parallel/unique_copy.h: Likewise.
* include/parallel/multiway_mergesort.h: Likewise.
* include/parallel/search.h: Likewise.
* include/parallel/partition.h: Likewise.
* include/parallel/partial_sum.h: Likewise.
* include/parallel/find.h: Likewise.
* include/parallel/queue.h: Likewise.
* include/parallel/omp_loop.h: Likewise.
* include/parallel/checkers.h: Likewise.
* include/parallel/sort.h: Likewise.
2009-11-06 Jonathan Wakely <jwakely.gcc@gmail.com> 2009-11-06 Jonathan Wakely <jwakely.gcc@gmail.com>
PR libstdc++/41949 PR libstdc++/41949
......
...@@ -57,436 +57,435 @@ ...@@ -57,436 +57,435 @@
namespace __gnu_parallel namespace __gnu_parallel
{ {
/** @brief Information local to one thread in the parallel quicksort run. */ /** @brief Information local to one thread in the parallel quicksort run. */
template<typename _RAIter> template<typename _RAIter>
struct _QSBThreadLocal struct _QSBThreadLocal
{ {
typedef std::iterator_traits<_RAIter> _TraitsType; typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType; typedef typename _TraitsType::difference_type _DifferenceType;
/** @brief Continuous part of the sequence, described by an /** @brief Continuous part of the sequence, described by an
iterator pair. */ iterator pair. */
typedef std::pair<_RAIter, _RAIter> _Piece; typedef std::pair<_RAIter, _RAIter> _Piece;
/** @brief Initial piece to work on. */ /** @brief Initial piece to work on. */
_Piece _M_initial; _Piece _M_initial;
/** @brief Work-stealing queue. */ /** @brief Work-stealing queue. */
_RestrictedBoundedConcurrentQueue<_Piece> _M_leftover_parts; _RestrictedBoundedConcurrentQueue<_Piece> _M_leftover_parts;
/** @brief Number of threads involved in this algorithm. */ /** @brief Number of threads involved in this algorithm. */
_ThreadIndex _M_num_threads; _ThreadIndex _M_num_threads;
/** @brief Pointer to a counter of elements left over to sort. */ /** @brief Pointer to a counter of elements left over to sort. */
volatile _DifferenceType* _M_elements_leftover; volatile _DifferenceType* _M_elements_leftover;
/** @brief The complete sequence to sort. */ /** @brief The complete sequence to sort. */
_Piece _M_global; _Piece _M_global;
/** @brief Constructor. /** @brief Constructor.
* @param __queue_size size of the work-stealing queue. */ * @param __queue_size size of the work-stealing queue. */
_QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { } _QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { }
}; };
/** @brief Balanced quicksort divide step. /** @brief Balanced quicksort divide step.
* @param __begin Begin iterator of subsequence. * @param __begin Begin iterator of subsequence.
* @param __end End iterator of subsequence. * @param __end End iterator of subsequence.
* @param __comp Comparator. * @param __comp Comparator.
* @param __num_threads Number of threads that are allowed to work on * @param __num_threads Number of threads that are allowed to work on
* this part. * this part.
* @pre @__c (__end-__begin)>=1 */ * @pre @__c (__end-__begin)>=1 */
template<typename _RAIter, typename _Compare> template<typename _RAIter, typename _Compare>
typename std::iterator_traits<_RAIter>::difference_type typename std::iterator_traits<_RAIter>::difference_type
__qsb_divide(_RAIter __begin, _RAIter __end, __qsb_divide(_RAIter __begin, _RAIter __end,
_Compare __comp, _ThreadIndex __num_threads) _Compare __comp, _ThreadIndex __num_threads)
{ {
_GLIBCXX_PARALLEL_ASSERT(__num_threads > 0); _GLIBCXX_PARALLEL_ASSERT(__num_threads > 0);
typedef std::iterator_traits<_RAIter> _TraitsType; typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType; typedef typename _TraitsType::difference_type _DifferenceType;
_RAIter __pivot_pos = _RAIter __pivot_pos =
__median_of_three_iterators(__begin, __begin + (__end - __begin) / 2, __median_of_three_iterators(__begin, __begin + (__end - __begin) / 2,
__end - 1, __comp); __end - 1, __comp);
#if defined(_GLIBCXX_ASSERTIONS) #if defined(_GLIBCXX_ASSERTIONS)
// Must be in between somewhere. // Must be in between somewhere.
_DifferenceType __n = __end - __begin; _DifferenceType __n = __end - __begin;
_GLIBCXX_PARALLEL_ASSERT( _GLIBCXX_PARALLEL_ASSERT((!__comp(*__pivot_pos, *__begin)
(!__comp(*__pivot_pos, *__begin) && && !__comp(*(__begin + __n / 2),
!__comp(*(__begin + __n / 2), *__pivot_pos)) *__pivot_pos))
|| (!__comp(*__pivot_pos, *__begin) && || (!__comp(*__pivot_pos, *__begin)
!__comp(*(__end - 1), *__pivot_pos)) && !__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) && || (!__comp(*__pivot_pos, *(__begin + __n / 2))
!__comp(*__begin, *__pivot_pos)) && !__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) && || (!__comp(*__pivot_pos, *(__begin + __n / 2))
!__comp(*(__end - 1), *__pivot_pos)) && !__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1)) && || (!__comp(*__pivot_pos, *(__end - 1))
!__comp(*__begin, *__pivot_pos)) && !__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1)) && || (!__comp(*__pivot_pos, *(__end - 1))
!__comp(*(__begin + __n / 2), *__pivot_pos))); && !__comp(*(__begin + __n / 2),
*__pivot_pos)));
#endif #endif
// Swap pivot value to end. // Swap pivot value to end.
if (__pivot_pos != (__end - 1)) if (__pivot_pos != (__end - 1))
std::swap(*__pivot_pos, *(__end - 1)); std::swap(*__pivot_pos, *(__end - 1));
__pivot_pos = __end - 1; __pivot_pos = __end - 1;
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool> __gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
__pred(__comp, *__pivot_pos); __pred(__comp, *__pivot_pos);
// Divide, returning __end - __begin - 1 in the worst case. // Divide, returning __end - __begin - 1 in the worst case.
_DifferenceType __split_pos = __parallel_partition( _DifferenceType __split_pos = __parallel_partition(__begin, __end - 1,
__begin, __end - 1, __pred, __num_threads); __pred,
__num_threads);
// Swap back pivot to middle. // Swap back pivot to middle.
std::swap(*(__begin + __split_pos), *__pivot_pos); std::swap(*(__begin + __split_pos), *__pivot_pos);
__pivot_pos = __begin + __split_pos; __pivot_pos = __begin + __split_pos;
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_RAIter __r; _RAIter __r;
for (__r = __begin; __r != __pivot_pos; ++__r) for (__r = __begin; __r != __pivot_pos; ++__r)
_GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos)); _GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos));
for (; __r != __end; ++__r) for (; __r != __end; ++__r)
_GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos)); _GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos));
#endif #endif
return __split_pos; return __split_pos;
} }
/** @brief Quicksort conquer step.
* @param __tls Array of thread-local storages.
* @param __begin Begin iterator of subsequence.
* @param __end End iterator of subsequence.
* @param __comp Comparator.
* @param __iam Number of the thread processing this function.
* @param __num_threads
* Number of threads that are allowed to work on this part. */
template<typename _RAIter, typename _Compare>
void
__qsb_conquer(_QSBThreadLocal<_RAIter>** __tls,
_RAIter __begin, _RAIter __end,
_Compare __comp,
_ThreadIndex __iam, _ThreadIndex __num_threads,
bool __parent_wait)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
_DifferenceType __n = __end - __begin;
if (__num_threads <= 1 || __n <= 1)
{
__tls[__iam]->_M_initial.first = __begin;
__tls[__iam]->_M_initial.second = __end;
__qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait); /** @brief Quicksort conquer step.
* @param __tls Array of thread-local storages.
* @param __begin Begin iterator of subsequence.
* @param __end End iterator of subsequence.
* @param __comp Comparator.
* @param __iam Number of the thread processing this function.
* @param __num_threads
* Number of threads that are allowed to work on this part. */
template<typename _RAIter, typename _Compare>
void
__qsb_conquer(_QSBThreadLocal<_RAIter>** __tls,
_RAIter __begin, _RAIter __end,
_Compare __comp,
_ThreadIndex __iam, _ThreadIndex __num_threads,
bool __parent_wait)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
return; _DifferenceType __n = __end - __begin;
}
if (__num_threads <= 1 || __n <= 1)
{
__tls[__iam]->_M_initial.first = __begin;
__tls[__iam]->_M_initial.second = __end;
__qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait);
return;
}
// Divide step. // Divide step.
_DifferenceType __split_pos = _DifferenceType __split_pos =
__qsb_divide(__begin, __end, __comp, __num_threads); __qsb_divide(__begin, __end, __comp, __num_threads);
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos && _GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos &&
__split_pos < (__end - __begin)); __split_pos < (__end - __begin));
#endif #endif
_ThreadIndex __num_threads_leftside = _ThreadIndex
std::max<_ThreadIndex>(1, std::min<_ThreadIndex>( __num_threads_leftside = std::max<_ThreadIndex>
__num_threads - 1, __split_pos * __num_threads / __n)); (1, std::min<_ThreadIndex>(__num_threads - 1, __split_pos
* __num_threads / __n));
# pragma omp atomic # pragma omp atomic
*__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1; *__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1;
// Conquer step. // Conquer step.
# pragma omp parallel num_threads(2) # pragma omp parallel num_threads(2)
{ {
bool __wait; bool __wait;
if(omp_get_num_threads() < 2) if(omp_get_num_threads() < 2)
__wait = false; __wait = false;
else else
__wait = __parent_wait; __wait = __parent_wait;
# pragma omp sections # pragma omp sections
{ {
# pragma omp section # pragma omp section
{ {
__qsb_conquer(__tls, __begin, __begin + __split_pos, __comp, __qsb_conquer(__tls, __begin, __begin + __split_pos, __comp,
__iam, __iam, __num_threads_leftside, __wait);
__num_threads_leftside, __wait = __parent_wait;
__wait); }
__wait = __parent_wait; // The pivot_pos is left in place, to ensure termination.
}
// The pivot_pos is left in place, to ensure termination.
# pragma omp section # pragma omp section
{ {
__qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp, __qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp,
__iam + __num_threads_leftside, __iam + __num_threads_leftside,
__num_threads - __num_threads_leftside, __num_threads - __num_threads_leftside, __wait);
__wait); __wait = __parent_wait;
__wait = __parent_wait; }
} }
} }
} }
}
/**
/** * @brief Quicksort step doing load-balanced local sort.
* @brief Quicksort step doing load-balanced local sort. * @param __tls Array of thread-local storages.
* @param __tls Array of thread-local storages. * @param __comp Comparator.
* @param __comp Comparator. * @param __iam Number of the thread processing this function.
* @param __iam Number of the thread processing this function. */
*/ template<typename _RAIter, typename _Compare>
template<typename _RAIter, typename _Compare> void
void __qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls,
__qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls, _Compare& __comp, int __iam, bool __wait)
_Compare& __comp, int __iam, bool __wait) {
{ typedef std::iterator_traits<_RAIter> _TraitsType;
typedef std::iterator_traits<_RAIter> _TraitsType; typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename _TraitsType::difference_type _DifferenceType; typedef std::pair<_RAIter, _RAIter> _Piece;
typedef std::pair<_RAIter, _RAIter> _Piece;
_QSBThreadLocal<_RAIter>& __tl = *__tls[__iam];
_QSBThreadLocal<_RAIter>& __tl = *__tls[__iam];
_DifferenceType
_DifferenceType __base_case_n = __base_case_n = _Settings::get().sort_qsb_base_case_maximal_n;
_Settings::get().sort_qsb_base_case_maximal_n; if (__base_case_n < 2)
if (__base_case_n < 2) __base_case_n = 2;
__base_case_n = 2; _ThreadIndex __num_threads = __tl._M_num_threads;
_ThreadIndex __num_threads = __tl._M_num_threads;
// Every thread has its own random number generator.
// Every thread has its own random number generator. _RandomNumber __rng(__iam + 1);
_RandomNumber __rng(__iam + 1);
_Piece __current = __tl._M_initial;
_Piece __current = __tl._M_initial;
_DifferenceType __elements_done = 0;
_DifferenceType __elements_done = 0;
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_DifferenceType __total_elements_done = 0; _DifferenceType __total_elements_done = 0;
#endif #endif
for (;;) for (;;)
{ {
// Invariant: __current must be a valid (maybe empty) range. // Invariant: __current must be a valid (maybe empty) range.
_RAIter __begin = __current.first, __end = __current.second; _RAIter __begin = __current.first, __end = __current.second;
_DifferenceType __n = __end - __begin; _DifferenceType __n = __end - __begin;
if (__n > __base_case_n) if (__n > __base_case_n)
{ {
// Divide. // Divide.
_RAIter __pivot_pos = __begin + __rng(__n); _RAIter __pivot_pos = __begin + __rng(__n);
// Swap __pivot_pos value to end. // Swap __pivot_pos value to end.
if (__pivot_pos != (__end - 1)) if (__pivot_pos != (__end - 1))
std::swap(*__pivot_pos, *(__end - 1)); std::swap(*__pivot_pos, *(__end - 1));
__pivot_pos = __end - 1; __pivot_pos = __end - 1;
__gnu_parallel::binder2nd __gnu_parallel::binder2nd
<_Compare, _ValueType, _ValueType, bool> <_Compare, _ValueType, _ValueType, bool>
__pred(__comp, *__pivot_pos); __pred(__comp, *__pivot_pos);
// Divide, leave pivot unchanged in last place. // Divide, leave pivot unchanged in last place.
_RAIter __split_pos1, __split_pos2; _RAIter __split_pos1, __split_pos2;
__split_pos1 = __split_pos1 = __gnu_sequential::partition(__begin, __end - 1,
__gnu_sequential::partition(__begin, __end - 1, __pred); __pred);
// Left side: < __pivot_pos; __right side: >= __pivot_pos. // Left side: < __pivot_pos; __right side: >= __pivot_pos.
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1 _GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1
&& __split_pos1 < __end); && __split_pos1 < __end);
#endif #endif
// Swap pivot back to middle. // Swap pivot back to middle.
if (__split_pos1 != __pivot_pos) if (__split_pos1 != __pivot_pos)
std::swap(*__split_pos1, *__pivot_pos); std::swap(*__split_pos1, *__pivot_pos);
__pivot_pos = __split_pos1; __pivot_pos = __split_pos1;
// In case all elements are equal, __split_pos1 == 0. // In case all elements are equal, __split_pos1 == 0.
if ((__split_pos1 + 1 - __begin) < (__n >> 7) if ((__split_pos1 + 1 - __begin) < (__n >> 7)
|| (__end - __split_pos1) < (__n >> 7)) || (__end - __split_pos1) < (__n >> 7))
{ {
// Very unequal split, one part smaller than one 128th // Very unequal split, one part smaller than one 128th
// elements not strictly larger than the pivot. // elements not strictly larger than the pivot.
__gnu_parallel::__unary_negate<__gnu_parallel::__binder1st __gnu_parallel::__unary_negate<__gnu_parallel::__binder1st
<_Compare, _ValueType, _ValueType, bool>, _ValueType> <_Compare, _ValueType, _ValueType, bool>, _ValueType>
__pred(__gnu_parallel::__binder1st __pred(__gnu_parallel::__binder1st
<_Compare, _ValueType, _ValueType, bool>( <_Compare, _ValueType, _ValueType, bool>
__comp, *__pivot_pos)); (__comp, *__pivot_pos));
// Find other end of pivot-equal range. // Find other end of pivot-equal range.
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1, __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
__end, __pred); __end, __pred);
} }
else else
// Only skip the pivot. // Only skip the pivot.
__split_pos2 = __split_pos1 + 1; __split_pos2 = __split_pos1 + 1;
// Elements equal to pivot are done. // Elements equal to pivot are done.
__elements_done += (__split_pos2 - __split_pos1); __elements_done += (__split_pos2 - __split_pos1);
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
__total_elements_done += (__split_pos2 - __split_pos1); __total_elements_done += (__split_pos2 - __split_pos1);
#endif #endif
// Always push larger part onto stack. // Always push larger part onto stack.
if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2))) if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2)))
{ {
// Right side larger. // Right side larger.
if ((__split_pos2) != __end) if ((__split_pos2) != __end)
__tl._M_leftover_parts.push_front( __tl._M_leftover_parts.push_front
std::make_pair(__split_pos2, __end)); (std::make_pair(__split_pos2, __end));
//__current.first = __begin; //already set anyway //__current.first = __begin; //already set anyway
__current.second = __split_pos1; __current.second = __split_pos1;
continue; continue;
} }
else else
{ {
// Left side larger. // Left side larger.
if (__begin != __split_pos1) if (__begin != __split_pos1)
__tl._M_leftover_parts.push_front(std::make_pair(__begin, __tl._M_leftover_parts.push_front(std::make_pair
__split_pos1)); (__begin, __split_pos1));
__current.first = __split_pos2; __current.first = __split_pos2;
//__current.second = __end; //already set anyway //__current.second = __end; //already set anyway
continue; continue;
} }
} }
else else
{ {
__gnu_sequential::sort(__begin, __end, __comp); __gnu_sequential::sort(__begin, __end, __comp);
__elements_done += __n; __elements_done += __n;
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
__total_elements_done += __n; __total_elements_done += __n;
#endif #endif
// Prefer own stack, small pieces. // Prefer own stack, small pieces.
if (__tl._M_leftover_parts.pop_front(__current)) if (__tl._M_leftover_parts.pop_front(__current))
continue; continue;
# pragma omp atomic # pragma omp atomic
*__tl._M_elements_leftover -= __elements_done; *__tl._M_elements_leftover -= __elements_done;
__elements_done = 0; __elements_done = 0;
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
double __search_start = omp_get_wtime(); double __search_start = omp_get_wtime();
#endif #endif
// Look for new work. // Look for new work.
bool __successfully_stolen = false; bool __successfully_stolen = false;
while (__wait && *__tl._M_elements_leftover > 0 while (__wait && *__tl._M_elements_leftover > 0
&& !__successfully_stolen && !__successfully_stolen
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// Possible dead-lock. // Possible dead-lock.
&& (omp_get_wtime() < (__search_start + 1.0)) && (omp_get_wtime() < (__search_start + 1.0))
#endif #endif
) )
{ {
_ThreadIndex __victim; _ThreadIndex __victim;
__victim = __rng(__num_threads); __victim = __rng(__num_threads);
// Large pieces. // Large pieces.
__successfully_stolen = (__victim != __iam) __successfully_stolen = (__victim != __iam)
&& __tls[__victim]->_M_leftover_parts.pop_back(__current); && __tls[__victim]->_M_leftover_parts.pop_back(__current);
if (!__successfully_stolen) if (!__successfully_stolen)
__yield(); __yield();
#if !defined(__ICC) && !defined(__ECC) #if !defined(__ICC) && !defined(__ECC)
# pragma omp flush # pragma omp flush
#endif #endif
} }
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
if (omp_get_wtime() >= (__search_start + 1.0)) if (omp_get_wtime() >= (__search_start + 1.0))
{ {
sleep(1); sleep(1);
_GLIBCXX_PARALLEL_ASSERT(omp_get_wtime() _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
< (__search_start + 1.0)); < (__search_start + 1.0));
} }
#endif #endif
if (!__successfully_stolen) if (!__successfully_stolen)
{ {
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0); _GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0);
#endif #endif
return; return;
} }
} }
} }
} }
/** @brief Top-level quicksort routine.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __comp Comparator.
* @param __num_threads Number of threads that are allowed to work on
* this part.
*/
template<typename _RAIter, typename _Compare>
void
__parallel_sort_qsb(_RAIter __begin, _RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
{
_GLIBCXX_CALL(__end - __begin)
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef std::pair<_RAIter, _RAIter> _Piece;
typedef _QSBThreadLocal<_RAIter> _TLSType;
_DifferenceType __n = __end - __begin;
if (__n <= 1)
return;
// At least one element per processor.
if (__num_threads > __n)
__num_threads = static_cast<_ThreadIndex>(__n);
// Initialize thread local storage
_TLSType** __tls = new _TLSType*[__num_threads];
_DifferenceType __queue_size =
__num_threads * (_ThreadIndex)(log2(__n) + 1);
for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
__tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
// There can never be more than ceil(log2(__n)) ranges on the stack,
// because
// 1. Only one processor pushes onto the stack
// 2. The largest range has at most length __n
// 3. Each range is larger than half of the range remaining
volatile _DifferenceType _M_elements_leftover = __n;
for (int __i = 0; __i < __num_threads; ++__i)
{
__tls[__i]->_M_elements_leftover = &_M_elements_leftover;
__tls[__i]->_M_num_threads = __num_threads;
__tls[__i]->_M_global = std::make_pair(__begin, __end);
// Just in case nothing is left to assign.
__tls[__i]->_M_initial = std::make_pair(__end, __end);
}
// Main recursion call. /** @brief Top-level quicksort routine.
__qsb_conquer( * @param __begin Begin iterator of sequence.
__tls, __begin, __begin + __n, __comp, 0, __num_threads, true); * @param __end End iterator of sequence.
* @param __comp Comparator.
* @param __num_threads Number of threads that are allowed to work on
* this part.
*/
template<typename _RAIter, typename _Compare>
void
__parallel_sort_qsb(_RAIter __begin, _RAIter __end,
_Compare __comp, _ThreadIndex __num_threads)
{
_GLIBCXX_CALL(__end - __begin)
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef std::pair<_RAIter, _RAIter> _Piece;
typedef _QSBThreadLocal<_RAIter> _TLSType;
_DifferenceType __n = __end - __begin;
if (__n <= 1)
return;
// At least one element per processor.
if (__num_threads > __n)
__num_threads = static_cast<_ThreadIndex>(__n);
// Initialize thread local storage
_TLSType** __tls = new _TLSType*[__num_threads];
_DifferenceType __queue_size = (__num_threads
* (_ThreadIndex)(__rd_log2(__n) + 1));
for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
__tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
// There can never be more than ceil(__rd_log2(__n)) ranges on the
// stack, because
// 1. Only one processor pushes onto the stack
// 2. The largest range has at most length __n
// 3. Each range is larger than half of the range remaining
volatile _DifferenceType __elements_leftover = __n;
for (int __i = 0; __i < __num_threads; ++__i)
{
__tls[__i]->_M_elements_leftover = &__elements_leftover;
__tls[__i]->_M_num_threads = __num_threads;
__tls[__i]->_M_global = std::make_pair(__begin, __end);
// Just in case nothing is left to assign.
__tls[__i]->_M_initial = std::make_pair(__end, __end);
}
// Main recursion call.
__qsb_conquer(__tls, __begin, __begin + __n, __comp, 0,
__num_threads, true);
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// All stack must be empty. // All stack must be empty.
_Piece __dummy; _Piece __dummy;
for (int __i = 1; __i < __num_threads; ++__i) for (int __i = 1; __i < __num_threads; ++__i)
_GLIBCXX_PARALLEL_ASSERT( _GLIBCXX_PARALLEL_ASSERT(
!__tls[__i]->_M_leftover_parts.pop_back(__dummy)); !__tls[__i]->_M_leftover_parts.pop_back(__dummy));
#endif #endif
for (int __i = 0; __i < __num_threads; ++__i) for (int __i = 0; __i < __num_threads; ++__i)
delete __tls[__i]; delete __tls[__i];
delete[] __tls; delete[] __tls;
} }
} // namespace __gnu_parallel } // namespace __gnu_parallel
#endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */ #endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */
...@@ -68,5 +68,6 @@ namespace __gnu_parallel ...@@ -68,5 +68,6 @@ namespace __gnu_parallel
return true; return true;
} }
}
#endif /* _GLIBCXX_PARALLEL_CHECKERS_H */ #endif /* _GLIBCXX_PARALLEL_CHECKERS_H */
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007, 2009 Free Software Foundation, Inc. // Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -33,57 +33,56 @@ ...@@ -33,57 +33,56 @@
namespace __gnu_parallel namespace __gnu_parallel
{ {
/** @brief function to split a sequence into parts of almost equal size. /** @brief function to split a sequence into parts of almost equal size.
* *
* The resulting sequence __s of length __num_threads+1 contains the splitting * The resulting sequence __s of length __num_threads+1 contains the
* positions when splitting the range [0,__n) into parts of almost * splitting positions when splitting the range [0,__n) into parts of
* equal size (plus minus 1). The first entry is 0, the last one * almost equal size (plus minus 1). The first entry is 0, the last
* n. There may result empty parts. * one n. There may result empty parts.
* @param __n Number of elements * @param __n Number of elements
* @param __num_threads Number of parts * @param __num_threads Number of parts
* @param __s Splitters * @param __s Splitters
* @returns End of __splitter sequence, i.e. @__c __s+__num_threads+1 */ * @returns End of __splitter sequence, i.e. @__c __s+__num_threads+1 */
template<typename _DifferenceType, typename _OutputIterator> template<typename _DifferenceType, typename _OutputIterator>
_OutputIterator _OutputIterator
equally_split(_DifferenceType __n, _ThreadIndex __num_threads, equally_split(_DifferenceType __n, _ThreadIndex __num_threads,
_OutputIterator __s) _OutputIterator __s)
{ {
_DifferenceType __chunk_length = __n / __num_threads; _DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __num_longer_chunks = __n % __num_threads; _DifferenceType __num_longer_chunks = __n % __num_threads;
_DifferenceType __pos = 0; _DifferenceType __pos = 0;
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
{ {
*__s++ = __pos; *__s++ = __pos;
__pos += (__i < __num_longer_chunks) ? __pos += ((__i < __num_longer_chunks)
(__chunk_length + 1) : __chunk_length; ? (__chunk_length + 1) : __chunk_length);
} }
*__s++ = __n; *__s++ = __n;
return __s; return __s;
} }
/** @brief function to split a sequence into parts of almost equal size.
/** @brief function to split a sequence into parts of almost equal size. *
* * Returns the position of the splitting point between
* Returns the position of the splitting point between * thread number __thread_no (included) and
* thread number __thread_no (included) and * thread number __thread_no+1 (excluded).
* thread number __thread_no+1 (excluded). * @param __n Number of elements
* @param __n Number of elements * @param __num_threads Number of parts
* @param __num_threads Number of parts * @returns splitting point */
* @returns splitting point */ template<typename _DifferenceType>
template<typename _DifferenceType> _DifferenceType
_DifferenceType equally_split_point(_DifferenceType __n,
equally_split_point(_DifferenceType __n, _ThreadIndex __num_threads,
_ThreadIndex __num_threads, _ThreadIndex __thread_no)
_ThreadIndex __thread_no) {
{ _DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __chunk_length = __n / __num_threads; _DifferenceType __num_longer_chunks = __n % __num_threads;
_DifferenceType __num_longer_chunks = __n % __num_threads; if (__thread_no < __num_longer_chunks)
if (__thread_no < __num_longer_chunks) return __thread_no * (__chunk_length + 1);
return __thread_no * (__chunk_length + 1); else
else return __num_longer_chunks * (__chunk_length + 1)
return __num_longer_chunks * (__chunk_length + 1)
+ (__thread_no - __num_longer_chunks) * __chunk_length; + (__thread_no - __num_longer_chunks) * __chunk_length;
} }
} }
#endif /* _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H */ #endif /* _GLIBCXX_PARALLEL_EQUALLY_SPLIT_H */
...@@ -42,360 +42,363 @@ ...@@ -42,360 +42,363 @@
namespace __gnu_parallel namespace __gnu_parallel
{ {
/** /**
* @brief Parallel std::find, switch for different algorithms. * @brief Parallel std::find, switch for different algorithms.
* @param __begin1 Begin iterator of first sequence. * @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence. * @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence. Must have same * @param __begin2 Begin iterator of second sequence. Must have same
* length as first sequence. * length as first sequence.
* @param __pred Find predicate. * @param __pred Find predicate.
* @param __selector _Functionality (e. g. std::find_if (), std::equal(),...) * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
* @return Place of finding in both sequences. * @return Place of finding in both sequences.
*/ */
template<typename _RAIter1, template<typename _RAIter1,
typename _RAIter2, typename _RAIter2,
typename _Pred, typename _Pred,
typename _Selector> typename _Selector>
inline std::pair<_RAIter1, _RAIter2> inline std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, _RAIter1 __end1, __find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred, _Selector __selector) _RAIter2 __begin2, _Pred __pred, _Selector __selector)
{ {
switch (_Settings::get().find_algorithm) switch (_Settings::get().find_algorithm)
{ {
case GROWING_BLOCKS: case GROWING_BLOCKS:
return __find_template(__begin1, __end1, __begin2, __pred, __selector, return __find_template(__begin1, __end1, __begin2, __pred,
growing_blocks_tag()); __selector, growing_blocks_tag());
case CONSTANT_SIZE_BLOCKS: case CONSTANT_SIZE_BLOCKS:
return __find_template(__begin1, __end1, __begin2, __pred, __selector, return __find_template(__begin1, __end1, __begin2, __pred,
constant_size_blocks_tag()); __selector, constant_size_blocks_tag());
case EQUAL_SPLIT: case EQUAL_SPLIT:
return __find_template(__begin1, __end1, __begin2, __pred, __selector, return __find_template(__begin1, __end1, __begin2, __pred,
equal_split_tag()); __selector, equal_split_tag());
default: default:
_GLIBCXX_PARALLEL_ASSERT(false); _GLIBCXX_PARALLEL_ASSERT(false);
return std::make_pair(__begin1, __begin2); return std::make_pair(__begin1, __begin2);
} }
} }
#if _GLIBCXX_FIND_EQUAL_SPLIT #if _GLIBCXX_FIND_EQUAL_SPLIT
/** /**
* @brief Parallel std::find, equal splitting variant. * @brief Parallel std::find, equal splitting variant.
* @param __begin1 Begin iterator of first sequence. * @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence. * @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence. Second __sequence * @param __begin2 Begin iterator of second sequence. Second __sequence
* must have same length as first sequence. * must have same length as first sequence.
* @param __pred Find predicate. * @param __pred Find predicate.
* @param __selector _Functionality (e. g. std::find_if (), std::equal(),...) * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
* @return Place of finding in both sequences. * @return Place of finding in both sequences.
*/ */
template<typename _RAIter1, template<typename _RAIter1,
typename _RAIter2, typename _RAIter2,
typename _Pred, typename _Pred,
typename _Selector> typename _Selector>
std::pair<_RAIter1, _RAIter2> std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, __find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter1 __end1, _RAIter2 __begin2, _Pred __pred,
_RAIter2 __begin2, _Selector __selector, equal_split_tag)
_Pred __pred, {
_Selector __selector, _GLIBCXX_CALL(__end1 - __begin1)
equal_split_tag)
{ typedef std::iterator_traits<_RAIter1> _TraitsType;
_GLIBCXX_CALL(__end1 - __begin1) typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename _TraitsType::value_type _ValueType;
typedef std::iterator_traits<_RAIter1> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType; _DifferenceType __length = __end1 - __begin1;
typedef typename _TraitsType::value_type _ValueType; _DifferenceType __result = __length;
_DifferenceType* __borders;
_DifferenceType __length = __end1 - __begin1;
_DifferenceType __result = __length; omp_lock_t __result_lock;
_DifferenceType* __borders; omp_init_lock(&__result_lock);
omp_lock_t __result_lock; _ThreadIndex __num_threads = __get_max_threads();
omp_init_lock(&__result_lock); # pragma omp parallel num_threads(__num_threads)
_ThreadIndex __num_threads = __get_max_threads();
# pragma omp parallel num_threads(__num_threads)
{ {
# pragma omp single # pragma omp single
{ {
__num_threads = omp_get_num_threads(); __num_threads = omp_get_num_threads();
__borders = new _DifferenceType[__num_threads + 1]; __borders = new _DifferenceType[__num_threads + 1];
equally_split(__length, __num_threads, __borders); equally_split(__length, __num_threads, __borders);
} //single } //single
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
_DifferenceType __start = __borders[__iam], _DifferenceType __start = __borders[__iam],
__stop = __borders[__iam + 1]; __stop = __borders[__iam + 1];
_RAIter1 __i1 = __begin1 + __start; _RAIter1 __i1 = __begin1 + __start;
_RAIter2 __i2 = __begin2 + __start; _RAIter2 __i2 = __begin2 + __start;
for (_DifferenceType __pos = __start; __pos < __stop; ++__pos) for (_DifferenceType __pos = __start; __pos < __stop; ++__pos)
{ {
#pragma omp flush(__result) # pragma omp flush(__result)
// Result has been set to something lower. // Result has been set to something lower.
if (__result < __pos) if (__result < __pos)
break; break;
if (__selector(__i1, __i2, __pred)) if (__selector(__i1, __i2, __pred))
{ {
omp_set_lock(&__result_lock); omp_set_lock(&__result_lock);
if (__pos < __result) if (__pos < __result)
__result = __pos; __result = __pos;
omp_unset_lock(&__result_lock); omp_unset_lock(&__result_lock);
break; break;
} }
++__i1; ++__i1;
++__i2; ++__i2;
} }
} //parallel } //parallel
omp_destroy_lock(&__result_lock); omp_destroy_lock(&__result_lock);
delete[] __borders; delete[] __borders;
return return std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result); __begin2 + __result);
} }
#endif #endif
#if _GLIBCXX_FIND_GROWING_BLOCKS #if _GLIBCXX_FIND_GROWING_BLOCKS
/** /**
* @brief Parallel std::find, growing block size variant. * @brief Parallel std::find, growing block size variant.
* @param __begin1 Begin iterator of first sequence. * @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence. * @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence. Second __sequence * @param __begin2 Begin iterator of second sequence. Second __sequence
* must have same length as first sequence. * must have same length as first sequence.
* @param __pred Find predicate. * @param __pred Find predicate.
* @param __selector _Functionality (e. g. std::find_if (), std::equal(),...) * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
* @return Place of finding in both sequences. * @return Place of finding in both sequences.
* @see __gnu_parallel::_Settings::find_sequential_search_size * @see __gnu_parallel::_Settings::find_sequential_search_size
* @see __gnu_parallel::_Settings::find_initial_block_size * @see __gnu_parallel::_Settings::find_initial_block_size
* @see __gnu_parallel::_Settings::find_maximum_block_size * @see __gnu_parallel::_Settings::find_maximum_block_size
* @see __gnu_parallel::_Settings::find_increasing_factor * @see __gnu_parallel::_Settings::find_increasing_factor
* *
* There are two main differences between the growing blocks and * There are two main differences between the growing blocks and
* the constant-size blocks variants. * the constant-size blocks variants.
* 1. For GB, the block size grows; for CSB, the block size is fixed. * 1. For GB, the block size grows; for CSB, the block size is fixed.
* 2. For GB, the blocks are allocated dynamically; * 2. For GB, the blocks are allocated dynamically;
* for CSB, the blocks are allocated in a predetermined manner, * for CSB, the blocks are allocated in a predetermined manner,
* namely spacial round-robin. * namely spacial round-robin.
*/ */
template<typename _RAIter1, template<typename _RAIter1,
typename _RAIter2, typename _RAIter2,
typename _Pred, typename _Pred,
typename _Selector> typename _Selector>
std::pair<_RAIter1, _RAIter2> std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, _RAIter1 __end1, __find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred, _Selector __selector, _RAIter2 __begin2, _Pred __pred, _Selector __selector,
growing_blocks_tag) growing_blocks_tag)
{ {
_GLIBCXX_CALL(__end1 - __begin1) _GLIBCXX_CALL(__end1 - __begin1)
typedef std::iterator_traits<_RAIter1> _TraitsType; typedef std::iterator_traits<_RAIter1> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType; typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::value_type _ValueType;
const _Settings& __s = _Settings::get(); const _Settings& __s = _Settings::get();
_DifferenceType __length = __end1 - __begin1; _DifferenceType __length = __end1 - __begin1;
_DifferenceType __sequential_search_size = _DifferenceType
std::min<_DifferenceType>(__length, __s.find_sequential_search_size); __sequential_search_size = std::min<_DifferenceType>
(__length, __s.find_sequential_search_size);
// Try it sequentially first.
std::pair<_RAIter1, _RAIter2> __find_seq_result = // Try it sequentially first.
__selector._M_sequential_algorithm( std::pair<_RAIter1, _RAIter2>
__begin1, __begin1 + __sequential_search_size, __begin2, __pred); __find_seq_result = __selector._M_sequential_algorithm
(__begin1, __begin1 + __sequential_search_size,
if (__find_seq_result.first != (__begin1 + __sequential_search_size)) __begin2, __pred);
return __find_seq_result;
if (__find_seq_result.first != (__begin1 + __sequential_search_size))
// Index of beginning of next free block (after sequential find). return __find_seq_result;
_DifferenceType __next_block_start = __sequential_search_size;
_DifferenceType __result = __length; // Index of beginning of next free block (after sequential find).
_DifferenceType __next_block_start = __sequential_search_size;
omp_lock_t __result_lock; _DifferenceType __result = __length;
omp_init_lock(&__result_lock);
omp_lock_t __result_lock;
_ThreadIndex __num_threads = __get_max_threads(); omp_init_lock(&__result_lock);
# pragma omp parallel shared(__result) num_threads(__num_threads)
_ThreadIndex __num_threads = __get_max_threads();
# pragma omp parallel shared(__result) num_threads(__num_threads)
{ {
# pragma omp single # pragma omp single
__num_threads = omp_get_num_threads(); __num_threads = omp_get_num_threads();
// Not within first __k elements -> start parallel. // Not within first __k elements -> start parallel.
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
_DifferenceType __block_size = __s.find_initial_block_size; _DifferenceType __block_size = __s.find_initial_block_size;
_DifferenceType __start = _DifferenceType __start = __fetch_and_add<_DifferenceType>
__fetch_and_add<_DifferenceType>(&__next_block_start, __block_size); (&__next_block_start, __block_size);
// Get new block, update pointer to next block. // Get new block, update pointer to next block.
_DifferenceType __stop = _DifferenceType __stop =
std::min<_DifferenceType>(__length, __start + __block_size); std::min<_DifferenceType>(__length, __start + __block_size);
std::pair<_RAIter1, _RAIter2> __local_result; std::pair<_RAIter1, _RAIter2> __local_result;
while (__start < __length) while (__start < __length)
{ {
# pragma omp flush(__result) # pragma omp flush(__result)
// Get new value of result. // Get new value of result.
if (__result < __start) if (__result < __start)
{ {
// No chance to find first element. // No chance to find first element.
break; break;
} }
__local_result = __selector._M_sequential_algorithm( __local_result = __selector._M_sequential_algorithm
__begin1 + __start, __begin1 + __stop, (__begin1 + __start, __begin1 + __stop,
__begin2 + __start, __pred); __begin2 + __start, __pred);
if (__local_result.first != (__begin1 + __stop))
{ if (__local_result.first != (__begin1 + __stop))
omp_set_lock(&__result_lock); {
if ((__local_result.first - __begin1) < __result) omp_set_lock(&__result_lock);
{ if ((__local_result.first - __begin1) < __result)
__result = __local_result.first - __begin1; {
__result = __local_result.first - __begin1;
// Result cannot be in future blocks, stop algorithm.
__fetch_and_add<_DifferenceType>( // Result cannot be in future blocks, stop algorithm.
&__next_block_start, __length); __fetch_and_add<_DifferenceType>(&__next_block_start,
} __length);
omp_unset_lock(&__result_lock); }
} omp_unset_lock(&__result_lock);
}
__block_size = std::min<_DifferenceType>(
__block_size * __s.find_increasing_factor, __block_size = std::min<_DifferenceType>
__s.find_maximum_block_size); (__block_size * __s.find_increasing_factor,
__s.find_maximum_block_size);
// Get new block, update pointer to next block.
__start = // Get new block, update pointer to next block.
__fetch_and_add<_DifferenceType>( __start = __fetch_and_add<_DifferenceType>(&__next_block_start,
&__next_block_start, __block_size); __block_size);
__stop = ((__length < (__start + __block_size)) __stop = (__length < (__start + __block_size)
? __length : (__start + __block_size)); ? __length : (__start + __block_size));
} }
} //parallel } //parallel
omp_destroy_lock(&__result_lock); omp_destroy_lock(&__result_lock);
// Return iterator on found element. // Return iterator on found element.
return return
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result); std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
} __begin2 + __result);
}
#endif #endif
#if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS #if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS
/** /**
* @brief Parallel std::find, constant block size variant. * @brief Parallel std::find, constant block size variant.
* @param __begin1 Begin iterator of first sequence. * @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence. * @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence. Second __sequence * @param __begin2 Begin iterator of second sequence. Second __sequence
* must have same length as first sequence. * must have same length as first sequence.
* @param __pred Find predicate. * @param __pred Find predicate.
* @param __selector _Functionality (e. g. std::find_if (), std::equal(),...) * @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
* @return Place of finding in both sequences. * @return Place of finding in both sequences.
* @see __gnu_parallel::_Settings::find_sequential_search_size * @see __gnu_parallel::_Settings::find_sequential_search_size
* @see __gnu_parallel::_Settings::find_block_size * @see __gnu_parallel::_Settings::find_block_size
* There are two main differences between the growing blocks and the * There are two main differences between the growing blocks and the
* constant-size blocks variants. * constant-size blocks variants.
* 1. For GB, the block size grows; for CSB, the block size is fixed. * 1. For GB, the block size grows; for CSB, the block size is fixed.
* 2. For GB, the blocks are allocated dynamically; for CSB, the * 2. For GB, the blocks are allocated dynamically; for CSB, the
* blocks are allocated in a predetermined manner, namely spacial * blocks are allocated in a predetermined manner, namely spacial
* round-robin. * round-robin.
*/ */
template<typename _RAIter1, template<typename _RAIter1,
typename _RAIter2, typename _RAIter2,
typename _Pred, typename _Pred,
typename _Selector> typename _Selector>
std::pair<_RAIter1, _RAIter2> std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, _RAIter1 __end1, __find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred, _Selector __selector, _RAIter2 __begin2, _Pred __pred, _Selector __selector,
constant_size_blocks_tag) constant_size_blocks_tag)
{ {
_GLIBCXX_CALL(__end1 - __begin1) _GLIBCXX_CALL(__end1 - __begin1)
typedef std::iterator_traits<_RAIter1> _TraitsType; typedef std::iterator_traits<_RAIter1> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType; typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::value_type _ValueType;
const _Settings& __s = _Settings::get(); const _Settings& __s = _Settings::get();
_DifferenceType __length = __end1 - __begin1; _DifferenceType __length = __end1 - __begin1;
_DifferenceType __sequential_search_size = std::min<_DifferenceType>( _DifferenceType __sequential_search_size = std::min<_DifferenceType>
__length, __s.find_sequential_search_size); (__length, __s.find_sequential_search_size);
// Try it sequentially first. // Try it sequentially first.
std::pair<_RAIter1, _RAIter2> __find_seq_result = std::pair<_RAIter1, _RAIter2>
__selector._M_sequential_algorithm( __find_seq_result = __selector._M_sequential_algorithm
__begin1, __begin1 + __sequential_search_size, __begin2, __pred); (__begin1, __begin1 + __sequential_search_size, __begin2, __pred);
if (__find_seq_result.first != (__begin1 + __sequential_search_size)) if (__find_seq_result.first != (__begin1 + __sequential_search_size))
return __find_seq_result; return __find_seq_result;
_DifferenceType __result = __length; _DifferenceType __result = __length;
omp_lock_t __result_lock; omp_lock_t __result_lock;
omp_init_lock(&__result_lock); omp_init_lock(&__result_lock);
// Not within first __sequential_search_size elements -> start parallel. // Not within first __sequential_search_size elements -> start parallel.
_ThreadIndex __num_threads = __get_max_threads(); _ThreadIndex __num_threads = __get_max_threads();
# pragma omp parallel shared(__result) num_threads(__num_threads) # pragma omp parallel shared(__result) num_threads(__num_threads)
{ {
# pragma omp single # pragma omp single
__num_threads = omp_get_num_threads(); __num_threads = omp_get_num_threads();
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
_DifferenceType __block_size = __s.find_initial_block_size; _DifferenceType __block_size = __s.find_initial_block_size;
// First element of thread's current iteration. // First element of thread's current iteration.
_DifferenceType __iteration_start = __sequential_search_size; _DifferenceType __iteration_start = __sequential_search_size;
// Where to work (initialization). // Where to work (initialization).
_DifferenceType __start = __iteration_start + __iam * __block_size; _DifferenceType __start = __iteration_start + __iam * __block_size;
_DifferenceType __stop = _DifferenceType __stop = std::min<_DifferenceType>(__length,
std::min<_DifferenceType>(__length, __start + __block_size); __start
+ __block_size);
std::pair<_RAIter1, _RAIter2> __local_result; std::pair<_RAIter1, _RAIter2> __local_result;
while (__start < __length) while (__start < __length)
{ {
// Get new value of result. // Get new value of result.
# pragma omp flush(__result) # pragma omp flush(__result)
// No chance to find first element. // No chance to find first element.
if (__result < __start) if (__result < __start)
break; break;
__local_result = __selector._M_sequential_algorithm(
__begin1 + __start, __begin1 + __stop, __local_result = __selector._M_sequential_algorithm
__begin2 + __start, __pred); (__begin1 + __start, __begin1 + __stop,
if (__local_result.first != (__begin1 + __stop)) __begin2 + __start, __pred);
{
omp_set_lock(&__result_lock); if (__local_result.first != (__begin1 + __stop))
if ((__local_result.first - __begin1) < __result) {
__result = __local_result.first - __begin1; omp_set_lock(&__result_lock);
omp_unset_lock(&__result_lock); if ((__local_result.first - __begin1) < __result)
// Will not find better value in its interval. __result = __local_result.first - __begin1;
break; omp_unset_lock(&__result_lock);
} // Will not find better value in its interval.
break;
__iteration_start += __num_threads * __block_size; }
// Where to work. __iteration_start += __num_threads * __block_size;
__start = __iteration_start + __iam * __block_size;
__stop = std::min<_DifferenceType>( // Where to work.
__length, __start + __block_size); __start = __iteration_start + __iam * __block_size;
} __stop = std::min<_DifferenceType>(__length,
__start + __block_size);
}
} //parallel } //parallel
omp_destroy_lock(&__result_lock); omp_destroy_lock(&__result_lock);
// Return iterator on found element. // Return iterator on found element.
return return std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result); __begin2 + __result);
} }
#endif #endif
} // end namespace } // end namespace
......
...@@ -103,12 +103,12 @@ namespace __gnu_parallel ...@@ -103,12 +103,12 @@ namespace __gnu_parallel
typename _Pred> typename _Pred>
std::pair<_RAIter1, _RAIter2> std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1, _M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred) _RAIter2 __begin2, _Pred __pred)
{ {
// Passed end iterator is one short. // Passed end iterator is one short.
_RAIter1 __spot = adjacent_find(__begin1, __end1 + 1, _RAIter1 __spot = adjacent_find(__begin1, __end1 + 1,
__pred, sequential_tag()); __pred, sequential_tag());
if (__spot == (__end1 + 1)) if (__spot == (__end1 + 1))
__spot = __end1; __spot = __end1;
return std::make_pair(__spot, __begin2); return std::make_pair(__spot, __begin2);
...@@ -141,56 +141,57 @@ namespace __gnu_parallel ...@@ -141,56 +141,57 @@ namespace __gnu_parallel
typename _Pred> typename _Pred>
std::pair<_RAIter1, _RAIter2> std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1, _M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred) _RAIter2 __begin2, _Pred __pred)
{ return mismatch(__begin1, __end1, __begin2, __pred, sequential_tag()); { return mismatch(__begin1, __end1, __begin2,
} __pred, sequential_tag()); }
}; };
/** @brief Test predicate on several elements. */ /** @brief Test predicate on several elements. */
template<typename _FIterator> template<typename _FIterator>
struct __find_first_of_selector : public __generic_find_selector struct __find_first_of_selector : public __generic_find_selector
{ {
_FIterator _M_begin; _FIterator _M_begin;
_FIterator _M_end; _FIterator _M_end;
explicit __find_first_of_selector(_FIterator __begin, _FIterator __end) explicit __find_first_of_selector(_FIterator __begin,
: _M_begin(__begin), _M_end(__end) { } _FIterator __end)
: _M_begin(__begin), _M_end(__end) { }
/** @brief Test on one position.
* @param __i1 _Iterator on first sequence. /** @brief Test on one position.
* @param __i2 _Iterator on second sequence (unused). * @param __i1 _Iterator on first sequence.
* @param __pred Find predicate. */ * @param __i2 _Iterator on second sequence (unused).
template<typename _RAIter1, typename _RAIter2, * @param __pred Find predicate. */
typename _Pred> template<typename _RAIter1, typename _RAIter2,
bool typename _Pred>
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred) bool
{ operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
for (_FIterator __pos_in_candidates = _M_begin; {
__pos_in_candidates != _M_end; ++__pos_in_candidates) for (_FIterator __pos_in_candidates = _M_begin;
if (__pred(*__i1, *__pos_in_candidates)) __pos_in_candidates != _M_end; ++__pos_in_candidates)
return true; if (__pred(*__i1, *__pos_in_candidates))
return false; return true;
} return false;
}
/** @brief Corresponding sequential algorithm on a sequence.
* @param __begin1 Begin iterator of first sequence. /** @brief Corresponding sequential algorithm on a sequence.
* @param __end1 End iterator of first sequence. * @param __begin1 Begin iterator of first sequence.
* @param __begin2 Begin iterator of second sequence. * @param __end1 End iterator of first sequence.
* @param __pred Find predicate. */ * @param __begin2 Begin iterator of second sequence.
template<typename _RAIter1, typename _RAIter2, * @param __pred Find predicate. */
typename _Pred> template<typename _RAIter1, typename _RAIter2,
std::pair<_RAIter1, _RAIter2> typename _Pred>
_M_sequential_algorithm(_RAIter1 __begin1, std::pair<_RAIter1, _RAIter2>
_RAIter1 __end1, _M_sequential_algorithm(_RAIter1 __begin1,
_RAIter2 __begin2, _Pred __pred) _RAIter1 __end1,
{ _RAIter2 __begin2, _Pred __pred)
return std::make_pair( {
find_first_of(__begin1, __end1, _M_begin, _M_end, __pred, return std::make_pair(find_first_of(__begin1, __end1,
sequential_tag()), __begin2); _M_begin, _M_end, __pred,
} sequential_tag()), __begin2);
}; }
};
} }
#endif /* _GLIBCXX_PARALLEL_FIND_SELECTORS_H */ #endif /* _GLIBCXX_PARALLEL_FIND_SELECTORS_H */
...@@ -69,31 +69,21 @@ namespace __gnu_parallel ...@@ -69,31 +69,21 @@ namespace __gnu_parallel
_Parallelism __parallelism_tag) _Parallelism __parallelism_tag)
{ {
if (__parallelism_tag == parallel_unbalanced) if (__parallelism_tag == parallel_unbalanced)
return __for_each_template_random_access_ed(__begin, __end, __user_op, return __for_each_template_random_access_ed
__functionality, __reduction, (__begin, __end, __user_op, __functionality, __reduction,
__reduction_start, __reduction_start, __output, __bound);
__output, __bound);
else if (__parallelism_tag == parallel_omp_loop) else if (__parallelism_tag == parallel_omp_loop)
return __for_each_template_random_access_omp_loop( return __for_each_template_random_access_omp_loop
__begin, __end, __user_op, (__begin, __end, __user_op, __functionality, __reduction,
__functionality, __reduction_start, __output, __bound);
__reduction,
__reduction_start,
__output, __bound);
else if (__parallelism_tag == parallel_omp_loop_static) else if (__parallelism_tag == parallel_omp_loop_static)
return __for_each_template_random_access_omp_loop( return __for_each_template_random_access_omp_loop
__begin, __end, __user_op, (__begin, __end, __user_op, __functionality, __reduction,
__functionality, __reduction_start, __output, __bound);
__reduction,
__reduction_start,
__output, __bound);
else //e. g. parallel_balanced else //e. g. parallel_balanced
return __for_each_template_random_access_workstealing(__begin, __end, return __for_each_template_random_access_workstealing
__user_op, (__begin, __end, __user_op, __functionality, __reduction,
__functionality, __reduction_start, __output, __bound);
__reduction,
__reduction_start,
__output, __bound);
} }
} }
......
...@@ -48,11 +48,11 @@ namespace __gnu_parallel ...@@ -48,11 +48,11 @@ namespace __gnu_parallel
template<typename _IIter> template<typename _IIter>
void void
__shrink_and_double(std::vector<_IIter>& __os_starts, __shrink_and_double(std::vector<_IIter>& __os_starts,
size_t& __count_to_two, size_t& __range_length, size_t& __count_to_two, size_t& __range_length,
const bool __make_twice) const bool __make_twice)
{ {
++__count_to_two; ++__count_to_two;
if (not __make_twice or __count_to_two < 2) if (!__make_twice || __count_to_two < 2)
__shrink(__os_starts, __count_to_two, __range_length); __shrink(__os_starts, __count_to_two, __range_length);
else else
{ {
...@@ -68,7 +68,7 @@ namespace __gnu_parallel ...@@ -68,7 +68,7 @@ namespace __gnu_parallel
template<typename _IIter> template<typename _IIter>
void void
__shrink(std::vector<_IIter>& __os_starts, size_t& __count_to_two, __shrink(std::vector<_IIter>& __os_starts, size_t& __count_to_two,
size_t& __range_length) size_t& __range_length)
{ {
for (typename std::vector<_IIter>::size_type __i = 0; for (typename std::vector<_IIter>::size_type __i = 0;
__i <= (__os_starts.size() / 2); ++__i) __i <= (__os_starts.size() / 2); ++__i)
...@@ -112,8 +112,8 @@ namespace __gnu_parallel ...@@ -112,8 +112,8 @@ namespace __gnu_parallel
std::vector<_IIter> __os_starts(2 * __oversampling * __num_parts + 1); std::vector<_IIter> __os_starts(2 * __oversampling * __num_parts + 1);
__os_starts[0]= __begin; __os_starts[0] = __begin;
_IIter __prev = __begin, __it = __begin; _IIter __prev = __begin, __it = __begin;
size_t __dist_limit = 0, __dist = 0; size_t __dist_limit = 0, __dist = 0;
size_t __cur = 1, __next = 1; size_t __cur = 1, __next = 1;
size_t __range_length = 1; size_t __range_length = 1;
......
...@@ -40,993 +40,1000 @@ ...@@ -40,993 +40,1000 @@
namespace __gnu_parallel namespace __gnu_parallel
{ {
/** /**
* @brief Guarded loser/tournament tree. * @brief Guarded loser/tournament tree.
* *
* The smallest element is at the top. * The smallest element is at the top.
* *
* Guarding is done explicitly through one flag _M_sup per element, * Guarding is done explicitly through one flag _M_sup per element,
* inf is not needed due to a better initialization routine. This * inf is not needed due to a better initialization routine. This
* is a well-performing variant. * is a well-performing variant.
* *
* @param _Tp the element type * @param _Tp the element type
* @param _Compare the comparator to use, defaults to std::less<_Tp> * @param _Compare the comparator to use, defaults to std::less<_Tp>
*/ */
template<typename _Tp, typename _Compare> template<typename _Tp, typename _Compare>
class _LoserTreeBase class _LoserTreeBase
{
protected:
/** @brief Internal representation of a _LoserTree element. */
struct _Loser
{ {
/** @brief flag, true iff this is a "maximum" __sentinel. */ protected:
bool _M_sup; /** @brief Internal representation of a _LoserTree element. */
/** @brief __index of the __source __sequence. */ struct _Loser
int _M_source; {
/** @brief _M_key of the element in the _LoserTree. */ /** @brief flag, true iff this is a "maximum" __sentinel. */
_Tp _M_key; bool _M_sup;
/** @brief __index of the __source __sequence. */
int _M_source;
/** @brief _M_key of the element in the _LoserTree. */
_Tp _M_key;
};
unsigned int _M_ik, _M_k, _M_offset;
/** log_2{_M_k} */
unsigned int _M_log_k;
/** @brief _LoserTree __elements. */
_Loser* _M_losers;
/** @brief _Compare to use. */
_Compare _M_comp;
/**
* @brief State flag that determines whether the _LoserTree is empty.
*
* Only used for building the _LoserTree.
*/
bool _M_first_insert;
public:
/**
* @brief The constructor.
*
* @param __k The number of sequences to merge.
* @param __comp The comparator to use.
*/
_LoserTreeBase(unsigned int __k, _Compare __comp)
: _M_comp(__comp)
{
_M_ik = __k;
// Compute log_2{_M_k} for the _Loser Tree
_M_log_k = __rd_log2(_M_ik - 1) + 1;
// Next greater power of 2.
_M_k = 1 << _M_log_k;
_M_offset = _M_k;
// Avoid default-constructing _M_losers[]._M_key
_M_losers = static_cast<_Loser*>(::operator new(2 * _M_k
* sizeof(_Loser)));
for (unsigned int __i = _M_ik - 1; __i < _M_k; ++__i)
_M_losers[__i + _M_k]._M_sup = true;
_M_first_insert = true;
}
/**
* @brief The destructor.
*/
~_LoserTreeBase()
{ ::operator delete(_M_losers); }
/**
* @brief Initializes the sequence "_M_source" with the element "__key".
*
* @param __key the element to insert
* @param __source __index of the __source __sequence
* @param __sup flag that determines whether the value to insert is an
* explicit __supremum.
*/
void
__insert_start(const _Tp& __key, int __source, bool __sup)
{
unsigned int __pos = _M_k + __source;
if(_M_first_insert)
{
// Construct all keys, so we can easily deconstruct them.
for (unsigned int __i = 0; __i < (2 * _M_k); ++__i)
new(&(_M_losers[__i]._M_key)) _Tp(__key);
_M_first_insert = false;
}
else
new(&(_M_losers[__pos]._M_key)) _Tp(__key);
_M_losers[__pos]._M_sup = __sup;
_M_losers[__pos]._M_source = __source;
}
/**
* @return the index of the sequence with the smallest element.
*/
int __get_min_source()
{ return _M_losers[0]._M_source; }
}; };
unsigned int _M_ik, _M_k, _M_offset;
/** log_2{_M_k} */
unsigned int _M_log_k;
/** @brief _LoserTree __elements. */
_Loser* _M_losers;
/** @brief _Compare to use. */
_Compare _M_comp;
/** /**
* @brief State flag that determines whether the _LoserTree is empty. * @brief Stable _LoserTree variant.
* *
* Only used for building the _LoserTree. * Provides the stable implementations of insert_start, __init_winner,
*/ * __init and __delete_min_insert.
bool _M_first_insert;
public:
/**
* @brief The constructor.
* *
* @param __k The number of sequences to merge. * Unstable variant is done using partial specialisation below.
* @param __comp The comparator to use.
*/ */
_LoserTreeBase(unsigned int __k, _Compare __comp) template<bool __stable/* default == true */, typename _Tp,
: _M_comp(__comp) typename _Compare>
class _LoserTree
: public _LoserTreeBase<_Tp, _Compare>
{ {
_M_ik = __k; typedef _LoserTreeBase<_Tp, _Compare> _Base;
using _Base::_M_k;
// Compute log_2{_M_k} for the _Loser Tree using _Base::_M_losers;
_M_log_k = __rd_log2(_M_ik - 1) + 1; using _Base::_M_first_insert;
// Next greater power of 2. public:
_M_k = 1 << _M_log_k; _LoserTree(unsigned int __k, _Compare __comp)
_M_offset = _M_k; : _Base::_LoserTreeBase(__k, __comp)
{ }
// Avoid default-constructing _M_losers[]._M_key
_M_losers unsigned int
= static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser))); __init_winner(unsigned int __root)
for (unsigned int __i = _M_ik - 1; __i < _M_k; ++__i) {
_M_losers[__i + _M_k]._M_sup = true; if (__root >= _M_k)
return __root;
_M_first_insert = true; else
} {
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(_M_losers[__right]._M_key,
_M_losers[__left]._M_key)))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
return __left;
}
else
{
// Right one is less.
_M_losers[__root] = _M_losers[__left];
return __right;
}
}
}
void __init()
{ _M_losers[0] = _M_losers[__init_winner(1)]; }
/**
* @brief Delete the smallest element and insert a new element from
* the previously smallest element's sequence.
*
* This implementation is stable.
*/
// Do not pass a const reference since __key will be used as
// local variable.
void
__delete_min_insert(_Tp __key, bool __sup)
{
#if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
/** int __source = _M_losers[0]._M_source;
* @brief The destructor. for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
*/ __pos /= 2)
~_LoserTreeBase() {
{ ::operator delete(_M_losers); } // The smaller one gets promoted, ties are broken by _M_source.
if ((__sup && (!_M_losers[__pos]._M_sup
|| _M_losers[__pos]._M_source < __source))
|| (!__sup && !_M_losers[__pos]._M_sup
&& ((_M_comp(_M_losers[__pos]._M_key, __key))
|| (!_M_comp(__key, _M_losers[__pos]._M_key)
&& _M_losers[__pos]._M_source < __source))))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_sup, __sup);
std::swap(_M_losers[__pos]._M_source, __source);
std::swap(_M_losers[__pos]._M_key, __key);
}
}
_M_losers[0]._M_sup = __sup;
_M_losers[0]._M_source = __source;
_M_losers[0]._M_key = __key;
}
};
/** /**
* @brief Initializes the sequence "_M_source" with the element "_M_key". * @brief Unstable _LoserTree variant.
* *
* @param _M_key the element to insert * Stability (non-stable here) is selected with partial specialization.
* @param _M_source __index of the __source __sequence
* @param _M_sup flag that determines whether the value to insert is an
* explicit __supremum.
*/ */
void template<typename _Tp, typename _Compare>
__insert_start(const _Tp& _M_key, int _M_source, bool _M_sup) class _LoserTree</* __stable == */false, _Tp, _Compare>
: public _LoserTreeBase<_Tp, _Compare>
{ {
unsigned int __pos = _M_k + _M_source; typedef _LoserTreeBase<_Tp, _Compare> _Base;
using _Base::_M_log_k;
if(_M_first_insert) using _Base::_M_k;
{ using _Base::_M_losers;
// Construct all keys, so we can easily deconstruct them. using _Base::_M_first_insert;
for (unsigned int __i = 0; __i < (2 * _M_k); ++__i)
new(&(_M_losers[__i]._M_key)) _Tp(_M_key); public:
_M_first_insert = false; _LoserTree(unsigned int __k, _Compare __comp)
} : _Base::_LoserTreeBase(__k, __comp)
else { }
new(&(_M_losers[__pos]._M_key)) _Tp(_M_key);
/**
_M_losers[__pos]._M_sup = _M_sup; * Computes the winner of the competition at position "__root".
_M_losers[__pos]._M_source = _M_source; *
} * Called recursively (starting at 0) to build the initial tree.
*
* @param __root __index of the "game" to start.
*/
unsigned int
__init_winner(unsigned int __root)
{
if (__root >= _M_k)
return __root;
else
{
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(_M_losers[__right]._M_key,
_M_losers[__left]._M_key)))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
return __left;
}
else
{
// Right one is less.
_M_losers[__root] = _M_losers[__left];
return __right;
}
}
}
void
__init()
{ _M_losers[0] = _M_losers[__init_winner(1)]; }
/**
* Delete the _M_key smallest element and insert the element __key
* instead.
*
* @param __key the _M_key to insert
* @param __sup true iff __key is an explicitly marked supremum
*/
// Do not pass a const reference since __key will be used as local
// variable.
void
__delete_min_insert(_Tp __key, bool __sup)
{
#if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
/** int __source = _M_losers[0]._M_source;
* @return the index of the sequence with the smallest element. for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
*/ __pos /= 2)
int __get_min_source() {
{ return _M_losers[0]._M_source; } // The smaller one gets promoted.
}; if (__sup || (!_M_losers[__pos]._M_sup
&& _M_comp(_M_losers[__pos]._M_key, __key)))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_sup, __sup);
std::swap(_M_losers[__pos]._M_source, __source);
std::swap(_M_losers[__pos]._M_key, __key);
}
}
_M_losers[0]._M_sup = __sup;
_M_losers[0]._M_source = __source;
_M_losers[0]._M_key = __key;
}
};
/** /**
* @brief Stable _LoserTree variant. * @brief Base class of _Loser Tree implementation using pointers.
*
* Provides the stable implementations of insert_start, __init_winner,
* __init and __delete_min_insert.
*
* Unstable variant is done using partial specialisation below.
*/ */
template<bool __stable/* default == true */, typename _Tp, template<typename _Tp, typename _Compare>
typename _Compare> class _LoserTreePointerBase
class _LoserTree
: public _LoserTreeBase<_Tp, _Compare>
{
typedef _LoserTreeBase<_Tp, _Compare> _Base;
using _Base::_M_k;
using _Base::_M_losers;
using _Base::_M_first_insert;
public:
_LoserTree(unsigned int __k, _Compare __comp)
: _Base::_LoserTreeBase(__k, __comp)
{ }
unsigned int
__init_winner(unsigned int __root)
{ {
if (__root >= _M_k) protected:
return __root; /** @brief Internal representation of _LoserTree __elements. */
else struct _Loser
{ {
unsigned int __left = __init_winner (2 * __root); bool _M_sup;
unsigned int __right = __init_winner (2 * __root + 1); int _M_source;
if (_M_losers[__right]._M_sup const _Tp* _M_keyp;
|| (!_M_losers[__left]._M_sup };
&& !_M_comp(_M_losers[__right]._M_key,
_M_losers[__left]._M_key))) unsigned int _M_ik, _M_k, _M_offset;
{ _Loser* _M_losers;
// Left one is less or equal. _Compare _M_comp;
_M_losers[__root] = _M_losers[__right];
return __left; public:
} _LoserTreePointerBase(unsigned int __k,
else _Compare __comp = std::less<_Tp>())
{ : _M_comp(__comp)
// Right one is less. {
_M_losers[__root] = _M_losers[__left]; _M_ik = __k;
return __right;
} // Next greater power of 2.
} _M_k = 1 << (__rd_log2(_M_ik - 1) + 1);
} _M_offset = _M_k;
_M_losers = new _Loser[_M_k * 2];
void __init() for (unsigned int __i = _M_ik - 1; __i < _M_k; __i++)
{ _M_losers[0] = _M_losers[__init_winner(1)]; } _M_losers[__i + _M_k]._M_sup = true;
}
~_LoserTreePointerBase()
{ ::operator delete[](_M_losers); }
int __get_min_source()
{ return _M_losers[0]._M_source; }
void __insert_start(const _Tp& __key, int __source, bool __sup)
{
unsigned int __pos = _M_k + __source;
_M_losers[__pos]._M_sup = __sup;
_M_losers[__pos]._M_source = __source;
_M_losers[__pos]._M_keyp = &__key;
}
};
/** /**
* @brief Delete the smallest element and insert a new element from * @brief Stable _LoserTree implementation.
* the previously smallest element's sequence. *
* * The unstable variant is implemented using partial instantiation below.
* This implementation is stable. */
*/ template<bool __stable/* default == true */, typename _Tp, typename _Compare>
// Do not pass a const reference since _M_key will be used as class _LoserTreePointer
// local variable. : public _LoserTreePointerBase<_Tp, _Compare>
void
__delete_min_insert(_Tp _M_key, bool _M_sup)
{ {
typedef _LoserTreePointerBase<_Tp, _Compare> _Base;
using _Base::_M_k;
using _Base::_M_losers;
public:
_LoserTreePointer(unsigned int __k, _Compare __comp = std::less<_Tp>())
: _Base::_LoserTreePointerBase(__k, __comp)
{ }
unsigned int
__init_winner(unsigned int __root)
{
if (__root >= _M_k)
return __root;
else
{
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(*_M_losers[__right]._M_keyp,
*_M_losers[__left]._M_keyp)))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
return __left;
}
else
{
// Right one is less.
_M_losers[__root] = _M_losers[__left];
return __right;
}
}
}
void __init()
{ _M_losers[0] = _M_losers[__init_winner(1)]; }
void __delete_min_insert(const _Tp& __key, bool __sup)
{
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top! // no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif #endif
int _M_source = _M_losers[0]._M_source; const _Tp* __keyp = &__key;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; int __source = _M_losers[0]._M_source;
__pos /= 2) for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
{ __pos /= 2)
// The smaller one gets promoted, ties are broken by _M_source. {
if ((_M_sup && (!_M_losers[__pos]._M_sup // The smaller one gets promoted, ties are broken by __source.
|| _M_losers[__pos]._M_source < _M_source)) if ((__sup && (!_M_losers[__pos]._M_sup
|| (!_M_sup && !_M_losers[__pos]._M_sup || _M_losers[__pos]._M_source < __source))
&& ((_M_comp(_M_losers[__pos]._M_key, _M_key)) || (!__sup && !_M_losers[__pos]._M_sup &&
|| (!_M_comp(_M_key, _M_losers[__pos]._M_key) ((_M_comp(*_M_losers[__pos]._M_keyp, *__keyp))
&& _M_losers[__pos]._M_source < _M_source)))) || (!_M_comp(*__keyp, *_M_losers[__pos]._M_keyp)
{ && _M_losers[__pos]._M_source < __source))))
// The other one is smaller. {
std::swap(_M_losers[__pos]._M_sup, _M_sup); // The other one is smaller.
std::swap(_M_losers[__pos]._M_source, _M_source); std::swap(_M_losers[__pos]._M_sup, __sup);
std::swap(_M_losers[__pos]._M_key, _M_key); std::swap(_M_losers[__pos]._M_source, __source);
} std::swap(_M_losers[__pos]._M_keyp, __keyp);
} }
}
_M_losers[0]._M_sup = _M_sup;
_M_losers[0]._M_source = _M_source; _M_losers[0]._M_sup = __sup;
_M_losers[0]._M_key = _M_key; _M_losers[0]._M_source = __source;
} _M_losers[0]._M_keyp = __keyp;
}; }
};
/** /**
* @brief Unstable _LoserTree variant. * @brief Unstable _LoserTree implementation.
* *
* Stability (non-stable here) is selected with partial specialization. * The stable variant is above.
*/ */
template<typename _Tp, typename _Compare> template<typename _Tp, typename _Compare>
class _LoserTree</* __stable == */false, _Tp, _Compare> class _LoserTreePointer</* __stable == */false, _Tp, _Compare>
: public _LoserTreeBase<_Tp, _Compare> : public _LoserTreePointerBase<_Tp, _Compare>
{
typedef _LoserTreeBase<_Tp, _Compare> _Base;
using _Base::_M_log_k;
using _Base::_M_k;
using _Base::_M_losers;
using _Base::_M_first_insert;
public:
_LoserTree(unsigned int __k, _Compare __comp)
: _Base::_LoserTreeBase(__k, __comp)
{ }
/**
* Computes the winner of the competition at position "__root".
*
* Called recursively (starting at 0) to build the initial tree.
*
* @param __root __index of the "game" to start.
*/
unsigned int
__init_winner(unsigned int __root)
{
if (__root >= _M_k)
return __root;
else
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(_M_losers[__right]._M_key,
_M_losers[__left]._M_key)))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
return __left;
}
else
{
// Right one is less.
_M_losers[__root] = _M_losers[__left];
return __right;
}
}
}
void
__init()
{ _M_losers[0] = _M_losers[__init_winner(1)]; }
/**
* Delete the _M_key smallest element and insert the element _M_key
* instead.
*
* @param _M_key the _M_key to insert
* @param _M_sup true iff _M_key is an explicitly marked supremum
*/
// Do not pass a const reference since _M_key will be used as local
// variable.
void
__delete_min_insert(_Tp _M_key, bool _M_sup)
{ {
typedef _LoserTreePointerBase<_Tp, _Compare> _Base;
using _Base::_M_k;
using _Base::_M_losers;
public:
_LoserTreePointer(unsigned int __k, _Compare __comp = std::less<_Tp>())
: _Base::_LoserTreePointerBase(__k, __comp)
{ }
unsigned int
__init_winner(unsigned int __root)
{
if (__root >= _M_k)
return __root;
else
{
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(*_M_losers[__right]._M_keyp,
*_M_losers[__left]._M_keyp)))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
return __left;
}
else
{
// Right one is less.
_M_losers[__root] = _M_losers[__left];
return __right;
}
}
}
void __init()
{ _M_losers[0] = _M_losers[__init_winner(1)]; }
void __delete_min_insert(const _Tp& __key, bool __sup)
{
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top! // no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif #endif
int _M_source = _M_losers[0]._M_source; const _Tp* __keyp = &__key;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; int __source = _M_losers[0]._M_source;
__pos /= 2) for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
{ __pos /= 2)
// The smaller one gets promoted. {
if (_M_sup || (!_M_losers[__pos]._M_sup // The smaller one gets promoted.
&& _M_comp(_M_losers[__pos]._M_key, _M_key))) if (__sup || (!_M_losers[__pos]._M_sup
{ && _M_comp(*_M_losers[__pos]._M_keyp, *__keyp)))
// The other one is smaller. {
std::swap(_M_losers[__pos]._M_sup, _M_sup); // The other one is smaller.
std::swap(_M_losers[__pos]._M_source, _M_source); std::swap(_M_losers[__pos]._M_sup, __sup);
std::swap(_M_losers[__pos]._M_key, _M_key); std::swap(_M_losers[__pos]._M_source, __source);
} std::swap(_M_losers[__pos]._M_keyp, __keyp);
} }
}
_M_losers[0]._M_sup = _M_sup;
_M_losers[0]._M_source = _M_source; _M_losers[0]._M_sup = __sup;
_M_losers[0]._M_key = _M_key; _M_losers[0]._M_source = __source;
} _M_losers[0]._M_keyp = __keyp;
}; }
/**
* @brief Base class of _Loser Tree implementation using pointers.
*/
template<typename _Tp, typename _Compare>
class _LoserTreePointerBase
{
protected:
/** @brief Internal representation of _LoserTree __elements. */
struct _Loser
{
bool _M_sup;
int _M_source;
const _Tp* _M_keyp;
}; };
unsigned int _M_ik, _M_k, _M_offset; /** @brief Base class for unguarded _LoserTree implementation.
_Loser* _M_losers; *
_Compare _M_comp; * The whole element is copied into the tree structure.
*
public: * No guarding is done, therefore not a single input sequence must
_LoserTreePointerBase(unsigned int __k, * run empty. Unused __sequence heads are marked with a sentinel which
_Compare __comp = std::less<_Tp>()) * is &gt; all elements that are to be merged.
: _M_comp(__comp) *
* This is a very fast variant.
*/
template<typename _Tp, typename _Compare>
class _LoserTreeUnguardedBase
{ {
_M_ik = __k; protected:
struct _Loser
// Next greater power of 2. {
_M_k = 1 << (__rd_log2(_M_ik - 1) + 1); int _M_source;
_M_offset = _M_k; _Tp _M_key;
_M_losers = new _Loser[_M_k * 2]; };
for (unsigned int __i = _M_ik - 1; __i < _M_k; __i++)
_M_losers[__i + _M_k]._M_sup = true; unsigned int _M_ik, _M_k, _M_offset;
} _Loser* _M_losers;
_Compare _M_comp;
public:
_LoserTreeUnguardedBase(unsigned int __k, const _Tp __sentinel,
_Compare __comp = std::less<_Tp>())
: _M_comp(__comp)
{
_M_ik = __k;
// Next greater power of 2.
_M_k = 1 << (__rd_log2(_M_ik - 1) + 1);
_M_offset = _M_k;
// Avoid default-constructing _M_losers[]._M_key
_M_losers = static_cast<_Loser*>(::operator new(2 * _M_k
* sizeof(_Loser)));
for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i)
{
_M_losers[__i]._M_key = __sentinel;
_M_losers[__i]._M_source = -1;
}
}
~_LoserTreeUnguardedBase()
{ ::operator delete(_M_losers); }
int
__get_min_source()
{
#if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
return _M_losers[0]._M_source;
}
~_LoserTreePointerBase() void
{ ::operator delete[](_M_losers); } __insert_start(const _Tp& __key, int __source, bool)
{
unsigned int __pos = _M_k + __source;
int __get_min_source() new(&(_M_losers[__pos]._M_key)) _Tp(__key);
{ return _M_losers[0]._M_source; } _M_losers[__pos]._M_source = __source;
}
};
void __insert_start(const _Tp& _M_key, int _M_source, bool _M_sup) /**
* @brief Stable implementation of unguarded _LoserTree.
*
* Unstable variant is selected below with partial specialization.
*/
template<bool __stable/* default == true */, typename _Tp, typename _Compare>
class _LoserTreeUnguarded
: public _LoserTreeUnguardedBase<_Tp, _Compare>
{ {
unsigned int __pos = _M_k + _M_source; typedef _LoserTreeUnguardedBase<_Tp, _Compare> _Base;
using _Base::_M_k;
_M_losers[__pos]._M_sup = _M_sup; using _Base::_M_losers;
_M_losers[__pos]._M_source = _M_source;
_M_losers[__pos]._M_keyp = &_M_key;
}
};
/**
* @brief Stable _LoserTree implementation.
*
* The unstable variant is implemented using partial instantiation below.
*/
template<bool __stable/* default == true */, typename _Tp, typename _Compare>
class _LoserTreePointer
: public _LoserTreePointerBase<_Tp, _Compare>
{
typedef _LoserTreePointerBase<_Tp, _Compare> _Base;
using _Base::_M_k;
using _Base::_M_losers;
public: public:
_LoserTreePointer(unsigned int __k, _Compare __comp = std::less<_Tp>()) _LoserTreeUnguarded(unsigned int __k, const _Tp __sentinel,
: _Base::_LoserTreePointerBase(__k, __comp) _Compare __comp = std::less<_Tp>())
{ } : _Base::_LoserTreeUnguardedBase(__k, __sentinel, __comp)
{ }
unsigned int
__init_winner(unsigned int __root)
{
if (__root >= _M_k)
return __root;
else
{
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
if (!_M_comp(_M_losers[__right]._M_key,
_M_losers[__left]._M_key))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
return __left;
}
else
{
// Right one is less.
_M_losers[__root] = _M_losers[__left];
return __right;
}
}
}
void
__init()
{
_M_losers[0] = _M_losers[__init_winner(1)];
unsigned int
__init_winner(unsigned int __root)
{
if (__root >= _M_k)
return __root;
else
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(*_M_losers[__right]._M_keyp,
*_M_losers[__left]._M_keyp)))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
return __left;
}
else
{
// Right one is less.
_M_losers[__root] = _M_losers[__left];
return __right;
}
}
}
void __init()
{ _M_losers[0] = _M_losers[__init_winner(1)]; }
void __delete_min_insert(const _Tp& _M_key, bool _M_sup)
{
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top! // no dummy sequence can ever be at the top at the beginning
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); // (0 sequences!)
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif #endif
}
const _Tp* _M_keyp = &_M_key; // Do not pass a const reference since __key will be used as
int _M_source = _M_losers[0]._M_source; // local variable.
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) void
{ __delete_min_insert(_Tp __key, bool)
// The smaller one gets promoted, ties are broken by _M_source. {
if ((_M_sup && (!_M_losers[__pos]._M_sup ||
_M_losers[__pos]._M_source < _M_source)) ||
(!_M_sup && !_M_losers[__pos]._M_sup &&
((_M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)) ||
(!_M_comp(*_M_keyp, *_M_losers[__pos]._M_keyp)
&& _M_losers[__pos]._M_source < _M_source))))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_sup, _M_sup);
std::swap(_M_losers[__pos]._M_source, _M_source);
std::swap(_M_losers[__pos]._M_keyp, _M_keyp);
}
}
_M_losers[0]._M_sup = _M_sup;
_M_losers[0]._M_source = _M_source;
_M_losers[0]._M_keyp = _M_keyp;
}
};
/**
* @brief Unstable _LoserTree implementation.
*
* The stable variant is above.
*/
template<typename _Tp, typename _Compare>
class _LoserTreePointer</* __stable == */false, _Tp, _Compare>
: public _LoserTreePointerBase<_Tp, _Compare>
{
typedef _LoserTreePointerBase<_Tp, _Compare> _Base;
using _Base::_M_k;
using _Base::_M_losers;
public:
_LoserTreePointer(unsigned int __k, _Compare __comp = std::less<_Tp>())
: _Base::_LoserTreePointerBase(__k, __comp)
{ }
unsigned int
__init_winner(unsigned int __root)
{
if (__root >= _M_k)
return __root;
else
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(*_M_losers[__right]._M_keyp,
*_M_losers[__left]._M_keyp)))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
return __left;
}
else
{
// Right one is less.
_M_losers[__root] = _M_losers[__left];
return __right;
}
}
}
void __init()
{ _M_losers[0] = _M_losers[__init_winner(1)]; }
void __delete_min_insert(const _Tp& _M_key, bool _M_sup)
{
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top! // no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif #endif
const _Tp* _M_keyp = &_M_key; int __source = _M_losers[0]._M_source;
int _M_source = _M_losers[0]._M_source; for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
__pos /= 2) {
{ // The smaller one gets promoted, ties are broken by _M_source.
// The smaller one gets promoted. if (_M_comp(_M_losers[__pos]._M_key, __key)
if (_M_sup || (!_M_losers[__pos]._M_sup || (!_M_comp(__key, _M_losers[__pos]._M_key)
&& _M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp))) && _M_losers[__pos]._M_source < __source))
{ {
// The other one is smaller. // The other one is smaller.
std::swap(_M_losers[__pos]._M_sup, _M_sup); std::swap(_M_losers[__pos]._M_source, __source);
std::swap(_M_losers[__pos]._M_source, _M_source); std::swap(_M_losers[__pos]._M_key, __key);
std::swap(_M_losers[__pos]._M_keyp, _M_keyp); }
} }
}
_M_losers[0]._M_source = __source;
_M_losers[0]._M_sup = _M_sup; _M_losers[0]._M_key = __key;
_M_losers[0]._M_source = _M_source; }
_M_losers[0]._M_keyp = _M_keyp;
}
};
/** @brief Base class for unguarded _LoserTree implementation.
*
* The whole element is copied into the tree structure.
*
* No guarding is done, therefore not a single input sequence must
* run empty. Unused __sequence heads are marked with a sentinel which
* is &gt; all elements that are to be merged.
*
* This is a very fast variant.
*/
template<typename _Tp, typename _Compare>
class _LoserTreeUnguardedBase
{
protected:
struct _Loser
{
int _M_source;
_Tp _M_key;
}; };
unsigned int _M_ik, _M_k, _M_offset; /**
_Loser* _M_losers; * @brief Non-Stable implementation of unguarded _LoserTree.
_Compare _M_comp; *
* Stable implementation is above.
public: */
_LoserTreeUnguardedBase(unsigned int __k, const _Tp _sentinel, template<typename _Tp, typename _Compare>
_Compare __comp = std::less<_Tp>()) class _LoserTreeUnguarded</* __stable == */false, _Tp, _Compare>
: _M_comp(__comp) : public _LoserTreeUnguardedBase<_Tp, _Compare>
{
_M_ik = __k;
// Next greater power of 2.
_M_k = 1 << (__rd_log2(_M_ik - 1) + 1);
_M_offset = _M_k;
// Avoid default-constructing _M_losers[]._M_key
_M_losers
= static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser)));
for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i)
{
_M_losers[__i]._M_key = _sentinel;
_M_losers[__i]._M_source = -1;
}
}
~_LoserTreeUnguardedBase()
{ ::operator delete(_M_losers); }
int
__get_min_source()
{
#if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
return _M_losers[0]._M_source;
}
void
__insert_start(const _Tp& _M_key, int _M_source, bool)
{
unsigned int __pos = _M_k + _M_source;
new(&(_M_losers[__pos]._M_key)) _Tp(_M_key);
_M_losers[__pos]._M_source = _M_source;
}
};
/**
* @brief Stable implementation of unguarded _LoserTree.
*
* Unstable variant is selected below with partial specialization.
*/
template<bool __stable/* default == true */, typename _Tp, typename _Compare>
class _LoserTreeUnguarded
: public _LoserTreeUnguardedBase<_Tp, _Compare>
{
typedef _LoserTreeUnguardedBase<_Tp, _Compare> _Base;
using _Base::_M_k;
using _Base::_M_losers;
public:
_LoserTreeUnguarded(unsigned int __k, const _Tp _sentinel,
_Compare __comp = std::less<_Tp>())
: _Base::_LoserTreeUnguardedBase(__k, _sentinel, __comp)
{ }
unsigned int
__init_winner(unsigned int __root)
{
if (__root >= _M_k)
return __root;
else
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
if (!_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
return __left;
}
else
{
// Right one is less.
_M_losers[__root] = _M_losers[__left];
return __right;
}
}
}
void
__init()
{ {
_M_losers[0] = _M_losers[__init_winner(1)]; typedef _LoserTreeUnguardedBase<_Tp, _Compare> _Base;
using _Base::_M_k;
#if _GLIBCXX_ASSERTIONS using _Base::_M_losers;
// no dummy sequence can ever be at the top at the beginning
// (0 sequences!)
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
}
// Do not pass a const reference since _M_key will be used as public:
// local variable. _LoserTreeUnguarded(unsigned int __k, const _Tp __sentinel,
void _Compare __comp = std::less<_Tp>())
__delete_min_insert(_Tp _M_key, bool) : _Base::_LoserTreeUnguardedBase(__k, __sentinel, __comp)
{ { }
#if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top! unsigned int
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); __init_winner(unsigned int __root)
#endif {
if (__root >= _M_k)
int _M_source = _M_losers[0]._M_source; return __root;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) else
{ {
// The smaller one gets promoted, ties are broken by _M_source. unsigned int __left = __init_winner(2 * __root);
if (_M_comp(_M_losers[__pos]._M_key, _M_key) unsigned int __right = __init_winner(2 * __root + 1);
|| (!_M_comp(_M_key, _M_losers[__pos]._M_key)
&& _M_losers[__pos]._M_source < _M_source))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_source, _M_source);
std::swap(_M_losers[__pos]._M_key, _M_key);
}
}
_M_losers[0]._M_source = _M_source;
_M_losers[0]._M_key = _M_key;
}
};
/**
* @brief Non-Stable implementation of unguarded _LoserTree.
*
* Stable implementation is above.
*/
template<typename _Tp, typename _Compare>
class _LoserTreeUnguarded</* __stable == */false, _Tp, _Compare>
: public _LoserTreeUnguardedBase<_Tp, _Compare>
{
typedef _LoserTreeUnguardedBase<_Tp, _Compare> _Base;
using _Base::_M_k;
using _Base::_M_losers;
public:
_LoserTreeUnguarded(unsigned int __k, const _Tp _sentinel,
_Compare __comp = std::less<_Tp>())
: _Base::_LoserTreeUnguardedBase(__k, _sentinel, __comp)
{ }
unsigned int
__init_winner(unsigned int __root)
{
if (__root >= _M_k)
return __root;
else
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// If __left one is sentinel then __right one must be, too. // If __left one is sentinel then __right one must be, too.
if (_M_losers[__left]._M_source == -1) if (_M_losers[__left]._M_source == -1)
_GLIBCXX_PARALLEL_ASSERT(_M_losers[__right]._M_source == -1); _GLIBCXX_PARALLEL_ASSERT(_M_losers[__right]._M_source == -1);
#endif #endif
if (!_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key)) if (!_M_comp(_M_losers[__right]._M_key,
{ _M_losers[__left]._M_key))
// Left one is less or equal. {
_M_losers[__root] = _M_losers[__right]; // Left one is less or equal.
return __left; _M_losers[__root] = _M_losers[__right];
} return __left;
else }
{ else
// Right one is less. {
_M_losers[__root] = _M_losers[__left]; // Right one is less.
return __right; _M_losers[__root] = _M_losers[__left];
} return __right;
} }
} }
}
void
__init() void
{ __init()
_M_losers[0] = _M_losers[__init_winner(1)]; {
_M_losers[0] = _M_losers[__init_winner(1)];
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top at the beginning // no dummy sequence can ever be at the top at the beginning
// (0 sequences!) // (0 sequences!)
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif #endif
} }
// Do not pass a const reference since _M_key will be used as // Do not pass a const reference since __key will be used as
// local variable. // local variable.
void void
__delete_min_insert(_Tp _M_key, bool) __delete_min_insert(_Tp __key, bool)
{ {
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top! // no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif #endif
int _M_source = _M_losers[0]._M_source; int __source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
{ __pos /= 2)
// The smaller one gets promoted. {
if (_M_comp(_M_losers[__pos]._M_key, _M_key)) // The smaller one gets promoted.
{ if (_M_comp(_M_losers[__pos]._M_key, __key))
// The other one is smaller. {
std::swap(_M_losers[__pos]._M_source, _M_source); // The other one is smaller.
std::swap(_M_losers[__pos]._M_key, _M_key); std::swap(_M_losers[__pos]._M_source, __source);
} std::swap(_M_losers[__pos]._M_key, __key);
} }
}
_M_losers[0]._M_source = _M_source;
_M_losers[0]._M_key = _M_key; _M_losers[0]._M_source = __source;
} _M_losers[0]._M_key = __key;
}; }
/** @brief Unguarded loser tree, keeping only pointers to the
* elements in the tree structure.
*
* No guarding is done, therefore not a single input sequence must
* run empty. This is a very fast variant.
*/
template<typename _Tp, typename _Compare>
class _LoserTreePointerUnguardedBase
{
protected:
struct _Loser
{
int _M_source;
const _Tp* _M_keyp;
}; };
unsigned int _M_ik, _M_k, _M_offset; /** @brief Unguarded loser tree, keeping only pointers to the
_Loser* _M_losers; * elements in the tree structure.
_Compare _M_comp; *
* No guarding is done, therefore not a single input sequence must
public: * run empty. This is a very fast variant.
*/
_LoserTreePointerUnguardedBase(unsigned int __k, const _Tp& _sentinel, template<typename _Tp, typename _Compare>
_Compare __comp = std::less<_Tp>()) class _LoserTreePointerUnguardedBase
: _M_comp(__comp)
{
_M_ik = __k;
// Next greater power of 2.
_M_k = 1 << (__rd_log2(_M_ik - 1) + 1);
_M_offset = _M_k;
// Avoid default-constructing _M_losers[]._M_key
_M_losers = new _Loser[2 * _M_k];
for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i)
{
_M_losers[__i]._M_keyp = &_sentinel;
_M_losers[__i]._M_source = -1;
}
}
~_LoserTreePointerUnguardedBase()
{ delete[] _M_losers; }
int
__get_min_source()
{ {
protected:
struct _Loser
{
int _M_source;
const _Tp* _M_keyp;
};
unsigned int _M_ik, _M_k, _M_offset;
_Loser* _M_losers;
_Compare _M_comp;
public:
_LoserTreePointerUnguardedBase(unsigned int __k, const _Tp& __sentinel,
_Compare __comp = std::less<_Tp>())
: _M_comp(__comp)
{
_M_ik = __k;
// Next greater power of 2.
_M_k = 1 << (__rd_log2(_M_ik - 1) + 1);
_M_offset = _M_k;
// Avoid default-constructing _M_losers[]._M_key
_M_losers = new _Loser[2 * _M_k];
for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i)
{
_M_losers[__i]._M_keyp = &__sentinel;
_M_losers[__i]._M_source = -1;
}
}
~_LoserTreePointerUnguardedBase()
{ delete[] _M_losers; }
int
__get_min_source()
{
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top! // no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif #endif
return _M_losers[0]._M_source; return _M_losers[0]._M_source;
} }
void void
__insert_start(const _Tp& _M_key, int _M_source, bool) __insert_start(const _Tp& __key, int __source, bool)
{ {
unsigned int __pos = _M_k + _M_source; unsigned int __pos = _M_k + __source;
_M_losers[__pos]._M_keyp = &_M_key;
_M_losers[__pos]._M_source = _M_source;
}
};
/**
* @brief Stable unguarded _LoserTree variant storing pointers.
*
* Unstable variant is implemented below using partial specialization.
*/
template<bool __stable/* default == true */, typename _Tp, typename _Compare>
class _LoserTreePointerUnguarded
: public _LoserTreePointerUnguardedBase<_Tp, _Compare>
{
typedef _LoserTreePointerUnguardedBase<_Tp, _Compare> _Base;
using _Base::_M_k;
using _Base::_M_losers;
public: _M_losers[__pos]._M_keyp = &__key;
_LoserTreePointerUnguarded(unsigned int __k, const _Tp& _sentinel, _M_losers[__pos]._M_source = __source;
_Compare __comp = std::less<_Tp>()) }
: _Base::_LoserTreePointerUnguardedBase(__k, _sentinel, __comp) };
{ }
unsigned int /**
__init_winner(unsigned int __root) * @brief Stable unguarded _LoserTree variant storing pointers.
{ *
if (__root >= _M_k) * Unstable variant is implemented below using partial specialization.
return __root; */
else template<bool __stable/* default == true */, typename _Tp, typename _Compare>
{ class _LoserTreePointerUnguarded
unsigned int __left = __init_winner (2 * __root); : public _LoserTreePointerUnguardedBase<_Tp, _Compare>
unsigned int __right = __init_winner (2 * __root + 1);
if (!_M_comp(*_M_losers[__right]._M_keyp,
*_M_losers[__left]._M_keyp))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
return __left;
}
else
{
// Right one is less.
_M_losers[__root] = _M_losers[__left];
return __right;
}
}
}
void
__init()
{ {
_M_losers[0] = _M_losers[__init_winner(1)]; typedef _LoserTreePointerUnguardedBase<_Tp, _Compare> _Base;
using _Base::_M_k;
using _Base::_M_losers;
public:
_LoserTreePointerUnguarded(unsigned int __k, const _Tp& __sentinel,
_Compare __comp = std::less<_Tp>())
: _Base::_LoserTreePointerUnguardedBase(__k, __sentinel, __comp)
{ }
unsigned int
__init_winner(unsigned int __root)
{
if (__root >= _M_k)
return __root;
else
{
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
if (!_M_comp(*_M_losers[__right]._M_keyp,
*_M_losers[__left]._M_keyp))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
return __left;
}
else
{
// Right one is less.
_M_losers[__root] = _M_losers[__left];
return __right;
}
}
}
void
__init()
{
_M_losers[0] = _M_losers[__init_winner(1)];
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top at the beginning // no dummy sequence can ever be at the top at the beginning
// (0 sequences!) // (0 sequences!)
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif #endif
} }
void void
__delete_min_insert(const _Tp& _M_key, bool _M_sup) __delete_min_insert(const _Tp& __key, bool __sup)
{ {
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top! // no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif #endif
const _Tp* _M_keyp = &_M_key; const _Tp* __keyp = &__key;
int _M_source = _M_losers[0]._M_source; int __source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
{ __pos /= 2)
// The smaller one gets promoted, ties are broken by _M_source. {
if (_M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp) // The smaller one gets promoted, ties are broken by _M_source.
|| (!_M_comp(*_M_keyp, *_M_losers[__pos]._M_keyp) if (_M_comp(*_M_losers[__pos]._M_keyp, *__keyp)
&& _M_losers[__pos]._M_source < _M_source)) || (!_M_comp(*__keyp, *_M_losers[__pos]._M_keyp)
{ && _M_losers[__pos]._M_source < __source))
// The other one is smaller. {
std::swap(_M_losers[__pos]._M_source, _M_source); // The other one is smaller.
std::swap(_M_losers[__pos]._M_keyp, _M_keyp); std::swap(_M_losers[__pos]._M_source, __source);
} std::swap(_M_losers[__pos]._M_keyp, __keyp);
} }
}
_M_losers[0]._M_source = _M_source;
_M_losers[0]._M_keyp = _M_keyp; _M_losers[0]._M_source = __source;
} _M_losers[0]._M_keyp = __keyp;
}; }
};
/**
* @brief Unstable unguarded _LoserTree variant storing pointers. /**
* * @brief Unstable unguarded _LoserTree variant storing pointers.
* Stable variant is above. *
*/ * Stable variant is above.
template<typename _Tp, typename _Compare> */
class _LoserTreePointerUnguarded</* __stable == */false, _Tp, _Compare> template<typename _Tp, typename _Compare>
: public _LoserTreePointerUnguardedBase<_Tp, _Compare> class _LoserTreePointerUnguarded</* __stable == */false, _Tp, _Compare>
{ : public _LoserTreePointerUnguardedBase<_Tp, _Compare>
typedef _LoserTreePointerUnguardedBase<_Tp, _Compare> _Base;
using _Base::_M_k;
using _Base::_M_losers;
public:
_LoserTreePointerUnguarded(unsigned int __k, const _Tp& _sentinel,
_Compare __comp = std::less<_Tp>())
: _Base::_LoserTreePointerUnguardedBase(__k, _sentinel, __comp)
{ }
unsigned int
__init_winner(unsigned int __root)
{ {
if (__root >= _M_k) typedef _LoserTreePointerUnguardedBase<_Tp, _Compare> _Base;
return __root; using _Base::_M_k;
else using _Base::_M_losers;
{
unsigned int __left = __init_winner (2 * __root); public:
unsigned int __right = __init_winner (2 * __root + 1); _LoserTreePointerUnguarded(unsigned int __k, const _Tp& __sentinel,
_Compare __comp = std::less<_Tp>())
: _Base::_LoserTreePointerUnguardedBase(__k, __sentinel, __comp)
{ }
unsigned int
__init_winner(unsigned int __root)
{
if (__root >= _M_k)
return __root;
else
{
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// If __left one is sentinel then __right one must be, too. // If __left one is sentinel then __right one must be, too.
if (_M_losers[__left]._M_source == -1) if (_M_losers[__left]._M_source == -1)
_GLIBCXX_PARALLEL_ASSERT(_M_losers[__right]._M_source == -1); _GLIBCXX_PARALLEL_ASSERT(_M_losers[__right]._M_source == -1);
#endif #endif
if (!_M_comp(*_M_losers[__right]._M_keyp, if (!_M_comp(*_M_losers[__right]._M_keyp,
*_M_losers[__left]._M_keyp)) *_M_losers[__left]._M_keyp))
{ {
// Left one is less or equal. // Left one is less or equal.
_M_losers[__root] = _M_losers[__right]; _M_losers[__root] = _M_losers[__right];
return __left; return __left;
} }
else else
{ {
// Right one is less. // Right one is less.
_M_losers[__root] = _M_losers[__left]; _M_losers[__root] = _M_losers[__left];
return __right; return __right;
} }
} }
} }
void void
__init() __init()
{ {
_M_losers[0] = _M_losers[__init_winner(1)]; _M_losers[0] = _M_losers[__init_winner(1)];
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top at the beginning // no dummy sequence can ever be at the top at the beginning
// (0 sequences!) // (0 sequences!)
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif #endif
} }
void void
__delete_min_insert(const _Tp& _M_key, bool _M_sup) __delete_min_insert(const _Tp& __key, bool __sup)
{ {
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top! // no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1); _GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif #endif
const _Tp* _M_keyp = &_M_key; const _Tp* __keyp = &__key;
int _M_source = _M_losers[0]._M_source; int __source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2) for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
{ __pos /= 2)
// The smaller one gets promoted. {
if (_M_comp(*(_M_losers[__pos]._M_keyp), *_M_keyp)) // The smaller one gets promoted.
{ if (_M_comp(*(_M_losers[__pos]._M_keyp), *__keyp))
// The other one is smaller. {
std::swap(_M_losers[__pos]._M_source, _M_source); // The other one is smaller.
std::swap(_M_losers[__pos]._M_keyp, _M_keyp); std::swap(_M_losers[__pos]._M_source, __source);
} std::swap(_M_losers[__pos]._M_keyp, __keyp);
} }
}
_M_losers[0]._M_source = _M_source;
_M_losers[0]._M_keyp = _M_keyp; _M_losers[0]._M_source = __source;
} _M_losers[0]._M_keyp = __keyp;
}; }
};
} // namespace __gnu_parallel } // namespace __gnu_parallel
#endif /* _GLIBCXX_PARALLEL_LOSERTREE_H */ #endif /* _GLIBCXX_PARALLEL_LOSERTREE_H */
...@@ -54,11 +54,10 @@ namespace __gnu_parallel ...@@ -54,11 +54,10 @@ namespace __gnu_parallel
typename _OutputIterator, typename _DifferenceTp, typename _OutputIterator, typename _DifferenceTp,
typename _Compare> typename _Compare>
_OutputIterator _OutputIterator
__merge_advance_usual(_RAIter1& __begin1, __merge_advance_usual(_RAIter1& __begin1, _RAIter1 __end1,
_RAIter1 __end1, _RAIter2& __begin2, _RAIter2 __end2,
_RAIter2& __begin2, _OutputIterator __target,
_RAIter2 __end2, _OutputIterator __target, _DifferenceTp __max_length, _Compare __comp)
_DifferenceTp __max_length, _Compare __comp)
{ {
typedef _DifferenceTp _DifferenceType; typedef _DifferenceTp _DifferenceType;
while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0) while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0)
...@@ -103,12 +102,10 @@ namespace __gnu_parallel ...@@ -103,12 +102,10 @@ namespace __gnu_parallel
typename _OutputIterator, typename _DifferenceTp, typename _OutputIterator, typename _DifferenceTp,
typename _Compare> typename _Compare>
_OutputIterator _OutputIterator
__merge_advance_movc(_RAIter1& __begin1, __merge_advance_movc(_RAIter1& __begin1, _RAIter1 __end1,
_RAIter1 __end1, _RAIter2& __begin2, _RAIter2 __end2,
_RAIter2& __begin2, _OutputIterator __target,
_RAIter2 __end2, _DifferenceTp __max_length, _Compare __comp)
_OutputIterator __target,
_DifferenceTp __max_length, _Compare __comp)
{ {
typedef _DifferenceTp _DifferenceType; typedef _DifferenceTp _DifferenceType;
typedef typename std::iterator_traits<_RAIter1>::value_type typedef typename std::iterator_traits<_RAIter1>::value_type
...@@ -172,14 +169,14 @@ namespace __gnu_parallel ...@@ -172,14 +169,14 @@ namespace __gnu_parallel
typename _Compare> typename _Compare>
inline _OutputIterator inline _OutputIterator
__merge_advance(_RAIter1& __begin1, _RAIter1 __end1, __merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
_RAIter2& __begin2, _RAIter2 __end2, _RAIter2& __begin2, _RAIter2 __end2,
_OutputIterator __target, _DifferenceTp __max_length, _OutputIterator __target, _DifferenceTp __max_length,
_Compare __comp) _Compare __comp)
{ {
_GLIBCXX_CALL(__max_length) _GLIBCXX_CALL(__max_length)
return __merge_advance_movc(__begin1, __end1, __begin2, __end2, __target, return __merge_advance_movc(__begin1, __end1, __begin2, __end2,
__max_length, __comp); __target, __max_length, __comp);
} }
/** @brief Merge routine fallback to sequential in case the /** @brief Merge routine fallback to sequential in case the
...@@ -195,17 +192,15 @@ namespace __gnu_parallel ...@@ -195,17 +192,15 @@ namespace __gnu_parallel
template<typename _RAIter1, typename _RAIter2, template<typename _RAIter1, typename _RAIter2,
typename _RAIter3, typename _Compare> typename _RAIter3, typename _Compare>
inline _RAIter3 inline _RAIter3
__parallel_merge_advance(_RAIter1& __begin1, __parallel_merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
_RAIter1 __end1, _RAIter2& __begin2,
_RAIter2& __begin2, // different iterators, parallel implementation
// different iterators, parallel implementation // not available
// not available _RAIter2 __end2, _RAIter3 __target, typename
_RAIter2 __end2, std::iterator_traits<_RAIter1>::
_RAIter3 __target, typename difference_type __max_length, _Compare __comp)
std::iterator_traits<_RAIter1>::
difference_type __max_length, _Compare __comp)
{ return __merge_advance(__begin1, __end1, __begin2, __end2, __target, { return __merge_advance(__begin1, __end1, __begin2, __end2, __target,
__max_length, __comp); } __max_length, __comp); }
/** @brief Parallel merge routine being able to merge only the @__c /** @brief Parallel merge routine being able to merge only the @__c
* __max_length smallest elements. * __max_length smallest elements.
...@@ -225,13 +220,11 @@ namespace __gnu_parallel ...@@ -225,13 +220,11 @@ namespace __gnu_parallel
template<typename _RAIter1, typename _RAIter3, template<typename _RAIter1, typename _RAIter3,
typename _Compare> typename _Compare>
inline _RAIter3 inline _RAIter3
__parallel_merge_advance(_RAIter1& __begin1, __parallel_merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
_RAIter1 __end1, _RAIter1& __begin2, _RAIter1 __end2,
_RAIter1& __begin2, _RAIter3 __target, typename
_RAIter1 __end2, std::iterator_traits<_RAIter1>::
_RAIter3 __target, typename difference_type __max_length, _Compare __comp)
std::iterator_traits<_RAIter1>::
difference_type __max_length, _Compare __comp)
{ {
typedef typename typedef typename
std::iterator_traits<_RAIter1>::value_type _ValueType; std::iterator_traits<_RAIter1>::value_type _ValueType;
...@@ -242,17 +235,14 @@ namespace __gnu_parallel ...@@ -242,17 +235,14 @@ namespace __gnu_parallel
typedef typename std::pair<_RAIter1, _RAIter1> typedef typename std::pair<_RAIter1, _RAIter1>
_IteratorPair; _IteratorPair;
_IteratorPair _IteratorPair __seqs[2] = { std::make_pair(__begin1, __end1),
seqs[2] = { std::make_pair(__begin1, __end1), std::make_pair(__begin2, __end2) };
std::make_pair(__begin2, __end2) }; _RAIter3 __target_end = parallel_multiway_merge
_RAIter3 < /* __stable = */ true, /* __sentinels = */ false>
__target_end = parallel_multiway_merge (__seqs, __seqs + 2, __target, multiway_merge_exact_splitting
< /* __stable = */ true, /* __sentinels = */ false>( < /* __stable = */ true, _IteratorPair*,
seqs, seqs + 2, __target, _Compare, _DifferenceType1>, __max_length, __comp,
multiway_merge_exact_splitting omp_get_max_threads());
< /* __stable = */ true, _IteratorPair*,
_Compare, _DifferenceType1>,
__max_length, __comp, omp_get_max_threads());
return __target_end; return __target_end;
} }
......
...@@ -53,8 +53,8 @@ namespace __gnu_parallel ...@@ -53,8 +53,8 @@ namespace __gnu_parallel
/** @brief Compare __a pair of types lexicographically, ascending. */ /** @brief Compare __a pair of types lexicographically, ascending. */
template<typename _T1, typename _T2, typename _Compare> template<typename _T1, typename _T2, typename _Compare>
class _Lexicographic class _Lexicographic
: public std::binary_function< : public std::binary_function<std::pair<_T1, _T2>,
std::pair<_T1, _T2>, std::pair<_T1, _T2>, bool> std::pair<_T1, _T2>, bool>
{ {
private: private:
_Compare& _M_comp; _Compare& _M_comp;
...@@ -142,19 +142,19 @@ namespace __gnu_parallel ...@@ -142,19 +142,19 @@ namespace __gnu_parallel
// Number of sequences, number of elements in total (possibly // Number of sequences, number of elements in total (possibly
// including padding). // including padding).
_DifferenceType __m = std::distance(__begin_seqs, __end_seqs), __N = 0, _DifferenceType __m = std::distance(__begin_seqs, __end_seqs), __nn = 0,
__nmax, __n, __r; __nmax, __n, __r;
for (int __i = 0; __i < __m; __i++) for (int __i = 0; __i < __m; __i++)
{ {
__N += std::distance(__begin_seqs[__i].first, __nn += std::distance(__begin_seqs[__i].first,
__begin_seqs[__i].second); __begin_seqs[__i].second);
_GLIBCXX_PARALLEL_ASSERT( _GLIBCXX_PARALLEL_ASSERT(
std::distance(__begin_seqs[__i].first, std::distance(__begin_seqs[__i].first,
__begin_seqs[__i].second) > 0); __begin_seqs[__i].second) > 0);
} }
if (__rank == __N) if (__rank == __nn)
{ {
for (int __i = 0; __i < __m; __i++) for (int __i = 0; __i < __m; __i++)
__begin_offsets[__i] = __begin_seqs[__i].second; // Very end. __begin_offsets[__i] = __begin_seqs[__i].second; // Very end.
...@@ -163,9 +163,9 @@ namespace __gnu_parallel ...@@ -163,9 +163,9 @@ namespace __gnu_parallel
} }
_GLIBCXX_PARALLEL_ASSERT(__m != 0); _GLIBCXX_PARALLEL_ASSERT(__m != 0);
_GLIBCXX_PARALLEL_ASSERT(__N != 0); _GLIBCXX_PARALLEL_ASSERT(__nn != 0);
_GLIBCXX_PARALLEL_ASSERT(__rank >= 0); _GLIBCXX_PARALLEL_ASSERT(__rank >= 0);
_GLIBCXX_PARALLEL_ASSERT(__rank < __N); _GLIBCXX_PARALLEL_ASSERT(__rank < __nn);
_DifferenceType* __ns = new _DifferenceType[__m]; _DifferenceType* __ns = new _DifferenceType[__m];
_DifferenceType* __a = new _DifferenceType[__m]; _DifferenceType* __a = new _DifferenceType[__m];
...@@ -401,14 +401,14 @@ namespace __gnu_parallel ...@@ -401,14 +401,14 @@ namespace __gnu_parallel
// Number of sequences, number of elements in total (possibly // Number of sequences, number of elements in total (possibly
// including padding). // including padding).
_DifferenceType __m = std::distance(__begin_seqs, __end_seqs); _DifferenceType __m = std::distance(__begin_seqs, __end_seqs);
_DifferenceType __N = 0; _DifferenceType __nn = 0;
_DifferenceType __nmax, __n, __r; _DifferenceType __nmax, __n, __r;
for (int __i = 0; __i < __m; __i++) for (int __i = 0; __i < __m; __i++)
__N += std::distance(__begin_seqs[__i].first, __nn += std::distance(__begin_seqs[__i].first,
__begin_seqs[__i].second); __begin_seqs[__i].second);
if (__m == 0 || __N == 0 || __rank < 0 || __rank >= __N) if (__m == 0 || __nn == 0 || __rank < 0 || __rank >= __nn)
{ {
// result undefined if there is no data or __rank is outside bounds // result undefined if there is no data or __rank is outside bounds
throw std::exception(); throw std::exception();
...@@ -433,7 +433,7 @@ namespace __gnu_parallel ...@@ -433,7 +433,7 @@ namespace __gnu_parallel
// Pad all lists to this length, at least as long as any ns[__i], // Pad all lists to this length, at least as long as any ns[__i],
// equality iff __nmax = 2^__k - 1 // equality iff __nmax = 2^__k - 1
__l = pow2(__r) - 1; __l = __round_up_to_pow2(__r) - 1;
for (int __i = 0; __i < __m; ++__i) for (int __i = 0; __i < __m; ++__i)
{ {
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -125,8 +125,7 @@ namespace __gnu_parallel ...@@ -125,8 +125,7 @@ namespace __gnu_parallel
/** @brief Split by exact splitting. */ /** @brief Split by exact splitting. */
template<typename _RAIter, typename _Compare, template<typename _RAIter, typename _Compare,
typename _SortingPlacesIterator> typename _SortingPlacesIterator>
struct _SplitConsistently<true, _RAIter, struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
_Compare, _SortingPlacesIterator>
{ {
void void
operator()(const _ThreadIndex __iam, operator()(const _ThreadIndex __iam,
...@@ -140,19 +139,19 @@ namespace __gnu_parallel ...@@ -140,19 +139,19 @@ namespace __gnu_parallel
std::vector<std::pair<_SortingPlacesIterator, std::vector<std::pair<_SortingPlacesIterator,
_SortingPlacesIterator> > _SortingPlacesIterator> >
seqs(__sd->_M_num_threads); __seqs(__sd->_M_num_threads);
for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++) for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
seqs[__s] = std::make_pair(__sd->_M_temporary[__s], __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
__sd->_M_temporary[__s] __sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1] + (__sd->_M_starts[__s + 1]
- __sd->_M_starts[__s])); - __sd->_M_starts[__s]));
std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads); std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
// if not last thread // if not last thread
if (__iam < __sd->_M_num_threads - 1) if (__iam < __sd->_M_num_threads - 1)
multiseq_partition(seqs.begin(), seqs.end(), multiseq_partition(__seqs.begin(), __seqs.end(),
__sd->_M_starts[__iam + 1], _M_offsets.begin(), __sd->_M_starts[__iam + 1], __offsets.begin(),
__comp); __comp);
for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++) for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++)
...@@ -160,7 +159,7 @@ namespace __gnu_parallel ...@@ -160,7 +159,7 @@ namespace __gnu_parallel
// for each sequence // for each sequence
if (__iam < (__sd->_M_num_threads - 1)) if (__iam < (__sd->_M_num_threads - 1))
__sd->_M_pieces[__iam][__seq]._M_end __sd->_M_pieces[__iam][__seq]._M_end
= _M_offsets[__seq] - seqs[__seq].first; = __offsets[__seq] - __seqs[__seq].first;
else else
// very end of this sequence // very end of this sequence
__sd->_M_pieces[__iam][__seq]._M_end = __sd->_M_pieces[__iam][__seq]._M_end =
...@@ -185,8 +184,7 @@ namespace __gnu_parallel ...@@ -185,8 +184,7 @@ namespace __gnu_parallel
/** @brief Split by sampling. */ /** @brief Split by sampling. */
template<typename _RAIter, typename _Compare, template<typename _RAIter, typename _Compare,
typename _SortingPlacesIterator> typename _SortingPlacesIterator>
struct _SplitConsistently<false, _RAIter, _Compare, struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
_SortingPlacesIterator>
{ {
void void
operator()(const _ThreadIndex __iam, operator()(const _ThreadIndex __iam,
...@@ -282,10 +280,8 @@ namespace __gnu_parallel ...@@ -282,10 +280,8 @@ namespace __gnu_parallel
const _RAIter& __target, const _RAIter& __target,
_Compare& __comp, _Compare& __comp,
_DiffType __length_am) const _DiffType __length_am) const
{ { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __length_am, __comp, sequential_tag()); }
__comp, sequential_tag());
}
}; };
template<typename Seq_RAIter, typename _RAIter, template<typename Seq_RAIter, typename _RAIter,
...@@ -298,10 +294,8 @@ namespace __gnu_parallel ...@@ -298,10 +294,8 @@ namespace __gnu_parallel
const _RAIter& __target, const _RAIter& __target,
_Compare& __comp, _Compare& __comp,
_DiffType __length_am) const _DiffType __length_am) const
{ { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp, __comp, sequential_tag()); }
sequential_tag());
}
}; };
/** @brief PMWMS code executed by each thread. /** @brief PMWMS code executed by each thread.
...@@ -321,8 +315,8 @@ namespace __gnu_parallel ...@@ -321,8 +315,8 @@ namespace __gnu_parallel
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
// Length of this thread's chunk, before merging. // Length of this thread's chunk, before merging.
_DifferenceType __length_local _DifferenceType __length_local =
= __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam]; __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
// Sort in temporary storage, leave space for sentinel. // Sort in temporary storage, leave space for sentinel.
...@@ -350,8 +344,7 @@ namespace __gnu_parallel ...@@ -350,8 +344,7 @@ namespace __gnu_parallel
_DifferenceType __num_samples = _DifferenceType __num_samples =
_Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1; _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
_SplitConsistently _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
(__iam, __sd, __comp, __num_samples); (__iam, __sd, __comp, __num_samples);
// Offset from __target __begin, __length after merging. // Offset from __target __begin, __length after merging.
...@@ -364,26 +357,24 @@ namespace __gnu_parallel ...@@ -364,26 +357,24 @@ namespace __gnu_parallel
} }
typedef std::vector< typedef std::vector<
std::pair<_SortingPlacesIterator, _SortingPlacesIterator> > std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
_SeqVector; _SeqVector;
_SeqVector seqs(__sd->_M_num_threads); _SeqVector __seqs(__sd->_M_num_threads);
for (int __s = 0; __s < __sd->_M_num_threads; ++__s) for (int __s = 0; __s < __sd->_M_num_threads; ++__s)
{ {
seqs[__s] = __seqs[__s] =
std::make_pair std::make_pair(__sd->_M_temporary[__s]
(__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin, + __sd->_M_pieces[__iam][__s]._M_begin,
__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end); __sd->_M_temporary[__s]
+ __sd->_M_pieces[__iam][__s]._M_end);
} }
__possibly_stable_multiway_merge< __possibly_stable_multiway_merge<
__stable, __stable, typename _SeqVector::iterator,
typename _SeqVector::iterator, _RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
_RAIter, __sd->_M_source + __offset, __comp,
_Compare, _DifferenceType>() __length_am);
(seqs.begin(), seqs.end(),
__sd->_M_source + __offset, __comp,
__length_am);
# pragma omp barrier # pragma omp barrier
...@@ -421,7 +412,7 @@ namespace __gnu_parallel ...@@ -421,7 +412,7 @@ namespace __gnu_parallel
// shared variables // shared variables
_PMWMSSortingData<_RAIter> __sd; _PMWMSSortingData<_RAIter> __sd;
_DifferenceType* _M_starts; _DifferenceType* __starts;
# pragma omp parallel num_threads(__num_threads) # pragma omp parallel num_threads(__num_threads)
{ {
...@@ -450,30 +441,29 @@ namespace __gnu_parallel ...@@ -450,30 +441,29 @@ namespace __gnu_parallel
= new std::vector<_Piece<_DifferenceType> >[__num_threads]; = new std::vector<_Piece<_DifferenceType> >[__num_threads];
for (int __s = 0; __s < __num_threads; ++__s) for (int __s = 0; __s < __num_threads; ++__s)
__sd._M_pieces[__s].resize(__num_threads); __sd._M_pieces[__s].resize(__num_threads);
_M_starts = __sd._M_starts __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
= new _DifferenceType[__num_threads + 1];
_DifferenceType __chunk_length = __n / __num_threads; _DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __split = __n % __num_threads; _DifferenceType __split = __n % __num_threads;
_DifferenceType __pos = 0; _DifferenceType __pos = 0;
for (int __i = 0; __i < __num_threads; ++__i) for (int __i = 0; __i < __num_threads; ++__i)
{ {
_M_starts[__i] = __pos; __starts[__i] = __pos;
__pos += (__i < __split) __pos += ((__i < __split)
? (__chunk_length + 1) : __chunk_length; ? (__chunk_length + 1) : __chunk_length);
} }
_M_starts[__num_threads] = __pos; __starts[__num_threads] = __pos;
} //single } //single
// Now sort in parallel. // Now sort in parallel.
parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp); parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
} //parallel } //parallel
delete[] _M_starts; delete[] __starts;
delete[] __sd._M_temporary; delete[] __sd._M_temporary;
if (!__exact) if (!__exact)
::operator delete(__sd._M_samples); ::operator delete(__sd._M_samples);
delete[] __sd._M_offsets; delete[] __sd._M_offsets;
delete[] __sd._M_pieces; delete[] __sd._M_pieces;
......
...@@ -69,7 +69,7 @@ namespace __parallel ...@@ -69,7 +69,7 @@ namespace __parallel
__accumulate_switch(_IIter __begin, _IIter __end, __accumulate_switch(_IIter __begin, _IIter __end,
_Tp __init, _IteratorTag) _Tp __init, _IteratorTag)
{ return accumulate(__begin, __end, __init, { return accumulate(__begin, __end, __init,
__gnu_parallel::sequential_tag()); } __gnu_parallel::sequential_tag()); }
template<typename _IIter, typename _Tp, typename _BinaryOperation, template<typename _IIter, typename _Tp, typename _BinaryOperation,
typename _IteratorTag> typename _IteratorTag>
......
...@@ -74,8 +74,8 @@ namespace __gnu_parallel ...@@ -74,8 +74,8 @@ namespace __gnu_parallel
_DifferenceType; _DifferenceType;
_DifferenceType __length = __end - __begin; _DifferenceType __length = __end - __begin;
_ThreadIndex __num_threads = _ThreadIndex __num_threads = __gnu_parallel::min<_DifferenceType>
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length); (__get_max_threads(), __length);
_Result *__thread_results; _Result *__thread_results;
...@@ -94,8 +94,8 @@ namespace __gnu_parallel ...@@ -94,8 +94,8 @@ namespace __gnu_parallel
#pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size) #pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size)
for (_DifferenceType __pos = 0; __pos < __length; ++__pos) for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
__thread_results[__iam] = __thread_results[__iam] = __r(__thread_results[__iam],
__r(__thread_results[__iam], __f(__o, __begin+__pos)); __f(__o, __begin+__pos));
} //parallel } //parallel
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
......
...@@ -74,8 +74,8 @@ namespace __gnu_parallel ...@@ -74,8 +74,8 @@ namespace __gnu_parallel
_DifferenceType; _DifferenceType;
_DifferenceType __length = __end - __begin; _DifferenceType __length = __end - __begin;
_ThreadIndex __num_threads = _ThreadIndex __num_threads = std::min<_DifferenceType>
std::min<_DifferenceType>(__get_max_threads(), __length); (__get_max_threads(), __length);
_Result *__thread_results; _Result *__thread_results;
......
...@@ -75,25 +75,24 @@ namespace __gnu_parallel ...@@ -75,25 +75,24 @@ namespace __gnu_parallel
_Result *__thread_results; _Result *__thread_results;
bool* __constructed; bool* __constructed;
_ThreadIndex __num_threads = _ThreadIndex __num_threads = __gnu_parallel::min<_DifferenceType>
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length); (__get_max_threads(), __length);
# pragma omp parallel num_threads(__num_threads) # pragma omp parallel num_threads(__num_threads)
{ {
# pragma omp single # pragma omp single
{ {
__num_threads = omp_get_num_threads(); __num_threads = omp_get_num_threads();
__thread_results = __thread_results = static_cast<_Result*>
static_cast<_Result*>(::operator new(__num_threads (::operator new(__num_threads * sizeof(_Result)));
* sizeof(_Result)));
__constructed = new bool[__num_threads]; __constructed = new bool[__num_threads];
} }
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
// Neutral element. // Neutral element.
_Result* __reduct = _Result* __reduct = static_cast<_Result*>
static_cast<_Result*>(::operator new(sizeof(_Result))); (::operator new(sizeof(_Result)));
_DifferenceType _DifferenceType
__start = equally_split_point(__length, __num_threads, __iam), __start = equally_split_point(__length, __num_threads, __iam),
......
...@@ -149,9 +149,10 @@ namespace __gnu_parallel ...@@ -149,9 +149,10 @@ namespace __gnu_parallel
if (__iam == 0) if (__iam == 0)
{ {
*__result = *__begin; *__result = *__begin;
__parallel_partial_sum_basecase( __parallel_partial_sum_basecase(__begin + 1,
__begin + 1, __begin + __borders[1], __result + 1, __begin + __borders[1],
__bin_op, *__begin); __result + 1,
__bin_op, *__begin);
::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1)); ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
} }
else else
...@@ -168,7 +169,7 @@ namespace __gnu_parallel ...@@ -168,7 +169,7 @@ namespace __gnu_parallel
# pragma omp single # pragma omp single
__parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads, __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
__sums + 1, __bin_op, __sums[0]); __sums + 1, __bin_op, __sums[0]);
# pragma omp barrier # pragma omp barrier
......
...@@ -44,387 +44,391 @@ ...@@ -44,387 +44,391 @@
namespace __gnu_parallel namespace __gnu_parallel
{ {
/** @brief Parallel implementation of std::partition. /** @brief Parallel implementation of std::partition.
* @param __begin Begin iterator of input sequence to split. * @param __begin Begin iterator of input sequence to split.
* @param __end End iterator of input sequence to split. * @param __end End iterator of input sequence to split.
* @param __pred Partition predicate, possibly including some kind of pivot. * @param __pred Partition predicate, possibly including some kind
* @param __num_threads Maximum number of threads to use for this task. * of pivot.
* @return Number of elements not fulfilling the predicate. */ * @param __num_threads Maximum number of threads to use for this task.
template<typename _RAIter, typename _Predicate> * @return Number of elements not fulfilling the predicate. */
typename std::iterator_traits<_RAIter>::difference_type template<typename _RAIter, typename _Predicate>
__parallel_partition(_RAIter __begin, _RAIter __end, typename std::iterator_traits<_RAIter>::difference_type
_Predicate __pred, _ThreadIndex __num_threads) __parallel_partition(_RAIter __begin, _RAIter __end,
{ _Predicate __pred, _ThreadIndex __num_threads)
typedef std::iterator_traits<_RAIter> _TraitsType; {
typedef typename _TraitsType::value_type _ValueType; typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType; typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
_DifferenceType __n = __end - __begin;
_DifferenceType __n = __end - __begin;
_GLIBCXX_CALL(__n)
_GLIBCXX_CALL(__n)
const _Settings& __s = _Settings::get();
const _Settings& __s = _Settings::get();
// Shared.
_GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1; // Shared.
_GLIBCXX_VOLATILE _DifferenceType __leftover_left, __leftover_right; _GLIBCXX_VOLATILE _DifferenceType __left = 0, __right = __n - 1;
_GLIBCXX_VOLATILE _DifferenceType __leftnew, __rightnew; _GLIBCXX_VOLATILE _DifferenceType __leftover_left, __leftover_right;
_GLIBCXX_VOLATILE _DifferenceType __leftnew, __rightnew;
bool* __reserved_left = NULL, * __reserved_right = NULL;
bool* __reserved_left = NULL, * __reserved_right = NULL;
_DifferenceType __chunk_size;
_DifferenceType __chunk_size;
omp_lock_t __result_lock;
omp_init_lock(&__result_lock); omp_lock_t __result_lock;
omp_init_lock(&__result_lock);
//at least two chunks per thread
if(__right - __left + 1 >= 2 * __num_threads * __chunk_size) //at least two chunks per thread
# pragma omp parallel num_threads(__num_threads) if (__right - __left + 1 >= 2 * __num_threads * __chunk_size)
{ # pragma omp parallel num_threads(__num_threads)
# pragma omp single {
{ # pragma omp single
__num_threads = omp_get_num_threads(); {
__reserved_left = new bool[__num_threads]; __num_threads = omp_get_num_threads();
__reserved_right = new bool[__num_threads]; __reserved_left = new bool[__num_threads];
__reserved_right = new bool[__num_threads];
if (__s.partition_chunk_share > 0.0)
__chunk_size = std::max<_DifferenceType>( if (__s.partition_chunk_share > 0.0)
__s.partition_chunk_size, __chunk_size = std::max<_DifferenceType>
(double)__n * __s.partition_chunk_share / (__s.partition_chunk_size, (double)__n
(double)__num_threads); * __s.partition_chunk_share / (double)__num_threads);
else else
__chunk_size = __s.partition_chunk_size; __chunk_size = __s.partition_chunk_size;
} }
while (__right - __left + 1 >= 2 * __num_threads * __chunk_size) while (__right - __left + 1 >= 2 * __num_threads * __chunk_size)
{ {
# pragma omp single # pragma omp single
{ {
_DifferenceType __num_chunks _DifferenceType __num_chunks = ((__right - __left + 1)
= (__right - __left + 1) / __chunk_size; / __chunk_size);
for (int __r = 0; __r < __num_threads; ++__r) for (int __r = 0; __r < __num_threads; ++__r)
{ {
__reserved_left[__r] = false; __reserved_left[__r] = false;
__reserved_right[__r] = false; __reserved_right[__r] = false;
} }
__leftover_left = 0; __leftover_left = 0;
__leftover_right = 0; __leftover_right = 0;
} //implicit barrier } //implicit barrier
// Private. // Private.
_DifferenceType __thread_left, __thread_left_border, _DifferenceType __thread_left, __thread_left_border,
__thread_right, __thread_right_border; __thread_right, __thread_right_border;
__thread_left = __left + 1; __thread_left = __left + 1;
// Just to satisfy the condition below. // Just to satisfy the condition below.
__thread_left_border = __thread_left - 1; __thread_left_border = __thread_left - 1;
__thread_right = __n - 1; __thread_right = __n - 1;
__thread_right_border = __thread_right + 1; __thread_right_border = __thread_right + 1;
bool __iam_finished = false; bool __iam_finished = false;
while (!__iam_finished) while (!__iam_finished)
{ {
if (__thread_left > __thread_left_border) if (__thread_left > __thread_left_border)
{ {
omp_set_lock(&__result_lock); omp_set_lock(&__result_lock);
if (__left + (__chunk_size - 1) > __right) if (__left + (__chunk_size - 1) > __right)
__iam_finished = true; __iam_finished = true;
else else
{ {
__thread_left = __left; __thread_left = __left;
__thread_left_border = __left + (__chunk_size - 1); __thread_left_border = __left + (__chunk_size - 1);
__left += __chunk_size; __left += __chunk_size;
} }
omp_unset_lock(&__result_lock); omp_unset_lock(&__result_lock);
} }
if (__thread_right < __thread_right_border) if (__thread_right < __thread_right_border)
{ {
omp_set_lock(&__result_lock); omp_set_lock(&__result_lock);
if (__left > __right - (__chunk_size - 1)) if (__left > __right - (__chunk_size - 1))
__iam_finished = true; __iam_finished = true;
else else
{ {
__thread_right = __right; __thread_right = __right;
__thread_right_border = __right - (__chunk_size - 1); __thread_right_border = __right - (__chunk_size - 1);
__right -= __chunk_size; __right -= __chunk_size;
} }
omp_unset_lock(&__result_lock); omp_unset_lock(&__result_lock);
} }
if (__iam_finished) if (__iam_finished)
break; break;
// Swap as usual. // Swap as usual.
while (__thread_left < __thread_right) while (__thread_left < __thread_right)
{ {
while (__pred(__begin[__thread_left]) while (__pred(__begin[__thread_left])
&& __thread_left <= __thread_left_border) && __thread_left <= __thread_left_border)
++__thread_left; ++__thread_left;
while (!__pred(__begin[__thread_right]) while (!__pred(__begin[__thread_right])
&& __thread_right >= __thread_right_border) && __thread_right >= __thread_right_border)
--__thread_right; --__thread_right;
if (__thread_left > __thread_left_border if (__thread_left > __thread_left_border
|| __thread_right < __thread_right_border) || __thread_right < __thread_right_border)
// Fetch new chunk(__s). // Fetch new chunk(__s).
break; break;
std::swap(__begin[__thread_left], __begin[__thread_right]); std::swap(__begin[__thread_left],
++__thread_left; __begin[__thread_right]);
--__thread_right; ++__thread_left;
} --__thread_right;
} }
}
// Now swap the leftover chunks to the right places.
if (__thread_left <= __thread_left_border) // Now swap the leftover chunks to the right places.
# pragma omp atomic if (__thread_left <= __thread_left_border)
++__leftover_left; # pragma omp atomic
if (__thread_right >= __thread_right_border) ++__leftover_left;
# pragma omp atomic if (__thread_right >= __thread_right_border)
++__leftover_right; # pragma omp atomic
++__leftover_right;
# pragma omp barrier
# pragma omp barrier
# pragma omp single
{ # pragma omp single
__leftnew = __left - __leftover_left * __chunk_size; {
__rightnew = __right + __leftover_right * __chunk_size; __leftnew = __left - __leftover_left * __chunk_size;
} __rightnew = __right + __leftover_right * __chunk_size;
}
# pragma omp barrier
# pragma omp barrier
// <=> __thread_left_border + (__chunk_size - 1) >= __leftnew
if (__thread_left <= __thread_left_border // <=> __thread_left_border + (__chunk_size - 1) >= __leftnew
&& __thread_left_border >= __leftnew) if (__thread_left <= __thread_left_border
{ && __thread_left_border >= __leftnew)
// Chunk already in place, reserve spot. {
__reserved_left // Chunk already in place, reserve spot.
[(__left - (__thread_left_border + 1)) / __chunk_size] __reserved_left[(__left - (__thread_left_border + 1))
= true; / __chunk_size] = true;
} }
// <=> __thread_right_border - (__chunk_size - 1) <= __rightnew // <=> __thread_right_border - (__chunk_size - 1) <= __rightnew
if (__thread_right >= __thread_right_border if (__thread_right >= __thread_right_border
&& __thread_right_border <= __rightnew) && __thread_right_border <= __rightnew)
{ {
// Chunk already in place, reserve spot. // Chunk already in place, reserve spot.
__reserved_right[((__thread_right_border - 1) - __right) __reserved_right[((__thread_right_border - 1) - __right)
/ __chunk_size] = true; / __chunk_size] = true;
} }
# pragma omp barrier # pragma omp barrier
if (__thread_left <= __thread_left_border if (__thread_left <= __thread_left_border
&& __thread_left_border < __leftnew) && __thread_left_border < __leftnew)
{ {
// Find spot and swap. // Find spot and swap.
_DifferenceType __swapstart = -1; _DifferenceType __swapstart = -1;
omp_set_lock(&__result_lock); omp_set_lock(&__result_lock);
for (int __r = 0; __r < __leftover_left; ++__r) for (int __r = 0; __r < __leftover_left; ++__r)
if (!__reserved_left[__r]) if (!__reserved_left[__r])
{ {
__reserved_left[__r] = true; __reserved_left[__r] = true;
__swapstart = __left - (__r + 1) * __chunk_size; __swapstart = __left - (__r + 1) * __chunk_size;
break; break;
} }
omp_unset_lock(&__result_lock); omp_unset_lock(&__result_lock);
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
#endif #endif
std::swap_ranges(__begin + __thread_left_border std::swap_ranges(__begin + __thread_left_border
- (__chunk_size - 1), - (__chunk_size - 1),
__begin + __thread_left_border + 1, __begin + __thread_left_border + 1,
__begin + __swapstart); __begin + __swapstart);
} }
if (__thread_right >= __thread_right_border if (__thread_right >= __thread_right_border
&& __thread_right_border > __rightnew) && __thread_right_border > __rightnew)
{ {
// Find spot and swap // Find spot and swap
_DifferenceType __swapstart = -1; _DifferenceType __swapstart = -1;
omp_set_lock(&__result_lock); omp_set_lock(&__result_lock);
for (int __r = 0; __r < __leftover_right; ++__r) for (int __r = 0; __r < __leftover_right; ++__r)
if (!__reserved_right[__r]) if (!__reserved_right[__r])
{ {
__reserved_right[__r] = true; __reserved_right[__r] = true;
__swapstart = __right + __r * __chunk_size + 1; __swapstart = __right + __r * __chunk_size + 1;
break; break;
} }
omp_unset_lock(&__result_lock); omp_unset_lock(&__result_lock);
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1); _GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
#endif #endif
std::swap_ranges( std::swap_ranges(__begin + __thread_right_border,
__begin + __thread_right_border, __begin + __thread_right_border
__begin + __thread_right_border + __chunk_size, + __chunk_size, __begin + __swapstart);
__begin + __swapstart); }
}
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
# pragma omp barrier # pragma omp barrier
# pragma omp single # pragma omp single
{ {
for (int __r = 0; __r < __leftover_left; ++__r) for (int __r = 0; __r < __leftover_left; ++__r)
_GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r]); _GLIBCXX_PARALLEL_ASSERT(__reserved_left[__r]);
for (int __r = 0; __r < __leftover_right; ++__r) for (int __r = 0; __r < __leftover_right; ++__r)
_GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r]); _GLIBCXX_PARALLEL_ASSERT(__reserved_right[__r]);
} }
# pragma omp barrier # pragma omp barrier
#endif #endif
# pragma omp barrier # pragma omp barrier
__left = __leftnew; __left = __leftnew;
__right = __rightnew; __right = __rightnew;
} }
# pragma omp flush(__left, __right)
} // end "recursion" //parallel # pragma omp flush(__left, __right)
} // end "recursion" //parallel
_DifferenceType __final_left = __left, __final_right = __right;
_DifferenceType __final_left = __left, __final_right = __right;
while (__final_left < __final_right)
{ while (__final_left < __final_right)
// Go right until key is geq than pivot. {
while (__pred(__begin[__final_left]) && __final_left < __final_right) // Go right until key is geq than pivot.
++__final_left; while (__pred(__begin[__final_left])
&& __final_left < __final_right)
// Go left until key is less than pivot. ++__final_left;
while (!__pred(__begin[__final_right]) && __final_left < __final_right)
--__final_right; // Go left until key is less than pivot.
while (!__pred(__begin[__final_right])
if (__final_left == __final_right) && __final_left < __final_right)
break; --__final_right;
std::swap(__begin[__final_left], __begin[__final_right]);
++__final_left; if (__final_left == __final_right)
--__final_right; break;
} std::swap(__begin[__final_left], __begin[__final_right]);
++__final_left;
// All elements on the left side are < piv, all elements on the --__final_right;
// right are >= piv }
delete[] __reserved_left;
delete[] __reserved_right; // All elements on the left side are < piv, all elements on the
// right are >= piv
omp_destroy_lock(&__result_lock); delete[] __reserved_left;
delete[] __reserved_right;
// Element "between" __final_left and __final_right might not have
// been regarded yet omp_destroy_lock(&__result_lock);
if (__final_left < __n && !__pred(__begin[__final_left]))
// Really swapped. // Element "between" __final_left and __final_right might not have
return __final_left; // been regarded yet
else if (__final_left < __n && !__pred(__begin[__final_left]))
return __final_left + 1; // Really swapped.
} return __final_left;
else
/** return __final_left + 1;
* @brief Parallel implementation of std::nth_element(). }
/**
* @brief Parallel implementation of std::nth_element().
* @param __begin Begin iterator of input sequence.
* @param __nth _Iterator of element that must be in position afterwards.
* @param __end End iterator of input sequence.
* @param __comp Comparator.
*/
template<typename _RAIter, typename _Compare>
void
__parallel_nth_element(_RAIter __begin, _RAIter __nth,
_RAIter __end, _Compare __comp)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
_GLIBCXX_CALL(__end - __begin)
_RAIter __split;
_RandomNumber __rng;
_DifferenceType __minimum_length =
std::max<_DifferenceType>(2, _Settings::get().partition_minimal_n);
// Break if input range to small.
while (static_cast<_SequenceIndex>(__end - __begin) >= __minimum_length)
{
_DifferenceType __n = __end - __begin;
_RAIter __pivot_pos = __begin + __rng(__n);
// Swap __pivot_pos value to end.
if (__pivot_pos != (__end - 1))
std::swap(*__pivot_pos, *(__end - 1));
__pivot_pos = __end - 1;
// _Compare must have first_value_type, second_value_type,
// result_type
// _Compare ==
// __gnu_parallel::_Lexicographic<S, int,
// __gnu_parallel::_Less<S, S> >
// __pivot_pos == std::pair<S, int>*
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
__pred(__comp, *__pivot_pos);
// Divide, leave pivot unchanged in last place.
_RAIter __split_pos1, __split_pos2;
__split_pos1 = __begin + __parallel_partition(__begin, __end - 1,
__pred,
__get_max_threads());
// Left side: < __pivot_pos; __right side: >= __pivot_pos
// Swap pivot back to middle.
if (__split_pos1 != __pivot_pos)
std::swap(*__split_pos1, *__pivot_pos);
__pivot_pos = __split_pos1;
// In case all elements are equal, __split_pos1 == 0
if ((__split_pos1 + 1 - __begin) < (__n >> 7)
|| (__end - __split_pos1) < (__n >> 7))
{
// Very unequal split, one part smaller than one 128th
// elements not strictly larger than the pivot.
__gnu_parallel::__unary_negate<__gnu_parallel::
__binder1st<_Compare, _ValueType,
_ValueType, bool>, _ValueType>
__pred(__gnu_parallel::__binder1st<_Compare, _ValueType,
_ValueType, bool>(__comp, *__pivot_pos));
// Find other end of pivot-equal range.
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
__end, __pred);
}
else
// Only skip the pivot.
__split_pos2 = __split_pos1 + 1;
// Compare iterators.
if (__split_pos2 <= __nth)
__begin = __split_pos2;
else if (__nth < __split_pos1)
__end = __split_pos1;
else
break;
}
// Only at most _Settings::partition_minimal_n __elements __left.
__gnu_sequential::sort(__begin, __end, __comp);
}
/** @brief Parallel implementation of std::partial_sort().
* @param __begin Begin iterator of input sequence. * @param __begin Begin iterator of input sequence.
* @param __nth _Iterator of element that must be in position afterwards. * @param __middle Sort until this position.
* @param __end End iterator of input sequence. * @param __end End iterator of input sequence.
* @param __comp Comparator. * @param __comp Comparator. */
*/ template<typename _RAIter, typename _Compare>
template<typename _RAIter, typename _Compare> void
void __parallel_partial_sort(_RAIter __begin,
__parallel_nth_element(_RAIter __begin, _RAIter __nth, _RAIter __middle,
_RAIter __end, _Compare __comp) _RAIter __end, _Compare __comp)
{ {
typedef std::iterator_traits<_RAIter> _TraitsType; __parallel_nth_element(__begin, __middle, __end, __comp);
typedef typename _TraitsType::value_type _ValueType; std::sort(__begin, __middle, __comp);
typedef typename _TraitsType::difference_type _DifferenceType; }
_GLIBCXX_CALL(__end - __begin)
_RAIter __split;
_RandomNumber __rng;
_DifferenceType __minimum_length =
std::max<_DifferenceType>(2, _Settings::get().partition_minimal_n);
// Break if input range to small.
while (static_cast<_SequenceIndex>(__end - __begin) >= __minimum_length)
{
_DifferenceType __n = __end - __begin;
_RAIter __pivot_pos = __begin + __rng(__n);
// Swap __pivot_pos value to end.
if (__pivot_pos != (__end - 1))
std::swap(*__pivot_pos, *(__end - 1));
__pivot_pos = __end - 1;
// _Compare must have first_value_type, second_value_type,
// result_type
// _Compare ==
// __gnu_parallel::_Lexicographic<S, int, __gnu_parallel::_Less<S, S> >
// __pivot_pos == std::pair<S, int>*
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
__pred(__comp, *__pivot_pos);
// Divide, leave pivot unchanged in last place.
_RAIter __split_pos1, __split_pos2;
__split_pos1 = __begin
+ __parallel_partition(__begin, __end - 1, __pred,
__get_max_threads());
// Left side: < __pivot_pos; __right side: >= __pivot_pos
// Swap pivot back to middle.
if (__split_pos1 != __pivot_pos)
std::swap(*__split_pos1, *__pivot_pos);
__pivot_pos = __split_pos1;
// In case all elements are equal, __split_pos1 == 0
if ((__split_pos1 + 1 - __begin) < (__n >> 7)
|| (__end - __split_pos1) < (__n >> 7))
{
// Very unequal split, one part smaller than one 128th
// elements not strictly larger than the pivot.
__gnu_parallel::__unary_negate<__gnu_parallel::
__binder1st<_Compare, _ValueType, _ValueType, bool>, _ValueType>
__pred(__gnu_parallel::__binder1st<_Compare, _ValueType,
_ValueType, bool>(__comp, *__pivot_pos));
// Find other end of pivot-equal range.
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
__end, __pred);
}
else
// Only skip the pivot.
__split_pos2 = __split_pos1 + 1;
// Compare iterators.
if (__split_pos2 <= __nth)
__begin = __split_pos2;
else if (__nth < __split_pos1)
__end = __split_pos1;
else
break;
}
// Only at most _Settings::partition_minimal_n __elements __left.
__gnu_sequential::sort(__begin, __end, __comp);
}
/** @brief Parallel implementation of std::partial_sort().
* @param __begin Begin iterator of input sequence.
* @param __middle Sort until this position.
* @param __end End iterator of input sequence.
* @param __comp Comparator. */
template<typename _RAIter, typename _Compare>
void
__parallel_partial_sort(_RAIter __begin,
_RAIter __middle,
_RAIter __end, _Compare __comp)
{
__parallel_nth_element(__begin, __middle, __end, __comp);
std::sort(__begin, __middle, __comp);
}
} //namespace __gnu_parallel } //namespace __gnu_parallel
......
...@@ -65,10 +65,10 @@ namespace __gnu_parallel ...@@ -65,10 +65,10 @@ namespace __gnu_parallel
public: public:
/** @brief Constructor. Not to be called concurrent, of course. /** @brief Constructor. Not to be called concurrent, of course.
* @param _M_max_size Maximal number of elements to be contained. */ * @param _M_max_size Maximal number of elements to be contained. */
_RestrictedBoundedConcurrentQueue(_SequenceIndex _M_max_size) _RestrictedBoundedConcurrentQueue(_SequenceIndex __max_size)
{ {
this->_M_max_size = _M_max_size; _M_max_size = __max_size;
_M_base = new _Tp[_M_max_size]; _M_base = new _Tp[__max_size];
_M_borders = __encode2(0, 0); _M_borders = __encode2(0, 0);
#pragma omp flush #pragma omp flush
} }
...@@ -105,12 +105,12 @@ namespace __gnu_parallel ...@@ -105,12 +105,12 @@ namespace __gnu_parallel
while (__former_front > __former_back) while (__former_front > __former_back)
{ {
// Chance. // Chance.
_CASable _CASable __former_borders = __encode2(__former_front,
__former_borders = __encode2(__former_front, __former_back); __former_back);
_CASable _CASable __new_borders = __encode2(__former_front - 1,
__new_borders = __encode2(__former_front - 1, __former_back); __former_back);
if (__compare_and_swap( if (__compare_and_swap(&_M_borders, __former_borders,
&_M_borders, __former_borders, __new_borders)) __new_borders))
{ {
__t = *(_M_base + (__former_front - 1) % _M_max_size); __t = *(_M_base + (__former_front - 1) % _M_max_size);
return true; return true;
...@@ -132,12 +132,12 @@ namespace __gnu_parallel ...@@ -132,12 +132,12 @@ namespace __gnu_parallel
while (__former_front > __former_back) while (__former_front > __former_back)
{ {
// Chance. // Chance.
_CASable _CASable __former_borders = __encode2(__former_front,
__former_borders = __encode2(__former_front, __former_back); __former_back);
_CASable _CASable __new_borders = __encode2(__former_front,
__new_borders = __encode2(__former_front, __former_back + 1); __former_back + 1);
if (__compare_and_swap( if (__compare_and_swap(&_M_borders, __former_borders,
&_M_borders, __former_borders, __new_borders)) __new_borders))
{ {
__t = *(_M_base + __former_back % _M_max_size); __t = *(_M_base + __former_back % _M_max_size);
return true; return true;
......
...@@ -48,13 +48,12 @@ namespace __gnu_parallel ...@@ -48,13 +48,12 @@ namespace __gnu_parallel
*/ */
template<typename _RAIter, typename _Compare> template<typename _RAIter, typename _Compare>
typename std::iterator_traits<_RAIter>::difference_type typename std::iterator_traits<_RAIter>::difference_type
__parallel_sort_qs_divide(_RAIter __begin, __parallel_sort_qs_divide(_RAIter __begin, _RAIter __end,
_RAIter __end, _Compare __comp, typename std::iterator_traits
_Compare __comp, typename std::iterator_traits <_RAIter>::difference_type __pivot_rank,
<_RAIter>::difference_type __pivot_rank, typename std::iterator_traits
typename std::iterator_traits <_RAIter>::difference_type
<_RAIter>::difference_type __num_samples, _ThreadIndex __num_threads)
__num_samples, _ThreadIndex __num_threads)
{ {
typedef std::iterator_traits<_RAIter> _TraitsType; typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::value_type _ValueType;
...@@ -64,25 +63,24 @@ namespace __gnu_parallel ...@@ -64,25 +63,24 @@ namespace __gnu_parallel
__num_samples = std::min(__num_samples, __n); __num_samples = std::min(__num_samples, __n);
// Allocate uninitialized, to avoid default constructor. // Allocate uninitialized, to avoid default constructor.
_ValueType* __samples = _ValueType* __samples = static_cast<_ValueType*>
static_cast<_ValueType*>(::operator new(__num_samples (::operator new(__num_samples * sizeof(_ValueType)));
* sizeof(_ValueType)));
for (_DifferenceType __s = 0; __s < __num_samples; ++__s) for (_DifferenceType __s = 0; __s < __num_samples; ++__s)
{ {
const unsigned long long __index const unsigned long long __index = static_cast<unsigned long long>
= static_cast<unsigned long long>(__s) * __n / __num_samples; (__s) * __n / __num_samples;
::new(&(__samples[__s])) _ValueType(__begin[__index]); ::new(&(__samples[__s])) _ValueType(__begin[__index]);
} }
__gnu_sequential::sort(__samples, __samples + __num_samples, __comp); __gnu_sequential::sort(__samples, __samples + __num_samples, __comp);
_ValueType& pivot = __samples[__pivot_rank * __num_samples / __n]; _ValueType& __pivot = __samples[__pivot_rank * __num_samples / __n];
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool> __gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
__pred(__comp, pivot); __pred(__comp, __pivot);
_DifferenceType __split = _DifferenceType __split = __parallel_partition(__begin, __end,
__parallel_partition(__begin, __end, __pred, __num_threads); __pred, __num_threads);
::operator delete(__samples); ::operator delete(__samples);
...@@ -98,10 +96,9 @@ namespace __gnu_parallel ...@@ -98,10 +96,9 @@ namespace __gnu_parallel
*/ */
template<typename _RAIter, typename _Compare> template<typename _RAIter, typename _Compare>
void void
__parallel_sort_qs_conquer(_RAIter __begin, __parallel_sort_qs_conquer(_RAIter __begin, _RAIter __end,
_RAIter __end, _Compare __comp,
_Compare __comp, _ThreadIndex __num_threads)
_ThreadIndex __num_threads)
{ {
typedef std::iterator_traits<_RAIter> _TraitsType; typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::value_type _ValueType;
...@@ -127,24 +124,22 @@ namespace __gnu_parallel ...@@ -127,24 +124,22 @@ namespace __gnu_parallel
__pivot_rank = __n * __num_threads_left / __num_threads; __pivot_rank = __n * __num_threads_left / __num_threads;
_DifferenceType __split = _DifferenceType __split = __parallel_sort_qs_divide
__parallel_sort_qs_divide(__begin, __end, __comp, __pivot_rank, (__begin, __end, __comp, __pivot_rank,
_Settings::get().sort_qs_num_samples_preset, _Settings::get().sort_qs_num_samples_preset, __num_threads);
__num_threads);
#pragma omp parallel sections num_threads(2) #pragma omp parallel sections num_threads(2)
{ {
#pragma omp section #pragma omp section
__parallel_sort_qs_conquer(__begin, __begin + __split, __parallel_sort_qs_conquer(__begin, __begin + __split,
__comp, __num_threads_left); __comp, __num_threads_left);
#pragma omp section #pragma omp section
__parallel_sort_qs_conquer(__begin + __split, __end, __parallel_sort_qs_conquer(__begin + __split, __end,
__comp, __num_threads - __num_threads_left); __comp, __num_threads - __num_threads_left);
} }
} }
/** @brief Unbalanced quicksort main call. /** @brief Unbalanced quicksort main call.
* @param __begin Begin iterator of input sequence. * @param __begin Begin iterator of input sequence.
* @param __end End iterator input sequence, ignored. * @param __end End iterator input sequence, ignored.
...@@ -154,10 +149,9 @@ namespace __gnu_parallel ...@@ -154,10 +149,9 @@ namespace __gnu_parallel
*/ */
template<typename _RAIter, typename _Compare> template<typename _RAIter, typename _Compare>
void void
__parallel_sort_qs(_RAIter __begin, __parallel_sort_qs(_RAIter __begin, _RAIter __end,
_RAIter __end, _Compare __comp,
_Compare __comp, _ThreadIndex __num_threads)
_ThreadIndex __num_threads)
{ {
_GLIBCXX_CALL(__n) _GLIBCXX_CALL(__n)
......
...@@ -39,492 +39,484 @@ ...@@ -39,492 +39,484 @@
namespace __gnu_parallel namespace __gnu_parallel
{ {
/** @brief Type to hold the index of a bin. /** @brief Type to hold the index of a bin.
* *
* Since many variables of this type are allocated, it should be * Since many variables of this type are allocated, it should be
* chosen as small as possible. * chosen as small as possible.
*/ */
typedef unsigned short _BinIndex; typedef unsigned short _BinIndex;
/** @brief Data known to every thread participating in /** @brief Data known to every thread participating in
__gnu_parallel::__parallel_random_shuffle(). */ __gnu_parallel::__parallel_random_shuffle(). */
template<typename _RAIter> template<typename _RAIter>
struct _DRandomShufflingGlobalData struct _DRandomShufflingGlobalData
{ {
typedef std::iterator_traits<_RAIter> _TraitsType; typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType; typedef typename _TraitsType::difference_type _DifferenceType;
/** @brief Begin iterator of the __source. */
_RAIter& _M_source;
/** @brief Temporary arrays for each thread. */
_ValueType** _M_temporaries;
/** @brief Two-dimensional array to hold the thread-bin distribution.
*
* Dimensions (_M_num_threads + 1) __x (_M_num_bins + 1). */
_DifferenceType** _M_dist;
/** @brief Start indexes of the threads' __chunks. */
_DifferenceType* _M_starts;
/** @brief Number of the thread that will further process the
corresponding bin. */
_ThreadIndex* _M_bin_proc;
/** @brief Number of bins to distribute to. */
int _M_num_bins;
/** @brief Number of bits needed to address the bins. */
int _M_num_bits;
/** @brief Constructor. */
_DRandomShufflingGlobalData(_RAIter& __source)
: _M_source(__source) { }
};
/** @brief Local data for a thread participating in
__gnu_parallel::__parallel_random_shuffle().
*/
template<typename _RAIter, typename _RandomNumberGenerator>
struct _DRSSorterPU
{
/** @brief Number of threads participating in total. */
int _M_num_threads;
/** @brief Begin index for bins taken care of by this thread. */
_BinIndex _M_bins_begin;
/** @brief End index for bins taken care of by this thread. */
_BinIndex __bins_end;
/** @brief Random _M_seed for this thread. */
uint32_t _M_seed;
/** @brief Pointer to global data. */
_DRandomShufflingGlobalData<_RAIter>* _M_sd;
};
/** @brief Generate a random number in @__c [0,2^logp).
* @param logp Logarithm (basis 2) of the upper range __bound.
* @param __rng Random number generator to use.
*/
template<typename _RandomNumberGenerator>
inline int
__random_number_pow2(int logp, _RandomNumberGenerator& __rng)
{ return __rng.__genrand_bits(logp); }
/** @brief Random shuffle code executed by each thread.
* @param __pus Array of thread-local data records. */
template<typename _RAIter, typename _RandomNumberGenerator>
void
__parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter,
_RandomNumberGenerator>* __pus)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
_ThreadIndex __iam = omp_get_thread_num();
_DRSSorterPU<_RAIter, _RandomNumberGenerator>* d = &__pus[__iam];
_DRandomShufflingGlobalData<_RAIter>* _M_sd = d->_M_sd;
// Indexing: _M_dist[bin][processor]
_DifferenceType __length = _M_sd->_M_starts[__iam + 1] -
_M_sd->_M_starts[__iam];
_BinIndex* __oracles = new _BinIndex[__length];
_DifferenceType* _M_dist = new _DifferenceType[_M_sd->_M_num_bins + 1];
_BinIndex* _M_bin_proc = new _BinIndex[_M_sd->_M_num_bins];
_ValueType** _M_temporaries = new _ValueType*[d->_M_num_threads];
// Compute oracles and count appearances.
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b)
_M_dist[__b] = 0;
int _M_num_bits = _M_sd->_M_num_bits;
_RandomNumber __rng(d->_M_seed);
// First main loop.
for (_DifferenceType __i = 0; __i < __length; ++__i)
{
_BinIndex __oracle = __random_number_pow2(_M_num_bits, __rng);
__oracles[__i] = __oracle;
// To allow prefix (partial) sum. /** @brief Begin iterator of the __source. */
++(_M_dist[__oracle + 1]); _RAIter& _M_source;
}
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b) /** @brief Temporary arrays for each thread. */
_M_sd->_M_dist[__b][__iam + 1] = _M_dist[__b]; _ValueType** _M_temporaries;
# pragma omp barrier /** @brief Two-dimensional array to hold the thread-bin distribution.
*
* Dimensions (_M_num_threads + 1) __x (_M_num_bins + 1). */
_DifferenceType** _M_dist;
# pragma omp single /** @brief Start indexes of the threads' __chunks. */
{ _DifferenceType* _M_starts;
// Sum up bins, _M_sd->_M_dist[__s + 1][d->_M_num_threads] now contains
// the total number of items in bin __s
for (_BinIndex __s = 0; __s < _M_sd->_M_num_bins; ++__s)
__gnu_sequential::partial_sum(
_M_sd->_M_dist[__s + 1],
_M_sd->_M_dist[__s + 1] + d->_M_num_threads + 1,
_M_sd->_M_dist[__s + 1]);
}
# pragma omp barrier /** @brief Number of the thread that will further process the
corresponding bin. */
_ThreadIndex* _M_bin_proc;
_SequenceIndex __offset = 0, __global_offset = 0; /** @brief Number of bins to distribute to. */
for (_BinIndex __s = 0; __s < d->_M_bins_begin; ++__s) int _M_num_bins;
__global_offset += _M_sd->_M_dist[__s + 1][d->_M_num_threads];
# pragma omp barrier /** @brief Number of bits needed to address the bins. */
int _M_num_bits;
for (_BinIndex __s = d->_M_bins_begin; __s < d->__bins_end; ++__s) /** @brief Constructor. */
{ _DRandomShufflingGlobalData(_RAIter& __source)
for (int __t = 0; __t < d->_M_num_threads + 1; ++__t) : _M_source(__source) { }
_M_sd->_M_dist[__s + 1][__t] += __offset; };
__offset = _M_sd->_M_dist[__s + 1][d->_M_num_threads];
} /** @brief Local data for a thread participating in
__gnu_parallel::__parallel_random_shuffle().
*/
/** @brief Per-thread data record for
 *  __gnu_parallel::__parallel_random_shuffle(); one instance is
 *  filled in for each participating thread. */
template<typename _RAIter, typename _RandomNumberGenerator>
struct _DRSSorterPU
{
 /** @brief Number of threads participating in total. */
 int _M_num_threads;
 /** @brief Begin index for bins taken care of by this thread. */
 _BinIndex _M_bins_begin;
 /** @brief End index (exclusive) for bins taken care of by this thread. */
 _BinIndex __bins_end;
 /** @brief Random seed for this thread's generator. */
 uint32_t _M_seed;
 /** @brief Pointer to the global data shared by all threads. */
 _DRandomShufflingGlobalData<_RAIter>* _M_sd;
};
/** @brief Draw a random value uniformly from the range [0, 2^__logp).
 *  @param __logp Base-2 logarithm of the (exclusive) upper bound.
 *  @param __rng Random number generator supplying random bits.
 *  @return A value with exactly @c __logp random low-order bits. */
template<typename _RandomNumberGenerator>
  inline int
  __random_number_pow2(int __logp, _RandomNumberGenerator& __rng)
  {
    // Delegate to the generator's raw-bits interface; requesting
    // __logp bits yields a value strictly below 2^__logp.
    const int __value = __rng.__genrand_bits(__logp);
    return __value;
  }
/** @brief Random shuffle code executed by each thread.
* @param __pus Array of thread-local data records. */
template<typename _RAIter, typename _RandomNumberGenerator>
void
__parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter,
_RandomNumberGenerator>* __pus)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
_M_sd->_M_temporaries[__iam] = static_cast<_ValueType*>( _ThreadIndex __iam = omp_get_thread_num();
::operator new(sizeof(_ValueType) * __offset)); _DRSSorterPU<_RAIter, _RandomNumberGenerator>* __d = &__pus[__iam];
_DRandomShufflingGlobalData<_RAIter>* __sd = __d->_M_sd;
# pragma omp barrier // Indexing: _M_dist[bin][processor]
_DifferenceType __length = (__sd->_M_starts[__iam + 1]
- __sd->_M_starts[__iam]);
_BinIndex* __oracles = new _BinIndex[__length];
_DifferenceType* __dist = new _DifferenceType[__sd->_M_num_bins + 1];
_BinIndex* __bin_proc = new _BinIndex[__sd->_M_num_bins];
_ValueType** __temporaries = new _ValueType*[__d->_M_num_threads];
// Draw local copies to avoid false sharing. // Compute oracles and count appearances.
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b) for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
_M_dist[__b] = _M_sd->_M_dist[__b][__iam]; __dist[__b] = 0;
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins; ++__b) int __num_bits = __sd->_M_num_bits;
_M_bin_proc[__b] = _M_sd->_M_bin_proc[__b];
for (_ThreadIndex __t = 0; __t < d->_M_num_threads; ++__t)
_M_temporaries[__t] = _M_sd->_M_temporaries[__t];
_RAIter _M_source = _M_sd->_M_source; _RandomNumber __rng(__d->_M_seed);
_DifferenceType __start = _M_sd->_M_starts[__iam];
// Distribute according to oracles, second main loop. // First main loop.
for (_DifferenceType __i = 0; __i < __length; ++__i) for (_DifferenceType __i = 0; __i < __length; ++__i)
{ {
_BinIndex target_bin = __oracles[__i]; _BinIndex __oracle = __random_number_pow2(__num_bits, __rng);
_ThreadIndex target_p = _M_bin_proc[target_bin]; __oracles[__i] = __oracle;
// Last column [d->_M_num_threads] stays unchanged. // To allow prefix (partial) sum.
::new(&(_M_temporaries[target_p][_M_dist[target_bin + 1]++])) ++(__dist[__oracle + 1]);
_ValueType(*(_M_source + __i + __start)); }
}
delete[] __oracles; for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
delete[] _M_dist; __sd->_M_dist[__b][__iam + 1] = __dist[__b];
delete[] _M_bin_proc;
delete[] _M_temporaries;
# pragma omp barrier # pragma omp barrier
// Shuffle bins internally. # pragma omp single
for (_BinIndex __b = d->_M_bins_begin; __b < d->__bins_end; ++__b)
{ {
_ValueType* __begin = // Sum up bins, __sd->_M_dist[__s + 1][__d->_M_num_threads] now
_M_sd->_M_temporaries[__iam] + // contains the total number of items in bin __s
((__b == d->_M_bins_begin) for (_BinIndex __s = 0; __s < __sd->_M_num_bins; ++__s)
? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]), __gnu_sequential::partial_sum(__sd->_M_dist[__s + 1],
* __end = __sd->_M_dist[__s + 1]
_M_sd->_M_temporaries[__iam] + + __d->_M_num_threads + 1,
_M_sd->_M_dist[__b + 1][d->_M_num_threads]; __sd->_M_dist[__s + 1]);
__sequential_random_shuffle(__begin, __end, __rng);
std::copy(__begin, __end, _M_sd->_M_source + __global_offset +
((__b == d->_M_bins_begin)
? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]));
} }
::operator delete(_M_sd->_M_temporaries[__iam]); # pragma omp barrier
}
_SequenceIndex __offset = 0, __global_offset = 0;
/** @brief Round up to the next greater power of 2. for (_BinIndex __s = 0; __s < __d->_M_bins_begin; ++__s)
* @param __x _Integer to round up */ __global_offset += __sd->_M_dist[__s + 1][__d->_M_num_threads];
template<typename _Tp>
_Tp # pragma omp barrier
__round_up_to_pow2(_Tp __x)
{ for (_BinIndex __s = __d->_M_bins_begin; __s < __d->__bins_end; ++__s)
if (__x <= 1) {
return 1; for (int __t = 0; __t < __d->_M_num_threads + 1; ++__t)
else __sd->_M_dist[__s + 1][__t] += __offset;
return (_Tp)1 << (__rd_log2(__x - 1) + 1); __offset = __sd->_M_dist[__s + 1][__d->_M_num_threads];
} }
/** @brief Main parallel random shuffle step. __sd->_M_temporaries[__iam] = static_cast<_ValueType*>
* @param __begin Begin iterator of sequence. (::operator new(sizeof(_ValueType) * __offset));
* @param __end End iterator of sequence.
* @param __n Length of sequence. # pragma omp barrier
* @param __num_threads Number of threads to use.
* @param __rng Random number generator to use. // Draw local copies to avoid false sharing.
*/ for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
template<typename _RAIter, typename _RandomNumberGenerator> __dist[__b] = __sd->_M_dist[__b][__iam];
void for (_BinIndex __b = 0; __b < __sd->_M_num_bins; ++__b)
__parallel_random_shuffle_drs(_RAIter __begin, __bin_proc[__b] = __sd->_M_bin_proc[__b];
_RAIter __end, for (_ThreadIndex __t = 0; __t < __d->_M_num_threads; ++__t)
typename std::iterator_traits __temporaries[__t] = __sd->_M_temporaries[__t];
<_RAIter>::difference_type __n,
_ThreadIndex __num_threads, _RAIter __source = __sd->_M_source;
_RandomNumberGenerator& __rng) _DifferenceType __start = __sd->_M_starts[__iam];
{
typedef std::iterator_traits<_RAIter> _TraitsType; // Distribute according to oracles, second main loop.
typedef typename _TraitsType::value_type _ValueType; for (_DifferenceType __i = 0; __i < __length; ++__i)
typedef typename _TraitsType::difference_type _DifferenceType; {
_BinIndex __target_bin = __oracles[__i];
_GLIBCXX_CALL(__n) _ThreadIndex __target_p = __bin_proc[__target_bin];
const _Settings& __s = _Settings::get(); // Last column [__d->_M_num_threads] stays unchanged.
::new(&(__temporaries[__target_p][__dist[__target_bin + 1]++]))
if (__num_threads > __n) _ValueType(*(__source + __i + __start));
__num_threads = static_cast<_ThreadIndex>(__n); }
_BinIndex _M_num_bins, __num_bins_cache; delete[] __oracles;
delete[] __dist;
delete[] __bin_proc;
delete[] __temporaries;
# pragma omp barrier
// Shuffle bins internally.
for (_BinIndex __b = __d->_M_bins_begin; __b < __d->__bins_end; ++__b)
{
_ValueType* __begin =
(__sd->_M_temporaries[__iam]
+ (__b == __d->_M_bins_begin
? 0 : __sd->_M_dist[__b][__d->_M_num_threads])),
* __end = (__sd->_M_temporaries[__iam]
+ __sd->_M_dist[__b + 1][__d->_M_num_threads]);
__sequential_random_shuffle(__begin, __end, __rng);
std::copy(__begin, __end, __sd->_M_source + __global_offset
+ (__b == __d->_M_bins_begin
? 0 : __sd->_M_dist[__b][__d->_M_num_threads]));
}
::operator delete(__sd->_M_temporaries[__iam]);
}
/** @brief Round up to the next greater or equal power of 2.
 *  @param __x Integer to round up.
 *  @return Smallest power of 2 that is >= @c __x (1 for inputs <= 1). */
template<typename _Tp>
  _Tp
  __round_up_to_pow2(_Tp __x)
  {
    // Repeated doubling reaches the same result as
    // 1 << (floor_log2(__x - 1) + 1) for all values in range,
    // and naturally yields 1 for inputs <= 1.
    _Tp __pow = 1;
    while (__pow < __x)
      __pow <<= 1;
    return __pow;
  }
/** @brief Main parallel random shuffle step.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __n Length of sequence.
* @param __num_threads Number of threads to use.
* @param __rng Random number generator to use.
*/
template<typename _RAIter, typename _RandomNumberGenerator>
void
__parallel_random_shuffle_drs(_RAIter __begin, _RAIter __end,
typename std::iterator_traits
<_RAIter>::difference_type __n,
_ThreadIndex __num_threads,
_RandomNumberGenerator& __rng)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
_GLIBCXX_CALL(__n)
const _Settings& __s = _Settings::get();
if (__num_threads > __n)
__num_threads = static_cast<_ThreadIndex>(__n);
_BinIndex __num_bins, __num_bins_cache;
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
// Try the L1 cache first. // Try the L1 cache first.
// Must fit into L1. // Must fit into L1.
__num_bins_cache = std::max<_DifferenceType>( __num_bins_cache =
1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType))); std::max<_DifferenceType>(1, __n / (__s.L1_cache_size_lb
__num_bins_cache = __round_up_to_pow2(__num_bins_cache); / sizeof(_ValueType)));
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
// No more buckets than TLB entries, power of 2 // No more buckets than TLB entries, power of 2
// Power of 2 and at least one element per bin, at most the TLB size. // Power of 2 and at least one element per bin, at most the TLB size.
_M_num_bins = std::min<_DifferenceType>(__n, __num_bins_cache); __num_bins = std::min<_DifferenceType>(__n, __num_bins_cache);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin. // 2 TLB entries needed per bin.
_M_num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, _M_num_bins); __num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins);
#endif #endif
_M_num_bins = __round_up_to_pow2(_M_num_bins); __num_bins = __round_up_to_pow2(__num_bins);
if (_M_num_bins < __num_bins_cache) if (__num_bins < __num_bins_cache)
{ {
#endif #endif
// Now try the L2 cache // Now try the L2 cache
// Must fit into L2 // Must fit into L2
__num_bins_cache = static_cast<_BinIndex>(std::max<_DifferenceType>( __num_bins_cache = static_cast<_BinIndex>
1, __n / (__s.L2_cache_size / sizeof(_ValueType)))); (std::max<_DifferenceType>(1, __n / (__s.L2_cache_size
__num_bins_cache = __round_up_to_pow2(__num_bins_cache); / sizeof(_ValueType))));
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
// No more buckets than TLB entries, power of 2.
_M_num_bins = static_cast<_BinIndex>( // No more buckets than TLB entries, power of 2.
std::min(__n, static_cast<_DifferenceType>(__num_bins_cache))); __num_bins = static_cast<_BinIndex>
// Power of 2 and at least one element per bin, at most the TLB size. (std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
// Power of 2 and at least one element per bin, at most the TLB size.
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin. // 2 TLB entries needed per bin.
_M_num_bins = std::min( __num_bins = std::min(static_cast<_DifferenceType>(__s.TLB_size / 2),
static_cast<_DifferenceType>(__s.TLB_size / 2), _M_num_bins); __num_bins);
#endif #endif
_M_num_bins = __round_up_to_pow2(_M_num_bins); __num_bins = __round_up_to_pow2(__num_bins);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
} }
#endif #endif
__num_threads = std::min<_BinIndex>(__num_threads, _M_num_bins); __num_threads = std::min<_BinIndex>(__num_threads, __num_bins);
if (__num_threads <= 1) if (__num_threads <= 1)
return __sequential_random_shuffle(__begin, __end, __rng); return __sequential_random_shuffle(__begin, __end, __rng);
_DRandomShufflingGlobalData<_RAIter> _M_sd(__begin); _DRandomShufflingGlobalData<_RAIter> __sd(__begin);
_DRSSorterPU<_RAIter, _RandomNumber >* __pus; _DRSSorterPU<_RAIter, _RandomNumber >* __pus;
_DifferenceType* _M_starts; _DifferenceType* __starts;
# pragma omp parallel num_threads(__num_threads) # pragma omp parallel num_threads(__num_threads)
{ {
_ThreadIndex __num_threads = omp_get_num_threads(); _ThreadIndex __num_threads = omp_get_num_threads();
# pragma omp single # pragma omp single
{ {
__pus = new _DRSSorterPU<_RAIter, _RandomNumber> __pus = new _DRSSorterPU<_RAIter, _RandomNumber>[__num_threads];
[__num_threads];
__sd._M_temporaries = new _ValueType*[__num_threads];
_M_sd._M_temporaries = new _ValueType*[__num_threads]; __sd._M_dist = new _DifferenceType*[__num_bins + 1];
_M_sd._M_dist = new _DifferenceType*[_M_num_bins + 1]; __sd._M_bin_proc = new _ThreadIndex[__num_bins];
_M_sd._M_bin_proc = new _ThreadIndex[_M_num_bins]; for (_BinIndex __b = 0; __b < __num_bins + 1; ++__b)
for (_BinIndex __b = 0; __b < _M_num_bins + 1; ++__b) __sd._M_dist[__b] = new _DifferenceType[__num_threads + 1];
_M_sd._M_dist[__b] = new _DifferenceType[__num_threads + 1]; for (_BinIndex __b = 0; __b < (__num_bins + 1); ++__b)
for (_BinIndex __b = 0; __b < (_M_num_bins + 1); ++__b) {
{ __sd._M_dist[0][0] = 0;
_M_sd._M_dist[0][0] = 0; __sd._M_dist[__b][0] = 0;
_M_sd._M_dist[__b][0] = 0; }
} __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
_M_starts = _M_sd._M_starts int __bin_cursor = 0;
= new _DifferenceType[__num_threads + 1]; __sd._M_num_bins = __num_bins;
int bin_cursor = 0; __sd._M_num_bits = __rd_log2(__num_bins);
_M_sd._M_num_bins = _M_num_bins;
_M_sd._M_num_bits = __rd_log2(_M_num_bins); _DifferenceType __chunk_length = __n / __num_threads,
__split = __n % __num_threads,
_DifferenceType __chunk_length = __n / __num_threads, __start = 0;
__split = __n % __num_threads, __start = 0; _DifferenceType __bin_chunk_length = __num_bins / __num_threads,
_DifferenceType bin_chunk_length = _M_num_bins / __num_threads, __bin_split = __num_bins % __num_threads;
bin_split = _M_num_bins % __num_threads; for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) {
{ __starts[__i] = __start;
_M_starts[__i] = __start; __start += (__i < __split
__start += (__i < __split) ? (__chunk_length + 1) : __chunk_length);
? (__chunk_length + 1) : __chunk_length; int __j = __pus[__i]._M_bins_begin = __bin_cursor;
int __j = __pus[__i]._M_bins_begin = bin_cursor;
// Range of bins for this processor.
// Range of bins for this processor. __bin_cursor += (__i < __bin_split
bin_cursor += (__i < bin_split) ? ? (__bin_chunk_length + 1)
(bin_chunk_length + 1) : bin_chunk_length; : __bin_chunk_length);
__pus[__i].__bins_end = bin_cursor; __pus[__i].__bins_end = __bin_cursor;
for (; __j < bin_cursor; ++__j) for (; __j < __bin_cursor; ++__j)
_M_sd._M_bin_proc[__j] = __i; __sd._M_bin_proc[__j] = __i;
__pus[__i]._M_num_threads = __num_threads; __pus[__i]._M_num_threads = __num_threads;
__pus[__i]._M_seed = __pus[__i]._M_seed = __rng(std::numeric_limits<uint32_t>::max());
__rng(std::numeric_limits<uint32_t>::max()); __pus[__i]._M_sd = &__sd;
__pus[__i]._M_sd = &_M_sd; }
} __starts[__num_threads] = __start;
_M_starts[__num_threads] = __start; } //single
} //single // Now shuffle in parallel.
// Now shuffle in parallel. __parallel_random_shuffle_drs_pu(__pus);
__parallel_random_shuffle_drs_pu(__pus);
} // parallel } // parallel
delete[] _M_starts; delete[] __starts;
delete[] _M_sd._M_bin_proc; delete[] __sd._M_bin_proc;
for (int __s = 0; __s < (_M_num_bins + 1); ++__s) for (int __s = 0; __s < (__num_bins + 1); ++__s)
delete[] _M_sd._M_dist[__s]; delete[] __sd._M_dist[__s];
delete[] _M_sd._M_dist; delete[] __sd._M_dist;
delete[] _M_sd._M_temporaries; delete[] __sd._M_temporaries;
delete[] __pus; delete[] __pus;
} }
/** @brief Sequential cache-efficient random shuffle. /** @brief Sequential cache-efficient random shuffle.
* @param __begin Begin iterator of sequence. * @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence. * @param __end End iterator of sequence.
* @param __rng Random number generator to use. * @param __rng Random number generator to use.
*/ */
template<typename _RAIter, typename _RandomNumberGenerator> template<typename _RAIter, typename _RandomNumberGenerator>
void void
__sequential_random_shuffle(_RAIter __begin, __sequential_random_shuffle(_RAIter __begin, _RAIter __end,
_RAIter __end, _RandomNumberGenerator& __rng)
_RandomNumberGenerator& __rng) {
{ typedef std::iterator_traits<_RAIter> _TraitsType;
typedef std::iterator_traits<_RAIter> _TraitsType; typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename _TraitsType::difference_type _DifferenceType;
_DifferenceType __n = __end - __begin; _DifferenceType __n = __end - __begin;
const _Settings& __s = _Settings::get(); const _Settings& __s = _Settings::get();
_BinIndex _M_num_bins, __num_bins_cache; _BinIndex __num_bins, __num_bins_cache;
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
// Try the L1 cache first, must fit into L1. // Try the L1 cache first, must fit into L1.
__num_bins_cache = __num_bins_cache = std::max<_DifferenceType>
std::max<_DifferenceType> (1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType)));
(1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType))); __num_bins_cache = __round_up_to_pow2(__num_bins_cache);
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
// No more buckets than TLB entries, power of 2
// No more buckets than TLB entries, power of 2 // Power of 2 and at least one element per bin, at most the TLB size
// Power of 2 and at least one element per bin, at most the TLB size __num_bins = std::min(__n, (_DifferenceType)__num_bins_cache);
_M_num_bins = std::min(__n, (_DifferenceType)__num_bins_cache);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin // 2 TLB entries needed per bin
_M_num_bins = std::min((_DifferenceType)__s.TLB_size / 2, _M_num_bins); __num_bins = std::min((_DifferenceType)__s.TLB_size / 2, __num_bins);
#endif #endif
_M_num_bins = __round_up_to_pow2(_M_num_bins); __num_bins = __round_up_to_pow2(__num_bins);
if (_M_num_bins < __num_bins_cache) if (__num_bins < __num_bins_cache)
{ {
#endif #endif
// Now try the L2 cache, must fit into L2. // Now try the L2 cache, must fit into L2.
__num_bins_cache = __num_bins_cache = static_cast<_BinIndex>
static_cast<_BinIndex>(std::max<_DifferenceType>( (std::max<_DifferenceType>(1, __n / (__s.L2_cache_size
1, __n / (__s.L2_cache_size / sizeof(_ValueType)))); / sizeof(_ValueType))));
__num_bins_cache = __round_up_to_pow2(__num_bins_cache); __num_bins_cache = __round_up_to_pow2(__num_bins_cache);
// No more buckets than TLB entries, power of 2 // No more buckets than TLB entries, power of 2
// Power of 2 and at least one element per bin, at most the TLB size. // Power of 2 and at least one element per bin, at most the TLB size.
_M_num_bins = static_cast<_BinIndex> __num_bins = static_cast<_BinIndex>
(std::min(__n, static_cast<_DifferenceType>(__num_bins_cache))); (std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin // 2 TLB entries needed per bin
_M_num_bins = __num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins);
std::min<_DifferenceType>(__s.TLB_size / 2, _M_num_bins);
#endif #endif
_M_num_bins = __round_up_to_pow2(_M_num_bins); __num_bins = __round_up_to_pow2(__num_bins);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
} }
#endif #endif
int _M_num_bits = __rd_log2(_M_num_bins); int __num_bits = __rd_log2(__num_bins);
if (_M_num_bins > 1) if (__num_bins > 1)
{ {
_ValueType* __target = static_cast<_ValueType*>( _ValueType* __target =
::operator new(sizeof(_ValueType) * __n)); static_cast<_ValueType*>(::operator new(sizeof(_ValueType) * __n));
_BinIndex* __oracles = new _BinIndex[__n]; _BinIndex* __oracles = new _BinIndex[__n];
_DifferenceType* __dist0 = new _DifferenceType[_M_num_bins + 1], _DifferenceType* __dist0 = new _DifferenceType[__num_bins + 1],
* __dist1 = new _DifferenceType[_M_num_bins + 1]; * __dist1 = new _DifferenceType[__num_bins + 1];
for (int __b = 0; __b < _M_num_bins + 1; ++__b) for (int __b = 0; __b < __num_bins + 1; ++__b)
__dist0[__b] = 0; __dist0[__b] = 0;
_RandomNumber bitrng(__rng(0xFFFFFFFF)); _RandomNumber __bitrng(__rng(0xFFFFFFFF));
for (_DifferenceType __i = 0; __i < __n; ++__i)
{
_BinIndex __oracle = __random_number_pow2(_M_num_bits, bitrng);
__oracles[__i] = __oracle;
// To allow prefix (partial) sum.
++(__dist0[__oracle + 1]);
}
// Sum up bins.
__gnu_sequential::
partial_sum(__dist0, __dist0 + _M_num_bins + 1, __dist0);
for (int __b = 0; __b < _M_num_bins + 1; ++__b)
__dist1[__b] = __dist0[__b];
// Distribute according to oracles.
for (_DifferenceType __i = 0; __i < __n; ++__i)
::new(&(__target[(__dist0[__oracles[__i]])++]))
_ValueType(*(__begin + __i));
for (int __b = 0; __b < _M_num_bins; ++__b)
{
__sequential_random_shuffle(__target + __dist1[__b],
__target + __dist1[__b + 1],
__rng);
}
// Copy elements back.
std::copy(__target, __target + __n, __begin);
delete[] __dist0;
delete[] __dist1;
delete[] __oracles;
::operator delete(__target);
}
else
__gnu_sequential::random_shuffle(__begin, __end, __rng);
}
/** @brief Public entry point for the parallel random shuffle.
 *  @param __begin Begin iterator of the sequence.
 *  @param __end End iterator of the sequence.
 *  @param __rng Random number generator to use. */
template<typename _RAIter, typename _RandomNumberGenerator>
  inline void
  __parallel_random_shuffle(_RAIter __begin, _RAIter __end,
                            _RandomNumberGenerator __rng = _RandomNumber())
  {
    typedef typename std::iterator_traits<_RAIter>::difference_type
      _DifferenceType;

    // Hand off to the divide-and-conquer implementation, using as
    // many threads as are available.
    const _DifferenceType __length = __end - __begin;
    __parallel_random_shuffle_drs(__begin, __end, __length,
                                  __get_max_threads(), __rng);
  }
for (_DifferenceType __i = 0; __i < __n; ++__i)
{
_BinIndex __oracle = __random_number_pow2(__num_bits, __bitrng);
__oracles[__i] = __oracle;
// To allow prefix (partial) sum.
++(__dist0[__oracle + 1]);
}
// Sum up bins.
__gnu_sequential::partial_sum(__dist0, __dist0 + __num_bins + 1,
__dist0);
for (int __b = 0; __b < __num_bins + 1; ++__b)
__dist1[__b] = __dist0[__b];
// Distribute according to oracles.
for (_DifferenceType __i = 0; __i < __n; ++__i)
::new(&(__target[(__dist0[__oracles[__i]])++]))
_ValueType(*(__begin + __i));
for (int __b = 0; __b < __num_bins; ++__b)
__sequential_random_shuffle(__target + __dist1[__b],
__target + __dist1[__b + 1], __rng);
// Copy elements back.
std::copy(__target, __target + __n, __begin);
delete[] __dist0;
delete[] __dist1;
delete[] __oracles;
::operator delete(__target);
}
else
__gnu_sequential::random_shuffle(__begin, __end, __rng);
}
/** @brief Parallel random shuffle, public call.
 *
 *  Computes the sequence length and forwards to
 *  __parallel_random_shuffle_drs() with the maximum number of
 *  available threads.
 *  @param __begin Begin iterator of sequence.
 *  @param __end End iterator of sequence.
 *  @param __rng Random number generator to use.
 */
template<typename _RAIter, typename _RandomNumberGenerator>
inline void
__parallel_random_shuffle(_RAIter __begin, _RAIter __end,
 _RandomNumberGenerator __rng = _RandomNumber())
{
 typedef std::iterator_traits<_RAIter> _TraitsType;
 typedef typename _TraitsType::difference_type _DifferenceType;
 _DifferenceType __n = __end - __begin;
 __parallel_random_shuffle_drs(__begin, __end, __n,
 __get_max_threads(), __rng);
}
} }
#endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */ #endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */
...@@ -38,7 +38,6 @@ ...@@ -38,7 +38,6 @@
#include <parallel/parallel.h> #include <parallel/parallel.h>
#include <parallel/equally_split.h> #include <parallel/equally_split.h>
namespace __gnu_parallel namespace __gnu_parallel
{ {
/** /**
...@@ -47,24 +46,24 @@ namespace __gnu_parallel ...@@ -47,24 +46,24 @@ namespace __gnu_parallel
* @param __length Length of sequence to search for. * @param __length Length of sequence to search for.
* @param __advances Returned __offsets. * @param __advances Returned __offsets.
*/ */
template<typename _RAIter, typename _DifferenceTp> template<typename _RAIter, typename _DifferenceTp>
void void
__calc_borders(_RAIter __elements, _DifferenceTp __length, __calc_borders(_RAIter __elements, _DifferenceTp __length,
_DifferenceTp* __off) _DifferenceTp* __off)
{ {
typedef _DifferenceTp _DifferenceType; typedef _DifferenceTp _DifferenceType;
__off[0] = -1; __off[0] = -1;
if (__length > 1) if (__length > 1)
__off[1] = 0; __off[1] = 0;
_DifferenceType __k = 0; _DifferenceType __k = 0;
for (_DifferenceType __j = 2; __j <= __length; __j++) for (_DifferenceType __j = 2; __j <= __length; __j++)
{ {
while ((__k >= 0) && !(__elements[__k] == __elements[__j-1])) while ((__k >= 0) && !(__elements[__k] == __elements[__j-1]))
__k = __off[__k]; __k = __off[__k];
__off[__j] = ++__k; __off[__j] = ++__k;
} }
} }
// Generic parallel find algorithm (requires random access iterator). // Generic parallel find algorithm (requires random access iterator).
...@@ -75,100 +74,99 @@ template<typename _RAIter, typename _DifferenceTp> ...@@ -75,100 +74,99 @@ template<typename _RAIter, typename _DifferenceTp>
* @param __end2 End iterator of second sequence. * @param __end2 End iterator of second sequence.
* @param __pred Find predicate. * @param __pred Find predicate.
* @return Place of finding in first sequences. */ * @return Place of finding in first sequences. */
template<typename __RAIter1, template<typename __RAIter1,
typename __RAIter2, typename __RAIter2,
typename _Pred> typename _Pred>
__RAIter1 __RAIter1
__search_template(__RAIter1 __begin1, __RAIter1 __end1, __search_template(__RAIter1 __begin1, __RAIter1 __end1,
__RAIter2 __begin2, __RAIter2 __end2, __RAIter2 __begin2, __RAIter2 __end2,
_Pred __pred) _Pred __pred)
{ {
typedef std::iterator_traits<__RAIter1> _TraitsType; typedef std::iterator_traits<__RAIter1> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType; typedef typename _TraitsType::difference_type _DifferenceType;
_GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2)); _GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2));
_DifferenceType __pattern_length = __end2 - __begin2; _DifferenceType __pattern_length = __end2 - __begin2;
// Pattern too short. // Pattern too short.
if(__pattern_length <= 0) if(__pattern_length <= 0)
return __end1; return __end1;
// Last point to start search. // Last point to start search.
_DifferenceType __input_length = (__end1 - __begin1) - __pattern_length; _DifferenceType __input_length = (__end1 - __begin1) - __pattern_length;
// Where is first occurrence of pattern? defaults to end. // Where is first occurrence of pattern? defaults to end.
_DifferenceType __result = (__end1 - __begin1); _DifferenceType __result = (__end1 - __begin1);
_DifferenceType *__splitters; _DifferenceType *__splitters;
// Pattern too long. // Pattern too long.
if (__input_length < 0) if (__input_length < 0)
return __end1; return __end1;
omp_lock_t __result_lock; omp_lock_t __result_lock;
omp_init_lock(&__result_lock); omp_init_lock(&__result_lock);
_ThreadIndex __num_threads = _ThreadIndex __num_threads = std::max<_DifferenceType>
std::max<_DifferenceType>(1, (1, std::min<_DifferenceType>(__input_length,
std::min<_DifferenceType>(__input_length, __get_max_threads())); __get_max_threads()));
_DifferenceType __advances[__pattern_length]; _DifferenceType __advances[__pattern_length];
__calc_borders(__begin2, __pattern_length, __advances); __calc_borders(__begin2, __pattern_length, __advances);
# pragma omp parallel num_threads(__num_threads) # pragma omp parallel num_threads(__num_threads)
{ {
# pragma omp single # pragma omp single
{ {
__num_threads = omp_get_num_threads(); __num_threads = omp_get_num_threads();
__splitters = new _DifferenceType[__num_threads + 1]; __splitters = new _DifferenceType[__num_threads + 1];
equally_split(__input_length, __num_threads, __splitters); equally_split(__input_length, __num_threads, __splitters);
} }
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
_DifferenceType __start = __splitters[__iam], _DifferenceType __start = __splitters[__iam],
__stop = __splitters[__iam + 1]; __stop = __splitters[__iam + 1];
_DifferenceType __pos_in_pattern = 0; _DifferenceType __pos_in_pattern = 0;
bool __found_pattern = false; bool __found_pattern = false;
while (__start <= __stop && !__found_pattern) while (__start <= __stop && !__found_pattern)
{ {
// Get new value of result. // Get new value of result.
#pragma omp flush(__result) #pragma omp flush(__result)
// No chance for this thread to find first occurrence. // No chance for this thread to find first occurrence.
if (__result < __start) if (__result < __start)
break; break;
while (__pred(__begin1[__start + __pos_in_pattern], while (__pred(__begin1[__start + __pos_in_pattern],
__begin2[__pos_in_pattern])) __begin2[__pos_in_pattern]))
{ {
++__pos_in_pattern; ++__pos_in_pattern;
if (__pos_in_pattern == __pattern_length) if (__pos_in_pattern == __pattern_length)
{ {
// Found new candidate for result. // Found new candidate for result.
omp_set_lock(&__result_lock); omp_set_lock(&__result_lock);
__result = std::min(__result, __start); __result = std::min(__result, __start);
omp_unset_lock(&__result_lock); omp_unset_lock(&__result_lock);
__found_pattern = true; __found_pattern = true;
break; break;
} }
} }
// Make safe jump. // Make safe jump.
__start += (__pos_in_pattern - __advances[__pos_in_pattern]); __start += (__pos_in_pattern - __advances[__pos_in_pattern]);
__pos_in_pattern = __pos_in_pattern = (__advances[__pos_in_pattern] < 0
(__advances[__pos_in_pattern] < 0) ? ? 0 : __advances[__pos_in_pattern]);
0 : __advances[__pos_in_pattern]; }
}
} //parallel } //parallel
omp_destroy_lock(&__result_lock); omp_destroy_lock(&__result_lock);
delete[] __splitters;
// Return iterator on found element. delete[] __splitters;
return (__begin1 + __result);
} // Return iterator on found element.
return (__begin1 + __result);
}
} // end namespace } // end namespace
#endif /* _GLIBCXX_PARALLEL_SEARCH_H */ #endif /* _GLIBCXX_PARALLEL_SEARCH_H */
...@@ -41,490 +41,489 @@ ...@@ -41,490 +41,489 @@
namespace __gnu_parallel namespace __gnu_parallel
{ {
template<typename _IIter, typename _OutputIterator> template<typename _IIter, typename _OutputIterator>
_OutputIterator _OutputIterator
copy_tail(std::pair<_IIter, _IIter> __b, __copy_tail(std::pair<_IIter, _IIter> __b,
std::pair<_IIter, _IIter> __e, _OutputIterator __r) std::pair<_IIter, _IIter> __e, _OutputIterator __r)
{ {
if (__b.first != __e.first) if (__b.first != __e.first)
{
do
{
*__r++ = *__b.first++;
}
while (__b.first != __e.first);
}
else
{
while (__b.second != __e.second)
*__r++ = *__b.second++;
}
return __r;
}
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
struct __symmetric_difference_func
{
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
__symmetric_difference_func(_Compare __comp) : _M_comp(__comp) {}
_Compare _M_comp;
_OutputIterator
_M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d,
_OutputIterator __r) const
{ {
do while (__a != __b && __c != __d)
{ {
*__r++ = *__b.first++; if (_M_comp(*__a, *__c))
{
*__r = *__a;
++__a;
++__r;
}
else if (_M_comp(*__c, *__a))
{
*__r = *__c;
++__c;
++__r;
}
else
{
++__a;
++__c;
}
} }
while (__b.first != __e.first); return std::copy(__c, __d, std::copy(__a, __b, __r));
} }
else
_DifferenceType
__count(_IIter __a, _IIter __b, _IIter __c, _IIter d) const
{ {
while (__b.second != __e.second) _DifferenceType __counter = 0;
*__r++ = *__b.second++;
while (__a != __b && __c != d)
{
if (_M_comp(*__a, *__c))
{
++__a;
++__counter;
}
else if (_M_comp(*__c, *__a))
{
++__c;
++__counter;
}
else
{
++__a;
++__c;
}
}
return __counter + (__b - __a) + (d - __c);
} }
return __r;
}
template<typename _IIter, _OutputIterator
typename _OutputIterator, __first_empty(_IIter __c, _IIter d, _OutputIterator __out) const
typename _Compare> { return std::copy(__c, d, __out); }
struct symmetric_difference_func
{
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
symmetric_difference_func(_Compare __comp) : _M_comp(__comp) {} _OutputIterator
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
{ return std::copy(__a, __b, __out); }
};
_Compare _M_comp;
_OutputIterator template<typename _IIter,
_M_invoke(_IIter __a, _IIter __b, typename _OutputIterator,
_IIter __c, _IIter d, typename _Compare>
_OutputIterator __r) const struct __difference_func
{ {
while (__a != __b && __c != d) typedef std::iterator_traits<_IIter> _TraitsType;
{ typedef typename _TraitsType::difference_type _DifferenceType;
if (_M_comp(*__a, *__c)) typedef typename std::pair<_IIter, _IIter> _IteratorPair;
{
*__r = *__a;
++__a;
++__r;
}
else if (_M_comp(*__c, *__a))
{
*__r = *__c;
++__c;
++__r;
}
else
{
++__a;
++__c;
}
}
return std::copy(__c, d, std::copy(__a, __b, __r));
}
_DifferenceType __difference_func(_Compare __comp) : _M_comp(__comp) {}
__count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const
{
_DifferenceType __counter = 0;
while (__a != __b && __c != d) _Compare _M_comp;
{
if (_M_comp(*__a, *__c))
{
++__a;
++__counter;
}
else if (_M_comp(*__c, *__a))
{
++__c;
++__counter;
}
else
{
++__a;
++__c;
}
}
return __counter + (__b - __a) + (d - __c); _OutputIterator
} _M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter d,
_OutputIterator __r) const
{
while (__a != __b && __c != d)
{
if (_M_comp(*__a, *__c))
{
*__r = *__a;
++__a;
++__r;
}
else if (_M_comp(*__c, *__a))
{ ++__c; }
else
{
++__a;
++__c;
}
}
return std::copy(__a, __b, __r);
}
_OutputIterator _DifferenceType
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const __count(_IIter __a, _IIter __b,
{ return std::copy(__c, d, __out); } _IIter __c, _IIter d) const
{
_DifferenceType __counter = 0;
_OutputIterator while (__a != __b && __c != d)
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const {
{ return std::copy(__a, __b, __out); } if (_M_comp(*__a, *__c))
}; {
++__a;
++__counter;
}
else if (_M_comp(*__c, *__a))
{ ++__c; }
else
{ ++__a; ++__c; }
}
return __counter + (__b - __a);
}
template<typename _IIter, _OutputIterator
typename _OutputIterator, __first_empty(_IIter, _IIter, _OutputIterator __out) const
typename _Compare> { return __out; }
struct __difference_func
{
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
__difference_func(_Compare __comp) : _M_comp(__comp) {} _OutputIterator
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
{ return std::copy(__a, __b, __out); }
};
_Compare _M_comp;
_OutputIterator template<typename _IIter,
_M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter d, typename _OutputIterator,
_OutputIterator __r) const typename _Compare>
struct __intersection_func
{ {
while (__a != __b && __c != d) typedef std::iterator_traits<_IIter> _TraitsType;
{ typedef typename _TraitsType::difference_type _DifferenceType;
if (_M_comp(*__a, *__c)) typedef typename std::pair<_IIter, _IIter> _IteratorPair;
{
*__r = *__a;
++__a;
++__r;
}
else if (_M_comp(*__c, *__a))
{ ++__c; }
else
{
++__a;
++__c;
}
}
return std::copy(__a, __b, __r);
}
_DifferenceType __intersection_func(_Compare __comp) : _M_comp(__comp) {}
__count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const
{
_DifferenceType __counter = 0;
while (__a != __b && __c != d) _Compare _M_comp;
{
if (_M_comp(*__a, *__c))
{
++__a;
++__counter;
}
else if (_M_comp(*__c, *__a))
{ ++__c; }
else
{ ++__a; ++__c; }
}
return __counter + (__b - __a); _OutputIterator
} _M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d,
_OutputIterator __r) const
{
while (__a != __b && __c != __d)
{
if (_M_comp(*__a, *__c))
{ ++__a; }
else if (_M_comp(*__c, *__a))
{ ++__c; }
else
{
*__r = *__a;
++__a;
++__c;
++__r;
}
}
inline _OutputIterator return __r;
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const }
{ return __out; }
inline _OutputIterator _DifferenceType
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const __count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const
{ return std::copy(__a, __b, __out); } {
}; _DifferenceType __counter = 0;
while (__a != __b && __c != __d)
{
if (_M_comp(*__a, *__c))
{ ++__a; }
else if (_M_comp(*__c, *__a))
{ ++__c; }
else
{
++__a;
++__c;
++__counter;
}
}
template<typename _IIter, return __counter;
typename _OutputIterator, }
typename _Compare>
struct __intersection_func
{
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
__intersection_func(_Compare __comp) : _M_comp(__comp) {} _OutputIterator
__first_empty(_IIter, _IIter, _OutputIterator __out) const
{ return __out; }
_Compare _M_comp; _OutputIterator
__second_empty(_IIter, _IIter, _OutputIterator __out) const
{ return __out; }
};
_OutputIterator template<class _IIter, class _OutputIterator, class _Compare>
_M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter d, struct __union_func
_OutputIterator __r) const
{ {
while (__a != __b && __c != d) typedef typename std::iterator_traits<_IIter>::difference_type
{ _DifferenceType;
if (_M_comp(*__a, *__c))
{ ++__a; }
else if (_M_comp(*__c, *__a))
{ ++__c; }
else
{
*__r = *__a;
++__a;
++__c;
++__r;
}
}
return __r; __union_func(_Compare __comp) : _M_comp(__comp) {}
}
_DifferenceType _Compare _M_comp;
__count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const
{
_DifferenceType __counter = 0;
while (__a != __b && __c != d)
{
if (_M_comp(*__a, *__c))
{ ++__a; }
else if (_M_comp(*__c, *__a))
{ ++__c; }
else
{
++__a;
++__c;
++__counter;
}
}
return __counter; _OutputIterator
} _M_invoke(_IIter __a, const _IIter __b, _IIter __c,
const _IIter __d, _OutputIterator __r) const
{
while (__a != __b && __c != __d)
{
if (_M_comp(*__a, *__c))
{
*__r = *__a;
++__a;
}
else if (_M_comp(*__c, *__a))
{
*__r = *__c;
++__c;
}
else
{
*__r = *__a;
++__a;
++__c;
}
++__r;
}
return std::copy(__c, __d, std::copy(__a, __b, __r));
}
inline _OutputIterator _DifferenceType
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const __count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const
{ return __out; } {
_DifferenceType __counter = 0;
inline _OutputIterator while (__a != __b && __c != __d)
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const {
{ return __out; } if (_M_comp(*__a, *__c))
}; { ++__a; }
else if (_M_comp(*__c, *__a))
{ ++__c; }
else
{
++__a;
++__c;
}
++__counter;
}
template<class _IIter, class _OutputIterator, class _Compare> __counter += (__b - __a);
struct __union_func __counter += (__d - __c);
{ return __counter;
typedef typename std::iterator_traits<_IIter>::difference_type }
_DifferenceType;
__union_func(_Compare __comp) : _M_comp(__comp) {} _OutputIterator
__first_empty(_IIter __c, _IIter __d, _OutputIterator __out) const
{ return std::copy(__c, __d, __out); }
_Compare _M_comp; _OutputIterator
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
{ return std::copy(__a, __b, __out); }
};
template<typename _IIter,
typename _OutputIterator,
typename Operation>
_OutputIterator _OutputIterator
_M_invoke(_IIter __a, const _IIter __b, _IIter __c, __parallel_set_operation(_IIter __begin1, _IIter __end1,
const _IIter d, _OutputIterator __r) const _IIter __begin2, _IIter __end2,
_OutputIterator __result, Operation __op)
{ {
while (__a != __b && __c != d) _GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2))
{
if (_M_comp(*__a, *__c))
{
*__r = *__a;
++__a;
}
else if (_M_comp(*__c, *__a))
{
*__r = *__c;
++__c;
}
else
{
*__r = *__a;
++__a;
++__c;
}
++__r;
}
return std::copy(__c, d, std::copy(__a, __b, __r));
}
_DifferenceType typedef std::iterator_traits<_IIter> _TraitsType;
__count(_IIter __a, _IIter __b, typedef typename _TraitsType::difference_type _DifferenceType;
_IIter __c, _IIter d) const typedef typename std::pair<_IIter, _IIter> _IteratorPair;
{
_DifferenceType __counter = 0;
while (__a != __b && __c != d)
{
if (_M_comp(*__a, *__c))
{ ++__a; }
else if (_M_comp(*__c, *__a))
{ ++__c; }
else
{
++__a;
++__c;
}
++__counter;
}
__counter += (__b - __a); if (__begin1 == __end1)
__counter += (d - __c); return __op.__first_empty(__begin2, __end2, __result);
return __counter;
}
inline _OutputIterator if (__begin2 == __end2)
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const return __op.__second_empty(__begin1, __end1, __result);
{ return std::copy(__c, d, __out); }
inline _OutputIterator const _DifferenceType __size = (__end1 - __begin1) + (__end2 - __begin2);
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
{ return std::copy(__a, __b, __out); } const _IteratorPair __sequence[2] = { std::make_pair(__begin1, __end1),
}; std::make_pair(__begin2, __end2) };
_OutputIterator __return_value = __result;
template<typename _IIter, _DifferenceType *__borders;
typename _OutputIterator, _IteratorPair *__block_begins;
typename Operation> _DifferenceType* __lengths;
_OutputIterator
__parallel_set_operation(_IIter __begin1, _IIter __end1, _ThreadIndex __num_threads =
_IIter __begin2, _IIter __end2, std::min<_DifferenceType>(__get_max_threads(),
_OutputIterator __result, Operation __op) std::min(__end1 - __begin1, __end2 - __begin2));
{
_GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2)) # pragma omp parallel num_threads(__num_threads)
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
if (__begin1 == __end1)
return __op.__first_empty(__begin2, __end2, __result);
if (__begin2 == __end2)
return __op.__second_empty(__begin1, __end1, __result);
const _DifferenceType size = (__end1 - __begin1) + (__end2 - __begin2);
const _IteratorPair __sequence[ 2 ] =
{ std::make_pair(__begin1, __end1), std::make_pair(__begin2, __end2) };
_OutputIterator return_value = __result;
_DifferenceType *__borders;
_IteratorPair *__block_begins;
_DifferenceType* __lengths;
_ThreadIndex __num_threads =
std::min<_DifferenceType>(__get_max_threads(),
std::min(__end1 - __begin1, __end2 - __begin2));
# pragma omp parallel num_threads(__num_threads)
{ {
# pragma omp single # pragma omp single
{ {
__num_threads = omp_get_num_threads(); __num_threads = omp_get_num_threads();
__borders = new _DifferenceType[__num_threads + 2]; __borders = new _DifferenceType[__num_threads + 2];
equally_split(size, __num_threads + 1, __borders); equally_split(__size, __num_threads + 1, __borders);
__block_begins = new _IteratorPair[__num_threads + 1]; __block_begins = new _IteratorPair[__num_threads + 1];
// Very __start. // Very __start.
__block_begins[0] = std::make_pair(__begin1, __begin2); __block_begins[0] = std::make_pair(__begin1, __begin2);
__lengths = new _DifferenceType[__num_threads]; __lengths = new _DifferenceType[__num_threads];
} //single } //single
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
// _Result from multiseq_partition. // _Result from multiseq_partition.
_IIter __offset[2]; _IIter __offset[2];
const _DifferenceType __rank = __borders[__iam + 1]; const _DifferenceType __rank = __borders[__iam + 1];
multiseq_partition(__sequence, __sequence + 2, multiseq_partition(__sequence, __sequence + 2,
__rank, __offset, __op._M_comp); __rank, __offset, __op._M_comp);
// allowed to read? // allowed to read?
// together // together
// *(__offset[ 0 ] - 1) == *__offset[ 1 ] // *(__offset[ 0 ] - 1) == *__offset[ 1 ]
if (__offset[ 0 ] != __begin1 && __offset[ 1 ] != __end2 if (__offset[ 0 ] != __begin1 && __offset[1] != __end2
&& !__op._M_comp(*(__offset[ 0 ] - 1), *__offset[ 1 ]) && !__op._M_comp(*(__offset[0] - 1), *__offset[1])
&& !__op._M_comp(*__offset[ 1 ], *(__offset[ 0 ] - 1))) && !__op._M_comp(*__offset[1], *(__offset[0] - 1)))
{ {
// Avoid split between globally equal elements: move one to // Avoid split between globally equal elements: move one to
// front in first sequence. // front in first sequence.
--__offset[ 0 ]; --__offset[0];
} }
_IteratorPair block_end = __block_begins[ __iam + 1 ] = _IteratorPair __block_end = __block_begins[__iam + 1] =
_IteratorPair(__offset[ 0 ], __offset[ 1 ]); _IteratorPair(__offset[0], __offset[1]);
// Make sure all threads have their block_begin result written out.
# pragma omp barrier
// Make sure all threads have their block_begin result written out. _IteratorPair __block_begin = __block_begins[__iam];
// Begin working for the first block, while the others except
// the last start to count.
if (__iam == 0)
{
// The first thread can copy already.
__lengths[ __iam ] =
__op._M_invoke(__block_begin.first, __block_end.first,
__block_begin.second, __block_end.second,
__result) - __result;
}
else
{
__lengths[ __iam ] =
__op.__count(__block_begin.first, __block_end.first,
__block_begin.second, __block_end.second);
}
// Make sure everyone wrote their lengths.
# pragma omp barrier # pragma omp barrier
_IteratorPair __block_begin = __block_begins[ __iam ]; _OutputIterator __r = __result;
// Begin working for the first block, while the others except if (__iam == 0)
// the last start to count. {
if (__iam == 0) // Do the last block.
{ for (int __i = 0; __i < __num_threads; ++__i)
// The first thread can copy already. __r += __lengths[__i];
__lengths[ __iam ] =
__op._M_invoke(__block_begin.first, block_end.first,
__block_begin.second, block_end.second, __result)
- __result;
}
else
{
__lengths[ __iam ] =
__op.__count(__block_begin.first, block_end.first,
__block_begin.second, block_end.second);
}
// Make sure everyone wrote their lengths. __block_begin = __block_begins[__num_threads];
# pragma omp barrier
_OutputIterator __r = __result; // Return the result iterator of the last block.
__return_value =
__op._M_invoke(__block_begin.first, __end1,
__block_begin.second, __end2, __r);
if (__iam == 0) }
{ else
// Do the last block. {
for (int __i = 0; __i < __num_threads; ++__i) for (int __i = 0; __i < __iam; ++__i)
__r += __lengths[__i]; __r += __lengths[ __i ];
__block_begin = __block_begins[__num_threads]; // Reset begins for copy pass.
__op._M_invoke(__block_begin.first, __block_end.first,
__block_begin.second, __block_end.second, __r);
}
}
return __return_value;
}
// Return the result iterator of the last block. template<typename _IIter,
return_value = __op._M_invoke( typename _OutputIterator,
__block_begin.first, __end1, __block_begin.second, __end2, __r); typename _Compare>
inline _OutputIterator
__parallel_set_union(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare __comp)
{
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
__result,
__union_func< _IIter, _OutputIterator,
_Compare>(__comp));
}
} template<typename _IIter,
else typename _OutputIterator,
{ typename _Compare>
for (int __i = 0; __i < __iam; ++__i) inline _OutputIterator
__r += __lengths[ __i ]; __parallel_set_intersection(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare __comp)
{
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
__result,
__intersection_func<_IIter,
_OutputIterator, _Compare>(__comp));
}
// Reset begins for copy pass. template<typename _IIter,
__op._M_invoke(__block_begin.first, block_end.first, typename _OutputIterator,
__block_begin.second, block_end.second, __r); typename _Compare>
} inline _OutputIterator
} __parallel_set_difference(_IIter __begin1, _IIter __end1,
return return_value;
}
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_union(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp)
{
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
__result, __union_func< _IIter, _OutputIterator, _Compare>(_M_comp));
}
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_intersection(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2, _IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp) _OutputIterator __result, _Compare __comp)
{ {
return __parallel_set_operation( return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
__begin1, __end1, __begin2, __end2, __result, __result,
__intersection_func<_IIter, _OutputIterator, _Compare>(_M_comp)); __difference_func<_IIter,
} _OutputIterator, _Compare>(__comp));
}
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_difference(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp)
{
return __parallel_set_operation(
__begin1, __end1, __begin2, __end2, __result,
__difference_func<_IIter, _OutputIterator, _Compare>(_M_comp));
}
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_symmetric_difference(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result,
_Compare _M_comp)
{
return __parallel_set_operation(
__begin1, __end1, __begin2, __end2, __result,
symmetric_difference_func<_IIter, _OutputIterator, _Compare>
(_M_comp));
}
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_symmetric_difference(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result,
_Compare __comp)
{
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
__result,
__symmetric_difference_func<_IIter,
_OutputIterator, _Compare>(__comp));
}
} }
#endif /* _GLIBCXX_PARALLEL_SET_OPERATIONS_H */ #endif /* _GLIBCXX_PARALLEL_SET_OPERATIONS_H */
...@@ -54,12 +54,12 @@ ...@@ -54,12 +54,12 @@
namespace __gnu_parallel namespace __gnu_parallel
{ {
//prototype //prototype
template<bool __stable, typename _RAIter, template<bool __stable, typename _RAIter,
typename _Compare, typename _Parallelism> typename _Compare, typename _Parallelism>
void void
__parallel_sort(_RAIter __begin, _RAIter __end, __parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, _Parallelism __parallelism); _Compare __comp, _Parallelism __parallelism);
/** /**
* @brief Choose multiway mergesort, splitting variant at run-time, * @brief Choose multiway mergesort, splitting variant at run-time,
...@@ -70,19 +70,19 @@ namespace __gnu_parallel ...@@ -70,19 +70,19 @@ namespace __gnu_parallel
* @callgraph * @callgraph
*/ */
template<bool __stable, typename _RAIter, typename _Compare> template<bool __stable, typename _RAIter, typename _Compare>
inline void inline void
__parallel_sort(_RAIter __begin, _RAIter __end, __parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, multiway_mergesort_tag __parallelism) _Compare __comp, multiway_mergesort_tag __parallelism)
{ {
_GLIBCXX_CALL(__end - __begin) _GLIBCXX_CALL(__end - __begin)
if(_Settings::get().sort_splitting == EXACT) if(_Settings::get().sort_splitting == EXACT)
parallel_sort_mwms<__stable, true> parallel_sort_mwms<__stable, true>
(__begin, __end, __comp, __parallelism.__get_num_threads()); (__begin, __end, __comp, __parallelism.__get_num_threads());
else else
parallel_sort_mwms<__stable, false> parallel_sort_mwms<__stable, false>
(__begin, __end, __comp, __parallelism.__get_num_threads()); (__begin, __end, __comp, __parallelism.__get_num_threads());
} }
/** /**
* @brief Choose multiway mergesort with exact splitting, * @brief Choose multiway mergesort with exact splitting,
...@@ -93,15 +93,16 @@ namespace __gnu_parallel ...@@ -93,15 +93,16 @@ namespace __gnu_parallel
* @callgraph * @callgraph
*/ */
template<bool __stable, typename _RAIter, typename _Compare> template<bool __stable, typename _RAIter, typename _Compare>
inline void inline void
__parallel_sort(_RAIter __begin, _RAIter __end, __parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, multiway_mergesort_exact_tag __parallelism) _Compare __comp,
{ multiway_mergesort_exact_tag __parallelism)
_GLIBCXX_CALL(__end - __begin) {
_GLIBCXX_CALL(__end - __begin)
parallel_sort_mwms<__stable, true> parallel_sort_mwms<__stable, true>
(__begin, __end, __comp, __parallelism.__get_num_threads()); (__begin, __end, __comp, __parallelism.__get_num_threads());
} }
/** /**
* @brief Choose multiway mergesort with splitting by sampling, * @brief Choose multiway mergesort with splitting by sampling,
...@@ -112,15 +113,16 @@ namespace __gnu_parallel ...@@ -112,15 +113,16 @@ namespace __gnu_parallel
* @callgraph * @callgraph
*/ */
template<bool __stable, typename _RAIter, typename _Compare> template<bool __stable, typename _RAIter, typename _Compare>
inline void inline void
__parallel_sort(_RAIter __begin, _RAIter __end, __parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, multiway_mergesort_sampling_tag __parallelism) _Compare __comp,
{ multiway_mergesort_sampling_tag __parallelism)
_GLIBCXX_CALL(__end - __begin) {
_GLIBCXX_CALL(__end - __begin)
parallel_sort_mwms<__stable, false> parallel_sort_mwms<__stable, false>
(__begin, __end, __comp, __parallelism.__get_num_threads()); (__begin, __end, __comp, __parallelism.__get_num_threads());
} }
/** /**
* @brief Choose quicksort for parallel sorting. * @brief Choose quicksort for parallel sorting.
...@@ -130,17 +132,17 @@ namespace __gnu_parallel ...@@ -130,17 +132,17 @@ namespace __gnu_parallel
* @callgraph * @callgraph
*/ */
template<bool __stable, typename _RAIter, typename _Compare> template<bool __stable, typename _RAIter, typename _Compare>
inline void inline void
__parallel_sort(_RAIter __begin, _RAIter __end, __parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, quicksort_tag __parallelism) _Compare __comp, quicksort_tag __parallelism)
{ {
_GLIBCXX_CALL(__end - __begin) _GLIBCXX_CALL(__end - __begin)
_GLIBCXX_PARALLEL_ASSERT(__stable == false); _GLIBCXX_PARALLEL_ASSERT(__stable == false);
__parallel_sort_qs(__begin, __end, __comp, __parallel_sort_qs(__begin, __end, __comp,
__parallelism.__get_num_threads()); __parallelism.__get_num_threads());
} }
/** /**
* @brief Choose balanced quicksort for parallel sorting. * @brief Choose balanced quicksort for parallel sorting.
...@@ -150,19 +152,18 @@ namespace __gnu_parallel ...@@ -150,19 +152,18 @@ namespace __gnu_parallel
* @param __stable Sort __stable. * @param __stable Sort __stable.
* @callgraph * @callgraph
*/ */
template<bool __stable, typename _RAIter, typename _Compare> template<bool __stable, typename _RAIter, typename _Compare>
inline void inline void
__parallel_sort(_RAIter __begin, _RAIter __end, __parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, balanced_quicksort_tag __parallelism) _Compare __comp, balanced_quicksort_tag __parallelism)
{ {
_GLIBCXX_CALL(__end - __begin) _GLIBCXX_CALL(__end - __begin)
_GLIBCXX_PARALLEL_ASSERT(__stable == false);
__parallel_sort_qsb(__begin, __end, __comp, _GLIBCXX_PARALLEL_ASSERT(__stable == false);
__parallelism.__get_num_threads());
}
__parallel_sort_qsb(__begin, __end, __comp,
__parallelism.__get_num_threads());
}
/** /**
* @brief Choose multiway mergesort with exact splitting, * @brief Choose multiway mergesort with exact splitting,
...@@ -173,17 +174,16 @@ namespace __gnu_parallel ...@@ -173,17 +174,16 @@ namespace __gnu_parallel
* @callgraph * @callgraph
*/ */
template<bool __stable, typename _RAIter, typename _Compare> template<bool __stable, typename _RAIter, typename _Compare>
inline void inline void
__parallel_sort(_RAIter __begin, _RAIter __end, __parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, default_parallel_tag __parallelism) _Compare __comp, default_parallel_tag __parallelism)
{ {
_GLIBCXX_CALL(__end - __begin) _GLIBCXX_CALL(__end - __begin)
__parallel_sort<__stable>
(__begin, __end, __comp,
multiway_mergesort_exact_tag(__parallelism.__get_num_threads()));
}
__parallel_sort<__stable>
(__begin, __end, __comp,
multiway_mergesort_exact_tag(__parallelism.__get_num_threads()));
}
/** /**
* @brief Choose a parallel sorting algorithm. * @brief Choose a parallel sorting algorithm.
...@@ -196,7 +196,7 @@ namespace __gnu_parallel ...@@ -196,7 +196,7 @@ namespace __gnu_parallel
template<bool __stable, typename _RAIter, typename _Compare> template<bool __stable, typename _RAIter, typename _Compare>
inline void inline void
__parallel_sort(_RAIter __begin, _RAIter __end, __parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, parallel_tag __parallelism) _Compare __comp, parallel_tag __parallelism)
{ {
_GLIBCXX_CALL(__end - __begin) _GLIBCXX_CALL(__end - __begin)
typedef std::iterator_traits<_RAIter> _TraitsType; typedef std::iterator_traits<_RAIter> _TraitsType;
......
...@@ -51,20 +51,16 @@ namespace __gnu_parallel ...@@ -51,20 +51,16 @@ namespace __gnu_parallel
public: public:
/** @brief Default constructor. Use default number of threads. */ /** @brief Default constructor. Use default number of threads. */
parallel_tag() parallel_tag()
{ { _M_num_threads = 0; }
this->_M_num_threads = 0;
}
/** @brief Default constructor. Recommend number of threads to use. /** @brief Default constructor. Recommend number of threads to use.
* @param __num_threads Desired number of threads. */ * @param __num_threads Desired number of threads. */
parallel_tag(_ThreadIndex __num_threads) parallel_tag(_ThreadIndex __num_threads)
{ { _M_num_threads = __num_threads; }
this->_M_num_threads = __num_threads;
}
/** @brief Find out desired number of threads. /** @brief Find out desired number of threads.
* @return Desired number of threads. */ * @return Desired number of threads. */
inline _ThreadIndex __get_num_threads() _ThreadIndex __get_num_threads()
{ {
if(_M_num_threads == 0) if(_M_num_threads == 0)
return omp_get_max_threads(); return omp_get_max_threads();
...@@ -74,19 +70,17 @@ namespace __gnu_parallel ...@@ -74,19 +70,17 @@ namespace __gnu_parallel
/** @brief Set the desired number of threads. /** @brief Set the desired number of threads.
* @param __num_threads Desired number of threads. */ * @param __num_threads Desired number of threads. */
inline void set_num_threads(_ThreadIndex __num_threads) void set_num_threads(_ThreadIndex __num_threads)
{ { _M_num_threads = __num_threads; }
this->_M_num_threads = __num_threads;
}
}; };
/** @brief Recommends parallel execution using the /** @brief Recommends parallel execution using the
default parallel algorithm. */ default parallel algorithm. */
struct default_parallel_tag : public parallel_tag struct default_parallel_tag : public parallel_tag
{ {
default_parallel_tag() { } default_parallel_tag() { }
default_parallel_tag(_ThreadIndex __num_threads) default_parallel_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { } : parallel_tag(__num_threads) { }
}; };
/** @brief Recommends parallel execution using dynamic /** @brief Recommends parallel execution using dynamic
...@@ -114,18 +108,18 @@ namespace __gnu_parallel ...@@ -114,18 +108,18 @@ namespace __gnu_parallel
* with exact splitting, at compile time. */ * with exact splitting, at compile time. */
struct exact_tag : public parallel_tag struct exact_tag : public parallel_tag
{ {
exact_tag() { } exact_tag() { }
exact_tag(_ThreadIndex __num_threads) exact_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { } : parallel_tag(__num_threads) { }
}; };
/** @brief Forces parallel merging /** @brief Forces parallel merging
* with exact splitting, at compile time. */ * with exact splitting, at compile time. */
struct sampling_tag : public parallel_tag struct sampling_tag : public parallel_tag
{ {
sampling_tag() { } sampling_tag() { }
sampling_tag(_ThreadIndex __num_threads) sampling_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { } : parallel_tag(__num_threads) { }
}; };
...@@ -133,45 +127,45 @@ namespace __gnu_parallel ...@@ -133,45 +127,45 @@ namespace __gnu_parallel
* at compile time. */ * at compile time. */
struct multiway_mergesort_tag : public parallel_tag struct multiway_mergesort_tag : public parallel_tag
{ {
multiway_mergesort_tag() { } multiway_mergesort_tag() { }
multiway_mergesort_tag(_ThreadIndex __num_threads) multiway_mergesort_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { } : parallel_tag(__num_threads) { }
}; };
/** @brief Forces parallel sorting using multiway mergesort /** @brief Forces parallel sorting using multiway mergesort
* with exact splitting at compile time. */ * with exact splitting at compile time. */
struct multiway_mergesort_exact_tag : public parallel_tag struct multiway_mergesort_exact_tag : public parallel_tag
{ {
multiway_mergesort_exact_tag() { } multiway_mergesort_exact_tag() { }
multiway_mergesort_exact_tag(_ThreadIndex __num_threads) multiway_mergesort_exact_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { } : parallel_tag(__num_threads) { }
}; };
/** @brief Forces parallel sorting using multiway mergesort /** @brief Forces parallel sorting using multiway mergesort
* with splitting by sampling at compile time. */ * with splitting by sampling at compile time. */
struct multiway_mergesort_sampling_tag : public parallel_tag struct multiway_mergesort_sampling_tag : public parallel_tag
{ {
multiway_mergesort_sampling_tag() { } multiway_mergesort_sampling_tag() { }
multiway_mergesort_sampling_tag(_ThreadIndex __num_threads) multiway_mergesort_sampling_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { } : parallel_tag(__num_threads) { }
}; };
/** @brief Forces parallel sorting using unbalanced quicksort /** @brief Forces parallel sorting using unbalanced quicksort
* at compile time. */ * at compile time. */
struct quicksort_tag : public parallel_tag struct quicksort_tag : public parallel_tag
{ {
quicksort_tag() { } quicksort_tag() { }
quicksort_tag(_ThreadIndex __num_threads) quicksort_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { } : parallel_tag(__num_threads) { }
}; };
/** @brief Forces parallel sorting using balanced quicksort /** @brief Forces parallel sorting using balanced quicksort
* at compile time. */ * at compile time. */
struct balanced_quicksort_tag : public parallel_tag struct balanced_quicksort_tag : public parallel_tag
{ {
balanced_quicksort_tag() { } balanced_quicksort_tag() { }
balanced_quicksort_tag(_ThreadIndex __num_threads) balanced_quicksort_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { } : parallel_tag(__num_threads) { }
}; };
......
...@@ -37,155 +37,160 @@ ...@@ -37,155 +37,160 @@
namespace __gnu_parallel namespace __gnu_parallel
{ {
/** @brief Parallel std::unique_copy(), w/__o explicit equality predicate.
/** @brief Parallel std::unique_copy(), w/__o explicit equality predicate. * @param __first Begin iterator of input sequence.
* @param __first Begin iterator of input sequence. * @param __last End iterator of input sequence.
* @param __last End iterator of input sequence. * @param __result Begin iterator of result __sequence.
* @param __result Begin iterator of result __sequence. * @param __binary_pred Equality predicate.
* @param __binary_pred Equality predicate. * @return End iterator of result __sequence. */
* @return End iterator of result __sequence. */ template<typename _IIter,
template<typename _IIter, class _OutputIterator,
class _OutputIterator, class _BinaryPredicate>
class _BinaryPredicate> _OutputIterator
_OutputIterator __parallel_unique_copy(_IIter __first, _IIter __last,
__parallel_unique_copy(_IIter __first, _IIter __last, _OutputIterator __result,
_OutputIterator __result, _BinaryPredicate __binary_pred) _BinaryPredicate __binary_pred)
{ {
_GLIBCXX_CALL(__last - __first) _GLIBCXX_CALL(__last - __first)
typedef std::iterator_traits<_IIter> _TraitsType; typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType; typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType; typedef typename _TraitsType::difference_type _DifferenceType;
_DifferenceType size = __last - __first; _DifferenceType __size = __last - __first;
if (size == 0) if (__size == 0)
return __result; return __result;
// Let the first thread process two parts. // Let the first thread process two parts.
_DifferenceType *__counter; _DifferenceType *__counter;
_DifferenceType *__borders; _DifferenceType *__borders;
_ThreadIndex __num_threads = __get_max_threads(); _ThreadIndex __num_threads = __get_max_threads();
// First part contains at least one element. // First part contains at least one element.
# pragma omp parallel num_threads(__num_threads) # pragma omp parallel num_threads(__num_threads)
{ {
# pragma omp single # pragma omp single
{
__num_threads = omp_get_num_threads();
__borders = new _DifferenceType[__num_threads + 2];
equally_split(__size, __num_threads + 1, __borders);
__counter = new _DifferenceType[__num_threads + 1];
}
_ThreadIndex __iam = omp_get_thread_num();
_DifferenceType __begin, __end;
// Check for length without duplicates
// Needed for position in output
_DifferenceType __i = 0;
_OutputIterator __out = __result;
if (__iam == 0)
{ {
__num_threads = omp_get_num_threads(); __begin = __borders[0] + 1; // == 1
__borders = new _DifferenceType[__num_threads + 2]; __end = __borders[__iam + 1];
equally_split(size, __num_threads + 1, __borders);
__counter = new _DifferenceType[__num_threads + 1]; ++__i;
*__out++ = *__first;
for (_IIter __iter = __first + __begin; __iter < __first + __end;
++__iter)
{
if (!__binary_pred(*__iter, *(__iter - 1)))
{
++__i;
*__out++ = *__iter;
}
}
} }
else
{
__begin = __borders[__iam]; //one part
__end = __borders[__iam + 1];
_ThreadIndex __iam = omp_get_thread_num(); for (_IIter __iter = __first + __begin; __iter < __first + __end;
++__iter)
{
if (!__binary_pred(*__iter, *(__iter - 1)))
++__i;
}
}
__counter[__iam] = __i;
_DifferenceType __begin, __end; // Last part still untouched.
_DifferenceType __begin_output;
// Check for length without duplicates # pragma omp barrier
// Needed for position in output
_DifferenceType __i = 0;
_OutputIterator __out = __result;
if (__iam == 0) // Store result in output on calculated positions.
{ __begin_output = 0;
__begin = __borders[0] + 1; // == 1
__end = __borders[__iam + 1];
++__i; if (__iam == 0)
*__out++ = *__first; {
for (int __t = 0; __t < __num_threads; ++__t)
__begin_output += __counter[__t];
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter) __i = 0;
{
if (!__binary_pred(*iter, *(iter-1))) _OutputIterator __iter_out = __result + __begin_output;
{
++__i; __begin = __borders[__num_threads];
*__out++ = *iter; __end = __size;
}
} for (_IIter __iter = __first + __begin; __iter < __first + __end;
} ++__iter)
else {
{ if (__iter == __first
__begin = __borders[__iam]; //one part || !__binary_pred(*__iter, *(__iter - 1)))
__end = __borders[__iam + 1]; {
++__i;
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter) *__iter_out++ = *__iter;
{ }
if (!__binary_pred(*iter, *(iter - 1))) }
++__i;
} __counter[__num_threads] = __i;
} }
__counter[__iam] = __i; else
{
// Last part still untouched. for (int __t = 0; __t < __iam; __t++)
_DifferenceType __begin_output; __begin_output += __counter[__t];
# pragma omp barrier _OutputIterator __iter_out = __result + __begin_output;
for (_IIter __iter = __first + __begin; __iter < __first + __end;
// Store result in output on calculated positions. ++__iter)
__begin_output = 0; {
if (!__binary_pred(*__iter, *(__iter - 1)))
if (__iam == 0) *__iter_out++ = *__iter;
{ }
for (int __t = 0; __t < __num_threads; ++__t) }
__begin_output += __counter[__t]; }
__i = 0; _DifferenceType __end_output = 0;
for (int __t = 0; __t < __num_threads + 1; __t++)
_OutputIterator __iter_out = __result + __begin_output; __end_output += __counter[__t];
__begin = __borders[__num_threads]; delete[] __borders;
__end = size;
return __result + __end_output;
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{
if (iter == __first || !__binary_pred(*iter, *(iter - 1)))
{
++__i;
*__iter_out++ = *iter;
}
}
__counter[__num_threads] = __i;
}
else
{
for (int __t = 0; __t < __iam; __t++)
__begin_output += __counter[__t];
_OutputIterator __iter_out = __result + __begin_output;
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{
if (!__binary_pred(*iter, *(iter-1)))
*__iter_out++ = *iter;
}
}
} }
_DifferenceType __end_output = 0; /** @brief Parallel std::unique_copy(), without explicit equality predicate
for (int __t = 0; __t < __num_threads + 1; __t++) * @param __first Begin iterator of input sequence.
__end_output += __counter[__t]; * @param __last End iterator of input sequence.
* @param __result Begin iterator of result __sequence.
delete[] __borders; * @return End iterator of result __sequence. */
template<typename _IIter, class _OutputIterator>
return __result + __end_output; inline _OutputIterator
} __parallel_unique_copy(_IIter __first, _IIter __last,
_OutputIterator __result)
/** @brief Parallel std::unique_copy(), without explicit equality predicate {
* @param __first Begin iterator of input sequence. typedef typename std::iterator_traits<_IIter>::value_type
* @param __last End iterator of input sequence. _ValueType;
* @param __result Begin iterator of result __sequence. return __parallel_unique_copy(__first, __last, __result,
* @return End iterator of result __sequence. */ std::equal_to<_ValueType>());
template<typename _IIter, class _OutputIterator> }
inline _OutputIterator
__parallel_unique_copy(_IIter __first, _IIter __last,
_OutputIterator __result)
{
typedef typename std::iterator_traits<_IIter>::value_type
_ValueType;
return __parallel_unique_copy(__first, __last, __result,
std::equal_to<_ValueType>());
}
}//namespace __gnu_parallel }//namespace __gnu_parallel
......
...@@ -49,261 +49,264 @@ namespace __gnu_parallel ...@@ -49,261 +49,264 @@ namespace __gnu_parallel
#define _GLIBCXX_JOB_VOLATILE volatile #define _GLIBCXX_JOB_VOLATILE volatile
/** @brief One __job for a certain thread. */ /** @brief One __job for a certain thread. */
template<typename _DifferenceTp> template<typename _DifferenceTp>
struct _Job struct _Job
{ {
typedef _DifferenceTp _DifferenceType; typedef _DifferenceTp _DifferenceType;
/** @brief First element. /** @brief First element.
* *
* Changed by owning and stealing thread. By stealing thread, * Changed by owning and stealing thread. By stealing thread,
* always incremented. */ * always incremented. */
_GLIBCXX_JOB_VOLATILE _DifferenceType _M_first; _GLIBCXX_JOB_VOLATILE _DifferenceType _M_first;
/** @brief Last element. /** @brief Last element.
* *
* Changed by owning thread only. */ * Changed by owning thread only. */
_GLIBCXX_JOB_VOLATILE _DifferenceType _M_last; _GLIBCXX_JOB_VOLATILE _DifferenceType _M_last;
/** @brief Number of elements, i.e. @__c _M_last-_M_first+1. /** @brief Number of elements, i.e. @__c _M_last-_M_first+1.
* *
* Changed by owning thread only. */ * Changed by owning thread only. */
_GLIBCXX_JOB_VOLATILE _DifferenceType _M_load; _GLIBCXX_JOB_VOLATILE _DifferenceType _M_load;
}; };
/** @brief Work stealing algorithm for random access iterators. /** @brief Work stealing algorithm for random access iterators.
* *
* Uses O(1) additional memory. Synchronization at job lists is * Uses O(1) additional memory. Synchronization at job lists is
* done with atomic operations. * done with atomic operations.
* @param __begin Begin iterator of element sequence. * @param __begin Begin iterator of element sequence.
* @param __end End iterator of element sequence. * @param __end End iterator of element sequence.
* @param __op User-supplied functor (comparator, predicate, adding * @param __op User-supplied functor (comparator, predicate, adding
* functor, ...). * functor, ...).
* @param __f Functor to "process" an element with __op (depends on * @param __f Functor to "process" an element with __op (depends on
* desired functionality, e. g. for std::for_each(), ...). * desired functionality, e. g. for std::for_each(), ...).
* @param __r Functor to "add" a single __result to the already * @param __r Functor to "add" a single __result to the already
* processed elements (depends on functionality). * processed elements (depends on functionality).
* @param __base Base value for reduction. * @param __base Base value for reduction.
* @param __output Pointer to position where final result is written to * @param __output Pointer to position where final result is written to
* @param __bound Maximum number of elements processed (e. g. for * @param __bound Maximum number of elements processed (e. g. for
* std::count_n()). * std::count_n()).
* @return User-supplied functor (that may contain a part of the result). * @return User-supplied functor (that may contain a part of the result).
*/ */
template<typename _RAIter, template<typename _RAIter,
typename _Op, typename _Op,
typename _Fu, typename _Fu,
typename _Red, typename _Red,
typename _Result> typename _Result>
_Op _Op
__for_each_template_random_access_workstealing( __for_each_template_random_access_workstealing(_RAIter __begin,
_RAIter __begin, _RAIter __end, _Op __op, _Fu& __f, _Red __r, _RAIter __end, _Op __op,
_Result __base, _Result& __output, _Fu& __f, _Red __r,
typename std::iterator_traits<_RAIter>::difference_type __bound) _Result __base,
{ _Result& __output,
_GLIBCXX_CALL(__end - __begin) typename std::iterator_traits<_RAIter>::difference_type __bound)
{
typedef std::iterator_traits<_RAIter> _TraitsType; _GLIBCXX_CALL(__end - __begin)
typedef typename _TraitsType::difference_type _DifferenceType;
typedef std::iterator_traits<_RAIter> _TraitsType;
const _Settings& __s = _Settings::get(); typedef typename _TraitsType::difference_type _DifferenceType;
_DifferenceType __chunk_size = const _Settings& __s = _Settings::get();
static_cast<_DifferenceType>(__s.workstealing_chunk_size);
_DifferenceType __chunk_size =
// How many jobs? static_cast<_DifferenceType>(__s.workstealing_chunk_size);
_DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound;
// How many jobs?
// To avoid false sharing in a cache line. _DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound;
const int __stride =
__s.cache_line_size * 10 / sizeof(_Job<_DifferenceType>) + 1; // To avoid false sharing in a cache line.
const int __stride = (__s.cache_line_size * 10
// Total number of threads currently working. / sizeof(_Job<_DifferenceType>) + 1);
_ThreadIndex __busy = 0;
// Total number of threads currently working.
_Job<_DifferenceType> *__job; _ThreadIndex __busy = 0;
omp_lock_t __output_lock; _Job<_DifferenceType> *__job;
omp_init_lock(&__output_lock);
omp_lock_t __output_lock;
// Write base value to output. omp_init_lock(&__output_lock);
__output = __base;
// Write base value to output.
// No more threads than jobs, at least one thread. __output = __base;
_ThreadIndex __num_threads =
__gnu_parallel::max<_ThreadIndex>(1, // No more threads than jobs, at least one thread.
__gnu_parallel::min<_DifferenceType>(__length, __get_max_threads())); _ThreadIndex __num_threads = __gnu_parallel::max<_ThreadIndex>
(1, __gnu_parallel::min<_DifferenceType>(__length,
# pragma omp parallel shared(__busy) num_threads(__num_threads) __get_max_threads()));
# pragma omp parallel shared(__busy) num_threads(__num_threads)
{ {
# pragma omp single # pragma omp single
{ {
__num_threads = omp_get_num_threads(); __num_threads = omp_get_num_threads();
// Create job description array. // Create job description array.
__job = new _Job<_DifferenceType>[__num_threads * __stride]; __job = new _Job<_DifferenceType>[__num_threads * __stride];
} }
// Initialization phase. // Initialization phase.
// Flags for every thread if it is doing productive work. // Flags for every thread if it is doing productive work.
bool __iam_working = false; bool __iam_working = false;
// Thread id. // Thread id.
_ThreadIndex __iam = omp_get_thread_num(); _ThreadIndex __iam = omp_get_thread_num();
// This job. // This job.
_Job<_DifferenceType>& __my_job = __job[__iam * __stride]; _Job<_DifferenceType>& __my_job = __job[__iam * __stride];
// Random number (for work stealing). // Random number (for work stealing).
_ThreadIndex __victim; _ThreadIndex __victim;
// Local value for reduction. // Local value for reduction.
_Result __result = _Result(); _Result __result = _Result();
// Number of elements to steal in one attempt. // Number of elements to steal in one attempt.
_DifferenceType __steal; _DifferenceType __steal;
// Every thread has its own random number generator // Every thread has its own random number generator
// (modulo __num_threads). // (modulo __num_threads).
_RandomNumber rand_gen(__iam, __num_threads); _RandomNumber __rand_gen(__iam, __num_threads);
// This thread is currently working. // This thread is currently working.
# pragma omp atomic # pragma omp atomic
++__busy; ++__busy;
__iam_working = true; __iam_working = true;
// How many jobs per thread? last thread gets the rest. // How many jobs per thread? last thread gets the rest.
__my_job._M_first = __my_job._M_first = static_cast<_DifferenceType>
static_cast<_DifferenceType>(__iam * (__length / __num_threads)); (__iam * (__length / __num_threads));
__my_job._M_last = (__iam == (__num_threads - 1)) ? __my_job._M_last = (__iam == (__num_threads - 1)
(__length - 1) : ((__iam + 1) * (__length / __num_threads) - 1); ? (__length - 1)
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1; : ((__iam + 1) * (__length / __num_threads) - 1));
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
// Init result with _M_first value (to have a base value for reduction) // Init result with _M_first value (to have a base value for reduction)
if (__my_job._M_first <= __my_job._M_last) if (__my_job._M_first <= __my_job._M_last)
{ {
// Cannot use volatile variable directly. // Cannot use volatile variable directly.
_DifferenceType __my_first = __my_job._M_first; _DifferenceType __my_first = __my_job._M_first;
__result = __f(__op, __begin + __my_first); __result = __f(__op, __begin + __my_first);
++__my_job._M_first; ++__my_job._M_first;
--__my_job._M_load; --__my_job._M_load;
} }
_RAIter __current; _RAIter __current;
# pragma omp barrier # pragma omp barrier
// Actual work phase // Actual work phase
// Work on own or stolen current start // Work on own or stolen current start
while (__busy > 0) while (__busy > 0)
{ {
// Work until no productive thread left. // Work until no productive thread left.
# pragma omp flush(__busy) # pragma omp flush(__busy)
// Thread has own work to do // Thread has own work to do
while (__my_job._M_first <= __my_job._M_last) while (__my_job._M_first <= __my_job._M_last)
{ {
// fetch-and-add call // fetch-and-add call
// Reserve current job block (size __chunk_size) in my queue. // Reserve current job block (size __chunk_size) in my queue.
_DifferenceType __current_job = _DifferenceType __current_job =
__fetch_and_add<_DifferenceType>( __fetch_and_add<_DifferenceType>(&(__my_job._M_first),
&(__my_job._M_first), __chunk_size); __chunk_size);
// Update _M_load, to make the three values consistent, // Update _M_load, to make the three values consistent,
// _M_first might have been changed in the meantime // _M_first might have been changed in the meantime
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1; __my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
for (_DifferenceType __job_counter = 0; for (_DifferenceType __job_counter = 0;
__job_counter < __chunk_size __job_counter < __chunk_size
&& __current_job <= __my_job._M_last; && __current_job <= __my_job._M_last;
++__job_counter) ++__job_counter)
{ {
// Yes: process it! // Yes: process it!
__current = __begin + __current_job; __current = __begin + __current_job;
++__current_job; ++__current_job;
// Do actual work. // Do actual work.
__result = __r(__result, __f(__op, __current)); __result = __r(__result, __f(__op, __current));
} }
# pragma omp flush(__busy) # pragma omp flush(__busy)
} }
// After reaching this point, a thread's __job list is empty. // After reaching this point, a thread's __job list is empty.
if (__iam_working) if (__iam_working)
{ {
// This thread no longer has work. // This thread no longer has work.
# pragma omp atomic # pragma omp atomic
--__busy; --__busy;
__iam_working = false; __iam_working = false;
} }
_DifferenceType __supposed_first, __supposed_last, __supposed_load; _DifferenceType __supposed_first, __supposed_last,
do __supposed_load;
{ do
// Find random nonempty deque (not own), do consistency check. {
__yield(); // Find random nonempty deque (not own), do consistency check.
__yield();
# pragma omp flush(__busy) # pragma omp flush(__busy)
__victim = rand_gen(); __victim = __rand_gen();
__supposed_first = __job[__victim * __stride]._M_first; __supposed_first = __job[__victim * __stride]._M_first;
__supposed_last = __job[__victim * __stride]._M_last; __supposed_last = __job[__victim * __stride]._M_last;
__supposed_load = __job[__victim * __stride]._M_load; __supposed_load = __job[__victim * __stride]._M_load;
} }
while (__busy > 0 while (__busy > 0
&& ((__supposed_load <= 0) && ((__supposed_load <= 0)
|| ((__supposed_first + __supposed_load - 1) || ((__supposed_first + __supposed_load - 1)
!= __supposed_last))); != __supposed_last)));
if (__busy == 0) if (__busy == 0)
break; break;
if (__supposed_load > 0) if (__supposed_load > 0)
{ {
// Has work and work to do. // Has work and work to do.
// Number of elements to steal (at least one). // Number of elements to steal (at least one).
__steal = (__supposed_load < 2) ? 1 : __supposed_load / 2; __steal = (__supposed_load < 2) ? 1 : __supposed_load / 2;
// Push __victim's current start forward. // Push __victim's current start forward.
_DifferenceType __stolen_first = _DifferenceType __stolen_first =
__fetch_and_add<_DifferenceType>( __fetch_and_add<_DifferenceType>
&(__job[__victim * __stride]._M_first), __steal); (&(__job[__victim * __stride]._M_first), __steal);
_DifferenceType __stolen_try = _DifferenceType __stolen_try = (__stolen_first + __steal
__stolen_first + __steal - _DifferenceType(1); - _DifferenceType(1));
__my_job._M_first = __stolen_first; __my_job._M_first = __stolen_first;
__my_job._M_last = __my_job._M_last = __gnu_parallel::min(__stolen_try,
__gnu_parallel::min(__stolen_try, __supposed_last); __supposed_last);
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1; __my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
// Has potential work again. // Has potential work again.
# pragma omp atomic # pragma omp atomic
++__busy; ++__busy;
__iam_working = true; __iam_working = true;
# pragma omp flush(__busy) # pragma omp flush(__busy)
} }
# pragma omp flush(__busy) # pragma omp flush(__busy)
} // end while __busy > 0 } // end while __busy > 0
// Add accumulated result to output. // Add accumulated result to output.
omp_set_lock(&__output_lock); omp_set_lock(&__output_lock);
__output = __r(__output, __result); __output = __r(__output, __result);
omp_unset_lock(&__output_lock); omp_unset_lock(&__output_lock);
} }
delete[] __job; delete[] __job;
// Points to last element processed (needed as return value for // Points to last element processed (needed as return value for
// some algorithms like transform) // some algorithms like transform)
__f._M_finish_iterator = __begin + __length; __f._M_finish_iterator = __begin + __length;
omp_destroy_lock(&__output_lock); omp_destroy_lock(&__output_lock);
return __op; return __op;
} }
} // end namespace } // end namespace
#endif /* _GLIBCXX_PARALLEL_WORKSTEALING_H */ #endif /* _GLIBCXX_PARALLEL_WORKSTEALING_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment