Commit 77d16198 — authored and committed by Paolo Carlini

multiway_merge.h: Simple formatting and uglification fixes.

2009-11-06  Paolo Carlini  <paolo.carlini@oracle.com>

	* include/parallel/multiway_merge.h: Simple formatting and
	uglification fixes.
	* include/parallel/find_selectors.h: Likewise.
	* include/parallel/losertree.h: Likewise.
	* include/parallel/list_partition.h: Likewise.
	* include/parallel/for_each.h: Likewise.
	* include/parallel/multiseq_selection.h: Likewise.
	* include/parallel/workstealing.h: Likewise.
	* include/parallel/par_loop.h: Likewise.
	* include/parallel/numeric: Likewise.
	* include/parallel/quicksort.h: Likewise.
	* include/parallel/equally_split.h: Likewise.
	* include/parallel/omp_loop_static.h: Likewise.
	* include/parallel/random_shuffle.h: Likewise.
	* include/parallel/balanced_quicksort.h: Likewise.
	* include/parallel/tags.h: Likewise.
	* include/parallel/set_operations.h: Likewise.
	* include/parallel/merge.h: Likewise.
	* include/parallel/unique_copy.h: Likewise.
	* include/parallel/multiway_mergesort.h: Likewise.
	* include/parallel/search.h: Likewise.
	* include/parallel/partition.h: Likewise.
	* include/parallel/partial_sum.h: Likewise.
	* include/parallel/find.h: Likewise.
	* include/parallel/queue.h: Likewise.
	* include/parallel/omp_loop.h: Likewise.
	* include/parallel/checkers.h: Likewise.
	* include/parallel/sort.h: Likewise.

From-SVN: r153966
parent b169fe9d
2009-11-06 Paolo Carlini <paolo.carlini@oracle.com>
* include/parallel/multiway_merge.h: Simple formatting and
uglification fixes.
* include/parallel/find_selectors.h: Likewise.
* include/parallel/losertree.h: Likewise.
* include/parallel/list_partition.h: Likewise.
* include/parallel/for_each.h: Likewise.
* include/parallel/multiseq_selection.h: Likewise.
* include/parallel/workstealing.h: Likewise.
* include/parallel/par_loop.h: Likewise.
* include/parallel/numeric: Likewise.
* include/parallel/quicksort.h: Likewise.
* include/parallel/equally_split.h: Likewise.
* include/parallel/omp_loop_static.h: Likewise.
* include/parallel/random_shuffle.h: Likewise.
* include/parallel/balanced_quicksort.h: Likewise.
* include/parallel/tags.h: Likewise.
* include/parallel/set_operations.h: Likewise.
* include/parallel/merge.h: Likewise.
* include/parallel/unique_copy.h: Likewise.
* include/parallel/multiway_mergesort.h: Likewise.
* include/parallel/search.h: Likewise.
* include/parallel/partition.h: Likewise.
* include/parallel/partial_sum.h: Likewise.
* include/parallel/find.h: Likewise.
* include/parallel/queue.h: Likewise.
* include/parallel/omp_loop.h: Likewise.
* include/parallel/checkers.h: Likewise.
* include/parallel/sort.h: Likewise.
2009-11-06 Jonathan Wakely <jwakely.gcc@gmail.com>
PR libstdc++/41949
......
......@@ -57,8 +57,8 @@
namespace __gnu_parallel
{
/** @brief Information local to one thread in the parallel quicksort run. */
template<typename _RAIter>
/** @brief Information local to one thread in the parallel quicksort run. */
template<typename _RAIter>
struct _QSBThreadLocal
{
typedef std::iterator_traits<_RAIter> _TraitsType;
......@@ -88,14 +88,14 @@ template<typename _RAIter>
_QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { }
};
/** @brief Balanced quicksort divide step.
/** @brief Balanced quicksort divide step.
* @param __begin Begin iterator of subsequence.
* @param __end End iterator of subsequence.
* @param __comp Comparator.
* @param __num_threads Number of threads that are allowed to work on
* this part.
* @pre @__c (__end-__begin)>=1 */
template<typename _RAIter, typename _Compare>
template<typename _RAIter, typename _Compare>
typename std::iterator_traits<_RAIter>::difference_type
__qsb_divide(_RAIter __begin, _RAIter __end,
_Compare __comp, _ThreadIndex __num_threads)
......@@ -114,19 +114,20 @@ template<typename _RAIter, typename _Compare>
// Must be in between somewhere.
_DifferenceType __n = __end - __begin;
_GLIBCXX_PARALLEL_ASSERT(
(!__comp(*__pivot_pos, *__begin) &&
!__comp(*(__begin + __n / 2), *__pivot_pos))
|| (!__comp(*__pivot_pos, *__begin) &&
!__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) &&
!__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) &&
!__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1)) &&
!__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1)) &&
!__comp(*(__begin + __n / 2), *__pivot_pos)));
_GLIBCXX_PARALLEL_ASSERT((!__comp(*__pivot_pos, *__begin)
&& !__comp(*(__begin + __n / 2),
*__pivot_pos))
|| (!__comp(*__pivot_pos, *__begin)
&& !__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2))
&& !__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2))
&& !__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1))
&& !__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1))
&& !__comp(*(__begin + __n / 2),
*__pivot_pos)));
#endif
// Swap pivot value to end.
......@@ -138,8 +139,9 @@ template<typename _RAIter, typename _Compare>
__pred(__comp, *__pivot_pos);
// Divide, returning __end - __begin - 1 in the worst case.
_DifferenceType __split_pos = __parallel_partition(
__begin, __end - 1, __pred, __num_threads);
_DifferenceType __split_pos = __parallel_partition(__begin, __end - 1,
__pred,
__num_threads);
// Swap back pivot to middle.
std::swap(*(__begin + __split_pos), *__pivot_pos);
......@@ -156,7 +158,7 @@ template<typename _RAIter, typename _Compare>
return __split_pos;
}
/** @brief Quicksort conquer step.
/** @brief Quicksort conquer step.
* @param __tls Array of thread-local storages.
* @param __begin Begin iterator of subsequence.
* @param __end End iterator of subsequence.
......@@ -164,7 +166,7 @@ template<typename _RAIter, typename _Compare>
* @param __iam Number of the thread processing this function.
* @param __num_threads
* Number of threads that are allowed to work on this part. */
template<typename _RAIter, typename _Compare>
template<typename _RAIter, typename _Compare>
void
__qsb_conquer(_QSBThreadLocal<_RAIter>** __tls,
_RAIter __begin, _RAIter __end,
......@@ -197,9 +199,10 @@ template<typename _RAIter, typename _Compare>
__split_pos < (__end - __begin));
#endif
_ThreadIndex __num_threads_leftside =
std::max<_ThreadIndex>(1, std::min<_ThreadIndex>(
__num_threads - 1, __split_pos * __num_threads / __n));
_ThreadIndex
__num_threads_leftside = std::max<_ThreadIndex>
(1, std::min<_ThreadIndex>(__num_threads - 1, __split_pos
* __num_threads / __n));
# pragma omp atomic
*__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1;
......@@ -218,9 +221,7 @@ template<typename _RAIter, typename _Compare>
# pragma omp section
{
__qsb_conquer(__tls, __begin, __begin + __split_pos, __comp,
__iam,
__num_threads_leftside,
__wait);
__iam, __num_threads_leftside, __wait);
__wait = __parent_wait;
}
// The pivot_pos is left in place, to ensure termination.
......@@ -228,21 +229,20 @@ template<typename _RAIter, typename _Compare>
{
__qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp,
__iam + __num_threads_leftside,
__num_threads - __num_threads_leftside,
__wait);
__num_threads - __num_threads_leftside, __wait);
__wait = __parent_wait;
}
}
}
}
/**
/**
* @brief Quicksort step doing load-balanced local sort.
* @param __tls Array of thread-local storages.
* @param __comp Comparator.
* @param __iam Number of the thread processing this function.
*/
template<typename _RAIter, typename _Compare>
template<typename _RAIter, typename _Compare>
void
__qsb_local_sort_with_helping(_QSBThreadLocal<_RAIter>** __tls,
_Compare& __comp, int __iam, bool __wait)
......@@ -254,8 +254,8 @@ template<typename _RAIter, typename _Compare>
_QSBThreadLocal<_RAIter>& __tl = *__tls[__iam];
_DifferenceType __base_case_n =
_Settings::get().sort_qsb_base_case_maximal_n;
_DifferenceType
__base_case_n = _Settings::get().sort_qsb_base_case_maximal_n;
if (__base_case_n < 2)
__base_case_n = 2;
_ThreadIndex __num_threads = __tl._M_num_threads;
......@@ -292,8 +292,8 @@ template<typename _RAIter, typename _Compare>
// Divide, leave pivot unchanged in last place.
_RAIter __split_pos1, __split_pos2;
__split_pos1 =
__gnu_sequential::partition(__begin, __end - 1, __pred);
__split_pos1 = __gnu_sequential::partition(__begin, __end - 1,
__pred);
// Left side: < __pivot_pos; __right side: >= __pivot_pos.
#if _GLIBCXX_ASSERTIONS
......@@ -314,8 +314,8 @@ template<typename _RAIter, typename _Compare>
__gnu_parallel::__unary_negate<__gnu_parallel::__binder1st
<_Compare, _ValueType, _ValueType, bool>, _ValueType>
__pred(__gnu_parallel::__binder1st
<_Compare, _ValueType, _ValueType, bool>(
__comp, *__pivot_pos));
<_Compare, _ValueType, _ValueType, bool>
(__comp, *__pivot_pos));
// Find other end of pivot-equal range.
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
......@@ -335,8 +335,8 @@ template<typename _RAIter, typename _Compare>
{
// Right side larger.
if ((__split_pos2) != __end)
__tl._M_leftover_parts.push_front(
std::make_pair(__split_pos2, __end));
__tl._M_leftover_parts.push_front
(std::make_pair(__split_pos2, __end));
//__current.first = __begin; //already set anyway
__current.second = __split_pos1;
......@@ -346,8 +346,8 @@ template<typename _RAIter, typename _Compare>
{
// Left side larger.
if (__begin != __split_pos1)
__tl._M_leftover_parts.push_front(std::make_pair(__begin,
__split_pos1));
__tl._M_leftover_parts.push_front(std::make_pair
(__begin, __split_pos1));
__current.first = __split_pos2;
//__current.second = __end; //already set anyway
......@@ -417,18 +417,17 @@ template<typename _RAIter, typename _Compare>
}
}
/** @brief Top-level quicksort routine.
/** @brief Top-level quicksort routine.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __comp Comparator.
* @param __num_threads Number of threads that are allowed to work on
* this part.
*/
template<typename _RAIter, typename _Compare>
template<typename _RAIter, typename _Compare>
void
__parallel_sort_qsb(_RAIter __begin, _RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
_Compare __comp, _ThreadIndex __num_threads)
{
_GLIBCXX_CALL(__end - __begin)
......@@ -450,20 +449,20 @@ template<typename _RAIter, typename _Compare>
// Initialize thread local storage
_TLSType** __tls = new _TLSType*[__num_threads];
_DifferenceType __queue_size =
__num_threads * (_ThreadIndex)(log2(__n) + 1);
_DifferenceType __queue_size = (__num_threads
* (_ThreadIndex)(__rd_log2(__n) + 1));
for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
__tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
// There can never be more than ceil(log2(__n)) ranges on the stack,
// because
// There can never be more than ceil(__rd_log2(__n)) ranges on the
// stack, because
// 1. Only one processor pushes onto the stack
// 2. The largest range has at most length __n
// 3. Each range is larger than half of the range remaining
volatile _DifferenceType _M_elements_leftover = __n;
volatile _DifferenceType __elements_leftover = __n;
for (int __i = 0; __i < __num_threads; ++__i)
{
__tls[__i]->_M_elements_leftover = &_M_elements_leftover;
__tls[__i]->_M_elements_leftover = &__elements_leftover;
__tls[__i]->_M_num_threads = __num_threads;
__tls[__i]->_M_global = std::make_pair(__begin, __end);
......@@ -472,8 +471,8 @@ template<typename _RAIter, typename _Compare>
}
// Main recursion call.
__qsb_conquer(
__tls, __begin, __begin + __n, __comp, 0, __num_threads, true);
__qsb_conquer(__tls, __begin, __begin + __n, __comp, 0,
__num_threads, true);
#if _GLIBCXX_ASSERTIONS
// All stack must be empty.
......
......@@ -68,5 +68,6 @@ namespace __gnu_parallel
return true;
}
}
#endif /* _GLIBCXX_PARALLEL_CHECKERS_H */
// -*- C++ -*-
// Copyright (C) 2007, 2009 Free Software Foundation, Inc.
// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms
......@@ -33,17 +33,17 @@
namespace __gnu_parallel
{
/** @brief function to split a sequence into parts of almost equal size.
/** @brief function to split a sequence into parts of almost equal size.
*
* The resulting sequence __s of length __num_threads+1 contains the splitting
* positions when splitting the range [0,__n) into parts of almost
* equal size (plus minus 1). The first entry is 0, the last one
* n. There may result empty parts.
* The resulting sequence __s of length __num_threads+1 contains the
* splitting positions when splitting the range [0,__n) into parts of
* almost equal size (plus minus 1). The first entry is 0, the last
* one n. There may result empty parts.
* @param __n Number of elements
* @param __num_threads Number of parts
* @param __s Splitters
* @returns End of __splitter sequence, i.e. @__c __s+__num_threads+1 */
template<typename _DifferenceType, typename _OutputIterator>
template<typename _DifferenceType, typename _OutputIterator>
_OutputIterator
equally_split(_DifferenceType __n, _ThreadIndex __num_threads,
_OutputIterator __s)
......@@ -54,15 +54,14 @@ template<typename _DifferenceType, typename _OutputIterator>
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
{
*__s++ = __pos;
__pos += (__i < __num_longer_chunks) ?
(__chunk_length + 1) : __chunk_length;
__pos += ((__i < __num_longer_chunks)
? (__chunk_length + 1) : __chunk_length);
}
*__s++ = __n;
return __s;
}
/** @brief function to split a sequence into parts of almost equal size.
/** @brief function to split a sequence into parts of almost equal size.
*
* Returns the position of the splitting point between
* thread number __thread_no (included) and
......@@ -70,7 +69,7 @@ template<typename _DifferenceType, typename _OutputIterator>
* @param __n Number of elements
* @param __num_threads Number of parts
* @returns splitting point */
template<typename _DifferenceType>
template<typename _DifferenceType>
_DifferenceType
equally_split_point(_DifferenceType __n,
_ThreadIndex __num_threads,
......
......@@ -42,17 +42,17 @@
namespace __gnu_parallel
{
/**
/**
* @brief Parallel std::find, switch for different algorithms.
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence. Must have same
* length as first sequence.
* @param __pred Find predicate.
* @param __selector _Functionality (e. g. std::find_if (), std::equal(),...)
* @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
* @return Place of finding in both sequences.
*/
template<typename _RAIter1,
template<typename _RAIter1,
typename _RAIter2,
typename _Pred,
typename _Selector>
......@@ -63,14 +63,14 @@ template<typename _RAIter1,
switch (_Settings::get().find_algorithm)
{
case GROWING_BLOCKS:
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
growing_blocks_tag());
return __find_template(__begin1, __end1, __begin2, __pred,
__selector, growing_blocks_tag());
case CONSTANT_SIZE_BLOCKS:
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
constant_size_blocks_tag());
return __find_template(__begin1, __end1, __begin2, __pred,
__selector, constant_size_blocks_tag());
case EQUAL_SPLIT:
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
equal_split_tag());
return __find_template(__begin1, __end1, __begin2, __pred,
__selector, equal_split_tag());
default:
_GLIBCXX_PARALLEL_ASSERT(false);
return std::make_pair(__begin1, __begin2);
......@@ -79,27 +79,24 @@ template<typename _RAIter1,
#if _GLIBCXX_FIND_EQUAL_SPLIT
/**
/**
* @brief Parallel std::find, equal splitting variant.
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence. Second __sequence
* must have same length as first sequence.
* @param __pred Find predicate.
* @param __selector _Functionality (e. g. std::find_if (), std::equal(),...)
* @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
* @return Place of finding in both sequences.
*/
template<typename _RAIter1,
template<typename _RAIter1,
typename _RAIter2,
typename _Pred,
typename _Selector>
std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1,
_RAIter1 __end1,
_RAIter2 __begin2,
_Pred __pred,
_Selector __selector,
equal_split_tag)
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred,
_Selector __selector, equal_split_tag)
{
_GLIBCXX_CALL(__end1 - __begin1)
......@@ -132,7 +129,7 @@ template<typename _RAIter1,
_RAIter2 __i2 = __begin2 + __start;
for (_DifferenceType __pos = __start; __pos < __stop; ++__pos)
{
#pragma omp flush(__result)
# pragma omp flush(__result)
// Result has been set to something lower.
if (__result < __pos)
break;
......@@ -153,22 +150,22 @@ template<typename _RAIter1,
omp_destroy_lock(&__result_lock);
delete[] __borders;
return
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result);
return std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
__begin2 + __result);
}
#endif
#if _GLIBCXX_FIND_GROWING_BLOCKS
/**
/**
* @brief Parallel std::find, growing block size variant.
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence. Second __sequence
* must have same length as first sequence.
* @param __pred Find predicate.
* @param __selector _Functionality (e. g. std::find_if (), std::equal(),...)
* @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
* @return Place of finding in both sequences.
* @see __gnu_parallel::_Settings::find_sequential_search_size
* @see __gnu_parallel::_Settings::find_initial_block_size
......@@ -182,7 +179,7 @@ template<typename _RAIter1,
* for CSB, the blocks are allocated in a predetermined manner,
* namely spacial round-robin.
*/
template<typename _RAIter1,
template<typename _RAIter1,
typename _RAIter2,
typename _Pred,
typename _Selector>
......@@ -201,13 +198,15 @@ template<typename _RAIter1,
_DifferenceType __length = __end1 - __begin1;
_DifferenceType __sequential_search_size =
std::min<_DifferenceType>(__length, __s.find_sequential_search_size);
_DifferenceType
__sequential_search_size = std::min<_DifferenceType>
(__length, __s.find_sequential_search_size);
// Try it sequentially first.
std::pair<_RAIter1, _RAIter2> __find_seq_result =
__selector._M_sequential_algorithm(
__begin1, __begin1 + __sequential_search_size, __begin2, __pred);
std::pair<_RAIter1, _RAIter2>
__find_seq_result = __selector._M_sequential_algorithm
(__begin1, __begin1 + __sequential_search_size,
__begin2, __pred);
if (__find_seq_result.first != (__begin1 + __sequential_search_size))
return __find_seq_result;
......@@ -229,8 +228,8 @@ template<typename _RAIter1,
_ThreadIndex __iam = omp_get_thread_num();
_DifferenceType __block_size = __s.find_initial_block_size;
_DifferenceType __start =
__fetch_and_add<_DifferenceType>(&__next_block_start, __block_size);
_DifferenceType __start = __fetch_and_add<_DifferenceType>
(&__next_block_start, __block_size);
// Get new block, update pointer to next block.
_DifferenceType __stop =
......@@ -248,9 +247,10 @@ template<typename _RAIter1,
break;
}
__local_result = __selector._M_sequential_algorithm(
__begin1 + __start, __begin1 + __stop,
__local_result = __selector._M_sequential_algorithm
(__begin1 + __start, __begin1 + __stop,
__begin2 + __start, __pred);
if (__local_result.first != (__begin1 + __stop))
{
omp_set_lock(&__result_lock);
......@@ -259,21 +259,20 @@ template<typename _RAIter1,
__result = __local_result.first - __begin1;
// Result cannot be in future blocks, stop algorithm.
__fetch_and_add<_DifferenceType>(
&__next_block_start, __length);
__fetch_and_add<_DifferenceType>(&__next_block_start,
__length);
}
omp_unset_lock(&__result_lock);
}
__block_size = std::min<_DifferenceType>(
__block_size * __s.find_increasing_factor,
__block_size = std::min<_DifferenceType>
(__block_size * __s.find_increasing_factor,
__s.find_maximum_block_size);
// Get new block, update pointer to next block.
__start =
__fetch_and_add<_DifferenceType>(
&__next_block_start, __block_size);
__stop = ((__length < (__start + __block_size))
__start = __fetch_and_add<_DifferenceType>(&__next_block_start,
__block_size);
__stop = (__length < (__start + __block_size)
? __length : (__start + __block_size));
}
} //parallel
......@@ -282,21 +281,22 @@ template<typename _RAIter1,
// Return iterator on found element.
return
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result);
std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
__begin2 + __result);
}
#endif
#if _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS
/**
/**
* @brief Parallel std::find, constant block size variant.
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence. Second __sequence
* must have same length as first sequence.
* @param __pred Find predicate.
* @param __selector _Functionality (e. g. std::find_if (), std::equal(),...)
* @param __selector _Functionality (e. g. std::find_if(), std::equal(),...)
* @return Place of finding in both sequences.
* @see __gnu_parallel::_Settings::find_sequential_search_size
* @see __gnu_parallel::_Settings::find_block_size
......@@ -307,7 +307,7 @@ template<typename _RAIter1,
* blocks are allocated in a predetermined manner, namely spacial
* round-robin.
*/
template<typename _RAIter1,
template<typename _RAIter1,
typename _RAIter2,
typename _Pred,
typename _Selector>
......@@ -325,13 +325,13 @@ template<typename _RAIter1,
_DifferenceType __length = __end1 - __begin1;
_DifferenceType __sequential_search_size = std::min<_DifferenceType>(
__length, __s.find_sequential_search_size);
_DifferenceType __sequential_search_size = std::min<_DifferenceType>
(__length, __s.find_sequential_search_size);
// Try it sequentially first.
std::pair<_RAIter1, _RAIter2> __find_seq_result =
__selector._M_sequential_algorithm(
__begin1, __begin1 + __sequential_search_size, __begin2, __pred);
std::pair<_RAIter1, _RAIter2>
__find_seq_result = __selector._M_sequential_algorithm
(__begin1, __begin1 + __sequential_search_size, __begin2, __pred);
if (__find_seq_result.first != (__begin1 + __sequential_search_size))
return __find_seq_result;
......@@ -356,8 +356,9 @@ template<typename _RAIter1,
// Where to work (initialization).
_DifferenceType __start = __iteration_start + __iam * __block_size;
_DifferenceType __stop =
std::min<_DifferenceType>(__length, __start + __block_size);
_DifferenceType __stop = std::min<_DifferenceType>(__length,
__start
+ __block_size);
std::pair<_RAIter1, _RAIter2> __local_result;
......@@ -368,9 +369,11 @@ template<typename _RAIter1,
// No chance to find first element.
if (__result < __start)
break;
__local_result = __selector._M_sequential_algorithm(
__begin1 + __start, __begin1 + __stop,
__local_result = __selector._M_sequential_algorithm
(__begin1 + __start, __begin1 + __stop,
__begin2 + __start, __pred);
if (__local_result.first != (__begin1 + __stop))
{
omp_set_lock(&__result_lock);
......@@ -385,16 +388,16 @@ template<typename _RAIter1,
// Where to work.
__start = __iteration_start + __iam * __block_size;
__stop = std::min<_DifferenceType>(
__length, __start + __block_size);
__stop = std::min<_DifferenceType>(__length,
__start + __block_size);
}
} //parallel
omp_destroy_lock(&__result_lock);
// Return iterator on found element.
return
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result);
return std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
__begin2 + __result);
}
#endif
} // end namespace
......
......@@ -143,8 +143,8 @@ namespace __gnu_parallel
_M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
{ return mismatch(__begin1, __end1, __begin2, __pred, sequential_tag());
}
{ return mismatch(__begin1, __end1, __begin2,
__pred, sequential_tag()); }
};
......@@ -155,7 +155,8 @@ namespace __gnu_parallel
_FIterator _M_begin;
_FIterator _M_end;
explicit __find_first_of_selector(_FIterator __begin, _FIterator __end)
explicit __find_first_of_selector(_FIterator __begin,
_FIterator __end)
: _M_begin(__begin), _M_end(__end) { }
/** @brief Test on one position.
......@@ -186,8 +187,8 @@ namespace __gnu_parallel
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
{
return std::make_pair(
find_first_of(__begin1, __end1, _M_begin, _M_end, __pred,
return std::make_pair(find_first_of(__begin1, __end1,
_M_begin, _M_end, __pred,
sequential_tag()), __begin2);
}
};
......
......@@ -69,31 +69,21 @@ namespace __gnu_parallel
_Parallelism __parallelism_tag)
{
if (__parallelism_tag == parallel_unbalanced)
return __for_each_template_random_access_ed(__begin, __end, __user_op,
__functionality, __reduction,
__reduction_start,
__output, __bound);
return __for_each_template_random_access_ed
(__begin, __end, __user_op, __functionality, __reduction,
__reduction_start, __output, __bound);
else if (__parallelism_tag == parallel_omp_loop)
return __for_each_template_random_access_omp_loop(
__begin, __end, __user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
return __for_each_template_random_access_omp_loop
(__begin, __end, __user_op, __functionality, __reduction,
__reduction_start, __output, __bound);
else if (__parallelism_tag == parallel_omp_loop_static)
return __for_each_template_random_access_omp_loop(
__begin, __end, __user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
return __for_each_template_random_access_omp_loop
(__begin, __end, __user_op, __functionality, __reduction,
__reduction_start, __output, __bound);
else //e. g. parallel_balanced
return __for_each_template_random_access_workstealing(__begin, __end,
__user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
return __for_each_template_random_access_workstealing
(__begin, __end, __user_op, __functionality, __reduction,
__reduction_start, __output, __bound);
}
}
......
......@@ -52,7 +52,7 @@ namespace __gnu_parallel
const bool __make_twice)
{
++__count_to_two;
if (not __make_twice or __count_to_two < 2)
if (!__make_twice || __count_to_two < 2)
__shrink(__os_starts, __count_to_two, __range_length);
else
{
......@@ -112,7 +112,7 @@ namespace __gnu_parallel
std::vector<_IIter> __os_starts(2 * __oversampling * __num_parts + 1);
__os_starts[0]= __begin;
__os_starts[0] = __begin;
_IIter __prev = __begin, __it = __begin;
size_t __dist_limit = 0, __dist = 0;
size_t __cur = 1, __next = 1;
......
......@@ -40,7 +40,7 @@
namespace __gnu_parallel
{
/**
/**
* @brief Guarded loser/tournament tree.
*
* The smallest element is at the top.
......@@ -52,7 +52,7 @@ namespace __gnu_parallel
* @param _Tp the element type
* @param _Compare the comparator to use, defaults to std::less<_Tp>
*/
template<typename _Tp, typename _Compare>
template<typename _Tp, typename _Compare>
class _LoserTreeBase
{
protected:
......@@ -105,8 +105,8 @@ template<typename _Tp, typename _Compare>
_M_offset = _M_k;
// Avoid default-constructing _M_losers[]._M_key
_M_losers
= static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser)));
_M_losers = static_cast<_Loser*>(::operator new(2 * _M_k
* sizeof(_Loser)));
for (unsigned int __i = _M_ik - 1; __i < _M_k; ++__i)
_M_losers[__i + _M_k]._M_sup = true;
......@@ -120,30 +120,30 @@ template<typename _Tp, typename _Compare>
{ ::operator delete(_M_losers); }
/**
* @brief Initializes the sequence "_M_source" with the element "_M_key".
* @brief Initializes the sequence "_M_source" with the element "__key".
*
* @param _M_key the element to insert
* @param _M_source __index of the __source __sequence
* @param _M_sup flag that determines whether the value to insert is an
* @param __key the element to insert
* @param __source __index of the __source __sequence
* @param __sup flag that determines whether the value to insert is an
* explicit __supremum.
*/
void
__insert_start(const _Tp& _M_key, int _M_source, bool _M_sup)
__insert_start(const _Tp& __key, int __source, bool __sup)
{
unsigned int __pos = _M_k + _M_source;
unsigned int __pos = _M_k + __source;
if(_M_first_insert)
{
// Construct all keys, so we can easily deconstruct them.
for (unsigned int __i = 0; __i < (2 * _M_k); ++__i)
new(&(_M_losers[__i]._M_key)) _Tp(_M_key);
new(&(_M_losers[__i]._M_key)) _Tp(__key);
_M_first_insert = false;
}
else
new(&(_M_losers[__pos]._M_key)) _Tp(_M_key);
new(&(_M_losers[__pos]._M_key)) _Tp(__key);
_M_losers[__pos]._M_sup = _M_sup;
_M_losers[__pos]._M_source = _M_source;
_M_losers[__pos]._M_sup = __sup;
_M_losers[__pos]._M_source = __source;
}
/**
......@@ -161,7 +161,7 @@ template<typename _Tp, typename _Compare>
*
* Unstable variant is done using partial specialisation below.
*/
template<bool __stable/* default == true */, typename _Tp,
template<bool __stable/* default == true */, typename _Tp,
typename _Compare>
class _LoserTree
: public _LoserTreeBase<_Tp, _Compare>
......@@ -183,8 +183,8 @@ template<bool __stable/* default == true */, typename _Tp,
return __root;
else
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(_M_losers[__right]._M_key,
......@@ -212,38 +212,38 @@ template<bool __stable/* default == true */, typename _Tp,
*
* This implementation is stable.
*/
// Do not pass a const reference since _M_key will be used as
// Do not pass a const reference since __key will be used as
// local variable.
void
__delete_min_insert(_Tp _M_key, bool _M_sup)
__delete_min_insert(_Tp __key, bool __sup)
{
#if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
int _M_source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0;
int __source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
__pos /= 2)
{
// The smaller one gets promoted, ties are broken by _M_source.
if ((_M_sup && (!_M_losers[__pos]._M_sup
|| _M_losers[__pos]._M_source < _M_source))
|| (!_M_sup && !_M_losers[__pos]._M_sup
&& ((_M_comp(_M_losers[__pos]._M_key, _M_key))
|| (!_M_comp(_M_key, _M_losers[__pos]._M_key)
&& _M_losers[__pos]._M_source < _M_source))))
if ((__sup && (!_M_losers[__pos]._M_sup
|| _M_losers[__pos]._M_source < __source))
|| (!__sup && !_M_losers[__pos]._M_sup
&& ((_M_comp(_M_losers[__pos]._M_key, __key))
|| (!_M_comp(__key, _M_losers[__pos]._M_key)
&& _M_losers[__pos]._M_source < __source))))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_sup, _M_sup);
std::swap(_M_losers[__pos]._M_source, _M_source);
std::swap(_M_losers[__pos]._M_key, _M_key);
std::swap(_M_losers[__pos]._M_sup, __sup);
std::swap(_M_losers[__pos]._M_source, __source);
std::swap(_M_losers[__pos]._M_key, __key);
}
}
_M_losers[0]._M_sup = _M_sup;
_M_losers[0]._M_source = _M_source;
_M_losers[0]._M_key = _M_key;
_M_losers[0]._M_sup = __sup;
_M_losers[0]._M_source = __source;
_M_losers[0]._M_key = __key;
}
};
......@@ -252,7 +252,7 @@ template<bool __stable/* default == true */, typename _Tp,
*
* Stability (non-stable here) is selected with partial specialization.
*/
template<typename _Tp, typename _Compare>
template<typename _Tp, typename _Compare>
class _LoserTree</* __stable == */false, _Tp, _Compare>
: public _LoserTreeBase<_Tp, _Compare>
{
......@@ -281,8 +281,8 @@ template<typename _Tp, typename _Compare>
return __root;
else
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(_M_losers[__right]._M_key,
......@@ -306,47 +306,47 @@ template<typename _Tp, typename _Compare>
{ _M_losers[0] = _M_losers[__init_winner(1)]; }
/**
* Delete the _M_key smallest element and insert the element _M_key
* Delete the _M_key smallest element and insert the element __key
* instead.
*
* @param _M_key the _M_key to insert
* @param _M_sup true iff _M_key is an explicitly marked supremum
* @param __key the _M_key to insert
* @param __sup true iff __key is an explicitly marked supremum
*/
// Do not pass a const reference since _M_key will be used as local
// Do not pass a const reference since __key will be used as local
// variable.
void
__delete_min_insert(_Tp _M_key, bool _M_sup)
__delete_min_insert(_Tp __key, bool __sup)
{
#if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
int _M_source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0;
int __source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
__pos /= 2)
{
// The smaller one gets promoted.
if (_M_sup || (!_M_losers[__pos]._M_sup
&& _M_comp(_M_losers[__pos]._M_key, _M_key)))
if (__sup || (!_M_losers[__pos]._M_sup
&& _M_comp(_M_losers[__pos]._M_key, __key)))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_sup, _M_sup);
std::swap(_M_losers[__pos]._M_source, _M_source);
std::swap(_M_losers[__pos]._M_key, _M_key);
std::swap(_M_losers[__pos]._M_sup, __sup);
std::swap(_M_losers[__pos]._M_source, __source);
std::swap(_M_losers[__pos]._M_key, __key);
}
}
_M_losers[0]._M_sup = _M_sup;
_M_losers[0]._M_source = _M_source;
_M_losers[0]._M_key = _M_key;
_M_losers[0]._M_sup = __sup;
_M_losers[0]._M_source = __source;
_M_losers[0]._M_key = __key;
}
};
/**
/**
* @brief Base class of _Loser Tree implementation using pointers.
*/
template<typename _Tp, typename _Compare>
template<typename _Tp, typename _Compare>
class _LoserTreePointerBase
{
protected:
......@@ -383,22 +383,22 @@ template<typename _Tp, typename _Compare>
int __get_min_source()
{ return _M_losers[0]._M_source; }
void __insert_start(const _Tp& _M_key, int _M_source, bool _M_sup)
void __insert_start(const _Tp& __key, int __source, bool __sup)
{
unsigned int __pos = _M_k + _M_source;
unsigned int __pos = _M_k + __source;
_M_losers[__pos]._M_sup = _M_sup;
_M_losers[__pos]._M_source = _M_source;
_M_losers[__pos]._M_keyp = &_M_key;
_M_losers[__pos]._M_sup = __sup;
_M_losers[__pos]._M_source = __source;
_M_losers[__pos]._M_keyp = &__key;
}
};
/**
/**
* @brief Stable _LoserTree implementation.
*
* The unstable variant is implemented using partial instantiation below.
*/
template<bool __stable/* default == true */, typename _Tp, typename _Compare>
template<bool __stable/* default == true */, typename _Tp, typename _Compare>
class _LoserTreePointer
: public _LoserTreePointerBase<_Tp, _Compare>
{
......@@ -418,8 +418,8 @@ template<bool __stable/* default == true */, typename _Tp, typename _Compare>
return __root;
else
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(*_M_losers[__right]._M_keyp,
......@@ -441,44 +441,45 @@ template<bool __stable/* default == true */, typename _Tp, typename _Compare>
void __init()
{ _M_losers[0] = _M_losers[__init_winner(1)]; }
void __delete_min_insert(const _Tp& _M_key, bool _M_sup)
void __delete_min_insert(const _Tp& __key, bool __sup)
{
#if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
const _Tp* _M_keyp = &_M_key;
int _M_source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
const _Tp* __keyp = &__key;
int __source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
__pos /= 2)
{
// The smaller one gets promoted, ties are broken by _M_source.
if ((_M_sup && (!_M_losers[__pos]._M_sup ||
_M_losers[__pos]._M_source < _M_source)) ||
(!_M_sup && !_M_losers[__pos]._M_sup &&
((_M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)) ||
(!_M_comp(*_M_keyp, *_M_losers[__pos]._M_keyp)
&& _M_losers[__pos]._M_source < _M_source))))
// The smaller one gets promoted, ties are broken by __source.
if ((__sup && (!_M_losers[__pos]._M_sup
|| _M_losers[__pos]._M_source < __source))
|| (!__sup && !_M_losers[__pos]._M_sup &&
((_M_comp(*_M_losers[__pos]._M_keyp, *__keyp))
|| (!_M_comp(*__keyp, *_M_losers[__pos]._M_keyp)
&& _M_losers[__pos]._M_source < __source))))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_sup, _M_sup);
std::swap(_M_losers[__pos]._M_source, _M_source);
std::swap(_M_losers[__pos]._M_keyp, _M_keyp);
std::swap(_M_losers[__pos]._M_sup, __sup);
std::swap(_M_losers[__pos]._M_source, __source);
std::swap(_M_losers[__pos]._M_keyp, __keyp);
}
}
_M_losers[0]._M_sup = _M_sup;
_M_losers[0]._M_source = _M_source;
_M_losers[0]._M_keyp = _M_keyp;
_M_losers[0]._M_sup = __sup;
_M_losers[0]._M_source = __source;
_M_losers[0]._M_keyp = __keyp;
}
};
/**
/**
* @brief Unstable _LoserTree implementation.
*
* The stable variant is above.
*/
template<typename _Tp, typename _Compare>
template<typename _Tp, typename _Compare>
class _LoserTreePointer</* __stable == */false, _Tp, _Compare>
: public _LoserTreePointerBase<_Tp, _Compare>
{
......@@ -498,8 +499,8 @@ template<typename _Tp, typename _Compare>
return __root;
else
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(*_M_losers[__right]._M_keyp,
......@@ -521,36 +522,36 @@ template<typename _Tp, typename _Compare>
void __init()
{ _M_losers[0] = _M_losers[__init_winner(1)]; }
void __delete_min_insert(const _Tp& _M_key, bool _M_sup)
void __delete_min_insert(const _Tp& __key, bool __sup)
{
#if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
const _Tp* _M_keyp = &_M_key;
int _M_source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0;
const _Tp* __keyp = &__key;
int __source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
__pos /= 2)
{
// The smaller one gets promoted.
if (_M_sup || (!_M_losers[__pos]._M_sup
&& _M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)))
if (__sup || (!_M_losers[__pos]._M_sup
&& _M_comp(*_M_losers[__pos]._M_keyp, *__keyp)))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_sup, _M_sup);
std::swap(_M_losers[__pos]._M_source, _M_source);
std::swap(_M_losers[__pos]._M_keyp, _M_keyp);
std::swap(_M_losers[__pos]._M_sup, __sup);
std::swap(_M_losers[__pos]._M_source, __source);
std::swap(_M_losers[__pos]._M_keyp, __keyp);
}
}
_M_losers[0]._M_sup = _M_sup;
_M_losers[0]._M_source = _M_source;
_M_losers[0]._M_keyp = _M_keyp;
_M_losers[0]._M_sup = __sup;
_M_losers[0]._M_source = __source;
_M_losers[0]._M_keyp = __keyp;
}
};
/** @brief Base class for unguarded _LoserTree implementation.
/** @brief Base class for unguarded _LoserTree implementation.
*
* The whole element is copied into the tree structure.
*
......@@ -560,7 +561,7 @@ template<typename _Tp, typename _Compare>
*
* This is a very fast variant.
*/
template<typename _Tp, typename _Compare>
template<typename _Tp, typename _Compare>
class _LoserTreeUnguardedBase
{
protected:
......@@ -575,7 +576,7 @@ template<typename _Tp, typename _Compare>
_Compare _M_comp;
public:
_LoserTreeUnguardedBase(unsigned int __k, const _Tp _sentinel,
_LoserTreeUnguardedBase(unsigned int __k, const _Tp __sentinel,
_Compare __comp = std::less<_Tp>())
: _M_comp(__comp)
{
......@@ -585,12 +586,12 @@ template<typename _Tp, typename _Compare>
_M_k = 1 << (__rd_log2(_M_ik - 1) + 1);
_M_offset = _M_k;
// Avoid default-constructing _M_losers[]._M_key
_M_losers
= static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser)));
_M_losers = static_cast<_Loser*>(::operator new(2 * _M_k
* sizeof(_Loser)));
for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i)
{
_M_losers[__i]._M_key = _sentinel;
_M_losers[__i]._M_key = __sentinel;
_M_losers[__i]._M_source = -1;
}
}
......@@ -609,21 +610,21 @@ template<typename _Tp, typename _Compare>
}
void
__insert_start(const _Tp& _M_key, int _M_source, bool)
__insert_start(const _Tp& __key, int __source, bool)
{
unsigned int __pos = _M_k + _M_source;
unsigned int __pos = _M_k + __source;
new(&(_M_losers[__pos]._M_key)) _Tp(_M_key);
_M_losers[__pos]._M_source = _M_source;
new(&(_M_losers[__pos]._M_key)) _Tp(__key);
_M_losers[__pos]._M_source = __source;
}
};
/**
/**
* @brief Stable implementation of unguarded _LoserTree.
*
* Unstable variant is selected below with partial specialization.
*/
template<bool __stable/* default == true */, typename _Tp, typename _Compare>
template<bool __stable/* default == true */, typename _Tp, typename _Compare>
class _LoserTreeUnguarded
: public _LoserTreeUnguardedBase<_Tp, _Compare>
{
......@@ -631,10 +632,10 @@ template<bool __stable/* default == true */, typename _Tp, typename _Compare>
using _Base::_M_k;
using _Base::_M_losers;
public:
_LoserTreeUnguarded(unsigned int __k, const _Tp _sentinel,
public:
_LoserTreeUnguarded(unsigned int __k, const _Tp __sentinel,
_Compare __comp = std::less<_Tp>())
: _Base::_LoserTreeUnguardedBase(__k, _sentinel, __comp)
: _Base::_LoserTreeUnguardedBase(__k, __sentinel, __comp)
{ }
unsigned int
......@@ -644,9 +645,10 @@ public:
return __root;
else
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
if (!_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key))
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
if (!_M_comp(_M_losers[__right]._M_key,
_M_losers[__left]._M_key))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
......@@ -673,41 +675,42 @@ public:
#endif
}
// Do not pass a const reference since _M_key will be used as
// Do not pass a const reference since __key will be used as
// local variable.
void
__delete_min_insert(_Tp _M_key, bool)
__delete_min_insert(_Tp __key, bool)
{
#if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
int _M_source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
int __source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
__pos /= 2)
{
// The smaller one gets promoted, ties are broken by _M_source.
if (_M_comp(_M_losers[__pos]._M_key, _M_key)
|| (!_M_comp(_M_key, _M_losers[__pos]._M_key)
&& _M_losers[__pos]._M_source < _M_source))
if (_M_comp(_M_losers[__pos]._M_key, __key)
|| (!_M_comp(__key, _M_losers[__pos]._M_key)
&& _M_losers[__pos]._M_source < __source))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_source, _M_source);
std::swap(_M_losers[__pos]._M_key, _M_key);
std::swap(_M_losers[__pos]._M_source, __source);
std::swap(_M_losers[__pos]._M_key, __key);
}
}
_M_losers[0]._M_source = _M_source;
_M_losers[0]._M_key = _M_key;
_M_losers[0]._M_source = __source;
_M_losers[0]._M_key = __key;
}
};
/**
/**
* @brief Non-Stable implementation of unguarded _LoserTree.
*
* Stable implementation is above.
*/
template<typename _Tp, typename _Compare>
template<typename _Tp, typename _Compare>
class _LoserTreeUnguarded</* __stable == */false, _Tp, _Compare>
: public _LoserTreeUnguardedBase<_Tp, _Compare>
{
......@@ -715,10 +718,10 @@ template<typename _Tp, typename _Compare>
using _Base::_M_k;
using _Base::_M_losers;
public:
_LoserTreeUnguarded(unsigned int __k, const _Tp _sentinel,
public:
_LoserTreeUnguarded(unsigned int __k, const _Tp __sentinel,
_Compare __comp = std::less<_Tp>())
: _Base::_LoserTreeUnguardedBase(__k, _sentinel, __comp)
: _Base::_LoserTreeUnguardedBase(__k, __sentinel, __comp)
{ }
unsigned int
......@@ -728,8 +731,8 @@ public:
return __root;
else
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
#if _GLIBCXX_ASSERTIONS
// If __left one is sentinel then __right one must be, too.
......@@ -737,7 +740,8 @@ public:
_GLIBCXX_PARALLEL_ASSERT(_M_losers[__right]._M_source == -1);
#endif
if (!_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key))
if (!_M_comp(_M_losers[__right]._M_key,
_M_losers[__left]._M_key))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
......@@ -764,40 +768,41 @@ public:
#endif
}
// Do not pass a const reference since _M_key will be used as
// Do not pass a const reference since __key will be used as
// local variable.
void
__delete_min_insert(_Tp _M_key, bool)
__delete_min_insert(_Tp __key, bool)
{
#if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
int _M_source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
int __source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
__pos /= 2)
{
// The smaller one gets promoted.
if (_M_comp(_M_losers[__pos]._M_key, _M_key))
if (_M_comp(_M_losers[__pos]._M_key, __key))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_source, _M_source);
std::swap(_M_losers[__pos]._M_key, _M_key);
std::swap(_M_losers[__pos]._M_source, __source);
std::swap(_M_losers[__pos]._M_key, __key);
}
}
_M_losers[0]._M_source = _M_source;
_M_losers[0]._M_key = _M_key;
_M_losers[0]._M_source = __source;
_M_losers[0]._M_key = __key;
}
};
/** @brief Unguarded loser tree, keeping only pointers to the
* elements in the tree structure.
*
* No guarding is done, therefore not a single input sequence must
* run empty. This is a very fast variant.
*/
template<typename _Tp, typename _Compare>
/** @brief Unguarded loser tree, keeping only pointers to the
* elements in the tree structure.
*
* No guarding is done, therefore not a single input sequence must
* run empty. This is a very fast variant.
*/
template<typename _Tp, typename _Compare>
class _LoserTreePointerUnguardedBase
{
protected:
......@@ -813,7 +818,7 @@ template<typename _Tp, typename _Compare>
public:
_LoserTreePointerUnguardedBase(unsigned int __k, const _Tp& _sentinel,
_LoserTreePointerUnguardedBase(unsigned int __k, const _Tp& __sentinel,
_Compare __comp = std::less<_Tp>())
: _M_comp(__comp)
{
......@@ -827,7 +832,7 @@ template<typename _Tp, typename _Compare>
for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i)
{
_M_losers[__i]._M_keyp = &_sentinel;
_M_losers[__i]._M_keyp = &__sentinel;
_M_losers[__i]._M_source = -1;
}
}
......@@ -846,21 +851,21 @@ template<typename _Tp, typename _Compare>
}
void
__insert_start(const _Tp& _M_key, int _M_source, bool)
__insert_start(const _Tp& __key, int __source, bool)
{
unsigned int __pos = _M_k + _M_source;
unsigned int __pos = _M_k + __source;
_M_losers[__pos]._M_keyp = &_M_key;
_M_losers[__pos]._M_source = _M_source;
_M_losers[__pos]._M_keyp = &__key;
_M_losers[__pos]._M_source = __source;
}
};
/**
/**
* @brief Stable unguarded _LoserTree variant storing pointers.
*
* Unstable variant is implemented below using partial specialization.
*/
template<bool __stable/* default == true */, typename _Tp, typename _Compare>
template<bool __stable/* default == true */, typename _Tp, typename _Compare>
class _LoserTreePointerUnguarded
: public _LoserTreePointerUnguardedBase<_Tp, _Compare>
{
......@@ -869,9 +874,9 @@ template<bool __stable/* default == true */, typename _Tp, typename _Compare>
using _Base::_M_losers;
public:
_LoserTreePointerUnguarded(unsigned int __k, const _Tp& _sentinel,
_LoserTreePointerUnguarded(unsigned int __k, const _Tp& __sentinel,
_Compare __comp = std::less<_Tp>())
: _Base::_LoserTreePointerUnguardedBase(__k, _sentinel, __comp)
: _Base::_LoserTreePointerUnguardedBase(__k, __sentinel, __comp)
{ }
unsigned int
......@@ -881,8 +886,8 @@ template<bool __stable/* default == true */, typename _Tp, typename _Compare>
return __root;
else
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
if (!_M_comp(*_M_losers[__right]._M_keyp,
*_M_losers[__left]._M_keyp))
{
......@@ -912,39 +917,40 @@ template<bool __stable/* default == true */, typename _Tp, typename _Compare>
}
void
__delete_min_insert(const _Tp& _M_key, bool _M_sup)
__delete_min_insert(const _Tp& __key, bool __sup)
{
#if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
const _Tp* _M_keyp = &_M_key;
int _M_source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
const _Tp* __keyp = &__key;
int __source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
__pos /= 2)
{
// The smaller one gets promoted, ties are broken by _M_source.
if (_M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)
|| (!_M_comp(*_M_keyp, *_M_losers[__pos]._M_keyp)
&& _M_losers[__pos]._M_source < _M_source))
if (_M_comp(*_M_losers[__pos]._M_keyp, *__keyp)
|| (!_M_comp(*__keyp, *_M_losers[__pos]._M_keyp)
&& _M_losers[__pos]._M_source < __source))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_source, _M_source);
std::swap(_M_losers[__pos]._M_keyp, _M_keyp);
std::swap(_M_losers[__pos]._M_source, __source);
std::swap(_M_losers[__pos]._M_keyp, __keyp);
}
}
_M_losers[0]._M_source = _M_source;
_M_losers[0]._M_keyp = _M_keyp;
_M_losers[0]._M_source = __source;
_M_losers[0]._M_keyp = __keyp;
}
};
/**
/**
* @brief Unstable unguarded _LoserTree variant storing pointers.
*
* Stable variant is above.
*/
template<typename _Tp, typename _Compare>
template<typename _Tp, typename _Compare>
class _LoserTreePointerUnguarded</* __stable == */false, _Tp, _Compare>
: public _LoserTreePointerUnguardedBase<_Tp, _Compare>
{
......@@ -952,10 +958,10 @@ template<typename _Tp, typename _Compare>
using _Base::_M_k;
using _Base::_M_losers;
public:
_LoserTreePointerUnguarded(unsigned int __k, const _Tp& _sentinel,
public:
_LoserTreePointerUnguarded(unsigned int __k, const _Tp& __sentinel,
_Compare __comp = std::less<_Tp>())
: _Base::_LoserTreePointerUnguardedBase(__k, _sentinel, __comp)
: _Base::_LoserTreePointerUnguardedBase(__k, __sentinel, __comp)
{ }
unsigned int
......@@ -965,8 +971,8 @@ public:
return __root;
else
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
unsigned int __left = __init_winner(2 * __root);
unsigned int __right = __init_winner(2 * __root + 1);
#if _GLIBCXX_ASSERTIONS
// If __left one is sentinel then __right one must be, too.
......@@ -1003,28 +1009,29 @@ public:
}
void
__delete_min_insert(const _Tp& _M_key, bool _M_sup)
__delete_min_insert(const _Tp& __key, bool __sup)
{
#if _GLIBCXX_ASSERTIONS
// no dummy sequence can ever be at the top!
_GLIBCXX_PARALLEL_ASSERT(_M_losers[0]._M_source != -1);
#endif
const _Tp* _M_keyp = &_M_key;
int _M_source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
const _Tp* __keyp = &__key;
int __source = _M_losers[0]._M_source;
for (unsigned int __pos = (_M_k + __source) / 2; __pos > 0;
__pos /= 2)
{
// The smaller one gets promoted.
if (_M_comp(*(_M_losers[__pos]._M_keyp), *_M_keyp))
if (_M_comp(*(_M_losers[__pos]._M_keyp), *__keyp))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_source, _M_source);
std::swap(_M_losers[__pos]._M_keyp, _M_keyp);
std::swap(_M_losers[__pos]._M_source, __source);
std::swap(_M_losers[__pos]._M_keyp, __keyp);
}
}
_M_losers[0]._M_source = _M_source;
_M_losers[0]._M_keyp = _M_keyp;
_M_losers[0]._M_source = __source;
_M_losers[0]._M_keyp = __keyp;
}
};
} // namespace __gnu_parallel
......
......@@ -54,10 +54,9 @@ namespace __gnu_parallel
typename _OutputIterator, typename _DifferenceTp,
typename _Compare>
_OutputIterator
__merge_advance_usual(_RAIter1& __begin1,
_RAIter1 __end1,
_RAIter2& __begin2,
_RAIter2 __end2, _OutputIterator __target,
__merge_advance_usual(_RAIter1& __begin1, _RAIter1 __end1,
_RAIter2& __begin2, _RAIter2 __end2,
_OutputIterator __target,
_DifferenceTp __max_length, _Compare __comp)
{
typedef _DifferenceTp _DifferenceType;
......@@ -103,10 +102,8 @@ namespace __gnu_parallel
typename _OutputIterator, typename _DifferenceTp,
typename _Compare>
_OutputIterator
__merge_advance_movc(_RAIter1& __begin1,
_RAIter1 __end1,
_RAIter2& __begin2,
_RAIter2 __end2,
__merge_advance_movc(_RAIter1& __begin1, _RAIter1 __end1,
_RAIter2& __begin2, _RAIter2 __end2,
_OutputIterator __target,
_DifferenceTp __max_length, _Compare __comp)
{
......@@ -178,8 +175,8 @@ namespace __gnu_parallel
{
_GLIBCXX_CALL(__max_length)
return __merge_advance_movc(__begin1, __end1, __begin2, __end2, __target,
__max_length, __comp);
return __merge_advance_movc(__begin1, __end1, __begin2, __end2,
__target, __max_length, __comp);
}
/** @brief Merge routine fallback to sequential in case the
......@@ -195,13 +192,11 @@ namespace __gnu_parallel
template<typename _RAIter1, typename _RAIter2,
typename _RAIter3, typename _Compare>
inline _RAIter3
__parallel_merge_advance(_RAIter1& __begin1,
_RAIter1 __end1,
__parallel_merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
_RAIter2& __begin2,
// different iterators, parallel implementation
// not available
_RAIter2 __end2,
_RAIter3 __target, typename
_RAIter2 __end2, _RAIter3 __target, typename
std::iterator_traits<_RAIter1>::
difference_type __max_length, _Compare __comp)
{ return __merge_advance(__begin1, __end1, __begin2, __end2, __target,
......@@ -225,10 +220,8 @@ namespace __gnu_parallel
template<typename _RAIter1, typename _RAIter3,
typename _Compare>
inline _RAIter3
__parallel_merge_advance(_RAIter1& __begin1,
_RAIter1 __end1,
_RAIter1& __begin2,
_RAIter1 __end2,
__parallel_merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
_RAIter1& __begin2, _RAIter1 __end2,
_RAIter3 __target, typename
std::iterator_traits<_RAIter1>::
difference_type __max_length, _Compare __comp)
......@@ -242,17 +235,14 @@ namespace __gnu_parallel
typedef typename std::pair<_RAIter1, _RAIter1>
_IteratorPair;
_IteratorPair
seqs[2] = { std::make_pair(__begin1, __end1),
_IteratorPair __seqs[2] = { std::make_pair(__begin1, __end1),
std::make_pair(__begin2, __end2) };
_RAIter3
__target_end = parallel_multiway_merge
< /* __stable = */ true, /* __sentinels = */ false>(
seqs, seqs + 2, __target,
multiway_merge_exact_splitting
_RAIter3 __target_end = parallel_multiway_merge
< /* __stable = */ true, /* __sentinels = */ false>
(__seqs, __seqs + 2, __target, multiway_merge_exact_splitting
< /* __stable = */ true, _IteratorPair*,
_Compare, _DifferenceType1>,
__max_length, __comp, omp_get_max_threads());
_Compare, _DifferenceType1>, __max_length, __comp,
omp_get_max_threads());
return __target_end;
}
......
......@@ -53,8 +53,8 @@ namespace __gnu_parallel
/** @brief Compare __a pair of types lexicographically, ascending. */
template<typename _T1, typename _T2, typename _Compare>
class _Lexicographic
: public std::binary_function<
std::pair<_T1, _T2>, std::pair<_T1, _T2>, bool>
: public std::binary_function<std::pair<_T1, _T2>,
std::pair<_T1, _T2>, bool>
{
private:
_Compare& _M_comp;
......@@ -142,19 +142,19 @@ namespace __gnu_parallel
// Number of sequences, number of elements in total (possibly
// including padding).
_DifferenceType __m = std::distance(__begin_seqs, __end_seqs), __N = 0,
_DifferenceType __m = std::distance(__begin_seqs, __end_seqs), __nn = 0,
__nmax, __n, __r;
for (int __i = 0; __i < __m; __i++)
{
__N += std::distance(__begin_seqs[__i].first,
__nn += std::distance(__begin_seqs[__i].first,
__begin_seqs[__i].second);
_GLIBCXX_PARALLEL_ASSERT(
std::distance(__begin_seqs[__i].first,
__begin_seqs[__i].second) > 0);
}
if (__rank == __N)
if (__rank == __nn)
{
for (int __i = 0; __i < __m; __i++)
__begin_offsets[__i] = __begin_seqs[__i].second; // Very end.
......@@ -163,9 +163,9 @@ namespace __gnu_parallel
}
_GLIBCXX_PARALLEL_ASSERT(__m != 0);
_GLIBCXX_PARALLEL_ASSERT(__N != 0);
_GLIBCXX_PARALLEL_ASSERT(__nn != 0);
_GLIBCXX_PARALLEL_ASSERT(__rank >= 0);
_GLIBCXX_PARALLEL_ASSERT(__rank < __N);
_GLIBCXX_PARALLEL_ASSERT(__rank < __nn);
_DifferenceType* __ns = new _DifferenceType[__m];
_DifferenceType* __a = new _DifferenceType[__m];
......@@ -401,14 +401,14 @@ namespace __gnu_parallel
// Number of sequences, number of elements in total (possibly
// including padding).
_DifferenceType __m = std::distance(__begin_seqs, __end_seqs);
_DifferenceType __N = 0;
_DifferenceType __nn = 0;
_DifferenceType __nmax, __n, __r;
for (int __i = 0; __i < __m; __i++)
__N += std::distance(__begin_seqs[__i].first,
__nn += std::distance(__begin_seqs[__i].first,
__begin_seqs[__i].second);
if (__m == 0 || __N == 0 || __rank < 0 || __rank >= __N)
if (__m == 0 || __nn == 0 || __rank < 0 || __rank >= __nn)
{
// result undefined if there is no data or __rank is outside bounds
throw std::exception();
......@@ -433,7 +433,7 @@ namespace __gnu_parallel
// Pad all lists to this length, at least as long as any ns[__i],
// equality iff __nmax = 2^__k - 1
__l = pow2(__r) - 1;
__l = __round_up_to_pow2(__r) - 1;
for (int __i = 0; __i < __m; ++__i)
{
......
......@@ -54,7 +54,7 @@
namespace __gnu_parallel
{
/** @brief _Iterator wrapper supporting an implicit supremum at the end
/** @brief _Iterator wrapper supporting an implicit supremum at the end
* of the sequence, dominating all comparisons.
*
* The implicit supremum comes with a performance cost.
......@@ -62,7 +62,7 @@ namespace __gnu_parallel
* Deriving from _RAIter is not possible since
* _RAIter need not be a class.
*/
template<typename _RAIter, typename _Compare>
template<typename _RAIter, typename _Compare>
class _GuardedIterator
{
private:
......@@ -113,11 +113,11 @@ template<typename _RAIter, typename _Compare>
operator<(_GuardedIterator<_RAIter, _Compare>& __bi1,
_GuardedIterator<_RAIter, _Compare>& __bi2)
{
if (__bi1._M_current == __bi1._M_end) //__bi1 is sup
return __bi2._M_current == __bi2._M_end; //__bi2 is not sup
if (__bi2._M_current == __bi2._M_end) //__bi2 is sup
if (__bi1._M_current == __bi1._M_end) // __bi1 is sup
return __bi2._M_current == __bi2._M_end; // __bi2 is not sup
if (__bi2._M_current == __bi2._M_end) // __bi2 is sup
return true;
return (__bi1.__comp)(*__bi1, *__bi2); //normal compare
return (__bi1.__comp)(*__bi1, *__bi2); // normal compare
}
/** @brief Compare two elements referenced by guarded iterators.
......@@ -128,15 +128,15 @@ template<typename _RAIter, typename _Compare>
operator<=(_GuardedIterator<_RAIter, _Compare>& __bi1,
_GuardedIterator<_RAIter, _Compare>& __bi2)
{
if (__bi2._M_current == __bi2._M_end) //__bi1 is sup
return __bi1._M_current != __bi1._M_end; //__bi2 is not sup
if (__bi1._M_current == __bi1._M_end) //__bi2 is sup
if (__bi2._M_current == __bi2._M_end) // __bi1 is sup
return __bi1._M_current != __bi1._M_end; // __bi2 is not sup
if (__bi1._M_current == __bi1._M_end) // __bi2 is sup
return false;
return !(__bi1.__comp)(*__bi2, *__bi1); //normal compare
return !(__bi1.__comp)(*__bi2, *__bi1); // normal compare
}
};
template<typename _RAIter, typename _Compare>
template<typename _RAIter, typename _Compare>
class _UnguardedIterator
{
private:
......@@ -200,7 +200,7 @@ template<typename _RAIter, typename _Compare>
}
};
/** @brief Highly efficient 3-way merging procedure.
/** @brief Highly efficient 3-way merging procedure.
*
* Merging is done with the algorithm implementation described by Peter
* Sanders. Basically, the idea is to minimize the number of necessary
......@@ -225,7 +225,7 @@ template<typename _RAIter, typename _Compare>
*
* @return End iterator of output sequence.
*/
template<template<typename RAI, typename C> class iterator,
template<template<typename RAI, typename C> class iterator,
typename _RAIterIterator,
typename _RAIter3,
typename _DifferenceTp,
......@@ -250,7 +250,7 @@ template<template<typename RAI, typename C> class iterator,
return __target;
#if _GLIBCXX_ASSERTIONS
_DifferenceTp orig_length = __length;
_DifferenceTp __orig_length = __length;
#endif
iterator<_RAIter1, _Compare>
......@@ -280,15 +280,15 @@ template<template<typename RAI, typename C> class iterator,
else
goto __s210;
}
#define _GLIBCXX_PARALLEL_MERGE_3_CASE(__a,__b,__c,c0,c1) \
#define _GLIBCXX_PARALLEL_MERGE_3_CASE(__a, __b, __c, __c0, __c1) \
__s ## __a ## __b ## __c : \
*__target = *__seq ## __a; \
++__target; \
--__length; \
++__seq ## __a; \
if (__length == 0) goto finish; \
if (__seq ## __a c0 __seq ## __b) goto __s ## __a ## __b ## __c; \
if (__seq ## __a c1 __seq ## __c) goto __s ## __b ## __a ## __c; \
if (__length == 0) goto __finish; \
if (__seq ## __a __c0 __seq ## __b) goto __s ## __a ## __b ## __c; \
if (__seq ## __a __c1 __seq ## __c) goto __s ## __b ## __a ## __c; \
goto __s ## __b ## __c ## __a;
_GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=);
......@@ -300,7 +300,7 @@ template<template<typename RAI, typename C> class iterator,
#undef _GLIBCXX_PARALLEL_MERGE_3_CASE
finish:
__finish:
;
#if _GLIBCXX_ASSERTIONS
......@@ -308,7 +308,7 @@ template<template<typename RAI, typename C> class iterator,
((_RAIter1)__seq0 - __seqs_begin[0].first) +
((_RAIter1)__seq1 - __seqs_begin[1].first) +
((_RAIter1)__seq2 - __seqs_begin[2].first)
== orig_length);
== __orig_length);
#endif
__seqs_begin[0].first = __seq0;
......@@ -318,7 +318,7 @@ template<template<typename RAI, typename C> class iterator,
return __target;
}
/**
/**
* @brief Highly efficient 4-way merging procedure.
*
* Merging is done with the algorithm implementation described by Peter
......@@ -344,7 +344,7 @@ template<template<typename RAI, typename C> class iterator,
*
* @return End iterator of output sequence.
*/
template<template<typename RAI, typename C> class iterator,
template<template<typename RAI, typename C> class iterator,
typename _RAIterIterator,
typename _RAIter3,
typename _DifferenceTp,
......@@ -370,11 +370,14 @@ template<template<typename RAI, typename C> class iterator,
__seq2(__seqs_begin[2].first, __seqs_begin[2].second, __comp),
__seq3(__seqs_begin[3].first, __seqs_begin[3].second, __comp);
#define _GLIBCXX_PARALLEL_DECISION(__a,__b,__c,d) { \
if (__seq ## d < __seq ## __a) goto __s ## d ## __a ## __b ## __c; \
if (__seq ## d < __seq ## __b) goto __s ## __a ## d ## __b ## __c; \
if (__seq ## d < __seq ## __c) goto __s ## __a ## __b ## d ## __c; \
goto __s ## __a ## __b ## __c ## d; }
#define _GLIBCXX_PARALLEL_DECISION(__a, __b, __c, __d) { \
if (__seq ## __d < __seq ## __a) \
goto __s ## __d ## __a ## __b ## __c; \
if (__seq ## __d < __seq ## __b) \
goto __s ## __a ## __d ## __b ## __c; \
if (__seq ## __d < __seq ## __c) \
goto __s ## __a ## __b ## __d ## __c; \
goto __s ## __a ## __b ## __c ## __d; }
if (__seq0 <= __seq1)
{
......@@ -399,17 +402,21 @@ template<template<typename RAI, typename C> class iterator,
_GLIBCXX_PARALLEL_DECISION(2,1,0,3)
}
#define _GLIBCXX_PARALLEL_MERGE_4_CASE(__a,__b,__c,d,c0,c1,c2) \
__s ## __a ## __b ## __c ## d: \
if (__length == 0) goto finish; \
#define _GLIBCXX_PARALLEL_MERGE_4_CASE(__a, __b, __c, __d, \
__c0, __c1, __c2) \
__s ## __a ## __b ## __c ## __d: \
if (__length == 0) goto __finish; \
*__target = *__seq ## __a; \
++__target; \
--__length; \
++__seq ## __a; \
if (__seq ## __a c0 __seq ## __b) goto __s ## __a ## __b ## __c ## d; \
if (__seq ## __a c1 __seq ## __c) goto __s ## __b ## __a ## __c ## d; \
if (__seq ## __a c2 __seq ## d) goto __s ## __b ## __c ## __a ## d; \
goto __s ## __b ## __c ## d ## __a;
if (__seq ## __a __c0 __seq ## __b) \
goto __s ## __a ## __b ## __c ## __d; \
if (__seq ## __a __c1 __seq ## __c) \
goto __s ## __b ## __a ## __c ## __d; \
if (__seq ## __a __c2 __seq ## __d) \
goto __s ## __b ## __c ## __a ## __d; \
goto __s ## __b ## __c ## __d ## __a;
_GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 2, 3, <=, <=, <=);
_GLIBCXX_PARALLEL_MERGE_4_CASE(0, 1, 3, 2, <=, <=, <=);
......@@ -439,7 +446,7 @@ template<template<typename RAI, typename C> class iterator,
#undef _GLIBCXX_PARALLEL_MERGE_4_CASE
#undef _GLIBCXX_PARALLEL_DECISION
finish:
__finish:
;
__seqs_begin[0].first = __seq0;
......@@ -450,7 +457,7 @@ template<template<typename RAI, typename C> class iterator,
return __target;
}
/** @brief Multi-way merging procedure for a high branching factor,
/** @brief Multi-way merging procedure for a high branching factor,
* guarded case.
*
* This merging variant uses a LoserTree class as selected by <tt>_LT</tt>.
......@@ -468,7 +475,7 @@ template<template<typename RAI, typename C> class iterator,
*
* @return End iterator of output sequence.
*/
template<typename _LT,
template<typename _LT,
typename _RAIterIterator,
typename _RAIter3,
typename _DifferenceTp,
......@@ -532,7 +539,7 @@ template<typename _LT,
return __target;
}
/** @brief Multi-way merging procedure for a high branching factor,
/** @brief Multi-way merging procedure for a high branching factor,
* unguarded case.
*
* Merging is done using the LoserTree class <tt>_LT</tt>.
......@@ -550,7 +557,7 @@ template<typename _LT,
*
* @return End iterator of output sequence.
*/
template<typename _LT,
template<typename _LT,
typename _RAIterIterator,
typename _RAIter3,
typename _DifferenceTp, typename _Compare>
......@@ -620,7 +627,7 @@ template<typename _LT,
}
/** @brief Multi-way merging procedure for a high branching factor,
/** @brief Multi-way merging procedure for a high branching factor,
* requiring sentinels to exist.
*
* @param __stable The value must the same as for the used LoserTrees.
......@@ -638,7 +645,7 @@ template<typename _LT,
*
* @return End iterator of output sequence.
*/
template<typename UnguardedLoserTree,
template<typename UnguardedLoserTree,
typename _RAIterIterator,
typename _RAIter3,
typename _DifferenceTp,
......@@ -672,8 +679,7 @@ template<typename UnguardedLoserTree,
// non-sentinel elements as we have.
++((*__s).second);
__target_end = multiway_merge_loser_tree_unguarded
<UnguardedLoserTree>
__target_end = multiway_merge_loser_tree_unguarded<UnguardedLoserTree>
(__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp);
#if _GLIBCXX_ASSERTIONS
......@@ -689,12 +695,12 @@ template<typename UnguardedLoserTree,
return __target_end;
}
/**
/**
* @brief Traits for determining whether the loser tree should
* use pointers or copies.
*
* The field "_M_use_pointer" is used to determine whether to use pointers in
* the loser trees or whether to copy the values into the loser tree.
* The field "_M_use_pointer" is used to determine whether to use pointers
* in he loser trees or whether to copy the values into the loser tree.
*
* The default behavior is to use pointers if the data type is 4 times as
* big as the pointer to it.
......@@ -713,7 +719,7 @@ template<typename UnguardedLoserTree,
*
* @param _Tp type to give the loser tree traits for.
*/
template <typename _Tp>
template <typename _Tp>
struct _LoserTreeTraits
{
/**
......@@ -725,12 +731,12 @@ template <typename _Tp>
static const bool _M_use_pointer = (sizeof(_Tp) > 4 * sizeof(_Tp*));
};
/**
/**
* @brief Switch for 3-way merging with __sentinels turned off.
*
* Note that 3-way merging is always stable!
*/
template<bool __sentinels /*default == false*/,
template<bool __sentinels /*default == false*/,
typename _RAIterIterator,
typename _RAIter3,
typename _DifferenceTp,
......@@ -742,18 +748,16 @@ template<bool __sentinels /*default == false*/,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
{
return multiway_merge_3_variant<_GuardedIterator>
(__seqs_begin, __seqs_end, __target, __length, __comp);
}
{ return multiway_merge_3_variant<_GuardedIterator>
(__seqs_begin, __seqs_end, __target, __length, __comp); }
};
/**
/**
* @brief Switch for 3-way merging with __sentinels turned on.
*
* Note that 3-way merging is always stable!
*/
template<typename _RAIterIterator,
template<typename _RAIterIterator,
typename _RAIter3,
typename _DifferenceTp,
typename _Compare>
......@@ -766,18 +770,16 @@ template<typename _RAIterIterator,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
{
return multiway_merge_3_variant<_UnguardedIterator>
(__seqs_begin, __seqs_end, __target, __length, __comp);
}
{ return multiway_merge_3_variant<_UnguardedIterator>
(__seqs_begin, __seqs_end, __target, __length, __comp); }
};
/**
/**
* @brief Switch for 4-way merging with __sentinels turned off.
*
* Note that 4-way merging is always stable!
*/
template<bool __sentinels /*default == false*/,
template<bool __sentinels /*default == false*/,
typename _RAIterIterator,
typename _RAIter3,
typename _DifferenceTp,
......@@ -789,18 +791,16 @@ template<bool __sentinels /*default == false*/,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
{
return multiway_merge_4_variant<_GuardedIterator>
(__seqs_begin, __seqs_end, __target, __length, __comp);
}
{ return multiway_merge_4_variant<_GuardedIterator>
(__seqs_begin, __seqs_end, __target, __length, __comp); }
};
/**
/**
* @brief Switch for 4-way merging with __sentinels turned on.
*
* Note that 4-way merging is always stable!
*/
template<typename _RAIterIterator,
template<typename _RAIterIterator,
typename _RAIter3,
typename _DifferenceTp,
typename _Compare>
......@@ -813,16 +813,14 @@ template<typename _RAIterIterator,
_RAIterIterator __seqs_end,
_RAIter3 __target,
_DifferenceTp __length, _Compare __comp)
{
return multiway_merge_4_variant<_UnguardedIterator>
(__seqs_begin, __seqs_end, __target, __length, __comp);
}
{ return multiway_merge_4_variant<_UnguardedIterator>
(__seqs_begin, __seqs_end, __target, __length, __comp); }
};
/**
/**
* @brief Switch for k-way merging with __sentinels turned on.
*/
template<bool __sentinels,
template<bool __sentinels,
bool __stable,
typename _RAIterIterator,
typename _RAIter3,
......@@ -855,10 +853,10 @@ template<bool __sentinels,
}
};
/**
/**
* @brief Switch for k-way merging with __sentinels turned off.
*/
template<bool __stable,
template<bool __stable,
typename _RAIterIterator,
typename _RAIter3,
typename _DifferenceTp,
......@@ -891,7 +889,7 @@ template<bool __stable,
}
};
/** @brief Sequential multi-way merging switch.
/** @brief Sequential multi-way merging switch.
*
* The _GLIBCXX_PARALLEL_DECISION is based on the branching factor and
* runtime settings.
......@@ -904,7 +902,7 @@ template<bool __stable,
* @param __stable Stable merging incurs a performance penalty.
* @param __sentinel The sequences have __a __sentinel element.
* @return End iterator of output sequence. */
template<bool __stable,
template<bool __stable,
bool __sentinels,
typename _RAIterIterator,
typename _RAIter3,
......@@ -931,8 +929,8 @@ template<bool __stable,
#if _GLIBCXX_ASSERTIONS
for (_RAIterIterator __s = __seqs_begin; __s != __seqs_end; ++__s)
{
_GLIBCXX_PARALLEL_ASSERT(
__is_sorted((*__s).first, (*__s).second, __comp));
_GLIBCXX_PARALLEL_ASSERT(__is_sorted((*__s).first,
(*__s).second, __comp));
}
#endif
......@@ -966,30 +964,20 @@ template<bool __stable,
__target, __length, __comp);
break;
case 3:
__return_target = __multiway_merge_3_variant_sentinel_switch<
__sentinels,
_RAIterIterator,
_RAIter3,
_DifferenceTp,
_Compare>()(__seqs_begin, __seqs_end, __target, __length, __comp);
__return_target = __multiway_merge_3_variant_sentinel_switch
<__sentinels, _RAIterIterator, _RAIter3, _DifferenceTp, _Compare>()
(__seqs_begin, __seqs_end, __target, __length, __comp);
break;
case 4:
__return_target = __multiway_merge_4_variant_sentinel_switch<
__sentinels,
_RAIterIterator,
_RAIter3,
_DifferenceTp,
_Compare>()(__seqs_begin, __seqs_end, __target, __length, __comp);
__return_target = __multiway_merge_4_variant_sentinel_switch
<__sentinels, _RAIterIterator, _RAIter3, _DifferenceTp, _Compare>()
(__seqs_begin, __seqs_end, __target, __length, __comp);
break;
default:
__return_target = __multiway_merge_k_variant_sentinel_switch<
__sentinels,
__stable,
_RAIterIterator,
_RAIter3,
_DifferenceTp,
_Compare>()(__seqs_begin, __seqs_end, __target, __sentinel,
__length, __comp);
__return_target = __multiway_merge_k_variant_sentinel_switch
<__sentinels, __stable, _RAIterIterator, _RAIter3, _DifferenceTp,
_Compare>()
(__seqs_begin, __seqs_end, __target, __sentinel, __length, __comp);
break;
}
#if _GLIBCXX_ASSERTIONS
......@@ -1000,12 +988,12 @@ template<bool __stable,
return __return_target;
}
/**
/**
* @brief Stable sorting functor.
*
* Used to reduce code instanciation in multiway_merge_sampling_splitting.
*/
template<bool __stable, class _RAIter, class _StrictWeakOrdering>
template<bool __stable, class _RAIter, class _StrictWeakOrdering>
struct _SamplingSorter
{
void
......@@ -1013,12 +1001,12 @@ template<bool __stable, class _RAIter, class _StrictWeakOrdering>
{ __gnu_sequential::stable_sort(__first, __last, __comp); }
};
/**
/**
* @brief Non-__stable sorting functor.
*
* Used to reduce code instantiation in multiway_merge_sampling_splitting.
*/
template<class _RAIter, class _StrictWeakOrdering>
template<class _RAIter, class _StrictWeakOrdering>
struct _SamplingSorter<false, _RAIter, _StrictWeakOrdering>
{
void
......@@ -1026,10 +1014,10 @@ template<class _RAIter, class _StrictWeakOrdering>
{ __gnu_sequential::sort(__first, __last, __comp); }
};
/**
/**
* @brief Sampling based splitting for parallel multiway-merge routine.
*/
template<bool __stable,
template<bool __stable,
typename _RAIterIterator,
typename _Compare,
typename _DifferenceType>
......@@ -1061,8 +1049,7 @@ template<bool __stable,
for (int __s = 0; __s < __k; ++__s)
for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
{
_DifferenceType sample_index =
static_cast<_DifferenceType>
_DifferenceType sample_index = static_cast<_DifferenceType>
(_GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__s])
* (double(__i + 1) / (__num_samples + 1))
* (double(__length) / __total_length));
......@@ -1081,37 +1068,34 @@ template<bool __stable,
{
// For each sequence.
if (__slab > 0)
__pieces[__slab][__seq].first =
std::upper_bound
(__seqs_begin[__seq].first,
__seqs_begin[__seq].second,
__pieces[__slab][__seq].first = std::upper_bound
(__seqs_begin[__seq].first, __seqs_begin[__seq].second,
__samples[__num_samples * __k * __slab / __num_threads],
__comp) - __seqs_begin[__seq].first;
__comp)
- __seqs_begin[__seq].first;
else
// Absolute beginning.
__pieces[__slab][__seq].first = 0;
if ((__slab + 1) < __num_threads)
__pieces[__slab][__seq].second =
std::upper_bound
(__seqs_begin[__seq].first,
__seqs_begin[__seq].second,
__samples[__num_samples * __k * (__slab + 1) /
__num_threads], __comp)
__pieces[__slab][__seq].second = std::upper_bound
(__seqs_begin[__seq].first, __seqs_begin[__seq].second,
__samples[__num_samples * __k * (__slab + 1) / __num_threads],
__comp)
- __seqs_begin[__seq].first;
else
// Absolute end.
__pieces[__slab][__seq].second
= _GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__seq]);
__pieces[__slab][__seq].second =
_GLIBCXX_PARALLEL_LENGTH(__seqs_begin[__seq]);
}
::operator delete(__samples);
}
/**
/**
* @brief Exact splitting for parallel multiway-merge routine.
*
* None of the passed sequences may be empty.
*/
template<bool __stable,
template<bool __stable,
typename _RAIterIterator,
typename _Compare,
typename _DifferenceType>
......@@ -1134,7 +1118,8 @@ template<bool __stable,
const int __num_threads = omp_get_num_threads();
// (Settings::multiway_merge_splitting == __gnu_parallel::_Settings::EXACT).
// (Settings::multiway_merge_splitting
// == __gnu_parallel::_Settings::EXACT).
std::vector<_RAIter1>* __offsets =
new std::vector<_RAIter1>[__num_threads];
std::vector<std::pair<_RAIter1, _RAIter1> > __se(__k);
......@@ -1148,8 +1133,7 @@ template<bool __stable,
for (int __s = 0; __s < (__num_threads - 1); ++__s)
{
__offsets[__s].resize(__k);
multiseq_partition
(__se.begin(), __se.end(), __borders[__s + 1],
multiseq_partition(__se.begin(), __se.end(), __borders[__s + 1],
__offsets[__s].begin(), __comp);
// Last one also needed and available.
......@@ -1158,7 +1142,8 @@ template<bool __stable,
__offsets[__num_threads - 1].resize(__k);
multiseq_partition(__se.begin(), __se.end(),
_DifferenceType(__length),
__offsets[__num_threads - 1].begin(), __comp);
__offsets[__num_threads - 1].begin(),
__comp);
}
}
delete[] __borders;
......@@ -1191,7 +1176,7 @@ template<bool __stable,
delete[] __offsets;
}
/** @brief Parallel multi-way merge routine.
/** @brief Parallel multi-way merge routine.
*
* The _GLIBCXX_PARALLEL_DECISION is based on the branching factor
* and runtime settings.
......@@ -1210,7 +1195,7 @@ template<bool __stable,
* @param __sentinel Ignored.
* @return End iterator of output sequence.
*/
template<bool __stable,
template<bool __stable,
bool __sentinels,
typename _RAIterIterator,
typename _RAIter3,
......@@ -1282,8 +1267,8 @@ template<bool __stable,
__pieces[__s].resize(__k);
_DifferenceType __num_samples =
__gnu_parallel::_Settings::get().merge_oversampling *
__num_threads;
__gnu_parallel::_Settings::get().merge_oversampling
* __num_threads;
__splitter(__ne_seqs, __ne_seqs + __k, __length, __total_length,
__comp, __pieces);
......@@ -1299,15 +1284,14 @@ template<bool __stable,
seq_type* __chunks = new seq_type[__k];
for (int __s = 0; __s < __k; ++__s)
{
__chunks[__s] = std::make_pair(
__ne_seqs[__s].first + __pieces[__iam][__s].first,
__ne_seqs[__s].first + __pieces[__iam][__s].second);
}
__chunks[__s] = std::make_pair(__ne_seqs[__s].first
+ __pieces[__iam][__s].first,
__ne_seqs[__s].first
+ __pieces[__iam][__s].second);
if(__length > __target_position)
__sequential_multiway_merge<__stable, __sentinels>(
__chunks, __chunks + __k, __target + __target_position,
__sequential_multiway_merge<__stable, __sentinels>
(__chunks, __chunks + __k, __target + __target_position,
*(__seqs_begin->second), __length - __target_position, __comp);
delete[] __chunks;
......@@ -1334,7 +1318,7 @@ template<bool __stable,
return __target + __length;
}
/**
/**
* @brief Multiway Merge Frontend.
*
* Merge the sequences specified by seqs_begin and __seqs_end into
......@@ -1370,7 +1354,8 @@ template<bool __stable,
* int __out[33];
* std::vector<std::pair<int*> > seqs;
* for (int __i = 0; __i < 10; ++__i)
* { seqs.push(std::make_pair<int*>(sequences[__i], sequences[__i] + 10)) }
* { seqs.push(std::make_pair<int*>(sequences[__i],
* sequences[__i] + 10)) }
*
* multiway_merge(seqs.begin(), seqs.end(), __target, std::less<int>(), 33);
* </pre>
......@@ -1402,9 +1387,9 @@ template<bool __stable,
*
* @return _M_end iterator of output sequence
*/
// multiway_merge
// public interface
template<typename _RAIterPairIterator,
// multiway_merge
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1429,8 +1414,8 @@ template<typename _RAIterPairIterator,
*(__seqs_begin->second), __length, __comp);
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1451,15 +1436,15 @@ template<typename _RAIterPairIterator,
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
if ((__seqs_end - __seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
if ((__seqs_end - __seqs_begin > 1)
&& _GLIBCXX_PARALLEL_CONDITION(
((__seqs_end - __seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((_SequenceIndex)__length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
return parallel_multiway_merge
</* __stable = */ false, /* __sentinels = */ false>(
__seqs_begin, __seqs_end, __target,
</* __stable = */ false, /* __sentinels = */ false>
(__seqs_begin, __seqs_end, __target,
multiway_merge_exact_splitting</* __stable = */ false,
typename std::iterator_traits<_RAIterPairIterator>
::value_type*, _Compare, _DifferenceTp>,
......@@ -1467,13 +1452,13 @@ template<typename _RAIterPairIterator,
__tag.__get_num_threads());
else
return __sequential_multiway_merge
</* __stable = */ false, /* __sentinels = */ false>(
__seqs_begin, __seqs_end, __target, *(__seqs_begin->second),
__length, __comp);
</* __stable = */ false, /* __sentinels = */ false>
(__seqs_begin, __seqs_end, __target,
*(__seqs_begin->second), __length, __comp);
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1494,16 +1479,15 @@ template<typename _RAIterPairIterator,
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
if ((__seqs_end - __seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
if ((__seqs_end - __seqs_begin > 1)
&& _GLIBCXX_PARALLEL_CONDITION(
((__seqs_end - __seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((_SequenceIndex)__length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
return parallel_multiway_merge
</* __stable = */ false, /* __sentinels = */ false>(
__seqs_begin, __seqs_end,
__target,
</* __stable = */ false, /* __sentinels = */ false>
(__seqs_begin, __seqs_end, __target,
multiway_merge_exact_splitting</* __stable = */ false,
typename std::iterator_traits<_RAIterPairIterator>
::value_type*, _Compare, _DifferenceTp>,
......@@ -1511,13 +1495,13 @@ template<typename _RAIterPairIterator,
__tag.__get_num_threads());
else
return __sequential_multiway_merge
</* __stable = */ false, /* __sentinels = */ false>(
__seqs_begin, __seqs_end,
__target, *(__seqs_begin->second), __length, __comp);
</* __stable = */ false, /* __sentinels = */ false>
(__seqs_begin, __seqs_end, __target,
*(__seqs_begin->second), __length, __comp);
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1527,13 +1511,11 @@ template<typename _RAIterPairIterator,
_RAIterOut __target,
_DifferenceTp __length, _Compare __comp,
parallel_tag __tag = parallel_tag(0))
{
return multiway_merge(__seqs_begin, __seqs_end, __target, __length,
__comp, exact_tag(__tag.__get_num_threads()));
}
{ return multiway_merge(__seqs_begin, __seqs_end, __target, __length,
__comp, exact_tag(__tag.__get_num_threads())); }
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1543,14 +1525,12 @@ template<typename _RAIterPairIterator,
_RAIterOut __target,
_DifferenceTp __length, _Compare __comp,
default_parallel_tag __tag)
{
return multiway_merge(__seqs_begin, __seqs_end, __target, __length,
__comp, exact_tag(__tag.__get_num_threads()));
}
{ return multiway_merge(__seqs_begin, __seqs_end, __target, __length,
__comp, exact_tag(__tag.__get_num_threads())); }
// stable_multiway_merge
// public interface
template<typename _RAIterPairIterator,
// stable_multiway_merge
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1571,12 +1551,12 @@ template<typename _RAIterPairIterator,
// Execute multiway merge *sequentially*.
return __sequential_multiway_merge
</* __stable = */ true, /* __sentinels = */ false>
(__seqs_begin, __seqs_end, __target, *(__seqs_begin->second), __length,
__comp);
(__seqs_begin, __seqs_end, __target,
*(__seqs_begin->second), __length, __comp);
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1597,30 +1577,29 @@ template<typename _RAIterPairIterator,
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
if ((__seqs_end - __seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
if ((__seqs_end - __seqs_begin > 1)
&& _GLIBCXX_PARALLEL_CONDITION(
((__seqs_end - __seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((_SequenceIndex)__length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
return parallel_multiway_merge
</* __stable = */ true, /* __sentinels = */ false>(
__seqs_begin, __seqs_end,
__target,
</* __stable = */ true, /* __sentinels = */ false>
(__seqs_begin, __seqs_end, __target,
multiway_merge_exact_splitting</* __stable = */ true,
typename std::iterator_traits<_RAIterPairIterator>
::value_type*, _Compare, _DifferenceTp>,
static_cast<_DifferenceType>(__length), __comp,
__tag.__get_num_threads());
else
return __sequential_multiway_merge</* __stable = */ true,
/* __sentinels = */ false>(
__seqs_begin, __seqs_end,
__target, *(__seqs_begin->second), __length, __comp);
return __sequential_multiway_merge
</* __stable = */ true, /* __sentinels = */ false>
(__seqs_begin, __seqs_end, __target,
*(__seqs_begin->second), __length, __comp);
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1641,16 +1620,15 @@ template<typename _RAIterPairIterator,
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
if ((__seqs_end - __seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
if ((__seqs_end - __seqs_begin > 1)
&& _GLIBCXX_PARALLEL_CONDITION(
((__seqs_end - __seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((_SequenceIndex)__length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
return parallel_multiway_merge
</* __stable = */ true, /* __sentinels = */ false>(
__seqs_begin, __seqs_end,
__target,
</* __stable = */ true, /* __sentinels = */ false>
(__seqs_begin, __seqs_end, __target,
multiway_merge_sampling_splitting</* __stable = */ true,
typename std::iterator_traits<_RAIterPairIterator>
::value_type*, _Compare, _DifferenceTp>,
......@@ -1658,13 +1636,13 @@ template<typename _RAIterPairIterator,
__tag.__get_num_threads());
else
return __sequential_multiway_merge
</* __stable = */ true, /* __sentinels = */ false>(
__seqs_begin, __seqs_end,
__target, *(__seqs_begin->second), __length, __comp);
</* __stable = */ true, /* __sentinels = */ false>
(__seqs_begin, __seqs_end, __target,
*(__seqs_begin->second), __length, __comp);
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1680,8 +1658,8 @@ template<typename _RAIterPairIterator,
exact_tag(__tag.__get_num_threads()));
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1697,7 +1675,7 @@ template<typename _RAIterPairIterator,
exact_tag(__tag.__get_num_threads()));
}
/**
/**
* @brief Multiway Merge Frontend.
*
* Merge the sequences specified by seqs_begin and __seqs_end into
......@@ -1737,7 +1715,8 @@ template<typename _RAIterPairIterator,
* int __out[33];
* std::vector<std::pair<int*> > seqs;
* for (int __i = 0; __i < 10; ++__i)
* { seqs.push(std::make_pair<int*>(sequences[__i], sequences[__i] + 10)) }
* { seqs.push(std::make_pair<int*>(sequences[__i],
* sequences[__i] + 10)) }
*
* multiway_merge(seqs.begin(), seqs.end(), __target, std::less<int>(), 33);
* </pre>
......@@ -1772,9 +1751,9 @@ template<typename _RAIterPairIterator,
*
* @return _M_end iterator of output sequence
*/
// multiway_merge_sentinels
// public interface
template<typename _RAIterPairIterator,
// multiway_merge_sentinels
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1799,8 +1778,8 @@ template<typename _RAIterPairIterator,
__target, *(__seqs_begin->second), __length, __comp);
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1821,16 +1800,15 @@ template<typename _RAIterPairIterator,
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
if ((__seqs_end - __seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
if ((__seqs_end - __seqs_begin > 1)
&& _GLIBCXX_PARALLEL_CONDITION(
((__seqs_end - __seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((_SequenceIndex)__length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
return parallel_multiway_merge
</* __stable = */ false, /* __sentinels = */ true>(
__seqs_begin, __seqs_end,
__target,
</* __stable = */ false, /* __sentinels = */ true>
(__seqs_begin, __seqs_end, __target,
multiway_merge_exact_splitting</* __stable = */ false,
typename std::iterator_traits<_RAIterPairIterator>
::value_type*, _Compare, _DifferenceTp>,
......@@ -1838,13 +1816,13 @@ template<typename _RAIterPairIterator,
__tag.__get_num_threads());
else
return __sequential_multiway_merge
</* __stable = */ false, /* __sentinels = */ true>(
__seqs_begin, __seqs_end,
__target, *(__seqs_begin->second), __length, __comp);
</* __stable = */ false, /* __sentinels = */ true>
(__seqs_begin, __seqs_end, __target,
*(__seqs_begin->second), __length, __comp);
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1865,8 +1843,8 @@ template<typename _RAIterPairIterator,
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
if ((__seqs_end - __seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
if ((__seqs_end - __seqs_begin > 1)
&& _GLIBCXX_PARALLEL_CONDITION(
((__seqs_end - __seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((_SequenceIndex)__length >=
......@@ -1882,12 +1860,12 @@ template<typename _RAIterPairIterator,
else
return __sequential_multiway_merge
</* __stable = */false, /* __sentinels = */ true>(
__seqs_begin, __seqs_end,
__target, *(__seqs_begin->second), __length, __comp);
__seqs_begin, __seqs_end, __target,
*(__seqs_begin->second), __length, __comp);
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1903,8 +1881,8 @@ template<typename _RAIterPairIterator,
exact_tag(__tag.__get_num_threads()));
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1920,9 +1898,9 @@ template<typename _RAIterPairIterator,
exact_tag(__tag.__get_num_threads()));
}
// stable_multiway_merge_sentinels
// public interface
template<typename _RAIterPairIterator,
// stable_multiway_merge_sentinels
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1943,12 +1921,12 @@ template<typename _RAIterPairIterator,
// Execute multiway merge *sequentially*.
return __sequential_multiway_merge
</* __stable = */ true, /* __sentinels = */ true>
(__seqs_begin, __seqs_end, __target, *(__seqs_begin->second),
__length, __comp);
(__seqs_begin, __seqs_end, __target,
*(__seqs_begin->second), __length, __comp);
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -1969,16 +1947,15 @@ template<typename _RAIterPairIterator,
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
if ((__seqs_end - __seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
if ((__seqs_end - __seqs_begin > 1)
&& _GLIBCXX_PARALLEL_CONDITION(
((__seqs_end - __seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((_SequenceIndex)__length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
return parallel_multiway_merge
</* __stable = */ true, /* __sentinels = */ true>(
__seqs_begin, __seqs_end,
__target,
</* __stable = */ true, /* __sentinels = */ true>
(__seqs_begin, __seqs_end, __target,
multiway_merge_exact_splitting</* __stable = */ true,
typename std::iterator_traits<_RAIterPairIterator>
::value_type*, _Compare, _DifferenceTp>,
......@@ -1986,13 +1963,13 @@ template<typename _RAIterPairIterator,
__tag.__get_num_threads());
else
return __sequential_multiway_merge
</* __stable = */ true, /* __sentinels = */ true>(
__seqs_begin, __seqs_end, __target, *(__seqs_begin->second),
__length, __comp);
</* __stable = */ true, /* __sentinels = */ true>
(__seqs_begin, __seqs_end, __target,
*(__seqs_begin->second), __length, __comp);
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -2014,16 +1991,15 @@ template<typename _RAIterPairIterator,
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
if ((__seqs_end - __seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
if ((__seqs_end - __seqs_begin > 1)
&& _GLIBCXX_PARALLEL_CONDITION(
((__seqs_end - __seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((_SequenceIndex)__length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
return parallel_multiway_merge
</* __stable = */ true, /* __sentinels = */ true>(
__seqs_begin, __seqs_end,
__target,
</* __stable = */ true, /* __sentinels = */ true>
(__seqs_begin, __seqs_end, __target,
multiway_merge_sampling_splitting</* __stable = */ true,
typename std::iterator_traits<_RAIterPairIterator>
::value_type*, _Compare, _DifferenceTp>,
......@@ -2031,13 +2007,13 @@ template<typename _RAIterPairIterator,
__tag.__get_num_threads());
else
return __sequential_multiway_merge
</* __stable = */ true, /* __sentinels = */ true>(
__seqs_begin, __seqs_end,
__target, *(__seqs_begin->second), __length, __comp);
</* __stable = */ true, /* __sentinels = */ true>
(__seqs_begin, __seqs_end, __target,
*(__seqs_begin->second), __length, __comp);
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -2054,8 +2030,8 @@ template<typename _RAIterPairIterator,
exact_tag(__tag.__get_num_threads()));
}
// public interface
template<typename _RAIterPairIterator,
// public interface
template<typename _RAIterPairIterator,
typename _RAIterOut,
typename _DifferenceTp,
typename _Compare>
......@@ -2070,7 +2046,6 @@ template<typename _RAIterPairIterator,
(__seqs_begin, __seqs_end, __target, __length, __comp,
exact_tag(__tag.__get_num_threads()));
}
}; // namespace __gnu_parallel
#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H */
......@@ -125,8 +125,7 @@ namespace __gnu_parallel
/** @brief Split by exact splitting. */
template<typename _RAIter, typename _Compare,
typename _SortingPlacesIterator>
struct _SplitConsistently<true, _RAIter,
_Compare, _SortingPlacesIterator>
struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
{
void
operator()(const _ThreadIndex __iam,
......@@ -140,19 +139,19 @@ namespace __gnu_parallel
std::vector<std::pair<_SortingPlacesIterator,
_SortingPlacesIterator> >
seqs(__sd->_M_num_threads);
__seqs(__sd->_M_num_threads);
for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
__seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
__sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1]
- __sd->_M_starts[__s]));
std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads);
std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
// if not last thread
if (__iam < __sd->_M_num_threads - 1)
multiseq_partition(seqs.begin(), seqs.end(),
__sd->_M_starts[__iam + 1], _M_offsets.begin(),
multiseq_partition(__seqs.begin(), __seqs.end(),
__sd->_M_starts[__iam + 1], __offsets.begin(),
__comp);
for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++)
......@@ -160,7 +159,7 @@ namespace __gnu_parallel
// for each sequence
if (__iam < (__sd->_M_num_threads - 1))
__sd->_M_pieces[__iam][__seq]._M_end
= _M_offsets[__seq] - seqs[__seq].first;
= __offsets[__seq] - __seqs[__seq].first;
else
// very end of this sequence
__sd->_M_pieces[__iam][__seq]._M_end =
......@@ -185,8 +184,7 @@ namespace __gnu_parallel
/** @brief Split by sampling. */
template<typename _RAIter, typename _Compare,
typename _SortingPlacesIterator>
struct _SplitConsistently<false, _RAIter, _Compare,
_SortingPlacesIterator>
struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
{
void
operator()(const _ThreadIndex __iam,
......@@ -282,10 +280,8 @@ namespace __gnu_parallel
const _RAIter& __target,
_Compare& __comp,
_DiffType __length_am) const
{
stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
__comp, sequential_tag());
}
{ stable_multiway_merge(__seqs_begin, __seqs_end, __target,
__length_am, __comp, sequential_tag()); }
};
template<typename Seq_RAIter, typename _RAIter,
......@@ -298,10 +294,8 @@ namespace __gnu_parallel
const _RAIter& __target,
_Compare& __comp,
_DiffType __length_am) const
{
multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
sequential_tag());
}
{ multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
__comp, sequential_tag()); }
};
/** @brief PMWMS code executed by each thread.
......@@ -321,8 +315,8 @@ namespace __gnu_parallel
_ThreadIndex __iam = omp_get_thread_num();
// Length of this thread's chunk, before merging.
_DifferenceType __length_local
= __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
_DifferenceType __length_local =
__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
// Sort in temporary storage, leave space for sentinel.
......@@ -350,8 +344,7 @@ namespace __gnu_parallel
_DifferenceType __num_samples =
_Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
_SplitConsistently
<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
_SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
(__iam, __sd, __comp, __num_samples);
// Offset from __target __begin, __length after merging.
......@@ -366,22 +359,20 @@ namespace __gnu_parallel
typedef std::vector<
std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
_SeqVector;
_SeqVector seqs(__sd->_M_num_threads);
_SeqVector __seqs(__sd->_M_num_threads);
for (int __s = 0; __s < __sd->_M_num_threads; ++__s)
{
seqs[__s] =
std::make_pair
(__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin,
__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end);
__seqs[__s] =
std::make_pair(__sd->_M_temporary[__s]
+ __sd->_M_pieces[__iam][__s]._M_begin,
__sd->_M_temporary[__s]
+ __sd->_M_pieces[__iam][__s]._M_end);
}
__possibly_stable_multiway_merge<
__stable,
typename _SeqVector::iterator,
_RAIter,
_Compare, _DifferenceType>()
(seqs.begin(), seqs.end(),
__stable, typename _SeqVector::iterator,
_RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
__sd->_M_source + __offset, __comp,
__length_am);
......@@ -421,7 +412,7 @@ namespace __gnu_parallel
// shared variables
_PMWMSSortingData<_RAIter> __sd;
_DifferenceType* _M_starts;
_DifferenceType* __starts;
# pragma omp parallel num_threads(__num_threads)
{
......@@ -450,26 +441,25 @@ namespace __gnu_parallel
= new std::vector<_Piece<_DifferenceType> >[__num_threads];
for (int __s = 0; __s < __num_threads; ++__s)
__sd._M_pieces[__s].resize(__num_threads);
_M_starts = __sd._M_starts
= new _DifferenceType[__num_threads + 1];
__starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
_DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __split = __n % __num_threads;
_DifferenceType __pos = 0;
for (int __i = 0; __i < __num_threads; ++__i)
{
_M_starts[__i] = __pos;
__pos += (__i < __split)
? (__chunk_length + 1) : __chunk_length;
__starts[__i] = __pos;
__pos += ((__i < __split)
? (__chunk_length + 1) : __chunk_length);
}
_M_starts[__num_threads] = __pos;
__starts[__num_threads] = __pos;
} //single
// Now sort in parallel.
parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
} //parallel
delete[] _M_starts;
delete[] __starts;
delete[] __sd._M_temporary;
if (!__exact)
......
......@@ -69,7 +69,7 @@ namespace __parallel
__accumulate_switch(_IIter __begin, _IIter __end,
_Tp __init, _IteratorTag)
{ return accumulate(__begin, __end, __init,
__gnu_parallel::sequential_tag()); }
__gnu_parallel::sequential_tag()); }
template<typename _IIter, typename _Tp, typename _BinaryOperation,
typename _IteratorTag>
......
......@@ -74,8 +74,8 @@ namespace __gnu_parallel
_DifferenceType;
_DifferenceType __length = __end - __begin;
_ThreadIndex __num_threads =
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
_ThreadIndex __num_threads = __gnu_parallel::min<_DifferenceType>
(__get_max_threads(), __length);
_Result *__thread_results;
......@@ -94,8 +94,8 @@ namespace __gnu_parallel
#pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size)
for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
__thread_results[__iam] =
__r(__thread_results[__iam], __f(__o, __begin+__pos));
__thread_results[__iam] = __r(__thread_results[__iam],
__f(__o, __begin+__pos));
} //parallel
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
......
......@@ -74,8 +74,8 @@ namespace __gnu_parallel
_DifferenceType;
_DifferenceType __length = __end - __begin;
_ThreadIndex __num_threads =
std::min<_DifferenceType>(__get_max_threads(), __length);
_ThreadIndex __num_threads = std::min<_DifferenceType>
(__get_max_threads(), __length);
_Result *__thread_results;
......
......@@ -75,25 +75,24 @@ namespace __gnu_parallel
_Result *__thread_results;
bool* __constructed;
_ThreadIndex __num_threads =
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
_ThreadIndex __num_threads = __gnu_parallel::min<_DifferenceType>
(__get_max_threads(), __length);
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
__num_threads = omp_get_num_threads();
__thread_results =
static_cast<_Result*>(::operator new(__num_threads
* sizeof(_Result)));
__thread_results = static_cast<_Result*>
(::operator new(__num_threads * sizeof(_Result)));
__constructed = new bool[__num_threads];
}
_ThreadIndex __iam = omp_get_thread_num();
// Neutral element.
_Result* __reduct =
static_cast<_Result*>(::operator new(sizeof(_Result)));
_Result* __reduct = static_cast<_Result*>
(::operator new(sizeof(_Result)));
_DifferenceType
__start = equally_split_point(__length, __num_threads, __iam),
......
......@@ -149,8 +149,9 @@ namespace __gnu_parallel
if (__iam == 0)
{
*__result = *__begin;
__parallel_partial_sum_basecase(
__begin + 1, __begin + __borders[1], __result + 1,
__parallel_partial_sum_basecase(__begin + 1,
__begin + __borders[1],
__result + 1,
__bin_op, *__begin);
::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
}
......
......@@ -44,13 +44,14 @@
namespace __gnu_parallel
{
/** @brief Parallel implementation of std::partition.
/** @brief Parallel implementation of std::partition.
* @param __begin Begin iterator of input sequence to split.
* @param __end End iterator of input sequence to split.
* @param __pred Partition predicate, possibly including some kind of pivot.
* @param __pred Partition predicate, possibly including some kind
* of pivot.
* @param __num_threads Maximum number of threads to use for this task.
* @return Number of elements not fulfilling the predicate. */
template<typename _RAIter, typename _Predicate>
template<typename _RAIter, typename _Predicate>
typename std::iterator_traits<_RAIter>::difference_type
__parallel_partition(_RAIter __begin, _RAIter __end,
_Predicate __pred, _ThreadIndex __num_threads)
......@@ -78,7 +79,7 @@ template<typename _RAIter, typename _Predicate>
omp_init_lock(&__result_lock);
//at least two chunks per thread
if(__right - __left + 1 >= 2 * __num_threads * __chunk_size)
if (__right - __left + 1 >= 2 * __num_threads * __chunk_size)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
......@@ -88,10 +89,9 @@ template<typename _RAIter, typename _Predicate>
__reserved_right = new bool[__num_threads];
if (__s.partition_chunk_share > 0.0)
__chunk_size = std::max<_DifferenceType>(
__s.partition_chunk_size,
(double)__n * __s.partition_chunk_share /
(double)__num_threads);
__chunk_size = std::max<_DifferenceType>
(__s.partition_chunk_size, (double)__n
* __s.partition_chunk_share / (double)__num_threads);
else
__chunk_size = __s.partition_chunk_size;
}
......@@ -100,8 +100,8 @@ template<typename _RAIter, typename _Predicate>
{
# pragma omp single
{
_DifferenceType __num_chunks
= (__right - __left + 1) / __chunk_size;
_DifferenceType __num_chunks = ((__right - __left + 1)
/ __chunk_size);
for (int __r = 0; __r < __num_threads; ++__r)
{
......@@ -171,7 +171,8 @@ template<typename _RAIter, typename _Predicate>
// Fetch new chunk(__s).
break;
std::swap(__begin[__thread_left], __begin[__thread_right]);
std::swap(__begin[__thread_left],
__begin[__thread_right]);
++__thread_left;
--__thread_right;
}
......@@ -200,9 +201,8 @@ template<typename _RAIter, typename _Predicate>
&& __thread_left_border >= __leftnew)
{
// Chunk already in place, reserve spot.
__reserved_left
[(__left - (__thread_left_border + 1)) / __chunk_size]
= true;
__reserved_left[(__left - (__thread_left_border + 1))
/ __chunk_size] = true;
}
// <=> __thread_right_border - (__chunk_size - 1) <= __rightnew
......@@ -260,10 +260,9 @@ template<typename _RAIter, typename _Predicate>
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
#endif
std::swap_ranges(
__begin + __thread_right_border,
__begin + __thread_right_border + __chunk_size,
__begin + __swapstart);
std::swap_ranges(__begin + __thread_right_border,
__begin + __thread_right_border
+ __chunk_size, __begin + __swapstart);
}
#if _GLIBCXX_ASSERTIONS
# pragma omp barrier
......@@ -284,6 +283,7 @@ template<typename _RAIter, typename _Predicate>
__left = __leftnew;
__right = __rightnew;
}
# pragma omp flush(__left, __right)
} // end "recursion" //parallel
......@@ -292,11 +292,13 @@ template<typename _RAIter, typename _Predicate>
while (__final_left < __final_right)
{
// Go right until key is geq than pivot.
while (__pred(__begin[__final_left]) && __final_left < __final_right)
while (__pred(__begin[__final_left])
&& __final_left < __final_right)
++__final_left;
// Go left until key is less than pivot.
while (!__pred(__begin[__final_right]) && __final_left < __final_right)
while (!__pred(__begin[__final_right])
&& __final_left < __final_right)
--__final_right;
if (__final_left == __final_right)
......@@ -322,14 +324,14 @@ template<typename _RAIter, typename _Predicate>
return __final_left + 1;
}
/**
/**
* @brief Parallel implementation of std::nth_element().
* @param __begin Begin iterator of input sequence.
* @param __nth _Iterator of element that must be in position afterwards.
* @param __end End iterator of input sequence.
* @param __comp Comparator.
*/
template<typename _RAIter, typename _Compare>
template<typename _RAIter, typename _Compare>
void
__parallel_nth_element(_RAIter __begin, _RAIter __nth,
_RAIter __end, _Compare __comp)
......@@ -361,15 +363,16 @@ template<typename _RAIter, typename _Compare>
// _Compare must have first_value_type, second_value_type,
// result_type
// _Compare ==
// __gnu_parallel::_Lexicographic<S, int, __gnu_parallel::_Less<S, S> >
// __gnu_parallel::_Lexicographic<S, int,
// __gnu_parallel::_Less<S, S> >
// __pivot_pos == std::pair<S, int>*
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
__pred(__comp, *__pivot_pos);
// Divide, leave pivot unchanged in last place.
_RAIter __split_pos1, __split_pos2;
__split_pos1 = __begin
+ __parallel_partition(__begin, __end - 1, __pred,
__split_pos1 = __begin + __parallel_partition(__begin, __end - 1,
__pred,
__get_max_threads());
// Left side: < __pivot_pos; __right side: >= __pivot_pos
......@@ -386,7 +389,8 @@ template<typename _RAIter, typename _Compare>
// Very unequal split, one part smaller than one 128th
// elements not strictly larger than the pivot.
__gnu_parallel::__unary_negate<__gnu_parallel::
__binder1st<_Compare, _ValueType, _ValueType, bool>, _ValueType>
__binder1st<_Compare, _ValueType,
_ValueType, bool>, _ValueType>
__pred(__gnu_parallel::__binder1st<_Compare, _ValueType,
_ValueType, bool>(__comp, *__pivot_pos));
......@@ -411,12 +415,12 @@ template<typename _RAIter, typename _Compare>
__gnu_sequential::sort(__begin, __end, __comp);
}
/** @brief Parallel implementation of std::partial_sort().
* @param __begin Begin iterator of input sequence.
* @param __middle Sort until this position.
* @param __end End iterator of input sequence.
* @param __comp Comparator. */
template<typename _RAIter, typename _Compare>
/** @brief Parallel implementation of std::partial_sort().
* @param __begin Begin iterator of input sequence.
* @param __middle Sort until this position.
* @param __end End iterator of input sequence.
* @param __comp Comparator. */
template<typename _RAIter, typename _Compare>
void
__parallel_partial_sort(_RAIter __begin,
_RAIter __middle,
......
......@@ -65,10 +65,10 @@ namespace __gnu_parallel
public:
/** @brief Constructor. Not to be called concurrent, of course.
* @param _M_max_size Maximal number of elements to be contained. */
_RestrictedBoundedConcurrentQueue(_SequenceIndex _M_max_size)
_RestrictedBoundedConcurrentQueue(_SequenceIndex __max_size)
{
this->_M_max_size = _M_max_size;
_M_base = new _Tp[_M_max_size];
_M_max_size = __max_size;
_M_base = new _Tp[__max_size];
_M_borders = __encode2(0, 0);
#pragma omp flush
}
......@@ -105,12 +105,12 @@ namespace __gnu_parallel
while (__former_front > __former_back)
{
// Chance.
_CASable
__former_borders = __encode2(__former_front, __former_back);
_CASable
__new_borders = __encode2(__former_front - 1, __former_back);
if (__compare_and_swap(
&_M_borders, __former_borders, __new_borders))
_CASable __former_borders = __encode2(__former_front,
__former_back);
_CASable __new_borders = __encode2(__former_front - 1,
__former_back);
if (__compare_and_swap(&_M_borders, __former_borders,
__new_borders))
{
__t = *(_M_base + (__former_front - 1) % _M_max_size);
return true;
......@@ -132,12 +132,12 @@ namespace __gnu_parallel
while (__former_front > __former_back)
{
// Chance.
_CASable
__former_borders = __encode2(__former_front, __former_back);
_CASable
__new_borders = __encode2(__former_front, __former_back + 1);
if (__compare_and_swap(
&_M_borders, __former_borders, __new_borders))
_CASable __former_borders = __encode2(__former_front,
__former_back);
_CASable __new_borders = __encode2(__former_front,
__former_back + 1);
if (__compare_and_swap(&_M_borders, __former_borders,
__new_borders))
{
__t = *(_M_base + __former_back % _M_max_size);
return true;
......
......@@ -48,8 +48,7 @@ namespace __gnu_parallel
*/
template<typename _RAIter, typename _Compare>
typename std::iterator_traits<_RAIter>::difference_type
__parallel_sort_qs_divide(_RAIter __begin,
_RAIter __end,
__parallel_sort_qs_divide(_RAIter __begin, _RAIter __end,
_Compare __comp, typename std::iterator_traits
<_RAIter>::difference_type __pivot_rank,
typename std::iterator_traits
......@@ -64,25 +63,24 @@ namespace __gnu_parallel
__num_samples = std::min(__num_samples, __n);
// Allocate uninitialized, to avoid default constructor.
_ValueType* __samples =
static_cast<_ValueType*>(::operator new(__num_samples
* sizeof(_ValueType)));
_ValueType* __samples = static_cast<_ValueType*>
(::operator new(__num_samples * sizeof(_ValueType)));
for (_DifferenceType __s = 0; __s < __num_samples; ++__s)
{
const unsigned long long __index
= static_cast<unsigned long long>(__s) * __n / __num_samples;
const unsigned long long __index = static_cast<unsigned long long>
(__s) * __n / __num_samples;
::new(&(__samples[__s])) _ValueType(__begin[__index]);
}
__gnu_sequential::sort(__samples, __samples + __num_samples, __comp);
_ValueType& pivot = __samples[__pivot_rank * __num_samples / __n];
_ValueType& __pivot = __samples[__pivot_rank * __num_samples / __n];
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
__pred(__comp, pivot);
_DifferenceType __split =
__parallel_partition(__begin, __end, __pred, __num_threads);
__pred(__comp, __pivot);
_DifferenceType __split = __parallel_partition(__begin, __end,
__pred, __num_threads);
::operator delete(__samples);
......@@ -98,8 +96,7 @@ namespace __gnu_parallel
*/
template<typename _RAIter, typename _Compare>
void
__parallel_sort_qs_conquer(_RAIter __begin,
_RAIter __end,
__parallel_sort_qs_conquer(_RAIter __begin, _RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
{
......@@ -127,10 +124,9 @@ namespace __gnu_parallel
__pivot_rank = __n * __num_threads_left / __num_threads;
_DifferenceType __split =
__parallel_sort_qs_divide(__begin, __end, __comp, __pivot_rank,
_Settings::get().sort_qs_num_samples_preset,
__num_threads);
_DifferenceType __split = __parallel_sort_qs_divide
(__begin, __end, __comp, __pivot_rank,
_Settings::get().sort_qs_num_samples_preset, __num_threads);
#pragma omp parallel sections num_threads(2)
{
......@@ -144,7 +140,6 @@ namespace __gnu_parallel
}
/** @brief Unbalanced quicksort main call.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator input sequence, ignored.
......@@ -154,8 +149,7 @@ namespace __gnu_parallel
*/
template<typename _RAIter, typename _Compare>
void
__parallel_sort_qs(_RAIter __begin,
_RAIter __end,
__parallel_sort_qs(_RAIter __begin, _RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
{
......
......@@ -39,16 +39,16 @@
namespace __gnu_parallel
{
/** @brief Type to hold the index of a bin.
/** @brief Type to hold the index of a bin.
*
* Since many variables of this type are allocated, it should be
* chosen as small as possible.
*/
typedef unsigned short _BinIndex;
typedef unsigned short _BinIndex;
/** @brief Data known to every thread participating in
/** @brief Data known to every thread participating in
__gnu_parallel::__parallel_random_shuffle(). */
template<typename _RAIter>
template<typename _RAIter>
struct _DRandomShufflingGlobalData
{
typedef std::iterator_traits<_RAIter> _TraitsType;
......@@ -84,10 +84,10 @@ template<typename _RAIter>
: _M_source(__source) { }
};
/** @brief Local data for a thread participating in
/** @brief Local data for a thread participating in
__gnu_parallel::__parallel_random_shuffle().
*/
template<typename _RAIter, typename _RandomNumberGenerator>
template<typename _RAIter, typename _RandomNumberGenerator>
struct _DRSSorterPU
{
/** @brief Number of threads participating in total. */
......@@ -106,18 +106,18 @@ template<typename _RAIter, typename _RandomNumberGenerator>
_DRandomShufflingGlobalData<_RAIter>* _M_sd;
};
/** @brief Generate a random number in @__c [0,2^logp).
* @param logp Logarithm (basis 2) of the upper range __bound.
/** @brief Generate a random number in @__c [0,2^__logp).
* @param __logp Logarithm (basis 2) of the upper range __bound.
* @param __rng Random number generator to use.
*/
template<typename _RandomNumberGenerator>
template<typename _RandomNumberGenerator>
inline int
__random_number_pow2(int logp, _RandomNumberGenerator& __rng)
{ return __rng.__genrand_bits(logp); }
__random_number_pow2(int __logp, _RandomNumberGenerator& __rng)
{ return __rng.__genrand_bits(__logp); }
/** @brief Random shuffle code executed by each thread.
/** @brief Random shuffle code executed by each thread.
* @param __pus Array of thread-local data records. */
template<typename _RAIter, typename _RandomNumberGenerator>
template<typename _RAIter, typename _RandomNumberGenerator>
void
__parallel_random_shuffle_drs_pu(_DRSSorterPU<_RAIter,
_RandomNumberGenerator>* __pus)
......@@ -127,121 +127,121 @@ template<typename _RAIter, typename _RandomNumberGenerator>
typedef typename _TraitsType::difference_type _DifferenceType;
_ThreadIndex __iam = omp_get_thread_num();
_DRSSorterPU<_RAIter, _RandomNumberGenerator>* d = &__pus[__iam];
_DRandomShufflingGlobalData<_RAIter>* _M_sd = d->_M_sd;
_DRSSorterPU<_RAIter, _RandomNumberGenerator>* __d = &__pus[__iam];
_DRandomShufflingGlobalData<_RAIter>* __sd = __d->_M_sd;
// Indexing: _M_dist[bin][processor]
_DifferenceType __length = _M_sd->_M_starts[__iam + 1] -
_M_sd->_M_starts[__iam];
_DifferenceType __length = (__sd->_M_starts[__iam + 1]
- __sd->_M_starts[__iam]);
_BinIndex* __oracles = new _BinIndex[__length];
_DifferenceType* _M_dist = new _DifferenceType[_M_sd->_M_num_bins + 1];
_BinIndex* _M_bin_proc = new _BinIndex[_M_sd->_M_num_bins];
_ValueType** _M_temporaries = new _ValueType*[d->_M_num_threads];
_DifferenceType* __dist = new _DifferenceType[__sd->_M_num_bins + 1];
_BinIndex* __bin_proc = new _BinIndex[__sd->_M_num_bins];
_ValueType** __temporaries = new _ValueType*[__d->_M_num_threads];
// Compute oracles and count appearances.
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b)
_M_dist[__b] = 0;
int _M_num_bits = _M_sd->_M_num_bits;
for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
__dist[__b] = 0;
int __num_bits = __sd->_M_num_bits;
_RandomNumber __rng(d->_M_seed);
_RandomNumber __rng(__d->_M_seed);
// First main loop.
for (_DifferenceType __i = 0; __i < __length; ++__i)
{
_BinIndex __oracle = __random_number_pow2(_M_num_bits, __rng);
_BinIndex __oracle = __random_number_pow2(__num_bits, __rng);
__oracles[__i] = __oracle;
// To allow prefix (partial) sum.
++(_M_dist[__oracle + 1]);
++(__dist[__oracle + 1]);
}
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b)
_M_sd->_M_dist[__b][__iam + 1] = _M_dist[__b];
for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
__sd->_M_dist[__b][__iam + 1] = __dist[__b];
# pragma omp barrier
# pragma omp single
{
// Sum up bins, _M_sd->_M_dist[__s + 1][d->_M_num_threads] now contains
// the total number of items in bin __s
for (_BinIndex __s = 0; __s < _M_sd->_M_num_bins; ++__s)
__gnu_sequential::partial_sum(
_M_sd->_M_dist[__s + 1],
_M_sd->_M_dist[__s + 1] + d->_M_num_threads + 1,
_M_sd->_M_dist[__s + 1]);
// Sum up bins, __sd->_M_dist[__s + 1][__d->_M_num_threads] now
// contains the total number of items in bin __s
for (_BinIndex __s = 0; __s < __sd->_M_num_bins; ++__s)
__gnu_sequential::partial_sum(__sd->_M_dist[__s + 1],
__sd->_M_dist[__s + 1]
+ __d->_M_num_threads + 1,
__sd->_M_dist[__s + 1]);
}
# pragma omp barrier
_SequenceIndex __offset = 0, __global_offset = 0;
for (_BinIndex __s = 0; __s < d->_M_bins_begin; ++__s)
__global_offset += _M_sd->_M_dist[__s + 1][d->_M_num_threads];
for (_BinIndex __s = 0; __s < __d->_M_bins_begin; ++__s)
__global_offset += __sd->_M_dist[__s + 1][__d->_M_num_threads];
# pragma omp barrier
for (_BinIndex __s = d->_M_bins_begin; __s < d->__bins_end; ++__s)
for (_BinIndex __s = __d->_M_bins_begin; __s < __d->__bins_end; ++__s)
{
for (int __t = 0; __t < d->_M_num_threads + 1; ++__t)
_M_sd->_M_dist[__s + 1][__t] += __offset;
__offset = _M_sd->_M_dist[__s + 1][d->_M_num_threads];
for (int __t = 0; __t < __d->_M_num_threads + 1; ++__t)
__sd->_M_dist[__s + 1][__t] += __offset;
__offset = __sd->_M_dist[__s + 1][__d->_M_num_threads];
}
_M_sd->_M_temporaries[__iam] = static_cast<_ValueType*>(
::operator new(sizeof(_ValueType) * __offset));
__sd->_M_temporaries[__iam] = static_cast<_ValueType*>
(::operator new(sizeof(_ValueType) * __offset));
# pragma omp barrier
// Draw local copies to avoid false sharing.
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins + 1; ++__b)
_M_dist[__b] = _M_sd->_M_dist[__b][__iam];
for (_BinIndex __b = 0; __b < _M_sd->_M_num_bins; ++__b)
_M_bin_proc[__b] = _M_sd->_M_bin_proc[__b];
for (_ThreadIndex __t = 0; __t < d->_M_num_threads; ++__t)
_M_temporaries[__t] = _M_sd->_M_temporaries[__t];
for (_BinIndex __b = 0; __b < __sd->_M_num_bins + 1; ++__b)
__dist[__b] = __sd->_M_dist[__b][__iam];
for (_BinIndex __b = 0; __b < __sd->_M_num_bins; ++__b)
__bin_proc[__b] = __sd->_M_bin_proc[__b];
for (_ThreadIndex __t = 0; __t < __d->_M_num_threads; ++__t)
__temporaries[__t] = __sd->_M_temporaries[__t];
_RAIter _M_source = _M_sd->_M_source;
_DifferenceType __start = _M_sd->_M_starts[__iam];
_RAIter __source = __sd->_M_source;
_DifferenceType __start = __sd->_M_starts[__iam];
// Distribute according to oracles, second main loop.
for (_DifferenceType __i = 0; __i < __length; ++__i)
{
_BinIndex target_bin = __oracles[__i];
_ThreadIndex target_p = _M_bin_proc[target_bin];
_BinIndex __target_bin = __oracles[__i];
_ThreadIndex __target_p = __bin_proc[__target_bin];
// Last column [d->_M_num_threads] stays unchanged.
::new(&(_M_temporaries[target_p][_M_dist[target_bin + 1]++]))
_ValueType(*(_M_source + __i + __start));
// Last column [__d->_M_num_threads] stays unchanged.
::new(&(__temporaries[__target_p][__dist[__target_bin + 1]++]))
_ValueType(*(__source + __i + __start));
}
delete[] __oracles;
delete[] _M_dist;
delete[] _M_bin_proc;
delete[] _M_temporaries;
delete[] __dist;
delete[] __bin_proc;
delete[] __temporaries;
# pragma omp barrier
// Shuffle bins internally.
for (_BinIndex __b = d->_M_bins_begin; __b < d->__bins_end; ++__b)
for (_BinIndex __b = __d->_M_bins_begin; __b < __d->__bins_end; ++__b)
{
_ValueType* __begin =
_M_sd->_M_temporaries[__iam] +
((__b == d->_M_bins_begin)
? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]),
* __end =
_M_sd->_M_temporaries[__iam] +
_M_sd->_M_dist[__b + 1][d->_M_num_threads];
(__sd->_M_temporaries[__iam]
+ (__b == __d->_M_bins_begin
? 0 : __sd->_M_dist[__b][__d->_M_num_threads])),
* __end = (__sd->_M_temporaries[__iam]
+ __sd->_M_dist[__b + 1][__d->_M_num_threads]);
__sequential_random_shuffle(__begin, __end, __rng);
std::copy(__begin, __end, _M_sd->_M_source + __global_offset +
((__b == d->_M_bins_begin)
? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]));
std::copy(__begin, __end, __sd->_M_source + __global_offset
+ (__b == __d->_M_bins_begin
? 0 : __sd->_M_dist[__b][__d->_M_num_threads]));
}
::operator delete(_M_sd->_M_temporaries[__iam]);
::operator delete(__sd->_M_temporaries[__iam]);
}
/** @brief Round up to the next greater power of 2.
/** @brief Round up to the next greater power of 2.
* @param __x _Integer to round up */
template<typename _Tp>
template<typename _Tp>
_Tp
__round_up_to_pow2(_Tp __x)
{
......@@ -251,17 +251,16 @@ template<typename _Tp>
return (_Tp)1 << (__rd_log2(__x - 1) + 1);
}
/** @brief Main parallel random shuffle step.
/** @brief Main parallel random shuffle step.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __n Length of sequence.
* @param __num_threads Number of threads to use.
* @param __rng Random number generator to use.
*/
template<typename _RAIter, typename _RandomNumberGenerator>
template<typename _RAIter, typename _RandomNumberGenerator>
void
__parallel_random_shuffle_drs(_RAIter __begin,
_RAIter __end,
__parallel_random_shuffle_drs(_RAIter __begin, _RAIter __end,
typename std::iterator_traits
<_RAIter>::difference_type __n,
_ThreadIndex __num_threads,
......@@ -278,129 +277,129 @@ template<typename _RAIter, typename _RandomNumberGenerator>
if (__num_threads > __n)
__num_threads = static_cast<_ThreadIndex>(__n);
_BinIndex _M_num_bins, __num_bins_cache;
_BinIndex __num_bins, __num_bins_cache;
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
// Try the L1 cache first.
// Must fit into L1.
__num_bins_cache = std::max<_DifferenceType>(
1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType)));
__num_bins_cache =
std::max<_DifferenceType>(1, __n / (__s.L1_cache_size_lb
/ sizeof(_ValueType)));
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
// No more buckets than TLB entries, power of 2
// Power of 2 and at least one element per bin, at most the TLB size.
_M_num_bins = std::min<_DifferenceType>(__n, __num_bins_cache);
__num_bins = std::min<_DifferenceType>(__n, __num_bins_cache);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin.
_M_num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, _M_num_bins);
__num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins);
#endif
_M_num_bins = __round_up_to_pow2(_M_num_bins);
__num_bins = __round_up_to_pow2(__num_bins);
if (_M_num_bins < __num_bins_cache)
if (__num_bins < __num_bins_cache)
{
#endif
// Now try the L2 cache
// Must fit into L2
__num_bins_cache = static_cast<_BinIndex>(std::max<_DifferenceType>(
1, __n / (__s.L2_cache_size / sizeof(_ValueType))));
__num_bins_cache = static_cast<_BinIndex>
(std::max<_DifferenceType>(1, __n / (__s.L2_cache_size
/ sizeof(_ValueType))));
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
// No more buckets than TLB entries, power of 2.
_M_num_bins = static_cast<_BinIndex>(
std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
__num_bins = static_cast<_BinIndex>
(std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
// Power of 2 and at least one element per bin, at most the TLB size.
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin.
_M_num_bins = std::min(
static_cast<_DifferenceType>(__s.TLB_size / 2), _M_num_bins);
__num_bins = std::min(static_cast<_DifferenceType>(__s.TLB_size / 2),
__num_bins);
#endif
_M_num_bins = __round_up_to_pow2(_M_num_bins);
__num_bins = __round_up_to_pow2(__num_bins);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
}
#endif
__num_threads = std::min<_BinIndex>(__num_threads, _M_num_bins);
__num_threads = std::min<_BinIndex>(__num_threads, __num_bins);
if (__num_threads <= 1)
return __sequential_random_shuffle(__begin, __end, __rng);
_DRandomShufflingGlobalData<_RAIter> _M_sd(__begin);
_DRandomShufflingGlobalData<_RAIter> __sd(__begin);
_DRSSorterPU<_RAIter, _RandomNumber >* __pus;
_DifferenceType* _M_starts;
_DifferenceType* __starts;
# pragma omp parallel num_threads(__num_threads)
{
_ThreadIndex __num_threads = omp_get_num_threads();
# pragma omp single
{
__pus = new _DRSSorterPU<_RAIter, _RandomNumber>
[__num_threads];
_M_sd._M_temporaries = new _ValueType*[__num_threads];
_M_sd._M_dist = new _DifferenceType*[_M_num_bins + 1];
_M_sd._M_bin_proc = new _ThreadIndex[_M_num_bins];
for (_BinIndex __b = 0; __b < _M_num_bins + 1; ++__b)
_M_sd._M_dist[__b] = new _DifferenceType[__num_threads + 1];
for (_BinIndex __b = 0; __b < (_M_num_bins + 1); ++__b)
__pus = new _DRSSorterPU<_RAIter, _RandomNumber>[__num_threads];
__sd._M_temporaries = new _ValueType*[__num_threads];
__sd._M_dist = new _DifferenceType*[__num_bins + 1];
__sd._M_bin_proc = new _ThreadIndex[__num_bins];
for (_BinIndex __b = 0; __b < __num_bins + 1; ++__b)
__sd._M_dist[__b] = new _DifferenceType[__num_threads + 1];
for (_BinIndex __b = 0; __b < (__num_bins + 1); ++__b)
{
_M_sd._M_dist[0][0] = 0;
_M_sd._M_dist[__b][0] = 0;
__sd._M_dist[0][0] = 0;
__sd._M_dist[__b][0] = 0;
}
_M_starts = _M_sd._M_starts
= new _DifferenceType[__num_threads + 1];
int bin_cursor = 0;
_M_sd._M_num_bins = _M_num_bins;
_M_sd._M_num_bits = __rd_log2(_M_num_bins);
__starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
int __bin_cursor = 0;
__sd._M_num_bins = __num_bins;
__sd._M_num_bits = __rd_log2(__num_bins);
_DifferenceType __chunk_length = __n / __num_threads,
__split = __n % __num_threads, __start = 0;
_DifferenceType bin_chunk_length = _M_num_bins / __num_threads,
bin_split = _M_num_bins % __num_threads;
__split = __n % __num_threads,
__start = 0;
_DifferenceType __bin_chunk_length = __num_bins / __num_threads,
__bin_split = __num_bins % __num_threads;
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
{
_M_starts[__i] = __start;
__start += (__i < __split)
? (__chunk_length + 1) : __chunk_length;
int __j = __pus[__i]._M_bins_begin = bin_cursor;
__starts[__i] = __start;
__start += (__i < __split
? (__chunk_length + 1) : __chunk_length);
int __j = __pus[__i]._M_bins_begin = __bin_cursor;
// Range of bins for this processor.
bin_cursor += (__i < bin_split) ?
(bin_chunk_length + 1) : bin_chunk_length;
__pus[__i].__bins_end = bin_cursor;
for (; __j < bin_cursor; ++__j)
_M_sd._M_bin_proc[__j] = __i;
__bin_cursor += (__i < __bin_split
? (__bin_chunk_length + 1)
: __bin_chunk_length);
__pus[__i].__bins_end = __bin_cursor;
for (; __j < __bin_cursor; ++__j)
__sd._M_bin_proc[__j] = __i;
__pus[__i]._M_num_threads = __num_threads;
__pus[__i]._M_seed =
__rng(std::numeric_limits<uint32_t>::max());
__pus[__i]._M_sd = &_M_sd;
__pus[__i]._M_seed = __rng(std::numeric_limits<uint32_t>::max());
__pus[__i]._M_sd = &__sd;
}
_M_starts[__num_threads] = __start;
__starts[__num_threads] = __start;
} //single
// Now shuffle in parallel.
__parallel_random_shuffle_drs_pu(__pus);
} // parallel
delete[] _M_starts;
delete[] _M_sd._M_bin_proc;
for (int __s = 0; __s < (_M_num_bins + 1); ++__s)
delete[] _M_sd._M_dist[__s];
delete[] _M_sd._M_dist;
delete[] _M_sd._M_temporaries;
delete[] __starts;
delete[] __sd._M_bin_proc;
for (int __s = 0; __s < (__num_bins + 1); ++__s)
delete[] __sd._M_dist[__s];
delete[] __sd._M_dist;
delete[] __sd._M_temporaries;
delete[] __pus;
}
/** @brief Sequential cache-efficient random shuffle.
/** @brief Sequential cache-efficient random shuffle.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __rng Random number generator to use.
*/
template<typename _RAIter, typename _RandomNumberGenerator>
template<typename _RAIter, typename _RandomNumberGenerator>
void
__sequential_random_shuffle(_RAIter __begin,
_RAIter __end,
__sequential_random_shuffle(_RAIter __begin, _RAIter __end,
_RandomNumberGenerator& __rng)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
......@@ -410,66 +409,64 @@ template<typename _RAIter, typename _RandomNumberGenerator>
_DifferenceType __n = __end - __begin;
const _Settings& __s = _Settings::get();
_BinIndex _M_num_bins, __num_bins_cache;
_BinIndex __num_bins, __num_bins_cache;
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
// Try the L1 cache first, must fit into L1.
__num_bins_cache =
std::max<_DifferenceType>
__num_bins_cache = std::max<_DifferenceType>
(1, __n / (__s.L1_cache_size_lb / sizeof(_ValueType)));
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
// No more buckets than TLB entries, power of 2
// Power of 2 and at least one element per bin, at most the TLB size
_M_num_bins = std::min(__n, (_DifferenceType)__num_bins_cache);
__num_bins = std::min(__n, (_DifferenceType)__num_bins_cache);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin
_M_num_bins = std::min((_DifferenceType)__s.TLB_size / 2, _M_num_bins);
__num_bins = std::min((_DifferenceType)__s.TLB_size / 2, __num_bins);
#endif
_M_num_bins = __round_up_to_pow2(_M_num_bins);
__num_bins = __round_up_to_pow2(__num_bins);
if (_M_num_bins < __num_bins_cache)
if (__num_bins < __num_bins_cache)
{
#endif
// Now try the L2 cache, must fit into L2.
__num_bins_cache =
static_cast<_BinIndex>(std::max<_DifferenceType>(
1, __n / (__s.L2_cache_size / sizeof(_ValueType))));
__num_bins_cache = static_cast<_BinIndex>
(std::max<_DifferenceType>(1, __n / (__s.L2_cache_size
/ sizeof(_ValueType))));
__num_bins_cache = __round_up_to_pow2(__num_bins_cache);
// No more buckets than TLB entries, power of 2
// Power of 2 and at least one element per bin, at most the TLB size.
_M_num_bins = static_cast<_BinIndex>
__num_bins = static_cast<_BinIndex>
(std::min(__n, static_cast<_DifferenceType>(__num_bins_cache)));
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin
_M_num_bins =
std::min<_DifferenceType>(__s.TLB_size / 2, _M_num_bins);
__num_bins = std::min<_DifferenceType>(__s.TLB_size / 2, __num_bins);
#endif
_M_num_bins = __round_up_to_pow2(_M_num_bins);
__num_bins = __round_up_to_pow2(__num_bins);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
}
#endif
int _M_num_bits = __rd_log2(_M_num_bins);
int __num_bits = __rd_log2(__num_bins);
if (_M_num_bins > 1)
if (__num_bins > 1)
{
_ValueType* __target = static_cast<_ValueType*>(
::operator new(sizeof(_ValueType) * __n));
_ValueType* __target =
static_cast<_ValueType*>(::operator new(sizeof(_ValueType) * __n));
_BinIndex* __oracles = new _BinIndex[__n];
_DifferenceType* __dist0 = new _DifferenceType[_M_num_bins + 1],
* __dist1 = new _DifferenceType[_M_num_bins + 1];
_DifferenceType* __dist0 = new _DifferenceType[__num_bins + 1],
* __dist1 = new _DifferenceType[__num_bins + 1];
for (int __b = 0; __b < _M_num_bins + 1; ++__b)
for (int __b = 0; __b < __num_bins + 1; ++__b)
__dist0[__b] = 0;
_RandomNumber bitrng(__rng(0xFFFFFFFF));
_RandomNumber __bitrng(__rng(0xFFFFFFFF));
for (_DifferenceType __i = 0; __i < __n; ++__i)
{
_BinIndex __oracle = __random_number_pow2(_M_num_bits, bitrng);
_BinIndex __oracle = __random_number_pow2(__num_bits, __bitrng);
__oracles[__i] = __oracle;
// To allow prefix (partial) sum.
......@@ -477,10 +474,10 @@ template<typename _RAIter, typename _RandomNumberGenerator>
}
// Sum up bins.
__gnu_sequential::
partial_sum(__dist0, __dist0 + _M_num_bins + 1, __dist0);
__gnu_sequential::partial_sum(__dist0, __dist0 + __num_bins + 1,
__dist0);
for (int __b = 0; __b < _M_num_bins + 1; ++__b)
for (int __b = 0; __b < __num_bins + 1; ++__b)
__dist1[__b] = __dist0[__b];
// Distribute according to oracles.
......@@ -488,12 +485,9 @@ template<typename _RAIter, typename _RandomNumberGenerator>
::new(&(__target[(__dist0[__oracles[__i]])++]))
_ValueType(*(__begin + __i));
for (int __b = 0; __b < _M_num_bins; ++__b)
{
for (int __b = 0; __b < __num_bins; ++__b)
__sequential_random_shuffle(__target + __dist1[__b],
__target + __dist1[__b + 1],
__rng);
}
__target + __dist1[__b + 1], __rng);
// Copy elements back.
std::copy(__target, __target + __n, __begin);
......@@ -507,24 +501,22 @@ template<typename _RAIter, typename _RandomNumberGenerator>
__gnu_sequential::random_shuffle(__begin, __end, __rng);
}
/** @brief Parallel random public call.
/** @brief Parallel random public call.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __rng Random number generator to use.
*/
template<typename _RAIter, typename _RandomNumberGenerator>
template<typename _RAIter, typename _RandomNumberGenerator>
inline void
__parallel_random_shuffle(_RAIter __begin,
_RAIter __end,
__parallel_random_shuffle(_RAIter __begin, _RAIter __end,
_RandomNumberGenerator __rng = _RandomNumber())
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
_DifferenceType __n = __end - __begin;
__parallel_random_shuffle_drs(
__begin, __end, __n, __get_max_threads(), __rng) ;
__parallel_random_shuffle_drs(__begin, __end, __n,
__get_max_threads(), __rng);
}
}
#endif /* _GLIBCXX_PARALLEL_RANDOM_SHUFFLE_H */
......@@ -38,7 +38,6 @@
#include <parallel/parallel.h>
#include <parallel/equally_split.h>
namespace __gnu_parallel
{
/**
......@@ -47,7 +46,7 @@ namespace __gnu_parallel
* @param __length Length of sequence to search for.
* @param __advances Returned __offsets.
*/
template<typename _RAIter, typename _DifferenceTp>
template<typename _RAIter, typename _DifferenceTp>
void
__calc_borders(_RAIter __elements, _DifferenceTp __length,
_DifferenceTp* __off)
......@@ -75,7 +74,7 @@ template<typename _RAIter, typename _DifferenceTp>
* @param __end2 End iterator of second sequence.
* @param __pred Find predicate.
* @return Place of finding in first sequences. */
template<typename __RAIter1,
template<typename __RAIter1,
typename __RAIter2,
typename _Pred>
__RAIter1
......@@ -108,9 +107,9 @@ template<typename __RAIter1,
omp_lock_t __result_lock;
omp_init_lock(&__result_lock);
_ThreadIndex __num_threads =
std::max<_DifferenceType>(1,
std::min<_DifferenceType>(__input_length, __get_max_threads()));
_ThreadIndex __num_threads = std::max<_DifferenceType>
(1, std::min<_DifferenceType>(__input_length,
__get_max_threads()));
_DifferenceType __advances[__pattern_length];
__calc_borders(__begin2, __pattern_length, __advances);
......@@ -135,7 +134,7 @@ template<typename __RAIter1,
while (__start <= __stop && !__found_pattern)
{
// Get new value of result.
#pragma omp flush(__result)
#pragma omp flush(__result)
// No chance for this thread to find first occurrence.
if (__result < __start)
break;
......@@ -156,9 +155,8 @@ template<typename __RAIter1,
}
// Make safe jump.
__start += (__pos_in_pattern - __advances[__pos_in_pattern]);
__pos_in_pattern =
(__advances[__pos_in_pattern] < 0) ?
0 : __advances[__pos_in_pattern];
__pos_in_pattern = (__advances[__pos_in_pattern] < 0
? 0 : __advances[__pos_in_pattern]);
}
} //parallel
......
......@@ -41,9 +41,9 @@
namespace __gnu_parallel
{
template<typename _IIter, typename _OutputIterator>
template<typename _IIter, typename _OutputIterator>
_OutputIterator
copy_tail(std::pair<_IIter, _IIter> __b,
__copy_tail(std::pair<_IIter, _IIter> __b,
std::pair<_IIter, _IIter> __e, _OutputIterator __r)
{
if (__b.first != __e.first)
......@@ -62,25 +62,24 @@ template<typename _IIter, typename _OutputIterator>
return __r;
}
template<typename _IIter,
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
struct symmetric_difference_func
struct __symmetric_difference_func
{
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
typedef typename std::pair<_IIter, _IIter> _IteratorPair;
symmetric_difference_func(_Compare __comp) : _M_comp(__comp) {}
__symmetric_difference_func(_Compare __comp) : _M_comp(__comp) {}
_Compare _M_comp;
_OutputIterator
_M_invoke(_IIter __a, _IIter __b,
_IIter __c, _IIter d,
_M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d,
_OutputIterator __r) const
{
while (__a != __b && __c != d)
while (__a != __b && __c != __d)
{
if (_M_comp(*__a, *__c))
{
......@@ -100,12 +99,11 @@ template<typename _IIter,
++__c;
}
}
return std::copy(__c, d, std::copy(__a, __b, __r));
return std::copy(__c, __d, std::copy(__a, __b, __r));
}
_DifferenceType
__count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const
__count(_IIter __a, _IIter __b, _IIter __c, _IIter d) const
{
_DifferenceType __counter = 0;
......@@ -141,7 +139,7 @@ template<typename _IIter,
};
template<typename _IIter,
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
struct __difference_func
......@@ -199,17 +197,17 @@ template<typename _IIter,
return __counter + (__b - __a);
}
inline _OutputIterator
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const
_OutputIterator
__first_empty(_IIter, _IIter, _OutputIterator __out) const
{ return __out; }
inline _OutputIterator
_OutputIterator
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
{ return std::copy(__a, __b, __out); }
};
template<typename _IIter,
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
struct __intersection_func
......@@ -223,10 +221,10 @@ template<typename _IIter,
_Compare _M_comp;
_OutputIterator
_M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter d,
_M_invoke(_IIter __a, _IIter __b, _IIter __c, _IIter __d,
_OutputIterator __r) const
{
while (__a != __b && __c != d)
while (__a != __b && __c != __d)
{
if (_M_comp(*__a, *__c))
{ ++__a; }
......@@ -245,12 +243,11 @@ template<typename _IIter,
}
_DifferenceType
__count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const
__count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const
{
_DifferenceType __counter = 0;
while (__a != __b && __c != d)
while (__a != __b && __c != __d)
{
if (_M_comp(*__a, *__c))
{ ++__a; }
......@@ -267,16 +264,16 @@ template<typename _IIter,
return __counter;
}
inline _OutputIterator
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const
_OutputIterator
__first_empty(_IIter, _IIter, _OutputIterator __out) const
{ return __out; }
inline _OutputIterator
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
_OutputIterator
__second_empty(_IIter, _IIter, _OutputIterator __out) const
{ return __out; }
};
template<class _IIter, class _OutputIterator, class _Compare>
template<class _IIter, class _OutputIterator, class _Compare>
struct __union_func
{
typedef typename std::iterator_traits<_IIter>::difference_type
......@@ -288,9 +285,9 @@ template<class _IIter, class _OutputIterator, class _Compare>
_OutputIterator
_M_invoke(_IIter __a, const _IIter __b, _IIter __c,
const _IIter d, _OutputIterator __r) const
const _IIter __d, _OutputIterator __r) const
{
while (__a != __b && __c != d)
while (__a != __b && __c != __d)
{
if (_M_comp(*__a, *__c))
{
......@@ -310,16 +307,15 @@ template<class _IIter, class _OutputIterator, class _Compare>
}
++__r;
}
return std::copy(__c, d, std::copy(__a, __b, __r));
return std::copy(__c, __d, std::copy(__a, __b, __r));
}
_DifferenceType
__count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const
__count(_IIter __a, _IIter __b, _IIter __c, _IIter __d) const
{
_DifferenceType __counter = 0;
while (__a != __b && __c != d)
while (__a != __b && __c != __d)
{
if (_M_comp(*__a, *__c))
{ ++__a; }
......@@ -334,20 +330,20 @@ template<class _IIter, class _OutputIterator, class _Compare>
}
__counter += (__b - __a);
__counter += (d - __c);
__counter += (__d - __c);
return __counter;
}
inline _OutputIterator
__first_empty(_IIter __c, _IIter d, _OutputIterator __out) const
{ return std::copy(__c, d, __out); }
_OutputIterator
__first_empty(_IIter __c, _IIter __d, _OutputIterator __out) const
{ return std::copy(__c, __d, __out); }
inline _OutputIterator
_OutputIterator
__second_empty(_IIter __a, _IIter __b, _OutputIterator __out) const
{ return std::copy(__a, __b, __out); }
};
template<typename _IIter,
template<typename _IIter,
typename _OutputIterator,
typename Operation>
_OutputIterator
......@@ -367,11 +363,11 @@ template<typename _IIter,
if (__begin2 == __end2)
return __op.__second_empty(__begin1, __end1, __result);
const _DifferenceType size = (__end1 - __begin1) + (__end2 - __begin2);
const _DifferenceType __size = (__end1 - __begin1) + (__end2 - __begin2);
const _IteratorPair __sequence[ 2 ] =
{ std::make_pair(__begin1, __end1), std::make_pair(__begin2, __end2) };
_OutputIterator return_value = __result;
const _IteratorPair __sequence[2] = { std::make_pair(__begin1, __end1),
std::make_pair(__begin2, __end2) };
_OutputIterator __return_value = __result;
_DifferenceType *__borders;
_IteratorPair *__block_begins;
_DifferenceType* __lengths;
......@@ -387,7 +383,7 @@ template<typename _IIter,
__num_threads = omp_get_num_threads();
__borders = new _DifferenceType[__num_threads + 2];
equally_split(size, __num_threads + 1, __borders);
equally_split(__size, __num_threads + 1, __borders);
__block_begins = new _IteratorPair[__num_threads + 1];
// Very __start.
__block_begins[0] = std::make_pair(__begin1, __begin2);
......@@ -406,22 +402,22 @@ template<typename _IIter,
// allowed to read?
// together
// *(__offset[ 0 ] - 1) == *__offset[ 1 ]
if (__offset[ 0 ] != __begin1 && __offset[ 1 ] != __end2
&& !__op._M_comp(*(__offset[ 0 ] - 1), *__offset[ 1 ])
&& !__op._M_comp(*__offset[ 1 ], *(__offset[ 0 ] - 1)))
if (__offset[ 0 ] != __begin1 && __offset[1] != __end2
&& !__op._M_comp(*(__offset[0] - 1), *__offset[1])
&& !__op._M_comp(*__offset[1], *(__offset[0] - 1)))
{
// Avoid split between globally equal elements: move one to
// front in first sequence.
--__offset[ 0 ];
--__offset[0];
}
_IteratorPair block_end = __block_begins[ __iam + 1 ] =
_IteratorPair(__offset[ 0 ], __offset[ 1 ]);
_IteratorPair __block_end = __block_begins[__iam + 1] =
_IteratorPair(__offset[0], __offset[1]);
// Make sure all threads have their block_begin result written out.
# pragma omp barrier
_IteratorPair __block_begin = __block_begins[ __iam ];
_IteratorPair __block_begin = __block_begins[__iam];
// Begin working for the first block, while the others except
// the last start to count.
......@@ -429,15 +425,15 @@ template<typename _IIter,
{
// The first thread can copy already.
__lengths[ __iam ] =
__op._M_invoke(__block_begin.first, block_end.first,
__block_begin.second, block_end.second, __result)
- __result;
__op._M_invoke(__block_begin.first, __block_end.first,
__block_begin.second, __block_end.second,
__result) - __result;
}
else
{
__lengths[ __iam ] =
__op.__count(__block_begin.first, block_end.first,
__block_begin.second, block_end.second);
__op.__count(__block_begin.first, __block_end.first,
__block_begin.second, __block_end.second);
}
// Make sure everyone wrote their lengths.
......@@ -454,8 +450,9 @@ template<typename _IIter,
__block_begin = __block_begins[__num_threads];
// Return the result iterator of the last block.
return_value = __op._M_invoke(
__block_begin.first, __end1, __block_begin.second, __end2, __r);
__return_value =
__op._M_invoke(__block_begin.first, __end1,
__block_begin.second, __end2, __r);
}
else
......@@ -464,67 +461,69 @@ template<typename _IIter,
__r += __lengths[ __i ];
// Reset begins for copy pass.
__op._M_invoke(__block_begin.first, block_end.first,
__block_begin.second, block_end.second, __r);
__op._M_invoke(__block_begin.first, __block_end.first,
__block_begin.second, __block_end.second, __r);
}
}
return return_value;
return __return_value;
}
template<typename _IIter,
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_union(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp)
_OutputIterator __result, _Compare __comp)
{
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
__result, __union_func< _IIter, _OutputIterator, _Compare>(_M_comp));
__result,
__union_func< _IIter, _OutputIterator,
_Compare>(__comp));
}
template<typename _IIter,
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_intersection(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp)
_OutputIterator __result, _Compare __comp)
{
return __parallel_set_operation(
__begin1, __end1, __begin2, __end2, __result,
__intersection_func<_IIter, _OutputIterator, _Compare>(_M_comp));
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
__result,
__intersection_func<_IIter,
_OutputIterator, _Compare>(__comp));
}
template<typename _IIter,
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_difference(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp)
_OutputIterator __result, _Compare __comp)
{
return __parallel_set_operation(
__begin1, __end1, __begin2, __end2, __result,
__difference_func<_IIter, _OutputIterator, _Compare>(_M_comp));
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
__result,
__difference_func<_IIter,
_OutputIterator, _Compare>(__comp));
}
template<typename _IIter,
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_symmetric_difference(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result,
_Compare _M_comp)
_Compare __comp)
{
return __parallel_set_operation(
__begin1, __end1, __begin2, __end2, __result,
symmetric_difference_func<_IIter, _OutputIterator, _Compare>
(_M_comp));
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
__result,
__symmetric_difference_func<_IIter,
_OutputIterator, _Compare>(__comp));
}
}
#endif /* _GLIBCXX_PARALLEL_SET_OPERATIONS_H */
......@@ -95,7 +95,8 @@ namespace __gnu_parallel
template<bool __stable, typename _RAIter, typename _Compare>
inline void
__parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, multiway_mergesort_exact_tag __parallelism)
_Compare __comp,
multiway_mergesort_exact_tag __parallelism)
{
_GLIBCXX_CALL(__end - __begin)
......@@ -114,7 +115,8 @@ namespace __gnu_parallel
template<bool __stable, typename _RAIter, typename _Compare>
inline void
__parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, multiway_mergesort_sampling_tag __parallelism)
_Compare __comp,
multiway_mergesort_sampling_tag __parallelism)
{
_GLIBCXX_CALL(__end - __begin)
......@@ -163,7 +165,6 @@ namespace __gnu_parallel
__parallelism.__get_num_threads());
}
/**
* @brief Choose multiway mergesort with exact splitting,
* for parallel sorting.
......@@ -184,7 +185,6 @@ namespace __gnu_parallel
multiway_mergesort_exact_tag(__parallelism.__get_num_threads()));
}
/**
* @brief Choose a parallel sorting algorithm.
* @param __begin Begin iterator of input sequence.
......
......@@ -51,20 +51,16 @@ namespace __gnu_parallel
public:
/** @brief Default constructor. Use default number of threads. */
parallel_tag()
{
this->_M_num_threads = 0;
}
{ _M_num_threads = 0; }
/** @brief Default constructor. Recommend number of threads to use.
* @param __num_threads Desired number of threads. */
parallel_tag(_ThreadIndex __num_threads)
{
this->_M_num_threads = __num_threads;
}
{ _M_num_threads = __num_threads; }
/** @brief Find out desired number of threads.
* @return Desired number of threads. */
inline _ThreadIndex __get_num_threads()
_ThreadIndex __get_num_threads()
{
if(_M_num_threads == 0)
return omp_get_max_threads();
......@@ -74,10 +70,8 @@ namespace __gnu_parallel
/** @brief Set the desired number of threads.
* @param __num_threads Desired number of threads. */
inline void set_num_threads(_ThreadIndex __num_threads)
{
this->_M_num_threads = __num_threads;
}
void set_num_threads(_ThreadIndex __num_threads)
{ _M_num_threads = __num_threads; }
};
/** @brief Recommends parallel execution using the
......
......@@ -37,19 +37,19 @@
namespace __gnu_parallel
{
/** @brief Parallel std::unique_copy(), w/__o explicit equality predicate.
/** @brief Parallel std::unique_copy(), w/__o explicit equality predicate.
* @param __first Begin iterator of input sequence.
* @param __last End iterator of input sequence.
* @param __result Begin iterator of result __sequence.
* @param __binary_pred Equality predicate.
* @return End iterator of result __sequence. */
template<typename _IIter,
template<typename _IIter,
class _OutputIterator,
class _BinaryPredicate>
_OutputIterator
__parallel_unique_copy(_IIter __first, _IIter __last,
_OutputIterator __result, _BinaryPredicate __binary_pred)
_OutputIterator __result,
_BinaryPredicate __binary_pred)
{
_GLIBCXX_CALL(__last - __first)
......@@ -57,9 +57,9 @@ template<typename _IIter,
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
_DifferenceType size = __last - __first;
_DifferenceType __size = __last - __first;
if (size == 0)
if (__size == 0)
return __result;
// Let the first thread process two parts.
......@@ -74,7 +74,7 @@ template<typename _IIter,
{
__num_threads = omp_get_num_threads();
__borders = new _DifferenceType[__num_threads + 2];
equally_split(size, __num_threads + 1, __borders);
equally_split(__size, __num_threads + 1, __borders);
__counter = new _DifferenceType[__num_threads + 1];
}
......@@ -95,12 +95,13 @@ template<typename _IIter,
++__i;
*__out++ = *__first;
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
for (_IIter __iter = __first + __begin; __iter < __first + __end;
++__iter)
{
if (!__binary_pred(*iter, *(iter-1)))
if (!__binary_pred(*__iter, *(__iter - 1)))
{
++__i;
*__out++ = *iter;
*__out++ = *__iter;
}
}
}
......@@ -109,9 +110,10 @@ template<typename _IIter,
__begin = __borders[__iam]; //one part
__end = __borders[__iam + 1];
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
for (_IIter __iter = __first + __begin; __iter < __first + __end;
++__iter)
{
if (!__binary_pred(*iter, *(iter - 1)))
if (!__binary_pred(*__iter, *(__iter - 1)))
++__i;
}
}
......@@ -135,14 +137,16 @@ template<typename _IIter,
_OutputIterator __iter_out = __result + __begin_output;
__begin = __borders[__num_threads];
__end = size;
__end = __size;
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
for (_IIter __iter = __first + __begin; __iter < __first + __end;
++__iter)
{
if (iter == __first || !__binary_pred(*iter, *(iter - 1)))
if (__iter == __first
|| !__binary_pred(*__iter, *(__iter - 1)))
{
++__i;
*__iter_out++ = *iter;
*__iter_out++ = *__iter;
}
}
......@@ -154,10 +158,11 @@ template<typename _IIter,
__begin_output += __counter[__t];
_OutputIterator __iter_out = __result + __begin_output;
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
for (_IIter __iter = __first + __begin; __iter < __first + __end;
++__iter)
{
if (!__binary_pred(*iter, *(iter-1)))
*__iter_out++ = *iter;
if (!__binary_pred(*__iter, *(__iter - 1)))
*__iter_out++ = *__iter;
}
}
}
......@@ -171,12 +176,12 @@ template<typename _IIter,
return __result + __end_output;
}
/** @brief Parallel std::unique_copy(), without explicit equality predicate
/** @brief Parallel std::unique_copy(), without explicit equality predicate
* @param __first Begin iterator of input sequence.
* @param __last End iterator of input sequence.
* @param __result Begin iterator of result __sequence.
* @return End iterator of result __sequence. */
template<typename _IIter, class _OutputIterator>
template<typename _IIter, class _OutputIterator>
inline _OutputIterator
__parallel_unique_copy(_IIter __first, _IIter __last,
_OutputIterator __result)
......
......@@ -49,8 +49,8 @@ namespace __gnu_parallel
#define _GLIBCXX_JOB_VOLATILE volatile
/** @brief One __job for a certain thread. */
template<typename _DifferenceTp>
/** @brief One __job for a certain thread. */
template<typename _DifferenceTp>
struct _Job
{
typedef _DifferenceTp _DifferenceType;
......@@ -72,7 +72,7 @@ template<typename _DifferenceTp>
_GLIBCXX_JOB_VOLATILE _DifferenceType _M_load;
};
/** @brief Work stealing algorithm for random access iterators.
/** @brief Work stealing algorithm for random access iterators.
*
* Uses O(1) additional memory. Synchronization at job lists is
* done with atomic operations.
......@@ -90,15 +90,17 @@ template<typename _DifferenceTp>
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename _RAIter,
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
__for_each_template_random_access_workstealing(
_RAIter __begin, _RAIter __end, _Op __op, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
__for_each_template_random_access_workstealing(_RAIter __begin,
_RAIter __end, _Op __op,
_Fu& __f, _Red __r,
_Result __base,
_Result& __output,
typename std::iterator_traits<_RAIter>::difference_type __bound)
{
_GLIBCXX_CALL(__end - __begin)
......@@ -115,8 +117,8 @@ template<typename _RAIter,
_DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound;
// To avoid false sharing in a cache line.
const int __stride =
__s.cache_line_size * 10 / sizeof(_Job<_DifferenceType>) + 1;
const int __stride = (__s.cache_line_size * 10
/ sizeof(_Job<_DifferenceType>) + 1);
// Total number of threads currently working.
_ThreadIndex __busy = 0;
......@@ -130,13 +132,12 @@ template<typename _RAIter,
__output = __base;
// No more threads than jobs, at least one thread.
_ThreadIndex __num_threads =
__gnu_parallel::max<_ThreadIndex>(1,
__gnu_parallel::min<_DifferenceType>(__length, __get_max_threads()));
_ThreadIndex __num_threads = __gnu_parallel::max<_ThreadIndex>
(1, __gnu_parallel::min<_DifferenceType>(__length,
__get_max_threads()));
# pragma omp parallel shared(__busy) num_threads(__num_threads)
{
# pragma omp single
{
__num_threads = omp_get_num_threads();
......@@ -167,7 +168,7 @@ template<typename _RAIter,
// Every thread has its own random number generator
// (modulo __num_threads).
_RandomNumber rand_gen(__iam, __num_threads);
_RandomNumber __rand_gen(__iam, __num_threads);
// This thread is currently working.
# pragma omp atomic
......@@ -176,11 +177,12 @@ template<typename _RAIter,
__iam_working = true;
// How many jobs per thread? last thread gets the rest.
__my_job._M_first =
static_cast<_DifferenceType>(__iam * (__length / __num_threads));
__my_job._M_first = static_cast<_DifferenceType>
(__iam * (__length / __num_threads));
__my_job._M_last = (__iam == (__num_threads - 1)) ?
(__length - 1) : ((__iam + 1) * (__length / __num_threads) - 1);
__my_job._M_last = (__iam == (__num_threads - 1)
? (__length - 1)
: ((__iam + 1) * (__length / __num_threads) - 1));
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
// Init result with _M_first value (to have a base value for reduction)
......@@ -210,8 +212,8 @@ template<typename _RAIter,
// fetch-and-add call
// Reserve current job block (size __chunk_size) in my queue.
_DifferenceType __current_job =
__fetch_and_add<_DifferenceType>(
&(__my_job._M_first), __chunk_size);
__fetch_and_add<_DifferenceType>(&(__my_job._M_first),
__chunk_size);
// Update _M_load, to make the three values consistent,
// _M_first might have been changed in the meantime
......@@ -242,13 +244,14 @@ template<typename _RAIter,
__iam_working = false;
}
_DifferenceType __supposed_first, __supposed_last, __supposed_load;
_DifferenceType __supposed_first, __supposed_last,
__supposed_load;
do
{
// Find random nonempty deque (not own), do consistency check.
__yield();
# pragma omp flush(__busy)
__victim = rand_gen();
__victim = __rand_gen();
__supposed_first = __job[__victim * __stride]._M_first;
__supposed_last = __job[__victim * __stride]._M_last;
__supposed_load = __job[__victim * __stride]._M_load;
......@@ -269,14 +272,14 @@ template<typename _RAIter,
// Push __victim's current start forward.
_DifferenceType __stolen_first =
__fetch_and_add<_DifferenceType>(
&(__job[__victim * __stride]._M_first), __steal);
_DifferenceType __stolen_try =
__stolen_first + __steal - _DifferenceType(1);
__fetch_and_add<_DifferenceType>
(&(__job[__victim * __stride]._M_first), __steal);
_DifferenceType __stolen_try = (__stolen_first + __steal
- _DifferenceType(1));
__my_job._M_first = __stolen_first;
__my_job._M_last =
__gnu_parallel::min(__stolen_try, __supposed_last);
__my_job._M_last = __gnu_parallel::min(__stolen_try,
__supposed_last);
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
// Has potential work again.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment