Commit 15ac3c72, authored and committed by Johannes Singler

algobase.h: Replace tabs by spaces; correct line breaks.

2009-09-17  Johannes Singler  <singler@ira.uka.de>

        * include/parallel/algobase.h: Replace tabs by spaces;
        correct line breaks.
        * include/parallel/algorithmfwd.h: Likewise.
        * include/parallel/balanced_quicksort.h: Likewise.
        * include/parallel/base.h: Likewise.
        * include/parallel/checkers.h: Likewise.
        * include/parallel/compatibility.h: Likewise.
        * include/parallel/equally_split.h: Likewise.
        * include/parallel/find.h: Likewise.
        * include/parallel/for_each.h: Likewise.
        * include/parallel/for_each_selectors.h: Likewise.
        * include/parallel/iterator.h: Likewise.
        * include/parallel/list_partition.h: Likewise.
        * include/parallel/losertree.h: Likewise.
        * include/parallel/merge.h: Likewise.
        * include/parallel/multiseq_selection.h: Likewise.
        * include/parallel/multiway_merge.h: Likewise.
        * include/parallel/multiway_mergesort.h: Likewise.
        * include/parallel/numeric: Likewise.
        * include/parallel/numericfwd.h: Likewise.
        * include/parallel/omp_loop.h: Likewise.
        * include/parallel/omp_loop_static.h: Likewise.
        * include/parallel/par_loop.h: Likewise.
        * include/parallel/partial_sum.h: Likewise.
        * include/parallel/partition.h: Likewise.
        * include/parallel/queue.h: Likewise.
        * include/parallel/quicksort.h: Likewise.
        * include/parallel/random_number.h: Likewise.
        * include/parallel/random_shuffle.h: Likewise.
        * include/parallel/search.h: Likewise.
        * include/parallel/set_operations.h: Likewise.
        * include/parallel/settings.h: Likewise.
        * include/parallel/sort.h: Likewise.
        * include/parallel/types.h: Likewise.
        * include/parallel/unique_copy.h: Likewise.
        * include/parallel/workstealing.h: Likewise.
        * include/parallel/algo.h: Likewise;
        shorten _ForwardIterator to _FIterator.
        * include/parallel/find_selectors.h: Likewise.

From-SVN: r151791
Parent commit: 10e154df
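
The hunks that follow are whitespace-only: tab indentation becomes spaces and over-long statements are re-broken across lines. As a purely hypothetical illustration of the kind of line-break correction applied throughout (not an actual hunk from this commit):

#include <algorithm>
#include <functional>
#include <vector>

int main()
{
  std::vector<int> v(16, 1);
  // Before: the whole call sat on one tab-indented line:
  //   std::partial_sort(v.begin(), v.begin() + 4, v.end(), std::less<int>());
  // After: the trailing argument moves to a continuation line that is
  // indented with spaces under the opening parenthesis:
  std::partial_sort(v.begin(), v.begin() + 4, v.end(),
                    std::less<int>());
  return 0;
}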
......@@ -108,19 +108,25 @@ template<typename _RAIter, typename _Compare>
_RAIter __pivot_pos =
__median_of_three_iterators(__begin, __begin + (__end - __begin) / 2,
__end - 1, __comp);
__end - 1, __comp);
#if defined(_GLIBCXX_ASSERTIONS)
// Must be in between somewhere.
_DifferenceType __n = __end - __begin;
_GLIBCXX_PARALLEL_ASSERT(
(!__comp(*__pivot_pos, *__begin) && !__comp(*(__begin + __n / 2), *__pivot_pos))
|| (!__comp(*__pivot_pos, *__begin) && !__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) && !__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) && !__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1)) && !__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1)) && !__comp(*(__begin + __n / 2), *__pivot_pos)));
(!__comp(*__pivot_pos, *__begin) &&
!__comp(*(__begin + __n / 2), *__pivot_pos))
|| (!__comp(*__pivot_pos, *__begin) &&
!__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) &&
!__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__begin + __n / 2)) &&
!__comp(*(__end - 1), *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1)) &&
!__comp(*__begin, *__pivot_pos))
|| (!__comp(*__pivot_pos, *(__end - 1)) &&
!__comp(*(__begin + __n / 2), *__pivot_pos)));
#endif
// Swap pivot value to end.
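
The six-way disjunction in the assertion above only checks that the chosen pivot compares no less than one of the three candidates and no greater than another, i.e. that it is a median of the first, middle, and last element. A minimal stand-alone sketch of such a median-of-three selection (illustration only, not the __median_of_three_iterators used by libstdc++; assumes a strict weak ordering):

#include <functional>

template<typename It, typename Comp = std::less<>>
It
median_of_three(It a, It b, It c, Comp comp = Comp())
{
  // Return the iterator whose value is neither the smallest nor the
  // largest of the three; with equal elements any of them may be chosen.
  if (comp(*a, *b))
    {
      if (comp(*b, *c))
        return b;                        // *a < *b < *c
      return comp(*a, *c) ? c : a;       // *a < *b, *c <= *b
    }
  if (comp(*a, *c))
    return a;                            // *b <= *a < *c
  return comp(*b, *c) ? c : b;           // *b <= *a, *c <= *a
}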
......@@ -183,15 +189,17 @@ template<typename _RAIter, typename _Compare>
}
// Divide step.
_DifferenceType __split_pos = __qsb_divide(__begin, __end, __comp, __num_threads);
_DifferenceType __split_pos =
__qsb_divide(__begin, __end, __comp, __num_threads);
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos && __split_pos < (__end - __begin));
_GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos &&
__split_pos < (__end - __begin));
#endif
_ThreadIndex __num_threads_leftside =
std::max<_ThreadIndex>(1, std::min<_ThreadIndex>(
__num_threads - 1, __split_pos * __num_threads / __n));
__num_threads - 1, __split_pos * __num_threads / __n));
# pragma omp atomic
*__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1;
......@@ -284,11 +292,13 @@ template<typename _RAIter, typename _Compare>
// Divide, leave pivot unchanged in last place.
_RAIter __split_pos1, __split_pos2;
__split_pos1 = __gnu_sequential::partition(__begin, __end - 1, __pred);
__split_pos1 =
__gnu_sequential::partition(__begin, __end - 1, __pred);
// Left side: < __pivot_pos; __right side: >= __pivot_pos.
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1 && __split_pos1 < __end);
_GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1
&& __split_pos1 < __end);
#endif
// Swap pivot back to middle.
if (__split_pos1 != __pivot_pos)
......@@ -302,14 +312,14 @@ template<typename _RAIter, typename _Compare>
// Very unequal split, one part smaller than one 128th
// elements not strictly larger than the pivot.
__gnu_parallel::__unary_negate<__gnu_parallel::__binder1st
<_Compare, _ValueType, _ValueType, bool>, _ValueType>
__pred(__gnu_parallel::__binder1st
<_Compare, _ValueType, _ValueType, bool>(__comp,
*__pivot_pos));
<_Compare, _ValueType, _ValueType, bool>, _ValueType>
__pred(__gnu_parallel::__binder1st
<_Compare, _ValueType, _ValueType, bool>(
__comp, *__pivot_pos));
// Find other end of pivot-equal range.
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
__end, __pred);
__end, __pred);
}
else
// Only skip the pivot.
......@@ -325,10 +335,10 @@ template<typename _RAIter, typename _Compare>
{
// Right side larger.
if ((__split_pos2) != __end)
__tl._M_leftover_parts.push_front(std::make_pair(__split_pos2,
__end));
__tl._M_leftover_parts.push_front(
std::make_pair(__split_pos2, __end));
//__current.first = __begin; //already set anyway
//__current.first = __begin; //already set anyway
__current.second = __split_pos1;
continue;
}
......@@ -337,10 +347,10 @@ template<typename _RAIter, typename _Compare>
// Left side larger.
if (__begin != __split_pos1)
__tl._M_leftover_parts.push_front(std::make_pair(__begin,
__split_pos1));
__split_pos1));
__current.first = __split_pos2;
//__current.second = __end; //already set anyway
//__current.second = __end; //already set anyway
continue;
}
}
......@@ -367,10 +377,11 @@ template<typename _RAIter, typename _Compare>
// Look for new work.
bool __successfully_stolen = false;
while (__wait && *__tl._M_elements_leftover > 0 && !__successfully_stolen
while (__wait && *__tl._M_elements_leftover > 0
&& !__successfully_stolen
#if _GLIBCXX_ASSERTIONS
// Possible dead-lock.
&& (omp_get_wtime() < (__search_start + 1.0))
// Possible dead-lock.
&& (omp_get_wtime() < (__search_start + 1.0))
#endif
)
{
......@@ -392,7 +403,7 @@ template<typename _RAIter, typename _Compare>
{
sleep(1);
_GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
< (__search_start + 1.0));
< (__search_start + 1.0));
}
#endif
if (!__successfully_stolen)
......@@ -439,11 +450,13 @@ template<typename _RAIter, typename _Compare>
// Initialize thread local storage
_TLSType** __tls = new _TLSType*[__num_threads];
_DifferenceType __queue_size = __num_threads * (_ThreadIndex)(log2(__n) + 1);
_DifferenceType __queue_size =
__num_threads * (_ThreadIndex)(log2(__n) + 1);
for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
__tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
// There can never be more than ceil(log2(__n)) ranges on the stack, because
// There can never be more than ceil(log2(__n)) ranges on the stack,
// because
// 1. Only one processor pushes onto the stack
// 2. The largest range has at most length __n
// 3. Each range is larger than half of the range remaining
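To make the bound above concrete, here is a small worked computation of the __queue_size formula from this hunk, with assumed example values (n = 1,000,000 elements, 8 threads):

#include <cmath>
#include <cstdio>

int main()
{
  long long n = 1000000;          // assumed element count
  int num_threads = 8;            // assumed thread count
  // ceil(log2(1000000)) == 20, so no thread ever holds more than 20
  // pending ranges; the formula from the hunk gives 8 * 20 = 160 slots.
  long long queue_size = num_threads
                         * (long long)(std::log2((double)n) + 1);
  std::printf("queue_size = %lld\n", queue_size);   // prints 160
  return 0;
}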
......@@ -459,13 +472,15 @@ template<typename _RAIter, typename _Compare>
}
// Main recursion call.
__qsb_conquer(__tls, __begin, __begin + __n, __comp, 0, __num_threads, true);
__qsb_conquer(
__tls, __begin, __begin + __n, __comp, 0, __num_threads, true);
#if _GLIBCXX_ASSERTIONS
// All stack must be empty.
_Piece __dummy;
for (int __i = 1; __i < __num_threads; ++__i)
_GLIBCXX_PARALLEL_ASSERT(!__tls[__i]->_M_leftover_parts.pop_back(__dummy));
_GLIBCXX_PARALLEL_ASSERT(
!__tls[__i]->_M_leftover_parts.pop_back(__dummy));
#endif
for (int __i = 0; __i < __num_threads; ++__i)
......
......@@ -119,7 +119,7 @@ template<typename _Size>
* @see decode2
*/
inline _CASable
__encode2(int __a, int __b) //must all be non-negative, actually
__encode2(int __a, int __b) //must all be non-negative, actually
{
return (((_CASable)__a) << (_CASable_bits / 2)) | (((_CASable)__b) << 0);
}
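
The function above packs two non-negative ints into the upper and lower halves of a single CAS-able word, so that the pair can be handled by one atomic operation. A self-contained sketch of that packing and the matching decode, assuming a 64-bit word (the real _CASable width is configuration-dependent):

#include <cstdint>
#include <utility>

typedef std::uint64_t casable;            // assumed 64-bit CAS-able word
const int casable_bits = 64;

inline casable
encode2(int a, int b)                     // both must be non-negative
{ return ((casable)a << (casable_bits / 2)) | (casable)b; }

inline std::pair<int, int>
decode2(casable x)
{
  const casable mask = (casable(1) << (casable_bits / 2)) - 1;
  return std::make_pair((int)(x >> (casable_bits / 2)),   // upper half: a
                        (int)(x & mask));                  // lower half: b
}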
......@@ -192,7 +192,7 @@ template<typename _Predicate, typename argument_type>
/** @brief Similar to std::__binder1st,
* but giving the argument types explicitly. */
template<typename _Operation, typename _FirstArgumentType,
typename _SecondArgumentType, typename _ResultType>
typename _SecondArgumentType, typename _ResultType>
class __binder1st
: public std::unary_function<_SecondArgumentType, _ResultType>
{
......@@ -221,7 +221,7 @@ template<typename _Operation, typename _FirstArgumentType,
* explicitly.
*/
template<typename _Operation, typename _FirstArgumentType,
typename _SecondArgumentType, typename _ResultType>
typename _SecondArgumentType, typename _ResultType>
class binder2nd
: public std::unary_function<_FirstArgumentType, _ResultType>
{
......@@ -281,7 +281,7 @@ template<typename _Tp1, typename _Tp2>
struct _Plus : public std::binary_function<_Tp1, _Tp2, _Tp1>
{
typedef __typeof__(*static_cast<_Tp1*>(NULL)
+ *static_cast<_Tp2*>(NULL)) __result;
+ *static_cast<_Tp2*>(NULL)) __result;
__result
operator()(const _Tp1& __x, const _Tp2& __y) const
......@@ -293,7 +293,7 @@ template<typename _Tp>
struct _Plus<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
{
typedef __typeof__(*static_cast<_Tp*>(NULL)
+ *static_cast<_Tp*>(NULL)) __result;
+ *static_cast<_Tp*>(NULL)) __result;
__result
operator()(const _Tp& __x, const _Tp& __y) const
......@@ -306,7 +306,7 @@ template<typename _Tp1, typename _Tp2>
struct _Multiplies : public std::binary_function<_Tp1, _Tp2, _Tp1>
{
typedef __typeof__(*static_cast<_Tp1*>(NULL)
* *static_cast<_Tp2*>(NULL)) __result;
* *static_cast<_Tp2*>(NULL)) __result;
__result
operator()(const _Tp1& __x, const _Tp2& __y) const
......@@ -318,7 +318,7 @@ template<typename _Tp>
struct _Multiplies<_Tp, _Tp> : public std::binary_function<_Tp, _Tp, _Tp>
{
typedef __typeof__(*static_cast<_Tp*>(NULL)
* *static_cast<_Tp*>(NULL)) __result;
* *static_cast<_Tp*>(NULL)) __result;
__result
operator()(const _Tp& __x, const _Tp& __y) const
......
......@@ -39,7 +39,8 @@
namespace __gnu_parallel
{
/**
* @brief Check whether @__c [__begin, @__c __end) is sorted according to @__c __comp.
* @brief Check whether @__c [__begin, @__c __end) is sorted according
* to @__c __comp.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __comp Comparator.
......@@ -49,33 +50,34 @@ namespace __gnu_parallel
template<typename _IIter, typename _Compare>
bool
__is_sorted(_IIter __begin, _IIter __end,
_Compare __comp
= std::less<typename std::iterator_traits<_IIter>::
_ValueType>())
_Compare __comp
= std::less<typename std::iterator_traits<_IIter>::
_ValueType>())
{
if (__begin == __end)
return true;
return true;
_IIter __current(__begin), __recent(__begin);
unsigned long long __position = 1;
for (__current++; __current != __end; __current++)
{
if (__comp(*__current, *__recent))
{
printf("__is_sorted: check failed before position %__i.\n",
__position);
return false;
}
__recent = __current;
__position++;
}
{
if (__comp(*__current, *__recent))
{
printf("__is_sorted: check failed before position %__i.\n",
__position);
return false;
}
__recent = __current;
__position++;
}
return true;
}
/**
* @brief Check whether @__c [__begin, @__c __end) is sorted according to @__c __comp.
* @brief Check whether @__c [__begin, @__c __end) is sorted according to
* @__c __comp.
* Prints the position in case an unordered pair is found.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
......@@ -87,36 +89,37 @@ namespace __gnu_parallel
template<typename _IIter, typename _Compare>
bool
is_sorted_failure(_IIter __begin, _IIter __end,
_IIter& __first_failure,
_Compare __comp
= std::less<typename std::iterator_traits<_IIter>::
_ValueType>())
_IIter& __first_failure,
_Compare __comp
= std::less<typename std::iterator_traits<_IIter>::
_ValueType>())
{
if (__begin == __end)
return true;
return true;
_IIter __current(__begin), __recent(__begin);
unsigned long long __position = 1;
for (__current++; __current != __end; __current++)
{
if (__comp(*__current, *__recent))
{
__first_failure = __current;
printf("__is_sorted: check failed before position %lld.\n",
__position);
return false;
}
__recent = __current;
__position++;
}
{
if (__comp(*__current, *__recent))
{
__first_failure = __current;
printf("__is_sorted: check failed before position %lld.\n",
__position);
return false;
}
__recent = __current;
__position++;
}
__first_failure = __end;
return true;
}
/**
* @brief Check whether @__c [__begin, @__c __end) is sorted according to @__c __comp.
* @brief Check whether @__c [__begin, @__c __end) is sorted according to
* @__c __comp.
* Prints all unordered pair, including the surrounding two elements.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
......@@ -127,26 +130,26 @@ namespace __gnu_parallel
bool
// XXX Compare default template argument
is_sorted_print_failures(_IIter __begin, _IIter __end,
_Compare __comp
= std::less<typename std::iterator_traits
<_IIter>::value_type>())
_Compare __comp
= std::less<typename std::iterator_traits
<_IIter>::value_type>())
{
if (__begin == __end)
return true;
return true;
_IIter __recent(__begin);
bool __ok = true;
for (_IIter __pos(__begin + 1); __pos != __end; __pos++)
{
if (__comp(*__pos, *__recent))
{
printf("%ld: %d %d %d %d\n", __pos - __begin, *(__pos - 2),
*(__pos- 1), *__pos, *(__pos + 1));
__ok = false;
}
__recent = __pos;
}
{
if (__comp(*__pos, *__recent))
{
printf("%ld: %d %d %d %d\n", __pos - __begin, *(__pos - 2),
*(__pos- 1), *__pos, *(__pos + 1));
__ok = false;
}
__recent = __pos;
}
return __ok;
}
}
......
......@@ -65,9 +65,9 @@ namespace __gnu_parallel
int32 __faa32(int32* __x, int32 __inc)
{
asm volatile("lock xadd %0,%1"
: "=__r" (__inc), "=__m" (*__x)
: "0" (__inc)
: "memory");
: "=__r" (__inc), "=__m" (*__x)
: "0" (__inc)
: "memory");
return __inc;
}
#if defined(__x86_64)
......@@ -75,9 +75,9 @@ namespace __gnu_parallel
int64 __faa64(int64* __x, int64 __inc)
{
asm volatile("lock xadd %0,%1"
: "=__r" (__inc), "=__m" (*__x)
: "0" (__inc)
: "memory");
: "=__r" (__inc), "=__m" (*__x)
: "0" (__inc)
: "memory");
return __inc;
}
#endif
......@@ -94,25 +94,25 @@ namespace __gnu_parallel
inline int32
__fetch_and_add_32(volatile int32* __ptr, int32 __addend)
{
#if defined(__ICC) //x86 version
#if defined(__ICC) //x86 version
return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ECC) //IA-64 version
#elif defined(__ECC) //IA-64 version
return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(__ptr),
__addend);
__addend);
#elif defined(__GNUC__)
return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
volatile int32 __before, __after;
do
{
__before = *__ptr;
__after = __before + __addend;
__before = *__ptr;
__after = __before + __addend;
} while (atomic_cas_32((volatile unsigned int*)__ptr, __before,
__after) != __before);
__after) != __before);
return __before;
#else //fallback, slow
#else //fallback, slow
#pragma message("slow __fetch_and_add_32")
int32 __res;
#pragma omp critical
......@@ -133,32 +133,32 @@ namespace __gnu_parallel
inline int64
__fetch_and_add_64(volatile int64* __ptr, int64 __addend)
{
#if defined(__ICC) && defined(__x86_64) //x86 version
#if defined(__ICC) && defined(__x86_64) //x86 version
return __faa64<int>((int64*)__ptr, __addend);
#elif defined(__ECC) //IA-64 version
#elif defined(__ECC) //IA-64 version
return _InterlockedExchangeAdd64((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
_GLIBCXX_PARALLEL_ASSERT(false); //not available in this case
_GLIBCXX_PARALLEL_ASSERT(false); //not available in this case
return 0;
#else
return _InterlockedExchangeAdd64(__ptr, __addend);
#endif
#elif defined(__GNUC__) && defined(__x86_64)
return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__GNUC__) && defined(__i386) && \
#elif defined(__GNUC__) && defined(__i386) && \
(defined(__i686) || defined(__pentium4) || defined(__athlon))
return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
volatile int64 __before, __after;
do
{
__before = *__ptr;
__after = __before + __addend;
__before = *__ptr;
__after = __before + __addend;
} while (atomic_cas_64((volatile unsigned long long*)__ptr, __before,
__after) != __before);
__after) != __before);
return __before;
#else //fallback, slow
#else //fallback, slow
#if defined(__GNUC__) && defined(__i386)
// XXX doesn'__t work with -march=native
//#warning "please compile with -march=i686 or better"
......@@ -201,9 +201,10 @@ namespace __gnu_parallel
{
int32 __before;
__asm__ __volatile__("lock; cmpxchgl %1,%2"
: "=a"(__before)
: "q"(__nw), "__m"(*(volatile long long*)(__ptr)), "0"(__old)
: "memory");
: "=a"(__before)
: "q"(__nw), "__m"(*(volatile long long*)(__ptr)),
"0"(__old)
: "memory");
return __before;
}
......@@ -214,9 +215,10 @@ namespace __gnu_parallel
{
int64 __before;
__asm__ __volatile__("lock; cmpxchgq %1,%2"
: "=a"(__before)
: "q"(__nw), "__m"(*(volatile long long*)(__ptr)), "0"(__old)
: "memory");
: "=a"(__before)
: "q"(__nw), "__m"(*(volatile long long*)(__ptr)),
"0"(__old)
: "memory");
return __before;
}
#endif
......@@ -232,32 +234,35 @@ namespace __gnu_parallel
* @param __replacement Replacement value.
*/
inline bool
__compare_and_swap_32(volatile int32* __ptr, int32 __comparand, int32 __replacement)
__compare_and_swap_32(volatile int32* __ptr, int32 __comparand,
int32 __replacement)
{
#if defined(__ICC) //x86 version
#if defined(__ICC) //x86 version
return _InterlockedCompareExchange((void*)__ptr, __replacement,
__comparand) == __comparand;
#elif defined(__ECC) //IA-64 version
__comparand) == __comparand;
#elif defined(__ECC) //IA-64 version
return _InterlockedCompareExchange((void*)__ptr, __replacement,
__comparand) == __comparand;
__comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(__ptr),
__replacement, __comparand) == __comparand;
return _InterlockedCompareExchange(
reinterpret_cast<volatile long*>(__ptr),
__replacement, __comparand)
== __comparand;
#elif defined(__GNUC__)
return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
return atomic_cas_32((volatile unsigned int*)__ptr, __comparand,
__replacement) == __comparand;
__replacement) == __comparand;
#else
#pragma message("slow __compare_and_swap_32")
bool __res = false;
#pragma omp critical
{
if (*__ptr == __comparand)
{
*__ptr = __replacement;
__res = true;
}
{
*__ptr = __replacement;
__res = true;
}
}
return __res;
#endif
......@@ -272,30 +277,31 @@ namespace __gnu_parallel
* @param __replacement Replacement value.
*/
inline bool
__compare_and_swap_64(volatile int64* __ptr, int64 __comparand, int64 __replacement)
__compare_and_swap_64(volatile int64* __ptr, int64 __comparand,
int64 __replacement)
{
#if defined(__ICC) && defined(__x86_64) //x86 version
#if defined(__ICC) && defined(__x86_64) //x86 version
return __cas64<int>(__ptr, __comparand, __replacement) == __comparand;
#elif defined(__ECC) //IA-64 version
#elif defined(__ECC) //IA-64 version
return _InterlockedCompareExchange64((void*)__ptr, __replacement,
__comparand) == __comparand;
__comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
_GLIBCXX_PARALLEL_ASSERT(false); //not available in this case
_GLIBCXX_PARALLEL_ASSERT(false); //not available in this case
return 0;
#else
return _InterlockedCompareExchange64(__ptr, __replacement,
__comparand) == __comparand;
__comparand) == __comparand;
#endif
#elif defined(__GNUC__) && defined(__x86_64)
return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__GNUC__) && defined(__i386) && \
#elif defined(__GNUC__) && defined(__i386) && \
(defined(__i686) || defined(__pentium4) || defined(__athlon))
return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
return atomic_cas_64((volatile unsigned long long*)__ptr,
__comparand, __replacement) == __comparand;
__comparand, __replacement) == __comparand;
#else
#if defined(__GNUC__) && defined(__i386)
// XXX -march=native
......@@ -306,10 +312,10 @@ namespace __gnu_parallel
#pragma omp critical
{
if (*__ptr == __comparand)
{
*__ptr = __replacement;
__res = true;
}
{
*__ptr = __replacement;
__res = true;
}
}
return __res;
#endif
......@@ -327,9 +333,11 @@ namespace __gnu_parallel
__compare_and_swap(volatile _Tp* __ptr, _Tp __comparand, _Tp __replacement)
{
if (sizeof(_Tp) == sizeof(int32))
return __compare_and_swap_32((volatile int32*) __ptr, (int32)__comparand, (int32)__replacement);
return __compare_and_swap_32((volatile int32*) __ptr, (int32)__comparand,
(int32)__replacement);
else if (sizeof(_Tp) == sizeof(int64))
return __compare_and_swap_64((volatile int64*) __ptr, (int64)__comparand, (int64)__replacement);
return __compare_and_swap_64((volatile int64*) __ptr, (int64)__comparand,
(int64)__replacement);
else
_GLIBCXX_PARALLEL_ASSERT(false);
}
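
On targets that only expose a compare-and-swap primitive, fetch-and-add is emulated with a CAS retry loop, which is what the __SUNPRO_CC/__sparc branches earlier in this file do. A minimal sketch of that pattern, written here with the GCC __sync builtin that also appears in the hunks above:

#include <cstdint>

inline std::int32_t
cas_based_fetch_and_add(volatile std::int32_t* ptr, std::int32_t addend)
{
  std::int32_t before, after;
  do
    {
      before = *ptr;               // read the current value
      after = before + addend;     // desired new value
    }
  while (!__sync_bool_compare_and_swap(ptr, before, after));
  return before;                   // fetch-and-add returns the old value
}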
......
......@@ -45,7 +45,8 @@ namespace __gnu_parallel
* @returns End of splitter sequence, i.e. @__c __s+__num_threads+1 */
template<typename _DifferenceType, typename _OutputIterator>
_OutputIterator
equally_split(_DifferenceType __n, _ThreadIndex __num_threads, _OutputIterator __s)
equally_split(_DifferenceType __n, _ThreadIndex __num_threads,
_OutputIterator __s)
{
_DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __num_longer_chunks = __n % __num_threads;
......@@ -53,7 +54,8 @@ template<typename _DifferenceType, typename _OutputIterator>
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
{
*__s++ = __pos;
__pos += (__i < __num_longer_chunks) ? (__chunk_length + 1) : __chunk_length;
__pos += (__i < __num_longer_chunks) ?
(__chunk_length + 1) : __chunk_length;
}
*__s++ = __n;
return __s;
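
The splitting rule above gives the first n % num_threads chunks one extra element each. A stand-alone sketch of the same rule, writing the borders into a vector instead of an output iterator (illustration only):

#include <vector>

std::vector<long>
equally_split_sketch(long n, int num_threads)
{
  std::vector<long> borders;
  long chunk_length = n / num_threads;
  long num_longer_chunks = n % num_threads;
  long pos = 0;
  for (int i = 0; i < num_threads; ++i)
    {
      borders.push_back(pos);
      pos += (i < num_longer_chunks) ? chunk_length + 1 : chunk_length;
    }
  borders.push_back(n);     // e.g. n = 10, 3 threads -> 0, 4, 7, 10
  return borders;
}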
......
......@@ -53,9 +53,9 @@ namespace __gnu_parallel
* @return Place of finding in both sequences.
*/
template<typename _RAIter1,
typename _RAIter2,
typename _Pred,
typename _Selector>
typename _RAIter2,
typename _Pred,
typename _Selector>
inline std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred, _Selector __selector)
......@@ -64,13 +64,13 @@ template<typename _RAIter1,
{
case GROWING_BLOCKS:
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
growing_blocks_tag());
growing_blocks_tag());
case CONSTANT_SIZE_BLOCKS:
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
constant_size_blocks_tag());
constant_size_blocks_tag());
case EQUAL_SPLIT:
return __find_template(__begin1, __end1, __begin2, __pred, __selector,
equal_split_tag());
equal_split_tag());
default:
_GLIBCXX_PARALLEL_ASSERT(false);
return std::make_pair(__begin1, __begin2);
......@@ -90,9 +90,9 @@ template<typename _RAIter1,
* @return Place of finding in both sequences.
*/
template<typename _RAIter1,
typename _RAIter2,
typename _Pred,
typename _Selector>
typename _RAIter2,
typename _Pred,
typename _Selector>
std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1,
_RAIter1 __end1,
......@@ -125,7 +125,8 @@ template<typename _RAIter1,
} //single
_ThreadIndex __iam = omp_get_thread_num();
_DifferenceType __start = __borders[__iam], __stop = __borders[__iam + 1];
_DifferenceType __start = __borders[__iam],
__stop = __borders[__iam + 1];
_RAIter1 __i1 = __begin1 + __start;
_RAIter2 __i2 = __begin2 + __start;
......@@ -153,8 +154,7 @@ template<typename _RAIter1,
delete[] __borders;
return
std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
__begin2 + __result);
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result);
}
#endif
......@@ -178,15 +178,14 @@ template<typename _RAIter1,
* There are two main differences between the growing blocks and
* the constant-size blocks variants.
* 1. For GB, the block size grows; for CSB, the block size is fixed.
* 2. For GB, the blocks are allocated dynamically;
* for CSB, the blocks are allocated in a predetermined manner,
* namely spacial round-robin.
*/
template<typename _RAIter1,
typename _RAIter2,
typename _Pred,
typename _Selector>
typename _RAIter2,
typename _Pred,
typename _Selector>
std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred, _Selector __selector,
......@@ -231,7 +230,7 @@ template<typename _RAIter1,
_DifferenceType __block_size = __s.find_initial_block_size;
_DifferenceType __start =
__fetch_and_add<_DifferenceType>(&__next_block_start, __block_size);
__fetch_and_add<_DifferenceType>(&__next_block_start, __block_size);
// Get new block, update pointer to next block.
_DifferenceType __stop =
......@@ -250,7 +249,8 @@ template<typename _RAIter1,
}
__local_result = __selector._M_sequential_algorithm(
__begin1 + __start, __begin1 + __stop, __begin2 + __start, __pred);
__begin1 + __start, __begin1 + __stop,
__begin2 + __start, __pred);
if (__local_result.first != (__begin1 + __stop))
{
omp_set_lock(&__result_lock);
......@@ -259,20 +259,22 @@ template<typename _RAIter1,
__result = __local_result.first - __begin1;
// Result cannot be in future blocks, stop algorithm.
__fetch_and_add<_DifferenceType>(&__next_block_start, __length);
__fetch_and_add<_DifferenceType>(
&__next_block_start, __length);
}
omp_unset_lock(&__result_lock);
}
__block_size =
std::min<_DifferenceType>(__block_size * __s.find_increasing_factor,
__s.find_maximum_block_size);
__block_size = std::min<_DifferenceType>(
__block_size * __s.find_increasing_factor,
__s.find_maximum_block_size);
// Get new block, update pointer to next block.
__start =
__fetch_and_add<_DifferenceType>(&__next_block_start, __block_size);
__fetch_and_add<_DifferenceType>(
&__next_block_start, __block_size);
__stop = ((__length < (__start + __block_size))
? __length : (__start + __block_size));
? __length : (__start + __block_size));
}
} //parallel
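
The update above is the "growing blocks" schedule: each thread fetches the next block from a shared cursor, and the block size grows geometrically up to a cap, keeping overhead low for matches found late while not overshooting matches found early. A small sketch of just that schedule, with assumed start size, growth factor, and cap (the real values come from the find_initial_block_size, find_increasing_factor, and find_maximum_block_size settings referenced in the hunks above):

#include <algorithm>
#include <cstdio>

int main()
{
  long block_size = 256;           // assumed find_initial_block_size
  const double factor = 2.0;       // assumed find_increasing_factor
  const long max_block = 8192;     // assumed find_maximum_block_size
  long next_block_start = 0;       // shared cursor (atomic in the real code)
  for (int step = 0; step < 8; ++step)
    {
      long start = next_block_start;         // __fetch_and_add in the hunk
      next_block_start += block_size;
      std::printf("block %d: [%ld, %ld)\n", step, start, start + block_size);
      block_size = std::min<long>((long)(block_size * factor), max_block);
    }
  return 0;
}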
......@@ -280,8 +282,7 @@ template<typename _RAIter1,
// Return iterator on found element.
return
std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
__begin2 + __result);
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result);
}
#endif
......@@ -307,9 +308,9 @@ template<typename _RAIter1,
* round-robin.
*/
template<typename _RAIter1,
typename _RAIter2,
typename _Pred,
typename _Selector>
typename _RAIter2,
typename _Pred,
typename _Selector>
std::pair<_RAIter1, _RAIter2>
__find_template(_RAIter1 __begin1, _RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred, _Selector __selector,
......@@ -329,8 +330,8 @@ template<typename _RAIter1,
// Try it sequentially first.
std::pair<_RAIter1, _RAIter2> __find_seq_result =
__selector._M_sequential_algorithm(__begin1, __begin1 + __sequential_search_size,
__begin2, __pred);
__selector._M_sequential_algorithm(
__begin1, __begin1 + __sequential_search_size, __begin2, __pred);
if (__find_seq_result.first != (__begin1 + __sequential_search_size))
return __find_seq_result;
......@@ -384,7 +385,8 @@ template<typename _RAIter1,
// Where to work.
__start = __iteration_start + __iam * __block_size;
__stop = std::min<_DifferenceType>(__length, __start + __block_size);
__stop = std::min<_DifferenceType>(
__length, __start + __block_size);
}
} //parallel
......@@ -392,8 +394,7 @@ template<typename _RAIter1,
// Return iterator on found element.
return
std::pair<_RAIter1, _RAIter2>(__begin1 + __result,
__begin2 + __result);
std::pair<_RAIter1, _RAIter2>(__begin1 + __result, __begin2 + __result);
}
#endif
} // end namespace
......
......@@ -55,7 +55,7 @@ namespace __gnu_parallel
* @param __pred Find predicate.
*/
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
typename _Pred>
bool
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
{ return __pred(*__i1); }
......@@ -67,13 +67,13 @@ namespace __gnu_parallel
* @param __pred Find predicate.
*/
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
typename _Pred>
std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
{ return std::make_pair(find_if(__begin1, __end1, __pred,
sequential_tag()), __begin2); }
sequential_tag()), __begin2); }
};
/** @brief Test predicate on two adjacent elements. */
......@@ -85,12 +85,12 @@ namespace __gnu_parallel
* @param __pred Find predicate.
*/
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
typename _Pred>
bool
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
{
// Passed end iterator is one short.
return __pred(*__i1, *(__i1 + 1));
// Passed end iterator is one short.
return __pred(*__i1, *(__i1 + 1));
}
/** @brief Corresponding sequential algorithm on a sequence.
......@@ -100,18 +100,18 @@ namespace __gnu_parallel
* @param __pred Find predicate.
*/
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
typename _Pred>
std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
{
// Passed end iterator is one short.
_RAIter1 spot = adjacent_find(__begin1, __end1 + 1,
__pred, sequential_tag());
if (spot == (__end1 + 1))
spot = __end1;
return std::make_pair(spot, __begin2);
// Passed end iterator is one short.
_RAIter1 spot = adjacent_find(__begin1, __end1 + 1,
__pred, sequential_tag());
if (spot == (__end1 + 1))
spot = __end1;
return std::make_pair(spot, __begin2);
}
};
......@@ -125,7 +125,7 @@ namespace __gnu_parallel
* @param __pred Find predicate.
*/
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
typename _Pred>
bool
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
{ return !__pred(*__i1, *__i2); }
......@@ -138,23 +138,24 @@ namespace __gnu_parallel
* @param __pred Find predicate.
*/
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
typename _Pred>
std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
{ return mismatch(__begin1, __end1, __begin2, __pred, sequential_tag()); }
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
{ return mismatch(__begin1, __end1, __begin2, __pred, sequential_tag());
}
};
/** @brief Test predicate on several elements. */
template<typename _ForwardIterator>
template<typename _FIterator>
struct __find_first_of_selector : public __generic_find_selector
{
_ForwardIterator _M_begin;
_ForwardIterator _M_end;
_FIterator _M_begin;
_FIterator _M_end;
explicit __find_first_of_selector(_ForwardIterator __begin, _ForwardIterator __end)
explicit __find_first_of_selector(_FIterator __begin, _FIterator __end)
: _M_begin(__begin), _M_end(__end) { }
/** @brief Test on one position.
......@@ -162,15 +163,15 @@ namespace __gnu_parallel
* @param __i2 _Iterator on second sequence (unused).
* @param __pred Find predicate. */
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
typename _Pred>
bool
operator()(_RAIter1 __i1, _RAIter2 __i2, _Pred __pred)
{
for (_ForwardIterator __pos_in_candidates = _M_begin;
__pos_in_candidates != _M_end; ++__pos_in_candidates)
if (__pred(*__i1, *__pos_in_candidates))
return true;
return false;
for (_FIterator __pos_in_candidates = _M_begin;
__pos_in_candidates != _M_end; ++__pos_in_candidates)
if (__pred(*__i1, *__pos_in_candidates))
return true;
return false;
}
/** @brief Corresponding sequential algorithm on a sequence.
......@@ -179,13 +180,16 @@ namespace __gnu_parallel
* @param __begin2 Begin iterator of second sequence.
* @param __pred Find predicate. */
template<typename _RAIter1, typename _RAIter2,
typename _Pred>
typename _Pred>
std::pair<_RAIter1, _RAIter2>
_M_sequential_algorithm(_RAIter1 __begin1,
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
{ return std::make_pair(find_first_of(__begin1, __end1, _M_begin, _M_end, __pred,
sequential_tag()), __begin2); }
_RAIter1 __end1,
_RAIter2 __begin2, _Pred __pred)
{
return std::make_pair(
find_first_of(__begin1, __end1, _M_begin, _M_end, __pred,
sequential_tag()), __begin2);
}
};
}
......
......@@ -56,41 +56,44 @@ namespace __gnu_parallel
* @param __bound Maximum number of elements processed.
* @param __parallelism_tag Parallelization method */
template<typename _IIter, typename _UserOp,
typename _Functionality, typename _Red, typename _Result>
typename _Functionality, typename _Red, typename _Result>
_UserOp
__for_each_template_random_access(_IIter __begin, _IIter __end,
_UserOp __user_op,
_Functionality& __functionality,
_Red __reduction, _Result __reduction_start,
_Result& __output, typename
std::iterator_traits<_IIter>::
difference_type __bound,
_Parallelism __parallelism_tag)
_UserOp __user_op,
_Functionality& __functionality,
_Red __reduction,
_Result __reduction_start,
_Result& __output, typename
std::iterator_traits<_IIter>::
difference_type __bound,
_Parallelism __parallelism_tag)
{
if (__parallelism_tag == parallel_unbalanced)
return for_each_template_random_access_ed(__begin, __end, __user_op,
__functionality, __reduction,
__reduction_start,
__output, __bound);
return for_each_template_random_access_ed(__begin, __end, __user_op,
__functionality, __reduction,
__reduction_start,
__output, __bound);
else if (__parallelism_tag == parallel_omp_loop)
return for_each_template_random_access_omp_loop(__begin, __end, __user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
return for_each_template_random_access_omp_loop(
__begin, __end, __user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
else if (__parallelism_tag == parallel_omp_loop_static)
return for_each_template_random_access_omp_loop(__begin, __end, __user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
else //e. g. parallel_balanced
return for_each_template_random_access_workstealing(__begin, __end,
__user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
return for_each_template_random_access_omp_loop(
__begin, __end, __user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
else //e. g. parallel_balanced
return for_each_template_random_access_workstealing(__begin, __end,
__user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
}
}
......
......@@ -59,10 +59,10 @@ namespace __gnu_parallel
template<typename _Op>
bool
operator()(_Op& __o, _It __i)
{
__o(*__i);
return true;
}
{
__o(*__i);
return true;
}
};
/** @brief std::generate() selector. */
......@@ -76,9 +76,9 @@ namespace __gnu_parallel
bool
operator()(_Op& __o, _It __i)
{
*__i = __o();
return true;
}
*__i = __o();
return true;
}
};
/** @brief std::fill() selector. */
......@@ -91,10 +91,10 @@ namespace __gnu_parallel
template<typename Val>
bool
operator()(Val& __v, _It __i)
{
*__i = __v;
return true;
}
{
*__i = __v;
return true;
}
};
/** @brief std::transform() __selector, one input sequence variant. */
......@@ -107,10 +107,10 @@ namespace __gnu_parallel
template<typename _Op>
bool
operator()(_Op& __o, _It __i)
{
*__i.second = __o(*__i.first);
return true;
}
{
*__i.second = __o(*__i.first);
return true;
}
};
/** @brief std::transform() __selector, two input sequences variant. */
......@@ -123,10 +123,10 @@ namespace __gnu_parallel
template<typename _Op>
bool
operator()(_Op& __o, _It __i)
{
*__i._M_third = __o(*__i._M_first, *__i._M_second);
return true;
}
{
*__i._M_third = __o(*__i._M_first, *__i._M_second);
return true;
}
};
/** @brief std::replace() selector. */
......@@ -147,9 +147,9 @@ namespace __gnu_parallel
bool
operator()(_Tp& __v, _It __i)
{
if (*__i == __v)
*__i = __new_val;
return true;
if (*__i == __v)
*__i = __new_val;
return true;
}
};
......@@ -171,9 +171,9 @@ namespace __gnu_parallel
bool
operator()(_Op& __o, _It __i)
{
if (__o(*__i))
*__i = __new_val;
return true;
if (__o(*__i))
*__i = __new_val;
return true;
}
};
......@@ -188,7 +188,7 @@ namespace __gnu_parallel
template<typename Val>
_Diff
operator()(Val& __v, _It __i)
{ return (__v == *__i) ? 1 : 0; }
{ return (__v == *__i) ? 1 : 0; }
};
/** @brief std::count_if () selector. */
......@@ -202,7 +202,7 @@ namespace __gnu_parallel
template<typename _Op>
_Diff
operator()(_Op& __o, _It __i)
{ return (__o(*__i)) ? 1 : 0; }
{ return (__o(*__i)) ? 1 : 0; }
};
/** @brief std::accumulate() selector. */
......@@ -214,8 +214,9 @@ namespace __gnu_parallel
* @param __i iterator referencing object.
* @return The current value. */
template<typename _Op>
typename std::iterator_traits<_It>::value_type operator()(_Op __o, _It __i)
{ return *__i; }
typename std::iterator_traits<_It>::value_type
operator()(_Op __o, _It __i)
{ return *__i; }
};
/** @brief std::inner_product() selector. */
......@@ -242,11 +243,11 @@ namespace __gnu_parallel
template<typename _Op>
_Tp
operator()(_Op __mult, _It __current)
{
typename std::iterator_traits<_It>::difference_type __position
= __current - __begin1_iterator;
return __mult(*__current, *(begin2_iterator + __position));
}
{
typename std::iterator_traits<_It>::difference_type __position
= __current - __begin1_iterator;
return __mult(*__current, *(begin2_iterator + __position));
}
};
/** @brief Selector that just returns the passed iterator. */
......@@ -260,24 +261,25 @@ namespace __gnu_parallel
template<typename _Op>
_It
operator()(_Op __o, _It __i)
{ return __i; }
{ return __i; }
};
/** @brief Selector that returns the difference between two adjacent
* __elements.
*/
template<typename _It>
struct __adjacent_difference_selector : public __generic_for_each_selector<_It>
struct __adjacent_difference_selector :
public __generic_for_each_selector<_It>
{
template<typename _Op>
bool
operator()(_Op& __o, _It __i)
{
typename _It::first_type __go_back_one = __i.first;
--__go_back_one;
*__i.__second = __o(*__i.__first, *__go_back_one);
return true;
}
{
typename _It::first_type __go_back_one = __i.first;
--__go_back_one;
*__i.__second = __o(*__i.__first, *__go_back_one);
return true;
}
};
// XXX move into type_traits?
......@@ -315,10 +317,10 @@ namespace __gnu_parallel
_It
operator()(_It __x, _It __y)
{
if (__comp(*__x, *__y))
return __x;
else
return __y;
if (__comp(*__x, *__y))
return __x;
else
return __y;
}
};
......@@ -334,10 +336,10 @@ namespace __gnu_parallel
_It
operator()(_It __x, _It __y)
{
if (__comp(*__x, *__y))
return __y;
else
return __x;
if (__comp(*__x, *__y))
return __y;
else
return __x;
}
};
......@@ -353,7 +355,7 @@ namespace __gnu_parallel
template<typename _Result, typename _Addend>
_Result
operator()(const _Result& __x, const _Addend& __y)
{ return __binop(__x, __y); }
{ return __binop(__x, __y); }
};
}
......
......@@ -40,7 +40,8 @@ namespace __gnu_parallel
/** @brief A pair of iterators. The usual iterator operations are
* applied to both child iterators.
*/
template<typename _Iterator1, typename _Iterator2, typename _IteratorCategory>
template<typename _Iterator1, typename _Iterator2,
typename _IteratorCategory>
class _IteratorPair : public std::pair<_Iterator1, _Iterator2>
{
private:
......@@ -57,16 +58,16 @@ namespace __gnu_parallel
_IteratorPair() { }
_IteratorPair(const _Iterator1& __first, const _Iterator2& __second)
_IteratorPair(const _Iterator1& __first, const _Iterator2& __second)
: _Base(__first, __second) { }
// Pre-increment operator.
_IteratorPair&
operator++()
{
++_Base::first;
++_Base::second;
return *this;
++_Base::first;
++_Base::second;
return *this;
}
// Post-increment operator.
......@@ -78,9 +79,9 @@ namespace __gnu_parallel
_IteratorPair&
operator--()
{
--_Base::first;
--_Base::second;
return *this;
--_Base::first;
--_Base::second;
return *this;
}
// Post-decrement operator.
......@@ -95,14 +96,15 @@ namespace __gnu_parallel
_IteratorPair&
operator=(const _IteratorPair& __other)
{
_Base::first = __other.first;
_Base::second = __other.second;
return *this;
_Base::first = __other.first;
_Base::second = __other.second;
return *this;
}
_IteratorPair
operator+(difference_type __delta) const
{ return _IteratorPair(_Base::first + __delta, _Base::second + __delta); }
{ return _IteratorPair(_Base::first + __delta, _Base::second + __delta);
}
difference_type
operator-(const _IteratorPair& __other) const
......@@ -114,7 +116,7 @@ namespace __gnu_parallel
applied to all three child iterators.
*/
template<typename _Iterator1, typename _Iterator2, typename _Iterator3,
typename _IteratorCategory>
typename _IteratorCategory>
class _IteratorTriple
{
public:
......@@ -132,21 +134,21 @@ namespace __gnu_parallel
_IteratorTriple() { }
_IteratorTriple(const _Iterator1& __first, const _Iterator2& __second,
const _Iterator3& __third)
const _Iterator3& __third)
{
_M_first = __first;
_M_second = __second;
_M_third = __third;
_M_first = __first;
_M_second = __second;
_M_third = __third;
}
// Pre-increment operator.
_IteratorTriple&
operator++()
{
++_M_first;
++_M_second;
++_M_third;
return *this;
++_M_first;
++_M_second;
++_M_third;
return *this;
}
// Post-increment operator.
......@@ -158,10 +160,10 @@ namespace __gnu_parallel
_IteratorTriple&
operator--()
{
--_M_first;
--_M_second;
--_M_third;
return *this;
--_M_first;
--_M_second;
--_M_third;
return *this;
}
// Post-decrement operator.
......@@ -176,15 +178,16 @@ namespace __gnu_parallel
_IteratorTriple&
operator=(const _IteratorTriple& __other)
{
_M_first = __other._M_first;
_M_second = __other._M_second;
_M_third = __other._M_third;
return *this;
_M_first = __other._M_first;
_M_second = __other._M_second;
_M_third = __other._M_third;
return *this;
}
_IteratorTriple
operator+(difference_type __delta) const
{ return _IteratorTriple(_M_first + __delta, _M_second + __delta, _M_third + __delta); }
{ return _IteratorTriple(_M_first + __delta, _M_second + __delta,
_M_third + __delta); }
difference_type
operator-(const _IteratorTriple& __other) const
......
......@@ -48,17 +48,17 @@ namespace __gnu_parallel
template<typename _IIter>
void
__shrink_and_double(std::vector<_IIter>& __os_starts,
size_t& __count_to_two, size_t& __range_length,
const bool __make_twice)
size_t& __count_to_two, size_t& __range_length,
const bool __make_twice)
{
++__count_to_two;
if (not __make_twice or __count_to_two < 2)
__shrink(__os_starts, __count_to_two, __range_length);
__shrink(__os_starts, __count_to_two, __range_length);
else
{
__os_starts.resize((__os_starts.size() - 1) * 2 + 1);
__count_to_two = 0;
}
{
__os_starts.resize((__os_starts.size() - 1) * 2 + 1);
__count_to_two = 0;
}
}
/** @brief Combines two ranges into one and thus halves the number of ranges.
......@@ -68,11 +68,11 @@ namespace __gnu_parallel
template<typename _IIter>
void
__shrink(std::vector<_IIter>& __os_starts, size_t& __count_to_two,
size_t& __range_length)
size_t& __range_length)
{
for (typename std::vector<_IIter>::size_type __i = 0;
__i <= (__os_starts.size() / 2); ++__i)
__os_starts[__i] = __os_starts[__i * 2];
__i <= (__os_starts.size() / 2); ++__i)
__os_starts[__i] = __os_starts[__i * 2];
__range_length *= 2;
}
......@@ -98,17 +98,17 @@ namespace __gnu_parallel
template<typename _IIter, typename _FunctorType>
size_t
list_partition(const _IIter __begin, const _IIter __end,
_IIter* __starts, size_t* __lengths, const int __num_parts,
_FunctorType& __f, int __oversampling = 0)
_IIter* __starts, size_t* __lengths, const int __num_parts,
_FunctorType& __f, int __oversampling = 0)
{
bool __make_twice = false;
// The resizing algorithm is chosen according to the oversampling factor.
if (__oversampling == 0)
{
__make_twice = true;
__oversampling = 1;
}
{
__make_twice = true;
__oversampling = 1;
}
std::vector<_IIter> __os_starts(2 * __oversampling * __num_parts + 1);
......@@ -119,27 +119,28 @@ namespace __gnu_parallel
size_t __range_length = 1;
size_t __count_to_two = 0;
while (__it != __end)
{
__cur = __next;
for (; __cur < __os_starts.size() and __it != __end; ++__cur)
{
for (__dist_limit += __range_length;
__dist < __dist_limit and __it != __end; ++__dist)
{
__f(__it);
++__it;
}
__os_starts[__cur] = __it;
}
// Must compare for end and not __cur < __os_starts.size() , because
// __cur could be == __os_starts.size() as well
if (__it == __end)
break;
__shrink_and_double(__os_starts, __count_to_two, __range_length, __make_twice);
__next = __os_starts.size() / 2 + 1;
}
{
__cur = __next;
for (; __cur < __os_starts.size() and __it != __end; ++__cur)
{
for (__dist_limit += __range_length;
__dist < __dist_limit and __it != __end; ++__dist)
{
__f(__it);
++__it;
}
__os_starts[__cur] = __it;
}
// Must compare for end and not __cur < __os_starts.size() , because
// __cur could be == __os_starts.size() as well
if (__it == __end)
break;
__shrink_and_double(__os_starts, __count_to_two, __range_length,
__make_twice);
__next = __os_starts.size() / 2 + 1;
}
// Calculation of the parts (one must be extracted from __current
// because the partition beginning at __end, consists only of
......@@ -152,19 +153,20 @@ namespace __gnu_parallel
// Smallest partitions.
for (int __i = 1; __i < (__num_parts + 1 - __size_greater); ++__i)
{
__lengths[__i - 1] = __size_part * __range_length;
__index += __size_part;
__starts[__i] = __os_starts[__index];
}
{
__lengths[__i - 1] = __size_part * __range_length;
__index += __size_part;
__starts[__i] = __os_starts[__index];
}
// Biggest partitions.
for (int __i = __num_parts + 1 - __size_greater; __i <= __num_parts; ++__i)
{
__lengths[__i - 1] = (__size_part+1) * __range_length;
__index += (__size_part+1);
__starts[__i] = __os_starts[__index];
}
for (int __i = __num_parts + 1 - __size_greater; __i <= __num_parts;
++__i)
{
__lengths[__i - 1] = (__size_part+1) * __range_length;
__index += (__size_part+1);
__starts[__i] = __os_starts[__index];
}
// Correction of the end size (the end iteration has not finished).
__lengths[__num_parts - 1] -= (__dist_limit - __dist);
......
......@@ -106,7 +106,8 @@ public:
_M_offset = _M_k;
// Avoid default-constructing _M_losers[]._M_key
_M_losers = static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser)));
_M_losers
= static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser)));
for (unsigned int __i = _M_ik - 1; __i < _M_k; ++__i)
_M_losers[__i + _M_k]._M_sup = true;
......@@ -187,7 +188,7 @@ public:
unsigned int __right = __init_winner (2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key)))
&& !_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key)))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
......@@ -223,7 +224,8 @@ public:
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
{
// The smaller one gets promoted, ties are broken by _M_source.
if ((_M_sup && (!_M_losers[__pos]._M_sup || _M_losers[__pos]._M_source < _M_source))
if ((_M_sup && (!_M_losers[__pos]._M_sup
|| _M_losers[__pos]._M_source < _M_source))
|| (!_M_sup && !_M_losers[__pos]._M_sup
&& ((_M_comp(_M_losers[__pos]._M_key, _M_key))
|| (!_M_comp(_M_key, _M_losers[__pos]._M_key)
......@@ -280,9 +282,9 @@ public:
{
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
if (_M_losers[__right]._M_sup ||
(!_M_losers[__left]._M_sup
&& !_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key)))
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(_M_losers[__right]._M_key, _M_losers[__left]._M_key)))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
......@@ -320,7 +322,8 @@ public:
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
{
// The smaller one gets promoted.
if (_M_sup || (!_M_losers[__pos]._M_sup && _M_comp(_M_losers[__pos]._M_key, _M_key)))
if (_M_sup || (!_M_losers[__pos]._M_sup
&& _M_comp(_M_losers[__pos]._M_key, _M_key)))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_sup, _M_sup);
......@@ -414,8 +417,9 @@ public:
unsigned int __left = __init_winner (2 * __root);
unsigned int __right = __init_winner (2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup && !_M_comp(*_M_losers[__right]._M_keyp,
*_M_losers[__left]._M_keyp)))
|| (!_M_losers[__left]._M_sup
&& !_M_comp(*_M_losers[__right]._M_keyp,
*_M_losers[__left]._M_keyp)))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
......@@ -445,7 +449,8 @@ public:
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
{
// The smaller one gets promoted, ties are broken by _M_source.
if ((_M_sup && (!_M_losers[__pos]._M_sup || _M_losers[__pos]._M_source < _M_source)) ||
if ((_M_sup && (!_M_losers[__pos]._M_sup ||
_M_losers[__pos]._M_source < _M_source)) ||
(!_M_sup && !_M_losers[__pos]._M_sup &&
((_M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)) ||
(!_M_comp(*_M_keyp, *_M_losers[__pos]._M_keyp)
......@@ -495,7 +500,8 @@ public:
unsigned int __right = __init_winner (2 * __root + 1);
if (_M_losers[__right]._M_sup
|| (!_M_losers[__left]._M_sup
&& !_M_comp(*_M_losers[__right]._M_keyp, *_M_losers[__left]._M_keyp)))
&& !_M_comp(*_M_losers[__right]._M_keyp,
*_M_losers[__left]._M_keyp)))
{
// Left one is less or equal.
_M_losers[__root] = _M_losers[__right];
......@@ -525,7 +531,8 @@ public:
for (unsigned int __pos = (_M_k + _M_source) / 2; __pos > 0; __pos /= 2)
{
// The smaller one gets promoted.
if (_M_sup || (!_M_losers[__pos]._M_sup && _M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)))
if (_M_sup || (!_M_losers[__pos]._M_sup
&& _M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_sup, _M_sup);
......@@ -576,7 +583,8 @@ public:
_M_k = 1 << (__log2(_M_ik - 1) + 1);
_M_offset = _M_k;
// Avoid default-constructing _M_losers[]._M_key
_M_losers = static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser)));
_M_losers
= static_cast<_Loser*>(::operator new(2 * _M_k * sizeof(_Loser)));
for (unsigned int __i = _M_k + _M_ik - 1; __i < (2 * _M_k); ++__i)
{
......@@ -677,7 +685,8 @@ public:
{
// The smaller one gets promoted, ties are broken by _M_source.
if (_M_comp(_M_losers[__pos]._M_key, _M_key)
|| (!_M_comp(_M_key, _M_losers[__pos]._M_key) && _M_losers[__pos]._M_source < _M_source))
|| (!_M_comp(_M_key, _M_losers[__pos]._M_key)
&& _M_losers[__pos]._M_source < _M_source))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_source, _M_source);
......@@ -914,7 +923,8 @@ public:
{
// The smaller one gets promoted, ties are broken by _M_source.
if (_M_comp(*_M_losers[__pos]._M_keyp, *_M_keyp)
|| (!_M_comp(*_M_keyp, *_M_losers[__pos]._M_keyp) && _M_losers[__pos]._M_source < _M_source))
|| (!_M_comp(*_M_keyp, *_M_losers[__pos]._M_keyp)
&& _M_losers[__pos]._M_source < _M_source))
{
// The other one is smaller.
std::swap(_M_losers[__pos]._M_source, _M_source);
......
......@@ -51,36 +51,36 @@ namespace __gnu_parallel
* @param __comp Comparator.
* @return Output end iterator. */
template<typename _RAIter1, typename _RAIter2,
typename _OutputIterator, typename _DifferenceTp,
typename _Compare>
typename _OutputIterator, typename _DifferenceTp,
typename _Compare>
_OutputIterator
__merge_advance_usual(_RAIter1& __begin1,
_RAIter1 __end1,
_RAIter2& __begin2,
_RAIter2 __end2, _OutputIterator __target,
_DifferenceTp __max_length, _Compare __comp)
_RAIter1 __end1,
_RAIter2& __begin2,
_RAIter2 __end2, _OutputIterator __target,
_DifferenceTp __max_length, _Compare __comp)
{
typedef _DifferenceTp _DifferenceType;
while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0)
{
// array1[__i1] < array0[i0]
if (__comp(*__begin2, *__begin1))
*__target++ = *__begin2++;
else
*__target++ = *__begin1++;
--__max_length;
}
{
// array1[__i1] < array0[i0]
if (__comp(*__begin2, *__begin1))
*__target++ = *__begin2++;
else
*__target++ = *__begin1++;
--__max_length;
}
if (__begin1 != __end1)
{
__target = std::copy(__begin1, __begin1 + __max_length, __target);
__begin1 += __max_length;
}
{
__target = std::copy(__begin1, __begin1 + __max_length, __target);
__begin1 += __max_length;
}
else
{
__target = std::copy(__begin2, __begin2 + __max_length, __target);
__begin2 += __max_length;
}
{
__target = std::copy(__begin2, __begin2 + __max_length, __target);
__begin2 += __max_length;
}
return __target;
}
......@@ -100,56 +100,56 @@ namespace __gnu_parallel
* @param __comp Comparator.
* @return Output end iterator. */
template<typename _RAIter1, typename _RAIter2,
typename _OutputIterator, typename _DifferenceTp,
typename _Compare>
typename _OutputIterator, typename _DifferenceTp,
typename _Compare>
_OutputIterator
__merge_advance_movc(_RAIter1& __begin1,
_RAIter1 __end1,
_RAIter2& __begin2,
_RAIter2 __end2,
_OutputIterator __target,
_DifferenceTp __max_length, _Compare __comp)
_RAIter1 __end1,
_RAIter2& __begin2,
_RAIter2 __end2,
_OutputIterator __target,
_DifferenceTp __max_length, _Compare __comp)
{
typedef _DifferenceTp _DifferenceType;
typedef typename std::iterator_traits<_RAIter1>::value_type
value_type1;
value_type1;
typedef typename std::iterator_traits<_RAIter2>::value_type
value_type2;
value_type2;
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(__max_length >= 0);
#endif
while (__begin1 != __end1 && __begin2 != __end2 && __max_length > 0)
{
_RAIter1 __next1 = __begin1 + 1;
_RAIter2 __next2 = __begin2 + 1;
value_type1 __element1 = *__begin1;
value_type2 __element2 = *__begin2;
{
_RAIter1 __next1 = __begin1 + 1;
_RAIter2 __next2 = __begin2 + 1;
value_type1 __element1 = *__begin1;
value_type2 __element2 = *__begin2;
if (__comp(__element2, __element1))
{
__element1 = __element2;
__begin2 = __next2;
}
else
__begin1 = __next1;
if (__comp(__element2, __element1))
{
__element1 = __element2;
__begin2 = __next2;
}
else
__begin1 = __next1;
*__target = __element1;
*__target = __element1;
++__target;
--__max_length;
}
++__target;
--__max_length;
}
if (__begin1 != __end1)
{
__target = std::copy(__begin1, __begin1 + __max_length, __target);
__begin1 += __max_length;
}
{
__target = std::copy(__begin1, __begin1 + __max_length, __target);
__begin1 += __max_length;
}
else
{
__target = std::copy(__begin2, __begin2 + __max_length, __target);
__begin2 += __max_length;
}
{
__target = std::copy(__begin2, __begin2 + __max_length, __target);
__begin2 += __max_length;
}
return __target;
}
......@@ -168,18 +168,18 @@ namespace __gnu_parallel
* @param __comp Comparator.
* @return Output end iterator. */
  template<typename _RAIter1, typename _RAIter2,
           typename _OutputIterator, typename _DifferenceTp,
           typename _Compare>
    inline _OutputIterator
    __merge_advance(_RAIter1& __begin1, _RAIter1 __end1,
                    _RAIter2& __begin2, _RAIter2 __end2,
                    _OutputIterator __target, _DifferenceTp __max_length,
                    _Compare __comp)
    {
      _GLIBCXX_CALL(__max_length)

      return __merge_advance_movc(__begin1, __end1, __begin2, __end2,
                                  __target, __max_length, __comp);
}
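As a point of reference, the following standalone sketch (hypothetical helper, not part of libstdc++) shows what such a bounded "merge advance" does: it merges at most max_length elements from two sorted random-access ranges into the output and leaves the input iterators pointing just past what was consumed.

// Hypothetical sketch, not part of libstdc++: bounded merge of two sorted
// random-access ranges that advances the input iterators.
#include <algorithm>
#include <cstdio>
#include <vector>

template<typename It1, typename It2, typename Out>
  Out
  merge_advance_sketch(It1& first1, It1 last1, It2& first2, It2 last2,
                       Out out, long max_length)
  {
    while (first1 != last1 && first2 != last2 && max_length > 0)
      {
        // Ties go to the first sequence, which keeps the merge stable.
        if (*first2 < *first1)
          *out++ = *first2++;
        else
          *out++ = *first1++;
        --max_length;
      }
    // One input may be exhausted; spend the remaining budget on the other.
    if (first1 != last1)
      {
        long n = std::min<long>(max_length, last1 - first1);
        out = std::copy(first1, first1 + n, out);
        first1 += n;
      }
    else
      {
        long n = std::min<long>(max_length, last2 - first2);
        out = std::copy(first2, first2 + n, out);
        first2 += n;
      }
    return out;
  }

int main()
{
  std::vector<int> a = {1, 3, 5, 7}, b = {2, 4, 6, 8}, c(5);
  std::vector<int>::iterator i = a.begin(), j = b.begin();
  merge_advance_sketch(i, a.end(), j, b.end(), c.begin(), 5);
  for (int x : c)
    std::printf("%d ", x);   // prints 1 2 3 4 5
}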
/** @brief Merge routine fallback to sequential in case the
......@@ -193,19 +193,19 @@ namespace __gnu_parallel
* @param __comp Comparator.
* @return Output end iterator. */
  template<typename _RAIter1, typename _RAIter2,
           typename _RAIter3, typename _Compare>
    inline _RAIter3
    __parallel_merge_advance(_RAIter1& __begin1,
                             _RAIter1 __end1,
                             _RAIter2& __begin2,
                             // different iterators, parallel implementation
                             // not available
                             _RAIter2 __end2,
                             _RAIter3 __target, typename
                             std::iterator_traits<_RAIter1>::
                             difference_type __max_length, _Compare __comp)
    { return __merge_advance(__begin1, __end1, __begin2, __end2, __target,
                             __max_length, __comp); }
/** @brief Parallel merge routine being able to merge only the @__c
* __max_length smallest elements.
......@@ -223,28 +223,28 @@ namespace __gnu_parallel
* @return Output end iterator.
*/
  template<typename _RAIter1, typename _RAIter3,
           typename _Compare>
    inline _RAIter3
    __parallel_merge_advance(_RAIter1& __begin1,
                             _RAIter1 __end1,
                             _RAIter1& __begin2,
                             _RAIter1 __end2,
                             _RAIter3 __target, typename
                             std::iterator_traits<_RAIter1>::
                             difference_type __max_length, _Compare __comp)
    {
      typedef typename
        std::iterator_traits<_RAIter1>::value_type _ValueType;
      typedef typename std::iterator_traits<_RAIter1>::
        difference_type _DifferenceType1 /* == difference_type2 */;
      typedef typename std::iterator_traits<_RAIter3>::
        difference_type _DifferenceType3;

      typedef typename std::pair<_RAIter1, _RAIter1>
        _IteratorPair;

      _IteratorPair
        seqs[2] = { std::make_pair(__begin1, __end1),
                    std::make_pair(__begin2, __end2) };
_RAIter3
__target_end = parallel_multiway_merge
< /* __stable = */ true, /* __sentinels = */ false>(
......@@ -256,6 +256,6 @@ namespace __gnu_parallel
return __target_end;
}
} //namespace __gnu_parallel
#endif /* _GLIBCXX_PARALLEL_MERGE_H */
......@@ -111,7 +111,7 @@ template<typename _RAIter, typename _DifferenceTp>
for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
_ValueType(__sd->_M_source[__sd->_M_starts[__iam] + __es[__i + 1]]);
_ValueType(__sd->_M_source[__sd->_M_starts[__iam] + __es[__i + 1]]);
delete[] __es;
}
......@@ -144,25 +144,28 @@ template<typename _RAIter, typename _Compare,
seqs(__sd->_M_num_threads);
for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
__sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]));
__sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1]
- __sd->_M_starts[__s]));
std::vector<_SortingPlacesIterator> _M_offsets(__sd->_M_num_threads);
// if not last thread
if (__iam < __sd->_M_num_threads - 1)
multiseq_partition(seqs.begin(), seqs.end(),
__sd->_M_starts[__iam + 1], _M_offsets.begin(), __comp);
__sd->_M_starts[__iam + 1], _M_offsets.begin(),
__comp);
for (int __seq = 0; __seq < __sd->_M_num_threads; __seq++)
{
// for each sequence
if (__iam < (__sd->_M_num_threads - 1))
__sd->_M_pieces[__iam][__seq]._M_end = _M_offsets[__seq] - seqs[__seq].first;
__sd->_M_pieces[__iam][__seq]._M_end
= _M_offsets[__seq] - seqs[__seq].first;
else
// very end of this sequence
__sd->_M_pieces[__iam][__seq]._M_end =
__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
}
# pragma omp barrier
......@@ -171,7 +174,8 @@ template<typename _RAIter, typename _Compare,
{
// For each sequence.
if (__iam > 0)
__sd->_M_pieces[__iam][__seq]._M_begin = __sd->_M_pieces[__iam - 1][__seq]._M_end;
__sd->_M_pieces[__iam][__seq]._M_begin =
__sd->_M_pieces[__iam - 1][__seq]._M_end;
else
// Absolute beginning.
__sd->_M_pieces[__iam][__seq]._M_begin = 0;
......@@ -204,7 +208,8 @@ template<typename _RAIter, typename _Compare,
# pragma omp single
__gnu_sequential::sort(__sd->_M_samples,
__sd->_M_samples + (__num_samples * __sd->_M_num_threads),
__sd->_M_samples
+ (__num_samples * __sd->_M_num_threads),
__comp);
# pragma omp barrier
......@@ -224,17 +229,19 @@ template<typename _RAIter, typename _Compare,
// Absolute beginning.
__sd->_M_pieces[__iam][__s]._M_begin = 0;
if ((__num_samples * (__iam + 1)) < (__num_samples * __sd->_M_num_threads))
if ((__num_samples * (__iam + 1)) <
(__num_samples * __sd->_M_num_threads))
__sd->_M_pieces[__iam][__s]._M_end =
std::lower_bound(__sd->_M_temporary[__s],
__sd->_M_temporary[__s]
+ (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]),
+ (__sd->_M_starts[__s + 1] - __sd->_M_starts[__s]),
__sd->_M_samples[__num_samples * (__iam + 1)],
__comp)
- __sd->_M_temporary[__s];
else
// Absolute end.
__sd->_M_pieces[__iam][__s]._M_end = __sd->_M_starts[__s + 1] - __sd->_M_starts[__s];
__sd->_M_pieces[__iam][__s]._M_end = __sd->_M_starts[__s + 1]
- __sd->_M_starts[__s];
}
}
};
......@@ -283,8 +290,8 @@ template<typename Seq_RAIter, typename _RAIter,
_Compare& __comp,
DiffType __length_am) const
{
stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am, __comp,
sequential_tag());
stable_multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
__comp, sequential_tag());
}
};
......@@ -322,7 +329,8 @@ template<bool __stable, bool __exact, typename _RAIter,
_ThreadIndex __iam = omp_get_thread_num();
// Length of this thread's chunk, before merging.
_DifferenceType __length_local = __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
_DifferenceType __length_local
= __sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
// Sort in temporary storage, leave space for sentinel.
......@@ -333,12 +341,15 @@ template<bool __stable, bool __exact, typename _RAIter,
::operator new(sizeof(_ValueType) * (__length_local + 1)));
// Copy there.
std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
__sd->_M_source + __sd->_M_starts[__iam] + __length_local,
__sd->_M_temporary[__iam]);
std::uninitialized_copy(
__sd->_M_source + __sd->_M_starts[__iam],
__sd->_M_source + __sd->_M_starts[__iam] + __length_local,
__sd->_M_temporary[__iam]);
__possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
(__sd->_M_temporary[__iam], __sd->_M_temporary[__iam] + __length_local, __comp);
(__sd->_M_temporary[__iam],
__sd->_M_temporary[__iam] + __length_local,
__comp);
// Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
// __sd->_M_temporary[__iam] + __length_local.
......@@ -355,7 +366,8 @@ template<bool __stable, bool __exact, typename _RAIter,
_DifferenceType __offset = 0, __length_am = 0;
for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
{
__length_am += __sd->_M_pieces[__iam][__s]._M_end - __sd->_M_pieces[__iam][__s]._M_begin;
__length_am += __sd->_M_pieces[__iam][__s]._M_end
- __sd->_M_pieces[__iam][__s]._M_begin;
__offset += __sd->_M_pieces[__iam][__s]._M_begin;
}
......@@ -367,8 +379,9 @@ template<bool __stable, bool __exact, typename _RAIter,
for (int __s = 0; __s < __sd->_M_num_threads; ++__s)
{
seqs[__s] =
std::make_pair(__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin,
__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end);
std::make_pair(
__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_begin,
__sd->_M_temporary[__s] + __sd->_M_pieces[__iam][__s]._M_end);
}
__possibly_stable_multiway_merge<
......@@ -420,7 +433,7 @@ template<bool __stable, bool __exact, typename _RAIter,
# pragma omp parallel num_threads(__num_threads)
{
__num_threads = omp_get_num_threads(); //no more threads than requested
__num_threads = omp_get_num_threads(); //no more threads than requested
# pragma omp single
{
......@@ -432,7 +445,7 @@ template<bool __stable, bool __exact, typename _RAIter,
if (!__exact)
{
_DifferenceType size =
(_Settings::get().sort_mwms_oversampling * __num_threads - 1)
(_Settings::get().sort_mwms_oversampling * __num_threads - 1)
* __num_threads;
__sd._M_samples = static_cast<_ValueType*>(
::operator new(size * sizeof(_ValueType)));
......@@ -441,10 +454,12 @@ template<bool __stable, bool __exact, typename _RAIter,
__sd._M_samples = NULL;
__sd._M_offsets = new _DifferenceType[__num_threads - 1];
__sd._M_pieces = new std::vector<_Piece<_DifferenceType> >[__num_threads];
__sd._M_pieces
= new std::vector<_Piece<_DifferenceType> >[__num_threads];
for (int __s = 0; __s < __num_threads; ++__s)
__sd._M_pieces[__s].resize(__num_threads);
_M_starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
_M_starts = __sd._M_starts
= new _DifferenceType[__num_threads + 1];
_DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __split = __n % __num_threads;
......@@ -452,7 +467,8 @@ template<bool __stable, bool __exact, typename _RAIter,
for (int __i = 0; __i < __num_threads; ++__i)
{
_M_starts[__i] = __pos;
__pos += (__i < __split) ? (__chunk_length + 1) : __chunk_length;
__pos += (__i < __split)
? (__chunk_length + 1) : __chunk_length;
}
_M_starts[__num_threads] = __pos;
} //single
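The loop above applies the usual equal-split rule: n elements are divided into num_threads chunks whose sizes differ by at most one, with the first n % num_threads chunks receiving the extra element. A hypothetical standalone sketch of the same start-index computation:

// Hypothetical sketch, not part of libstdc++: chunk starts for n elements
// split across num_threads threads, sizes differing by at most one.
#include <cstdio>
#include <vector>

std::vector<long> chunk_starts(long n, int num_threads)
{
  long chunk_length = n / num_threads;   // base size of every chunk
  long split = n % num_threads;          // first 'split' chunks get one more
  std::vector<long> starts(num_threads + 1);
  long pos = 0;
  for (int i = 0; i < num_threads; ++i)
    {
      starts[i] = pos;
      pos += (i < split) ? (chunk_length + 1) : chunk_length;
    }
  starts[num_threads] = pos;             // == n
  return starts;
}

int main()
{
  for (long s : chunk_starts(10, 4))     // prints 0 3 6 8 10
    std::printf("%ld ", s);
}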
......
......@@ -61,24 +61,24 @@ namespace __parallel
template<typename _IIter, typename _Tp, typename _BinaryOper>
_Tp
accumulate(_IIter, _IIter, _Tp, _BinaryOper,
__gnu_parallel::sequential_tag);
__gnu_parallel::sequential_tag);
template<typename _IIter, typename _Tp, typename _BinaryOper>
_Tp
accumulate(_IIter, _IIter, _Tp, _BinaryOper,
__gnu_parallel::_Parallelism);
__gnu_parallel::_Parallelism);
template<typename _IIter, typename _Tp, typename _BinaryOper,
typename _Tag>
typename _Tag>
_Tp
__accumulate_switch(_IIter, _IIter, _Tp, _BinaryOper, _Tag);
template<typename _RAIter, typename _Tp, typename _BinaryOper>
_Tp
__accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper,
random_access_iterator_tag,
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_unbalanced);
random_access_iterator_tag,
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_unbalanced);
template<typename _IIter, typename _OIter>
_OIter
......@@ -91,36 +91,36 @@ namespace __parallel
template<typename _IIter, typename _OIter>
_OIter
adjacent_difference(_IIter, _IIter, _OIter,
__gnu_parallel::sequential_tag);
__gnu_parallel::sequential_tag);
template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter
adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper,
__gnu_parallel::sequential_tag);
__gnu_parallel::sequential_tag);
template<typename _IIter, typename _OIter>
_OIter
adjacent_difference(_IIter, _IIter, _OIter,
__gnu_parallel::_Parallelism);
__gnu_parallel::_Parallelism);
template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter
adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper,
__gnu_parallel::_Parallelism);
__gnu_parallel::_Parallelism);
template<typename _IIter, typename _OIter, typename _BinaryOper,
typename _Tag1, typename _Tag2>
typename _Tag1, typename _Tag2>
_OIter
__adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
_Tag1, _Tag2);
_Tag1, _Tag2);
template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter
__adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
random_access_iterator_tag,
random_access_iterator_tag,
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_unbalanced);
random_access_iterator_tag,
random_access_iterator_tag,
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_unbalanced);
template<typename _IIter1, typename _IIter2, typename _Tp>
_Tp
......@@ -129,46 +129,46 @@ namespace __parallel
template<typename _IIter1, typename _IIter2, typename _Tp>
_Tp
inner_product(_IIter1, _IIter1, _IIter2, _Tp,
__gnu_parallel::sequential_tag);
__gnu_parallel::sequential_tag);
template<typename _IIter1, typename _IIter2, typename _Tp>
_Tp
inner_product(_IIter1, _IIter1, _IIter2, _Tp,
__gnu_parallel::_Parallelism);
__gnu_parallel::_Parallelism);
template<typename _IIter1, typename _IIter2, typename _Tp,
typename _BinaryFunction1, typename _BinaryFunction2>
typename _BinaryFunction1, typename _BinaryFunction2>
_Tp
inner_product(_IIter1, _IIter1, _IIter2, _Tp,
_BinaryFunction1, _BinaryFunction2);
_BinaryFunction1, _BinaryFunction2);
template<typename _IIter1, typename _IIter2, typename _Tp,
typename _BinaryFunction1, typename _BinaryFunction2>
typename _BinaryFunction1, typename _BinaryFunction2>
_Tp
inner_product(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1,
_BinaryFunction2, __gnu_parallel::sequential_tag);
_BinaryFunction2, __gnu_parallel::sequential_tag);
template<typename _IIter1, typename _IIter2, typename _Tp,
typename BinaryFunction1, typename BinaryFunction2>
typename BinaryFunction1, typename BinaryFunction2>
_Tp
inner_product(_IIter1, _IIter1, _IIter2, _Tp, BinaryFunction1,
BinaryFunction2, __gnu_parallel::_Parallelism);
BinaryFunction2, __gnu_parallel::_Parallelism);
template<typename _RAIter1, typename _RAIter2, typename _Tp,
typename BinaryFunction1, typename BinaryFunction2>
typename BinaryFunction1, typename BinaryFunction2>
_Tp
__inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1,
BinaryFunction2, random_access_iterator_tag,
random_access_iterator_tag,
__gnu_parallel::_Parallelism
= __gnu_parallel::parallel_unbalanced);
BinaryFunction2, random_access_iterator_tag,
random_access_iterator_tag,
__gnu_parallel::_Parallelism
= __gnu_parallel::parallel_unbalanced);
template<typename _IIter1, typename _IIter2, typename _Tp,
typename _BinaryFunction1, typename _BinaryFunction2,
typename _Tag1, typename _Tag2>
typename _BinaryFunction1, typename _BinaryFunction2,
typename _Tag1, typename _Tag2>
_Tp
__inner_product_switch(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1,
_BinaryFunction2, _Tag1, _Tag2);
_BinaryFunction2, _Tag1, _Tag2);
template<typename _IIter, typename _OIter>
......@@ -178,7 +178,7 @@ namespace __parallel
template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter
partial_sum(_IIter, _IIter, _OIter, _BinaryOper,
__gnu_parallel::sequential_tag);
__gnu_parallel::sequential_tag);
template<typename _IIter, typename _OIter>
_OIter
......@@ -189,14 +189,14 @@ namespace __parallel
partial_sum(_IIter, _IIter, _OIter, _BinaryOper);
template<typename _IIter, typename _OIter, typename _BinaryOper,
typename _Tag1, typename _Tag2>
typename _Tag1, typename _Tag2>
_OIter
__partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2);
template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter
__partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper,
random_access_iterator_tag, random_access_iterator_tag);
random_access_iterator_tag, random_access_iterator_tag);
} // end namespace
} // end namespace
......
......@@ -59,18 +59,15 @@ namespace __gnu_parallel
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
for_each_template_random_access_omp_loop(_RAIter __begin,
_RAIter __end,
_Op __o, _Fu& __f, _Red __r, _Result __base,
_Result& __output,
typename std::iterator_traits
<_RAIter>::
difference_type __bound)
for_each_template_random_access_omp_loop(
_RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
typename std::iterator_traits<_RAIter>::difference_type __bound)
{
typedef typename
std::iterator_traits<_RAIter>::difference_type
......@@ -95,7 +92,7 @@ template<typename _RAIter,
_ThreadIndex __iam = omp_get_thread_num();
# pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size)
#pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size)
for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
__thread_results[__iam] =
__r(__thread_results[__iam], __f(__o, __begin+__pos));
......
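The loop body above follows the standard OpenMP per-thread reduction pattern: each thread folds its share of the iterations into its own result slot, and the slots are combined sequentially afterwards. A minimal self-contained sketch of that pattern (generic OpenMP, not the library's selector machinery):

// Hypothetical sketch, not part of libstdc++: dynamic omp for with
// per-thread partial results combined after the parallel region.
#include <omp.h>
#include <cstdio>
#include <numeric>
#include <vector>

int main()
{
  const int length = 1000;
  std::vector<long> data(length, 1);

  int num_threads = omp_get_max_threads();
  std::vector<long> thread_results(num_threads, 0);   // one slot per thread

#pragma omp parallel num_threads(num_threads)
  {
    int iam = omp_get_thread_num();
#pragma omp for schedule(dynamic, 64)
    for (int pos = 0; pos < length; ++pos)
      thread_results[iam] += data[pos];                // local reduction

  }

  // Combine the per-thread partial results sequentially.
  long sum = std::accumulate(thread_results.begin(), thread_results.end(), 0L);
  std::printf("%ld\n", sum);                           // 1000
}

In practice the per-thread slots are padded (see the __stride computation used elsewhere in these headers) to avoid false sharing; the sketch omits that.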
......@@ -59,18 +59,15 @@ namespace __gnu_parallel
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
for_each_template_random_access_omp_loop_static(_RAIter __begin,
_RAIter __end,
_Op __o, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
typename std::iterator_traits
<_RAIter>::
difference_type __bound)
for_each_template_random_access_omp_loop_static(
_RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
typename std::iterator_traits<_RAIter>::difference_type __bound)
{
typedef typename
std::iterator_traits<_RAIter>::difference_type
......@@ -95,9 +92,10 @@ template<typename _RAIter,
_ThreadIndex __iam = omp_get_thread_num();
# pragma omp for schedule(static, _Settings::get().workstealing_chunk_size)
#pragma omp for schedule(static, _Settings::get().workstealing_chunk_size)
for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
__thread_results[__iam] = __r(__thread_results[__iam], __f(__o, __begin+__pos));
__thread_results[__iam] = __r(__thread_results[__iam],
__f(__o, __begin+__pos));
} //parallel
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
......
......@@ -60,18 +60,15 @@ namespace __gnu_parallel
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
for_each_template_random_access_ed(_RAIter __begin,
_RAIter __end,
_Op __o, _Fu& __f, _Red __r, _Result __base,
_Result& __output,
typename std::iterator_traits
<_RAIter>::
difference_type __bound)
for_each_template_random_access_ed(
_RAIter __begin, _RAIter __end, _Op __o, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
typename std::iterator_traits<_RAIter>::difference_type __bound)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
......@@ -87,15 +84,17 @@ template<typename _RAIter,
# pragma omp single
{
__num_threads = omp_get_num_threads();
__thread_results = static_cast<_Result*>(
::operator new(__num_threads * sizeof(_Result)));
__thread_results =
static_cast<_Result*>(
::operator new(__num_threads * sizeof(_Result)));
__constructed = new bool[__num_threads];
}
_ThreadIndex __iam = omp_get_thread_num();
// Neutral element.
_Result* __reduct = static_cast<_Result*>(::operator new(sizeof(_Result)));
_Result* __reduct =
static_cast<_Result*>(::operator new(sizeof(_Result)));
_DifferenceType
__start = equally_split_point(__length, __num_threads, __iam),
......
......@@ -52,13 +52,13 @@ namespace __gnu_parallel
* element is unknown in general.
* @return End iterator of output sequence. */
template<typename _IIter,
typename _OutputIterator,
typename _BinaryOperation>
typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op,
typename std::iterator_traits
<_IIter>::value_type __value)
__parallel_partial_sum_basecase(
_IIter __begin, _IIter __end, _OutputIterator __result,
_BinaryOperation __bin_op,
typename std::iterator_traits <_IIter>::value_type __value)
{
if (__begin == __end)
return __result;
......@@ -84,13 +84,13 @@ template<typename _IIter,
* @return End iterator of output sequence.
*/
template<typename _IIter,
typename _OutputIterator,
typename _BinaryOperation>
typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__parallel_partial_sum_linear(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op,
typename std::iterator_traits
<_IIter>::difference_type __n)
__parallel_partial_sum_linear(
_IIter __begin, _IIter __end, _OutputIterator __result,
_BinaryOperation __bin_op,
typename std::iterator_traits<_IIter>::difference_type __n)
{
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
......@@ -128,8 +128,8 @@ template<typename _IIter,
{
_DifferenceType __chunk_length =
((double)__n
/ ((double)__num_threads + __s.partial_sum_dilation)),
__borderstart = __n - __num_threads * __chunk_length;
/ ((double)__num_threads + __s.partial_sum_dilation)),
__borderstart = __n - __num_threads * __chunk_length;
__borders[0] = 0;
for (int __i = 1; __i < (__num_threads + 1); ++__i)
{
......@@ -140,7 +140,7 @@ template<typename _IIter,
}
__sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
* __num_threads));
* __num_threads));
_OutputIterator __target_end;
} //single
......@@ -148,33 +148,35 @@ template<typename _IIter,
if (__iam == 0)
{
*__result = *__begin;
__parallel_partial_sum_basecase(__begin + 1, __begin + __borders[1],
__result + 1, __bin_op, *__begin);
__parallel_partial_sum_basecase(
__begin + 1, __begin + __borders[1], __result + 1,
__bin_op, *__begin);
::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
}
else
{
::new(&(__sums[__iam]))
_ValueType(std::accumulate(__begin + __borders[__iam] + 1,
__begin + __borders[__iam + 1],
*(__begin + __borders[__iam]),
__bin_op,
__gnu_parallel::sequential_tag()));
_ValueType(std::accumulate(__begin + __borders[__iam] + 1,
__begin + __borders[__iam + 1],
*(__begin + __borders[__iam]),
__bin_op,
__gnu_parallel::sequential_tag()));
}
# pragma omp barrier
# pragma omp single
__parallel_partial_sum_basecase(
__sums + 1, __sums + __num_threads, __sums + 1, __bin_op, __sums[0]);
__parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
__sums + 1, __bin_op, __sums[0]);
# pragma omp barrier
// Still same team.
__parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
__begin + __borders[__iam + 2],
__result + __borders[__iam + 1], __bin_op,
__sums[__iam]);
__parallel_partial_sum_basecase(
__begin + __borders[__iam + 1],
__begin + __borders[__iam + 2],
__result + __borders[__iam + 1],
__bin_op, __sums[__iam]);
} //parallel
::operator delete(__sums);
......@@ -190,8 +192,8 @@ template<typename _IIter,
* @param __bin_op Associative binary function.
* @return End iterator of output sequence. */
template<typename _IIter,
typename _OutputIterator,
typename _BinaryOperation>
typename _OutputIterator,
typename _BinaryOperation>
_OutputIterator
__parallel_partial_sum(_IIter __begin, _IIter __end,
_OutputIterator __result, _BinaryOperation __bin_op)
......@@ -208,7 +210,8 @@ template<typename _IIter,
{
case LINEAR:
// Need an initial offset.
return __parallel_partial_sum_linear(__begin, __end, __result, __bin_op, __n);
return __parallel_partial_sum_linear(
__begin, __end, __result, __bin_op, __n);
default:
// Partial_sum algorithm not implemented.
_GLIBCXX_PARALLEL_ASSERT(0);
......
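For readers new to the linear partial-sum scheme used above, here is a rough standalone sketch of the same three phases: local chunk sums, a short sequential prefix sum over the chunk totals, then shifted local prefix sums. Border dilation and allocation details are simplified, and all names are illustrative.

// Hypothetical sketch, not part of libstdc++: two-pass parallel prefix sum.
#include <omp.h>
#include <cstdio>
#include <numeric>
#include <vector>

int main()
{
  const int n = 16;
  std::vector<long> in(n, 1), out(n);

  int p = omp_get_max_threads();
  std::vector<long> chunk_sum(p + 1, 0);
  auto lo = [&](int t) { return (long)n * t / p; };     // chunk borders

#pragma omp parallel num_threads(p)
  {
    int t = omp_get_thread_num();

    // Phase 1: total of chunk t.
    chunk_sum[t + 1] = std::accumulate(in.begin() + lo(t),
                                       in.begin() + lo(t + 1), 0L);
#pragma omp barrier
#pragma omp single
    {
      // Prefix-sum the p chunk totals (cheap, done by one thread).
      std::partial_sum(chunk_sum.begin(), chunk_sum.end(), chunk_sum.begin());
    }

    // Phase 2: local prefix sum, shifted by the preceding chunks' total.
    long running = chunk_sum[t];
    for (long i = lo(t); i < lo(t + 1); ++i)
      out[i] = (running += in[i]);
  }

  std::printf("%ld\n", out[n - 1]);                      // 16
}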
......@@ -88,9 +88,10 @@ template<typename _RAIter, typename _Predicate>
__reserved_right = new bool[__num_threads];
if (__s.partition_chunk_share > 0.0)
__chunk_size = std::max<_DifferenceType>(__s.partition_chunk_size,
(double)__n * __s.partition_chunk_share
/ (double)__num_threads);
__chunk_size = std::max<_DifferenceType>(
__s.partition_chunk_size,
(double)__n * __s.partition_chunk_share /
(double)__num_threads);
else
__chunk_size = __s.partition_chunk_size;
}
......@@ -99,7 +100,8 @@ template<typename _RAIter, typename _Predicate>
{
# pragma omp single
{
_DifferenceType __num_chunks = (__right - __left + 1) / __chunk_size;
_DifferenceType __num_chunks
= (__right - __left + 1) / __chunk_size;
for (int __r = 0; __r < __num_threads; ++__r)
{
......@@ -198,7 +200,8 @@ template<typename _RAIter, typename _Predicate>
&& __thread_left_border >= __leftnew)
{
// Chunk already in place, reserve spot.
__reserved_left[(__left - (__thread_left_border + 1)) / __chunk_size]
__reserved_left
[(__left - (__thread_left_border + 1)) / __chunk_size]
= true;
}
......@@ -208,7 +211,7 @@ template<typename _RAIter, typename _Predicate>
{
// Chunk already in place, reserve spot.
__reserved_right[((__thread_right_border - 1) - __right)
/ __chunk_size] = true;
/ __chunk_size] = true;
}
# pragma omp barrier
......@@ -233,9 +236,9 @@ template<typename _RAIter, typename _Predicate>
#endif
std::swap_ranges(__begin + __thread_left_border
- (__chunk_size - 1),
__begin + __thread_left_border + 1,
__begin + __swapstart);
- (__chunk_size - 1),
__begin + __thread_left_border + 1,
__begin + __swapstart);
}
if (thread_right >= __thread_right_border
......@@ -257,9 +260,10 @@ template<typename _RAIter, typename _Predicate>
_GLIBCXX_PARALLEL_ASSERT(__swapstart != -1);
#endif
std::swap_ranges(__begin + __thread_right_border,
__begin + __thread_right_border + __chunk_size,
__begin + __swapstart);
std::swap_ranges(
__begin + __thread_right_border,
__begin + __thread_right_border + __chunk_size,
__begin + __swapstart);
}
#if _GLIBCXX_ASSERTIONS
# pragma omp barrier
......@@ -328,7 +332,7 @@ template<typename _RAIter, typename _Predicate>
template<typename _RAIter, typename _Compare>
void
parallel_nth_element(_RAIter __begin, _RAIter __nth,
_RAIter __end, _Compare __comp)
_RAIter __end, _Compare __comp)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
......@@ -355,18 +359,19 @@ template<typename _RAIter, typename _Compare>
__pivot_pos = __end - 1;
// XXX _Compare must have first__ValueType, second__ValueType,
// _ResultType
// _ResultType
// _Compare == __gnu_parallel::_Lexicographic<S, int,
// __gnu_parallel::_Less<S, S> >
// __gnu_parallel::_Less<S, S> >
// __pivot_pos == std::pair<S, int>*
// XXX binder2nd only for _RAIters??
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
__pred(__comp, *__pivot_pos);
__pred(__comp, *__pivot_pos);
// Divide, leave pivot unchanged in last place.
_RAIter __split_pos1, __split_pos2;
__split_pos1 = __begin + __parallel_partition(__begin, __end - 1, __pred,
__get_max_threads());
__split_pos1 = __begin
+ __parallel_partition(__begin, __end - 1, __pred,
__get_max_threads());
// Left side: < __pivot_pos; __right side: >= __pivot_pos
......@@ -377,18 +382,18 @@ template<typename _RAIter, typename _Compare>
// In case all elements are equal, __split_pos1 == 0
if ((__split_pos1 + 1 - __begin) < (__n >> 7)
|| (__end - __split_pos1) < (__n >> 7))
|| (__end - __split_pos1) < (__n >> 7))
{
// Very unequal split, one part smaller than one 128th
// elements not strictly larger than the pivot.
__gnu_parallel::__unary_negate<__gnu_parallel::
__binder1st<_Compare, _ValueType, _ValueType, bool>, _ValueType>
__pred(__gnu_parallel::__binder1st<_Compare, _ValueType,
_ValueType, bool>(__comp, *__pivot_pos));
__binder1st<_Compare, _ValueType, _ValueType, bool>, _ValueType>
__pred(__gnu_parallel::__binder1st<_Compare, _ValueType,
_ValueType, bool>(__comp, *__pivot_pos));
// Find other end of pivot-equal range.
__split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
__end, __pred);
__end, __pred);
}
else
// Only skip the pivot.
......@@ -415,8 +420,8 @@ template<typename _RAIter, typename _Compare>
template<typename _RAIter, typename _Compare>
void
parallel_partial_sort(_RAIter __begin,
_RAIter __middle,
_RAIter __end, _Compare __comp)
_RAIter __middle,
_RAIter __end, _Compare __comp)
{
parallel_nth_element(__begin, __middle, __end, __comp);
std::sort(__begin, __middle, __comp);
......
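parallel_partial_sort above mirrors the classical decomposition, which can be stated directly with the sequential standard algorithms: a partial sort is an nth_element step followed by sorting the prefix. A small illustration:

// Hypothetical sketch, not part of libstdc++: partial sort expressed as
// nth_element plus a sort of the prefix.
#include <algorithm>
#include <cstdio>
#include <vector>

int main()
{
  std::vector<int> v = {9, 3, 7, 1, 8, 2, 6, 4, 5, 0};
  auto middle = v.begin() + 4;

  std::nth_element(v.begin(), middle, v.end()); // smallest 4 land in front
  std::sort(v.begin(), middle);                 // then sort just that prefix

  for (auto it = v.begin(); it != middle; ++it)
    std::printf("%d ", *it);                    // prints 0 1 2 3
}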
......@@ -59,7 +59,7 @@ namespace __gnu_parallel
_SequenceIndex _M_max_size;
/** @brief Cyclic __begin and __end pointers contained in one
atomically changeable value. */
_GLIBCXX_VOLATILE _CASable _M_borders;
public:
......@@ -67,9 +67,9 @@ namespace __gnu_parallel
* @param _M_max_size Maximal number of elements to be contained. */
_RestrictedBoundedConcurrentQueue(_SequenceIndex _M_max_size)
{
      this->_M_max_size = _M_max_size;
      _M_base = new _Tp[_M_max_size];
      _M_borders = __encode2(0, 0);
#pragma omp flush
}
......@@ -82,16 +82,16 @@ namespace __gnu_parallel
    void
    push_front(const _Tp& __t)
    {
      _CASable __former_borders = _M_borders;
      int __former_front, __former_back;
      decode2(__former_borders, __former_front, __former_back);
      *(_M_base + __former_front % _M_max_size) = __t;
#if _GLIBCXX_ASSERTIONS
      // Otherwise: front - back > _M_max_size eventually.
      _GLIBCXX_PARALLEL_ASSERT(((__former_front + 1) - __former_back)
                               <= _M_max_size);
#endif
      __fetch_and_add(&_M_borders, __encode2(1, 0));
    }
/** @brief Pops one element from the queue at the front end.
......@@ -99,50 +99,56 @@ namespace __gnu_parallel
    bool
    pop_front(_Tp& __t)
    {
      int __former_front, __former_back;
#pragma omp flush
      decode2(_M_borders, __former_front, __former_back);
      while (__former_front > __former_back)
        {
          // Chance.
          _CASable
            __former_borders = __encode2(__former_front, __former_back);
          _CASable
            __new_borders = __encode2(__former_front - 1, __former_back);
          if (__compare_and_swap(
                &_M_borders, __former_borders, __new_borders))
            {
              __t = *(_M_base + (__former_front - 1) % _M_max_size);
              return true;
            }
#pragma omp flush
          decode2(_M_borders, __former_front, __former_back);
        }
      return false;
    }
/** @brief Pops one element from the queue at the front end.
* Must not be called concurrently with pop_front(). */
    bool
    pop_back(_Tp& __t) //queue behavior
    {
      int __former_front, __former_back;
#pragma omp flush
      decode2(_M_borders, __former_front, __former_back);
      while (__former_front > __former_back)
        {
          // Chance.
          _CASable
            __former_borders = __encode2(__former_front, __former_back);
          _CASable
            __new_borders = __encode2(__former_front, __former_back + 1);
          if (__compare_and_swap(
                &_M_borders, __former_borders, __new_borders))
            {
              __t = *(_M_base + __former_back % _M_max_size);
              return true;
            }
#pragma omp flush
          decode2(_M_borders, __former_front, __former_back);
        }
      return false;
    }
};
} //namespace __gnu_parallel
#undef _GLIBCXX_VOLATILE
......
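The queue above packs both borders into one CAS-able word so that front and back can be read and updated atomically together. A hypothetical sketch of that packing trick using std::atomic (the exact bit layout of the library's __encode2/decode2 may differ):

// Hypothetical sketch, not part of libstdc++: two 32-bit indices packed
// into one 64-bit word so a single compare-and-swap covers both.
#include <atomic>
#include <cstdint>
#include <cstdio>

static std::uint64_t encode2(std::uint32_t front, std::uint32_t back)
{ return (std::uint64_t(front) << 32) | back; }

static void decode2(std::uint64_t borders,
                    std::uint32_t& front, std::uint32_t& back)
{
  front = std::uint32_t(borders >> 32);
  back  = std::uint32_t(borders & 0xffffffffu);
}

int main()
{
  std::atomic<std::uint64_t> borders{encode2(5, 2)};

  // Try to pop from the front: shrink front by one in a single CAS.
  std::uint32_t front, back;
  std::uint64_t old_val = borders.load();
  decode2(old_val, front, back);
  if (front > back
      && borders.compare_exchange_strong(old_val, encode2(front - 1, back)))
    std::printf("popped slot %u\n", front - 1);   // popped slot 4
}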
......@@ -49,12 +49,12 @@ namespace __gnu_parallel
template<typename _RAIter, typename _Compare>
typename std::iterator_traits<_RAIter>::difference_type
__parallel_sort_qs_divide(_RAIter __begin,
_RAIter __end,
_Compare __comp, typename std::iterator_traits
<_RAIter>::difference_type __pivot_rank,
typename std::iterator_traits
<_RAIter>::difference_type
__num_samples, _ThreadIndex __num_threads)
_RAIter __end,
_Compare __comp, typename std::iterator_traits
<_RAIter>::difference_type __pivot_rank,
typename std::iterator_traits
<_RAIter>::difference_type
__num_samples, _ThreadIndex __num_threads)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
......@@ -65,15 +65,15 @@ namespace __gnu_parallel
// Allocate uninitialized, to avoid default constructor.
_ValueType* __samples =
static_cast<_ValueType*>(::operator new(__num_samples
* sizeof(_ValueType)));
static_cast<_ValueType*>(::operator new(__num_samples
* sizeof(_ValueType)));
for (_DifferenceType __s = 0; __s < __num_samples; ++__s)
{
const unsigned long long __index = static_cast<unsigned long long>(__s)
* __n / __num_samples;
::new(&(__samples[__s])) _ValueType(__begin[__index]);
}
{
const unsigned long long __index
= static_cast<unsigned long long>(__s) * __n / __num_samples;
::new(&(__samples[__s])) _ValueType(__begin[__index]);
}
__gnu_sequential::sort(__samples, __samples + __num_samples, __comp);
......@@ -99,47 +99,47 @@ namespace __gnu_parallel
template<typename _RAIter, typename _Compare>
void
__parallel_sort_qs_conquer(_RAIter __begin,
_RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
_RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
if (__num_threads <= 1)
{
__gnu_sequential::sort(__begin, __end, __comp);
return;
}
{
__gnu_sequential::sort(__begin, __end, __comp);
return;
}
_DifferenceType __n = __end - __begin, __pivot_rank;
if (__n <= 1)
return;
return;
_ThreadIndex __num_threads_left;
if ((__num_threads % 2) == 1)
__num_threads_left = __num_threads / 2 + 1;
__num_threads_left = __num_threads / 2 + 1;
else
__num_threads_left = __num_threads / 2;
__num_threads_left = __num_threads / 2;
__pivot_rank = __n * __num_threads_left / __num_threads;
_DifferenceType __split =
__parallel_sort_qs_divide(__begin, __end, __comp, __pivot_rank,
_Settings::get().sort_qs_num_samples_preset,
__num_threads);
__parallel_sort_qs_divide(__begin, __end, __comp, __pivot_rank,
_Settings::get().sort_qs_num_samples_preset,
__num_threads);
#pragma omp parallel sections num_threads(2)
{
#pragma omp section
__parallel_sort_qs_conquer(__begin, __begin + __split,
__comp, __num_threads_left);
__parallel_sort_qs_conquer(__begin, __begin + __split,
__comp, __num_threads_left);
#pragma omp section
__parallel_sort_qs_conquer(__begin + __split, __end,
__comp, __num_threads - __num_threads_left);
__parallel_sort_qs_conquer(__begin + __split, __end,
__comp, __num_threads - __num_threads_left);
}
}
......@@ -155,9 +155,9 @@ namespace __gnu_parallel
template<typename _RAIter, typename _Compare>
void
__parallel_sort_qs(_RAIter __begin,
_RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
_RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
{
_GLIBCXX_CALL(__n)
......@@ -171,7 +171,8 @@ namespace __gnu_parallel
if (__num_threads > __n)
__num_threads = static_cast<_ThreadIndex>(__n);
__parallel_sort_qs_conquer(__begin, __begin + __n, __comp, __num_threads);
__parallel_sort_qs_conquer(
__begin, __begin + __n, __comp, __num_threads);
}
} //namespace __gnu_parallel
......
......@@ -41,14 +41,14 @@ namespace __gnu_parallel
class _RandomNumber
{
private:
    std::tr1::mt19937 _M_mt;
    uint64 _M_supremum;
    uint64 _M_rand_sup;
    double _M_supremum_reciprocal;
    double _M_rand_sup_reciprocal;

    // Assumed to be twice as long as the usual random number.
    uint64 __cache;
// Bit results.
int __bits_left;
......@@ -56,17 +56,17 @@ namespace __gnu_parallel
    static uint32
    __scale_down(uint64 __x,
#if _GLIBCXX_SCALE_DOWN_FPU
                 uint64 /*_M_supremum*/, double _M_supremum_reciprocal)
#else
                 uint64 _M_supremum, double /*_M_supremum_reciprocal*/)
#endif
    {
#if _GLIBCXX_SCALE_DOWN_FPU
      return uint32(__x * _M_supremum_reciprocal);
#else
      return static_cast<uint32>(__x % _M_supremum);
#endif
    }
public:
/** @brief Default constructor. Seed with 0. */
......@@ -94,12 +94,12 @@ namespace __gnu_parallel
{ return __scale_down(_M_mt(), _M_supremum, _M_supremum_reciprocal); }
/** @brief Generate unsigned random 32-bit integer in the
interval @__c [0,local_supremum). */
interval @__c [0,local_supremum). */
uint32
operator()(uint64 local_supremum)
{
return __scale_down(_M_mt(), local_supremum,
double(local_supremum * _M_rand_sup_reciprocal));
double(local_supremum * _M_rand_sup_reciprocal));
}
/** @brief Generate a number of random bits, run-time parameter.
......@@ -111,10 +111,10 @@ namespace __gnu_parallel
__cache = __cache >> __bits;
__bits_left -= __bits;
if (__bits_left < 32)
{
__cache |= ((uint64(_M_mt())) << __bits_left);
__bits_left += 32;
}
{
__cache |= ((uint64(_M_mt())) << __bits_left);
__bits_left += 32;
}
return __res;
}
};
......
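A hedged sketch of the two scale-down strategies _RandomNumber switches between, mapping a raw draw into [0, supremum): floating-point multiplication by a precomputed reciprocal versus integer modulo. The constants and names here are illustrative only.

// Hypothetical sketch, not part of libstdc++: scaling a raw random value
// down to [0, supremum) by reciprocal multiply or by modulo.
#include <cstdint>
#include <cstdio>

static std::uint32_t scale_down_fpu(std::uint64_t x, double supremum_reciprocal)
{ return std::uint32_t(x * supremum_reciprocal); }

static std::uint32_t scale_down_mod(std::uint64_t x, std::uint64_t supremum)
{ return std::uint32_t(x % supremum); }

int main()
{
  const std::uint64_t rand_sup = std::uint64_t(1) << 32; // raw range [0, 2^32)
  const std::uint64_t supremum = 6;                      // want [0, 6)
  const double reciprocal = double(supremum) / double(rand_sup);

  std::uint64_t x = 0x9e3779b9u;                         // some raw draw
  std::printf("%u %u\n", scale_down_fpu(x, reciprocal),
              scale_down_mod(x, supremum));              // both print 3 here
}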
......@@ -70,7 +70,7 @@ template<typename _RAIter>
_DifferenceType* _M_starts;
/** @brief Number of the thread that will further process the
corresponding bin. */
corresponding bin. */
_ThreadIndex* _M_bin_proc;
/** @brief Number of bins to distribute to. */
......@@ -131,7 +131,8 @@ template<typename _RAIter, typename RandomNumberGenerator>
_DRandomShufflingGlobalData<_RAIter>* _M_sd = d->_M_sd;
// Indexing: _M_dist[bin][processor]
_DifferenceType __length = _M_sd->_M_starts[__iam + 1] - _M_sd->_M_starts[__iam];
_DifferenceType __length = _M_sd->_M_starts[__iam + 1] -
_M_sd->_M_starts[__iam];
_BinIndex* __oracles = new _BinIndex[__length];
_DifferenceType* _M_dist = new _DifferenceType[_M_sd->_M_num_bins + 1];
_BinIndex* _M_bin_proc = new _BinIndex[_M_sd->_M_num_bins];
......@@ -161,12 +162,13 @@ template<typename _RAIter, typename RandomNumberGenerator>
# pragma omp single
{
// Sum up bins, _M_sd->_M_dist[__s + 1][d->_M_num_threads] now contains the
// total number of items in bin __s
// Sum up bins, _M_sd->_M_dist[__s + 1][d->_M_num_threads] now contains
// the total number of items in bin __s
for (_BinIndex __s = 0; __s < _M_sd->_M_num_bins; ++__s)
__gnu_sequential::partial_sum(_M_sd->_M_dist[__s + 1],
_M_sd->_M_dist[__s + 1] + d->_M_num_threads + 1,
_M_sd->_M_dist[__s + 1]);
__gnu_sequential::partial_sum(
_M_sd->_M_dist[__s + 1],
_M_sd->_M_dist[__s + 1] + d->_M_num_threads + 1,
_M_sd->_M_dist[__s + 1]);
}
# pragma omp barrier
......@@ -179,9 +181,9 @@ template<typename _RAIter, typename RandomNumberGenerator>
for (_BinIndex __s = d->_M_bins_begin; __s < d->__bins_end; ++__s)
{
for (int __t = 0; __t < d->_M_num_threads + 1; ++__t)
_M_sd->_M_dist[__s + 1][__t] += __offset;
__offset = _M_sd->_M_dist[__s + 1][d->_M_num_threads];
for (int __t = 0; __t < d->_M_num_threads + 1; ++__t)
_M_sd->_M_dist[__s + 1][__t] += __offset;
__offset = _M_sd->_M_dist[__s + 1][d->_M_num_threads];
}
_M_sd->_M_temporaries[__iam] = static_cast<_ValueType*>(
......@@ -208,7 +210,7 @@ template<typename _RAIter, typename RandomNumberGenerator>
// Last column [d->_M_num_threads] stays unchanged.
::new(&(_M_temporaries[target_p][_M_dist[target_bin + 1]++]))
_ValueType(*(_M_source + __i + __start));
_ValueType(*(_M_source + __i + __start));
}
delete[] __oracles;
......@@ -223,12 +225,15 @@ template<typename _RAIter, typename RandomNumberGenerator>
{
_ValueType* __begin =
_M_sd->_M_temporaries[__iam] +
((__b == d->_M_bins_begin) ? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]),
((__b == d->_M_bins_begin)
? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]),
* __end =
_M_sd->_M_temporaries[__iam] + _M_sd->_M_dist[__b + 1][d->_M_num_threads];
_M_sd->_M_temporaries[__iam] +
_M_sd->_M_dist[__b + 1][d->_M_num_threads];
__sequential_random_shuffle(__begin, __end, __rng);
std::copy(__begin, __end, _M_sd->_M_source + __global_offset +
((__b == d->_M_bins_begin) ? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]));
((__b == d->_M_bins_begin)
? 0 : _M_sd->_M_dist[__b][d->_M_num_threads]));
}
::operator delete(_M_sd->_M_temporaries[__iam]);
......@@ -256,11 +261,11 @@ template<typename _Tp>
template<typename _RAIter, typename RandomNumberGenerator>
void
__parallel_random_shuffle_drs(_RAIter __begin,
_RAIter __end,
typename std::iterator_traits
<_RAIter>::difference_type __n,
_ThreadIndex __num_threads,
RandomNumberGenerator& __rng)
_RAIter __end,
typename std::iterator_traits
<_RAIter>::difference_type __n,
_ThreadIndex __num_threads,
RandomNumberGenerator& __rng)
{
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
......@@ -343,7 +348,8 @@ template<typename _RAIter, typename RandomNumberGenerator>
_M_sd._M_dist[0][0] = 0;
_M_sd._M_dist[__b][0] = 0;
}
_M_starts = _M_sd._M_starts = new _DifferenceType[__num_threads + 1];
_M_starts = _M_sd._M_starts
= new _DifferenceType[__num_threads + 1];
int bin_cursor = 0;
_M_sd._M_num_bins = _M_num_bins;
_M_sd._M_num_bits = __log2(_M_num_bins);
......@@ -355,7 +361,8 @@ template<typename _RAIter, typename RandomNumberGenerator>
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
{
_M_starts[__i] = __start;
__start += (__i < __split) ? (__chunk_length + 1) : __chunk_length;
__start += (__i < __split)
? (__chunk_length + 1) : __chunk_length;
int __j = __pus[__i]._M_bins_begin = bin_cursor;
// Range of bins for this processor.
......@@ -469,14 +476,16 @@ template<typename _RAIter, typename RandomNumberGenerator>
}
// Sum up bins.
__gnu_sequential::partial_sum(__dist0, __dist0 + _M_num_bins + 1, __dist0);
__gnu_sequential::
partial_sum(__dist0, __dist0 + _M_num_bins + 1, __dist0);
for (int __b = 0; __b < _M_num_bins + 1; ++__b)
__dist1[__b] = __dist0[__b];
// Distribute according to oracles.
for (_DifferenceType __i = 0; __i < __n; ++__i)
::new(&(__target[(__dist0[__oracles[__i]])++])) _ValueType(*(__begin + __i));
::new(&(__target[(__dist0[__oracles[__i]])++]))
_ValueType(*(__begin + __i));
for (int __b = 0; __b < _M_num_bins; ++__b)
{
......@@ -511,7 +520,8 @@ template<typename _RAIter, typename RandomNumberGenerator>
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
_DifferenceType __n = __end - __begin;
__parallel_random_shuffle_drs(__begin, __end, __n, __get_max_threads(), __rng) ;
__parallel_random_shuffle_drs(
__begin, __end, __n, __get_max_threads(), __rng) ;
}
}
......
......@@ -76,8 +76,8 @@ template<typename _RAIter, typename _DifferenceTp>
* @param __pred Find predicate.
* @return Place of finding in first sequences. */
template<typename __RAIter1,
typename __RAIter2,
typename _Pred>
typename __RAIter2,
typename _Pred>
__RAIter1
__search_template(__RAIter1 __begin1, __RAIter1 __end1,
__RAIter2 __begin2, __RAIter2 __end2,
......@@ -126,7 +126,8 @@ template<typename __RAIter1,
_ThreadIndex __iam = omp_get_thread_num();
_DifferenceType __start = __splitters[__iam], __stop = __splitters[__iam + 1];
_DifferenceType __start = __splitters[__iam],
__stop = __splitters[__iam + 1];
_DifferenceType __pos_in_pattern = 0;
bool __found_pattern = false;
......@@ -156,7 +157,8 @@ template<typename __RAIter1,
// Make safe jump.
__start += (__pos_in_pattern - __advances[__pos_in_pattern]);
__pos_in_pattern =
(__advances[__pos_in_pattern] < 0) ? 0 : __advances[__pos_in_pattern];
(__advances[__pos_in_pattern] < 0) ?
0 : __advances[__pos_in_pattern];
}
} //parallel
......
......@@ -63,8 +63,8 @@ template<typename _IIter, typename _OutputIterator>
}
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
typename _OutputIterator,
typename _Compare>
struct symmetric_difference_func
{
typedef std::iterator_traits<_IIter> _TraitsType;
......@@ -77,8 +77,8 @@ template<typename _IIter,
_OutputIterator
_M_invoke(_IIter __a, _IIter __b,
_IIter __c, _IIter d,
_OutputIterator __r) const
_IIter __c, _IIter d,
_OutputIterator __r) const
{
while (__a != __b && __c != d)
{
......@@ -105,7 +105,7 @@ template<typename _IIter,
_DifferenceType
__count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const
_IIter __c, _IIter d) const
{
_DifferenceType __counter = 0;
......@@ -142,8 +142,8 @@ template<typename _IIter,
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
typename _OutputIterator,
typename _Compare>
struct __difference_func
{
typedef std::iterator_traits<_IIter> _TraitsType;
......@@ -179,7 +179,7 @@ template<typename _IIter,
_DifferenceType
__count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const
_IIter __c, _IIter d) const
{
_DifferenceType __counter = 0;
......@@ -210,8 +210,8 @@ template<typename _IIter,
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
typename _OutputIterator,
typename _Compare>
struct __intersection_func
{
typedef std::iterator_traits<_IIter> _TraitsType;
......@@ -246,7 +246,7 @@ template<typename _IIter,
_DifferenceType
__count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const
_IIter __c, _IIter d) const
{
_DifferenceType __counter = 0;
......@@ -315,7 +315,7 @@ template<class _IIter, class _OutputIterator, class _Compare>
_DifferenceType
__count(_IIter __a, _IIter __b,
_IIter __c, _IIter d) const
_IIter __c, _IIter d) const
{
_DifferenceType __counter = 0;
......@@ -348,8 +348,8 @@ template<class _IIter, class _OutputIterator, class _Compare>
};
template<typename _IIter,
typename _OutputIterator,
typename Operation>
typename _OutputIterator,
typename Operation>
_OutputIterator
__parallel_set_operation(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
......@@ -370,7 +370,7 @@ template<typename _IIter,
const _DifferenceType size = (__end1 - __begin1) + (__end2 - __begin2);
const _IteratorPair __sequence[ 2 ] =
{ std::make_pair(__begin1, __end1), std::make_pair(__begin2, __end2) } ;
{ std::make_pair(__begin1, __end1), std::make_pair(__begin2, __end2) };
_OutputIterator return_value = __result;
_DifferenceType *__borders;
_IteratorPair *__block_begins;
......@@ -400,7 +400,8 @@ template<typename _IIter,
_IIter __offset[2];
const _DifferenceType __rank = __borders[__iam + 1];
multiseq_partition(__sequence, __sequence + 2, __rank, __offset, __op._M_comp);
multiseq_partition(__sequence, __sequence + 2,
__rank, __offset, __op._M_comp);
// allowed to read?
// together
......@@ -427,15 +428,16 @@ template<typename _IIter,
if (__iam == 0)
{
// The first thread can copy already.
__lengths[ __iam ] = __op._M_invoke(__block_begin.first, block_end.first,
__block_begin.second, block_end.second,
__result)
__lengths[ __iam ] =
__op._M_invoke(__block_begin.first, block_end.first,
__block_begin.second, block_end.second, __result)
- __result;
}
else
{
__lengths[ __iam ] = __op.__count(__block_begin.first, block_end.first,
__block_begin.second, block_end.second);
__lengths[ __iam ] =
__op.__count(__block_begin.first, block_end.first,
__block_begin.second, block_end.second);
}
// Make sure everyone wrote their lengths.
......@@ -453,7 +455,7 @@ template<typename _IIter,
// Return the result iterator of the last block.
return_value = __op._M_invoke(
__block_begin.first, __end1, __block_begin.second, __end2, __r);
__block_begin.first, __end1, __block_begin.second, __end2, __r);
}
else
......@@ -471,52 +473,56 @@ template<typename _IIter,
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_union(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp)
{
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result,
__union_func< _IIter, _OutputIterator, _Compare>(_M_comp));
return __parallel_set_operation(__begin1, __end1, __begin2, __end2,
__result, __union_func< _IIter, _OutputIterator, _Compare>(_M_comp));
}
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_intersection(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp)
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp)
{
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result,
__intersection_func<_IIter, _OutputIterator, _Compare>(_M_comp));
return __parallel_set_operation(
__begin1, __end1, __begin2, __end2, __result,
__intersection_func<_IIter, _OutputIterator, _Compare>(_M_comp));
}
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_difference(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp)
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp)
{
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result,
__difference_func<_IIter, _OutputIterator, _Compare>(_M_comp));
return __parallel_set_operation(
__begin1, __end1, __begin2, __end2, __result,
__difference_func<_IIter, _OutputIterator, _Compare>(_M_comp));
}
template<typename _IIter,
typename _OutputIterator,
typename _Compare>
typename _OutputIterator,
typename _Compare>
inline _OutputIterator
__parallel_set_symmetric_difference(_IIter __begin1, _IIter __end1,
_IIter __begin2, _IIter __end2,
_OutputIterator __result, _Compare _M_comp)
_IIter __begin2, _IIter __end2,
_OutputIterator __result,
_Compare _M_comp)
{
return __parallel_set_operation(__begin1, __end1, __begin2, __end2, __result,
symmetric_difference_func<_IIter, _OutputIterator, _Compare>
(_M_comp));
return __parallel_set_operation(
__begin1, __end1, __begin2, __end2, __result,
symmetric_difference_func<_IIter, _OutputIterator, _Compare>
(_M_comp));
}
}
......
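The per-element loops inside these set-operation functors are ordinary merges over two sorted ranges. As one example, a standalone sketch of the symmetric-difference case, written with plain iterators and a comparator rather than the functor interface used above:

// Hypothetical sketch, not part of libstdc++: symmetric difference of two
// sorted ranges, emitting elements present in exactly one of them.
#include <cstdio>
#include <vector>

template<typename It, typename Out, typename Compare>
  Out
  symmetric_difference_sketch(It a, It b, It c, It d, Out r, Compare comp)
  {
    while (a != b && c != d)
      {
        if (comp(*a, *c))
          *r++ = *a++;            // only in the first range
        else if (comp(*c, *a))
          *r++ = *c++;            // only in the second range
        else
          { ++a; ++c; }           // in both: skip
      }
    while (a != b) *r++ = *a++;   // leftovers of the first range
    while (c != d) *r++ = *c++;   // leftovers of the second range
    return r;
  }

int main()
{
  std::vector<int> x = {1, 2, 4, 6}, y = {2, 3, 6, 7}, out(8);
  auto end = symmetric_difference_sketch(x.begin(), x.end(),
                                         y.begin(), y.end(), out.begin(),
                                         [](int l, int r) { return l < r; });
  for (auto it = out.begin(); it != end; ++it)
    std::printf("%d ", *it);      // prints 1 3 4 7
}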
......@@ -54,13 +54,13 @@
namespace __gnu_parallel
{
//prototype
//prototype
template<bool __stable, typename _RAIter,
typename _Compare, typename _Parallelism>
void
parallel_sort(_RAIter __begin, _RAIter __end,
_Compare __comp, _Parallelism __parallelism);
/**
* @brief Choose multiway mergesort, splitting variant at run-time,
* for parallel sorting.
......@@ -138,7 +138,8 @@ namespace __gnu_parallel
_GLIBCXX_PARALLEL_ASSERT(__stable == false);
__parallel_sort_qs(__begin, __end, __comp, __parallelism.__get_num_threads());
__parallel_sort_qs(__begin, __end, __comp,
__parallelism.__get_num_threads());
}
/**
......@@ -158,7 +159,8 @@ namespace __gnu_parallel
_GLIBCXX_PARALLEL_ASSERT(__stable == false);
__parallel_sort_qsb(__begin, __end, __comp, __parallelism.__get_num_threads());
__parallel_sort_qsb(__begin, __end, __comp,
__parallelism.__get_num_threads());
}
......@@ -215,11 +217,13 @@ namespace __gnu_parallel
#endif
#if _GLIBCXX_QUICKSORT
else if (_Settings::get().sort_algorithm == QS)
__parallel_sort_qs(__begin, __end, __comp, __parallelism.__get_num_threads());
__parallel_sort_qs(__begin, __end, __comp,
__parallelism.__get_num_threads());
#endif
#if _GLIBCXX_BAL_QUICKSORT
else if (_Settings::get().sort_algorithm == QS_BALANCED)
__parallel_sort_qsb(__begin, __end, __comp, __parallelism.__get_num_threads());
__parallel_sort_qsb(__begin, __end, __comp,
__parallelism.__get_num_threads());
#endif
else
__gnu_sequential::sort(__begin, __end, __comp);
......
......@@ -149,7 +149,8 @@ namespace __gnu_parallel
static const int _CASable_bits = sizeof(_CASable) * 8;
/// ::_CASable with the right half of bits set to 1.
static const _CASable _CASable_mask = ((_CASable(1) << (_CASable_bits / 2)) - 1);
static const _CASable _CASable_mask =
((_CASable(1) << (_CASable_bits / 2)) - 1);
}
#endif /* _GLIBCXX_PARALLEL_TYPES_H */
......@@ -45,11 +45,11 @@ namespace __gnu_parallel
* @param __binary_pred Equality predicate.
* @return End iterator of result __sequence. */
template<typename _IIter,
class _OutputIterator,
class _BinaryPredicate>
class _OutputIterator,
class _BinaryPredicate>
_OutputIterator
__parallel_unique_copy(_IIter __first, _IIter __last,
_OutputIterator __result, _BinaryPredicate __binary_pred)
_OutputIterator __result, _BinaryPredicate __binary_pred)
{
_GLIBCXX_CALL(__last - __first)
......@@ -72,10 +72,10 @@ template<typename _IIter,
{
# pragma omp single
{
__num_threads = omp_get_num_threads();
__borders = new _DifferenceType[__num_threads + 2];
equally_split(size, __num_threads + 1, __borders);
__counter = new _DifferenceType[__num_threads + 1];
__num_threads = omp_get_num_threads();
__borders = new _DifferenceType[__num_threads + 2];
equally_split(size, __num_threads + 1, __borders);
__counter = new _DifferenceType[__num_threads + 1];
}
_ThreadIndex __iam = omp_get_thread_num();
......@@ -89,7 +89,7 @@ template<typename _IIter,
if (__iam == 0)
{
__begin = __borders[0] + 1; // == 1
__begin = __borders[0] + 1; // == 1
__end = __borders[__iam + 1];
++__i;
......@@ -112,8 +112,8 @@ template<typename _IIter,
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{
if (!__binary_pred(*iter, *(iter - 1)))
++__i;
}
++__i;
}
}
__counter[__iam] = __i;
......@@ -157,8 +157,8 @@ template<typename _IIter,
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{
if (!__binary_pred(*iter, *(iter-1)))
*__iter_out++ = *iter;
}
*__iter_out++ = *iter;
}
}
}
......@@ -184,7 +184,7 @@ template<typename _IIter, class _OutputIterator>
typedef typename std::iterator_traits<_IIter>::value_type
_ValueType;
return __parallel_unique_copy(__first, __last, __result,
std::equal_to<_ValueType>());
std::equal_to<_ValueType>());
}
}//namespace __gnu_parallel
......
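The unique_copy code above is an instance of the count / prefix-sum / write pattern: every thread first counts how many elements of its chunk survive, a prefix sum over those counts yields each thread's write offset, and a second pass writes the surviving elements. A simplified OpenMP sketch of that pattern (chunk borders and first-element handling are less careful than the library's):

// Hypothetical sketch, not part of libstdc++: two-pass parallel unique_copy
// over adjacent duplicates in a sorted-by-equality input.
#include <omp.h>
#include <cstdio>
#include <numeric>
#include <vector>

int main()
{
  std::vector<int> in = {1, 1, 2, 2, 2, 3, 4, 4, 5, 5, 5, 6};
  const long n = in.size();
  std::vector<int> out(n);

  int p = omp_get_max_threads();
  std::vector<long> count(p + 1, 0);
  auto lo = [&](int t) { return n * t / p; };          // chunk borders
  auto keep = [&](long i) { return i == 0 || in[i] != in[i - 1]; };

#pragma omp parallel num_threads(p)
  {
    int t = omp_get_thread_num();

    // Pass 1: count survivors in chunk t.
    for (long i = lo(t); i < lo(t + 1); ++i)
      if (keep(i))
        ++count[t + 1];
#pragma omp barrier
#pragma omp single
    {
      std::partial_sum(count.begin(), count.end(), count.begin());
    }

    // Pass 2: write survivors at this thread's precomputed offset.
    long pos = count[t];
    for (long i = lo(t); i < lo(t + 1); ++i)
      if (keep(i))
        out[pos++] = in[i];
  }

  for (long i = 0; i < count[p]; ++i)
    std::printf("%d ", out[i]);                         // prints 1 2 3 4 5 6
}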
......@@ -91,18 +91,15 @@ template<typename _DifferenceTp>
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
for_each_template_random_access_workstealing(_RAIter __begin,
_RAIter __end,
_Op __op, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
typename std::iterator_traits
<_RAIter>::
difference_type __bound)
for_each_template_random_access_workstealing(
_RAIter __begin, _RAIter __end, _Op __op, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
typename std::iterator_traits<_RAIter>::difference_type __bound)
{
_GLIBCXX_CALL(__end - __begin)
......@@ -111,13 +108,15 @@ template<typename _RAIter,
const _Settings& __s = _Settings::get();
_DifferenceType __chunk_size = static_cast<_DifferenceType>(__s.workstealing_chunk_size);
_DifferenceType __chunk_size =
static_cast<_DifferenceType>(__s.workstealing_chunk_size);
// How many jobs?
_DifferenceType __length = (__bound < 0) ? (__end - __begin) : __bound;
// To avoid false sharing in a cache line.
const int __stride = __s.cache_line_size * 10 / sizeof(_Job<_DifferenceType>) + 1;
const int __stride =
__s.cache_line_size * 10 / sizeof(_Job<_DifferenceType>) + 1;
// Total number of threads currently working.
_ThreadIndex __busy = 0;
......@@ -132,8 +131,8 @@ template<typename _RAIter,
// No more threads than jobs, at least one thread.
_ThreadIndex __num_threads =
__gnu_parallel::max<_ThreadIndex>(1,
__gnu_parallel::min<_DifferenceType>(__length, __get_max_threads()));
__gnu_parallel::max<_ThreadIndex>(1,
__gnu_parallel::min<_DifferenceType>(__length, __get_max_threads()));
# pragma omp parallel shared(__busy) num_threads(__num_threads)
{
......@@ -184,7 +183,7 @@ template<typename _RAIter,
(__length - 1) : ((__iam + 1) * (__length / __num_threads) - 1);
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
// Init result with _M_first __value (to have a base value for reduction).
// Init result with _M_first value (to have a base value for reduction)
if (__my_job._M_first <= __my_job._M_last)
{
// Cannot use volatile variable directly.
......@@ -211,13 +210,15 @@ template<typename _RAIter,
// fetch-and-add call
// Reserve current job block (size __chunk_size) in my queue.
_DifferenceType current_job =
__fetch_and_add<_DifferenceType>(&(__my_job._M_first), __chunk_size);
__fetch_and_add<_DifferenceType>(
&(__my_job._M_first), __chunk_size);
// Update _M_load, to make the three values consistent,
// _M_first might have been changed in the meantime
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
for (_DifferenceType job_counter = 0;
job_counter < __chunk_size && current_job <= __my_job._M_last;
job_counter < __chunk_size
&& current_job <= __my_job._M_last;
++job_counter)
{
// Yes: process it!
......@@ -254,7 +255,8 @@ template<typename _RAIter,
}
while (__busy > 0
&& ((__supposed_load <= 0)
|| ((__supposed_first + __supposed_load - 1) != __supposed_last)));
|| ((__supposed_first + __supposed_load - 1)
!= __supposed_last)));
if (__busy == 0)
break;
......@@ -273,7 +275,8 @@ template<typename _RAIter,
__stolen_first + __steal - _DifferenceType(1);
__my_job._M_first = __stolen_first;
__my_job._M_last = __gnu_parallel::min(stolen_try, __supposed_last);
__my_job._M_last =
__gnu_parallel::min(stolen_try, __supposed_last);
__my_job._M_load = __my_job._M_last - __my_job._M_first + 1;
// Has potential work again.
......