Commit 5817ff8e, authored by Paolo Carlini; committed by Paolo Carlini

multiway_merge.h: Reformat to 80 columns; adjust some inline specifiers; other minor style fixes.

2008-01-09  Paolo Carlini  <pcarlini@suse.de>

	* include/parallel/multiway_merge.h: Reformat to 80 columns;
	adjust some inline specifiers; other minor style fixes.
	* include/parallel/losertree.h: Likewise.
	* include/parallel/list_partition.h: Likewise.
	* include/parallel/multiseq_selection.h: Likewise.
	* include/parallel/workstealing.h: Likewise.
	* include/parallel/base.h: Likewise.
	* include/parallel/par_loop.h: Likewise.
	* include/parallel/numeric: Likewise.
	* include/parallel/quicksort.h: Likewise.
	* include/parallel/algorithmfwd.h: Likewise.
	* include/parallel/for_each_selectors.h: Likewise.
	* include/parallel/omp_loop_static.h: Likewise.
	* include/parallel/random_shuffle.h: Likewise.
	* include/parallel/balanced_quicksort.h: Likewise.
	* include/parallel/set_operations.h: Likewise.
	* include/parallel/tree.h: Likewise.
	* include/parallel/merge.h: Likewise.
	* include/parallel/unique_copy.h: Likewise.
	* include/parallel/settings.h: Likewise.
	* include/parallel/multiway_mergesort.h: Likewise.
	* include/parallel/numericfwd.h: Likewise.
	* include/parallel/search.h: Likewise.
	* include/parallel/partition.h: Likewise.
	* include/parallel/compatibility.h: Likewise.
	* include/parallel/partial_sum.h: Likewise.
	* include/parallel/find.h: Likewise.
	* include/parallel/algo.h: Likewise.
	* include/parallel/queue.h: Likewise.
	* include/parallel/omp_loop.h: Likewise.
	* include/parallel/sort.h: Likewise.
	* include/parallel/random_number.h: Likewise.

From-SVN: r131440
parent 299f5734
2008-01-09 Paolo Carlini <pcarlini@suse.de>
* include/parallel/multiway_merge.h: Reformat to 80 columns;
adjust some inline specifiers; other minor style fixes.
* include/parallel/losertree.h: Likewise.
* include/parallel/list_partition.h: Likewise.
* include/parallel/multiseq_selection.h: Likewise.
* include/parallel/workstealing.h: Likewise.
* include/parallel/base.h: Likewise.
* include/parallel/par_loop.h: Likewise.
* include/parallel/numeric: Likewise.
* include/parallel/quicksort.h: Likewise.
* include/parallel/algorithmfwd.h: Likewise.
* include/parallel/for_each_selectors.h: Likewise.
* include/parallel/omp_loop_static.h: Likewise.
* include/parallel/random_shuffle.h: Likewise.
* include/parallel/balanced_quicksort.h: Likewise.
* include/parallel/set_operations.h: Likewise.
* include/parallel/tree.h: Likewise.
* include/parallel/merge.h: Likewise.
* include/parallel/unique_copy.h: Likewise.
* include/parallel/settings.h: Likewise.
* include/parallel/multiway_mergesort.h: Likewise.
* include/parallel/numericfwd.h: Likewise.
* include/parallel/search.h: Likewise.
* include/parallel/partition.h: Likewise.
* include/parallel/compatibility.h: Likewise.
* include/parallel/partial_sum.h: Likewise.
* include/parallel/find.h: Likewise.
* include/parallel/algo.h: Likewise.
* include/parallel/queue.h: Likewise.
* include/parallel/omp_loop.h: Likewise.
* include/parallel/sort.h: Likewise.
* include/parallel/random_number.h: Likewise.
2008-01-09 Benjamin Kosnik <bkoz@redhat.com> 2008-01-09 Benjamin Kosnik <bkoz@redhat.com>
* docs/html/17_intro/api.html: Fix markup for rope.h. * docs/html/17_intro/api.html: Fix markup for rope.h.
......
...@@ -454,8 +454,8 @@ namespace __parallel ...@@ -454,8 +454,8 @@ namespace __parallel
_RAIter3 _RAIter3
transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation, transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation,
random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag,
random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::parallelism parallelism_tag); __gnu_parallel::parallelism);
template<typename _IIter1, typename _IIter2, typename _OIter, template<typename _IIter1, typename _IIter2, typename _OIter,
typename _BiOperation, typename _Tag1, typename _BiOperation, typename _Tag1,
...@@ -525,7 +525,7 @@ namespace __parallel ...@@ -525,7 +525,7 @@ namespace __parallel
template<typename _FIter> template<typename _FIter>
_FIter _FIter
max_element(_FIter, _FIter, __gnu_parallel::parallelism parallelism_tag); max_element(_FIter, _FIter, __gnu_parallel::parallelism);
template<typename _FIter, typename _Compare> template<typename _FIter, typename _Compare>
_FIter _FIter
......
...@@ -112,8 +112,9 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -112,8 +112,9 @@ template<typename RandomAccessIterator, typename Comparator>
typedef typename traits_type::value_type value_type; typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type; typedef typename traits_type::difference_type difference_type;
RandomAccessIterator pivot_pos = median_of_three_iterators( RandomAccessIterator pivot_pos =
begin, begin + (end - begin) / 2, end - 1, comp); median_of_three_iterators(begin, begin + (end - begin) / 2,
end - 1, comp);
#if defined(_GLIBCXX_ASSERTIONS) #if defined(_GLIBCXX_ASSERTIONS)
// Must be in between somewhere. // Must be in between somewhere.
...@@ -146,9 +147,9 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -146,9 +147,9 @@ template<typename RandomAccessIterator, typename Comparator>
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
RandomAccessIterator r; RandomAccessIterator r;
for (r = begin; r != pivot_pos; r++) for (r = begin; r != pivot_pos; ++r)
_GLIBCXX_PARALLEL_ASSERT(comp(*r, *pivot_pos)); _GLIBCXX_PARALLEL_ASSERT(comp(*r, *pivot_pos));
for (; r != end; r++) for (; r != end; ++r)
_GLIBCXX_PARALLEL_ASSERT(!comp(*r, *pivot_pos)); _GLIBCXX_PARALLEL_ASSERT(!comp(*r, *pivot_pos));
#endif #endif
...@@ -308,12 +309,12 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -308,12 +309,12 @@ template<typename RandomAccessIterator, typename Comparator>
__gnu_parallel::unary_negate<__gnu_parallel::binder1st __gnu_parallel::unary_negate<__gnu_parallel::binder1st
<Comparator, value_type, value_type, bool>, value_type> <Comparator, value_type, value_type, bool>, value_type>
pred(__gnu_parallel::binder1st pred(__gnu_parallel::binder1st
<Comparator, value_type, value_type, bool>( <Comparator, value_type, value_type, bool>(comp,
comp, *pivot_pos)); *pivot_pos));
// Find other end of pivot-equal range. // Find other end of pivot-equal range.
split_pos2 = __gnu_sequential::partition( split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
split_pos1 + 1, end, pred); end, pred);
} }
else else
// Only skip the pivot. // Only skip the pivot.
...@@ -339,8 +340,8 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -339,8 +340,8 @@ template<typename RandomAccessIterator, typename Comparator>
{ {
// Left side larger. // Left side larger.
if (begin != split_pos1) if (begin != split_pos1)
tl.leftover_parts.push_front( tl.leftover_parts.push_front(std::make_pair(begin,
std::make_pair(begin, split_pos1)); split_pos1));
current.first = split_pos2; current.first = split_pos2;
//current.second = end; //already set anyway //current.second = end; //already set anyway
...@@ -394,8 +395,8 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -394,8 +395,8 @@ template<typename RandomAccessIterator, typename Comparator>
if (omp_get_wtime() >= (search_start + 1.0)) if (omp_get_wtime() >= (search_start + 1.0))
{ {
sleep(1); sleep(1);
_GLIBCXX_PARALLEL_ASSERT( _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
omp_get_wtime() < (search_start + 1.0)); < (search_start + 1.0));
} }
#endif #endif
if (!successfully_stolen) if (!successfully_stolen)
...@@ -452,7 +453,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -452,7 +453,7 @@ template<typename RandomAccessIterator, typename Comparator>
// 2. The largest range has at most length n // 2. The largest range has at most length n
// 3. Each range is larger than half of the range remaining // 3. Each range is larger than half of the range remaining
volatile difference_type elements_leftover = n; volatile difference_type elements_leftover = n;
for (int i = 0; i < num_threads; i++) for (int i = 0; i < num_threads; ++i)
{ {
tls[i]->elements_leftover = &elements_leftover; tls[i]->elements_leftover = &elements_leftover;
tls[i]->num_threads = num_threads; tls[i]->num_threads = num_threads;
...@@ -468,11 +469,11 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -468,11 +469,11 @@ template<typename RandomAccessIterator, typename Comparator>
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// All stack must be empty. // All stack must be empty.
Piece dummy; Piece dummy;
for (int i = 1; i < num_threads; i++) for (int i = 1; i < num_threads; ++i)
_GLIBCXX_PARALLEL_ASSERT(!tls[i]->leftover_parts.pop_back(dummy)); _GLIBCXX_PARALLEL_ASSERT(!tls[i]->leftover_parts.pop_back(dummy));
#endif #endif
for (int i = 0; i < num_threads; i++) for (int i = 0; i < num_threads; ++i)
delete tls[i]; delete tls[i];
delete[] tls; delete[] tls;
} }
......
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc. // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -96,17 +96,13 @@ decode2(lcas_t x, int& a, int& b) ...@@ -96,17 +96,13 @@ decode2(lcas_t x, int& a, int& b)
template<typename T> template<typename T>
const T& const T&
min(const T& a, const T& b) min(const T& a, const T& b)
{ { return (a < b) ? a : b; }
return (a < b) ? a : b;
};
/** @brief Equivalent to std::max. */ /** @brief Equivalent to std::max. */
template<typename T> template<typename T>
const T& const T&
max(const T& a, const T& b) max(const T& a, const T& b)
{ { return (a > b) ? a : b; }
return (a > b) ? a : b;
};
/** @brief Constructs predicate for equality from strict weak /** @brief Constructs predicate for equality from strict weak
* ordering predicate * ordering predicate
...@@ -402,7 +398,7 @@ template<typename _ValueTp> ...@@ -402,7 +398,7 @@ template<typename _ValueTp>
* @param comp Comparator. * @param comp Comparator.
*/ */
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
RandomAccessIterator RandomAccessIterator
median_of_three_iterators(RandomAccessIterator a, RandomAccessIterator b, median_of_three_iterators(RandomAccessIterator a, RandomAccessIterator b,
RandomAccessIterator c, Comparator& comp) RandomAccessIterator c, Comparator& comp)
{ {
......
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc. // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -105,7 +105,8 @@ namespace __gnu_parallel ...@@ -105,7 +105,8 @@ namespace __gnu_parallel
#elif defined(__ECC) //IA-64 version #elif defined(__ECC) //IA-64 version
return _InterlockedExchangeAdd((void*)ptr, addend); return _InterlockedExchangeAdd((void*)ptr, addend);
#elif defined(__ICL) || defined(_MSC_VER) #elif defined(__ICL) || defined(_MSC_VER)
return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(ptr), addend); return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(ptr),
addend);
#elif defined(__GNUC__) #elif defined(__GNUC__)
return __sync_fetch_and_add(ptr, addend); return __sync_fetch_and_add(ptr, addend);
#elif defined(__SUNPRO_CC) && defined(__sparc) #elif defined(__SUNPRO_CC) && defined(__sparc)
...@@ -114,7 +115,8 @@ namespace __gnu_parallel ...@@ -114,7 +115,8 @@ namespace __gnu_parallel
{ {
before = *ptr; before = *ptr;
after = before + addend; after = before + addend;
} while (atomic_cas_32((volatile unsigned int*)ptr, before, after) != before); } while (atomic_cas_32((volatile unsigned int*)ptr, before,
after) != before);
return before; return before;
#else //fallback, slow #else //fallback, slow
#pragma message("slow fetch_and_add_32") #pragma message("slow fetch_and_add_32")
...@@ -159,7 +161,8 @@ namespace __gnu_parallel ...@@ -159,7 +161,8 @@ namespace __gnu_parallel
{ {
before = *ptr; before = *ptr;
after = before + addend; after = before + addend;
} while (atomic_cas_64((volatile unsigned long long*)ptr, before, after) != before); } while (atomic_cas_64((volatile unsigned long long*)ptr, before,
after) != before);
return before; return before;
#else //fallback, slow #else //fallback, slow
#if defined(__GNUC__) && defined(__i386) #if defined(__GNUC__) && defined(__i386)
...@@ -238,15 +241,19 @@ namespace __gnu_parallel ...@@ -238,15 +241,19 @@ namespace __gnu_parallel
compare_and_swap_32(volatile int32* ptr, int32 comparand, int32 replacement) compare_and_swap_32(volatile int32* ptr, int32 comparand, int32 replacement)
{ {
#if defined(__ICC) //x86 version #if defined(__ICC) //x86 version
return _InterlockedCompareExchange((void*)ptr, replacement, comparand) == comparand; return _InterlockedCompareExchange((void*)ptr, replacement,
comparand) == comparand;
#elif defined(__ECC) //IA-64 version #elif defined(__ECC) //IA-64 version
return _InterlockedCompareExchange((void*)ptr, replacement, comparand) == comparand; return _InterlockedCompareExchange((void*)ptr, replacement,
comparand) == comparand;
#elif defined(__ICL) || defined(_MSC_VER) #elif defined(__ICL) || defined(_MSC_VER)
return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr), replacement, comparand) == comparand; return _InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr),
replacement, comparand) == comparand;
#elif defined(__GNUC__) #elif defined(__GNUC__)
return __sync_bool_compare_and_swap(ptr, comparand, replacement); return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc) #elif defined(__SUNPRO_CC) && defined(__sparc)
return atomic_cas_32((volatile unsigned int*)ptr, comparand, replacement) == comparand; return atomic_cas_32((volatile unsigned int*)ptr, comparand,
replacement) == comparand;
#else #else
#pragma message("slow compare_and_swap_32") #pragma message("slow compare_and_swap_32")
bool res = false; bool res = false;
...@@ -276,13 +283,15 @@ namespace __gnu_parallel ...@@ -276,13 +283,15 @@ namespace __gnu_parallel
#if defined(__ICC) && defined(__x86_64) //x86 version #if defined(__ICC) && defined(__x86_64) //x86 version
return cas64<int>(ptr, comparand, replacement) == comparand; return cas64<int>(ptr, comparand, replacement) == comparand;
#elif defined(__ECC) //IA-64 version #elif defined(__ECC) //IA-64 version
return _InterlockedCompareExchange64((void*)ptr, replacement, comparand) == comparand; return _InterlockedCompareExchange64((void*)ptr, replacement,
comparand) == comparand;
#elif defined(__ICL) || defined(_MSC_VER) #elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64 #ifndef _WIN64
_GLIBCXX_PARALLEL_ASSERT(false); //not available in this case _GLIBCXX_PARALLEL_ASSERT(false); //not available in this case
return 0; return 0;
#else #else
return _InterlockedCompareExchange64(ptr, replacement, comparand) == comparand; return _InterlockedCompareExchange64(ptr, replacement,
comparand) == comparand;
#endif #endif
#elif defined(__GNUC__) && defined(__x86_64) #elif defined(__GNUC__) && defined(__x86_64)
...@@ -291,7 +300,8 @@ namespace __gnu_parallel ...@@ -291,7 +300,8 @@ namespace __gnu_parallel
(defined(__i686) || defined(__pentium4) || defined(__athlon)) (defined(__i686) || defined(__pentium4) || defined(__athlon))
return __sync_bool_compare_and_swap(ptr, comparand, replacement); return __sync_bool_compare_and_swap(ptr, comparand, replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc) #elif defined(__SUNPRO_CC) && defined(__sparc)
return atomic_cas_64((volatile unsigned long long*)ptr, comparand, replacement) == comparand; return atomic_cas_64((volatile unsigned long long*)ptr,
comparand, replacement) == comparand;
#else #else
#if defined(__GNUC__) && defined(__i386) #if defined(__GNUC__) && defined(__i386)
// XXX -march=native // XXX -march=native
...@@ -323,9 +333,11 @@ namespace __gnu_parallel ...@@ -323,9 +333,11 @@ namespace __gnu_parallel
compare_and_swap(volatile T* ptr, T comparand, T replacement) compare_and_swap(volatile T* ptr, T comparand, T replacement)
{ {
if (sizeof(T) == sizeof(int32)) if (sizeof(T) == sizeof(int32))
return compare_and_swap_32((volatile int32*) ptr, (int32)comparand, (int32)replacement); return compare_and_swap_32((volatile int32*) ptr,
(int32)comparand, (int32)replacement);
else if (sizeof(T) == sizeof(int64)) else if (sizeof(T) == sizeof(int64))
return compare_and_swap_64((volatile int64*) ptr, (int64)comparand, (int64)replacement); return compare_and_swap_64((volatile int64*) ptr,
(int64)comparand, (int64)replacement);
else else
_GLIBCXX_PARALLEL_ASSERT(false); _GLIBCXX_PARALLEL_ASSERT(false);
} }
......
...@@ -62,7 +62,7 @@ template<typename RandomAccessIterator1, ...@@ -62,7 +62,7 @@ template<typename RandomAccessIterator1,
typename RandomAccessIterator2, typename RandomAccessIterator2,
typename Pred, typename Pred,
typename Selector> typename Selector>
std::pair<RandomAccessIterator1, RandomAccessIterator2> inline std::pair<RandomAccessIterator1, RandomAccessIterator2>
find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
RandomAccessIterator2 begin2, Pred pred, Selector selector) RandomAccessIterator2 begin2, Pred pred, Selector selector)
{ {
...@@ -70,13 +70,13 @@ template<typename RandomAccessIterator1, ...@@ -70,13 +70,13 @@ template<typename RandomAccessIterator1,
{ {
case Settings::GROWING_BLOCKS: case Settings::GROWING_BLOCKS:
return find_template(begin1, end1, begin2, pred, selector, return find_template(begin1, end1, begin2, pred, selector,
growing_blocks_tag()); growing_blocks_tag());
case Settings::CONSTANT_SIZE_BLOCKS: case Settings::CONSTANT_SIZE_BLOCKS:
return find_template(begin1, end1, begin2, pred, selector, return find_template(begin1, end1, begin2, pred, selector,
constant_size_blocks_tag()); constant_size_blocks_tag());
case Settings::EQUAL_SPLIT: case Settings::EQUAL_SPLIT:
return find_template(begin1, end1, begin2, pred, selector, return find_template(begin1, end1, begin2, pred, selector,
equal_split_tag()); equal_split_tag());
default: default:
_GLIBCXX_PARALLEL_ASSERT(false); _GLIBCXX_PARALLEL_ASSERT(false);
return std::make_pair(begin1, begin2); return std::make_pair(begin1, begin2);
...@@ -158,8 +158,9 @@ template<typename RandomAccessIterator1, ...@@ -158,8 +158,9 @@ template<typename RandomAccessIterator1,
omp_destroy_lock(&result_lock); omp_destroy_lock(&result_lock);
delete[] borders; delete[] borders;
return std::pair<RandomAccessIterator1, RandomAccessIterator2>( return
begin1 + result, begin2 + result); std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result,
begin2 + result);
} }
#endif #endif
...@@ -205,8 +206,8 @@ template<typename RandomAccessIterator1, ...@@ -205,8 +206,8 @@ template<typename RandomAccessIterator1,
difference_type length = end1 - begin1; difference_type length = end1 - begin1;
difference_type sequential_search_size = std::min<difference_type>( difference_type sequential_search_size =
length, Settings::find_sequential_search_size); std::min<difference_type>(length, Settings::find_sequential_search_size);
// Try it sequentially first. // Try it sequentially first.
std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result = std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
...@@ -267,23 +268,25 @@ template<typename RandomAccessIterator1, ...@@ -267,23 +268,25 @@ template<typename RandomAccessIterator1,
omp_unset_lock(&result_lock); omp_unset_lock(&result_lock);
} }
block_size = std::min<difference_type>( block_size =
block_size * Settings::find_increasing_factor, std::min<difference_type>(block_size
Settings::find_maximum_block_size); * Settings::find_increasing_factor,
Settings::find_maximum_block_size);
// Get new block, update pointer to next block. // Get new block, update pointer to next block.
start = start =
fetch_and_add<difference_type>(&next_block_start, block_size); fetch_and_add<difference_type>(&next_block_start, block_size);
stop = (length < (start + block_size)) ? stop = ((length < (start + block_size))
length : (start + block_size); ? length : (start + block_size));
} }
} //parallel } //parallel
omp_destroy_lock(&result_lock); omp_destroy_lock(&result_lock);
// Return iterator on found element. // Return iterator on found element.
return std::pair<RandomAccessIterator1, RandomAccessIterator2>( return
begin1 + result, begin2 + result); std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result,
begin2 + result);
} }
#endif #endif
...@@ -391,8 +394,9 @@ template<typename RandomAccessIterator1, ...@@ -391,8 +394,9 @@ template<typename RandomAccessIterator1,
omp_destroy_lock(&result_lock); omp_destroy_lock(&result_lock);
// Return iterator on found element. // Return iterator on found element.
return std::pair<RandomAccessIterator1, RandomAccessIterator2>( return
begin1 + result, begin2 + result); std::pair<RandomAccessIterator1, RandomAccessIterator2>(begin1 + result,
begin2 + result);
} }
#endif #endif
} // end namespace } // end namespace
......
...@@ -159,7 +159,7 @@ namespace __gnu_parallel ...@@ -159,7 +159,7 @@ namespace __gnu_parallel
// Smallest partitions. // Smallest partitions.
for (int i = 1; i < (num_parts + 1 - size_greater); ++i) for (int i = 1; i < (num_parts + 1 - size_greater); ++i)
{ {
lengths[i-1] = size_part * range_length; lengths[i - 1] = size_part * range_length;
index += size_part; index += size_part;
starts[i] = os_starts[index]; starts[i] = os_starts[index];
} }
...@@ -167,7 +167,7 @@ namespace __gnu_parallel ...@@ -167,7 +167,7 @@ namespace __gnu_parallel
// Biggest partitions. // Biggest partitions.
for (int i = num_parts + 1 - size_greater; i <= num_parts; ++i) for (int i = num_parts + 1 - size_greater; i <= num_parts; ++i)
{ {
lengths[i-1] = (size_part+1) * range_length; lengths[i - 1] = (size_part+1) * range_length;
index += (size_part+1); index += (size_part+1);
starts[i] = os_starts[index]; starts[i] = os_starts[index];
} }
......
...@@ -82,7 +82,7 @@ template<typename T, typename Comparator = std::less<T> > ...@@ -82,7 +82,7 @@ template<typename T, typename Comparator = std::less<T> >
size = _size; size = _size;
offset = size; offset = size;
losers = new Loser[size]; losers = new Loser[size];
for (unsigned int l = 0; l < size; l++) for (unsigned int l = 0; l < size; ++l)
{ {
//losers[l].key = ... stays unset //losers[l].key = ... stays unset
losers[l].inf = true; losers[l].inf = true;
...@@ -156,9 +156,10 @@ template<typename T, typename Comparator = std::less<T> > ...@@ -156,9 +156,10 @@ template<typename T, typename Comparator = std::less<T> >
bool inf = false; bool inf = false;
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
{ {
if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup && if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
((comp(losers[pos].key, key)) || && ((comp(losers[pos].key, key))
(!comp(key, losers[pos].key) && losers[pos].source < source))) || (!comp(key, losers[pos].key)
&& losers[pos].source < source)))
|| losers[pos].inf || sup) || losers[pos].inf || sup)
{ {
// Take next key. // Take next key.
...@@ -186,8 +187,9 @@ template<typename T, typename Comparator = std::less<T> > ...@@ -186,8 +187,9 @@ template<typename T, typename Comparator = std::less<T> >
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
{ {
if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
&& ((comp(losers[pos].key, key)) || && ((comp(losers[pos].key, key))
(!comp(key, losers[pos].key) && losers[pos].source < source))) || (!comp(key, losers[pos].key)
&& losers[pos].source < source)))
|| losers[pos].inf || sup) || losers[pos].inf || sup)
{ {
std::swap(losers[pos].key, key); std::swap(losers[pos].key, key);
...@@ -285,9 +287,9 @@ template<typename T, typename Comparator = std::less<T> > ...@@ -285,9 +287,9 @@ template<typename T, typename Comparator = std::less<T> >
{ {
unsigned int left = init_winner (2 * root); unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1); unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup || if (losers[right].sup
(!losers[left].sup || (!losers[left].sup
&& !comp(losers[right].key, losers[left].key))) && !comp(losers[right].key, losers[left].key)))
{ {
// Left one is less or equal. // Left one is less or equal.
losers[root] = losers[right]; losers[root] = losers[right];
...@@ -345,7 +347,7 @@ template<typename T, typename Comparator = std::less<T> > ...@@ -345,7 +347,7 @@ template<typename T, typename Comparator = std::less<T> >
unsigned int right = init_winner (2 * root + 1); unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup if (losers[right].sup
|| (!losers[left].sup || (!losers[left].sup
&& !comp(losers[right].key, losers[left].key))) && !comp(losers[right].key, losers[left].key)))
{ {
// Left one is less or equal. // Left one is less or equal.
losers[root] = losers[right]; losers[root] = losers[right];
...@@ -443,7 +445,7 @@ template<typename T, typename Comparator = std::less<T> > ...@@ -443,7 +445,7 @@ template<typename T, typename Comparator = std::less<T> >
#ifndef COPY #ifndef COPY
keys = new T[ik]; keys = new T[ik];
#endif #endif
for (unsigned int i = ik - 1; i < k; i++) for (unsigned int i = ik - 1; i < k; ++i)
losers[i + k].sup = true; losers[i + k].sup = true;
} }
...@@ -569,11 +571,11 @@ template<typename T, typename Comparator = std::less<T> > ...@@ -569,11 +571,11 @@ template<typename T, typename Comparator = std::less<T> >
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{ {
// The smaller one gets promoted, ties are broken by source. // The smaller one gets promoted, ties are broken by source.
if ( (sup && (!losers[pos].sup || losers[pos].source < source)) || if ((sup && (!losers[pos].sup || losers[pos].source < source))
(!sup && !losers[pos].sup && || (!sup && !losers[pos].sup
((comp(KEY(pos), KEY_SOURCE(source))) || && ((comp(KEY(pos), KEY_SOURCE(source)))
(!comp(KEY_SOURCE(source), KEY(pos)) || (!comp(KEY_SOURCE(source), KEY(pos))
&& losers[pos].source < source)))) && losers[pos].source < source))))
{ {
// The other one is smaller. // The other one is smaller.
std::swap(losers[pos].sup, sup); std::swap(losers[pos].sup, sup);
...@@ -629,7 +631,7 @@ template<typename T, typename Comparator = std::less<T> > ...@@ -629,7 +631,7 @@ template<typename T, typename Comparator = std::less<T> >
k = 1 << (log2(ik - 1) + 1); k = 1 << (log2(ik - 1) + 1);
offset = k; offset = k;
losers = new Loser[k * 2]; losers = new Loser[k * 2];
for (unsigned int i = ik - 1; i < k; i++) for (unsigned int i = ik - 1; i < k; ++i)
losers[i + k].sup = true; losers[i + k].sup = true;
} }
...@@ -746,11 +748,11 @@ template<typename T, typename Comparator = std::less<T> > ...@@ -746,11 +748,11 @@ template<typename T, typename Comparator = std::less<T> >
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{ {
// The smaller one gets promoted, ties are broken by source. // The smaller one gets promoted, ties are broken by source.
if ( (sup && (!losers[pos].sup || losers[pos].source < source)) || if ( (sup && (!losers[pos].sup || losers[pos].source < source))
(!sup && !losers[pos].sup && || (!sup && !losers[pos].sup &&
((comp(*losers[pos].keyp, *keyp)) || ((comp(*losers[pos].keyp, *keyp))
(!comp(*keyp, *losers[pos].keyp) || (!comp(*keyp, *losers[pos].keyp)
&& losers[pos].source < source)))) && losers[pos].source < source))))
{ {
// The other one is smaller. // The other one is smaller.
std::swap(losers[pos].sup, sup); std::swap(losers[pos].sup, sup);
...@@ -995,8 +997,8 @@ template<typename T, typename Comparator = std::less<T> > ...@@ -995,8 +997,8 @@ template<typename T, typename Comparator = std::less<T> >
// Next greater or equal power of 2. // Next greater or equal power of 2.
unsigned int division = 1 << (log2(end - begin - 1)); unsigned int division = 1 << (log2(end - begin - 1));
unsigned int left = init_winner(2 * root, begin, begin + division); unsigned int left = init_winner(2 * root, begin, begin + division);
unsigned int right unsigned int right = init_winner(2 * root + 1,
= init_winner(2 * root + 1, begin + division, end); begin + division, end);
if (!comp(*losers[right].keyp, *losers[left].keyp)) if (!comp(*losers[right].keyp, *losers[left].keyp))
{ {
// Left one is less or equal. // Left one is less or equal.
......
...@@ -74,7 +74,7 @@ namespace __gnu_parallel ...@@ -74,7 +74,7 @@ namespace __gnu_parallel
*target++ = *begin2++; *target++ = *begin2++;
else else
*target++ = *begin1++; *target++ = *begin1++;
max_length--; --max_length;
} }
if (begin1 != end1) if (begin1 != end1)
...@@ -143,8 +143,8 @@ namespace __gnu_parallel ...@@ -143,8 +143,8 @@ namespace __gnu_parallel
*target = element1; *target = element1;
target++; ++target;
max_length--; --max_length;
} }
if (begin1 != end1) if (begin1 != end1)
{ {
......
...@@ -212,7 +212,7 @@ namespace __gnu_parallel ...@@ -212,7 +212,7 @@ namespace __gnu_parallel
difference_type localrank = rank * m / N ; difference_type localrank = rank * m / N ;
int j; int j;
for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); j++) for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); ++j)
a[sample[j].second] += n + 1; a[sample[j].second] += n + 1;
for (; j < m; j++) for (; j < m; j++)
b[sample[j].second] -= n + 1; b[sample[j].second] -= n + 1;
...@@ -279,7 +279,7 @@ namespace __gnu_parallel ...@@ -279,7 +279,7 @@ namespace __gnu_parallel
if (b[i] < ns[i]) if (b[i] < ns[i])
pq.push(std::make_pair(S(i)[b[i]], i)); pq.push(std::make_pair(S(i)[b[i]], i));
for (; skew != 0 && !pq.empty(); skew--) for (; skew != 0 && !pq.empty(); --skew)
{ {
int source = pq.top().second; int source = pq.top().second;
pq.pop(); pq.pop();
...@@ -302,7 +302,7 @@ namespace __gnu_parallel ...@@ -302,7 +302,7 @@ namespace __gnu_parallel
if (a[i] > 0) if (a[i] > 0)
pq.push(std::make_pair(S(i)[a[i] - 1], i)); pq.push(std::make_pair(S(i)[a[i] - 1], i));
for (; skew != 0; skew++) for (; skew != 0; ++skew)
{ {
int source = pq.top().second; int source = pq.top().second;
pq.pop(); pq.pop();
...@@ -416,7 +416,7 @@ namespace __gnu_parallel ...@@ -416,7 +416,7 @@ namespace __gnu_parallel
ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second); ns[0] = std::distance(begin_seqs[0].first, begin_seqs[0].second);
nmax = ns[0]; nmax = ns[0];
for (int i = 0; i < m; i++) for (int i = 0; i < m; ++i)
{ {
ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second); ns[i] = std::distance(begin_seqs[i].first, begin_seqs[i].second);
nmax = std::max(nmax, ns[i]); nmax = std::max(nmax, ns[i]);
...@@ -431,7 +431,7 @@ namespace __gnu_parallel ...@@ -431,7 +431,7 @@ namespace __gnu_parallel
// From now on, including padding. // From now on, including padding.
N = l * m; N = l * m;
for (int i = 0; i < m; i++) for (int i = 0; i < m; ++i)
{ {
a[i] = 0; a[i] = 0;
b[i] = l; b[i] = l;
...@@ -460,9 +460,9 @@ namespace __gnu_parallel ...@@ -460,9 +460,9 @@ namespace __gnu_parallel
difference_type localrank = rank * m / N ; difference_type localrank = rank * m / N ;
int j; int j;
for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); j++) for (j = 0; j < localrank && ((n + 1) <= ns[sample[j].second]); ++j)
a[sample[j].second] += n + 1; a[sample[j].second] += n + 1;
for (; j < m; j++) for (; j < m; ++j)
b[sample[j].second] -= n + 1; b[sample[j].second] -= n + 1;
// Further refinement. // Further refinement.
...@@ -471,7 +471,7 @@ namespace __gnu_parallel ...@@ -471,7 +471,7 @@ namespace __gnu_parallel
n /= 2; n /= 2;
const T* lmax = NULL; const T* lmax = NULL;
for (int i = 0; i < m; i++) for (int i = 0; i < m; ++i)
{ {
if (a[i] > 0) if (a[i] > 0)
{ {
...@@ -496,7 +496,7 @@ namespace __gnu_parallel ...@@ -496,7 +496,7 @@ namespace __gnu_parallel
} }
difference_type leftsize = 0, total = 0; difference_type leftsize = 0, total = 0;
for (int i = 0; i < m; i++) for (int i = 0; i < m; ++i)
{ {
leftsize += a[i] / (n + 1); leftsize += a[i] / (n + 1);
total += l / (n + 1); total += l / (n + 1);
...@@ -512,7 +512,7 @@ namespace __gnu_parallel ...@@ -512,7 +512,7 @@ namespace __gnu_parallel
std::vector<std::pair<T, int> >, std::vector<std::pair<T, int> >,
lexicographic_reverse<T, int, Comparator> > pq(lrcomp); lexicographic_reverse<T, int, Comparator> > pq(lrcomp);
for (int i = 0; i < m; i++) for (int i = 0; i < m; ++i)
if (b[i] < ns[i]) if (b[i] < ns[i])
pq.push(std::make_pair(S(i)[b[i]], i)); pq.push(std::make_pair(S(i)[b[i]], i));
...@@ -535,7 +535,7 @@ namespace __gnu_parallel ...@@ -535,7 +535,7 @@ namespace __gnu_parallel
std::vector<std::pair<T, int> >, std::vector<std::pair<T, int> >,
lexicographic<T, int, Comparator> > pq(lcomp); lexicographic<T, int, Comparator> > pq(lcomp);
for (int i = 0; i < m; i++) for (int i = 0; i < m; ++i)
if (a[i] > 0) if (a[i] > 0)
pq.push(std::make_pair(S(i)[a[i] - 1], i)); pq.push(std::make_pair(S(i)[a[i] - 1], i));
...@@ -566,7 +566,7 @@ namespace __gnu_parallel ...@@ -566,7 +566,7 @@ namespace __gnu_parallel
// Impossible to avoid the warning? // Impossible to avoid the warning?
T maxleft, minright; T maxleft, minright;
for (int i = 0; i < m; i++) for (int i = 0; i < m; ++i)
{ {
if (a[i] > 0) if (a[i] > 0)
{ {
...@@ -610,7 +610,7 @@ namespace __gnu_parallel ...@@ -610,7 +610,7 @@ namespace __gnu_parallel
// We have to calculate an offset. // We have to calculate an offset.
offset = 0; offset = 0;
for (int i = 0; i < m; i++) for (int i = 0; i < m; ++i)
{ {
difference_type lb = std::lower_bound(S(i), S(i) + ns[i], difference_type lb = std::lower_bound(S(i), S(i) + ns[i],
minright, minright,
......
...@@ -120,7 +120,7 @@ template<typename RandomAccessIterator> ...@@ -120,7 +120,7 @@ template<typename RandomAccessIterator>
* @param num_samples Number of samples to select. * @param num_samples Number of samples to select.
*/ */
template<typename RandomAccessIterator, typename _DifferenceTp> template<typename RandomAccessIterator, typename _DifferenceTp>
inline void void
determine_samples(PMWMSSortingData<RandomAccessIterator>* sd, determine_samples(PMWMSSortingData<RandomAccessIterator>* sd,
_DifferenceTp& num_samples) _DifferenceTp& num_samples)
{ {
...@@ -138,9 +138,9 @@ template<typename RandomAccessIterator, typename _DifferenceTp> ...@@ -138,9 +138,9 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
equally_split(sd->starts[iam + 1] - sd->starts[iam], equally_split(sd->starts[iam + 1] - sd->starts[iam],
num_samples + 1, es); num_samples + 1, es);
for (difference_type i = 0; i < num_samples; i++) for (difference_type i = 0; i < num_samples; ++i)
::new(&(sd->samples[iam * num_samples + i])) value_type( ::new(&(sd->samples[iam * num_samples + i]))
sd->source[sd->starts[iam] + es[i + 1]]); value_type(sd->source[sd->starts[iam] + es[i + 1]]);
delete[] es; delete[] es;
} }
...@@ -150,7 +150,7 @@ template<typename RandomAccessIterator, typename _DifferenceTp> ...@@ -150,7 +150,7 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
* @param comp Comparator. * @param comp Comparator.
*/ */
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
inline void void
parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd, parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd,
Comparator& comp) Comparator& comp)
{ {
...@@ -209,7 +209,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -209,7 +209,7 @@ template<typename RandomAccessIterator, typename Comparator>
# pragma omp barrier # pragma omp barrier
for (int s = 0; s < sd->num_threads; s++) for (int s = 0; s < sd->num_threads; ++s)
{ {
// For each sequence. // For each sequence.
if (num_samples * iam > 0) if (num_samples * iam > 0)
...@@ -243,7 +243,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -243,7 +243,7 @@ template<typename RandomAccessIterator, typename Comparator>
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
seqs(sd->num_threads); seqs(sd->num_threads);
for (int s = 0; s < sd->num_threads; s++) for (int s = 0; s < sd->num_threads; ++s)
seqs[s] = std::make_pair(sd->sorting_places[s], seqs[s] = std::make_pair(sd->sorting_places[s],
sd->sorting_places[s] sd->sorting_places[s]
+ (sd->starts[s + 1] - sd->starts[s])); + (sd->starts[s + 1] - sd->starts[s]));
...@@ -255,20 +255,20 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -255,20 +255,20 @@ template<typename RandomAccessIterator, typename Comparator>
multiseq_partition(seqs.begin(), seqs.end(), multiseq_partition(seqs.begin(), seqs.end(),
sd->starts[iam + 1], offsets.begin(), comp); sd->starts[iam + 1], offsets.begin(), comp);
for (int seq = 0; seq < sd->num_threads; seq++) for (int seq = 0; seq < sd->num_threads; ++seq)
{ {
// for each sequence // for each sequence
if (iam < (sd->num_threads - 1)) if (iam < (sd->num_threads - 1))
sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first; sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
else else
// very end of this sequence // very end of this sequence
sd->pieces[iam][seq].end = sd->pieces[iam][seq].end = (sd->starts[seq + 1]
sd->starts[seq + 1] - sd->starts[seq]; - sd->starts[seq]);
} }
# pragma omp barrier # pragma omp barrier
for (int seq = 0; seq < sd->num_threads; seq++) for (int seq = 0; seq < sd->num_threads; ++seq)
{ {
// For each sequence. // For each sequence.
if (iam > 0) if (iam > 0)
...@@ -281,7 +281,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -281,7 +281,7 @@ template<typename RandomAccessIterator, typename Comparator>
// Offset from target begin, length after merging. // Offset from target begin, length after merging.
difference_type offset = 0, length_am = 0; difference_type offset = 0, length_am = 0;
for (int s = 0; s < sd->num_threads; s++) for (int s = 0; s < sd->num_threads; ++s)
{ {
length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin; length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
offset += sd->pieces[iam][s].begin; offset += sd->pieces[iam][s].begin;
...@@ -293,8 +293,8 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -293,8 +293,8 @@ template<typename RandomAccessIterator, typename Comparator>
// instead of the assignment operator. // instead of the assignment operator.
// XXX incorrect (de)construction // XXX incorrect (de)construction
sd->merging_places[iam] = sd->temporaries[iam] = sd->merging_places[iam] = sd->temporaries[iam] =
static_cast<value_type*>( static_cast<value_type*>(::operator new(sizeof(value_type)
::operator new(sizeof(value_type) * length_am)); * length_am));
#else #else
// Merge directly to target. // Merge directly to target.
sd->merging_places[iam] = sd->source + offset; sd->merging_places[iam] = sd->source + offset;
...@@ -302,11 +302,11 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -302,11 +302,11 @@ template<typename RandomAccessIterator, typename Comparator>
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
seqs(sd->num_threads); seqs(sd->num_threads);
for (int s = 0; s < sd->num_threads; s++) for (int s = 0; s < sd->num_threads; ++s)
{ {
seqs[s] = seqs[s] =
std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin, std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin,
sd->sorting_places[s] + sd->pieces[iam][s].end); sd->sorting_places[s] + sd->pieces[iam][s].end);
} }
multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp,
...@@ -333,13 +333,11 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -333,13 +333,11 @@ template<typename RandomAccessIterator, typename Comparator>
* @param stable Stable sorting. * @param stable Stable sorting.
*/ */
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
inline void void
parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end, parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, Comparator comp, typename
typename std::iterator_traits<RandomAccessIterator> std::iterator_traits<RandomAccessIterator>::
::difference_type n, difference_type n, int num_threads, bool stable)
int num_threads,
bool stable)
{ {
_GLIBCXX_CALL(n) _GLIBCXX_CALL(n)
...@@ -382,14 +380,14 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -382,14 +380,14 @@ template<typename RandomAccessIterator, typename Comparator>
(Settings::sort_mwms_oversampling * num_threads - 1) (Settings::sort_mwms_oversampling * num_threads - 1)
* num_threads; * num_threads;
sd.samples = static_cast<value_type*>( sd.samples = static_cast<value_type*>(
::operator new(size * sizeof(value_type))); ::operator new(size * sizeof(value_type)));
} }
else else
sd.samples = NULL; sd.samples = NULL;
sd.offsets = new difference_type[num_threads - 1]; sd.offsets = new difference_type[num_threads - 1];
sd.pieces = new std::vector<Piece<difference_type> >[num_threads]; sd.pieces = new std::vector<Piece<difference_type> >[num_threads];
for (int s = 0; s < num_threads; s++) for (int s = 0; s < num_threads; ++s)
sd.pieces[s].resize(num_threads); sd.pieces[s].resize(num_threads);
starts = sd.starts = new difference_type[num_threads + 1]; starts = sd.starts = new difference_type[num_threads + 1];
sd.stable = stable; sd.stable = stable;
...@@ -397,7 +395,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -397,7 +395,7 @@ template<typename RandomAccessIterator, typename Comparator>
difference_type chunk_length = n / num_threads; difference_type chunk_length = n / num_threads;
difference_type split = n % num_threads; difference_type split = n % num_threads;
difference_type pos = 0; difference_type pos = 0;
for (int i = 0; i < num_threads; i++) for (int i = 0; i < num_threads; ++i)
{ {
starts[i] = pos; starts[i] = pos;
pos += (i < split) ? (chunk_length + 1) : chunk_length; pos += (i < split) ? (chunk_length + 1) : chunk_length;
......
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc. // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -65,17 +65,18 @@ namespace __gnu_parallel ...@@ -65,17 +65,18 @@ namespace __gnu_parallel
* @return User-supplied functor (that may contain a part of the result). * @return User-supplied functor (that may contain a part of the result).
*/ */
template<typename RandomAccessIterator, template<typename RandomAccessIterator,
typename Op, typename Op,
typename Fu, typename Fu,
typename Red, typename Red,
typename Result> typename Result>
Op Op
for_each_template_random_access_omp_loop( for_each_template_random_access_omp_loop(RandomAccessIterator begin,
RandomAccessIterator begin, RandomAccessIterator end,
RandomAccessIterator end, Op o, Fu& f, Red r, Result base,
Op o, Fu& f, Red r, Result base, Result& output, Result& output,
typename std::iterator_traits<RandomAccessIterator>:: typename std::iterator_traits
difference_type bound) <RandomAccessIterator>::
difference_type bound)
{ {
typedef typename typedef typename
std::iterator_traits<RandomAccessIterator>::difference_type std::iterator_traits<RandomAccessIterator>::difference_type
...@@ -83,7 +84,7 @@ template<typename RandomAccessIterator, ...@@ -83,7 +84,7 @@ template<typename RandomAccessIterator,
difference_type length = end - begin; difference_type length = end - begin;
thread_index_t num_threads = thread_index_t num_threads =
__gnu_parallel::min<difference_type>(get_max_threads(), length); __gnu_parallel::min<difference_type>(get_max_threads(), length);
Result *thread_results; Result *thread_results;
...@@ -94,19 +95,19 @@ template<typename RandomAccessIterator, ...@@ -94,19 +95,19 @@ template<typename RandomAccessIterator,
num_threads = omp_get_num_threads(); num_threads = omp_get_num_threads();
thread_results = new Result[num_threads]; thread_results = new Result[num_threads];
for (thread_index_t i = 0; i < num_threads; i++) for (thread_index_t i = 0; i < num_threads; ++i)
thread_results[i] = Result(); thread_results[i] = Result();
} }
thread_index_t iam = omp_get_thread_num(); thread_index_t iam = omp_get_thread_num();
# pragma omp for schedule(dynamic, Settings::workstealing_chunk_size) # pragma omp for schedule(dynamic, Settings::workstealing_chunk_size)
for (difference_type pos = 0; pos < length; pos++) for (difference_type pos = 0; pos < length; ++pos)
thread_results[iam] = thread_results[iam] =
r(thread_results[iam], f(o, begin+pos)); r(thread_results[iam], f(o, begin+pos));
} //parallel } //parallel
for (thread_index_t i = 0; i < num_threads; i++) for (thread_index_t i = 0; i < num_threads; ++i)
output = r(output, thread_results[i]); output = r(output, thread_results[i]);
delete [] thread_results; delete [] thread_results;
......
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc. // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -65,25 +65,26 @@ namespace __gnu_parallel ...@@ -65,25 +65,26 @@ namespace __gnu_parallel
* @return User-supplied functor (that may contain a part of the result). * @return User-supplied functor (that may contain a part of the result).
*/ */
template<typename RandomAccessIterator, template<typename RandomAccessIterator,
typename Op, typename Op,
typename Fu, typename Fu,
typename Red, typename Red,
typename Result> typename Result>
Op Op
for_each_template_random_access_omp_loop_static( for_each_template_random_access_omp_loop_static(RandomAccessIterator begin,
RandomAccessIterator begin, RandomAccessIterator end,
RandomAccessIterator end, Op o, Fu& f, Red r,
Op o, Fu& f, Red r, Result base, Result& output, Result base, Result& output,
typename std::iterator_traits<RandomAccessIterator>:: typename std::iterator_traits
difference_type bound) <RandomAccessIterator>::
difference_type bound)
{ {
typedef typename typedef typename
std::iterator_traits<RandomAccessIterator>::difference_type std::iterator_traits<RandomAccessIterator>::difference_type
difference_type; difference_type;
difference_type length = end - begin; difference_type length = end - begin;
thread_index_t num_threads = thread_index_t num_threads =
std::min<difference_type>(get_max_threads(), length); std::min<difference_type>(get_max_threads(), length);
Result *thread_results; Result *thread_results;
...@@ -94,20 +95,19 @@ template<typename RandomAccessIterator, ...@@ -94,20 +95,19 @@ template<typename RandomAccessIterator,
num_threads = omp_get_num_threads(); num_threads = omp_get_num_threads();
thread_results = new Result[num_threads]; thread_results = new Result[num_threads];
for (thread_index_t i = 0; i < num_threads; i++) for (thread_index_t i = 0; i < num_threads; ++i)
thread_results[i] = Result(); thread_results[i] = Result();
} }
thread_index_t iam = omp_get_thread_num(); thread_index_t iam = omp_get_thread_num();
# pragma omp for schedule(static, Settings::workstealing_chunk_size) # pragma omp for schedule(static, Settings::workstealing_chunk_size)
for (difference_type pos = 0; pos < length; pos++) for (difference_type pos = 0; pos < length; ++pos)
thread_results[iam] = thread_results[iam] = r(thread_results[iam], f(o, begin+pos));
r(thread_results[iam], f(o, begin+pos));
} //parallel } //parallel
for (thread_index_t i = 0; i < num_threads; i++) for (thread_index_t i = 0; i < num_threads; ++i)
output = r(output, thread_results[i]); output = r(output, thread_results[i]);
delete [] thread_results; delete [] thread_results;
......
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc. // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -64,19 +64,19 @@ namespace __gnu_parallel ...@@ -64,19 +64,19 @@ namespace __gnu_parallel
* std::count_n()). * std::count_n()).
* @return User-supplied functor (that may contain a part of the result). * @return User-supplied functor (that may contain a part of the result).
*/ */
template< template<typename RandomAccessIterator,
typename RandomAccessIterator, typename Op,
typename Op, typename Fu,
typename Fu, typename Red,
typename Red, typename Result>
typename Result>
Op Op
for_each_template_random_access_ed( for_each_template_random_access_ed(RandomAccessIterator begin,
RandomAccessIterator begin, RandomAccessIterator end,
RandomAccessIterator end, Op o, Fu& f, Red r, Result base,
Op o, Fu& f, Red r, Result base, Result& output, Result& output,
typename std::iterator_traits<RandomAccessIterator>:: typename std::iterator_traits
difference_type bound) <RandomAccessIterator>::
difference_type bound)
{ {
typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::difference_type difference_type; typedef typename traits_type::difference_type difference_type;
...@@ -85,7 +85,7 @@ template< ...@@ -85,7 +85,7 @@ template<
Result *thread_results; Result *thread_results;
thread_index_t num_threads = thread_index_t num_threads =
__gnu_parallel::min<difference_type>(get_max_threads(), length); __gnu_parallel::min<difference_type>(get_max_threads(), length);
# pragma omp parallel num_threads(num_threads) # pragma omp parallel num_threads(num_threads)
{ {
...@@ -116,7 +116,7 @@ template< ...@@ -116,7 +116,7 @@ template<
thread_results[iam] = reduct; thread_results[iam] = reduct;
} //parallel } //parallel
for (thread_index_t i = 0; i < num_threads; i++) for (thread_index_t i = 0; i < num_threads; ++i)
output = r(output, thread_results[i]); output = r(output, thread_results[i]);
// Points to last element processed (needed as return value for // Points to last element processed (needed as return value for
......
...@@ -57,15 +57,14 @@ namespace __gnu_parallel ...@@ -57,15 +57,14 @@ namespace __gnu_parallel
* @param value Start value. Must be passed since the neutral * @param value Start value. Must be passed since the neutral
* element is unknown in general. * element is unknown in general.
* @return End iterator of output sequence. */ * @return End iterator of output sequence. */
template< template<typename InputIterator,
typename InputIterator, typename OutputIterator,
typename OutputIterator, typename BinaryOperation>
typename BinaryOperation> OutputIterator
inline OutputIterator parallel_partial_sum_basecase(InputIterator begin, InputIterator end,
parallel_partial_sum_basecase( OutputIterator result, BinaryOperation bin_op,
InputIterator begin, InputIterator end, typename std::iterator_traits
OutputIterator result, BinaryOperation bin_op, <InputIterator>::value_type value)
typename std::iterator_traits<InputIterator>::value_type value)
{ {
if (begin == end) if (begin == end)
return result; return result;
...@@ -90,15 +89,14 @@ template< ...@@ -90,15 +89,14 @@ template<
* @param num_threads Number of threads to use. * @param num_threads Number of threads to use.
* @return End iterator of output sequence. * @return End iterator of output sequence.
*/ */
template< template<typename InputIterator,
typename InputIterator, typename OutputIterator,
typename OutputIterator, typename BinaryOperation>
typename BinaryOperation>
OutputIterator OutputIterator
parallel_partial_sum_linear( parallel_partial_sum_linear(InputIterator begin, InputIterator end,
InputIterator begin, InputIterator end, OutputIterator result, BinaryOperation bin_op,
OutputIterator result, BinaryOperation bin_op, typename std::iterator_traits
typename std::iterator_traits<InputIterator>::difference_type n) <InputIterator>::difference_type n)
{ {
typedef std::iterator_traits<InputIterator> traits_type; typedef std::iterator_traits<InputIterator> traits_type;
typedef typename traits_type::value_type value_type; typedef typename traits_type::value_type value_type;
...@@ -133,9 +131,10 @@ template< ...@@ -133,9 +131,10 @@ template<
else else
{ {
difference_type chunk_length = difference_type chunk_length =
((double)n / ((double)n
((double)num_threads + Settings::partial_sum_dilatation)), / ((double)num_threads
borderstart = n - num_threads * chunk_length; + Settings::partial_sum_dilatation)),
borderstart = n - num_threads * chunk_length;
borders[0] = 0; borders[0] = 0;
for (int i = 1; i < (num_threads + 1); ++i) for (int i = 1; i < (num_threads + 1); ++i)
{ {
...@@ -145,8 +144,8 @@ template< ...@@ -145,8 +144,8 @@ template<
borders[num_threads + 1] = n; borders[num_threads + 1] = n;
} }
sums = static_cast<value_type*>( sums = static_cast<value_type*>(::operator new(sizeof(value_type)
::operator new(sizeof(value_type) * num_threads)); * num_threads));
OutputIterator target_end; OutputIterator target_end;
} //single } //single
...@@ -155,16 +154,17 @@ template< ...@@ -155,16 +154,17 @@ template<
{ {
*result = *begin; *result = *begin;
parallel_partial_sum_basecase(begin + 1, begin + borders[1], parallel_partial_sum_basecase(begin + 1, begin + borders[1],
result + 1, bin_op, *begin); result + 1, bin_op, *begin);
::new(&(sums[iam])) value_type(*(result + borders[1] - 1)); ::new(&(sums[iam])) value_type(*(result + borders[1] - 1));
} }
else else
{ {
::new(&(sums[iam])) value_type( ::new(&(sums[iam]))
std::accumulate(begin + borders[iam] + 1, value_type(std::accumulate(begin + borders[iam] + 1,
begin + borders[iam + 1], begin + borders[iam + 1],
*(begin + borders[iam]), *(begin + borders[iam]),
bin_op, __gnu_parallel::sequential_tag())); bin_op,
__gnu_parallel::sequential_tag()));
} }
# pragma omp barrier # pragma omp barrier
...@@ -177,9 +177,9 @@ template< ...@@ -177,9 +177,9 @@ template<
// Still same team. // Still same team.
parallel_partial_sum_basecase(begin + borders[iam + 1], parallel_partial_sum_basecase(begin + borders[iam + 1],
begin + borders[iam + 2], begin + borders[iam + 2],
result + borders[iam + 1], bin_op, result + borders[iam + 1], bin_op,
sums[iam]); sums[iam]);
} //parallel } //parallel
::operator delete(sums); ::operator delete(sums);
...@@ -194,10 +194,9 @@ template< ...@@ -194,10 +194,9 @@ template<
* @param result Begin iterator of output sequence. * @param result Begin iterator of output sequence.
* @param bin_op Associative binary function. * @param bin_op Associative binary function.
* @return End iterator of output sequence. */ * @return End iterator of output sequence. */
template< template<typename InputIterator,
typename InputIterator, typename OutputIterator,
typename OutputIterator, typename BinaryOperation>
typename BinaryOperation>
OutputIterator OutputIterator
parallel_partial_sum(InputIterator begin, InputIterator end, parallel_partial_sum(InputIterator begin, InputIterator end,
OutputIterator result, BinaryOperation bin_op) OutputIterator result, BinaryOperation bin_op)
......
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc. // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -92,10 +92,11 @@ template<typename RandomAccessIterator, typename Predicate> ...@@ -92,10 +92,11 @@ template<typename RandomAccessIterator, typename Predicate>
reserved_right = new bool[num_threads]; reserved_right = new bool[num_threads];
if (Settings::partition_chunk_share > 0.0) if (Settings::partition_chunk_share > 0.0)
chunk_size = std::max<difference_type>( chunk_size = std::max<difference_type>(Settings::
Settings::partition_chunk_size, partition_chunk_size,
(double)n * Settings::partition_chunk_share / (double)n * Settings::
(double)num_threads); partition_chunk_share
/ (double)num_threads);
else else
chunk_size = Settings::partition_chunk_size; chunk_size = Settings::partition_chunk_size;
} }
...@@ -106,7 +107,7 @@ template<typename RandomAccessIterator, typename Predicate> ...@@ -106,7 +107,7 @@ template<typename RandomAccessIterator, typename Predicate>
{ {
difference_type num_chunks = (right - left + 1) / chunk_size; difference_type num_chunks = (right - left + 1) / chunk_size;
for (int r = 0; r < num_threads; r++) for (int r = 0; r < num_threads; ++r)
{ {
reserved_left[r] = false; reserved_left[r] = false;
reserved_right[r] = false; reserved_right[r] = false;
...@@ -164,10 +165,10 @@ template<typename RandomAccessIterator, typename Predicate> ...@@ -164,10 +165,10 @@ template<typename RandomAccessIterator, typename Predicate>
{ {
while (pred(begin[thread_left]) while (pred(begin[thread_left])
&& thread_left <= thread_left_border) && thread_left <= thread_left_border)
thread_left++; ++thread_left;
while (!pred(begin[thread_right]) while (!pred(begin[thread_right])
&& thread_right >= thread_right_border) && thread_right >= thread_right_border)
thread_right--; --thread_right;
if (thread_left > thread_left_border if (thread_left > thread_left_border
|| thread_right < thread_right_border) || thread_right < thread_right_border)
...@@ -175,18 +176,18 @@ template<typename RandomAccessIterator, typename Predicate> ...@@ -175,18 +176,18 @@ template<typename RandomAccessIterator, typename Predicate>
break; break;
std::swap(begin[thread_left], begin[thread_right]); std::swap(begin[thread_left], begin[thread_right]);
thread_left++; ++thread_left;
thread_right--; --thread_right;
} }
} }
// Now swap the leftover chunks to the right places. // Now swap the leftover chunks to the right places.
if (thread_left <= thread_left_border) if (thread_left <= thread_left_border)
# pragma omp atomic # pragma omp atomic
leftover_left++; ++leftover_left;
if (thread_right >= thread_right_border) if (thread_right >= thread_right_border)
# pragma omp atomic # pragma omp atomic
leftover_right++; ++leftover_right;
# pragma omp barrier # pragma omp barrier
...@@ -212,9 +213,8 @@ template<typename RandomAccessIterator, typename Predicate> ...@@ -212,9 +213,8 @@ template<typename RandomAccessIterator, typename Predicate>
&& thread_right_border <= rightnew) && thread_right_border <= rightnew)
{ {
// Chunk already in place, reserve spot. // Chunk already in place, reserve spot.
reserved_right reserved_right[((thread_right_border - 1) - right)
[((thread_right_border - 1) - right) / chunk_size] / chunk_size] = true;
= true;
} }
# pragma omp barrier # pragma omp barrier
...@@ -225,7 +225,7 @@ template<typename RandomAccessIterator, typename Predicate> ...@@ -225,7 +225,7 @@ template<typename RandomAccessIterator, typename Predicate>
// Find spot and swap. // Find spot and swap.
difference_type swapstart = -1; difference_type swapstart = -1;
omp_set_lock(&result_lock); omp_set_lock(&result_lock);
for (int r = 0; r < leftover_left; r++) for (int r = 0; r < leftover_left; ++r)
if (!reserved_left[r]) if (!reserved_left[r])
{ {
reserved_left[r] = true; reserved_left[r] = true;
...@@ -238,10 +238,10 @@ template<typename RandomAccessIterator, typename Predicate> ...@@ -238,10 +238,10 @@ template<typename RandomAccessIterator, typename Predicate>
_GLIBCXX_PARALLEL_ASSERT(swapstart != -1); _GLIBCXX_PARALLEL_ASSERT(swapstart != -1);
#endif #endif
std::swap_ranges( std::swap_ranges(begin + thread_left_border
begin + thread_left_border - (chunk_size - 1), - (chunk_size - 1),
begin + thread_left_border + 1, begin + thread_left_border + 1,
begin + swapstart); begin + swapstart);
} }
if (thread_right >= thread_right_border if (thread_right >= thread_right_border
...@@ -250,7 +250,7 @@ template<typename RandomAccessIterator, typename Predicate> ...@@ -250,7 +250,7 @@ template<typename RandomAccessIterator, typename Predicate>
// Find spot and swap // Find spot and swap
difference_type swapstart = -1; difference_type swapstart = -1;
omp_set_lock(&result_lock); omp_set_lock(&result_lock);
for (int r = 0; r < leftover_right; r++) for (int r = 0; r < leftover_right; ++r)
if (!reserved_right[r]) if (!reserved_right[r])
{ {
reserved_right[r] = true; reserved_right[r] = true;
...@@ -264,17 +264,17 @@ template<typename RandomAccessIterator, typename Predicate> ...@@ -264,17 +264,17 @@ template<typename RandomAccessIterator, typename Predicate>
#endif #endif
std::swap_ranges(begin + thread_right_border, std::swap_ranges(begin + thread_right_border,
begin + thread_right_border + chunk_size, begin + thread_right_border + chunk_size,
begin + swapstart); begin + swapstart);
} }
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
# pragma omp barrier # pragma omp barrier
# pragma omp single # pragma omp single
{ {
for (int r = 0; r < leftover_left; r++) for (int r = 0; r < leftover_left; ++r)
_GLIBCXX_PARALLEL_ASSERT(reserved_left[r]); _GLIBCXX_PARALLEL_ASSERT(reserved_left[r]);
for (int r = 0; r < leftover_right; r++) for (int r = 0; r < leftover_right; ++r)
_GLIBCXX_PARALLEL_ASSERT(reserved_right[r]); _GLIBCXX_PARALLEL_ASSERT(reserved_right[r]);
} }
...@@ -295,17 +295,17 @@ template<typename RandomAccessIterator, typename Predicate> ...@@ -295,17 +295,17 @@ template<typename RandomAccessIterator, typename Predicate>
{ {
// Go right until key is geq than pivot. // Go right until key is geq than pivot.
while (pred(begin[final_left]) && final_left < final_right) while (pred(begin[final_left]) && final_left < final_right)
final_left++; ++final_left;
// Go left until key is less than pivot. // Go left until key is less than pivot.
while (!pred(begin[final_right]) && final_left < final_right) while (!pred(begin[final_right]) && final_left < final_right)
final_right--; --final_right;
if (final_left == final_right) if (final_left == final_right)
break; break;
std::swap(begin[final_left], begin[final_right]); std::swap(begin[final_left], begin[final_right]);
final_left++; ++final_left;
final_right--; --final_right;
} }
// All elements on the left side are < piv, all elements on the // All elements on the left side are < piv, all elements on the
...@@ -345,7 +345,8 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -345,7 +345,8 @@ template<typename RandomAccessIterator, typename Comparator>
RandomAccessIterator split; RandomAccessIterator split;
random_number rng; random_number rng;
difference_type minimum_length = std::max<difference_type>(2, Settings::partition_minimal_n); difference_type minimum_length =
std::max<difference_type>(2, Settings::partition_minimal_n);
// Break if input range too small. // Break if input range too small.
while (static_cast<sequence_index_t>(end - begin) >= minimum_length) while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
...@@ -359,15 +360,19 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -359,15 +360,19 @@ template<typename RandomAccessIterator, typename Comparator>
std::swap(*pivot_pos, *(end - 1)); std::swap(*pivot_pos, *(end - 1));
pivot_pos = end - 1; pivot_pos = end - 1;
// XXX Comparator must have first_value_type, second_value_type, result_type // XXX Comparator must have first_value_type, second_value_type,
// Comparator == __gnu_parallel::lexicographic<S, int, __gnu_parallel::less<S, S> > // result_type
// Comparator == __gnu_parallel::lexicographic<S, int,
// __gnu_parallel::less<S, S> >
// pivot_pos == std::pair<S, int>* // pivot_pos == std::pair<S, int>*
// XXX binder2nd only for RandomAccessIterators?? // XXX binder2nd only for RandomAccessIterators??
__gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> pred(comp, *pivot_pos); __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
pred(comp, *pivot_pos);
// Divide, leave pivot unchanged in last place. // Divide, leave pivot unchanged in last place.
RandomAccessIterator split_pos1, split_pos2; RandomAccessIterator split_pos1, split_pos2;
split_pos1 = begin + parallel_partition(begin, end - 1, pred, get_max_threads()); split_pos1 = begin + parallel_partition(begin, end - 1, pred,
get_max_threads());
// Left side: < pivot_pos; right side: >= pivot_pos // Left side: < pivot_pos; right side: >= pivot_pos
...@@ -377,14 +382,19 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -377,14 +382,19 @@ template<typename RandomAccessIterator, typename Comparator>
pivot_pos = split_pos1; pivot_pos = split_pos1;
// In case all elements are equal, split_pos1 == 0 // In case all elements are equal, split_pos1 == 0
if ((split_pos1 + 1 - begin) < (n >> 7) || (end - split_pos1) < (n >> 7)) if ((split_pos1 + 1 - begin) < (n >> 7)
|| (end - split_pos1) < (n >> 7))
{ {
// Very unequal split, one part smaller than one 128th // Very unequal split, one part smaller than one 128th
// elements not strictly larger than the pivot. // elements not strictly larger than the pivot.
__gnu_parallel::unary_negate<__gnu_parallel::binder1st<Comparator, value_type, value_type, bool>, value_type> pred(__gnu_parallel::binder1st<Comparator, value_type, value_type, bool>(comp, *pivot_pos)); __gnu_parallel::unary_negate<__gnu_parallel::
binder1st<Comparator, value_type, value_type, bool>, value_type>
pred(__gnu_parallel::binder1st<Comparator, value_type,
value_type, bool>(comp, *pivot_pos));
// Find other end of pivot-equal range. // Find other end of pivot-equal range.
split_pos2 = __gnu_sequential::partition(split_pos1 + 1, end, pred); split_pos2 = __gnu_sequential::partition(split_pos1 + 1,
end, pred);
} }
else else
// Only skip the pivot. // Only skip the pivot.
...@@ -410,7 +420,9 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -410,7 +420,9 @@ template<typename RandomAccessIterator, typename Comparator>
* @param comp Comparator. */ * @param comp Comparator. */
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
void void
parallel_partial_sort(RandomAccessIterator begin, RandomAccessIterator middle, RandomAccessIterator end, Comparator comp) parallel_partial_sort(RandomAccessIterator begin,
RandomAccessIterator middle,
RandomAccessIterator end, Comparator comp)
{ {
parallel_nth_element(begin, middle, end, comp); parallel_nth_element(begin, middle, end, comp);
std::sort(begin, middle, comp); std::sort(begin, middle, comp);
......
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc. // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -55,96 +55,98 @@ namespace __gnu_parallel ...@@ -55,96 +55,98 @@ namespace __gnu_parallel
* Calling them would not make sense in a concurrent setting. * Calling them would not make sense in a concurrent setting.
* @param T Contained element type. */ * @param T Contained element type. */
template<typename T> template<typename T>
class RestrictedBoundedConcurrentQueue class RestrictedBoundedConcurrentQueue
{
private:
/** @brief Array of elements, seen as cyclic buffer. */
T* base;
/** @brief Maximal number of elements contained at the same time. */
sequence_index_t max_size;
/** @brief Cyclic begin and end pointers contained in one
atomically changeable value. */
_GLIBCXX_VOLATILE lcas_t borders;
public:
/** @brief Constructor. Not to be called concurrently, of course.
* @param max_size Maximal number of elements to be contained. */
RestrictedBoundedConcurrentQueue(sequence_index_t max_size)
{ {
this->max_size = max_size; private:
base = new T[max_size]; /** @brief Array of elements, seen as cyclic buffer. */
borders = encode2(0, 0); T* base;
/** @brief Maximal number of elements contained at the same time. */
sequence_index_t max_size;
/** @brief Cyclic begin and end pointers contained in one
atomically changeable value. */
_GLIBCXX_VOLATILE lcas_t borders;
public:
/** @brief Constructor. Not to be called concurrently, of course.
* @param max_size Maximal number of elements to be contained. */
RestrictedBoundedConcurrentQueue(sequence_index_t max_size)
{
this->max_size = max_size;
base = new T[max_size];
borders = encode2(0, 0);
#pragma omp flush #pragma omp flush
} }
/** @brief Destructor. Not to be called concurrently, of course. */ /** @brief Destructor. Not to be called concurrently, of course. */
~RestrictedBoundedConcurrentQueue() ~RestrictedBoundedConcurrentQueue()
{ { delete[] base; }
delete[] base;
} /** @brief Pushes one element into the queue at the front end.
* Must not be called concurrently with pop_front(). */
/** @brief Pushes one element into the queue at the front end. void
* Must not be called concurrently with pop_front(). */ push_front(const T& t)
void push_front(const T& t) {
{ lcas_t former_borders = borders;
lcas_t former_borders = borders; int former_front, former_back;
int former_front, former_back; decode2(former_borders, former_front, former_back);
decode2(former_borders, former_front, former_back); *(base + former_front % max_size) = t;
*(base + former_front % max_size) = t;
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
// Otherwise: front - back > max_size eventually. // Otherwise: front - back > max_size eventually.
_GLIBCXX_PARALLEL_ASSERT(((former_front + 1) - former_back) <= max_size); _GLIBCXX_PARALLEL_ASSERT(((former_front + 1) - former_back)
<= max_size);
#endif #endif
fetch_and_add(&borders, encode2(1, 0)); fetch_and_add(&borders, encode2(1, 0));
} }
/** @brief Pops one element from the queue at the front end. /** @brief Pops one element from the queue at the front end.
* Must not be called concurrently with pop_front(). */ * Must not be called concurrently with pop_front(). */
bool pop_front(T& t) bool
{ pop_front(T& t)
int former_front, former_back; {
int former_front, former_back;
#pragma omp flush #pragma omp flush
decode2(borders, former_front, former_back); decode2(borders, former_front, former_back);
while (former_front > former_back) while (former_front > former_back)
{ {
// Chance. // Chance.
lcas_t former_borders = encode2(former_front, former_back); lcas_t former_borders = encode2(former_front, former_back);
lcas_t new_borders = encode2(former_front - 1, former_back); lcas_t new_borders = encode2(former_front - 1, former_back);
if (compare_and_swap(&borders, former_borders, new_borders)) if (compare_and_swap(&borders, former_borders, new_borders))
{ {
t = *(base + (former_front - 1) % max_size); t = *(base + (former_front - 1) % max_size);
return true; return true;
} }
#pragma omp flush #pragma omp flush
decode2(borders, former_front, former_back); decode2(borders, former_front, former_back);
} }
return false; return false;
} }
/** @brief Pops one element from the queue at the back end. /** @brief Pops one element from the queue at the back end.
* Must not be called concurrently with pop_front(). */ * Must not be called concurrently with pop_front(). */
bool pop_back(T& t) //queue behavior bool
{ pop_back(T& t) //queue behavior
int former_front, former_back; {
int former_front, former_back;
#pragma omp flush #pragma omp flush
decode2(borders, former_front, former_back); decode2(borders, former_front, former_back);
while (former_front > former_back) while (former_front > former_back)
{ {
// Chance. // Chance.
lcas_t former_borders = encode2(former_front, former_back); lcas_t former_borders = encode2(former_front, former_back);
lcas_t new_borders = encode2(former_front, former_back + 1); lcas_t new_borders = encode2(former_front, former_back + 1);
if (compare_and_swap(&borders, former_borders, new_borders)) if (compare_and_swap(&borders, former_borders, new_borders))
{ {
t = *(base + former_back % max_size); t = *(base + former_back % max_size);
return true; return true;
} }
#pragma omp flush #pragma omp flush
decode2(borders, former_front, former_back); decode2(borders, former_front, former_back);
} }
return false; return false;
} }
}; };
} //namespace __gnu_parallel } //namespace __gnu_parallel
......
...@@ -53,48 +53,46 @@ namespace __gnu_parallel ...@@ -53,48 +53,46 @@ namespace __gnu_parallel
* this part. * this part.
*/ */
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
inline typename std::iterator_traits<RandomAccessIterator>::difference_type
typename std::iterator_traits<RandomAccessIterator>::difference_type parallel_sort_qs_divide(RandomAccessIterator begin,
parallel_sort_qs_divide( RandomAccessIterator end,
RandomAccessIterator begin, Comparator comp, typename std::iterator_traits
RandomAccessIterator end, <RandomAccessIterator>::difference_type pivot_rank,
Comparator comp, typename std::iterator_traits
typename std::iterator_traits<RandomAccessIterator>::difference_type <RandomAccessIterator>::difference_type
pivot_rank, num_samples, thread_index_t num_threads)
typename std::iterator_traits<RandomAccessIterator>::difference_type {
num_samples, typedef std::iterator_traits<RandomAccessIterator> traits_type;
thread_index_t num_threads) typedef typename traits_type::value_type value_type;
{ typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type; difference_type n = end - begin;
typedef typename traits_type::difference_type difference_type; num_samples = std::min(num_samples, n);
difference_type n = end - begin; // Allocate uninitialized, to avoid default constructor.
num_samples = std::min(num_samples, n); value_type* samples =
static_cast<value_type*>(::operator new(num_samples
// Allocate uninitialized, to avoid default constructor. * sizeof(value_type)));
value_type* samples = static_cast<value_type*>(
::operator new(num_samples * sizeof(value_type))); for (difference_type s = 0; s < num_samples; ++s)
{
for (difference_type s = 0; s < num_samples; ++s) const unsigned long long index = static_cast<unsigned long long>(s)
{ * n / num_samples;
const unsigned long long index = static_cast<unsigned long long>(s) ::new(&(samples[s])) value_type(begin[index]);
* n / num_samples; }
::new(&(samples[s])) value_type(begin[index]);
}
__gnu_sequential::sort(samples, samples + num_samples, comp); __gnu_sequential::sort(samples, samples + num_samples, comp);
value_type& pivot = samples[pivot_rank * num_samples / n]; value_type& pivot = samples[pivot_rank * num_samples / n];
__gnu_parallel::binder2nd<Comparator, value_type, value_type, bool> __gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
pred(comp, pivot); pred(comp, pivot);
difference_type split = parallel_partition(begin, end, pred, num_threads); difference_type split = parallel_partition(begin, end, pred, num_threads);
::operator delete(samples); ::operator delete(samples);
return split; return split;
} }
/** @brief Unbalanced quicksort conquer step. /** @brief Unbalanced quicksort conquer step.
* @param begin Begin iterator of subsequence. * @param begin Begin iterator of subsequence.
...@@ -104,50 +102,51 @@ namespace __gnu_parallel ...@@ -104,50 +102,51 @@ namespace __gnu_parallel
* this part. * this part.
*/ */
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
inline void void
parallel_sort_qs_conquer(RandomAccessIterator begin, parallel_sort_qs_conquer(RandomAccessIterator begin,
RandomAccessIterator end, RandomAccessIterator end,
Comparator comp, Comparator comp,
thread_index_t num_threads) thread_index_t num_threads)
{ {
typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type; typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type; typedef typename traits_type::difference_type difference_type;
if (num_threads <= 1) if (num_threads <= 1)
{ {
__gnu_sequential::sort(begin, end, comp); __gnu_sequential::sort(begin, end, comp);
return; return;
} }
difference_type n = end - begin, pivot_rank; difference_type n = end - begin, pivot_rank;
if (n <= 1) if (n <= 1)
return; return;
thread_index_t num_threads_left; thread_index_t num_threads_left;
if ((num_threads % 2) == 1) if ((num_threads % 2) == 1)
num_threads_left = num_threads / 2 + 1; num_threads_left = num_threads / 2 + 1;
else else
num_threads_left = num_threads / 2; num_threads_left = num_threads / 2;
pivot_rank = n * num_threads_left / num_threads; pivot_rank = n * num_threads_left / num_threads;
difference_type split = parallel_sort_qs_divide( difference_type split =
begin, end, comp, pivot_rank, parallel_sort_qs_divide(begin, end, comp, pivot_rank,
Settings::sort_qs_num_samples_preset, num_threads); Settings::sort_qs_num_samples_preset,
num_threads);
#pragma omp parallel sections #pragma omp parallel sections
{ {
#pragma omp section #pragma omp section
parallel_sort_qs_conquer(begin, begin + split, parallel_sort_qs_conquer(begin, begin + split,
comp, num_threads_left); comp, num_threads_left);
#pragma omp section #pragma omp section
parallel_sort_qs_conquer(begin + split, end, parallel_sort_qs_conquer(begin + split, end,
comp, num_threads - num_threads_left); comp, num_threads - num_threads_left);
}
} }
}
...@@ -160,34 +159,33 @@ namespace __gnu_parallel ...@@ -160,34 +159,33 @@ namespace __gnu_parallel
* this part. * this part.
*/ */
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
inline void void
parallel_sort_qs( parallel_sort_qs(RandomAccessIterator begin,
RandomAccessIterator begin, RandomAccessIterator end,
RandomAccessIterator end, Comparator comp, typename std::iterator_traits
Comparator comp, <RandomAccessIterator>::difference_type n,
typename std::iterator_traits<RandomAccessIterator>::difference_type n, int num_threads)
int num_threads) {
{ _GLIBCXX_CALL(n)
_GLIBCXX_CALL(n)
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef typename traits_type::value_type value_type;
typedef typename traits_type::value_type value_type; typedef typename traits_type::difference_type difference_type;
typedef typename traits_type::difference_type difference_type;
if (n == 0)
if (n == 0) return;
return;
// At least one element per processor.
// At least one element per processor. if (num_threads > n)
if (num_threads > n) num_threads = static_cast<thread_index_t>(n);
num_threads = static_cast<thread_index_t>(n);
Settings::sort_qs_num_samples_preset = 100;
Settings::sort_qs_num_samples_preset = 100;
// Hard to avoid.
// Hard to avoid. omp_set_num_threads(num_threads);
omp_set_num_threads(num_threads);
parallel_sort_qs_conquer(begin, begin + n, comp, num_threads);
parallel_sort_qs_conquer(begin, begin + n, comp, num_threads); }
}
} //namespace __gnu_parallel } //namespace __gnu_parallel
......
...@@ -124,7 +124,7 @@ template<typename RandomNumberGenerator> ...@@ -124,7 +124,7 @@ template<typename RandomNumberGenerator>
/** @brief Random shuffle code executed by each thread. /** @brief Random shuffle code executed by each thread.
* @param pus Array of thread-local data records. */ * @param pus Array of thread-local data records. */
template<typename RandomAccessIterator, typename RandomNumberGenerator> template<typename RandomAccessIterator, typename RandomNumberGenerator>
inline void void
parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator, parallel_random_shuffle_drs_pu(DRSSorterPU<RandomAccessIterator,
RandomNumberGenerator>* pus) RandomNumberGenerator>* pus)
{ {
...@@ -213,8 +213,8 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> ...@@ -213,8 +213,8 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
thread_index_t target_p = bin_proc[target_bin]; thread_index_t target_p = bin_proc[target_bin];
// Last column [d->num_threads] stays unchanged. // Last column [d->num_threads] stays unchanged.
::new(&(temporaries[target_p][dist[target_bin + 1]++])) value_type( ::new(&(temporaries[target_p][dist[target_bin + 1]++]))
*(source + i + start)); value_type(*(source + i + start));
} }
delete[] oracles; delete[] oracles;
...@@ -260,13 +260,13 @@ template<typename T> ...@@ -260,13 +260,13 @@ template<typename T>
* @param rng Random number generator to use. * @param rng Random number generator to use.
*/ */
template<typename RandomAccessIterator, typename RandomNumberGenerator> template<typename RandomAccessIterator, typename RandomNumberGenerator>
inline void void
parallel_random_shuffle_drs( parallel_random_shuffle_drs(RandomAccessIterator begin,
RandomAccessIterator begin, RandomAccessIterator end,
RandomAccessIterator end, typename std::iterator_traits
typename std::iterator_traits<RandomAccessIterator>::difference_type n, <RandomAccessIterator>::difference_type n,
thread_index_t num_threads, thread_index_t num_threads,
RandomNumberGenerator& rng) RandomNumberGenerator& rng)
{ {
typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type; typedef typename traits_type::value_type value_type;
...@@ -393,7 +393,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> ...@@ -393,7 +393,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
* @param rng Random number generator to use. * @param rng Random number generator to use.
*/ */
template<typename RandomAccessIterator, typename RandomNumberGenerator> template<typename RandomAccessIterator, typename RandomNumberGenerator>
inline void void
sequential_random_shuffle(RandomAccessIterator begin, sequential_random_shuffle(RandomAccessIterator begin,
RandomAccessIterator end, RandomAccessIterator end,
RandomNumberGenerator& rng) RandomNumberGenerator& rng)
......
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc. // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -81,10 +81,9 @@ template<typename RandomAccessIterator, typename _DifferenceTp> ...@@ -81,10 +81,9 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
* @param end2 End iterator of second sequence. * @param end2 End iterator of second sequence.
* @param pred Find predicate. * @param pred Find predicate.
* @return Place of finding in first sequence. */ * @return Place of finding in first sequence. */
template< template<typename _RandomAccessIterator1,
typename _RandomAccessIterator1, typename _RandomAccessIterator2,
typename _RandomAccessIterator2, typename Pred>
typename Pred>
_RandomAccessIterator1 _RandomAccessIterator1
search_template(_RandomAccessIterator1 begin1, _RandomAccessIterator1 end1, search_template(_RandomAccessIterator1 begin1, _RandomAccessIterator1 end1,
_RandomAccessIterator2 begin2, _RandomAccessIterator2 end2, _RandomAccessIterator2 begin2, _RandomAccessIterator2 end2,
......
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc. // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -48,7 +48,7 @@ ...@@ -48,7 +48,7 @@
namespace __gnu_parallel namespace __gnu_parallel
{ {
template<typename InputIterator, typename OutputIterator> template<typename InputIterator, typename OutputIterator>
inline OutputIterator OutputIterator
copy_tail(std::pair<InputIterator, InputIterator> b, copy_tail(std::pair<InputIterator, InputIterator> b,
std::pair<InputIterator, InputIterator> e, OutputIterator r) std::pair<InputIterator, InputIterator> e, OutputIterator r)
{ {
...@@ -68,10 +68,9 @@ template<typename InputIterator, typename OutputIterator> ...@@ -68,10 +68,9 @@ template<typename InputIterator, typename OutputIterator>
return r; return r;
} }
template< template<typename InputIterator,
typename InputIterator, typename OutputIterator,
typename OutputIterator, typename Comparator>
typename Comparator>
struct symmetric_difference_func struct symmetric_difference_func
{ {
typedef std::iterator_traits<InputIterator> traits_type; typedef std::iterator_traits<InputIterator> traits_type;
...@@ -82,9 +81,10 @@ template< ...@@ -82,9 +81,10 @@ template<
Comparator comp; Comparator comp;
inline OutputIterator invoke(InputIterator a, InputIterator b, OutputIterator
InputIterator c, InputIterator d, invoke(InputIterator a, InputIterator b,
OutputIterator r) const InputIterator c, InputIterator d,
OutputIterator r) const
{ {
while (a != b && c != d) while (a != b && c != d)
{ {
...@@ -109,9 +109,9 @@ template< ...@@ -109,9 +109,9 @@ template<
return std::copy(c, d, std::copy(a, b, r)); return std::copy(c, d, std::copy(a, b, r));
} }
inline difference_type difference_type
count(InputIterator a, InputIterator b, InputIterator c, InputIterator d) count(InputIterator a, InputIterator b,
const InputIterator c, InputIterator d) const
{ {
difference_type counter = 0; difference_type counter = 0;
...@@ -137,21 +137,19 @@ template< ...@@ -137,21 +137,19 @@ template<
return counter + (b - a) + (d - c); return counter + (b - a) + (d - c);
} }
inline OutputIterator OutputIterator
first_empty(InputIterator c, InputIterator d, OutputIterator out) const first_empty(InputIterator c, InputIterator d, OutputIterator out) const
{ return std::copy(c, d, out); } { return std::copy(c, d, out); }
inline OutputIterator OutputIterator
second_empty(InputIterator a, InputIterator b, OutputIterator out) const second_empty(InputIterator a, InputIterator b, OutputIterator out) const
{ return std::copy(a, b, out); } { return std::copy(a, b, out); }
}; };
template< template<typename InputIterator,
typename InputIterator, typename OutputIterator,
typename OutputIterator, typename Comparator>
typename Comparator>
struct difference_func struct difference_func
{ {
typedef std::iterator_traits<InputIterator> traits_type; typedef std::iterator_traits<InputIterator> traits_type;
...@@ -162,7 +160,7 @@ template< ...@@ -162,7 +160,7 @@ template<
Comparator comp; Comparator comp;
inline OutputIterator OutputIterator
invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d, invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d,
OutputIterator r) const OutputIterator r) const
{ {
...@@ -185,9 +183,9 @@ template< ...@@ -185,9 +183,9 @@ template<
return std::copy(a, b, r); return std::copy(a, b, r);
} }
inline difference_type difference_type
count(InputIterator a, InputIterator b, InputIterator c, InputIterator d) count(InputIterator a, InputIterator b,
const InputIterator c, InputIterator d) const
{ {
difference_type counter = 0; difference_type counter = 0;
...@@ -217,10 +215,9 @@ template< ...@@ -217,10 +215,9 @@ template<
}; };
template< template<typename InputIterator,
typename InputIterator, typename OutputIterator,
typename OutputIterator, typename Comparator>
typename Comparator>
struct intersection_func struct intersection_func
{ {
typedef std::iterator_traits<InputIterator> traits_type; typedef std::iterator_traits<InputIterator> traits_type;
...@@ -231,7 +228,7 @@ template< ...@@ -231,7 +228,7 @@ template<
Comparator comp; Comparator comp;
inline OutputIterator OutputIterator
invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d, invoke(InputIterator a, InputIterator b, InputIterator c, InputIterator d,
OutputIterator r) const OutputIterator r) const
{ {
...@@ -253,9 +250,9 @@ template< ...@@ -253,9 +250,9 @@ template<
return r; return r;
} }
inline difference_type difference_type
count(InputIterator a, InputIterator b, InputIterator c, InputIterator d) count(InputIterator a, InputIterator b,
const InputIterator c, InputIterator d) const
{ {
difference_type counter = 0; difference_type counter = 0;
...@@ -289,13 +286,13 @@ template<class InputIterator, class OutputIterator, class Comparator> ...@@ -289,13 +286,13 @@ template<class InputIterator, class OutputIterator, class Comparator>
struct union_func struct union_func
{ {
typedef typename std::iterator_traits<InputIterator>::difference_type typedef typename std::iterator_traits<InputIterator>::difference_type
difference_type; difference_type;
union_func(Comparator c) : comp(c) {} union_func(Comparator c) : comp(c) {}
Comparator comp; Comparator comp;
inline OutputIterator OutputIterator
invoke(InputIterator a, const InputIterator b, InputIterator c, invoke(InputIterator a, const InputIterator b, InputIterator c,
const InputIterator d, OutputIterator r) const const InputIterator d, OutputIterator r) const
{ {
...@@ -322,9 +319,9 @@ template<class InputIterator, class OutputIterator, class Comparator> ...@@ -322,9 +319,9 @@ template<class InputIterator, class OutputIterator, class Comparator>
return std::copy(c, d, std::copy(a, b, r)); return std::copy(c, d, std::copy(a, b, r));
} }
inline difference_type difference_type
count(InputIterator a, InputIterator b, InputIterator c, InputIterator d) count(InputIterator a, InputIterator b,
const InputIterator c, InputIterator d) const
{ {
difference_type counter = 0; difference_type counter = 0;
...@@ -356,10 +353,9 @@ template<class InputIterator, class OutputIterator, class Comparator> ...@@ -356,10 +353,9 @@ template<class InputIterator, class OutputIterator, class Comparator>
{ return std::copy(a, b, out); } { return std::copy(a, b, out); }
}; };
template< template<typename InputIterator,
typename InputIterator, typename OutputIterator,
typename OutputIterator, typename Operation>
typename Operation>
OutputIterator OutputIterator
parallel_set_operation(InputIterator begin1, InputIterator end1, parallel_set_operation(InputIterator begin1, InputIterator end1,
InputIterator begin2, InputIterator end2, InputIterator begin2, InputIterator end2,
...@@ -480,11 +476,10 @@ template< ...@@ -480,11 +476,10 @@ template<
} }
template< template<typename InputIterator,
typename InputIterator, typename OutputIterator,
typename OutputIterator, typename Comparator>
typename Comparator> inline OutputIterator
OutputIterator
parallel_set_union(InputIterator begin1, InputIterator end1, parallel_set_union(InputIterator begin1, InputIterator end1,
InputIterator begin2, InputIterator end2, InputIterator begin2, InputIterator end2,
OutputIterator result, Comparator comp) OutputIterator result, Comparator comp)
...@@ -493,11 +488,10 @@ template< ...@@ -493,11 +488,10 @@ template<
union_func< InputIterator, OutputIterator, Comparator>(comp)); union_func< InputIterator, OutputIterator, Comparator>(comp));
} }
template< template<typename InputIterator,
typename InputIterator, typename OutputIterator,
typename OutputIterator, typename Comparator>
typename Comparator> inline OutputIterator
OutputIterator
parallel_set_intersection(InputIterator begin1, InputIterator end1, parallel_set_intersection(InputIterator begin1, InputIterator end1,
InputIterator begin2, InputIterator end2, InputIterator begin2, InputIterator end2,
OutputIterator result, Comparator comp) OutputIterator result, Comparator comp)
...@@ -508,7 +502,7 @@ template< ...@@ -508,7 +502,7 @@ template<
template<typename InputIterator, typename OutputIterator> template<typename InputIterator, typename OutputIterator>
OutputIterator inline OutputIterator
set_intersection(InputIterator begin1, InputIterator end1, set_intersection(InputIterator begin1, InputIterator end1,
InputIterator begin2, InputIterator end2, InputIterator begin2, InputIterator end2,
OutputIterator result) OutputIterator result)
...@@ -517,14 +511,13 @@ template<typename InputIterator, typename OutputIterator> ...@@ -517,14 +511,13 @@ template<typename InputIterator, typename OutputIterator>
typedef typename traits_type::value_type value_type; typedef typename traits_type::value_type value_type;
return set_intersection(begin1, end1, begin2, end2, result, return set_intersection(begin1, end1, begin2, end2, result,
std::less<value_type>()); std::less<value_type>());
} }
template< template<typename InputIterator,
typename InputIterator, typename OutputIterator,
typename OutputIterator, typename Comparator>
typename Comparator> inline OutputIterator
OutputIterator
parallel_set_difference(InputIterator begin1, InputIterator end1, parallel_set_difference(InputIterator begin1, InputIterator end1,
InputIterator begin2, InputIterator end2, InputIterator begin2, InputIterator end2,
OutputIterator result, Comparator comp) OutputIterator result, Comparator comp)
...@@ -533,11 +526,10 @@ template< ...@@ -533,11 +526,10 @@ template<
difference_func<InputIterator, OutputIterator, Comparator>(comp)); difference_func<InputIterator, OutputIterator, Comparator>(comp));
} }
template< template<typename InputIterator,
typename InputIterator, typename OutputIterator,
typename OutputIterator, typename Comparator>
typename Comparator> inline OutputIterator
OutputIterator
parallel_set_symmetric_difference(InputIterator begin1, InputIterator end1, parallel_set_symmetric_difference(InputIterator begin1, InputIterator end1,
InputIterator begin2, InputIterator end2, InputIterator begin2, InputIterator end2,
OutputIterator result, Comparator comp) OutputIterator result, Comparator comp)
......
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc. // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -33,7 +33,8 @@ ...@@ -33,7 +33,8 @@
* whether to use parallelized algorithms. * whether to use parallelized algorithms.
* This file is a GNU parallel extension to the Standard C++ Library. * This file is a GNU parallel extension to the Standard C++ Library.
* *
* @section parallelization_decision The decision whether to run an algorithm in parallel. * @section parallelization_decision The decision whether to run
* an algorithm in parallel.
* *
* There are several ways the user can switch on and off the * There are several ways the user can switch on and off the
* parallel execution of an algorithm, both at compile- and * parallel execution of an algorithm, both at compile- and
...@@ -104,7 +105,10 @@ ...@@ -104,7 +105,10 @@
* __gnu_parallel::Settings::force_parallel, i. e. usually a decision based on * __gnu_parallel::Settings::force_parallel, i. e. usually a decision based on
* the input size. * the input size.
*/ */
#define _GLIBCXX_PARALLEL_CONDITION(c) (!(__gnu_parallel::Settings::force_sequential) && ((__gnu_parallel::get_max_threads() > 1 && (c)) || __gnu_parallel::Settings::force_parallel)) #define _GLIBCXX_PARALLEL_CONDITION(c) \
(!(__gnu_parallel::Settings::force_sequential) \
&& ((__gnu_parallel::get_max_threads() > 1 \
&& (c)) || __gnu_parallel::Settings::force_parallel))
namespace __gnu_parallel namespace __gnu_parallel
{ {
...@@ -131,7 +135,8 @@ namespace ...@@ -131,7 +135,8 @@ namespace
/** @brief Different merging algorithms: bubblesort-alike, /** @brief Different merging algorithms: bubblesort-alike,
loser-tree variants, enum sentinel */ loser-tree variants, enum sentinel */
enum MultiwayMergeAlgorithm enum MultiwayMergeAlgorithm
{ BUBBLE, LOSER_TREE_EXPLICIT, LOSER_TREE, LOSER_TREE_COMBINED, LOSER_TREE_SENTINEL, MWM_ALGORITHM_LAST }; { BUBBLE, LOSER_TREE_EXPLICIT, LOSER_TREE, LOSER_TREE_COMBINED,
LOSER_TREE_SENTINEL, MWM_ALGORITHM_LAST };
/** @brief Different splitting strategies for sorting/merging: /** @brief Different splitting strategies for sorting/merging:
by sampling, exact */ by sampling, exact */
...@@ -340,7 +345,8 @@ namespace ...@@ -340,7 +345,8 @@ namespace
volatile sequence_index_t Settings::partition_chunk_size = 1000; volatile sequence_index_t Settings::partition_chunk_size = 1000;
volatile double Settings::partition_chunk_share = 0.0; volatile double Settings::partition_chunk_share = 0.0;
volatile unsigned int Settings::adjacent_difference_minimal_n = 1000; volatile unsigned int Settings::adjacent_difference_minimal_n = 1000;
volatile Settings::PartialSumAlgorithm Settings::partial_sum_algorithm = Settings::LINEAR; volatile Settings::PartialSumAlgorithm Settings::
partial_sum_algorithm = Settings::LINEAR;
volatile unsigned int Settings::partial_sum_minimal_n = 1000; volatile unsigned int Settings::partial_sum_minimal_n = 1000;
volatile float Settings::partial_sum_dilatation = 1.0f; volatile float Settings::partial_sum_dilatation = 1.0f;
volatile unsigned int Settings::random_shuffle_minimal_n = 1000; volatile unsigned int Settings::random_shuffle_minimal_n = 1000;
...@@ -352,10 +358,13 @@ namespace ...@@ -352,10 +358,13 @@ namespace
// unique copy // unique copy
volatile sequence_index_t Settings::unique_copy_minimal_n = 10000; volatile sequence_index_t Settings::unique_copy_minimal_n = 10000;
volatile Settings::MultiwayMergeAlgorithm Settings::multiway_merge_algorithm = Settings::LOSER_TREE; volatile Settings::MultiwayMergeAlgorithm Settings::
volatile Settings::Splitting Settings::multiway_merge_splitting = Settings::EXACT; multiway_merge_algorithm = Settings::LOSER_TREE;
volatile Settings::Splitting Settings::multiway_merge_splitting =
Settings::EXACT;
volatile unsigned int Settings::multiway_merge_oversampling = 10; volatile unsigned int Settings::multiway_merge_oversampling = 10;
volatile Settings::FindDistribution Settings::find_distribution = Settings::CONSTANT_SIZE_BLOCKS; volatile Settings::FindDistribution Settings::find_distribution =
Settings::CONSTANT_SIZE_BLOCKS;
volatile sequence_index_t Settings::find_sequential_search_size = 256; volatile sequence_index_t Settings::find_sequential_search_size = 256;
volatile sequence_index_t Settings::find_initial_block_size = 256; volatile sequence_index_t Settings::find_initial_block_size = 256;
volatile sequence_index_t Settings::find_maximum_block_size = 8192; volatile sequence_index_t Settings::find_maximum_block_size = 8192;
...@@ -375,7 +384,8 @@ namespace ...@@ -375,7 +384,8 @@ namespace
volatile sequence_index_t Settings::set_union_minimal_n = 1000; volatile sequence_index_t Settings::set_union_minimal_n = 1000;
volatile sequence_index_t Settings::set_intersection_minimal_n = 1000; volatile sequence_index_t Settings::set_intersection_minimal_n = 1000;
volatile sequence_index_t Settings::set_difference_minimal_n = 1000; volatile sequence_index_t Settings::set_difference_minimal_n = 1000;
volatile sequence_index_t Settings::set_symmetric_difference_minimal_n = 1000; volatile sequence_index_t Settings::set_symmetric_difference_minimal_n =
1000;
volatile unsigned long long Settings::L1_cache_size = 16 << 10; volatile unsigned long long Settings::L1_cache_size = 16 << 10;
volatile unsigned long long Settings::L2_cache_size = 256 << 10; volatile unsigned long long Settings::L2_cache_size = 256 << 10;
volatile unsigned int Settings::TLB_size = 128; volatile unsigned int Settings::TLB_size = 128;
......
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc. // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -69,36 +69,37 @@ namespace __gnu_parallel ...@@ -69,36 +69,37 @@ namespace __gnu_parallel
* @callgraph * @callgraph
*/ */
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
inline void inline void
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, bool stable) Comparator comp, bool stable)
{ {
_GLIBCXX_CALL(end - begin) _GLIBCXX_CALL(end - begin)
typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type; typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type; typedef typename traits_type::difference_type difference_type;
if (begin != end) if (begin != end)
{ {
difference_type n = end - begin; difference_type n = end - begin;
if (false) ; if (false) ;
#if _GLIBCXX_MERGESORT #if _GLIBCXX_MERGESORT
else if (Settings::sort_algorithm == Settings::MWMS || stable) else if (Settings::sort_algorithm == Settings::MWMS || stable)
parallel_sort_mwms(begin, end, comp, n, get_max_threads(), stable); parallel_sort_mwms(begin, end, comp, n, get_max_threads(), stable);
#endif #endif
#if _GLIBCXX_QUICKSORT #if _GLIBCXX_QUICKSORT
else if (Settings::sort_algorithm == Settings::QS && !stable) else if (Settings::sort_algorithm == Settings::QS && !stable)
parallel_sort_qs(begin, end, comp, n, get_max_threads()); parallel_sort_qs(begin, end, comp, n, get_max_threads());
#endif #endif
#if _GLIBCXX_BAL_QUICKSORT #if _GLIBCXX_BAL_QUICKSORT
else if (Settings::sort_algorithm == Settings::QS_BALANCED && !stable) else if (Settings::sort_algorithm == Settings::QS_BALANCED
parallel_sort_qsb(begin, end, comp, n, get_max_threads()); && !stable)
parallel_sort_qsb(begin, end, comp, n, get_max_threads());
#endif #endif
else else
__gnu_sequential::sort(begin, end, comp); __gnu_sequential::sort(begin, end, comp);
} }
} }
} // end namespace __gnu_parallel } // end namespace __gnu_parallel
#endif #endif
This source diff could not be displayed because it is too large. You can view the blob instead.
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc. // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -50,11 +50,10 @@ namespace __gnu_parallel ...@@ -50,11 +50,10 @@ namespace __gnu_parallel
* @param result Begin iterator of result sequence. * @param result Begin iterator of result sequence.
* @param binary_pred Equality predicate. * @param binary_pred Equality predicate.
* @return End iterator of result sequence. */ * @return End iterator of result sequence. */
template< template<typename InputIterator,
typename InputIterator, class OutputIterator,
class OutputIterator, class BinaryPredicate>
class BinaryPredicate> OutputIterator
inline OutputIterator
parallel_unique_copy(InputIterator first, InputIterator last, parallel_unique_copy(InputIterator first, InputIterator last,
OutputIterator result, BinaryPredicate binary_pred) OutputIterator result, BinaryPredicate binary_pred)
{ {
...@@ -79,10 +78,10 @@ template< ...@@ -79,10 +78,10 @@ template<
{ {
# pragma omp single # pragma omp single
{ {
num_threads = omp_get_num_threads(); num_threads = omp_get_num_threads();
borders = new difference_type[num_threads + 2]; borders = new difference_type[num_threads + 2];
equally_split(size, num_threads + 1, borders); equally_split(size, num_threads + 1, borders);
counter = new difference_type[num_threads + 1]; counter = new difference_type[num_threads + 1];
} }
thread_index_t iam = omp_get_thread_num(); thread_index_t iam = omp_get_thread_num();
...@@ -99,14 +98,14 @@ template< ...@@ -99,14 +98,14 @@ template<
begin = borders[0] + 1; // == 1 begin = borders[0] + 1; // == 1
end = borders[iam + 1]; end = borders[iam + 1];
i++; ++i;
*out++ = *first; *out++ = *first;
for (InputIterator iter = first + begin; iter < first + end; ++iter) for (InputIterator iter = first + begin; iter < first + end; ++iter)
{ {
if (!binary_pred(*iter, *(iter-1))) if (!binary_pred(*iter, *(iter-1)))
{ {
i++; ++i;
*out++ = *iter; *out++ = *iter;
} }
} }
...@@ -118,11 +117,9 @@ template< ...@@ -118,11 +117,9 @@ template<
for (InputIterator iter = first + begin; iter < first + end; ++iter) for (InputIterator iter = first + begin; iter < first + end; ++iter)
{ {
if (!binary_pred(*iter, *(iter-1))) if (!binary_pred(*iter, *(iter - 1)))
{ ++i;
i++; }
}
}
} }
counter[iam] = i; counter[iam] = i;
...@@ -136,7 +133,7 @@ template< ...@@ -136,7 +133,7 @@ template<
if (iam == 0) if (iam == 0)
{ {
for (int t = 0; t < num_threads; t++) for (int t = 0; t < num_threads; ++t)
begin_output += counter[t]; begin_output += counter[t];
i = 0; i = 0;
...@@ -148,9 +145,9 @@ template< ...@@ -148,9 +145,9 @@ template<
for (InputIterator iter = first + begin; iter < first + end; ++iter) for (InputIterator iter = first + begin; iter < first + end; ++iter)
{ {
if (iter == first || !binary_pred(*iter, *(iter-1))) if (iter == first || !binary_pred(*iter, *(iter - 1)))
{ {
i++; ++i;
*iter_out++ = *iter; *iter_out++ = *iter;
} }
} }
...@@ -166,10 +163,8 @@ template< ...@@ -166,10 +163,8 @@ template<
for (InputIterator iter = first + begin; iter < first + end; ++iter) for (InputIterator iter = first + begin; iter < first + end; ++iter)
{ {
if (!binary_pred(*iter, *(iter-1))) if (!binary_pred(*iter, *(iter-1)))
{ *iter_out++ = *iter;
*iter_out++ = *iter; }
}
}
} }
} }
...@@ -193,8 +188,8 @@ template<typename InputIterator, class OutputIterator> ...@@ -193,8 +188,8 @@ template<typename InputIterator, class OutputIterator>
OutputIterator result) OutputIterator result)
{ {
typedef typename std::iterator_traits<InputIterator>::value_type value_type; typedef typename std::iterator_traits<InputIterator>::value_type value_type;
return parallel_unique_copy(first, last, result,
return parallel_unique_copy(first, last, result, std::equal_to<value_type>()); std::equal_to<value_type>());
} }
}//namespace __gnu_parallel }//namespace __gnu_parallel
......
// -*- C++ -*- // -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc. // Copyright (C) 2007, 2008 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms // software; you can redistribute it and/or modify it under the terms
...@@ -96,20 +96,19 @@ template<typename _DifferenceTp> ...@@ -96,20 +96,19 @@ template<typename _DifferenceTp>
* std::count_n()). * std::count_n()).
* @return User-supplied functor (that may contain a part of the result). * @return User-supplied functor (that may contain a part of the result).
*/ */
template< template<typename RandomAccessIterator,
typename RandomAccessIterator, typename Op,
typename Op, typename Fu,
typename Fu, typename Red,
typename Red, typename Result>
typename Result>
Op Op
for_each_template_random_access_workstealing( for_each_template_random_access_workstealing(RandomAccessIterator begin,
RandomAccessIterator begin, RandomAccessIterator end,
RandomAccessIterator end, Op op, Fu& f, Red r,
Op op, Fu& f, Red r, Result base, Result& output,
Result base, Result& output, typename std::iterator_traits
typename std::iterator_traits<RandomAccessIterator>::difference_type <RandomAccessIterator>::
bound) difference_type bound)
{ {
_GLIBCXX_CALL(end - begin) _GLIBCXX_CALL(end - begin)
...@@ -180,7 +179,7 @@ template< ...@@ -180,7 +179,7 @@ template<
// This thread is currently working. // This thread is currently working.
# pragma omp atomic # pragma omp atomic
busy++; ++busy;
iam_working = true; iam_working = true;
...@@ -198,8 +197,8 @@ template< ...@@ -198,8 +197,8 @@ template<
// Cannot use volatile variable directly. // Cannot use volatile variable directly.
difference_type my_first = my_job.first; difference_type my_first = my_job.first;
result = f(op, begin + my_first); result = f(op, begin + my_first);
my_job.first++; ++my_job.first;
my_job.load--; --my_job.load;
} }
RandomAccessIterator current; RandomAccessIterator current;
...@@ -226,11 +225,11 @@ template< ...@@ -226,11 +225,11 @@ template<
my_job.load = my_job.last - my_job.first + 1; my_job.load = my_job.last - my_job.first + 1;
for (difference_type job_counter = 0; for (difference_type job_counter = 0;
job_counter < chunk_size && current_job <= my_job.last; job_counter < chunk_size && current_job <= my_job.last;
job_counter++) ++job_counter)
{ {
// Yes: process it! // Yes: process it!
current = begin + current_job; current = begin + current_job;
current_job++; ++current_job;
// Do actual work. // Do actual work.
result = r(result, f(op, current)); result = r(result, f(op, current));
...@@ -244,7 +243,7 @@ template< ...@@ -244,7 +243,7 @@ template<
{ {
// This thread no longer has work. // This thread no longer has work.
# pragma omp atomic # pragma omp atomic
busy--; --busy;
iam_working = false; iam_working = false;
} }
...@@ -286,7 +285,7 @@ template< ...@@ -286,7 +285,7 @@ template<
// Has potential work again. // Has potential work again.
# pragma omp atomic # pragma omp atomic
busy++; ++busy;
iam_working = true; iam_working = true;
# pragma omp flush(busy) # pragma omp flush(busy)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment