Commit f9985df5 by Johannes Singler Committed by Johannes Singler

re PR libstdc++/35588 ([parallel mode] parallel std::sort and bind())

2008-04-07  Johannes Singler  <singler@ira.uka.de>

         * include/parallel/multiway_merge.h:
           Moved decisions to compiletime instead of runtime.
         * include/parallel/losertree.h:
           Removed obsolete variants, added variant that uses pointers
           in the loser tree.
         * include/parallel/types.h:
           Remove obsolete settings options from enum.
         * include/parallel/features.h:
           Remove obsolete compile-time switches.
         * include/parallel/compiletime_settings.h:
           Remove obsolete variant that copies back *after* sorting.
         * include/parallel/tags.h:
           Add one new tag for compile-time switch.
         * include/parallel/merge.h:
           Adapt to changes in multiway_merge.h.
         * include/parallel/multiway_mergesort.h:
           Adapt to changes in multiway_merge.h.
           Factor out splitting variants.
           Remove obsolete variant that copies back *after* sorting.
         * include/parallel/sort.h:
           Adapt to changes in multiway_mergesort.h.
         * testsuite/25_algorithms/sort/35588.cc:
           Added test case from / for PR 35588.

From-SVN: r133975
parent 1d4cd3d0
2008-04-07 Johannes Singler <singler@ira.uka.de>
* include/parallel/multiway_merge.h:
Moved decisions to compiletime instead of runtime.
* include/parallel/losertree.h:
Removed obsolete variants, added variant that uses pointers
in the loser tree.
* include/parallel/types.h:
Remove obsolete settings options from enum.
* include/parallel/features.h:
Remove obsolete compile-time switches.
* include/parallel/compiletime_settings.h:
Remove obsolete variant that copies back *after* sorting.
* include/parallel/tags.h:
Add one new tag for compile-time switch.
* include/parallel/merge.h:
Adapt to changes in multiway_merge.h.
* include/parallel/multiway_mergesort.h:
Adapt to changes in multiway_merge.h.
Factor out splitting variants.
Remove obsolete variant that copies back *after* sorting.
* include/parallel/sort.h:
Adapt to changes in multiway_mergesort.h.
* testsuite/25_algorithms/sort/35588.cc:
Added test case from / for PR 35588.
2008-03-29 Paolo Carlini <pcarlini@suse.de> 2008-03-29 Paolo Carlini <pcarlini@suse.de>
PR libstdc++/35725 PR libstdc++/35725
......
...@@ -73,17 +73,9 @@ ...@@ -73,17 +73,9 @@
* __gnu_parallel::parallel_random_shuffle(). */ * __gnu_parallel::parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0 #define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0
#endif #endif
#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB #ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code. /** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
* Consider the size of the TLB for * Consider the size of the TLB for
* __gnu_parallel::parallel_random_shuffle(). */ * __gnu_parallel::parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0 #define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0
#endif #endif
#ifndef _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
/** @brief First copy the data, sort it locally, and merge it back
* (0); or copy it back after everything is done (1).
*
* Recommendation: 0 */
#define _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST 0
#endif
...@@ -61,66 +61,6 @@ ...@@ -61,66 +61,6 @@
#define _GLIBCXX_BAL_QUICKSORT 1 #define _GLIBCXX_BAL_QUICKSORT 1
#endif #endif
#ifndef _GLIBCXX_LOSER_TREE
/** @def _GLIBCXX_LOSER_TREE
* @brief Include guarded (sequences may run empty) loser tree,
* moving objects.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE 1
#endif
#ifndef _GLIBCXX_LOSER_TREE_EXPLICIT
/** @def _GLIBCXX_LOSER_TREE_EXPLICIT
* @brief Include standard loser tree, storing two flags for infimum
* and supremum.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_EXPLICIT 0
#endif
#ifndef _GLIBCXX_LOSER_TREE_REFERENCE
/** @def _GLIBCXX_LOSER_TREE_REFERENCE
* @brief Include some loser tree variant.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_REFERENCE 0
#endif
#ifndef _GLIBCXX_LOSER_TREE_POINTER
/** @def _GLIBCXX_LOSER_TREE_POINTER
* @brief Include some loser tree variant.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_POINTER 1
#endif
#ifndef _GLIBCXX_LOSER_TREE_UNGUARDED
/** @def _GLIBCXX_LOSER_TREE_UNGUARDED
* @brief Include unguarded (sequences must not run empty) loser
* tree, moving objects.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_UNGUARDED 0
#endif
#ifndef _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
/** @def _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
* @brief Include some loser tree variant.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED 1
#endif
#ifndef _GLIBCXX_LOSER_TREE_COMBINED
/** @def _GLIBCXX_LOSER_TREE_COMBINED
* @brief Include some loser tree variant.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_COMBINED 0
#endif
#ifndef _GLIBCXX_LOSER_TREE_SENTINEL
/** @def _GLIBCXX_LOSER_TREE_SENTINEL
* @brief Include some loser tree variant.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_SENTINEL 0
#endif
#ifndef _GLIBCXX_FIND_GROWING_BLOCKS #ifndef _GLIBCXX_FIND_GROWING_BLOCKS
/** @brief Include the growing blocks variant for std::find. /** @brief Include the growing blocks variant for std::find.
* @see __gnu_parallel::_Settings::find_algorithm */ * @see __gnu_parallel::_Settings::find_algorithm */
......
...@@ -47,878 +47,725 @@ ...@@ -47,878 +47,725 @@
namespace __gnu_parallel namespace __gnu_parallel
{ {
#if _GLIBCXX_LOSER_TREE_EXPLICIT /**
* @brief Guarded loser/tournament tree.
/** @brief Guarded loser tree, copying the whole element into the *
* tree structure. * The smallest element is at the top.
* *
* Guarding is done explicitly through two flags per element, inf * Guarding is done explicitly through one flag sup per element,
* and sup This is a quite slow variant. * inf is not needed due to a better initialization routine. This
*/ * is a well-performing variant.
template<typename T, typename Comparator = std::less<T> > *
class LoserTreeExplicit * @param T the element type
* @param Comparator the comparator to use, defaults to std::less<T>
*/
template<typename T, typename Comparator>
class LoserTreeBase
{
protected:
/** @brief Internal representation of a LoserTree element. */
struct Loser
{ {
private: /** @brief flag, true iff this is a "maximum" sentinel. */
struct Loser bool sup;
{ /** @brief index of the source sequence. */
// The relevant element. int source;
T key; /** @brief key of the element in the LoserTree. */
T key;
// Is this an infimum or supremum element? };
bool inf, sup;
// Number of the sequence the element comes from.
int source;
};
unsigned int size, offset;
Loser* losers;
Comparator comp;
public:
LoserTreeExplicit(unsigned int _size, Comparator _comp = std::less<T>())
: comp(_comp)
{
size = _size;
offset = size;
losers = new Loser[size];
for (unsigned int l = 0; l < size; ++l)
{
//losers[l].key = ... stays unset
losers[l].inf = true;
losers[l].sup = false;
//losers[l].source = -1; //sentinel
}
}
~LoserTreeExplicit()
{ delete[] losers; }
int unsigned int ik, k, offset;
get_min_source()
{ return losers[0].source; } /** log_2{k} */
unsigned int _M_log_k;
/** @brief LoserTree elements. */
Loser* losers;
/** @brief Comparator to use. */
Comparator comp;
/**
* @brief State flag that determines whether the LoserTree is empty.
*
* Only used for building the LoserTree.
*/
bool first_insert;
public:
/**
 * @brief The constructor.
 *
 * Rounds the number of leaves up to a power of two and obtains raw,
 * unconstructed storage for the 2 * k tree nodes.  No key is
 * constructed here.
 *
 * @param _k The number of sequences to merge.
 * @param _comp The comparator to use.
 */
LoserTreeBase(unsigned int _k, Comparator _comp)
: comp(_comp)
{
  ik = _k;

  // log_2{k} for the loser tree.
  _M_log_k = log2(ik - 1) + 1;

  // Leaf count: the next greater power of 2.
  k = 1 << _M_log_k;
  offset = k;

  // Raw allocation, so that losers[].key is not default-constructed.
  losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser)));

  // Flag the leaves from slot ik - 1 up to the padded end as suprema.
  unsigned int leaf = ik - 1;
  while (leaf < k)
    {
      losers[leaf + k].sup = true;
      ++leaf;
    }

  first_insert = true;
}
/**
 * @brief The destructor.
 *
 * Destroys the keys that insert_start() constructed in place, then
 * releases the raw node storage obtained in the constructor.
 */
~LoserTreeBase()
{
  // The first insert_start() call constructs a key in every one of the
  // 2 * k nodes.  Before that call no key exists, so there is nothing
  // to destroy.
  if (!first_insert)
    for (unsigned int i = 0; i < (2 * k); ++i)
      losers[i].key.~T();

  ::operator delete(losers);
}
/**
 * @brief Initializes the sequence "source" with the element "key".
 *
 * The very first call copy-constructs @c key into all 2 * k nodes, so
 * that every node holds a live key afterwards.  Later calls overwrite
 * the key of the addressed leaf only.
 *
 * @param key the element to insert
 * @param source index of the source sequence
 * @param sup flag that determines whether the value to insert is an
 *   explicit supremum.
 */
inline void
insert_start(const T& key, int source, bool sup)
{
  unsigned int pos = k + source;

  if (first_insert)
    {
      // Construct all keys, so we can easily deconstruct them.
      // ::new on a void* always selects the global placement form,
      // even if T declares its own operator new.
      for (unsigned int i = 0; i < (2 * k); ++i)
        ::new(static_cast<void*>(&(losers[i].key))) T(key);
      first_insert = false;
    }
  else
    {
      // The key at pos is already alive.  Assign to it instead of
      // constructing a second object on top of it, which would drop
      // the old key without running its destructor.
      losers[pos].key = key;
    }

  losers[pos].sup = sup;
  losers[pos].source = source;
}
/**
* @return the index of the sequence with the smallest element.
*/
int get_min_source()
{ return losers[0].source; }
};
/**
 * @brief Stable LoserTree variant.
 *
 * Provides the stable implementations of init_winner, init and
 * delete_min_insert on top of LoserTreeBase.
 *
 * Unstable variant is done using partial specialisation below.
 *
 * @param stable selects this (stable) primary template when true
 * @param T the element type
 * @param Comparator the comparator to use
 */
template<bool stable/* default == true */, typename T, typename Comparator>
  class LoserTree : public LoserTreeBase<T, Comparator>
  {
    typedef LoserTreeBase<T, Comparator> Base;
    using Base::k;
    using Base::losers;
    using Base::first_insert;
    // comp lives in a dependent base class; without this declaration
    // an unqualified use is not found under two-phase name lookup.
    using Base::comp;

  public:
    /**
     * @brief The constructor.
     *
     * @param _k The number of sequences to merge.
     * @param _comp The comparator to use.
     */
    LoserTree(unsigned int _k, Comparator _comp)
    : Base::LoserTreeBase(_k, _comp)
    {}

    /**
     * @brief Plays the games of the subtree below "root".
     *
     * The loser of each game is stored in the inner node, the winner
     * is passed upwards.
     *
     * @param root index of the node whose subtree is to be set up
     * @return index of the node holding the winner of that subtree
     */
    unsigned int
    init_winner(unsigned int root)
    {
      if (root >= k)
        {
          // Leaf: it wins its own (empty) game.
          return root;
        }
      else
        {
          unsigned int left = init_winner (2 * root);
          unsigned int right = init_winner (2 * root + 1);
          if (losers[right].sup
              || (!losers[left].sup
                  && !comp(losers[right].key, losers[left].key)))
            {
              // Left one is less or equal.
              losers[root] = losers[right];
              return left;
            }
          else
            {
              // Right one is less.
              losers[root] = losers[left];
              return right;
            }
        }
    }

    /** @brief Builds the tree and stores the overall winner in node 0. */
    void init()
    { losers[0] = losers[init_winner(1)]; }

    /**
     * @brief Delete the smallest element and insert a new element from
     * the previously smallest element's sequence.
     *
     * This implementation is stable.
     *
     * @param key the element to insert
     * @param sup true iff key is an explicitly marked supremum
     */
    // Do not pass a const reference since key will be used as local variable.
    void delete_min_insert(T key, bool sup)
    {
      int source = losers[0].source;
      for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
        {
          // The smaller one gets promoted, ties are broken by source.
          if ((sup && (!losers[pos].sup || losers[pos].source < source))
              || (!sup && !losers[pos].sup
                  && ((comp(losers[pos].key, key))
                      || (!comp(key, losers[pos].key)
                          && losers[pos].source < source))))
            {
              // The other one is smaller.
              std::swap(losers[pos].sup, sup);
              std::swap(losers[pos].source, source);
              std::swap(losers[pos].key, key);
            }
        }

      // Whatever is still carried after the climb is the new minimum.
      losers[0].sup = sup;
      losers[0].source = source;
      losers[0].key = key;
    }
  };
/**
* @brief Unstable LoserTree variant.
*
* Stability (non-stable here) is selected with partial specialization.
*/
template<typename T, typename Comparator>
class LoserTree</* stable == */false, T, Comparator> :
public LoserTreeBase<T, Comparator>
{
typedef LoserTreeBase<T, Comparator> Base;
using Base::_M_log_k;
using Base::k;
using Base::losers;
using Base::first_insert;
public:
LoserTree(unsigned int _k, Comparator _comp)
: Base::LoserTreeBase(_k, _comp)
{}
/**
* Computes the winner of the competition at position "root".
*
* Called recursively (starting at 0) to build the initial tree.
*
* @param root index of the "game" to start.
*/
unsigned int
init_winner (unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup ||
(!losers[left].sup
&& !comp(losers[right].key, losers[left].key)))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
/** Plays all games starting at node 1 and stores the winner in node 0. */
inline void
init()
{
  const unsigned int winner = init_winner(1);
  losers[0] = losers[winner];
}
/**
* Delete the smallest element and insert the element key instead.
*
* @param key the key to insert
* @param sup true iff key is an explicitly marked supremum
*/
// Do not pass a const reference since key will be used as local variable.
inline void
delete_min_insert(T key, bool sup)
{
#if _GLIBCXX_ASSERTIONS
// loser trees are only used for at least 2 sequences
_GLIBCXX_PARALLEL_ASSERT(_M_log_k > 1);
#endif
void int source = losers[0].source;
insert_start(T key, int source, bool sup) for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{ {
bool inf = false; // The smaller one gets promoted.
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) if (sup || (!losers[pos].sup && comp(losers[pos].key, key)))
{ {
if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup // The other one is smaller.
&& comp(losers[pos].key, key)) || losers[pos].inf || sup) std::swap(losers[pos].sup, sup);
{ std::swap(losers[pos].source, source);
// The other one is smaller. std::swap(losers[pos].key, key);
std::swap(losers[pos].key, key); }
std::swap(losers[pos].inf, inf);
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
}
}
losers[0].key = key;
losers[0].inf = inf;
losers[0].sup = sup;
losers[0].source = source;
} }
void losers[0].sup = sup;
init() { } losers[0].source = source;
losers[0].key = key;
}
};
void
delete_min_insert(T key, bool sup)
{
bool inf = false;
int source = losers[0].source;
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
&& comp(losers[pos].key, key))
|| losers[pos].inf || sup)
{
// The other one is smaller.
std::swap(losers[pos].key, key);
std::swap(losers[pos].inf, inf);
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
}
}
losers[0].key = key;
losers[0].inf = inf;
losers[0].sup = sup;
losers[0].source = source;
}
void
insert_start_stable(T key, int source, bool sup)
{
bool inf = false;
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
{
if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
&& ((comp(losers[pos].key, key))
|| (!comp(key, losers[pos].key)
&& losers[pos].source < source)))
|| losers[pos].inf || sup)
{
// Take next key.
std::swap(losers[pos].key, key);
std::swap(losers[pos].inf, inf);
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
}
}
losers[0].key = key;
losers[0].inf = inf;
losers[0].sup = sup;
losers[0].source = source;
}
void /**
init_stable() { } * @brief Base class of Loser Tree implementation using pointers.
*/
void template<typename T, typename Comparator>
delete_min_insert_stable(T key, bool sup) class LoserTreePointerBase
{ {
bool inf = false; protected:
int source = losers[0].source; /** @brief Internal representation of LoserTree elements. */
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) struct Loser
{ {
if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup bool sup;
&& ((comp(losers[pos].key, key)) int source;
|| (!comp(key, losers[pos].key) const T* keyp;
&& losers[pos].source < source)))
|| losers[pos].inf || sup)
{
std::swap(losers[pos].key, key);
std::swap(losers[pos].inf, inf);
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
}
}
losers[0].key = key;
losers[0].inf = inf;
losers[0].sup = sup;
losers[0].source = source;
}
}; };
#endif unsigned int ik, k, offset;
Loser* losers;
#if _GLIBCXX_LOSER_TREE Comparator comp;
/** @brief Guarded loser tree, either copying the whole element into public:
* the tree structure, or looking up the element via the index. LoserTreePointerBase(unsigned int _k, Comparator _comp = std::less<T>())
*
* Guarding is done explicitly through one flag sup per element,
* inf is not needed due to a better initialization routine. This
* is a well-performing variant.
*/
template<typename T, typename Comparator = std::less<T> >
class LoserTree
{
private:
struct Loser
{
bool sup;
int source;
T key;
};
unsigned int ik, k, offset;
Loser* losers;
Comparator comp;
bool first_insert;
public:
LoserTree(unsigned int _k, Comparator _comp = std::less<T>())
: comp(_comp) : comp(_comp)
{ {
ik = _k; ik = _k;
// Next greater power of 2.
k = 1 << (log2(ik - 1) + 1);
offset = k;
// Avoid default-constructing losers[].key
losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser)));
for (unsigned int i = ik - 1; i < k; ++i)
losers[i + k].sup = true;
first_insert = true;
}
~LoserTree() // Next greater power of 2.
{ ::operator delete(losers); } k = 1 << (log2(ik - 1) + 1);
offset = k;
losers = new Loser[k * 2];
for (unsigned int i = ik - 1; i < k; i++)
losers[i + k].sup = true;
}
int ~LoserTreePointerBase()
get_min_source() { ::operator delete(losers); }
{ return losers[0].source; }
void int get_min_source()
insert_start(const T& key, int source, bool sup) { return losers[0].source; }
{
unsigned int pos = k + source;
if(first_insert)
{
// Construct all keys, so we can easily deconstruct them.
for (unsigned int i = 0; i < (2 * k); ++i)
::new(&(losers[i].key)) T(key);
first_insert = false;
}
else
::new(&(losers[pos].key)) T(key);
losers[pos].sup = sup;
losers[pos].source = source;
}
unsigned int void insert_start(const T& key, int source, bool sup)
init_winner (unsigned int root) {
{ unsigned int pos = k + source;
if (root >= k)
{ losers[pos].sup = sup;
return root; losers[pos].source = source;
} losers[pos].keyp = &key;
else }
{ };
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1); /**
if (losers[right].sup * @brief Stable LoserTree implementation.
|| (!losers[left].sup *
&& !comp(losers[right].key, losers[left].key))) * The unstable variant is implemented using partial instantiation below.
{ */
// Left one is less or equal. template<bool stable/* default == true */, typename T, typename Comparator>
losers[root] = losers[right]; class LoserTreePointer : public LoserTreePointerBase<T, Comparator>
return left; {
} typedef LoserTreePointerBase<T, Comparator> Base;
else using Base::k;
{ using Base::losers;
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void public:
init() LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>())
{ losers[0] = losers[init_winner(1)]; } : Base::LoserTreePointerBase(_k, _comp)
{}
// Do not pass const reference since key will be used as local variable. unsigned int
void init_winner(unsigned int root)
delete_min_insert(T key, bool sup) {
{ if (root >= k)
int source = losers[0].source; {
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) return root;
{ }
// The smaller one gets promoted. else
if (sup || (!losers[pos].sup && comp(losers[pos].key, key))) {
{ unsigned int left = init_winner (2 * root);
// The other one is smaller. unsigned int right = init_winner (2 * root + 1);
std::swap(losers[pos].sup, sup); if (losers[right].sup
std::swap(losers[pos].source, source); || (!losers[left].sup && !comp(*losers[right].keyp,
std::swap(losers[pos].key, key); *losers[left].keyp)))
} {
} // Left one is less or equal.
losers[root] = losers[right];
losers[0].sup = sup; return left;
losers[0].source = source; }
losers[0].key = key; else
} {
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void init()
{ losers[0] = losers[init_winner(1)]; }
void delete_min_insert(const T& key, bool sup)
{
const T* keyp = &key;
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if ((sup && (!losers[pos].sup || losers[pos].source < source)) ||
(!sup && !losers[pos].sup &&
((comp(*losers[pos].keyp, *keyp)) ||
(!comp(*keyp, *losers[pos].keyp)
&& losers[pos].source < source))))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].sup = sup;
losers[0].source = source;
losers[0].keyp = keyp;
}
};
/**
* @brief Unstable LoserTree implementation.
*
* The stable variant is above.
*/
template<typename T, typename Comparator>
class LoserTreePointer</* stable == */false, T, Comparator> :
public LoserTreePointerBase<T, Comparator>
{
typedef LoserTreePointerBase<T, Comparator> Base;
using Base::k;
using Base::losers;
void public:
insert_start_stable(const T& key, int source, bool sup) LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>())
{ return insert_start(key, source, sup); } : Base::LoserTreePointerBase(_k, _comp)
{}
unsigned int unsigned int
init_winner_stable (unsigned int root) init_winner(unsigned int root)
{ {
if (root >= k) if (root >= k)
{ {
return root; return root;
} }
else else
{ {
unsigned int left = init_winner (2 * root); unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1); unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup if (losers[right].sup
|| (!losers[left].sup || (!losers[left].sup
&& !comp(losers[right].key, losers[left].key))) && !comp(*losers[right].keyp, *losers[left].keyp)))
{ {
// Left one is less or equal. // Left one is less or equal.
losers[root] = losers[right]; losers[root] = losers[right];
return left; return left;
} }
else else
{ {
// Right one is less. // Right one is less.
losers[root] = losers[left]; losers[root] = losers[left];
return right; return right;
} }
} }
} }
void void init()
init_stable() { losers[0] = losers[init_winner(1)]; }
{ losers[0] = losers[init_winner_stable(1)]; }
void delete_min_insert(const T& key, bool sup)
// Do not pass const reference since key will be used as local variable.
void
delete_min_insert_stable(T key, bool sup)
{
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if ( (sup && (!losers[pos].sup || losers[pos].source < source))
|| (!sup && !losers[pos].sup
&& ((comp(losers[pos].key, key))
|| (!comp(key, losers[pos].key)
&& losers[pos].source < source))))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
std::swap(losers[pos].key, key);
}
}
losers[0].sup = sup;
losers[0].source = source;
losers[0].key = key;
}
};
#endif
#if _GLIBCXX_LOSER_TREE_REFERENCE
/** @brief Guarded loser tree, either copying the whole element into
* the tree structure, or looking up the element via the index.
*
* Guarding is done explicitly through one flag sup per element,
* inf is not needed due to a better initialization routine. This
* is a well-performing variant.
*/
template<typename T, typename Comparator = std::less<T> >
class LoserTreeReference
{ {
#undef COPY const T* keyp = &key;
#ifdef COPY int source = losers[0].source;
#define KEY(i) losers[i].key for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
#define KEY_SOURCE(i) key {
#else // The smaller one gets promoted.
#define KEY(i) keys[losers[i].source] if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp)))
#define KEY_SOURCE(i) keys[i] {
#endif // The other one is smaller.
private: std::swap(losers[pos].sup, sup);
struct Loser std::swap(losers[pos].source, source);
{ std::swap(losers[pos].keyp, keyp);
bool sup; }
int source; }
#ifdef COPY
T key; losers[0].sup = sup;
#endif losers[0].source = source;
}; losers[0].keyp = keyp;
}
};
/** @brief Base class for unguarded LoserTree implementation.
*
* The whole element is copied into the tree structure.
*
* No guarding is done, therefore not a single input sequence must
* run empty. Unused sequence heads are marked with a sentinel which
* is &gt; all elements that are to be merged.
*
* This is a very fast variant.
*/
template<typename T, typename Comparator>
class LoserTreeUnguardedBase
{
protected:
struct Loser
{
int source;
T key;
};
unsigned int ik, k, offset; unsigned int ik, k, offset;
Loser* losers; Loser* losers;
#ifndef COPY Comparator comp;
T* keys;
#endif
Comparator comp;
public: public:
LoserTreeReference(unsigned int _k, Comparator _comp = std::less<T>()) inline
LoserTreeUnguardedBase(unsigned int _k, const T _sentinel,
Comparator _comp = std::less<T>())
: comp(_comp) : comp(_comp)
{ {
ik = _k; ik = _k;
// Next greater power of 2. // Next greater power of 2.
k = 1 << (log2(ik - 1) + 1); k = 1 << (log2(ik - 1) + 1);
offset = k; offset = k;
losers = new Loser[k * 2]; // Avoid default-constructing losers[].key
#ifndef COPY losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser)));
keys = new T[ik];
#endif for (unsigned int i = /*k + ik - 1*/0; i < (2 * k); ++i)
for (unsigned int i = ik - 1; i < k; ++i) {
losers[i + k].sup = true; losers[i].key = _sentinel;
} losers[i].source = -1;
}
~LoserTreeReference() }
{
delete[] losers; inline ~LoserTreeUnguardedBase()
#ifndef COPY { ::operator delete(losers); }
delete[] keys;
#endif inline int
} get_min_source()
{
int // no dummy sequence can ever be at the top!
get_min_source() #if _GLIBCXX_ASSERTIONS
{ return losers[0].source; } _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
void
insert_start(T key, int source, bool sup)
{
unsigned int pos = k + source;
losers[pos].sup = sup;
losers[pos].source = source;
KEY(pos) = key;
}
unsigned int
init_winner(unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if ( losers[right].sup ||
(!losers[left].sup && !comp(KEY(right), KEY(left))))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void
init()
{
losers[0] = losers[init_winner(1)];
}
void
delete_min_insert(T key, bool sup)
{
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if (sup || (!losers[pos].sup && comp(KEY(pos), KEY_SOURCE(source))))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
#ifdef COPY
std::swap(KEY(pos), KEY_SOURCE(source));
#endif
}
}
losers[0].sup = sup;
losers[0].source = source;
#ifdef COPY
KEY(0) = KEY_SOURCE(source);
#endif #endif
} return losers[0].source;
}
void inline void
insert_start_stable(T key, int source, bool sup) insert_start(const T& key, int source, bool)
{ return insert_start(key, source, sup); } {
unsigned int pos = k + source;
unsigned int
init_winner_stable(unsigned int root) new(&(losers[pos].key)) T(key);
{ losers[pos].source = source;
if (root >= k) }
{ };
return root;
} /**
else * @brief Stable implementation of unguarded LoserTree.
{ *
unsigned int left = init_winner (2 * root); * Unstable variant is selected below with partial specialization.
unsigned int right = init_winner (2 * root + 1); */
if (losers[right].sup template<bool stable/* default == true */, typename T, typename Comparator>
|| (!losers[left].sup && !comp(KEY(right), KEY(left)))) class LoserTreeUnguarded : public LoserTreeUnguardedBase<T, Comparator>
{ {
// Left one is less or equal. typedef LoserTreeUnguardedBase<T, Comparator> Base;
losers[root] = losers[right]; using Base::k;
return left; using Base::losers;
}
else public:
{ LoserTreeUnguarded(unsigned int _k, const T _sentinel,
// Right one is less. Comparator _comp = std::less<T>())
losers[root] = losers[left]; : Base::LoserTreeUnguardedBase(_k, _sentinel, _comp)
return right; {}
}
} unsigned int
} init_winner(unsigned int root)
{
void if (root >= k)
init_stable() {
{ losers[0] = losers[init_winner_stable(1)]; } return root;
}
void else
delete_min_insert_stable(T key, bool sup) {
{ unsigned int left = init_winner (2 * root);
int source = losers[0].source; unsigned int right = init_winner (2 * root + 1);
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2) if (!comp(losers[right].key, losers[left].key))
{ {
// The smaller one gets promoted, ties are broken by source. // Left one is less or equal.
if ((sup && (!losers[pos].sup || losers[pos].source < source)) losers[root] = losers[right];
|| (!sup && !losers[pos].sup return left;
&& ((comp(KEY(pos), KEY_SOURCE(source))) }
|| (!comp(KEY_SOURCE(source), KEY(pos)) else
&& losers[pos].source < source)))) {
{ // Right one is less.
// The other one is smaller. losers[root] = losers[left];
std::swap(losers[pos].sup, sup); return right;
std::swap(losers[pos].source, source); }
#ifdef COPY }
std::swap(KEY(pos), KEY_SOURCE(source)); }
#endif
} inline void
} init()
{
losers[0] = losers[init_winner(1)];
losers[0].sup = sup; // no dummy sequence can ever be at the top at the beginning (0 sequences!)
losers[0].source = source; #if _GLIBCXX_ASSERTIONS
#ifdef COPY _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
KEY(0) = KEY_SOURCE(source);
#endif #endif
} }
};
#undef KEY
#undef KEY_SOURCE
// Do not pass a const reference since key will be used as local variable.
inline void
delete_min_insert(T key, bool)
{
// No dummy sequence can ever be at the top and be retrieved!
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif #endif
#if _GLIBCXX_LOSER_TREE_POINTER int source = losers[0].source;
printf("%d\n", source);
/** @brief Guarded loser tree, either copying the whole element into for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
the tree structure, or looking up the element via the index. {
* Guarding is done explicitly through one flag sup per element, // The smaller one gets promoted, ties are broken by source.
* inf is not needed due to a better initialization routine. if (comp(losers[pos].key, key)
* This is a well-performing variant. || (!comp(key, losers[pos].key) && losers[pos].source < source))
*/ {
template<typename T, typename Comparator = std::less<T> > // The other one is smaller.
class LoserTreePointer std::swap(losers[pos].source, source);
std::swap(losers[pos].key, key);
}
}
losers[0].source = source;
losers[0].key = key;
}
};
/**
* @brief Non-Stable implementation of unguarded LoserTree.
*
* Stable implementation is above.
*/
template<typename T, typename Comparator>
class LoserTreeUnguarded</* stable == */false, T, Comparator> :
public LoserTreeUnguardedBase<T, Comparator>
{
typedef LoserTreeUnguardedBase<T, Comparator> Base;
using Base::k;
using Base::losers;
public:
LoserTreeUnguarded(unsigned int _k, const T _sentinel,
Comparator _comp = std::less<T>())
: Base::LoserTreeUnguardedBase(_k, _sentinel, _comp)
{}
unsigned int
init_winner (unsigned int root)
{ {
private: if (root >= k)
struct Loser {
{ return root;
bool sup; }
int source; else
const T* keyp; {
}; unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
unsigned int ik, k, offset;
Loser* losers; #if _GLIBCXX_ASSERTIONS
Comparator comp; // If left one is sentinel then right one must be, too.
if (losers[left].source == -1)
public: _GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1);
LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>())
: comp(_comp)
{
ik = _k;
// Next greater power of 2.
k = 1 << (log2(ik - 1) + 1);
offset = k;
losers = new Loser[k * 2];
for (unsigned int i = ik - 1; i < k; ++i)
losers[i + k].sup = true;
}
~LoserTreePointer()
{ delete[] losers; }
int
get_min_source()
{ return losers[0].source; }
void
insert_start(const T& key, int source, bool sup)
{
unsigned int pos = k + source;
losers[pos].sup = sup;
losers[pos].source = source;
losers[pos].keyp = &key;
}
unsigned int
init_winner(unsigned int root)
{
if (root >= k)
return root;
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup
|| (!losers[left].sup
&& !comp(*losers[right].keyp, *losers[left].keyp)))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void
init()
{ losers[0] = losers[init_winner(1)]; }
void
delete_min_insert(const T& key, bool sup)
{
const T* keyp = &key;
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp)))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].sup = sup;
losers[0].source = source;
losers[0].keyp = keyp;
}
/** @brief Stable-mode entry point; identical to insert_start(). */
void
insert_start_stable(const T& key, int source, bool sup)
{
  insert_start(key, source, sup);
}
/** @brief Recursively play the initial matches below @c root (stable).
 *
 *  On equal keys the left subtree wins.  The loser of each match is
 *  stored in the inner node.
 *
 *  @param root Node index; indices >= k are leaves.
 *  @return Index of the node holding the winner of this subtree. */
unsigned int
init_winner_stable(unsigned int root)
{
  if (root >= k)
    {
      return root;
    }
  else
    {
      // Recurse into the stable variant itself, so the whole tree is
      // built by one routine.
      unsigned int left = init_winner_stable (2 * root);
      unsigned int right = init_winner_stable (2 * root + 1);
      if (losers[right].sup
          || (!losers[left].sup && !comp(*losers[right].keyp,
                                         *losers[left].keyp)))
        {
          // Left one is less or equal.
          losers[root] = losers[right];
          return left;
        }
      else
        {
          // Right one is less.
          losers[root] = losers[left];
          return right;
        }
    }
}
/** @brief Build the tree in stable mode and copy the winner into node 0. */
void
init_stable()
{
  const unsigned int winner = init_winner_stable(1);
  losers[0] = losers[winner];
}
void
delete_min_insert_stable(const T& key, bool sup)
{
const T* keyp = &key;
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if ( (sup && (!losers[pos].sup || losers[pos].source < source))
|| (!sup && !losers[pos].sup &&
((comp(*losers[pos].keyp, *keyp))
|| (!comp(*keyp, *losers[pos].keyp)
&& losers[pos].source < source))))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].sup = sup;
losers[0].source = source;
losers[0].keyp = keyp;
}
};
#endif #endif
#if _GLIBCXX_LOSER_TREE_UNGUARDED if (!comp(losers[right].key, losers[left].key))
{
/** @brief Unguarded loser tree, copying the whole element into the // Left one is less or equal.
* tree structure. losers[root] = losers[right];
* return left;
* No guarding is done, therefore not a single input sequence must }
* run empty. This is a very fast variant. else
*/ {
template<typename T, typename Comparator = std::less<T> > // Right one is less.
class LoserTreeUnguarded losers[root] = losers[left];
return right;
}
}
}
inline void
init()
{ {
private: losers[0] = losers[init_winner(1)];
// One tree node: a copy of an element plus the sequence it came from.
struct Loser
{
// Index of the input sequence the key belongs to.
int source;
// The element itself, stored by value.
T key;
};
// ik: number of input sequences as passed to the constructor.
// k: value computed from ik in the constructor, also copied to offset.
unsigned int ik, k, offset;
// Array of ik entries, filled by map(): node index of each sequence's leaf.
unsigned int* mapping;
// Node array of k + ik entries; node 0 holds the current winner.
Loser* losers;
// Comparator used for all key comparisons.
Comparator comp;
/** @brief Assign a leaf node index to every sequence in [begin, end).
 *
 *  A range holding a single sequence is mapped to @c root; a larger range
 *  is split and its halves are mapped below the two children of @c root.
 *
 *  @param root Node index the range hangs under.
 *  @param begin First sequence index of the range.
 *  @param end Past-the-end sequence index of the range. */
void
map(unsigned int root, unsigned int begin, unsigned int end)
{
  if (begin + 1 == end)
    {
      mapping[begin] = root;
      return;
    }

  // Size of the left part: a power of 2 derived from the range length.
  const unsigned int left_size = 1 << (log2(end - begin - 1));
  const unsigned int middle = begin + left_size;
  map(root * 2, begin, middle);
  map(root * 2 + 1, middle, end);
}
public:
/** @brief Allocate the tree and compute the leaf position of each sequence.
 *  @param _k Number of input sequences.
 *  @param _comp Comparator used for all key comparisons. */
LoserTreeUnguarded(unsigned int _k, Comparator _comp = std::less<T>())
: ik(_k), comp(_comp)
{
  // Power of 2 derived from the number of sequences.
  k = 1 << (log2(ik - 1) + 1);
  offset = k;

  losers = new Loser[k + ik];
  mapping = new unsigned int[ik];

  // Fill mapping[] starting from the root (node 1).
  map(1, 0, ik);
}
/** @brief Free the node array and the sequence-to-leaf mapping. */
~LoserTreeUnguarded()
{ delete[] losers; delete[] mapping; }
int
get_min_source()
{ return losers[0].source; }
void
insert_start(const T& key, int source, bool)
{
unsigned int pos = mapping[source];
losers[pos].source = source;
losers[pos].key = key;
}
/** @brief Recursively play the initial matches for sequences [begin, end).
 *
 *  The range is split the same way map() splits it.  The loser of each
 *  match is stored at @c root, the winner's node index is returned.
 *
 *  @param root Node index of the current subtree.
 *  @param begin First sequence index of the range.
 *  @param end Past-the-end sequence index of the range.
 *  @return Index of the node holding the winner of this subtree. */
unsigned int
init_winner(unsigned int root, unsigned int begin, unsigned int end)
{
  if (begin + 1 == end)
    return mapping[begin];

  // Size of the left part: a power of 2 derived from the range length.
  const unsigned int division = 1 << (log2(end - begin - 1));
  const unsigned int middle = begin + division;
  const unsigned int left = init_winner(2 * root, begin, middle);
  const unsigned int right = init_winner(2 * root + 1, middle, end);

  // Left one is less or equal.
  const bool left_wins = !comp(losers[right].key, losers[left].key);

  // The loser stays at this node, the winner moves up.
  losers[root] = losers[left_wins ? right : left];
  return left_wins ? left : right;
}
/** @brief Build the tree and copy the overall winner into node 0. */
void
init()
{
  const unsigned int winner = init_winner(1, 0, ik);
  losers[0] = losers[winner];
}
// Do not pass const reference since key will be used as local variable.
void
delete_min_insert(const T& key, bool)
{
losers[0].key = key;
T& keyr = losers[0].key;
int& source = losers[0].source;
for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if (comp(losers[pos].key, keyr))
{
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].key, keyr);
}
}
}
/** @brief Stable-mode entry point; forwards to insert_start() with
 *  @c false as third argument. */
void
insert_start_stable(const T& key, int source, bool)
{
  insert_start(key, source, false);
}
/** @brief Stable-mode entry point; simply runs init(). */
void
init_stable()
{
  init();
}
void
delete_min_insert_stable(const T& key, bool)
{
losers[0].key = key;
T& keyr = losers[0].key;
int& source = losers[0].source;
for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if (comp(losers[pos].key, keyr)
|| (!comp(keyr, losers[pos].key)
&& losers[pos].source < source))
{
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].key, keyr);
}
}
}
};
// no dummy sequence can ever be at the top at the beginning (0 sequences!)
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif #endif
}
#if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED // Do not pass a const reference since key will be used as local variable.
/** @brief Replace the current winner by the next element of its sequence.
 *
 *  Walks from the winner's leaf (@c k + source) to the root, swapping the
 *  candidate with every stored loser that compares less, and writes the
 *  final winner into node 0.
 *
 *  @param key Next element of the winning sequence; taken by value
 *  because it serves as the working copy that is swapped along the path.
 *  (The unnamed bool parameter is ignored.) */
inline void
delete_min_insert(T key, bool)
{
  // Note: no diagnostic output here; this runs once per merged element.
  int source = losers[0].source;
  for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
    {
      // The smaller one gets promoted.
      if (comp(losers[pos].key, key))
        {
          // The other one is smaller.
          std::swap(losers[pos].source, source);
          std::swap(losers[pos].key, key);
        }
    }
  losers[0].source = source;
  losers[0].key = key;
}
};
/** @brief Unguarded loser tree, keeping only pointers to the /** @brief Unguarded loser tree, keeping only pointers to the
* elements in the tree structure. * elements in the tree structure.
...@@ -926,175 +773,233 @@ template<typename T, typename Comparator = std::less<T> > ...@@ -926,175 +773,233 @@ template<typename T, typename Comparator = std::less<T> >
* No guarding is done, therefore not a single input sequence must * No guarding is done, therefore not a single input sequence must
* run empty. This is a very fast variant. * run empty. This is a very fast variant.
*/ */
template<typename T, typename Comparator = std::less<T> > template<typename T, typename Comparator>
class LoserTreePointerUnguarded class LoserTreePointerUnguardedBase
{
protected:
struct Loser
{ {
private: int source;
struct Loser const T* keyp;
{ };
int source;
const T* keyp;
};
unsigned int ik, k, offset;
unsigned int* mapping;
Loser* losers;
Comparator comp;
/** @brief Assign a leaf node index to every sequence in [begin, end).
 *
 *  A range holding a single sequence is mapped to @c root; a larger range
 *  is split and its halves are mapped below the two children of @c root.
 *
 *  @param root Node index the range hangs under.
 *  @param begin First sequence index of the range.
 *  @param end Past-the-end sequence index of the range. */
void map(unsigned int root, unsigned int begin, unsigned int end)
{
  if (begin + 1 == end)
    {
      mapping[begin] = root;
      return;
    }

  // Size of the left part: a power of 2 derived from the range length.
  const unsigned int left_size = 1 << (log2(end - begin - 1));
  const unsigned int middle = begin + left_size;
  map(root * 2, begin, middle);
  map(root * 2 + 1, middle, end);
}
public:
/** @brief Allocate the tree and compute the leaf position of each sequence.
 *  @param _k Number of input sequences.
 *  @param _comp Comparator used for all key comparisons. */
LoserTreePointerUnguarded(unsigned int _k,
                          Comparator _comp = std::less<T>())
: ik(_k), comp(_comp)
{
  // Power of 2 derived from the number of sequences.
  k = 1 << (log2(ik - 1) + 1);
  offset = k;

  losers = new Loser[k + ik];
  mapping = new unsigned int[ik];

  // Fill mapping[] starting from the root (node 1).
  map(1, 0, ik);
}
/** @brief Free the node array and the sequence-to-leaf mapping. */
~LoserTreePointerUnguarded()
{ delete[] losers; delete[] mapping; }
int
get_min_source()
{ return losers[0].source; }
void
insert_start(const T& key, int source, bool)
{
unsigned int pos = mapping[source];
losers[pos].source = source;
losers[pos].keyp = &key;
}
/** @brief Recursively play the initial matches for sequences [begin, end).
 *
 *  The range is split the same way map() splits it.  The loser of each
 *  match is stored at @c root, the winner's node index is returned.
 *
 *  @param root Node index of the current subtree.
 *  @param begin First sequence index of the range.
 *  @param end Past-the-end sequence index of the range.
 *  @return Index of the node holding the winner of this subtree. */
unsigned int
init_winner(unsigned int root, unsigned int begin, unsigned int end)
{
  if (begin + 1 == end)
    return mapping[begin];

  // Size of the left part: a power of 2 derived from the range length.
  const unsigned int division = 1 << (log2(end - begin - 1));
  const unsigned int middle = begin + division;
  const unsigned int left = init_winner(2 * root, begin, middle);
  const unsigned int right = init_winner(2 * root + 1, middle, end);

  // Left one is less or equal.
  const bool left_wins = !comp(*losers[right].keyp, *losers[left].keyp);

  // The loser stays at this node, the winner moves up.
  losers[root] = losers[left_wins ? right : left];
  return left_wins ? left : right;
}
void unsigned int ik, k, offset;
init() Loser* losers;
{ losers[0] = losers[init_winner(1, 0, ik)]; } const T sentinel;
Comparator comp;
void public:
delete_min_insert(const T& key, bool)
{
const T* keyp = &key;
int& source = losers[0].source;
for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if (comp(*losers[pos].keyp, *keyp))
{
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].keyp = keyp;
}
void inline
insert_start_stable(const T& key, int source, bool) LoserTreePointerUnguardedBase(unsigned int _k, const T _sentinel,
{ return insert_start(key, source, false); } Comparator _comp = std::less<T>())
: sentinel(_sentinel), comp(_comp)
{
ik = _k;
// Next greater power of 2.
k = 1 << (log2(ik - 1) + 1);
offset = k;
// Avoid default-constructing losers[].key
losers = new Loser[2 * k];
for (unsigned int i = /*k + ik - 1*/0; i < (2 * k); ++i)
{
losers[i].keyp = &sentinel;
losers[i].source = -1;
}
}
/** @brief Free the node array allocated by the constructor. */
inline ~LoserTreePointerUnguardedBase()
{
  delete[] losers;
}
inline int
get_min_source()
{
// no dummy sequence can ever be at the top!
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
return losers[0].source;
}
void inline void
init_stable() insert_start(const T& key, int source, bool)
{ init(); } {
unsigned int pos = k + source;
losers[pos].keyp = &key;
losers[pos].source = source;
}
};
/**
* @brief Stable unguarded LoserTree variant storing pointers.
*
* Unstable variant is implemented below using partial specialization.
*/
template<bool stable/* default == true */, typename T, typename Comparator>
class LoserTreePointerUnguarded :
public LoserTreePointerUnguardedBase<T, Comparator>
{
typedef LoserTreePointerUnguardedBase<T, Comparator> Base;
using Base::k;
using Base::losers;
public:
/** @brief Build the stable pointer-based unguarded tree via the base class.
 *  @param _k Forwarded unchanged to the base-class constructor.
 *  @param _sentinel Forwarded unchanged to the base-class constructor.
 *  @param _comp Comparator, forwarded unchanged to the base-class
 *  constructor. */
LoserTreePointerUnguarded(unsigned int _k, const T _sentinel,
                          Comparator _comp = std::less<T>())
: Base(_k, _sentinel, _comp)
{ }
/** @brief Recursively play the initial matches below @c root.
 *
 *  On equal keys the left subtree wins.  The loser of each match is
 *  stored in the inner node.
 *
 *  @param root Node index; indices >= k are leaves.
 *  @return Index of the node holding the winner of this subtree. */
unsigned int
init_winner(unsigned int root)
{
  if (root >= k)
    return root;

  const unsigned int left = init_winner (2 * root);
  const unsigned int right = init_winner (2 * root + 1);

  // Left one is less or equal.
  const bool left_wins = !comp(*losers[right].keyp, *losers[left].keyp);

  // The loser stays at this node, the winner moves up.
  losers[root] = losers[left_wins ? right : left];
  return left_wins ? left : right;
}
inline void
init()
{
losers[0] = losers[init_winner(1)];
void // no dummy sequence can ever be at the top at the beginning (0 sequences!)
delete_min_insert_stable(const T& key, bool) #if _GLIBCXX_ASSERTIONS
{ _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
int& source = losers[0].source;
const T* keyp = &key;
for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if (comp(*losers[pos].keyp, *keyp)
|| (!comp(*keyp, *losers[pos].keyp)
&& losers[pos].source < source))
{
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].keyp = keyp;
}
};
#endif #endif
}
template<typename _ValueTp, class Comparator> inline void
struct loser_tree_traits delete_min_insert(const T& key, bool sup)
{ {
#if _GLIBCXX_LOSER_TREE const T* keyp = &key;
typedef LoserTree<_ValueTp, Comparator> LT; int source = losers[0].source;
#else for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
# if _GLIBCXX_LOSER_TREE_POINTER {
typedef LoserTreePointer<_ValueTp, Comparator> LT; // The smaller one gets promoted, ties are broken by source.
# else if (comp(*losers[pos].keyp, *keyp)
# error Must define some type in losertree.h. || (!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source))
# endif {
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].source = source;
losers[0].keyp = keyp;
// no dummy sequence can ever be at the top!
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
}
};
/**
* @brief Unstable unguarded LoserTree variant storing pointers.
*
* Stable variant is above.
*/
template<typename T, typename Comparator>
class LoserTreePointerUnguarded</* stable == */false, T, Comparator> :
public LoserTreePointerUnguardedBase<T, Comparator>
{
typedef LoserTreePointerUnguardedBase<T, Comparator> Base;
using Base::k;
using Base::losers;
public:
/** @brief Build the unstable pointer-based unguarded tree via the base
 *  class.
 *  @param _k Forwarded unchanged to the base-class constructor.
 *  @param _sentinel Forwarded unchanged to the base-class constructor.
 *  @param _comp Comparator, forwarded unchanged to the base-class
 *  constructor. */
LoserTreePointerUnguarded(unsigned int _k, const T _sentinel,
                          Comparator _comp = std::less<T>())
: Base(_k, _sentinel, _comp)
{ }
unsigned int
init_winner(unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
#if _GLIBCXX_ASSERTIONS
// If left one is sentinel then right one must be, too.
if (losers[left].source == -1)
_GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1);
#endif #endif
};
template<typename _ValueTp, class Comparator> if (!comp(*losers[right].keyp, *losers[left].keyp))
struct loser_tree_unguarded_traits {
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
inline void
init()
{ {
#if _GLIBCXX_LOSER_TREE_UNGUARDED losers[0] = losers[init_winner(1)];
typedef LoserTreeUnguarded<_ValueTp, Comparator> LT;
#else // no dummy sequence can ever be at the top at the beginning (0 sequences!)
# if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED #if _GLIBCXX_ASSERTIONS
typedef LoserTreePointerUnguarded<_ValueTp, Comparator> LT; _GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
# else
# error Must define some unguarded type in losertree.h.
# endif
#endif #endif
}; }
} inline void
delete_min_insert(const T& key, bool sup)
{
const T* keyp = &key;
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if (comp(*(losers[pos].keyp), *keyp))
{
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].source = source;
losers[0].keyp = keyp;
}
};
} // namespace __gnu_parallel
#endif #endif
...@@ -239,19 +239,26 @@ namespace __gnu_parallel ...@@ -239,19 +239,26 @@ namespace __gnu_parallel
std::iterator_traits<RandomAccessIterator1>:: std::iterator_traits<RandomAccessIterator1>::
difference_type max_length, Comparator comp) difference_type max_length, Comparator comp)
{ {
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type typedef typename
value_type; std::iterator_traits<RandomAccessIterator1>::value_type value_type;
typedef typename std::iterator_traits<RandomAccessIterator1>:: typedef typename std::iterator_traits<RandomAccessIterator1>::
difference_type difference_type1 /* == difference_type2 */; difference_type difference_type1 /* == difference_type2 */;
typedef typename std::iterator_traits<RandomAccessIterator3>:: typedef typename std::iterator_traits<RandomAccessIterator3>::
difference_type difference_type3; difference_type difference_type3;
typedef typename std::pair<RandomAccessIterator1, RandomAccessIterator1>
iterator_pair;
std::pair<RandomAccessIterator1, RandomAccessIterator1> std::pair<RandomAccessIterator1, RandomAccessIterator1>
seqs[2] = { std::make_pair(begin1, end1), seqs[2] = { std::make_pair(begin1, end1),
std::make_pair(begin2, end2) }; std::make_pair(begin2, end2) };
RandomAccessIterator3 RandomAccessIterator3
target_end = parallel_multiway_merge(seqs, seqs + 2, target, target_end = parallel_multiway_merge
comp, max_length, true, false); < /* stable = */ true, /* sentinels = */ false>(
seqs, seqs + 2, target, comp,
multiway_merge_exact_splitting
< /* stable = */ true, iterator_pair*,
Comparator, difference_type1>,
max_length);
return target_end; return target_end;
} }
......
...@@ -40,7 +40,7 @@ ...@@ -40,7 +40,7 @@
* This file is a GNU parallel extension to the Standard C++ Library. * This file is a GNU parallel extension to the Standard C++ Library.
*/ */
// Written by Johannes Singler. // Written by Johannes Singler and Manuel Holtgrewe.
#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H #ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
#define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H #define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
...@@ -50,7 +50,6 @@ ...@@ -50,7 +50,6 @@
#include <bits/stl_algo.h> #include <bits/stl_algo.h>
#include <parallel/features.h> #include <parallel/features.h>
#include <parallel/parallel.h> #include <parallel/parallel.h>
#include <parallel/merge.h>
#include <parallel/losertree.h> #include <parallel/losertree.h>
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
#include <parallel/checkers.h> #include <parallel/checkers.h>
...@@ -59,27 +58,34 @@ ...@@ -59,27 +58,34 @@
/** @brief Length of a sequence described by a pair of iterators. */ /** @brief Length of a sequence described by a pair of iterators. */
#define _GLIBCXX_PARALLEL_LENGTH(s) ((s).second - (s).first) #define _GLIBCXX_PARALLEL_LENGTH(s) ((s).second - (s).first)
// XXX need iterator typedefs
namespace __gnu_parallel namespace __gnu_parallel
{ {
// Announce guarded and unguarded iterator.
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
class guarded_iterator; class guarded_iterator;
// Making the arguments const references seems too dangerous,
// the user-defined comparator might not be const.
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
inline bool inline bool
operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1, operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
guarded_iterator<RandomAccessIterator, Comparator>& bi2); guarded_iterator<RandomAccessIterator, Comparator>& bi2);
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
inline bool inline bool
operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1, operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
guarded_iterator<RandomAccessIterator, Comparator>& bi2); guarded_iterator<RandomAccessIterator, Comparator>& bi2);
/** @brief Iterator wrapper supporting an implicit supremum at the end /** @brief Iterator wrapper supporting an implicit supremum at the end
of the sequence, dominating all comparisons. * of the sequence, dominating all comparisons.
* Deriving from RandomAccessIterator is not possible since *
* RandomAccessIterator need not be a class. * The implicit supremum comes with a performance cost.
*/ *
* Deriving from RandomAccessIterator is not possible since
* RandomAccessIterator need not be a class.
*/
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
class guarded_iterator class guarded_iterator
{ {
...@@ -100,7 +106,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -100,7 +106,7 @@ template<typename RandomAccessIterator, typename Comparator>
* @param comp Comparator provided for associated overloaded * @param comp Comparator provided for associated overloaded
* compare operators. */ * compare operators. */
guarded_iterator(RandomAccessIterator begin, guarded_iterator(RandomAccessIterator begin,
RandomAccessIterator end, Comparator& comp) RandomAccessIterator end, Comparator& comp)
: current(begin), end(end), comp(comp) : current(begin), end(end), comp(comp)
{ } { }
...@@ -115,7 +121,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -115,7 +121,7 @@ template<typename RandomAccessIterator, typename Comparator>
/** @brief Dereference operator. /** @brief Dereference operator.
* @return Referenced element. */ * @return Referenced element. */
typename std::iterator_traits<RandomAccessIterator>::value_type typename std::iterator_traits<RandomAccessIterator>::value_type&
operator*() operator*()
{ return *current; } { return *current; }
...@@ -158,7 +164,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -158,7 +164,7 @@ template<typename RandomAccessIterator, typename Comparator>
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
inline bool inline bool
operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1, operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
guarded_iterator<RandomAccessIterator, Comparator>& bi2) guarded_iterator<RandomAccessIterator, Comparator>& bi2)
{ {
if (bi2.current == bi2.end) //bi1 is sup if (bi2.current == bi2.end) //bi1 is sup
return bi1.current != bi1.end; //bi2 is not sup return bi1.current != bi1.end; //bi2 is not sup
...@@ -185,7 +191,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -185,7 +191,7 @@ template<typename RandomAccessIterator, typename Comparator>
{ {
private: private:
/** @brief Current iterator position. */ /** @brief Current iterator position. */
RandomAccessIterator& current; RandomAccessIterator current;
/** @brief Comparator. */ /** @brief Comparator. */
mutable Comparator& comp; mutable Comparator& comp;
...@@ -195,7 +201,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -195,7 +201,7 @@ template<typename RandomAccessIterator, typename Comparator>
* @param end Unused, only for compatibility. * @param end Unused, only for compatibility.
* @param comp Unused, only for compatibility. */ * @param comp Unused, only for compatibility. */
unguarded_iterator(RandomAccessIterator begin, unguarded_iterator(RandomAccessIterator begin,
RandomAccessIterator end, Comparator& comp) RandomAccessIterator end, Comparator& comp)
: current(begin), comp(comp) : current(begin), comp(comp)
{ } { }
...@@ -210,7 +216,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -210,7 +216,7 @@ template<typename RandomAccessIterator, typename Comparator>
/** @brief Dereference operator. /** @brief Dereference operator.
* @return Referenced element. */ * @return Referenced element. */
typename std::iterator_traits<RandomAccessIterator>::value_type typename std::iterator_traits<RandomAccessIterator>::value_type&
operator*() operator*()
{ return *current; } { return *current; }
...@@ -256,159 +262,41 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -256,159 +262,41 @@ template<typename RandomAccessIterator, typename Comparator>
return !(bi1.comp)(*bi2, *bi1); return !(bi1.comp)(*bi2, *bi1);
} }
/** @brief Prepare a set of sequences to be merged without an (end) guard.
 *
 *  Determines the sequence whose last element is smallest and counts, over
 *  all sequences, the elements located at or after the position where that
 *  element would be inserted.
 *
 *  @param seqs_begin Begin of the range of iterator pairs.
 *  @param seqs_end End of the range of iterator pairs.
 *  @param comp Comparator.
 *  @param min_sequence Output: index of the sequence with the smallest last
 *  element, or of the first empty sequence encountered.
 *  @param stable If true, sequences up to and including @c min_sequence are
 *  split with upper_bound instead of lower_bound.
 *  @return Number of elements that will be left over afterwards, or -1 if
 *  an empty sequence was found.
 *  @pre (seqs_end - seqs_begin > 0) */
template<typename RandomAccessIteratorIterator, typename Comparator>
  typename std::iterator_traits<
    typename std::iterator_traits<RandomAccessIteratorIterator>::value_type
    ::first_type>::difference_type
  prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
                    RandomAccessIteratorIterator seqs_end, Comparator comp,
                    int& min_sequence, bool stable)
  {
    _GLIBCXX_CALL(seqs_end - seqs_begin)

    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
      ::value_type::first_type
      RandomAccessIterator1;
    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
      value_type;
    typedef typename std::iterator_traits<RandomAccessIterator1>
      ::difference_type
      difference_type;

    min_sequence = 0;
    if ((*seqs_begin).first == (*seqs_begin).second)
      {
        // Empty sequence found, it's the first one.
        return -1;
      }

    // Smallest of the last elements seen so far.
    value_type smallest_last = *((*seqs_begin).second - 1);
    for (RandomAccessIteratorIterator it = seqs_begin + 1;
         it != seqs_end; ++it)
      {
        if ((*it).first == (*it).second)
          {
            // Empty sequence found.
            min_sequence = static_cast<int>(it - seqs_begin);
            return -1;
          }

        // Last element in sequence.
        const value_type& last = *((*it).second - 1);
        if (comp(last, smallest_last)) //strictly smaller
          {
            smallest_last = last;
            min_sequence = static_cast<int>(it - seqs_begin);
          }
      }

    difference_type overhang_size = 0;
    for (int s = 0; s < (seqs_end - seqs_begin); ++s)
      {
        // Only the leading sequences (up to the minimum one) use the
        // upper bound, and only when stability is requested.
        const bool use_upper_bound = stable && s <= min_sequence;
        RandomAccessIterator1 split =
          use_upper_bound
          ? std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
                             smallest_last, comp)
          : std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
                             smallest_last, comp);
        overhang_size += seqs_begin[s].second - split;
      }

    // So many elements will be left over afterwards.
    return overhang_size;
  }
/** @brief Prepare a set of sequences to be merged with an (end) guard
 *  (sentinel).
 *
 *  Finds the largest of the sequences' last elements, counts the elements
 *  that are not less than it, and writes a copy of it to the position
 *  @c second of every sequence.  The slot one past the end of each
 *  sequence must therefore be writable.
 *
 *  If every sequence is empty there is no element to use as sentinel; in
 *  that case nothing is written and 0 is returned.
 *
 *  @param seqs_begin Begin of the range of iterator pairs.
 *  @param seqs_end End of the range of iterator pairs.
 *  @param comp Comparator.
 *  @return Number of elements that will be left over afterwards. */
template<typename RandomAccessIteratorIterator, typename Comparator>
  typename std::iterator_traits<typename std::iterator_traits<
    RandomAccessIteratorIterator>::value_type::first_type>::difference_type
  prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
                             RandomAccessIteratorIterator seqs_end,
                             Comparator comp)
  {
    _GLIBCXX_CALL(seqs_end - seqs_begin)

    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
      ::value_type::first_type
      RandomAccessIterator1;
    typedef typename std::iterator_traits<RandomAccessIterator1>
      ::value_type
      value_type;
    typedef typename std::iterator_traits<RandomAccessIterator1>
      ::difference_type
      difference_type;

    // Largest of the last elements; stays NULL while only empty sequences
    // have been seen.
    value_type* max = NULL;
    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
      {
        if ((*s).first == (*s).second)
          continue;

        // Last element in sequence.
        value_type& v = *((*s).second - 1);

        // Strictly greater.
        if (!max || comp(*max, v))
          max = &v;
      }

    // All sequences are empty: no sentinel value exists, and *max must not
    // be dereferenced.
    if (max == NULL)
      return 0;

    difference_type overhang_size = 0;
    for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
      {
        RandomAccessIterator1 split =
          std::lower_bound((*s).first, (*s).second, *max, comp);
        overhang_size += (*s).second - split;

        // Set sentinel.
        *((*s).second) = *max;
      }

    // So many elements will be left over afterwards.
    return overhang_size;
  }
/** @brief Highly efficient 3-way merging procedure. /** @brief Highly efficient 3-way merging procedure.
* @param seqs_begin Begin iterator of iterator pair input sequence. *
* @param seqs_end End iterator of iterator pair input sequence. * Merging is done with the algorithm implementation described by Peter
* @param target Begin iterator out output sequence. * Sanders. Basically, the idea is to minimize the number of necessary
* @param comp Comparator. * comparison after merging out an element. The implementation trick
* @param length Maximum length to merge. * that makes this fast is that the order of the sequences is stored
* @param stable Unused, stable anyway. * in the instruction pointer (translated into labels in C++).
* @return End iterator of output sequence. */ *
* This works well for merging up to 4 sequences.
*
* Note that making the merging stable does <em>not</em> come at a
* performance hit.
*
* Whether the merging is done guarded or unguarded is selected by the
* used iterator class.
*
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator of output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
*
* @return End iterator of output sequence.
*/
template<template<typename RAI, typename C> class iterator, template<template<typename RAI, typename C> class iterator,
typename RandomAccessIteratorIterator, typename RandomAccessIteratorIterator,
typename RandomAccessIterator3, typename RandomAccessIterator3,
typename _DifferenceTp, typename _DifferenceTp,
typename Comparator> typename Comparator>
RandomAccessIterator3 RandomAccessIterator3
multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin, multiway_merge_3_variant(
RandomAccessIteratorIterator seqs_end, RandomAccessIteratorIterator seqs_begin,
RandomAccessIterator3 target, RandomAccessIteratorIterator seqs_end,
Comparator comp, _DifferenceTp length, RandomAccessIterator3 target,
bool stable) Comparator comp, _DifferenceTp length)
{ {
_GLIBCXX_CALL(length); _GLIBCXX_CALL(length);
...@@ -423,6 +311,10 @@ template<template<typename RAI, typename C> class iterator, ...@@ -423,6 +311,10 @@ template<template<typename RAI, typename C> class iterator,
if (length == 0) if (length == 0)
return target; return target;
#if _GLIBCXX_ASSERTIONS
_DifferenceTp orig_length = length;
#endif
iterator<RandomAccessIterator1, Comparator> iterator<RandomAccessIterator1, Comparator>
seq0(seqs_begin[0].first, seqs_begin[0].second, comp), seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
seq1(seqs_begin[1].first, seqs_begin[1].second, comp), seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
...@@ -450,17 +342,16 @@ template<template<typename RAI, typename C> class iterator, ...@@ -450,17 +342,16 @@ template<template<typename RAI, typename C> class iterator,
else else
goto s210; goto s210;
} }
#define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1) \
#define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1)\
s ## a ## b ## c : \ s ## a ## b ## c : \
*target = *seq ## a; \ *target = *seq ## a; \
++target; \ ++target; \
--length; \ --length; \
++seq ## a; \ ++seq ## a; \
if (length == 0) goto finish; \ if (length == 0) goto finish; \
if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \ if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \ if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
goto s ## b ## c ## a; goto s ## b ## c ## a;
_GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=); _GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=);
_GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < ); _GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < );
...@@ -474,6 +365,14 @@ template<template<typename RAI, typename C> class iterator, ...@@ -474,6 +365,14 @@ template<template<typename RAI, typename C> class iterator,
finish: finish:
; ;
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(
((RandomAccessIterator1)seq0 - seqs_begin[0].first) +
((RandomAccessIterator1)seq1 - seqs_begin[1].first) +
((RandomAccessIterator1)seq2 - seqs_begin[2].first)
== orig_length);
#endif
seqs_begin[0].first = seq0; seqs_begin[0].first = seq0;
seqs_begin[1].first = seq1; seqs_begin[1].first = seq1;
seqs_begin[2].first = seq2; seqs_begin[2].first = seq2;
...@@ -481,95 +380,31 @@ template<template<typename RAI, typename C> class iterator, ...@@ -481,95 +380,31 @@ template<template<typename RAI, typename C> class iterator,
return target; return target;
} }
template<typename RandomAccessIteratorIterator, /**
typename RandomAccessIterator3, * @brief Highly efficient 4-way merging procedure.
typename _DifferenceTp, *
typename Comparator> * Merging is done with the algorithm implementation described by Peter
RandomAccessIterator3 * Sanders. Basically, the idea is to minimize the number of necessary
multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin, * comparison after merging out an element. The implementation trick
RandomAccessIteratorIterator seqs_end, * that makes this fast is that the order of the sequences is stored
RandomAccessIterator3 target, * in the instruction pointer (translated into goto labels in C++).
Comparator comp, *
_DifferenceTp length, bool stable) * This works well for merging up to 4 sequences.
{ *
_GLIBCXX_CALL(length); * Note that making the merging stable does <em>not</em> come at a
* performance hit.
typedef _DifferenceTp difference_type; *
typedef typename std::iterator_traits<RandomAccessIteratorIterator> * Whether the merging is done guarded or unguarded is selected by the
::value_type::first_type * used iterator class.
RandomAccessIterator1; *
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type * @param seqs_begin Begin iterator of iterator pair input sequence.
value_type; * @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
int min_seq; * @param comp Comparator.
RandomAccessIterator3 target_end; * @param length Maximum length to merge.
*
// Stable anyway. * @return End iterator of output sequence.
difference_type overhang = */
prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
difference_type total_length = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
if (overhang != -1)
{
difference_type unguarded_length =
std::min(length, total_length - overhang);
target_end = multiway_merge_3_variant<unguarded_iterator>
(seqs_begin, seqs_end, target, comp, unguarded_length, stable);
overhang = length - unguarded_length;
}
else
{
// Empty sequence found.
overhang = length;
target_end = target;
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
switch (min_seq)
{
case 0:
// Iterators will be advanced accordingly.
target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
seqs_begin[2].first, seqs_begin[2].second,
target_end, overhang, comp);
break;
case 1:
target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
seqs_begin[2].first, seqs_begin[2].second,
target_end, overhang, comp);
break;
case 2:
target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
seqs_begin[1].first, seqs_begin[1].second,
target_end, overhang, comp);
break;
default:
_GLIBCXX_PARALLEL_ASSERT(false);
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
return target_end;
}
/** @brief Highly efficient 4-way merging procedure.
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
* @param stable Unused, stable anyway.
* @return End iterator of output sequence. */
template<template<typename RAI, typename C> class iterator, template<template<typename RAI, typename C> class iterator,
typename RandomAccessIteratorIterator, typename RandomAccessIteratorIterator,
typename RandomAccessIterator3, typename RandomAccessIterator3,
...@@ -579,7 +414,7 @@ template<template<typename RAI, typename C> class iterator, ...@@ -579,7 +414,7 @@ template<template<typename RAI, typename C> class iterator,
multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin, multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end, RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target, RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length, bool stable) Comparator comp, _DifferenceTp length)
{ {
_GLIBCXX_CALL(length); _GLIBCXX_CALL(length);
typedef _DifferenceTp difference_type; typedef _DifferenceTp difference_type;
...@@ -676,651 +511,467 @@ template<template<typename RAI, typename C> class iterator, ...@@ -676,651 +511,467 @@ template<template<typename RAI, typename C> class iterator,
return target; return target;
} }
template<typename RandomAccessIteratorIterator, /** @brief Multi-way merging procedure for a high branching factor,
* guarded case.
*
* This merging variant uses a LoserTree class as selected by <tt>LT</tt>.
*
* Stability is selected through the used LoserTree class <tt>LT</tt>.
*
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
*
* @return End iterator of output sequence.
*/
template<typename LT,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3, typename RandomAccessIterator3,
typename _DifferenceTp, typename _DifferenceTp,
typename Comparator> typename Comparator>
RandomAccessIterator3 RandomAccessIterator3
multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin, multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end, RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target, RandomAccessIterator3 target,
Comparator comp, Comparator comp,
_DifferenceTp length, bool stable) _DifferenceTp length)
{ {
_GLIBCXX_CALL(length); _GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
typedef _DifferenceTp difference_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator> typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type ::value_type::first_type
RandomAccessIterator1; RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type; value_type;
int min_seq; int k = static_cast<int>(seqs_end - seqs_begin);
RandomAccessIterator3 target_end;
// Stable anyway. LT lt(k, comp);
difference_type overhang =
prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
difference_type total_length = 0; difference_type total_length = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
if (overhang != -1) // Default value for potentially non-default-constructible types.
value_type* arbitrary_element = NULL;
for (int t = 0; t < k; ++t)
{ {
difference_type unguarded_length = if(arbitrary_element == NULL
std::min(length, total_length - overhang); && _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]) > 0)
target_end = multiway_merge_4_variant<unguarded_iterator> arbitrary_element = &(*seqs_begin[t].first);
(seqs_begin, seqs_end, target, comp, unguarded_length, stable); total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
overhang = length - unguarded_length;
} }
else
if(total_length == 0)
return target;
for (int t = 0; t < k; ++t)
{ {
// Empty sequence found. if (seqs_begin[t].first == seqs_begin[t].second)
overhang = length; lt.insert_start(*arbitrary_element, t, true);
target_end = target; else
lt.insert_start(*seqs_begin[t].first, t, false);
} }
#if _GLIBCXX_ASSERTIONS lt.init();
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> > const difference_type const_total_length(std::min(total_length, length));
one_missing(seqs_begin, seqs_end);
one_missing.erase(one_missing.begin() + min_seq); //remove
target_end = multiway_merge_3_variant<guarded_iterator>( int source;
one_missing.begin(), one_missing.end(),
target_end, comp, overhang, stable);
// Insert back again. for (difference_type i = 0; i < const_total_length; ++i)
one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]); {
// Write back modified iterators. //take out
copy(one_missing.begin(), one_missing.end(), seqs_begin); source = lt.get_min_source();
#if _GLIBCXX_ASSERTIONS *(target++) = *(seqs_begin[source].first++);
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
return target_end; // Feed.
if (seqs_begin[source].first == seqs_begin[source].second)
lt.delete_min_insert(*arbitrary_element, true);
else
// Replace from same source.
lt.delete_min_insert(*seqs_begin[source].first, false);
}
return target;
} }
/** @brief Basic multi-way merging procedure. /** @brief Multi-way merging procedure for a high branching factor,
* unguarded case.
* *
* The head elements are kept in a sorted array, new heads are * Merging is done using the LoserTree class <tt>LT</tt>.
* inserted linearly. *
* @param seqs_begin Begin iterator of iterator pair input sequence. * Stability is selected by the used LoserTrees.
* @param seqs_end End iterator of iterator pair input sequence. *
* @param target Begin iterator out output sequence. * @pre No input will run out of elements during the merge.
* @param comp Comparator. *
* @param length Maximum length to merge. * @param seqs_begin Begin iterator of iterator pair input sequence.
* @param stable Stable merging incurs a performance penalty. * @param seqs_end End iterator of iterator pair input sequence.
* @return End iterator of output sequence. * @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
*
* @return End iterator of output sequence.
*/ */
template<typename RandomAccessIteratorIterator, template<typename LT,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3, typename RandomAccessIterator3,
typename _DifferenceTp, typename _DifferenceTp, typename Comparator>
typename Comparator>
RandomAccessIterator3 RandomAccessIterator3
multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin, multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end, RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target, RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length, bool stable) int min_seq, Comparator comp,
_DifferenceTp length)
{ {
_GLIBCXX_CALL(length) _GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type; typedef _DifferenceTp difference_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator> typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type ::value_type::first_type
RandomAccessIterator1; RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type; value_type;
int k = static_cast<int>(seqs_end - seqs_begin); int k = seqs_end - seqs_begin;
int nrs; // Number of remaining sequences.
// Determine the sentinel. The sentinel is largest/last element of the
// sequences with the smallest largest/last element.
value_type sentinel = *(seqs_begin[min_seq].second - 1);
LT lt(k, sentinel, comp);
// Avoid default constructor.
value_type* fe = static_cast<value_type*>(
::operator new(sizeof(value_type) * k)); // Front elements.
int* source = new int[k];
difference_type total_length = 0; difference_type total_length = 0;
// Write entries into queue. for (int t = 0; t < k; ++t)
nrs = 0;
for (int pi = 0; pi < k; ++pi)
{ {
if (seqs_begin[pi].first != seqs_begin[pi].second) #if _GLIBCXX_ASSERTIONS
{ _GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
::new(&(fe[nrs])) value_type(*(seqs_begin[pi].first)); #endif
source[nrs] = pi; lt.insert_start(*seqs_begin[t].first, t, false);
++nrs;
total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[pi]);
}
}
if (stable) total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
{
// Bubble sort fe and source by fe.
for (int k = 0; k < nrs - 1; ++k)
for (int pi = nrs - 1; pi > k; --pi)
if (comp(fe[pi], fe[pi - 1]) ||
(!comp(fe[pi - 1], fe[pi]) && source[pi] < source[pi - 1]))
{
std::swap(fe[pi - 1], fe[pi]);
std::swap(source[pi - 1], source[pi]);
}
}
else
{
for (int k = 0; k < nrs - 1; ++k)
for (int pi = nrs - 1; pi > k; --pi)
if (comp(fe[pi], fe[pi-1]))
{
std::swap(fe[pi-1], fe[pi]);
std::swap(source[pi-1], source[pi]);
}
} }
// Iterate. lt.init();
if (stable)
{ // Do not go past end.
int j; length = std::min(total_length, length);
while (nrs > 0 && length > 0)
{ int source;
if (source[0] < source[1])
{ #if _GLIBCXX_ASSERTIONS
// fe[0] <= fe[1] difference_type i = 0;
while ((nrs == 1 || !comp(fe[1], fe[0])) && length > 0) #endif
{
*target = fe[0]; RandomAccessIterator3 target_end = target + length;
++target; while (target < target_end)
++(seqs_begin[source[0]].first);
--length;
if (seqs_begin[source[0]].first
== seqs_begin[source[0]].second)
{
// Move everything to the left.
for (int s = 0; s < nrs - 1; ++s)
{
fe[s] = fe[s + 1];
source[s] = source[s + 1];
}
fe[nrs - 1].~value_type(); //Destruct explicitly.
--nrs;
break;
}
else
fe[0] = *(seqs_begin[source[0]].first);
}
}
else
{
// fe[0] < fe[1]
while ((nrs == 1 || comp(fe[0], fe[1])) && length > 0)
{
*target = fe[0];
++target;
++(seqs_begin[source[0]].first);
--length;
if (seqs_begin[source[0]].first
== seqs_begin[source[0]].second)
{
for (int s = 0; s < nrs - 1; ++s)
{
fe[s] = fe[s + 1];
source[s] = source[s + 1];
}
fe[nrs - 1].~value_type(); //Destruct explicitly.
--nrs;
break;
}
else
fe[0] = *(seqs_begin[source[0]].first);
}
}
// Sink down.
j = 1;
while ((j < nrs) && (comp(fe[j], fe[j - 1])
|| (!comp(fe[j - 1], fe[j])
&& (source[j] < source[j - 1]))))
{
std::swap(fe[j - 1], fe[j]);
std::swap(source[j - 1], source[j]);
++j;
}
}
}
else
{ {
int j; // Take out.
while (nrs > 0 && length > 0) source = lt.get_min_source();
{
// fe[0] <= fe[1]
while (nrs == 1 || (!comp(fe[1], fe[0])) && length > 0)
{
*target = fe[0];
++target;
++seqs_begin[source[0]].first;
--length;
if (seqs_begin[source[0]].first
== seqs_begin[source[0]].second)
{
for (int s = 0; s < (nrs - 1); ++s)
{
fe[s] = fe[s + 1];
source[s] = source[s + 1];
}
fe[nrs - 1].~value_type(); //Destruct explicitly.
--nrs;
break;
}
else
fe[0] = *(seqs_begin[source[0]].first);
}
// Sink down.
j = 1;
while ((j < nrs) && comp(fe[j], fe[j - 1]))
{
std::swap(fe[j - 1], fe[j]);
std::swap(source[j - 1], source[j]);
++j;
}
}
}
::operator delete(fe); //Destructors already called. #if _GLIBCXX_ASSERTIONS
delete[] source; _GLIBCXX_PARALLEL_ASSERT(0 <= source && source < k);
_GLIBCXX_PARALLEL_ASSERT(i == 0
|| !comp(*(seqs_begin[source].first), *(target - 1)));
#endif
return target; // Feed.
} *(target++) = *(seqs_begin[source].first++);
/** @brief Multi-way merging procedure for a high branching factor, #if _GLIBCXX_ASSERTIONS
* guarded case. _GLIBCXX_PARALLEL_ASSERT(
* (seqs_begin[source].first != seqs_begin[source].second)
* The head elements are kept in a loser tree. || (i >= length - 1));
* @param seqs_begin Begin iterator of iterator pair input sequence. ++i;
* @param seqs_end End iterator of iterator pair input sequence. #endif
* @param target Begin iterator out output sequence. // Replace from same source.
* @param comp Comparator. lt.delete_min_insert(*seqs_begin[source].first, false);
* @param length Maximum length to merge.
* @param stable Stable merging incurs a performance penalty.
* @return End iterator of output sequence.
*/
template<typename LT,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3
multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp,
_DifferenceTp length, bool stable)
{
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
int k = static_cast<int>(seqs_end - seqs_begin);
LT lt(k, comp);
difference_type total_length = 0;
// Default value for potentially non-default-constructible types.
value_type* arbitrary_element = NULL;
for (int t = 0; t < k; ++t)
{
if(arbitrary_element == NULL
&& _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]) > 0)
arbitrary_element = &(*seqs_begin[t].first);
total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
}
if(total_length == 0)
return target;
for (int t = 0; t < k; ++t)
{
if (stable)
{
if (seqs_begin[t].first == seqs_begin[t].second)
lt.insert_start_stable(*arbitrary_element, t, true);
else
lt.insert_start_stable(*seqs_begin[t].first, t, false);
}
else
{
if (seqs_begin[t].first == seqs_begin[t].second)
lt.insert_start(*arbitrary_element, t, true);
else
lt.insert_start(*seqs_begin[t].first, t, false);
}
}
if (stable)
lt.init_stable();
else
lt.init();
total_length = std::min(total_length, length);
int source;
if (stable)
{
for (difference_type i = 0; i < total_length; ++i)
{
// Take out.
source = lt.get_min_source();
*(target++) = *(seqs_begin[source].first++);
// Feed.
if (seqs_begin[source].first == seqs_begin[source].second)
lt.delete_min_insert_stable(*arbitrary_element, true);
else
// Replace from same source.
lt.delete_min_insert_stable(*seqs_begin[source].first, false);
}
}
else
{
for (difference_type i = 0; i < total_length; ++i)
{
//take out
source = lt.get_min_source();
*(target++) = *(seqs_begin[source].first++);
// Feed.
if (seqs_begin[source].first == seqs_begin[source].second)
lt.delete_min_insert(*arbitrary_element, true);
else
// Replace from same source.
lt.delete_min_insert(*seqs_begin[source].first, false);
}
} }
return target; return target;
} }
/** @brief Multi-way merging procedure for a high branching factor, /** @brief Multi-way merging procedure for a high branching factor,
* unguarded case. * requiring sentinels to exist.
* @param stable The value must the same as for the used LoserTrees.
* @param UnguardedLoserTree Loser Tree variant to use for the unguarded
* merging.
* @param GuardedLoserTree Loser Tree variant to use for the guarded
* merging.
* *
* The head elements are kept in a loser tree. * @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_begin Begin iterator of iterator pair input sequence. * @param seqs_end End iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence. * @param target Begin iterator out output sequence.
* @param target Begin iterator out output sequence. * @param comp Comparator.
* @param comp Comparator. * @param length Maximum length to merge.
* @param length Maximum length to merge. *
* @param stable Stable merging incurs a performance penalty. * @return End iterator of output sequence.
* @return End iterator of output sequence.
* @pre No input will run out of elements during the merge.
*/ */
template<typename LT, template<
typename RandomAccessIteratorIterator, typename UnguardedLoserTree,
typename RandomAccessIterator3, typename RandomAccessIteratorIterator,
typename _DifferenceTp, typename Comparator> typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3 RandomAccessIterator3
multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin, multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end, RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target, RandomAccessIterator3 target,
Comparator comp, Comparator comp,
_DifferenceTp length, bool stable) _DifferenceTp length)
{ {
_GLIBCXX_CALL(length) _GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
typedef _DifferenceTp difference_type;
typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator> typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type ::value_type::first_type
RandomAccessIterator1; RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type; value_type;
int k = seqs_end - seqs_begin; RandomAccessIterator3 target_end;
LT lt(k, comp);
difference_type total_length = 0; difference_type total_length = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
for (int t = 0; t < k; ++t)
{ {
#if _GLIBCXX_ASSERTIONS total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
_GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
#endif
if (stable)
lt.insert_start_stable(*seqs_begin[t].first, t, false);
else
lt.insert_start(*seqs_begin[t].first, t, false);
total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]); // Move the sequends end behind the sentinel spots. This has the
// effect that the sentinel appears to be within the sequence. Then,
// we can use the unguarded variant if we merge out as many
// non-sentinel elements as we have.
++((*s).second);
} }
if (stable) difference_type unguarded_length =
lt.init_stable(); std::min(length, total_length);
else target_end = multiway_merge_loser_tree_unguarded
lt.init(); <UnguardedLoserTree>
(seqs_begin, seqs_end, target, 0, comp, unguarded_length);
// Do not go past end.
length = std::min(total_length, length);
int source;
#if _GLIBCXX_ASSERTIONS
difference_type i = 0;
#endif
if (stable)
{
RandomAccessIterator3 target_end = target + length;
while (target < target_end)
{
// Take out.
source = lt.get_min_source();
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(i == 0
|| !comp(*(seqs_begin[source].first), *(target - 1)));
#endif
*(target++) = *(seqs_begin[source].first++);
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT( _GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
(seqs_begin[source].first != seqs_begin[source].second) _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
|| (i == length - 1));
++i;
#endif #endif
// Feed.
// Replace from same source.
lt.delete_min_insert_stable(*seqs_begin[source].first, false);
} // Restore the sequence ends so the sentinels are not contained in the
} // sequence any more (see comment in loop above).
else for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
{ { --((*s).second); }
RandomAccessIterator3 target_end = target + length;
while (target < target_end)
{
// Take out.
source = lt.get_min_source();
#if _GLIBCXX_ASSERTIONS return target_end;
if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1))) }
printf(" %i %i %i\n", length, i, source);
_GLIBCXX_PARALLEL_ASSERT(i == 0
|| !comp(*(seqs_begin[source].first), *(target - 1)));
#endif
*(target++) = *(seqs_begin[source].first++); /**
* @brief Traits for determining whether the loser tree should
* use pointers or copies.
*
* The field "use_pointer" is used to determine whether to use pointers in
* the loser trees or whether to copy the values into the loser tree.
*
* The default behavior is to use pointers if the data type is 4 times as
* big as the pointer to it.
*
* Specialize for your data type to customize the behavior.
*
* Example:
*
* template<>
* struct loser_tree_traits<int>
* { static const bool use_pointer = false; };
*
* template<>
* struct loser_tree_traits<heavyweight_type>
* { static const bool use_pointer = true; };
*
* @param T type to give the loser tree traits for.
*/
template <typename T>
struct loser_tree_traits
{
/**
* @brief True iff to use pointers instead of values in loser trees.
*
* The default behavior is to use pointers if the data type is four
* times as big as the pointer to it.
*/
static const bool use_pointer = (sizeof(T) > 4 * sizeof(T*));
};
#if _GLIBCXX_ASSERTIONS /**
if (!((seqs_begin[source].first != seqs_begin[source].second) * @brief Switch for 3-way merging with sentinels turned off.
|| (i >= length - 1))) *
printf(" %i %i %i\n", length, i, source); * Note that 3-way merging is always stable!
_GLIBCXX_PARALLEL_ASSERT( */
(seqs_begin[source].first != seqs_begin[source].second) template<
|| (i >= length - 1)); bool sentinels /*default == false*/,
++i; typename RandomAccessIteratorIterator,
#endif typename RandomAccessIterator3,
// Feed. typename _DifferenceTp,
// Replace from same source. typename Comparator>
lt.delete_min_insert(*seqs_begin[source].first, false); struct multiway_merge_3_variant_sentinel_switch
} {
} RandomAccessIterator3 operator()(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length)
{
return multiway_merge_3_variant<guarded_iterator>(
seqs_begin, seqs_end, target, comp, length);
}
};
return target; /**
* @brief Switch for 3-way merging with sentinels turned on.
*
* Note that 3-way merging is always stable!
*/
template<
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
struct multiway_merge_3_variant_sentinel_switch
<true, RandomAccessIteratorIterator, RandomAccessIterator3,
_DifferenceTp, Comparator>
{
RandomAccessIterator3 operator()(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length)
{
return multiway_merge_3_variant<unguarded_iterator>(
seqs_begin, seqs_end, target, comp, length);
} }
};
template<typename RandomAccessIteratorIterator, /**
typename RandomAccessIterator3, * @brief Switch for 4-way merging with sentinels turned off.
typename _DifferenceTp, *
typename Comparator> * Note that 4-way merging is always stable!
RandomAccessIterator3 */
multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, template<
RandomAccessIteratorIterator seqs_end, bool sentinels /*default == false*/,
RandomAccessIterator3 target, typename RandomAccessIteratorIterator,
Comparator comp, typename RandomAccessIterator3,
_DifferenceTp length, bool stable) typename _DifferenceTp,
typename Comparator>
struct multiway_merge_4_variant_sentinel_switch
{
RandomAccessIterator3 operator()(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length)
{ {
_GLIBCXX_CALL(length) return multiway_merge_4_variant<guarded_iterator>(
seqs_begin, seqs_end, target, comp, length);
}
};
typedef _DifferenceTp difference_type; /**
* @brief Switch for 4-way merging with sentinels turned on.
*
* Note that 4-way merging is always stable!
*/
template<
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
struct multiway_merge_4_variant_sentinel_switch
<true, RandomAccessIteratorIterator, RandomAccessIterator3,
_DifferenceTp, Comparator>
{
RandomAccessIterator3 operator()(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length)
{
return multiway_merge_4_variant<unguarded_iterator>(
seqs_begin, seqs_end, target, comp, length);
}
};
/**
* @brief Switch for k-way merging with sentinels turned on.
*/
template<
bool sentinels,
bool stable,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
struct multiway_merge_k_variant_sentinel_switch
{
RandomAccessIterator3 operator()(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length)
{
typedef typename std::iterator_traits<RandomAccessIteratorIterator> typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type ::value_type::first_type
RandomAccessIterator1; RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type; value_type;
int min_seq; return multiway_merge_loser_tree_sentinel<
RandomAccessIterator3 target_end; typename __gnu_cxx::__conditional_type<
difference_type overhang = prepare_unguarded(seqs_begin, seqs_end, loser_tree_traits<value_type>::use_pointer
comp, min_seq, stable); , LoserTreePointerUnguarded<stable, value_type, Comparator>
, LoserTreeUnguarded<stable, value_type, Comparator>
difference_type total_length = 0; >::__type>(seqs_begin, seqs_end, target, comp, length);
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
if (overhang != -1)
{
difference_type unguarded_length =
std::min(length, total_length - overhang);
target_end = multiway_merge_loser_tree_unguarded
<typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
(seqs_begin, seqs_end, target, comp, unguarded_length, stable);
overhang = length - unguarded_length;
}
else
{
// Empty sequence found.
overhang = length;
target_end = target;
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
target_end = multiway_merge_loser_tree
<typename loser_tree_traits<value_type, Comparator>::LT>
(seqs_begin, seqs_end, target_end, comp, overhang, stable);
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
return target_end;
} }
};
template<typename RandomAccessIteratorIterator, /**
typename RandomAccessIterator3, * @brief Switch for k-way merging with sentinels turned off.
typename _DifferenceTp, */
typename Comparator> template<
RandomAccessIterator3 bool stable,
multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin, typename RandomAccessIteratorIterator,
RandomAccessIteratorIterator seqs_end, typename RandomAccessIterator3,
RandomAccessIterator3 target, typename _DifferenceTp,
Comparator comp, typename Comparator>
_DifferenceTp length, bool stable) struct multiway_merge_k_variant_sentinel_switch
<false, stable, RandomAccessIteratorIterator, RandomAccessIterator3,
_DifferenceTp, Comparator>
{
RandomAccessIterator3 operator()(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length)
{ {
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator> typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type ::value_type::first_type
RandomAccessIterator1; RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type; value_type;
RandomAccessIterator3 target_end; return multiway_merge_loser_tree<
difference_type overhang = typename __gnu_cxx::__conditional_type<
prepare_unguarded_sentinel(seqs_begin, seqs_end, comp); loser_tree_traits<value_type>::use_pointer
, LoserTreePointer<stable, value_type, Comparator>
difference_type total_length = 0; , LoserTree<stable, value_type, Comparator>
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) >::__type >(seqs_begin, seqs_end, target, comp, length);
{
total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
// Sentinel spot.
++((*s).second);
}
difference_type unguarded_length =
std::min(length, total_length - overhang);
target_end = multiway_merge_loser_tree_unguarded
<typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
(seqs_begin, seqs_end, target, comp, unguarded_length, stable);
overhang = length - unguarded_length;
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
// Copy rest stable.
for (RandomAccessIteratorIterator s = seqs_begin;
s != seqs_end && overhang > 0; ++s)
{
// Restore.
--((*s).second);
difference_type local_length =
std::min<difference_type>(overhang, _GLIBCXX_PARALLEL_LENGTH(*s));
target_end = std::copy((*s).first, (*s).first + local_length,
target_end);
(*s).first += local_length;
overhang -= local_length;
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(overhang == 0);
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
return target_end;
} }
};
/** @brief Sequential multi-way merging switch. /** @brief Sequential multi-way merging switch.
* *
* The _GLIBCXX_PARALLEL_DECISION if based on the branching factor and * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor and
* runtime settings. * runtime settings.
* @param seqs_begin Begin iterator of iterator pair input sequence. * @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence. * @param seqs_end End iterator of iterator pair input sequence.
...@@ -1330,17 +981,18 @@ template<typename RandomAccessIteratorIterator, ...@@ -1330,17 +981,18 @@ template<typename RandomAccessIteratorIterator,
* @param stable Stable merging incurs a performance penalty. * @param stable Stable merging incurs a performance penalty.
* @param sentinel The sequences have a sentinel element. * @param sentinel The sequences have a sentinel element.
* @return End iterator of output sequence. */ * @return End iterator of output sequence. */
template<typename RandomAccessIteratorIterator, template<
typename RandomAccessIterator3, bool stable,
typename _DifferenceTp, bool sentinels,
typename Comparator> typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3 RandomAccessIterator3
multiway_merge(RandomAccessIteratorIterator seqs_begin, sequential_multiway_merge(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end, RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target, RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length, Comparator comp, _DifferenceTp length)
bool stable, bool sentinel,
sequential_tag)
{ {
_GLIBCXX_CALL(length) _GLIBCXX_CALL(length)
...@@ -1353,17 +1005,14 @@ template<typename RandomAccessIteratorIterator, ...@@ -1353,17 +1005,14 @@ template<typename RandomAccessIteratorIterator,
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s) for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
_GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp)); {
_GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
}
#endif #endif
RandomAccessIterator3 return_target = target; RandomAccessIterator3 return_target = target;
int k = static_cast<int>(seqs_end - seqs_begin); int k = static_cast<int>(seqs_end - seqs_begin);
_MultiwayMergeAlgorithm mwma = _Settings::get().multiway_merge_algorithm;
if (!sentinel && mwma == LOSER_TREE_SENTINEL)
mwma = LOSER_TREE_COMBINED;
switch (k) switch (k)
{ {
case 0: case 0:
...@@ -1382,113 +1031,30 @@ template<typename RandomAccessIteratorIterator, ...@@ -1382,113 +1031,30 @@ template<typename RandomAccessIteratorIterator,
target, length, comp); target, length, comp);
break; break;
case 3: case 3:
switch (mwma) return_target = multiway_merge_3_variant_sentinel_switch<
{ sentinels
case LOSER_TREE_COMBINED: , RandomAccessIteratorIterator
return_target = multiway_merge_3_combined(seqs_begin, , RandomAccessIterator3
seqs_end, , _DifferenceTp
target, , Comparator>()(seqs_begin, seqs_end, target, comp, length);
comp, length,
stable);
break;
case LOSER_TREE_SENTINEL:
return_target =
multiway_merge_3_variant<unguarded_iterator>(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
default:
return_target =
multiway_merge_3_variant<guarded_iterator>(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
}
break; break;
case 4: case 4:
switch (mwma) return_target = multiway_merge_4_variant_sentinel_switch<
{ sentinels
case LOSER_TREE_COMBINED: , RandomAccessIteratorIterator
return_target = multiway_merge_4_combined(seqs_begin, , RandomAccessIterator3
seqs_end, , _DifferenceTp
target, , Comparator>()(seqs_begin, seqs_end, target, comp, length);
comp, length, stable);
break;
case LOSER_TREE_SENTINEL:
return_target =
multiway_merge_4_variant<unguarded_iterator>(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
default:
return_target = multiway_merge_4_variant<guarded_iterator>(
seqs_begin,
seqs_end,
target,
comp, length, stable);
break;
}
break; break;
default: default:
{ return_target = multiway_merge_k_variant_sentinel_switch<
switch (mwma) sentinels
{ , stable
case BUBBLE: , RandomAccessIteratorIterator
return_target = multiway_merge_bubble(seqs_begin, , RandomAccessIterator3
seqs_end, , _DifferenceTp
target, , Comparator>()(seqs_begin, seqs_end, target, comp, length);
comp, length, stable); break;
break;
#if _GLIBCXX_LOSER_TREE_EXPLICIT
case LOSER_TREE_EXPLICIT:
return_target = multiway_merge_loser_tree<
LoserTreeExplicit<value_type, Comparator> >(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
#endif
#if _GLIBCXX_LOSER_TREE
case LOSER_TREE:
return_target = multiway_merge_loser_tree<
LoserTree<value_type, Comparator> >(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
#endif
#if _GLIBCXX_LOSER_TREE_COMBINED
case LOSER_TREE_COMBINED:
return_target = multiway_merge_loser_tree_combined(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
#endif
#if _GLIBCXX_LOSER_TREE_SENTINEL
case LOSER_TREE_SENTINEL:
return_target = multiway_merge_loser_tree_sentinel(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
#endif
default:
// multiway_merge algorithm not implemented.
_GLIBCXX_PARALLEL_ASSERT(0);
break;
}
}
} }
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp)); _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
...@@ -1497,38 +1063,246 @@ template<typename RandomAccessIteratorIterator, ...@@ -1497,38 +1063,246 @@ template<typename RandomAccessIteratorIterator,
return return_target; return return_target;
} }
/**
 *  @brief Functor that sorts a range with a stable sorting algorithm.
 *
 *  Primary template of sampling_sorter. Choosing the sorting routine via
 *  a template argument keeps the number of code instantiations in
 *  multiway_merge_sampling_splitting low, because only the requested
 *  variant is instantiated.
 *
 *  @param stable Compile-time switch; this primary template sorts stably.
 *  @param RandomAccessIterator Iterator type of the range to sort.
 *  @param StrictWeakOrdering Type of the comparison functor.
 */
template<bool stable, typename RandomAccessIterator,
         typename StrictWeakOrdering>
  struct sampling_sorter
  {
    /** @brief Sort [range_begin, range_end) stably according to @c order. */
    void
    operator()(RandomAccessIterator range_begin,
               RandomAccessIterator range_end,
               StrictWeakOrdering order)
    {
      __gnu_sequential::stable_sort(range_begin, range_end, order);
    }
  };
/**
 *  @brief Functor that sorts a range with a non-stable sorting algorithm.
 *
 *  Partial specialization of sampling_sorter for <tt>stable == false</tt>.
 *  Choosing the sorting routine via a template argument keeps the number
 *  of code instantiations in multiway_merge_sampling_splitting low.
 *
 *  @param RandomAccessIterator Iterator type of the range to sort.
 *  @param StrictWeakOrdering Type of the comparison functor.
 */
template<typename RandomAccessIterator, typename StrictWeakOrdering>
  struct sampling_sorter<false, RandomAccessIterator, StrictWeakOrdering>
  {
    /** @brief Sort [range_begin, range_end) according to @c order. */
    void
    operator()(RandomAccessIterator range_begin,
               RandomAccessIterator range_end,
               StrictWeakOrdering order)
    {
      __gnu_sequential::sort(range_begin, range_end, order);
    }
  };
/**
 *  @brief Sampling based splitting for parallel multiway-merge routine.
 *
 *  Takes <tt>merge_oversampling * num_threads</tt> samples from every
 *  input sequence (@c num_threads being the value returned by
 *  omp_get_num_threads()), sorts all samples together and uses evenly
 *  spaced samples as splitters. For every thread ("slab") and every
 *  sequence, the part of the sequence that belongs to the slab is located
 *  with std::upper_bound and written to @c pieces as a pair of offsets
 *  relative to the beginning of that sequence. The first slab always
 *  starts at offset 0, the last slab always ends at the end of the
 *  sequence.
 *
 *  @param stable Whether the samples are sorted stably (see
 *  sampling_sorter).
 *  @param seqs_begin Begin iterator of iterator pair input sequence.
 *  @param seqs_end End iterator of iterator pair input sequence.
 *  @param comp Comparator.
 *  @param length Maximum length to merge. Here it only scales the
 *  positions at which samples are taken.
 *  @param total_length Divisor for @c length when scaling the sample
 *  positions; presumably the total number of elements in all sequences
 *  (computed by the caller).
 *  @param pieces Output. Indexed as <tt>pieces[slab][seq]</tt>; the
 *  vectors are not resized here, so the caller has to provide
 *  @c num_threads vectors with at least one entry per sequence.
 */
template<
    bool stable
  , typename RandomAccessIteratorIterator
  , typename Comparator
  , typename difference_type>
  void multiway_merge_sampling_splitting(
      RandomAccessIteratorIterator seqs_begin,
      RandomAccessIteratorIterator seqs_end,
      Comparator comp, difference_type length,
      difference_type total_length,
      std::vector<std::pair<difference_type, difference_type> > *pieces)
  {
    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
      ::value_type::first_type
      RandomAccessIterator1;
    typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
      value_type;

    // k sequences.
    int k = static_cast<int>(seqs_end - seqs_begin);

    int num_threads = omp_get_num_threads();

    difference_type num_samples =
        __gnu_parallel::_Settings::get().merge_oversampling * num_threads;

    // Raw storage only: the samples are copy-constructed in place below,
    // so value_type does not have to be default-constructible.
    value_type* samples = static_cast<value_type*>(
        ::operator new(sizeof(value_type) * k * num_samples));

    // Sample.
    for (int s = 0; s < k; ++s)
      for (difference_type i = 0; i < num_samples; ++i)
        {
          // NOTE(review): for an empty input sequence sample_index is 0
          // and first[0] is read below -- confirm that callers never pass
          // empty sequences here.
          difference_type sample_index =
              static_cast<difference_type>(
                  _GLIBCXX_PARALLEL_LENGTH(seqs_begin[s]) * (double(i + 1) /
                  (num_samples + 1)) * (double(length)
                  / total_length));
          new(&(samples[s * num_samples + i])) value_type(
              seqs_begin[s].first[sample_index]);
        }

    // Sort stable or non-stable, depending on value of template parameter
    // "stable".
    sampling_sorter<stable, value_type*, Comparator>()(
        samples, samples + (num_samples * k), comp);

    for (int slab = 0; slab < num_threads; ++slab)
      // For each slab / processor.
      for (int seq = 0; seq < k; ++seq)
        {
          // For each sequence.
          if (slab > 0)
            pieces[slab][seq].first =
                std::upper_bound(
                    seqs_begin[seq].first,
                    seqs_begin[seq].second,
                    samples[num_samples * k * slab / num_threads],
                    comp)
                - seqs_begin[seq].first;
          else
            {
              // Absolute beginning.
              pieces[slab][seq].first = 0;
            }
          if ((slab + 1) < num_threads)
            pieces[slab][seq].second =
                std::upper_bound(
                    seqs_begin[seq].first,
                    seqs_begin[seq].second,
                    samples[num_samples * k * (slab + 1) /
                        num_threads], comp)
                - seqs_begin[seq].first;
          else
            // Absolute end.
            pieces[slab][seq].second =
                _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
        }

    // The samples were created with placement new, so they have to be
    // destroyed explicitly before the raw storage is released.
    for (difference_type i = 0; i < num_samples * k; ++i)
      samples[i].~value_type();
    ::operator delete(samples);
  }
/**
 *  @brief Exact splitting for parallel multiway-merge routine.
 *
 *  Divides @c length into @c num_threads parts with equally_split
 *  (@c num_threads being the value returned by omp_get_num_threads())
 *  and calls multiseq_partition for every inner border to obtain one
 *  split iterator per sequence. From these iterators, the part of every
 *  sequence that belongs to every thread ("slab") is written to
 *  @c pieces as a pair of offsets relative to the beginning of that
 *  sequence. Consecutive slabs are contiguous: a slab starts where the
 *  previous one ended, the first slab starts at offset 0.
 *
 *  @param stable Not used by this splitter.
 *  @param seqs_begin Begin iterator of iterator pair input sequence.
 *  @param seqs_end End iterator of iterator pair input sequence.
 *  @param comp Comparator.
 *  @param length Maximum length to merge.
 *  @param total_length Compared against @c length: if both are equal
 *  ("tight"), the last slab simply extends to the end of every sequence;
 *  otherwise the end of the last slab is determined by an additional
 *  multiseq_partition call for rank @c length.
 *  @param pieces Output. Indexed as <tt>pieces[slab][seq]</tt>; the
 *  vectors are not resized here, so the caller has to provide
 *  @c num_threads vectors with at least one entry per sequence.
 */
template<
    bool stable
  , typename RandomAccessIteratorIterator
  , typename Comparator
  , typename difference_type>
  void multiway_merge_exact_splitting(
      RandomAccessIteratorIterator seqs_begin,
      RandomAccessIteratorIterator seqs_end,
      Comparator comp,
      difference_type length,
      difference_type total_length,
      std::vector<std::pair<difference_type, difference_type> > *pieces)
  {
    typedef typename std::iterator_traits<RandomAccessIteratorIterator>
      ::value_type::first_type
      RandomAccessIterator1;

    const bool tight = (total_length == length);

    // k sequences.
    const int k = static_cast<int>(seqs_end - seqs_begin);

    const int num_threads = omp_get_num_threads();

    // (Settings::multiway_merge_splitting == __gnu_parallel::_Settings::EXACT).
    std::vector<RandomAccessIterator1>* offsets =
        new std::vector<RandomAccessIterator1>[num_threads];
    std::vector<
        std::pair<RandomAccessIterator1, RandomAccessIterator1>
        > se(k);

    // Work on a copy of the iterator pairs.
    copy(seqs_begin, seqs_end, se.begin());

    difference_type* borders =
        new difference_type[num_threads + 1];
    equally_split(length, num_threads, borders);

    // One split position per sequence for every inner border.
    for (int s = 0; s < (num_threads - 1); ++s)
      {
        offsets[s].resize(k);
        multiseq_partition(
            se.begin(), se.end(), borders[s + 1],
            offsets[s].begin(), comp);
      }

    // Last one also needed and available. It does not depend on the loop
    // above, so it is computed exactly once -- also when there is only a
    // single thread and the loop body never runs.
    if (!tight)
      {
        offsets[num_threads - 1].resize(k);
        multiseq_partition(se.begin(), se.end(),
                           difference_type(length),
                           offsets[num_threads - 1].begin(), comp);
      }

    // The borders are not referenced any more from here on.
    delete[] borders;

    for (int slab = 0; slab < num_threads; ++slab)
      {
        // For each slab / processor.
        for (int seq = 0; seq < k; ++seq)
          {
            // For each sequence.
            if (slab == 0)
              {
                // Absolute beginning.
                pieces[slab][seq].first = 0;
              }
            else
              pieces[slab][seq].first =
                  pieces[slab - 1][seq].second;
            if (!tight || slab < (num_threads - 1))
              pieces[slab][seq].second =
                  offsets[slab][seq] - seqs_begin[seq].first;
            else
              {
                // slab == num_threads - 1
                pieces[slab][seq].second =
                    _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
              }
          }
      }
    delete[] offsets;
  }
/** @brief Parallel multi-way merge routine. /** @brief Parallel multi-way merge routine.
* *
* The _GLIBCXX_PARALLEL_DECISION if based on the branching factor * The _GLIBCXX_PARALLEL_DECISION is based on the branching factor
* and runtime settings. * and runtime settings.
* @param seqs_begin Begin iterator of iterator pair input sequence. *
* @param seqs_end End iterator of iterator pair input sequence. * Must not be called if the number of sequences is 1.
* @param target Begin iterator out output sequence. *
* @param comp Comparator. * @param Splitter functor to split input (either exact or sampling based)
* @param length Maximum length to merge. *
* @param stable Stable merging incurs a performance penalty. * @param seqs_begin Begin iterator of iterator pair input sequence.
* @param sentinel Ignored. * @param seqs_end End iterator of iterator pair input sequence.
* @return End iterator of output sequence. * @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
* @param stable Stable merging incurs a performance penalty.
* @param sentinel Ignored.
* @return End iterator of output sequence.
*/ */
template<typename RandomAccessIteratorIterator, template<
typename RandomAccessIterator3, bool stable,
typename _DifferenceTp, bool sentinels,
typename Comparator> typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Splitter,
typename Comparator
>
RandomAccessIterator3 RandomAccessIterator3
parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin, parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end, RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target, RandomAccessIterator3 target,
Comparator comp, Comparator comp,
_DifferenceTp length, bool stable, bool sentinel) Splitter splitter,
_DifferenceTp length)
{ {
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(seqs_end - seqs_begin > 1);
#endif
_GLIBCXX_CALL(length) _GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type; typedef _DifferenceTp difference_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator> typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type ::value_type::first_type
RandomAccessIterator1; RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type typedef typename
value_type; std::iterator_traits<RandomAccessIterator1>::value_type value_type;
// k sequences. // k sequences.
int k = static_cast<int>(seqs_end - seqs_begin); int k = static_cast<int>(seqs_end - seqs_begin);
...@@ -1543,13 +1317,10 @@ template<typename RandomAccessIteratorIterator, ...@@ -1543,13 +1317,10 @@ template<typename RandomAccessIteratorIterator,
if (total_length == 0 || k == 0) if (total_length == 0 || k == 0)
return target; return target;
bool tight = (total_length == length);
std::vector<std::pair<difference_type, difference_type> >* pieces; std::vector<std::pair<difference_type, difference_type> >* pieces;
thread_index_t num_threads = static_cast<thread_index_t>( thread_index_t num_threads = static_cast<thread_index_t>(
std::min<difference_type>(get_max_threads(), total_length)); std::min<difference_type>(get_max_threads(), total_length));
const _Settings& __s = _Settings::get();
# pragma omp parallel num_threads (num_threads) # pragma omp parallel num_threads (num_threads)
{ {
...@@ -1562,126 +1333,12 @@ template<typename RandomAccessIteratorIterator, ...@@ -1562,126 +1333,12 @@ template<typename RandomAccessIteratorIterator,
for (int s = 0; s < num_threads; ++s) for (int s = 0; s < num_threads; ++s)
pieces[s].resize(k); pieces[s].resize(k);
difference_type num_samples = __s.merge_oversampling difference_type num_samples =
* num_threads; __gnu_parallel::_Settings::get().merge_oversampling *
num_threads;
if (__s.multiway_merge_splitting == SAMPLING) splitter(seqs_begin, seqs_end, comp, length, total_length,
{ pieces);
value_type* samples = static_cast<value_type*>(
::operator new(sizeof(value_type) * k * num_samples));
// Sample.
for (int s = 0; s < k; ++s)
for (difference_type i = 0; i < num_samples; ++i)
{
difference_type sample_index =
static_cast<difference_type>(
_GLIBCXX_PARALLEL_LENGTH(seqs_begin[s])
* (double(i + 1) / (num_samples + 1))
* (double(length) / total_length));
::new(&(samples[s * num_samples + i]))
value_type(seqs_begin[s].first[sample_index]);
}
if (stable)
__gnu_sequential::stable_sort(samples, samples
+ (num_samples * k), comp);
else
__gnu_sequential::sort(samples, samples
+ (num_samples * k), comp);
for (int slab = 0; slab < num_threads; ++slab)
// For each slab / processor.
for (int seq = 0; seq < k; ++seq)
{
// For each sequence.
if (slab > 0)
pieces[slab][seq].first =
std::upper_bound(seqs_begin[seq].first,
seqs_begin[seq].second,
samples[num_samples * k
* slab / num_threads],
comp)
- seqs_begin[seq].first;
else
{
// Absolute beginning.
pieces[slab][seq].first = 0;
}
if ((slab + 1) < num_threads)
pieces[slab][seq].second =
std::upper_bound(seqs_begin[seq].first,
seqs_begin[seq].second,
samples[num_samples * k
* (slab + 1)
/ num_threads], comp)
- seqs_begin[seq].first;
else
pieces[slab][seq].second
= _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
}
::operator delete(samples);
}
else
{
// (_Settings::multiway_merge_splitting == _Settings::EXACT).
std::vector<RandomAccessIterator1>* offsets =
new std::vector<RandomAccessIterator1>[num_threads];
std::vector<
std::pair<RandomAccessIterator1, RandomAccessIterator1>
> se(k);
copy(seqs_begin, seqs_end, se.begin());
difference_type* borders =
new difference_type[num_threads + 1];
equally_split(length, num_threads, borders);
for (int s = 0; s < (num_threads - 1); ++s)
{
offsets[s].resize(k);
multiseq_partition(
se.begin(), se.end(), borders[s + 1],
offsets[s].begin(), comp);
// Last one also needed and available.
if (!tight)
{
offsets[num_threads - 1].resize(k);
multiseq_partition(se.begin(), se.end(),
difference_type(length),
offsets[num_threads - 1].begin(),
comp);
}
}
for (int slab = 0; slab < num_threads; ++slab)
{
// For each slab / processor.
for (int seq = 0; seq < k; ++seq)
{
// For each sequence.
if (slab == 0)
{
// Absolute beginning.
pieces[slab][seq].first = 0;
}
else
pieces[slab][seq].first =
pieces[slab - 1][seq].second;
if (!tight || slab < (num_threads - 1))
pieces[slab][seq].second =
offsets[slab][seq] - seqs_begin[seq].first;
else
{
// slab == num_threads - 1
pieces[slab][seq].second =
_GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
}
}
}
delete[] offsets;
}
} //single } //single
thread_index_t iam = omp_get_thread_num(); thread_index_t iam = omp_get_thread_num();
...@@ -1701,15 +1358,14 @@ template<typename RandomAccessIteratorIterator, ...@@ -1701,15 +1358,14 @@ template<typename RandomAccessIteratorIterator,
for (int s = 0; s < k; ++s) for (int s = 0; s < k; ++s)
{ {
chunks[s] = std::make_pair( chunks[s] = std::make_pair(
seqs_begin[s].first + pieces[iam][s].first, seqs_begin[s].first + pieces[iam][s].first,
seqs_begin[s].first + pieces[iam][s].second); seqs_begin[s].first + pieces[iam][s].second);
local_length += _GLIBCXX_PARALLEL_LENGTH(chunks[s]); local_length += _GLIBCXX_PARALLEL_LENGTH(chunks[s]);
} }
multiway_merge( sequential_multiway_merge<stable, sentinels>(
chunks, chunks + k, target + target_position, comp, chunks, chunks + k, target + target_position, comp,
std::min(local_length, length - target_position), std::min(local_length, length - target_position));
stable, false, sequential_tag());
delete[] chunks; delete[] chunks;
} }
...@@ -1727,7 +1383,7 @@ template<typename RandomAccessIteratorIterator, ...@@ -1727,7 +1383,7 @@ template<typename RandomAccessIteratorIterator,
(pieces[iam][1].second - pieces[iam][1].first), (pieces[iam][1].second - pieces[iam][1].first),
comp); comp);
} }
} //parallel } // parallel
#if _GLIBCXX_ASSERTIONS #if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp)); _GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
...@@ -1743,88 +1399,605 @@ template<typename RandomAccessIteratorIterator, ...@@ -1743,88 +1399,605 @@ template<typename RandomAccessIteratorIterator,
} }
/** /**
* @brief Multi-way merging front-end. * @brief Multiway Merge Frontend.
* @param seqs_begin Begin iterator of iterator pair input sequence. *
* @param seqs_end End iterator of iterator pair input sequence. * Merge the sequences specified by seqs_begin and seqs_end into
* @param target Begin iterator out output sequence. * target. seqs_begin and seqs_end must point to a sequence of
* @param comp Comparator. * pairs. These pairs must contain an iterator to the beginning
* @param length Maximum length to merge. * of a sequence in their first entry and an iterator the end of
* @param stable Stable merging incurs a performance penalty. * the same sequence in their second entry.
* @return End iterator of output sequence. *
* Ties are broken arbitrarily. See stable_multiway_merge for a variant
* that breaks ties by sequence number but is slower.
*
* The first entries of the pairs (i.e. the begin iterators) will be moved
* forward.
*
* The output sequence has to provide enough space for all elements
* that are written to it.
*
* This function will merge the input sequences:
*
* - not stable
* - parallel, depending on the input size and Settings
* - using sampling for splitting
* - not using sentinels
*
* Example:
*
* <pre>
* int sequences[10][10];
* for (int i = 0; i < 10; ++i)
* for (int j = 0; i < 10; ++j)
* sequences[i][j] = j;
*
* int out[33];
* std::vector<std::pair<int*> > seqs;
* for (int i = 0; i < 10; ++i)
* { seqs.push(std::make_pair<int*>(sequences[i], sequences[i] + 10)) }
*
* multiway_merge(seqs.begin(), seqs.end(), target, std::less<int>(), 33);
* </pre>
*
* @see stable_multiway_merge
*
* @pre All input sequences must be sorted.
* @pre Target must provide enough space to merge out length elements or
* the number of elements in all sequences, whichever is smaller.
*
* @post [target, return value) contains merged elements from the
* input sequences.
* @post return value - target = min(length, number of elements in all
* sequences).
*
* @param RandomAccessIteratorPairIterator iterator over sequence
* of pairs of iterators
* @param RandomAccessIteratorOut iterator over target sequence
* @param _DifferenceTp difference type for the sequence
* @param Comparator strict weak ordering type to compare elements
* in sequences
*
* @param seqs_begin begin of sequence sequence
* @param seqs_end end of sequence sequence
* @param target target sequence to merge to.
* @param comp strict weak ordering to use for element comparison.
* @param length the number of elements to merge into target.
*
* @return end iterator of output sequence
*/ */
template<typename RandomAccessIteratorPairIterator, template<
typename RandomAccessIterator3, typename RandomAccessIteratorPairIterator
typename _DifferenceTp, , typename RandomAccessIteratorOut
typename Comparator> , typename _DifferenceTp
RandomAccessIterator3 , typename Comparator>
multiway_merge(RandomAccessIteratorPairIterator seqs_begin, RandomAccessIteratorOut
RandomAccessIteratorPairIterator seqs_end, multiway_merge(RandomAccessIteratorPairIterator seqs_begin
RandomAccessIterator3 target, Comparator comp, , RandomAccessIteratorPairIterator seqs_end
_DifferenceTp length, bool stable) , RandomAccessIteratorOut target
{ , Comparator comp, _DifferenceTp length)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
return target;
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ false, /* sentinels = */ false>
(seqs_begin, seqs_end, target, comp,
multiway_merge_sampling_splitting</* stable = */ false,
RandomAccessIteratorPairIterator, Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
target_end = sequential_multiway_merge
</* stable = */false, /* sentinels = */ false>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
multiway_merge(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::sequential_tag)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
return target;
// Execute multiway merge *sequentially*.
return sequential_multiway_merge
</* stable = */ false, /* sentinels = */ false>
(seqs_begin, seqs_end, target, comp, length);
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
multiway_merge(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::exact_tag)
{
typedef _DifferenceTp difference_type; typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin) _GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end) if (seqs_begin == seqs_end)
return target; return target;
const _Settings& __s = _Settings::get(); // Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
RandomAccessIterator3 target_end; // Settings.
if (_GLIBCXX_PARALLEL_CONDITION( RandomAccessIteratorOut target_end;
((seqs_end - seqs_begin) >= __s.multiway_merge_minimal_k) if ((seqs_end - seqs_begin > 1) &&
&& ((sequence_index_t)length >= __s.multiway_merge_minimal_n))) _GLIBCXX_PARALLEL_CONDITION(
target_end = parallel_multiway_merge(seqs_begin, seqs_end, ((seqs_end - seqs_begin) >=
target, comp, __gnu_parallel::_Settings::get().multiway_merge_minimal_k)
static_cast<difference_type>(length), && ((sequence_index_t)length >=
stable, false); __gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ false, /* sentinels = */ false>(
seqs_begin, seqs_end,
target, comp,
multiway_merge_exact_splitting</* stable = */ false,
RandomAccessIteratorPairIterator, Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else else
target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length, target_end = sequential_multiway_merge
stable, false, sequential_tag()); </* stable = */ false, /* sentinels = */ false>(
seqs_begin, seqs_end,
target, comp, length);
return target_end; return target_end;
} }
/** @brief Multi-way merging front-end. template<
* @param seqs_begin Begin iterator of iterator pair input sequence. typename RandomAccessIteratorPairIterator
* @param seqs_end End iterator of iterator pair input sequence. , typename RandomAccessIteratorOut
* @param target Begin iterator out output sequence. , typename _DifferenceTp
* @param comp Comparator. , typename Comparator>
* @param length Maximum length to merge. RandomAccessIteratorOut
* @param stable Stable merging incurs a performance penalty. stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin
* @return End iterator of output sequence. , RandomAccessIteratorPairIterator seqs_end
* @pre For each @c i, @c seqs_begin[i].second must be the end , RandomAccessIteratorOut target
* marker of the sequence, but also reference the one more sentinel , Comparator comp, _DifferenceTp length)
* element. */ {
template<typename RandomAccessIteratorPairIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3
multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
RandomAccessIteratorPairIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp,
_DifferenceTp length,
bool stable)
{
typedef _DifferenceTp difference_type; typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end) if (seqs_begin == seqs_end)
return target; return target;
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ true, /* sentinels = */ false>(
seqs_begin, seqs_end,
target, comp,
multiway_merge_sampling_splitting</* stable = */ true,
RandomAccessIteratorPairIterator, Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
target_end = sequential_multiway_merge
</* stable = */ true, /* sentinels = */ false>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::sequential_tag)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
{ return target; }
// Execute multiway merge *sequentially*.
return sequential_multiway_merge
</* stable = */ true, /* sentinels = */ false>
(seqs_begin, seqs_end, target, comp, length);
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::exact_tag)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin) _GLIBCXX_CALL(seqs_end - seqs_begin)
const _Settings& __s = _Settings::get(); // catch special case: no sequences
const bool cond1 = seqs_end - seqs_begin >= __s.multiway_merge_minimal_k; if (seqs_begin == seqs_end)
const bool cond2 = sequence_index_t(length) >= __s.multiway_merge_minimal_n; { return target; }
if (_GLIBCXX_PARALLEL_CONDITION(cond1 && cond2))
return parallel_multiway_merge(seqs_begin, seqs_end, target, comp, // Execute merge; maybe parallel, depending on the number of merged
length, stable, true); // elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ true, /* sentinels = */ false>(
seqs_begin, seqs_end,
target, comp,
multiway_merge_exact_splitting
</* stable = */ true, RandomAccessIteratorPairIterator,
Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else else
return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable, target_end = sequential_multiway_merge</* stable = */ true,
true, sequential_tag()); /* sentinels = */ false>(
} seqs_begin, seqs_end,
target, comp, length);
return target_end;
} }
/**
* @brief Multiway Merge Frontend.
*
* Merge the sequences specified by seqs_begin and seqs_end into
* target. seqs_begin and seqs_end must point to a sequence of
* pairs. These pairs must contain an iterator to the beginning
* of a sequence in their first entry and an iterator the end of
* the same sequence in their second entry.
*
* Ties are broken arbitrarily. See stable_multiway_merge for a variant
* that breaks ties by sequence number but is slower.
*
* The first entries of the pairs (i.e. the begin iterators) will be moved
* forward.
*
* The output sequence has to provide enough space for all elements
* that are written to it.
*
* This function will merge the input sequences:
*
* - not stable
* - parallel, depending on the input size and Settings
* - using sampling for splitting
* - using sentinels
*
* You have to take care that the element the end iterator points to is
* readable and contains a value that is greater than any other non-sentinel
* value in all sequences.
*
* Example:
*
* <pre>
* int sequences[10][11];
* for (int i = 0; i < 10; ++i)
* for (int j = 0; i < 11; ++j)
* sequences[i][j] = j; // last one is sentinel!
*
* int out[33];
* std::vector<std::pair<int*> > seqs;
* for (int i = 0; i < 10; ++i)
* { seqs.push(std::make_pair<int*>(sequences[i], sequences[i] + 10)) }
*
* multiway_merge(seqs.begin(), seqs.end(), target, std::less<int>(), 33);
* </pre>
*
* @pre All input sequences must be sorted.
* @pre Target must provide enough space to merge out length elements or
* the number of elements in all sequences, whichever is smaller.
* @pre For each @c i, @c seqs_begin[i].second must be the end
* marker of the sequence, but also reference the one more sentinel
* element.
*
* @post [target, return value) contains merged elements from the
* input sequences.
* @post return value - target = min(length, number of elements in all
* sequences).
*
* @see stable_multiway_merge_sentinels
*
* @param RandomAccessIteratorPairIterator iterator over sequence
* of pairs of iterators
* @param RandomAccessIteratorOut iterator over target sequence
* @param _DifferenceTp difference type for the sequence
* @param Comparator strict weak ordering type to compare elements
* in sequences
*
* @param seqs_begin begin of sequence sequence
* @param seqs_end end of sequence sequence
* @param target target sequence to merge to.
* @param comp strict weak ordering to use for element comparison.
* @param length the number of elements to merge into target.
*
* @return end iterator of output sequence
*/
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
{ return target; }
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ false, /* sentinels = */ true>
(seqs_begin, seqs_end, target, comp,
multiway_merge_sampling_splitting
</* stable = */ false, RandomAccessIteratorPairIterator,
Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
target_end = sequential_multiway_merge
</* stable = */false, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
/**
 * @brief Multiway merging with sentinels, forced to run sequentially.
 *
 * Overload selected by passing a sequential_tag as last argument.  It
 * always forwards to sequential_multiway_merge with stable = false and
 * sentinels = true; no parallel execution is considered.
 *
 * @param seqs_begin begin of the sequence of (begin, end) iterator pairs
 * @param seqs_end end of the sequence of (begin, end) iterator pairs
 * @param target target sequence to merge to.
 * @param comp strict weak ordering to use for element comparison.
 * @param length the number of elements to merge into target.
 *
 * @return @c target if there are no input sequences, otherwise the
 * iterator returned by sequential_multiway_merge.
 */
template<typename RandomAccessIteratorPairIterator,
         typename RandomAccessIteratorOut,
         typename _DifferenceTp,
         typename Comparator>
RandomAccessIteratorOut
multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin,
                         RandomAccessIteratorPairIterator seqs_end,
                         RandomAccessIteratorOut target,
                         Comparator comp, _DifferenceTp length,
                         __gnu_parallel::sequential_tag)
{
  _GLIBCXX_CALL(seqs_end - seqs_begin)

  // catch special case: no sequences
  if (seqs_begin == seqs_end)
    return target;

  // Execute multiway merge *sequentially*.
  return sequential_multiway_merge
    </* stable = */ false, /* sentinels = */ true>
    (seqs_begin, seqs_end, target, comp, length);
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::exact_tag)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
{ return target; }
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ false, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp,
multiway_merge_exact_splitting
</* stable = */ false, RandomAccessIteratorPairIterator,
Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
target_end = sequential_multiway_merge
</* stable = */ false, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
{ return target; }
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ true, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp,
multiway_merge_sampling_splitting
</* stable = */ true, RandomAccessIteratorPairIterator,
Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
target_end = sequential_multiway_merge
</* stable = */ true, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
/**
 * @brief Stable multiway merging with sentinels, forced to run
 * sequentially.
 *
 * Overload selected by passing a sequential_tag as last argument.  It
 * always forwards to sequential_multiway_merge with stable = true and
 * sentinels = true; no parallel execution is considered.
 *
 * @param seqs_begin begin of the sequence of (begin, end) iterator pairs
 * @param seqs_end end of the sequence of (begin, end) iterator pairs
 * @param target target sequence to merge to.
 * @param comp strict weak ordering to use for element comparison.
 * @param length the number of elements to merge into target.
 *
 * @return @c target if there are no input sequences, otherwise the
 * iterator returned by sequential_multiway_merge.
 */
template<typename RandomAccessIteratorPairIterator,
         typename RandomAccessIteratorOut,
         typename _DifferenceTp,
         typename Comparator>
RandomAccessIteratorOut
stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin,
                                RandomAccessIteratorPairIterator seqs_end,
                                RandomAccessIteratorOut target,
                                Comparator comp, _DifferenceTp length,
                                __gnu_parallel::sequential_tag)
{
  _GLIBCXX_CALL(seqs_end - seqs_begin)

  // catch special case: no sequences
  if (seqs_begin == seqs_end)
    return target;

  // Execute multiway merge *sequentially*.
  return sequential_multiway_merge
    </* stable = */ true, /* sentinels = */ true>
    (seqs_begin, seqs_end, target, comp, length);
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::exact_tag)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
{ return target; }
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ true, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp,
multiway_merge_exact_splitting
</* stable = */ true, RandomAccessIteratorPairIterator,
Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
target_end = sequential_multiway_merge
</* stable = */ true, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
}; // namespace __gnu_parallel
#endif #endif
...@@ -80,26 +80,9 @@ template<typename RandomAccessIterator> ...@@ -80,26 +80,9 @@ template<typename RandomAccessIterator>
/** @brief Start indices, per thread. */ /** @brief Start indices, per thread. */
difference_type* starts; difference_type* starts;
/** @brief Temporary arrays for each thread.
*
* Indirection Allows using the temporary storage in different
* ways, without code duplication.
* @see _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST */
value_type** temporaries;
#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
/** @brief Storage in which to sort. */ /** @brief Storage in which to sort. */
RandomAccessIterator* sorting_places; value_type** temporary;
/** @brief Storage into which to merge. */
value_type** merging_places;
#else
/** @brief Storage in which to sort. */
value_type** sorting_places;
/** @brief Storage into which to merge. */
RandomAccessIterator* merging_places;
#endif
/** @brief Samples. */ /** @brief Samples. */
value_type* samples; value_type* samples;
...@@ -108,9 +91,6 @@ template<typename RandomAccessIterator> ...@@ -108,9 +91,6 @@ template<typename RandomAccessIterator>
/** @brief Pieces of data to merge @c [thread][sequence] */ /** @brief Pieces of data to merge @c [thread][sequence] */
std::vector<Piece<difference_type> >* pieces; std::vector<Piece<difference_type> >* pieces;
/** @brief Stable sorting desired. */
bool stable;
}; };
/** /**
...@@ -122,7 +102,7 @@ template<typename RandomAccessIterator> ...@@ -122,7 +102,7 @@ template<typename RandomAccessIterator>
template<typename RandomAccessIterator, typename _DifferenceTp> template<typename RandomAccessIterator, typename _DifferenceTp>
void void
determine_samples(PMWMSSortingData<RandomAccessIterator>* sd, determine_samples(PMWMSSortingData<RandomAccessIterator>* sd,
_DifferenceTp& num_samples) _DifferenceTp num_samples)
{ {
typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type; typedef typename traits_type::value_type value_type;
...@@ -130,8 +110,6 @@ template<typename RandomAccessIterator, typename _DifferenceTp> ...@@ -130,8 +110,6 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
thread_index_t iam = omp_get_thread_num(); thread_index_t iam = omp_get_thread_num();
num_samples = _Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
difference_type* es = new difference_type[num_samples + 2]; difference_type* es = new difference_type[num_samples + 2];
equally_split(sd->starts[iam + 1] - sd->starts[iam], equally_split(sd->starts[iam + 1] - sd->starts[iam],
...@@ -144,11 +122,201 @@ template<typename RandomAccessIterator, typename _DifferenceTp> ...@@ -144,11 +122,201 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
delete[] es; delete[] es;
} }
/** @brief Split consistently.
 *
 * Primary template: it declares no members.  The partial specializations
 * for @c exact == true (exact splitting) and @c exact == false (splitting
 * by sampling) provide the call operator.
 */
template<bool exact, typename RandomAccessIterator,
typename Comparator, typename SortingPlacesIterator>
struct split_consistently
{
};
/** @brief Split by exact splitting. */
template<typename RandomAccessIterator, typename Comparator,
typename SortingPlacesIterator>
struct split_consistently
<true, RandomAccessIterator, Comparator, SortingPlacesIterator>
{
void operator()(
const thread_index_t iam,
PMWMSSortingData<RandomAccessIterator>* sd,
Comparator& comp,
const typename
std::iterator_traits<RandomAccessIterator>::difference_type
num_samples)
const
{
# pragma omp barrier
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
seqs(sd->num_threads);
for (thread_index_t s = 0; s < sd->num_threads; s++)
seqs[s] = std::make_pair(sd->temporary[s],
sd->temporary[s]
+ (sd->starts[s + 1] - sd->starts[s]));
std::vector<SortingPlacesIterator> offsets(sd->num_threads);
// if not last thread
if (iam < sd->num_threads - 1)
multiseq_partition(seqs.begin(), seqs.end(),
sd->starts[iam + 1], offsets.begin(), comp);
for (int seq = 0; seq < sd->num_threads; seq++)
{
// for each sequence
if (iam < (sd->num_threads - 1))
sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
else
// very end of this sequence
sd->pieces[iam][seq].end =
sd->starts[seq + 1] - sd->starts[seq];
}
# pragma omp barrier
for (thread_index_t seq = 0; seq < sd->num_threads; seq++)
{
// For each sequence.
if (iam > 0)
sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
else
// Absolute beginning.
sd->pieces[iam][seq].begin = 0;
}
}
};
/** @brief Split by sampling.
 *
 * Specialization for @c exact == false.  The call operator fills
 * @c sd->pieces[iam][s] (begin and end offsets) for every sequence @c s,
 * using the sorted samples in @c sd->samples as splitters.
 */
template<typename RandomAccessIterator, typename Comparator,
         typename SortingPlacesIterator>
struct split_consistently<false, RandomAccessIterator, Comparator,
                          SortingPlacesIterator>
{
  /** @brief Determine the pieces the calling thread has to merge.
   *  @param iam Number of the calling thread.
   *  @param sd Pointer to algorithm data.
   *  @param comp Comparator.
   *  @param num_samples Number of samples per thread.
   */
  void
  operator()(const thread_index_t iam,
             PMWMSSortingData<RandomAccessIterator>* sd,
             Comparator& comp,
             const typename
               std::iterator_traits<RandomAccessIterator>::difference_type
               num_samples) const
  {
    typedef std::iterator_traits<RandomAccessIterator> traits_type;
    typedef typename traits_type::value_type value_type;
    typedef typename traits_type::difference_type difference_type;

    determine_samples(sd, num_samples);

    // Wait for the samples of all threads, let a single thread sort
    // them, then wait again before anybody reads the sorted samples.
#   pragma omp barrier

#   pragma omp single
    __gnu_sequential::sort(sd->samples,
                           sd->samples + (num_samples * sd->num_threads),
                           comp);

#   pragma omp barrier

    // Both conditions depend on the thread only, not on the sequence.
    const bool has_lower_splitter = num_samples * iam > 0;
    const bool has_upper_splitter =
      (num_samples * (iam + 1)) < (num_samples * sd->num_threads);

    for (thread_index_t s = 0; s < sd->num_threads; ++s)
      {
        // Bounds of sequence s.
        const difference_type run_length =
          sd->starts[s + 1] - sd->starts[s];
        value_type* const run_first = sd->temporary[s];
        value_type* const run_last = run_first + run_length;

        if (has_lower_splitter)
          sd->pieces[iam][s].begin =
            std::lower_bound(run_first, run_last,
                             sd->samples[num_samples * iam], comp)
            - run_first;
        else
          // Absolute beginning.
          sd->pieces[iam][s].begin = 0;

        if (has_upper_splitter)
          sd->pieces[iam][s].end =
            std::lower_bound(run_first, run_last,
                             sd->samples[num_samples * (iam + 1)], comp)
            - run_first;
        else
          // Absolute end.
          sd->pieces[iam][s].end = run_length;
      }
  }
};
/** @brief Sorting functor selected at compile time by @c stable.
 *
 * Primary template: it declares no members.  The partial specializations
 * for @c stable == true and @c stable == false provide the call operator.
 */
template<bool stable, typename RandomAccessIterator, typename Comparator>
struct possibly_stable_sort
{
};
/** @brief Specialization for @c stable == true.
 *
 * The call operator forwards to __gnu_sequential::stable_sort.
 */
template<typename RandomAccessIterator, typename Comparator>
struct possibly_stable_sort<true, RandomAccessIterator, Comparator>
{
/** @brief Sort [begin, end) with respect to comp.
 * @param begin Begin iterator of the range.
 * @param end End iterator of the range.
 * @param comp Comparator.
 */
void operator()(const RandomAccessIterator& begin,
const RandomAccessIterator& end, Comparator& comp) const
{
__gnu_sequential::stable_sort(begin, end, comp);
}
};
/** @brief Specialization for @c stable == false.
 *
 * The call operator forwards to __gnu_sequential::sort.
 */
template<typename RandomAccessIterator, typename Comparator>
struct possibly_stable_sort<false, RandomAccessIterator, Comparator>
{
/** @brief Sort [begin, end) with respect to comp.
 * @param begin Begin iterator of the range (taken by value here).
 * @param end End iterator of the range (taken by value here).
 * @param comp Comparator.
 */
void operator()(const RandomAccessIterator begin,
const RandomAccessIterator end, Comparator& comp) const
{
__gnu_sequential::sort(begin, end, comp);
}
};
/** @brief Multiway merge functor selected at compile time by @c stable.
 *
 * Primary template: it declares no members.  The partial specializations
 * for @c stable == true and @c stable == false provide the call operator.
 */
template<bool stable, typename SeqRandomAccessIterator,
typename RandomAccessIterator, typename Comparator,
typename DiffType>
struct possibly_stable_multiway_merge
{
};
/** @brief Specialization for @c stable == true.
 *
 * The call operator forwards to stable_multiway_merge, passing a
 * sequential_tag.
 */
template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
typename Comparator, typename DiffType>
struct possibly_stable_multiway_merge
<true, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
DiffType>
{
/** @brief Merge the given sequences into target.
 * @param seqs_begin Begin of the sequence of input sequences.
 * @param seqs_end End of the sequence of input sequences.
 * @param target Output iterator to merge to.
 * @param comp Comparator.
 * @param length_am Number of elements to merge, passed on as length.
 */
void operator()(const SeqRandomAccessIterator& seqs_begin,
const SeqRandomAccessIterator& seqs_end,
const RandomAccessIterator& target,
Comparator& comp,
DiffType length_am) const
{
// The value returned by stable_multiway_merge is discarded.
stable_multiway_merge(seqs_begin, seqs_end, target, comp,
length_am, sequential_tag());
}
};
/** @brief Specialization for @c stable == false.
 *
 * The call operator forwards to multiway_merge, passing a
 * sequential_tag.
 */
template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
typename Comparator, typename DiffType>
struct possibly_stable_multiway_merge
<false, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
DiffType>
{
/** @brief Merge the given sequences into target.
 * @param seqs_begin Begin of the sequence of input sequences.
 * @param seqs_end End of the sequence of input sequences.
 * @param target Output iterator to merge to.
 * @param comp Comparator.
 * @param length_am Number of elements to merge, passed on as length.
 */
void operator()(const SeqRandomAccessIterator& seqs_begin,
const SeqRandomAccessIterator& seqs_end,
const RandomAccessIterator& target,
Comparator& comp,
DiffType length_am) const
{
// The value returned by multiway_merge is discarded.
multiway_merge(seqs_begin, seqs_end, target, comp,
length_am, sequential_tag());
}
};
/** @brief PMWMS code executed by each thread. /** @brief PMWMS code executed by each thread.
* @param sd Pointer to algorithm data. * @param sd Pointer to algorithm data.
* @param comp Comparator. * @param comp Comparator.
*/ */
template<typename RandomAccessIterator, typename Comparator> template<bool stable, bool exact, typename RandomAccessIterator,
typename Comparator>
void void
parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd, parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd,
Comparator& comp) Comparator& comp)
...@@ -162,165 +330,65 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -162,165 +330,65 @@ template<typename RandomAccessIterator, typename Comparator>
// Length of this thread's chunk, before merging. // Length of this thread's chunk, before merging.
difference_type length_local = sd->starts[iam + 1] - sd->starts[iam]; difference_type length_local = sd->starts[iam + 1] - sd->starts[iam];
#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST // Sort in temporary storage, leave space for sentinel.
typedef RandomAccessIterator SortingPlacesIterator;
// Sort in input storage.
sd->sorting_places[iam] = sd->source + sd->starts[iam];
#else
typedef value_type* SortingPlacesIterator; typedef value_type* SortingPlacesIterator;
// Sort in temporary storage, leave space for sentinel. sd->temporary[iam] =
sd->sorting_places[iam] = sd->temporaries[iam] =
static_cast<value_type*>( static_cast<value_type*>(
::operator new(sizeof(value_type) * (length_local + 1))); ::operator new(sizeof(value_type) * (length_local + 1)));
// Copy there. // Copy there.
std::uninitialized_copy(sd->source + sd->starts[iam], std::uninitialized_copy(sd->source + sd->starts[iam],
sd->source + sd->starts[iam] + length_local, sd->source + sd->starts[iam] + length_local,
sd->sorting_places[iam]); sd->temporary[iam]);
#endif
// Sort locally.
if (sd->stable)
__gnu_sequential::stable_sort(sd->sorting_places[iam],
sd->sorting_places[iam] + length_local,
comp);
else
__gnu_sequential::sort(sd->sorting_places[iam],
sd->sorting_places[iam] + length_local,
comp);
// Invariant: locally sorted subsequence in sd->sorting_places[iam],
// sd->sorting_places[iam] + length_local.
const _Settings& __s = _Settings::get();
if (__s.sort_splitting == SAMPLING)
{
difference_type num_samples;
determine_samples(sd, num_samples);
# pragma omp barrier
# pragma omp single
__gnu_sequential::sort(sd->samples,
sd->samples + (num_samples * sd->num_threads),
comp);
# pragma omp barrier
for (int s = 0; s < sd->num_threads; ++s)
{
// For each sequence.
if (num_samples * iam > 0)
sd->pieces[iam][s].begin =
std::lower_bound(sd->sorting_places[s],
sd->sorting_places[s]
+ (sd->starts[s + 1] - sd->starts[s]),
sd->samples[num_samples * iam],
comp)
- sd->sorting_places[s];
else
// Absolute beginning.
sd->pieces[iam][s].begin = 0;
if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
sd->pieces[iam][s].end =
std::lower_bound(sd->sorting_places[s],
sd->sorting_places[s]
+ (sd->starts[s + 1] - sd->starts[s]),
sd->samples[num_samples * (iam + 1)],
comp)
- sd->sorting_places[s];
else
// Absolute end.
sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
}
}
else if (__s.sort_splitting == EXACT)
{
# pragma omp barrier
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> > possibly_stable_sort<stable, SortingPlacesIterator, Comparator>()
seqs(sd->num_threads); (sd->temporary[iam], sd->temporary[iam] + length_local, comp);
for (int s = 0; s < sd->num_threads; ++s)
seqs[s] = std::make_pair(sd->sorting_places[s],
sd->sorting_places[s]
+ (sd->starts[s + 1] - sd->starts[s]));
std::vector<SortingPlacesIterator> offsets(sd->num_threads); // Invariant: locally sorted subsequence in sd->temporary[iam],
// sd->temporary[iam] + length_local.
// if not last thread // No barrier here: Synchronization is done by the splitting routine.
if (iam < sd->num_threads - 1)
multiseq_partition(seqs.begin(), seqs.end(),
sd->starts[iam + 1], offsets.begin(), comp);
for (int seq = 0; seq < sd->num_threads; ++seq) difference_type num_samples =
{ _Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
// for each sequence split_consistently
if (iam < (sd->num_threads - 1)) <exact, RandomAccessIterator, Comparator, SortingPlacesIterator>()
sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first; (iam, sd, comp, num_samples);
else
// very end of this sequence
sd->pieces[iam][seq].end = (sd->starts[seq + 1]
- sd->starts[seq]);
}
# pragma omp barrier
for (int seq = 0; seq < sd->num_threads; ++seq)
{
// For each sequence.
if (iam > 0)
sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
else
// Absolute beginning.
sd->pieces[iam][seq].begin = 0;
}
}
// Offset from target begin, length after merging. // Offset from target begin, length after merging.
difference_type offset = 0, length_am = 0; difference_type offset = 0, length_am = 0;
for (int s = 0; s < sd->num_threads; ++s) for (thread_index_t s = 0; s < sd->num_threads; s++)
{ {
length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin; length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
offset += sd->pieces[iam][s].begin; offset += sd->pieces[iam][s].begin;
} }
#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST typedef std::vector<
// Merge to temporary storage, uninitialized creation not possible std::pair<SortingPlacesIterator, SortingPlacesIterator> >
// since there is no multiway_merge calling the placement new seq_vector_type;
// instead of the assignment operator. seq_vector_type seqs(sd->num_threads);
// XXX incorrect (de)construction
sd->merging_places[iam] = sd->temporaries[iam] =
static_cast<value_type*>(::operator new(sizeof(value_type)
* length_am));
#else
// Merge directly to target.
sd->merging_places[iam] = sd->source + offset;
#endif
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
seqs(sd->num_threads);
for (int s = 0; s < sd->num_threads; ++s) for (int s = 0; s < sd->num_threads; ++s)
{ {
seqs[s] = seqs[s] =
std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin, std::make_pair(sd->temporary[s] + sd->pieces[iam][s].begin,
sd->sorting_places[s] + sd->pieces[iam][s].end); sd->temporary[s] + sd->pieces[iam][s].end);
} }
multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, possibly_stable_multiway_merge<
length_am, sd->stable, false, sequential_tag()); stable,
typename seq_vector_type::iterator,
RandomAccessIterator,
Comparator, difference_type>()
(seqs.begin(), seqs.end(),
sd->source + offset, comp,
length_am);
# pragma omp barrier # pragma omp barrier
#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST ::operator delete(sd->temporary[iam]);
// Write back.
std::copy(sd->merging_places[iam],
sd->merging_places[iam] + length_am,
sd->source + offset);
#endif
::operator delete(sd->temporaries[iam]);
} }
/** @brief PMWMS main call. /** @brief PMWMS main call.
...@@ -329,21 +397,22 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -329,21 +397,22 @@ template<typename RandomAccessIterator, typename Comparator>
* @param comp Comparator. * @param comp Comparator.
* @param n Length of sequence. * @param n Length of sequence.
* @param num_threads Number of threads to use. * @param num_threads Number of threads to use.
* @param stable Stable sorting.
*/ */
template<typename RandomAccessIterator, typename Comparator> template<bool stable, bool exact, typename RandomAccessIterator,
typename Comparator>
void void
parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end, parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, typename Comparator comp,
std::iterator_traits<RandomAccessIterator>:: thread_index_t num_threads)
difference_type n, int num_threads, bool stable)
{ {
_GLIBCXX_CALL(n) _GLIBCXX_CALL(end - begin)
typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type; typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type; typedef typename traits_type::difference_type difference_type;
difference_type n = end - begin;
if (n <= 1) if (n <= 1)
return; return;
...@@ -354,7 +423,6 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -354,7 +423,6 @@ template<typename RandomAccessIterator, typename Comparator>
// shared variables // shared variables
PMWMSSortingData<RandomAccessIterator> sd; PMWMSSortingData<RandomAccessIterator> sd;
difference_type* starts; difference_type* starts;
const _Settings& __s = _Settings::get();
# pragma omp parallel num_threads(num_threads) # pragma omp parallel num_threads(num_threads)
{ {
...@@ -364,23 +432,16 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -364,23 +432,16 @@ template<typename RandomAccessIterator, typename Comparator>
{ {
sd.num_threads = num_threads; sd.num_threads = num_threads;
sd.source = begin; sd.source = begin;
sd.temporaries = new value_type*[num_threads];
#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
sd.sorting_places = new RandomAccessIterator[num_threads];
sd.merging_places = new value_type*[num_threads];
#else
sd.sorting_places = new value_type*[num_threads];
sd.merging_places = new RandomAccessIterator[num_threads];
#endif
if (__s.sort_splitting == SAMPLING) sd.temporary = new value_type*[num_threads];
if (!exact)
{ {
unsigned int size = difference_type size =
(__s.sort_mwms_oversampling * num_threads - 1) (_Settings::get().sort_mwms_oversampling * num_threads - 1)
* num_threads; * num_threads;
sd.samples = static_cast<value_type*>( sd.samples = static_cast<value_type*>(
::operator new(size * sizeof(value_type))); ::operator new(size * sizeof(value_type)));
} }
else else
sd.samples = NULL; sd.samples = NULL;
...@@ -390,7 +451,6 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -390,7 +451,6 @@ template<typename RandomAccessIterator, typename Comparator>
for (int s = 0; s < num_threads; ++s) for (int s = 0; s < num_threads; ++s)
sd.pieces[s].resize(num_threads); sd.pieces[s].resize(num_threads);
starts = sd.starts = new difference_type[num_threads + 1]; starts = sd.starts = new difference_type[num_threads + 1];
sd.stable = stable;
difference_type chunk_length = n / num_threads; difference_type chunk_length = n / num_threads;
difference_type split = n % num_threads; difference_type split = n % num_threads;
...@@ -401,18 +461,16 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -401,18 +461,16 @@ template<typename RandomAccessIterator, typename Comparator>
pos += (i < split) ? (chunk_length + 1) : chunk_length; pos += (i < split) ? (chunk_length + 1) : chunk_length;
} }
starts[num_threads] = pos; starts[num_threads] = pos;
} } //single
// Now sort in parallel. // Now sort in parallel.
parallel_sort_mwms_pu(&sd, comp); parallel_sort_mwms_pu<stable, exact>(&sd, comp);
} //parallel } //parallel
delete[] starts; delete[] starts;
delete[] sd.temporaries; delete[] sd.temporary;
delete[] sd.sorting_places;
delete[] sd.merging_places;
if (__s.sort_splitting == SAMPLING) if (!exact)
::operator delete(sd.samples); ::operator delete(sd.samples);
delete[] sd.offsets; delete[] sd.offsets;
......
...@@ -71,7 +71,7 @@ namespace __gnu_parallel ...@@ -71,7 +71,7 @@ namespace __gnu_parallel
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
inline void inline void
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end, parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, bool stable) Comparator comp, bool stable)
{ {
_GLIBCXX_CALL(end - begin) _GLIBCXX_CALL(end - begin)
typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef std::iterator_traits<RandomAccessIterator> traits_type;
...@@ -79,25 +79,43 @@ namespace __gnu_parallel ...@@ -79,25 +79,43 @@ namespace __gnu_parallel
typedef typename traits_type::difference_type difference_type; typedef typename traits_type::difference_type difference_type;
if (begin != end) if (begin != end)
{ {
difference_type n = end - begin; difference_type n = end - begin;
if (false) ; if (false) ;
#if _GLIBCXX_MERGESORT #if _GLIBCXX_MERGESORT
else if (stable || _Settings::get().sort_algorithm == MWMS) else if (stable)
parallel_sort_mwms(begin, end, comp, n, get_max_threads(), stable); {
if(_Settings::get().sort_splitting == EXACT)
parallel_sort_mwms<true, true>
(begin, end, comp, get_max_threads());
else
parallel_sort_mwms<true, false>
(begin, end, comp, get_max_threads());
}
else if (_Settings::get().sort_algorithm == MWMS)
{
if(_Settings::get().sort_splitting == EXACT)
parallel_sort_mwms<false, true>
(begin, end, comp, get_max_threads());
else
parallel_sort_mwms<false, false>
(begin, end, comp, get_max_threads());
}
#endif #endif
#if _GLIBCXX_QUICKSORT #if _GLIBCXX_QUICKSORT
else if (!stable && _Settings::get().sort_algorithm == QS) else if (!stable && _Settings::get().sort_algorithm == QS)
parallel_sort_qs(begin, end, comp, n, get_max_threads()); parallel_sort_qs(begin, end, comp, n, get_max_threads());
#endif #endif
#if _GLIBCXX_BAL_QUICKSORT #if _GLIBCXX_BAL_QUICKSORT
else if (!stable && _Settings::get().sort_algorithm == QS_BALANCED) else if (!stable && _Settings::get().sort_algorithm == QS_BALANCED)
parallel_sort_qsb(begin, end, comp, n, get_max_threads()); parallel_sort_qsb(begin, end, comp, n, get_max_threads());
#endif #endif
else else if(stable)
__gnu_sequential::sort(begin, end, comp); __gnu_sequential::stable_sort(begin, end, comp);
} else
__gnu_sequential::sort(begin, end, comp);
}
} }
} // end namespace __gnu_parallel } // end namespace __gnu_parallel
......
...@@ -44,6 +44,9 @@ namespace __gnu_parallel ...@@ -44,6 +44,9 @@ namespace __gnu_parallel
/** @brief Forces sequential execution at compile time. */ /** @brief Forces sequential execution at compile time. */
struct sequential_tag { }; struct sequential_tag { };
/** @brief Forces exact splitting in multiway merge at compile time. */
struct exact_tag { };
/** @brief Recommends parallel execution at compile time. */ /** @brief Recommends parallel execution at compile time. */
struct parallel_tag { }; struct parallel_tag { };
......
...@@ -87,15 +87,10 @@ namespace __gnu_parallel ...@@ -87,15 +87,10 @@ namespace __gnu_parallel
/// Merging algorithms: /// Merging algorithms:
// bubblesort-alike, loser-tree variants, enum sentinel. // bubblesort-alike, loser-tree variants, enum sentinel.
enum _MultiwayMergeAlgorithm enum _MultiwayMergeAlgorithm
{ {
BUBBLE, LOSER_TREE
LOSER_TREE_EXPLICIT,
LOSER_TREE,
LOSER_TREE_COMBINED,
LOSER_TREE_SENTINEL,
ENUM_SENTINEL
}; };
/// Partial sum algorithms: recursive, linear. /// Partial sum algorithms: recursive, linear.
enum _PartialSumAlgorithm enum _PartialSumAlgorithm
{ {
......
// Copyright (C) 2008 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING. If not, write to the Free
// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
// USA.
#include <algorithm>
#include <functional>
#include <tr1/functional>
// libstdc++/35588
int main()
{
  using namespace std;
  using namespace tr1;
  using namespace placeholders;

  // Give the array defined contents: sorting an uninitialized array
  // would read indeterminate values.
  int t[10] = { 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };

  // PR 35588: sort has to accept the result of bind() as comparator.
  sort(t, t+10, bind(less<int>(), _1, _2));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment