Commit f9985df5 by Johannes Singler Committed by Johannes Singler

re PR libstdc++/35588 ([parallel mode] parallel std::sort and bind())

2008-04-07  Johannes Singler  <singler@ira.uka.de>

         * include/parallel/multiway_merge.h:
           Moved decisions to compiletime instead of runtime.
         * include/parallel/losertree.h:
           Removed obsolete variants, added variant that uses pointers
           in the loser tree.
         * include/parallel/types.h:
           Remove obsolete settings options from enum.
         * include/parallel/features.h:
           Remove obsolete compile-time switches.
         * include/parallel/compiletime_settings.h:
           Remove obsolete variant that copies back *after* sorting.
         * include/parallel/tags.h:
           Add one new tag for compile-time switch.
         * include/parallel/merge.h:
           Adapt to changes in multiway_merge.h.
         * include/parallel/multiway_mergesort.h:
           Adapt to changes in multiway_merge.h.
           Factor out splitting variants.
           Remove obsolete variant that copies back *after* sorting.
         * include/parallel/sort.h:
           Adapt to changes in multiway_mergesort.h.
         * testsuite/25_algorithms/sort/35588.cc:
           Added test case from / for PR 35588.

From-SVN: r133975
parent 1d4cd3d0
2008-04-07 Johannes Singler <singler@ira.uka.de>
* include/parallel/multiway_merge.h:
Moved decisions to compiletime instead of runtime.
* include/parallel/losertree.h:
Removed obsolete variants, added variant that uses pointers
in the loser tree.
* include/parallel/types.h:
Remove obsolete settings options from enum.
* include/parallel/features.h:
Remove obsolete compile-time switches.
* include/parallel/compiletime_settings.h:
Remove obsolete variant that copies back *after* sorting.
* include/parallel/tags.h:
Add one new tag for compile-time switch.
* include/parallel/merge.h:
Adapt to changes in multiway_merge.h.
* include/parallel/multiway_mergesort.h:
Adapt to changes in multiway_merge.h.
Factor out splitting variants.
Remove obsolete variant that copies back *after* sorting.
* include/parallel/sort.h:
Adapt to changes in multiway_mergesort.h.
* testsuite/25_algorithms/sort/35588.cc:
Added test case from / for PR 35588.
2008-03-29 Paolo Carlini <pcarlini@suse.de>
PR libstdc++/35725
......
......@@ -73,17 +73,9 @@
* __gnu_parallel::parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0
#endif
#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
* Consider the size of the TLB for
* __gnu_parallel::parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0
#endif
#ifndef _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
/** @brief First copy the data, sort it locally, and merge it back
* (0); or copy it back after everything is done (1).
*
* Recommendation: 0 */
#define _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST 0
#endif
......@@ -61,66 +61,6 @@
#define _GLIBCXX_BAL_QUICKSORT 1
#endif
#ifndef _GLIBCXX_LOSER_TREE
/** @def _GLIBCXX_LOSER_TREE
* @brief Include guarded (sequences may run empty) loser tree,
* moving objects.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE 1
#endif
#ifndef _GLIBCXX_LOSER_TREE_EXPLICIT
/** @def _GLIBCXX_LOSER_TREE_EXPLICIT
* @brief Include standard loser tree, storing two flags for infimum
* and supremum.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_EXPLICIT 0
#endif
#ifndef _GLIBCXX_LOSER_TREE_REFERENCE
/** @def _GLIBCXX_LOSER_TREE_REFERENCE
* @brief Include some loser tree variant.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_REFERENCE 0
#endif
#ifndef _GLIBCXX_LOSER_TREE_POINTER
/** @def _GLIBCXX_LOSER_TREE_POINTER
* @brief Include some loser tree variant.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_POINTER 1
#endif
#ifndef _GLIBCXX_LOSER_TREE_UNGUARDED
/** @def _GLIBCXX_LOSER_TREE_UNGUARDED
* @brief Include unguarded (sequences must not run empty) loser
* tree, moving objects.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_UNGUARDED 0
#endif
#ifndef _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
/** @def _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
* @brief Include some loser tree variant.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED 1
#endif
#ifndef _GLIBCXX_LOSER_TREE_COMBINED
/** @def _GLIBCXX_LOSER_TREE_COMBINED
* @brief Include some loser tree variant.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_COMBINED 0
#endif
#ifndef _GLIBCXX_LOSER_TREE_SENTINEL
/** @def _GLIBCXX_LOSER_TREE_SENTINEL
* @brief Include some loser tree variant.
* @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_SENTINEL 0
#endif
#ifndef _GLIBCXX_FIND_GROWING_BLOCKS
/** @brief Include the growing blocks variant for std::find.
* @see __gnu_parallel::_Settings::find_algorithm */
......
......@@ -47,878 +47,725 @@
namespace __gnu_parallel
{
#if _GLIBCXX_LOSER_TREE_EXPLICIT
/** @brief Guarded loser tree, copying the whole element into the
* tree structure.
*
* Guarding is done explicitly through two flags per element, inf
* and sup This is a quite slow variant.
*/
template<typename T, typename Comparator = std::less<T> >
class LoserTreeExplicit
/**
* @brief Guarded loser/tournament tree.
*
* The smallest element is at the top.
*
* Guarding is done explicitly through one flag sup per element,
* inf is not needed due to a better initialization routine. This
* is a well-performing variant.
*
* @param T the element type
* @param Comparator the comparator to use, defaults to std::less<T>
*/
template<typename T, typename Comparator>
class LoserTreeBase
{
protected:
/** @brief Internal representation of a LoserTree element. */
struct Loser
{
private:
struct Loser
{
// The relevant element.
T key;
// Is this an infimum or supremum element?
bool inf, sup;
// Number of the sequence the element comes from.
int source;
};
unsigned int size, offset;
Loser* losers;
Comparator comp;
public:
LoserTreeExplicit(unsigned int _size, Comparator _comp = std::less<T>())
: comp(_comp)
{
size = _size;
offset = size;
losers = new Loser[size];
for (unsigned int l = 0; l < size; ++l)
{
//losers[l].key = ... stays unset
losers[l].inf = true;
losers[l].sup = false;
//losers[l].source = -1; //sentinel
}
}
~LoserTreeExplicit()
{ delete[] losers; }
/** @brief flag, true iff this is a "maximum" sentinel. */
bool sup;
/** @brief index of the source sequence. */
int source;
/** @brief key of the element in the LoserTree. */
T key;
};
int
get_min_source()
{ return losers[0].source; }
unsigned int ik, k, offset;
/** log_2{k} */
unsigned int _M_log_k;
/** @brief LoserTree elements. */
Loser* losers;
/** @brief Comparator to use. */
Comparator comp;
/**
* @brief State flag that determines whether the LoserTree is empty.
*
* Only used for building the LoserTree.
*/
bool first_insert;
public:
/**
* @brief The constructor.
*
* @param _k The number of sequences to merge.
* @param _comp The comparator to use.
*/
LoserTreeBase(unsigned int _k, Comparator _comp)
: comp(_comp)
{
ik = _k;
// Compute log_2{k} for the Loser Tree
_M_log_k = log2(ik - 1) + 1;
// Next greater power of 2.
k = 1 << _M_log_k;
offset = k;
// Avoid default-constructing losers[].key
losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser)));
for (unsigned int i = ik - 1; i < k; ++i)
losers[i + k].sup = true;
first_insert = true;
}
/**
* @brief The destructor.
*/
~LoserTreeBase()
{ ::operator delete(losers); }
/**
* @brief Initializes the sequence "source" with the element "key".
*
* @param key the element to insert
* @param source index of the source sequence
* @param sup flag that determines whether the value to insert is an
* explicit supremum.
*/
inline void
insert_start(const T& key, int source, bool sup)
{
unsigned int pos = k + source;
if(first_insert)
{
// Construct all keys, so we can easily deconstruct them.
for (unsigned int i = 0; i < (2 * k); ++i)
new(&(losers[i].key)) T(key);
first_insert = false;
}
else
new(&(losers[pos].key)) T(key);
losers[pos].sup = sup;
losers[pos].source = source;
}
/**
* @return the index of the sequence with the smallest element.
*/
int get_min_source()
{ return losers[0].source; }
};
/**
* @brief Stable LoserTree variant.
*
* Provides the stable implementations of insert_start, init_winner,
* init and delete_min_insert.
*
* Unstable variant is done using partial specialisation below.
*/
template<bool stable/* default == true */, typename T, typename Comparator>
class LoserTree : public LoserTreeBase<T, Comparator>
{
typedef LoserTreeBase<T, Comparator> Base;
using Base::k;
using Base::losers;
using Base::first_insert;
public:
LoserTree(unsigned int _k, Comparator _comp)
: Base::LoserTreeBase(_k, _comp)
{}
unsigned int
init_winner(unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup
|| (!losers[left].sup
&& !comp(losers[right].key, losers[left].key)))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void init()
{ losers[0] = losers[init_winner(1)]; }
/**
* @brief Delete the smallest element and insert a new element from
* the previously smallest element's sequence.
*
* This implementation is stable.
*/
// Do not pass a const reference since key will be used as local variable.
void delete_min_insert(T key, bool sup)
{
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if ((sup && (!losers[pos].sup || losers[pos].source < source))
|| (!sup && !losers[pos].sup
&& ((comp(losers[pos].key, key))
|| (!comp(key, losers[pos].key)
&& losers[pos].source < source))))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
std::swap(losers[pos].key, key);
}
}
losers[0].sup = sup;
losers[0].source = source;
losers[0].key = key;
}
};
/**
* @brief Unstable LoserTree variant.
*
* Stability (non-stable here) is selected with partial specialization.
*/
template<typename T, typename Comparator>
class LoserTree</* stable == */false, T, Comparator> :
public LoserTreeBase<T, Comparator>
{
typedef LoserTreeBase<T, Comparator> Base;
using Base::_M_log_k;
using Base::k;
using Base::losers;
using Base::first_insert;
public:
LoserTree(unsigned int _k, Comparator _comp)
: Base::LoserTreeBase(_k, _comp)
{}
/**
* Computes the winner of the competition at position "root".
*
* Called recursively (starting at 0) to build the initial tree.
*
* @param root index of the "game" to start.
*/
unsigned int
init_winner (unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup ||
(!losers[left].sup
&& !comp(losers[right].key, losers[left].key)))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
inline void
init()
{ losers[0] = losers[init_winner(1)]; }
/**
* Delete the key smallest element and insert the element key instead.
*
* @param key the key to insert
* @param sup true iff key is an explicitly marked supremum
*/
// Do not pass a const reference since key will be used as local variable.
inline void
delete_min_insert(T key, bool sup)
{
#if _GLIBCXX_ASSERTIONS
// loser trees are only used for at least 2 sequences
_GLIBCXX_PARALLEL_ASSERT(_M_log_k > 1);
#endif
void
insert_start(T key, int source, bool sup)
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
bool inf = false;
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
{
if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
&& comp(losers[pos].key, key)) || losers[pos].inf || sup)
{
// The other one is smaller.
std::swap(losers[pos].key, key);
std::swap(losers[pos].inf, inf);
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
}
}
losers[0].key = key;
losers[0].inf = inf;
losers[0].sup = sup;
losers[0].source = source;
// The smaller one gets promoted.
if (sup || (!losers[pos].sup && comp(losers[pos].key, key)))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
std::swap(losers[pos].key, key);
}
}
void
init() { }
losers[0].sup = sup;
losers[0].source = source;
losers[0].key = key;
}
};
void
delete_min_insert(T key, bool sup)
{
bool inf = false;
int source = losers[0].source;
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
&& comp(losers[pos].key, key))
|| losers[pos].inf || sup)
{
// The other one is smaller.
std::swap(losers[pos].key, key);
std::swap(losers[pos].inf, inf);
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
}
}
losers[0].key = key;
losers[0].inf = inf;
losers[0].sup = sup;
losers[0].source = source;
}
void
insert_start_stable(T key, int source, bool sup)
{
bool inf = false;
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
{
if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
&& ((comp(losers[pos].key, key))
|| (!comp(key, losers[pos].key)
&& losers[pos].source < source)))
|| losers[pos].inf || sup)
{
// Take next key.
std::swap(losers[pos].key, key);
std::swap(losers[pos].inf, inf);
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
}
}
losers[0].key = key;
losers[0].inf = inf;
losers[0].sup = sup;
losers[0].source = source;
}
void
init_stable() { }
void
delete_min_insert_stable(T key, bool sup)
{
bool inf = false;
int source = losers[0].source;
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
{
if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
&& ((comp(losers[pos].key, key))
|| (!comp(key, losers[pos].key)
&& losers[pos].source < source)))
|| losers[pos].inf || sup)
{
std::swap(losers[pos].key, key);
std::swap(losers[pos].inf, inf);
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
}
}
losers[0].key = key;
losers[0].inf = inf;
losers[0].sup = sup;
losers[0].source = source;
}
/**
* @brief Base class of Loser Tree implementation using pointers.
*/
template<typename T, typename Comparator>
class LoserTreePointerBase
{
protected:
/** @brief Internal representation of LoserTree elements. */
struct Loser
{
bool sup;
int source;
const T* keyp;
};
#endif
#if _GLIBCXX_LOSER_TREE
unsigned int ik, k, offset;
Loser* losers;
Comparator comp;
/** @brief Guarded loser tree, either copying the whole element into
* the tree structure, or looking up the element via the index.
*
* Guarding is done explicitly through one flag sup per element,
* inf is not needed due to a better initialization routine. This
* is a well-performing variant.
*/
template<typename T, typename Comparator = std::less<T> >
class LoserTree
{
private:
struct Loser
{
bool sup;
int source;
T key;
};
unsigned int ik, k, offset;
Loser* losers;
Comparator comp;
bool first_insert;
public:
LoserTree(unsigned int _k, Comparator _comp = std::less<T>())
public:
LoserTreePointerBase(unsigned int _k, Comparator _comp = std::less<T>())
: comp(_comp)
{
ik = _k;
// Next greater power of 2.
k = 1 << (log2(ik - 1) + 1);
offset = k;
// Avoid default-constructing losers[].key
losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser)));
for (unsigned int i = ik - 1; i < k; ++i)
losers[i + k].sup = true;
first_insert = true;
}
{
ik = _k;
~LoserTree()
{ ::operator delete(losers); }
// Next greater power of 2.
k = 1 << (log2(ik - 1) + 1);
offset = k;
losers = new Loser[k * 2];
for (unsigned int i = ik - 1; i < k; i++)
losers[i + k].sup = true;
}
int
get_min_source()
{ return losers[0].source; }
~LoserTreePointerBase()
{ ::operator delete(losers); }
void
insert_start(const T& key, int source, bool sup)
{
unsigned int pos = k + source;
if(first_insert)
{
// Construct all keys, so we can easily deconstruct them.
for (unsigned int i = 0; i < (2 * k); ++i)
::new(&(losers[i].key)) T(key);
first_insert = false;
}
else
::new(&(losers[pos].key)) T(key);
losers[pos].sup = sup;
losers[pos].source = source;
}
int get_min_source()
{ return losers[0].source; }
unsigned int
init_winner (unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup
|| (!losers[left].sup
&& !comp(losers[right].key, losers[left].key)))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void insert_start(const T& key, int source, bool sup)
{
unsigned int pos = k + source;
losers[pos].sup = sup;
losers[pos].source = source;
losers[pos].keyp = &key;
}
};
/**
* @brief Stable LoserTree implementation.
*
* The unstable variant is implemented using partial instantiation below.
*/
template<bool stable/* default == true */, typename T, typename Comparator>
class LoserTreePointer : public LoserTreePointerBase<T, Comparator>
{
typedef LoserTreePointerBase<T, Comparator> Base;
using Base::k;
using Base::losers;
void
init()
{ losers[0] = losers[init_winner(1)]; }
public:
LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>())
: Base::LoserTreePointerBase(_k, _comp)
{}
// Do not pass const reference since key will be used as local variable.
void
delete_min_insert(T key, bool sup)
{
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if (sup || (!losers[pos].sup && comp(losers[pos].key, key)))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
std::swap(losers[pos].key, key);
}
}
losers[0].sup = sup;
losers[0].source = source;
losers[0].key = key;
}
unsigned int
init_winner(unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup
|| (!losers[left].sup && !comp(*losers[right].keyp,
*losers[left].keyp)))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void init()
{ losers[0] = losers[init_winner(1)]; }
void delete_min_insert(const T& key, bool sup)
{
const T* keyp = &key;
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if ((sup && (!losers[pos].sup || losers[pos].source < source)) ||
(!sup && !losers[pos].sup &&
((comp(*losers[pos].keyp, *keyp)) ||
(!comp(*keyp, *losers[pos].keyp)
&& losers[pos].source < source))))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].sup = sup;
losers[0].source = source;
losers[0].keyp = keyp;
}
};
/**
* @brief Unstable LoserTree implementation.
*
* The stable variant is above.
*/
template<typename T, typename Comparator>
class LoserTreePointer</* stable == */false, T, Comparator> :
public LoserTreePointerBase<T, Comparator>
{
typedef LoserTreePointerBase<T, Comparator> Base;
using Base::k;
using Base::losers;
void
insert_start_stable(const T& key, int source, bool sup)
{ return insert_start(key, source, sup); }
public:
LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>())
: Base::LoserTreePointerBase(_k, _comp)
{}
unsigned int
init_winner_stable (unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup
unsigned int
init_winner(unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup
|| (!losers[left].sup
&& !comp(losers[right].key, losers[left].key)))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void
init_stable()
{ losers[0] = losers[init_winner_stable(1)]; }
// Do not pass const reference since key will be used as local variable.
void
delete_min_insert_stable(T key, bool sup)
{
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if ( (sup && (!losers[pos].sup || losers[pos].source < source))
|| (!sup && !losers[pos].sup
&& ((comp(losers[pos].key, key))
|| (!comp(key, losers[pos].key)
&& losers[pos].source < source))))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
std::swap(losers[pos].key, key);
}
}
losers[0].sup = sup;
losers[0].source = source;
losers[0].key = key;
}
};
#endif
#if _GLIBCXX_LOSER_TREE_REFERENCE
/** @brief Guarded loser tree, either copying the whole element into
* the tree structure, or looking up the element via the index.
*
* Guarding is done explicitly through one flag sup per element,
* inf is not needed due to a better initialization routine. This
* is a well-performing variant.
*/
template<typename T, typename Comparator = std::less<T> >
class LoserTreeReference
&& !comp(*losers[right].keyp, *losers[left].keyp)))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void init()
{ losers[0] = losers[init_winner(1)]; }
void delete_min_insert(const T& key, bool sup)
{
#undef COPY
#ifdef COPY
#define KEY(i) losers[i].key
#define KEY_SOURCE(i) key
#else
#define KEY(i) keys[losers[i].source]
#define KEY_SOURCE(i) keys[i]
#endif
private:
struct Loser
{
bool sup;
int source;
#ifdef COPY
T key;
#endif
};
const T* keyp = &key;
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp)))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].sup = sup;
losers[0].source = source;
losers[0].keyp = keyp;
}
};
/** @brief Base class for unguarded LoserTree implementation.
*
* The whole element is copied into the tree structure.
*
* No guarding is done, therefore not a single input sequence must
* run empty. Unused sequence heads are marked with a sentinel which
* is &gt; all elements that are to be merged.
*
* This is a very fast variant.
*/
template<typename T, typename Comparator>
class LoserTreeUnguardedBase
{
protected:
struct Loser
{
int source;
T key;
};
unsigned int ik, k, offset;
Loser* losers;
#ifndef COPY
T* keys;
#endif
Comparator comp;
unsigned int ik, k, offset;
Loser* losers;
Comparator comp;
public:
LoserTreeReference(unsigned int _k, Comparator _comp = std::less<T>())
public:
inline
LoserTreeUnguardedBase(unsigned int _k, const T _sentinel,
Comparator _comp = std::less<T>())
: comp(_comp)
{
ik = _k;
// Next greater power of 2.
k = 1 << (log2(ik - 1) + 1);
offset = k;
losers = new Loser[k * 2];
#ifndef COPY
keys = new T[ik];
#endif
for (unsigned int i = ik - 1; i < k; ++i)
losers[i + k].sup = true;
}
~LoserTreeReference()
{
delete[] losers;
#ifndef COPY
delete[] keys;
#endif
}
int
get_min_source()
{ return losers[0].source; }
void
insert_start(T key, int source, bool sup)
{
unsigned int pos = k + source;
losers[pos].sup = sup;
losers[pos].source = source;
KEY(pos) = key;
}
unsigned int
init_winner(unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if ( losers[right].sup ||
(!losers[left].sup && !comp(KEY(right), KEY(left))))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void
init()
{
losers[0] = losers[init_winner(1)];
}
void
delete_min_insert(T key, bool sup)
{
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if (sup || (!losers[pos].sup && comp(KEY(pos), KEY_SOURCE(source))))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
#ifdef COPY
std::swap(KEY(pos), KEY_SOURCE(source));
#endif
}
}
losers[0].sup = sup;
losers[0].source = source;
#ifdef COPY
KEY(0) = KEY_SOURCE(source);
{
ik = _k;
// Next greater power of 2.
k = 1 << (log2(ik - 1) + 1);
offset = k;
// Avoid default-constructing losers[].key
losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser)));
for (unsigned int i = /*k + ik - 1*/0; i < (2 * k); ++i)
{
losers[i].key = _sentinel;
losers[i].source = -1;
}
}
inline ~LoserTreeUnguardedBase()
{ ::operator delete(losers); }
inline int
get_min_source()
{
// no dummy sequence can ever be at the top!
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
}
return losers[0].source;
}
void
insert_start_stable(T key, int source, bool sup)
{ return insert_start(key, source, sup); }
unsigned int
init_winner_stable(unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup
|| (!losers[left].sup && !comp(KEY(right), KEY(left))))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void
init_stable()
{ losers[0] = losers[init_winner_stable(1)]; }
void
delete_min_insert_stable(T key, bool sup)
{
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if ((sup && (!losers[pos].sup || losers[pos].source < source))
|| (!sup && !losers[pos].sup
&& ((comp(KEY(pos), KEY_SOURCE(source)))
|| (!comp(KEY_SOURCE(source), KEY(pos))
&& losers[pos].source < source))))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
#ifdef COPY
std::swap(KEY(pos), KEY_SOURCE(source));
#endif
}
}
inline void
insert_start(const T& key, int source, bool)
{
unsigned int pos = k + source;
new(&(losers[pos].key)) T(key);
losers[pos].source = source;
}
};
/**
* @brief Stable implementation of unguarded LoserTree.
*
* Unstable variant is selected below with partial specialization.
*/
template<bool stable/* default == true */, typename T, typename Comparator>
class LoserTreeUnguarded : public LoserTreeUnguardedBase<T, Comparator>
{
typedef LoserTreeUnguardedBase<T, Comparator> Base;
using Base::k;
using Base::losers;
public:
LoserTreeUnguarded(unsigned int _k, const T _sentinel,
Comparator _comp = std::less<T>())
: Base::LoserTreeUnguardedBase(_k, _sentinel, _comp)
{}
unsigned int
init_winner(unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if (!comp(losers[right].key, losers[left].key))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
inline void
init()
{
losers[0] = losers[init_winner(1)];
losers[0].sup = sup;
losers[0].source = source;
#ifdef COPY
KEY(0) = KEY_SOURCE(source);
// no dummy sequence can ever be at the top at the beginning (0 sequences!)
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
}
};
#undef KEY
#undef KEY_SOURCE
}
// Do not pass a const reference since key will be used as local variable.
inline void
delete_min_insert(T key, bool)
{
// No dummy sequence can ever be at the top and be retrieved!
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
#if _GLIBCXX_LOSER_TREE_POINTER
/** @brief Guarded loser tree, either copying the whole element into
the tree structure, or looking up the element via the index.
* Guarding is done explicitly through one flag sup per element,
* inf is not needed due to a better initialization routine.
* This is a well-performing variant.
*/
template<typename T, typename Comparator = std::less<T> >
class LoserTreePointer
int source = losers[0].source;
printf("%d\n", source);
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if (comp(losers[pos].key, key)
|| (!comp(key, losers[pos].key) && losers[pos].source < source))
{
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].key, key);
}
}
losers[0].source = source;
losers[0].key = key;
}
};
/**
* @brief Non-Stable implementation of unguarded LoserTree.
*
* Stable implementation is above.
*/
template<typename T, typename Comparator>
class LoserTreeUnguarded</* stable == */false, T, Comparator> :
public LoserTreeUnguardedBase<T, Comparator>
{
typedef LoserTreeUnguardedBase<T, Comparator> Base;
using Base::k;
using Base::losers;
public:
LoserTreeUnguarded(unsigned int _k, const T _sentinel,
Comparator _comp = std::less<T>())
: Base::LoserTreeUnguardedBase(_k, _sentinel, _comp)
{}
unsigned int
init_winner (unsigned int root)
{
private:
struct Loser
{
bool sup;
int source;
const T* keyp;
};
unsigned int ik, k, offset;
Loser* losers;
Comparator comp;
public:
LoserTreePointer(unsigned int _k, Comparator _comp = std::less<T>())
: comp(_comp)
{
ik = _k;
// Next greater power of 2.
k = 1 << (log2(ik - 1) + 1);
offset = k;
losers = new Loser[k * 2];
for (unsigned int i = ik - 1; i < k; ++i)
losers[i + k].sup = true;
}
~LoserTreePointer()
{ delete[] losers; }
int
get_min_source()
{ return losers[0].source; }
void
insert_start(const T& key, int source, bool sup)
{
unsigned int pos = k + source;
losers[pos].sup = sup;
losers[pos].source = source;
losers[pos].keyp = &key;
}
unsigned int
init_winner(unsigned int root)
{
if (root >= k)
return root;
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup
|| (!losers[left].sup
&& !comp(*losers[right].keyp, *losers[left].keyp)))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void
init()
{ losers[0] = losers[init_winner(1)]; }
void
delete_min_insert(const T& key, bool sup)
{
const T* keyp = &key;
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if (sup || (!losers[pos].sup && comp(*losers[pos].keyp, *keyp)))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].sup = sup;
losers[0].source = source;
losers[0].keyp = keyp;
}
void
insert_start_stable(const T& key, int source, bool sup)
{ return insert_start(key, source, sup); }
unsigned int
init_winner_stable(unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup
|| (!losers[left].sup && !comp(*losers[right].keyp,
*losers[left].keyp)))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void
init_stable()
{ losers[0] = losers[init_winner_stable(1)]; }
void
delete_min_insert_stable(const T& key, bool sup)
{
const T* keyp = &key;
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if ( (sup && (!losers[pos].sup || losers[pos].source < source))
|| (!sup && !losers[pos].sup &&
((comp(*losers[pos].keyp, *keyp))
|| (!comp(*keyp, *losers[pos].keyp)
&& losers[pos].source < source))))
{
// The other one is smaller.
std::swap(losers[pos].sup, sup);
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].sup = sup;
losers[0].source = source;
losers[0].keyp = keyp;
}
};
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
#if _GLIBCXX_ASSERTIONS
// If left one is sentinel then right one must be, too.
if (losers[left].source == -1)
_GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1);
#endif
#if _GLIBCXX_LOSER_TREE_UNGUARDED
/** @brief Unguarded loser tree, copying the whole element into the
* tree structure.
*
* No guarding is done, therefore not a single input sequence must
* run empty. This is a very fast variant.
*/
template<typename T, typename Comparator = std::less<T> >
class LoserTreeUnguarded
if (!comp(losers[right].key, losers[left].key))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
inline void
init()
{
private:
struct Loser
{
int source;
T key;
};
unsigned int ik, k, offset;
unsigned int* mapping;
Loser* losers;
Comparator comp;
void
map(unsigned int root, unsigned int begin, unsigned int end)
{
if (begin + 1 == end)
mapping[begin] = root;
else
{
// Next greater or equal power of 2.
unsigned int left = 1 << (log2(end - begin - 1));
map(root * 2, begin, begin + left);
map(root * 2 + 1, begin + left, end);
}
}
public:
LoserTreeUnguarded(unsigned int _k, Comparator _comp = std::less<T>())
: comp(_comp)
{
ik = _k;
// Next greater or equal power of 2.
k = 1 << (log2(ik - 1) + 1);
offset = k;
losers = new Loser[k + ik];
mapping = new unsigned int[ik];
map(1, 0, ik);
}
~LoserTreeUnguarded()
{
delete[] losers;
delete[] mapping;
}
int
get_min_source()
{ return losers[0].source; }
void
insert_start(const T& key, int source, bool)
{
unsigned int pos = mapping[source];
losers[pos].source = source;
losers[pos].key = key;
}
unsigned int
init_winner(unsigned int root, unsigned int begin, unsigned int end)
{
if (begin + 1 == end)
return mapping[begin];
else
{
// Next greater or equal power of 2.
unsigned int division = 1 << (log2(end - begin - 1));
unsigned int left = init_winner(2 * root, begin, begin + division);
unsigned int right =
init_winner(2 * root + 1, begin + division, end);
if (!comp(losers[right].key, losers[left].key))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
void
init()
{ losers[0] = losers[init_winner(1, 0, ik)]; }
// Do not pass const reference since key will be used as local variable.
void
delete_min_insert(const T& key, bool)
{
losers[0].key = key;
T& keyr = losers[0].key;
int& source = losers[0].source;
for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if (comp(losers[pos].key, keyr))
{
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].key, keyr);
}
}
}
void
insert_start_stable(const T& key, int source, bool)
{ return insert_start(key, source, false); }
void
init_stable()
{ init(); }
void
delete_min_insert_stable(const T& key, bool)
{
losers[0].key = key;
T& keyr = losers[0].key;
int& source = losers[0].source;
for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if (comp(losers[pos].key, keyr)
|| (!comp(keyr, losers[pos].key)
&& losers[pos].source < source))
{
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].key, keyr);
}
}
}
};
losers[0] = losers[init_winner(1)];
// no dummy sequence can ever be at the top at the beginning (0 sequences!)
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
}
#if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
// Do not pass a const reference since key will be used as local variable.
inline void
delete_min_insert(T key, bool)
{
printf("wrong\n");
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if (comp(losers[pos].key, key))
{
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].key, key);
}
}
losers[0].source = source;
losers[0].key = key;
}
};
/** @brief Unguarded loser tree, keeping only pointers to the
* elements in the tree structure.
......@@ -926,175 +773,233 @@ template<typename T, typename Comparator = std::less<T> >
* No guarding is done, therefore not a single input sequence must
* run empty. This is a very fast variant.
*/
template<typename T, typename Comparator = std::less<T> >
class LoserTreePointerUnguarded
template<typename T, typename Comparator>
class LoserTreePointerUnguardedBase
{
protected:
struct Loser
{
private:
struct Loser
{
int source;
const T* keyp;
};
unsigned int ik, k, offset;
unsigned int* mapping;
Loser* losers;
Comparator comp;
void map(unsigned int root, unsigned int begin, unsigned int end)
{
if (begin + 1 == end)
mapping[begin] = root;
else
{
// Next greater or equal power of 2.
unsigned int left = 1 << (log2(end - begin - 1));
map(root * 2, begin, begin + left);
map(root * 2 + 1, begin + left, end);
}
}
public:
LoserTreePointerUnguarded(unsigned int _k,
Comparator _comp = std::less<T>())
: comp(_comp)
{
ik = _k;
// Next greater power of 2.
k = 1 << (log2(ik - 1) + 1);
offset = k;
losers = new Loser[k + ik];
mapping = new unsigned int[ik];
map(1, 0, ik);
}
~LoserTreePointerUnguarded()
{
delete[] losers;
delete[] mapping;
}
int
get_min_source()
{ return losers[0].source; }
void
insert_start(const T& key, int source, bool)
{
unsigned int pos = mapping[source];
losers[pos].source = source;
losers[pos].keyp = &key;
}
unsigned int
init_winner(unsigned int root, unsigned int begin, unsigned int end)
{
if (begin + 1 == end)
return mapping[begin];
else
{
// Next greater or equal power of 2.
unsigned int division = 1 << (log2(end - begin - 1));
unsigned int left = init_winner(2 * root, begin, begin + division);
unsigned int right = init_winner(2 * root + 1,
begin + division, end);
if (!comp(*losers[right].keyp, *losers[left].keyp))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
int source;
const T* keyp;
};
void
init()
{ losers[0] = losers[init_winner(1, 0, ik)]; }
unsigned int ik, k, offset;
Loser* losers;
const T sentinel;
Comparator comp;
void
delete_min_insert(const T& key, bool)
{
const T* keyp = &key;
int& source = losers[0].source;
for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if (comp(*losers[pos].keyp, *keyp))
{
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].keyp = keyp;
}
public:
void
insert_start_stable(const T& key, int source, bool)
{ return insert_start(key, source, false); }
inline
LoserTreePointerUnguardedBase(unsigned int _k, const T _sentinel,
Comparator _comp = std::less<T>())
: sentinel(_sentinel), comp(_comp)
{
ik = _k;
// Next greater power of 2.
k = 1 << (log2(ik - 1) + 1);
offset = k;
// Avoid default-constructing losers[].key
losers = new Loser[2 * k];
for (unsigned int i = /*k + ik - 1*/0; i < (2 * k); ++i)
{
losers[i].keyp = &sentinel;
losers[i].source = -1;
}
}
inline ~LoserTreePointerUnguardedBase()
{ delete[] losers; }
inline int
get_min_source()
{
// no dummy sequence can ever be at the top!
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
return losers[0].source;
}
void
init_stable()
{ init(); }
inline void
insert_start(const T& key, int source, bool)
{
unsigned int pos = k + source;
losers[pos].keyp = &key;
losers[pos].source = source;
}
};
/**
* @brief Stable unguarded LoserTree variant storing pointers.
*
* Unstable variant is implemented below using partial specialization.
*/
template<bool stable/* default == true */, typename T, typename Comparator>
class LoserTreePointerUnguarded :
public LoserTreePointerUnguardedBase<T, Comparator>
{
typedef LoserTreePointerUnguardedBase<T, Comparator> Base;
using Base::k;
using Base::losers;
public:
LoserTreePointerUnguarded(unsigned int _k, const T _sentinel,
Comparator _comp = std::less<T>())
: Base::LoserTreePointerUnguardedBase(_k, _sentinel, _comp)
{}
unsigned int
init_winner(unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
if (!comp(*losers[right].keyp, *losers[left].keyp))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
inline void
init()
{
losers[0] = losers[init_winner(1)];
void
delete_min_insert_stable(const T& key, bool)
{
int& source = losers[0].source;
const T* keyp = &key;
for (int pos = mapping[source] / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if (comp(*losers[pos].keyp, *keyp)
|| (!comp(*keyp, *losers[pos].keyp)
&& losers[pos].source < source))
{
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].keyp = keyp;
}
};
// no dummy sequence can ever be at the top at the beginning (0 sequences!)
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
}
template<typename _ValueTp, class Comparator>
struct loser_tree_traits
inline void
delete_min_insert(const T& key, bool sup)
{
#if _GLIBCXX_LOSER_TREE
typedef LoserTree<_ValueTp, Comparator> LT;
#else
# if _GLIBCXX_LOSER_TREE_POINTER
typedef LoserTreePointer<_ValueTp, Comparator> LT;
# else
# error Must define some type in losertree.h.
# endif
const T* keyp = &key;
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted, ties are broken by source.
if (comp(*losers[pos].keyp, *keyp)
|| (!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source))
{
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].source = source;
losers[0].keyp = keyp;
// no dummy sequence can ever be at the top!
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
}
};
/**
* @brief Unstable unguarded LoserTree variant storing pointers.
*
* Stable variant is above.
*/
template<typename T, typename Comparator>
class LoserTreePointerUnguarded</* stable == */false, T, Comparator> :
public LoserTreePointerUnguardedBase<T, Comparator>
{
typedef LoserTreePointerUnguardedBase<T, Comparator> Base;
using Base::k;
using Base::losers;
public:
LoserTreePointerUnguarded(unsigned int _k, const T _sentinel,
Comparator _comp = std::less<T>())
: Base::LoserTreePointerUnguardedBase(_k, _sentinel, _comp)
{}
unsigned int
init_winner(unsigned int root)
{
if (root >= k)
{
return root;
}
else
{
unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1);
#if _GLIBCXX_ASSERTIONS
// If left one is sentinel then right one must be, too.
if (losers[left].source == -1)
_GLIBCXX_PARALLEL_ASSERT(losers[right].source == -1);
#endif
};
template<typename _ValueTp, class Comparator>
struct loser_tree_unguarded_traits
if (!comp(*losers[right].keyp, *losers[left].keyp))
{
// Left one is less or equal.
losers[root] = losers[right];
return left;
}
else
{
// Right one is less.
losers[root] = losers[left];
return right;
}
}
}
inline void
init()
{
#if _GLIBCXX_LOSER_TREE_UNGUARDED
typedef LoserTreeUnguarded<_ValueTp, Comparator> LT;
#else
# if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
typedef LoserTreePointerUnguarded<_ValueTp, Comparator> LT;
# else
# error Must define some unguarded type in losertree.h.
# endif
losers[0] = losers[init_winner(1)];
// no dummy sequence can ever be at the top at the beginning (0 sequences!)
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(losers[0].source != -1);
#endif
};
}
}
inline void
delete_min_insert(const T& key, bool sup)
{
const T* keyp = &key;
int source = losers[0].source;
for (unsigned int pos = (k + source) / 2; pos > 0; pos /= 2)
{
// The smaller one gets promoted.
if (comp(*(losers[pos].keyp), *keyp))
{
// The other one is smaller.
std::swap(losers[pos].source, source);
std::swap(losers[pos].keyp, keyp);
}
}
losers[0].source = source;
losers[0].keyp = keyp;
}
};
} // namespace __gnu_parallel
#endif
......@@ -239,19 +239,26 @@ namespace __gnu_parallel
std::iterator_traits<RandomAccessIterator1>::
difference_type max_length, Comparator comp)
{
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
typedef typename
std::iterator_traits<RandomAccessIterator1>::value_type value_type;
typedef typename std::iterator_traits<RandomAccessIterator1>::
difference_type difference_type1 /* == difference_type2 */;
typedef typename std::iterator_traits<RandomAccessIterator3>::
difference_type difference_type3;
typedef typename std::pair<RandomAccessIterator1, RandomAccessIterator1>
iterator_pair;
std::pair<RandomAccessIterator1, RandomAccessIterator1>
seqs[2] = { std::make_pair(begin1, end1),
std::make_pair(begin2, end2) };
RandomAccessIterator3
target_end = parallel_multiway_merge(seqs, seqs + 2, target,
comp, max_length, true, false);
RandomAccessIterator3
target_end = parallel_multiway_merge
< /* stable = */ true, /* sentinels = */ false>(
seqs, seqs + 2, target, comp,
multiway_merge_exact_splitting
< /* stable = */ true, iterator_pair*,
Comparator, difference_type1>,
max_length);
return target_end;
}
......
......@@ -40,7 +40,7 @@
* This file is a GNU parallel extension to the Standard C++ Library.
*/
// Written by Johannes Singler.
// Written by Johannes Singler and Manuel Holtgrewe.
#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
#define _GLIBCXX_PARALLEL_MULTIWAY_MERGE_H
......@@ -50,7 +50,6 @@
#include <bits/stl_algo.h>
#include <parallel/features.h>
#include <parallel/parallel.h>
#include <parallel/merge.h>
#include <parallel/losertree.h>
#if _GLIBCXX_ASSERTIONS
#include <parallel/checkers.h>
......@@ -59,27 +58,34 @@
/** @brief Length of a sequence described by a pair of iterators. */
#define _GLIBCXX_PARALLEL_LENGTH(s) ((s).second - (s).first)
// XXX need iterator typedefs
namespace __gnu_parallel
{
// Announce guarded and unguarded iterator.
template<typename RandomAccessIterator, typename Comparator>
class guarded_iterator;
// Making the arguments const references seems to dangerous,
// the user-defined comparator might not be const.
template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
guarded_iterator<RandomAccessIterator, Comparator>& bi2);
guarded_iterator<RandomAccessIterator, Comparator>& bi2);
template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
guarded_iterator<RandomAccessIterator, Comparator>& bi2);
guarded_iterator<RandomAccessIterator, Comparator>& bi2);
/** @brief Iterator wrapper supporting an implicit supremum at the end
of the sequence, dominating all comparisons.
* Deriving from RandomAccessIterator is not possible since
* RandomAccessIterator need not be a class.
*/
/** @brief Iterator wrapper supporting an implicit supremum at the end
* of the sequence, dominating all comparisons.
*
* The implicit supremum comes with a performance cost.
*
* Deriving from RandomAccessIterator is not possible since
* RandomAccessIterator need not be a class.
*/
template<typename RandomAccessIterator, typename Comparator>
class guarded_iterator
{
......@@ -100,7 +106,7 @@ template<typename RandomAccessIterator, typename Comparator>
* @param comp Comparator provided for associated overloaded
* compare operators. */
guarded_iterator(RandomAccessIterator begin,
RandomAccessIterator end, Comparator& comp)
RandomAccessIterator end, Comparator& comp)
: current(begin), end(end), comp(comp)
{ }
......@@ -115,7 +121,7 @@ template<typename RandomAccessIterator, typename Comparator>
/** @brief Dereference operator.
* @return Referenced element. */
typename std::iterator_traits<RandomAccessIterator>::value_type
typename std::iterator_traits<RandomAccessIterator>::value_type&
operator*()
{ return *current; }
......@@ -158,7 +164,7 @@ template<typename RandomAccessIterator, typename Comparator>
template<typename RandomAccessIterator, typename Comparator>
inline bool
operator<=(guarded_iterator<RandomAccessIterator, Comparator>& bi1,
guarded_iterator<RandomAccessIterator, Comparator>& bi2)
guarded_iterator<RandomAccessIterator, Comparator>& bi2)
{
if (bi2.current == bi2.end) //bi1 is sup
return bi1.current != bi1.end; //bi2 is not sup
......@@ -185,7 +191,7 @@ template<typename RandomAccessIterator, typename Comparator>
{
private:
/** @brief Current iterator position. */
RandomAccessIterator& current;
RandomAccessIterator current;
/** @brief Comparator. */
mutable Comparator& comp;
......@@ -195,7 +201,7 @@ template<typename RandomAccessIterator, typename Comparator>
* @param end Unused, only for compatibility.
* @param comp Unused, only for compatibility. */
unguarded_iterator(RandomAccessIterator begin,
RandomAccessIterator end, Comparator& comp)
RandomAccessIterator end, Comparator& comp)
: current(begin), comp(comp)
{ }
......@@ -210,7 +216,7 @@ template<typename RandomAccessIterator, typename Comparator>
/** @brief Dereference operator.
* @return Referenced element. */
typename std::iterator_traits<RandomAccessIterator>::value_type
typename std::iterator_traits<RandomAccessIterator>::value_type&
operator*()
{ return *current; }
......@@ -256,159 +262,41 @@ template<typename RandomAccessIterator, typename Comparator>
return !(bi1.comp)(*bi2, *bi1);
}
/** Prepare a set of sequences to be merged without a (end) guard
* @param seqs_begin
* @param seqs_end
* @param comp
* @param min_sequence
* @param stable
* @pre (seqs_end - seqs_begin > 0) */
template<typename RandomAccessIteratorIterator, typename Comparator>
typename std::iterator_traits<
typename std::iterator_traits<RandomAccessIteratorIterator>::value_type
::first_type>::difference_type
prepare_unguarded(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end, Comparator comp,
int& min_sequence, bool stable)
{
_GLIBCXX_CALL(seqs_end - seqs_begin)
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
typedef typename std::iterator_traits<RandomAccessIterator1>
::difference_type
difference_type;
if ((*seqs_begin).first == (*seqs_begin).second)
{
// Empty sequence found, it's the first one.
min_sequence = 0;
return -1;
}
// Last element in sequence.
value_type min = *((*seqs_begin).second - 1);
min_sequence = 0;
for (RandomAccessIteratorIterator s = seqs_begin + 1; s != seqs_end; ++s)
{
if ((*s).first == (*s).second)
{
// Empty sequence found.
min_sequence = static_cast<int>(s - seqs_begin);
return -1;
}
// Last element in sequence.
const value_type& v = *((*s).second - 1);
if (comp(v, min)) //strictly smaller
{
min = v;
min_sequence = static_cast<int>(s - seqs_begin);
}
}
difference_type overhang_size = 0;
int s = 0;
for (s = 0; s <= min_sequence; ++s)
{
RandomAccessIterator1 split;
if (stable)
split = std::upper_bound(seqs_begin[s].first, seqs_begin[s].second,
min, comp);
else
split = std::lower_bound(seqs_begin[s].first, seqs_begin[s].second,
min, comp);
overhang_size += seqs_begin[s].second - split;
}
for (; s < (seqs_end - seqs_begin); ++s)
{
RandomAccessIterator1 split = std::lower_bound(
seqs_begin[s].first, seqs_begin[s].second, min, comp);
overhang_size += seqs_begin[s].second - split;
}
// So many elements will be left over afterwards.
return overhang_size;
}
/** Prepare a set of sequences to be merged with a (end) guard (sentinel)
* @param seqs_begin
* @param seqs_end
* @param comp */
template<typename RandomAccessIteratorIterator, typename Comparator>
typename std::iterator_traits<typename std::iterator_traits<
RandomAccessIteratorIterator>::value_type::first_type>::difference_type
prepare_unguarded_sentinel(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
Comparator comp)
{
_GLIBCXX_CALL(seqs_end - seqs_begin)
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>
::value_type
value_type;
typedef typename std::iterator_traits<RandomAccessIterator1>
::difference_type
difference_type;
// Last element in sequence.
value_type* max = NULL;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
{
if ((*s).first == (*s).second)
continue;
// Last element in sequence.
value_type& v = *((*s).second - 1);
// Strictly greater.
if (!max || comp(*max, v))
max = &v;
}
difference_type overhang_size = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
{
RandomAccessIterator1 split =
std::lower_bound((*s).first, (*s).second, *max, comp);
overhang_size += (*s).second - split;
// Set sentinel.
*((*s).second) = *max;
}
// So many elements will be left over afterwards.
return overhang_size;
}
/** @brief Highly efficient 3-way merging procedure.
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
* @param stable Unused, stable anyway.
* @return End iterator of output sequence. */
*
* Merging is done with the algorithm implementation described by Peter
* Sanders. Basically, the idea is to minimize the number of necessary
* comparison after merging out an element. The implementation trick
* that makes this fast is that the order of the sequences is stored
* in the instruction pointer (translated into labels in C++).
*
* This works well for merging up to 4 sequences.
*
* Note that making the merging stable does <em>not</em> come at a
* performance hit.
*
* Whether the merging is done guarded or unguarded is selected by the
* used iterator class.
*
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
*
* @return End iterator of output sequence.
*/
template<template<typename RAI, typename C> class iterator,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3
multiway_merge_3_variant(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length,
bool stable)
multiway_merge_3_variant(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length)
{
_GLIBCXX_CALL(length);
......@@ -423,6 +311,10 @@ template<template<typename RAI, typename C> class iterator,
if (length == 0)
return target;
#if _GLIBCXX_ASSERTIONS
_DifferenceTp orig_length = length;
#endif
iterator<RandomAccessIterator1, Comparator>
seq0(seqs_begin[0].first, seqs_begin[0].second, comp),
seq1(seqs_begin[1].first, seqs_begin[1].second, comp),
......@@ -450,17 +342,16 @@ template<template<typename RAI, typename C> class iterator,
else
goto s210;
}
#define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1)\
#define _GLIBCXX_PARALLEL_MERGE_3_CASE(a,b,c,c0,c1) \
s ## a ## b ## c : \
*target = *seq ## a; \
++target; \
--length; \
++seq ## a; \
if (length == 0) goto finish; \
if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
goto s ## b ## c ## a;
++target; \
--length; \
++seq ## a; \
if (length == 0) goto finish; \
if (seq ## a c0 seq ## b) goto s ## a ## b ## c; \
if (seq ## a c1 seq ## c) goto s ## b ## a ## c; \
goto s ## b ## c ## a;
_GLIBCXX_PARALLEL_MERGE_3_CASE(0, 1, 2, <=, <=);
_GLIBCXX_PARALLEL_MERGE_3_CASE(1, 2, 0, <=, < );
......@@ -474,6 +365,14 @@ template<template<typename RAI, typename C> class iterator,
finish:
;
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(
((RandomAccessIterator1)seq0 - seqs_begin[0].first) +
((RandomAccessIterator1)seq1 - seqs_begin[1].first) +
((RandomAccessIterator1)seq2 - seqs_begin[2].first)
== orig_length);
#endif
seqs_begin[0].first = seq0;
seqs_begin[1].first = seq1;
seqs_begin[2].first = seq2;
......@@ -481,95 +380,31 @@ template<template<typename RAI, typename C> class iterator,
return target;
}
template<typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3
multiway_merge_3_combined(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp,
_DifferenceTp length, bool stable)
{
_GLIBCXX_CALL(length);
typedef _DifferenceTp difference_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
int min_seq;
RandomAccessIterator3 target_end;
// Stable anyway.
difference_type overhang =
prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
difference_type total_length = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
if (overhang != -1)
{
difference_type unguarded_length =
std::min(length, total_length - overhang);
target_end = multiway_merge_3_variant<unguarded_iterator>
(seqs_begin, seqs_end, target, comp, unguarded_length, stable);
overhang = length - unguarded_length;
}
else
{
// Empty sequence found.
overhang = length;
target_end = target;
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
switch (min_seq)
{
case 0:
// Iterators will be advanced accordingly.
target_end = merge_advance(seqs_begin[1].first, seqs_begin[1].second,
seqs_begin[2].first, seqs_begin[2].second,
target_end, overhang, comp);
break;
case 1:
target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
seqs_begin[2].first, seqs_begin[2].second,
target_end, overhang, comp);
break;
case 2:
target_end = merge_advance(seqs_begin[0].first, seqs_begin[0].second,
seqs_begin[1].first, seqs_begin[1].second,
target_end, overhang, comp);
break;
default:
_GLIBCXX_PARALLEL_ASSERT(false);
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
return target_end;
}
/** @brief Highly efficient 4-way merging procedure.
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
* @param stable Unused, stable anyway.
* @return End iterator of output sequence. */
/**
* @brief Highly efficient 4-way merging procedure.
*
* Merging is done with the algorithm implementation described by Peter
* Sanders. Basically, the idea is to minimize the number of necessary
* comparison after merging out an element. The implementation trick
* that makes this fast is that the order of the sequences is stored
* in the instruction pointer (translated into goto labels in C++).
*
* This works well for merging up to 4 sequences.
*
* Note that making the merging stable does <em>not</em> come at a
* performance hit.
*
* Whether the merging is done guarded or unguarded is selected by the
* used iterator class.
*
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
*
* @return End iterator of output sequence.
*/
template<template<typename RAI, typename C> class iterator,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
......@@ -579,7 +414,7 @@ template<template<typename RAI, typename C> class iterator,
multiway_merge_4_variant(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length, bool stable)
Comparator comp, _DifferenceTp length)
{
_GLIBCXX_CALL(length);
typedef _DifferenceTp difference_type;
......@@ -676,651 +511,467 @@ template<template<typename RAI, typename C> class iterator,
return target;
}
template<typename RandomAccessIteratorIterator,
/** @brief Multi-way merging procedure for a high branching factor,
* guarded case.
*
* This merging variant uses a LoserTree class as selected by <tt>LT</tt>.
*
* Stability is selected through the used LoserTree class <tt>LT</tt>.
*
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
*
* @return End iterator of output sequence.
*/
template<typename LT,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3
multiway_merge_4_combined(RandomAccessIteratorIterator seqs_begin,
multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp,
_DifferenceTp length, bool stable)
_DifferenceTp length)
{
_GLIBCXX_CALL(length);
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
int min_seq;
RandomAccessIterator3 target_end;
int k = static_cast<int>(seqs_end - seqs_begin);
// Stable anyway.
difference_type overhang =
prepare_unguarded(seqs_begin, seqs_end, comp, min_seq, true);
LT lt(k, comp);
difference_type total_length = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
if (overhang != -1)
// Default value for potentially non-default-constructible types.
value_type* arbitrary_element = NULL;
for (int t = 0; t < k; ++t)
{
difference_type unguarded_length =
std::min(length, total_length - overhang);
target_end = multiway_merge_4_variant<unguarded_iterator>
(seqs_begin, seqs_end, target, comp, unguarded_length, stable);
overhang = length - unguarded_length;
if(arbitrary_element == NULL
&& _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]) > 0)
arbitrary_element = &(*seqs_begin[t].first);
total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
}
else
if(total_length == 0)
return target;
for (int t = 0; t < k; ++t)
{
// Empty sequence found.
overhang = length;
target_end = target;
if (seqs_begin[t].first == seqs_begin[t].second)
lt.insert_start(*arbitrary_element, t, true);
else
lt.insert_start(*seqs_begin[t].first, t, false);
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
lt.init();
std::vector<std::pair<RandomAccessIterator1, RandomAccessIterator1> >
one_missing(seqs_begin, seqs_end);
one_missing.erase(one_missing.begin() + min_seq); //remove
const difference_type const_total_length(std::min(total_length, length));
target_end = multiway_merge_3_variant<guarded_iterator>(
one_missing.begin(), one_missing.end(),
target_end, comp, overhang, stable);
int source;
// Insert back again.
one_missing.insert(one_missing.begin() + min_seq, seqs_begin[min_seq]);
// Write back modified iterators.
copy(one_missing.begin(), one_missing.end(), seqs_begin);
for (difference_type i = 0; i < const_total_length; ++i)
{
//take out
source = lt.get_min_source();
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
*(target++) = *(seqs_begin[source].first++);
return target_end;
// Feed.
if (seqs_begin[source].first == seqs_begin[source].second)
lt.delete_min_insert(*arbitrary_element, true);
else
// Replace from same source.
lt.delete_min_insert(*seqs_begin[source].first, false);
}
return target;
}
/** @brief Basic multi-way merging procedure.
/** @brief Multi-way merging procedure for a high branching factor,
* unguarded case.
*
* The head elements are kept in a sorted array, new heads are
* inserted linearly.
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
* @param stable Stable merging incurs a performance penalty.
* @return End iterator of output sequence.
* Merging is done using the LoserTree class <tt>LT</tt>.
*
* Stability is selected by the used LoserTrees.
*
* @pre No input will run out of elements during the merge.
*
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
*
* @return End iterator of output sequence.
*/
template<typename RandomAccessIteratorIterator,
template<typename LT,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
typename _DifferenceTp, typename Comparator>
RandomAccessIterator3
multiway_merge_bubble(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length, bool stable)
multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
int min_seq, Comparator comp,
_DifferenceTp length)
{
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
int k = static_cast<int>(seqs_end - seqs_begin);
int nrs; // Number of remaining sequences.
int k = seqs_end - seqs_begin;
// Determine the sentinel. The sentinel is largest/last element of the
// sequences with the smallest largest/last element.
value_type sentinel = *(seqs_begin[min_seq].second - 1);
LT lt(k, sentinel, comp);
// Avoid default constructor.
value_type* fe = static_cast<value_type*>(
::operator new(sizeof(value_type) * k)); // Front elements.
int* source = new int[k];
difference_type total_length = 0;
// Write entries into queue.
nrs = 0;
for (int pi = 0; pi < k; ++pi)
for (int t = 0; t < k; ++t)
{
if (seqs_begin[pi].first != seqs_begin[pi].second)
{
::new(&(fe[nrs])) value_type(*(seqs_begin[pi].first));
source[nrs] = pi;
++nrs;
total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[pi]);
}
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
#endif
lt.insert_start(*seqs_begin[t].first, t, false);
if (stable)
{
// Bubble sort fe and source by fe.
for (int k = 0; k < nrs - 1; ++k)
for (int pi = nrs - 1; pi > k; --pi)
if (comp(fe[pi], fe[pi - 1]) ||
(!comp(fe[pi - 1], fe[pi]) && source[pi] < source[pi - 1]))
{
std::swap(fe[pi - 1], fe[pi]);
std::swap(source[pi - 1], source[pi]);
}
}
else
{
for (int k = 0; k < nrs - 1; ++k)
for (int pi = nrs - 1; pi > k; --pi)
if (comp(fe[pi], fe[pi-1]))
{
std::swap(fe[pi-1], fe[pi]);
std::swap(source[pi-1], source[pi]);
}
total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
}
// Iterate.
if (stable)
{
int j;
while (nrs > 0 && length > 0)
{
if (source[0] < source[1])
{
// fe[0] <= fe[1]
while ((nrs == 1 || !comp(fe[1], fe[0])) && length > 0)
{
*target = fe[0];
++target;
++(seqs_begin[source[0]].first);
--length;
if (seqs_begin[source[0]].first
== seqs_begin[source[0]].second)
{
// Move everything to the left.
for (int s = 0; s < nrs - 1; ++s)
{
fe[s] = fe[s + 1];
source[s] = source[s + 1];
}
fe[nrs - 1].~value_type(); //Destruct explicitly.
--nrs;
break;
}
else
fe[0] = *(seqs_begin[source[0]].first);
}
}
else
{
// fe[0] < fe[1]
while ((nrs == 1 || comp(fe[0], fe[1])) && length > 0)
{
*target = fe[0];
++target;
++(seqs_begin[source[0]].first);
--length;
if (seqs_begin[source[0]].first
== seqs_begin[source[0]].second)
{
for (int s = 0; s < nrs - 1; ++s)
{
fe[s] = fe[s + 1];
source[s] = source[s + 1];
}
fe[nrs - 1].~value_type(); //Destruct explicitly.
--nrs;
break;
}
else
fe[0] = *(seqs_begin[source[0]].first);
}
}
// Sink down.
j = 1;
while ((j < nrs) && (comp(fe[j], fe[j - 1])
|| (!comp(fe[j - 1], fe[j])
&& (source[j] < source[j - 1]))))
{
std::swap(fe[j - 1], fe[j]);
std::swap(source[j - 1], source[j]);
++j;
}
}
}
else
lt.init();
// Do not go past end.
length = std::min(total_length, length);
int source;
#if _GLIBCXX_ASSERTIONS
difference_type i = 0;
#endif
RandomAccessIterator3 target_end = target + length;
while (target < target_end)
{
int j;
while (nrs > 0 && length > 0)
{
// fe[0] <= fe[1]
while (nrs == 1 || (!comp(fe[1], fe[0])) && length > 0)
{
*target = fe[0];
++target;
++seqs_begin[source[0]].first;
--length;
if (seqs_begin[source[0]].first
== seqs_begin[source[0]].second)
{
for (int s = 0; s < (nrs - 1); ++s)
{
fe[s] = fe[s + 1];
source[s] = source[s + 1];
}
fe[nrs - 1].~value_type(); //Destruct explicitly.
--nrs;
break;
}
else
fe[0] = *(seqs_begin[source[0]].first);
}
// Sink down.
j = 1;
while ((j < nrs) && comp(fe[j], fe[j - 1]))
{
std::swap(fe[j - 1], fe[j]);
std::swap(source[j - 1], source[j]);
++j;
}
}
}
// Take out.
source = lt.get_min_source();
::operator delete(fe); //Destructors already called.
delete[] source;
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(0 <= source && source < k);
_GLIBCXX_PARALLEL_ASSERT(i == 0
|| !comp(*(seqs_begin[source].first), *(target - 1)));
#endif
return target;
}
// Feed.
*(target++) = *(seqs_begin[source].first++);
/** @brief Multi-way merging procedure for a high branching factor,
* guarded case.
*
* The head elements are kept in a loser tree.
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
* @param stable Stable merging incurs a performance penalty.
* @return End iterator of output sequence.
*/
template<typename LT,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3
multiway_merge_loser_tree(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp,
_DifferenceTp length, bool stable)
{
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
int k = static_cast<int>(seqs_end - seqs_begin);
LT lt(k, comp);
difference_type total_length = 0;
// Default value for potentially non-default-constructible types.
value_type* arbitrary_element = NULL;
for (int t = 0; t < k; ++t)
{
if(arbitrary_element == NULL
&& _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]) > 0)
arbitrary_element = &(*seqs_begin[t].first);
total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
}
if(total_length == 0)
return target;
for (int t = 0; t < k; ++t)
{
if (stable)
{
if (seqs_begin[t].first == seqs_begin[t].second)
lt.insert_start_stable(*arbitrary_element, t, true);
else
lt.insert_start_stable(*seqs_begin[t].first, t, false);
}
else
{
if (seqs_begin[t].first == seqs_begin[t].second)
lt.insert_start(*arbitrary_element, t, true);
else
lt.insert_start(*seqs_begin[t].first, t, false);
}
}
if (stable)
lt.init_stable();
else
lt.init();
total_length = std::min(total_length, length);
int source;
if (stable)
{
for (difference_type i = 0; i < total_length; ++i)
{
// Take out.
source = lt.get_min_source();
*(target++) = *(seqs_begin[source].first++);
// Feed.
if (seqs_begin[source].first == seqs_begin[source].second)
lt.delete_min_insert_stable(*arbitrary_element, true);
else
// Replace from same source.
lt.delete_min_insert_stable(*seqs_begin[source].first, false);
}
}
else
{
for (difference_type i = 0; i < total_length; ++i)
{
//take out
source = lt.get_min_source();
*(target++) = *(seqs_begin[source].first++);
// Feed.
if (seqs_begin[source].first == seqs_begin[source].second)
lt.delete_min_insert(*arbitrary_element, true);
else
// Replace from same source.
lt.delete_min_insert(*seqs_begin[source].first, false);
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(
(seqs_begin[source].first != seqs_begin[source].second)
|| (i >= length - 1));
++i;
#endif
// Replace from same source.
lt.delete_min_insert(*seqs_begin[source].first, false);
}
return target;
}
/** @brief Multi-way merging procedure for a high branching factor,
* unguarded case.
* requiring sentinels to exist.
* @param stable The value must the same as for the used LoserTrees.
* @param UnguardedLoserTree Loser Tree variant to use for the unguarded
* merging.
* @param GuardedLoserTree Loser Tree variant to use for the guarded
* merging.
*
* The head elements are kept in a loser tree.
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
* @param stable Stable merging incurs a performance penalty.
* @return End iterator of output sequence.
* @pre No input will run out of elements during the merge.
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
*
* @return End iterator of output sequence.
*/
template<typename LT,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp, typename Comparator>
template<
typename UnguardedLoserTree,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3
multiway_merge_loser_tree_unguarded(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp,
_DifferenceTp length, bool stable)
multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp,
_DifferenceTp length)
{
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
typedef _DifferenceTp difference_type;
typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
int k = seqs_end - seqs_begin;
LT lt(k, comp);
RandomAccessIterator3 target_end;
difference_type total_length = 0;
for (int t = 0; t < k; ++t)
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
{
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(seqs_begin[t].first != seqs_begin[t].second);
#endif
if (stable)
lt.insert_start_stable(*seqs_begin[t].first, t, false);
else
lt.insert_start(*seqs_begin[t].first, t, false);
total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
total_length += _GLIBCXX_PARALLEL_LENGTH(seqs_begin[t]);
// Move the sequends end behind the sentinel spots. This has the
// effect that the sentinel appears to be within the sequence. Then,
// we can use the unguarded variant if we merge out as many
// non-sentinel elements as we have.
++((*s).second);
}
if (stable)
lt.init_stable();
else
lt.init();
// Do not go past end.
length = std::min(total_length, length);
int source;
#if _GLIBCXX_ASSERTIONS
difference_type i = 0;
#endif
if (stable)
{
RandomAccessIterator3 target_end = target + length;
while (target < target_end)
{
// Take out.
source = lt.get_min_source();
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(i == 0
|| !comp(*(seqs_begin[source].first), *(target - 1)));
#endif
*(target++) = *(seqs_begin[source].first++);
difference_type unguarded_length =
std::min(length, total_length);
target_end = multiway_merge_loser_tree_unguarded
<UnguardedLoserTree>
(seqs_begin, seqs_end, target, 0, comp, unguarded_length);
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(
(seqs_begin[source].first != seqs_begin[source].second)
|| (i == length - 1));
++i;
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
// Feed.
// Replace from same source.
lt.delete_min_insert_stable(*seqs_begin[source].first, false);
}
}
else
{
RandomAccessIterator3 target_end = target + length;
while (target < target_end)
{
// Take out.
source = lt.get_min_source();
// Restore the sequence ends so the sentinels are not contained in the
// sequence any more (see comment in loop above).
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
{ --((*s).second); }
#if _GLIBCXX_ASSERTIONS
if (i > 0 && comp(*(seqs_begin[source].first), *(target - 1)))
printf(" %i %i %i\n", length, i, source);
_GLIBCXX_PARALLEL_ASSERT(i == 0
|| !comp(*(seqs_begin[source].first), *(target - 1)));
#endif
return target_end;
}
*(target++) = *(seqs_begin[source].first++);
/**
* @brief Traits for determining whether the loser tree should
* use pointers or copies.
*
* The field "use_pointer" is used to determine whether to use pointers in
* the loser trees or whether to copy the values into the loser tree.
*
* The default behavior is to use pointers if the data type is 4 times as
* big as the pointer to it.
*
* Specialize for your data type to customize the behavior.
*
* Example:
*
* template<>
* struct loser_tree_traits<int>
* { static const bool use_pointer = false; };
*
* template<>
* struct loser_tree_traits<heavyweight_type>
* { static const bool use_pointer = true; };
*
* @param T type to give the loser tree traits for.
*/
template <typename T>
struct loser_tree_traits
{
/**
* @brief True iff to use pointers instead of values in loser trees.
*
* The default behavior is to use pointers if the data type is four
* times as big as the pointer to it.
*/
static const bool use_pointer = (sizeof(T) > 4 * sizeof(T*));
};
#if _GLIBCXX_ASSERTIONS
if (!((seqs_begin[source].first != seqs_begin[source].second)
|| (i >= length - 1)))
printf(" %i %i %i\n", length, i, source);
_GLIBCXX_PARALLEL_ASSERT(
(seqs_begin[source].first != seqs_begin[source].second)
|| (i >= length - 1));
++i;
#endif
// Feed.
// Replace from same source.
lt.delete_min_insert(*seqs_begin[source].first, false);
}
}
/**
* @brief Switch for 3-way merging with sentinels turned off.
*
* Note that 3-way merging is always stable!
*/
template<
bool sentinels /*default == false*/,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
struct multiway_merge_3_variant_sentinel_switch
{
RandomAccessIterator3 operator()(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length)
{
return multiway_merge_3_variant<guarded_iterator>(
seqs_begin, seqs_end, target, comp, length);
}
};
return target;
/**
* @brief Switch for 3-way merging with sentinels turned on.
*
* Note that 3-way merging is always stable!
*/
template<
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
struct multiway_merge_3_variant_sentinel_switch
<true, RandomAccessIteratorIterator, RandomAccessIterator3,
_DifferenceTp, Comparator>
{
RandomAccessIterator3 operator()(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length)
{
return multiway_merge_3_variant<unguarded_iterator>(
seqs_begin, seqs_end, target, comp, length);
}
};
template<typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3
multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp,
_DifferenceTp length, bool stable)
/**
* @brief Switch for 4-way merging with sentinels turned off.
*
* Note that 4-way merging is always stable!
*/
template<
bool sentinels /*default == false*/,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
struct multiway_merge_4_variant_sentinel_switch
{
RandomAccessIterator3 operator()(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length)
{
_GLIBCXX_CALL(length)
return multiway_merge_4_variant<guarded_iterator>(
seqs_begin, seqs_end, target, comp, length);
}
};
typedef _DifferenceTp difference_type;
/**
* @brief Switch for 4-way merging with sentinels turned on.
*
* Note that 4-way merging is always stable!
*/
template<
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
struct multiway_merge_4_variant_sentinel_switch
<true, RandomAccessIteratorIterator, RandomAccessIterator3,
_DifferenceTp, Comparator>
{
RandomAccessIterator3 operator()(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length)
{
return multiway_merge_4_variant<unguarded_iterator>(
seqs_begin, seqs_end, target, comp, length);
}
};
/**
* @brief Switch for k-way merging with sentinels turned on.
*/
template<
bool sentinels,
bool stable,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
struct multiway_merge_k_variant_sentinel_switch
{
RandomAccessIterator3 operator()(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length)
{
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
int min_seq;
RandomAccessIterator3 target_end;
difference_type overhang = prepare_unguarded(seqs_begin, seqs_end,
comp, min_seq, stable);
difference_type total_length = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
if (overhang != -1)
{
difference_type unguarded_length =
std::min(length, total_length - overhang);
target_end = multiway_merge_loser_tree_unguarded
<typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
(seqs_begin, seqs_end, target, comp, unguarded_length, stable);
overhang = length - unguarded_length;
}
else
{
// Empty sequence found.
overhang = length;
target_end = target;
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
target_end = multiway_merge_loser_tree
<typename loser_tree_traits<value_type, Comparator>::LT>
(seqs_begin, seqs_end, target_end, comp, overhang, stable);
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
return target_end;
return multiway_merge_loser_tree_sentinel<
typename __gnu_cxx::__conditional_type<
loser_tree_traits<value_type>::use_pointer
, LoserTreePointerUnguarded<stable, value_type, Comparator>
, LoserTreeUnguarded<stable, value_type, Comparator>
>::__type>(seqs_begin, seqs_end, target, comp, length);
}
};
template<typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3
multiway_merge_loser_tree_sentinel(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp,
_DifferenceTp length, bool stable)
/**
* @brief Switch for k-way merging with sentinels turned off.
*/
template<
bool stable,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
struct multiway_merge_k_variant_sentinel_switch
<false, stable, RandomAccessIteratorIterator, RandomAccessIterator3,
_DifferenceTp, Comparator>
{
RandomAccessIterator3 operator()(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length)
{
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
typedef std::iterator_traits<RandomAccessIteratorIterator> traits_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
RandomAccessIterator3 target_end;
difference_type overhang =
prepare_unguarded_sentinel(seqs_begin, seqs_end, comp);
difference_type total_length = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
{
total_length += _GLIBCXX_PARALLEL_LENGTH(*s);
// Sentinel spot.
++((*s).second);
}
difference_type unguarded_length =
std::min(length, total_length - overhang);
target_end = multiway_merge_loser_tree_unguarded
<typename loser_tree_unguarded_traits<value_type, Comparator>::LT>
(seqs_begin, seqs_end, target, comp, unguarded_length, stable);
overhang = length - unguarded_length;
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length - overhang);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
// Copy rest stable.
for (RandomAccessIteratorIterator s = seqs_begin;
s != seqs_end && overhang > 0; ++s)
{
// Restore.
--((*s).second);
difference_type local_length =
std::min<difference_type>(overhang, _GLIBCXX_PARALLEL_LENGTH(*s));
target_end = std::copy((*s).first, (*s).first + local_length,
target_end);
(*s).first += local_length;
overhang -= local_length;
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(overhang == 0);
_GLIBCXX_PARALLEL_ASSERT(target_end == target + length);
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target_end, comp));
#endif
return target_end;
return multiway_merge_loser_tree<
typename __gnu_cxx::__conditional_type<
loser_tree_traits<value_type>::use_pointer
, LoserTreePointer<stable, value_type, Comparator>
, LoserTree<stable, value_type, Comparator>
>::__type >(seqs_begin, seqs_end, target, comp, length);
}
};
/** @brief Sequential multi-way merging switch.
*
* The _GLIBCXX_PARALLEL_DECISION if based on the branching factor and
* The _GLIBCXX_PARALLEL_DECISION is based on the branching factor and
* runtime settings.
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
......@@ -1330,17 +981,18 @@ template<typename RandomAccessIteratorIterator,
* @param stable Stable merging incurs a performance penalty.
* @param sentinel The sequences have a sentinel element.
* @return End iterator of output sequence. */
template<typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
template<
bool stable,
bool sentinels,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3
multiway_merge(RandomAccessIteratorIterator seqs_begin,
sequential_multiway_merge(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp, _DifferenceTp length,
bool stable, bool sentinel,
sequential_tag)
Comparator comp, _DifferenceTp length)
{
_GLIBCXX_CALL(length)
......@@ -1353,17 +1005,14 @@ template<typename RandomAccessIteratorIterator,
#if _GLIBCXX_ASSERTIONS
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; ++s)
_GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
{
_GLIBCXX_PARALLEL_ASSERT(is_sorted((*s).first, (*s).second, comp));
}
#endif
RandomAccessIterator3 return_target = target;
RandomAccessIterator3 return_target = target;
int k = static_cast<int>(seqs_end - seqs_begin);
_MultiwayMergeAlgorithm mwma = _Settings::get().multiway_merge_algorithm;
if (!sentinel && mwma == LOSER_TREE_SENTINEL)
mwma = LOSER_TREE_COMBINED;
switch (k)
{
case 0:
......@@ -1382,113 +1031,30 @@ template<typename RandomAccessIteratorIterator,
target, length, comp);
break;
case 3:
switch (mwma)
{
case LOSER_TREE_COMBINED:
return_target = multiway_merge_3_combined(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
case LOSER_TREE_SENTINEL:
return_target =
multiway_merge_3_variant<unguarded_iterator>(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
default:
return_target =
multiway_merge_3_variant<guarded_iterator>(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
}
return_target = multiway_merge_3_variant_sentinel_switch<
sentinels
, RandomAccessIteratorIterator
, RandomAccessIterator3
, _DifferenceTp
, Comparator>()(seqs_begin, seqs_end, target, comp, length);
break;
case 4:
switch (mwma)
{
case LOSER_TREE_COMBINED:
return_target = multiway_merge_4_combined(seqs_begin,
seqs_end,
target,
comp, length, stable);
break;
case LOSER_TREE_SENTINEL:
return_target =
multiway_merge_4_variant<unguarded_iterator>(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
default:
return_target = multiway_merge_4_variant<guarded_iterator>(
seqs_begin,
seqs_end,
target,
comp, length, stable);
break;
}
return_target = multiway_merge_4_variant_sentinel_switch<
sentinels
, RandomAccessIteratorIterator
, RandomAccessIterator3
, _DifferenceTp
, Comparator>()(seqs_begin, seqs_end, target, comp, length);
break;
default:
{
switch (mwma)
{
case BUBBLE:
return_target = multiway_merge_bubble(seqs_begin,
seqs_end,
target,
comp, length, stable);
break;
#if _GLIBCXX_LOSER_TREE_EXPLICIT
case LOSER_TREE_EXPLICIT:
return_target = multiway_merge_loser_tree<
LoserTreeExplicit<value_type, Comparator> >(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
#endif
#if _GLIBCXX_LOSER_TREE
case LOSER_TREE:
return_target = multiway_merge_loser_tree<
LoserTree<value_type, Comparator> >(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
#endif
#if _GLIBCXX_LOSER_TREE_COMBINED
case LOSER_TREE_COMBINED:
return_target = multiway_merge_loser_tree_combined(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
#endif
#if _GLIBCXX_LOSER_TREE_SENTINEL
case LOSER_TREE_SENTINEL:
return_target = multiway_merge_loser_tree_sentinel(seqs_begin,
seqs_end,
target,
comp, length,
stable);
break;
#endif
default:
// multiway_merge algorithm not implemented.
_GLIBCXX_PARALLEL_ASSERT(0);
break;
}
}
return_target = multiway_merge_k_variant_sentinel_switch<
sentinels
, stable
, RandomAccessIteratorIterator
, RandomAccessIterator3
, _DifferenceTp
, Comparator>()(seqs_begin, seqs_end, target, comp, length);
break;
}
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
......@@ -1497,38 +1063,246 @@ template<typename RandomAccessIteratorIterator,
return return_target;
}
/**
* @brief Stable sorting functor.
*
* Used to reduce code instanciation in multiway_merge_sampling_splitting.
*/
template<bool stable, class RandomAccessIterator, class StrictWeakOrdering>
struct sampling_sorter
{
void operator()(RandomAccessIterator first, RandomAccessIterator last,
StrictWeakOrdering comp)
{ __gnu_sequential::stable_sort(first, last, comp); }
};
/**
* @brief Non-stable sorting functor.
*
* Used to reduce code instanciation in multiway_merge_sampling_splitting.
*/
template<class RandomAccessIterator, class StrictWeakOrdering>
struct sampling_sorter<false, RandomAccessIterator, StrictWeakOrdering>
{
void operator()(RandomAccessIterator first, RandomAccessIterator last,
StrictWeakOrdering comp)
{ __gnu_sequential::sort(first, last, comp); }
};
/**
* @brief Sampling based splitting for parallel multiway-merge routine.
*/
template<
bool stable
, typename RandomAccessIteratorIterator
, typename Comparator
, typename difference_type>
void multiway_merge_sampling_splitting(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
Comparator comp, difference_type length,
difference_type total_length,
std::vector<std::pair<difference_type, difference_type> > *pieces)
{
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
// k sequences.
int k = static_cast<int>(seqs_end - seqs_begin);
int num_threads = omp_get_num_threads();
difference_type num_samples =
__gnu_parallel::_Settings::get().merge_oversampling * num_threads;
value_type* samples = static_cast<value_type*>(
::operator new(sizeof(value_type) * k * num_samples));
// Sample.
for (int s = 0; s < k; ++s)
for (difference_type i = 0; i < num_samples; ++i)
{
difference_type sample_index =
static_cast<difference_type>(
_GLIBCXX_PARALLEL_LENGTH(seqs_begin[s]) * (double(i + 1) /
(num_samples + 1)) * (double(length)
/ total_length));
new(&(samples[s * num_samples + i])) value_type(
seqs_begin[s].first[sample_index]);
}
// Sort stable or non-stable, depending on value of template parameter
// "stable".
sampling_sorter<stable, value_type*, Comparator>()(
samples, samples + (num_samples * k), comp);
for (int slab = 0; slab < num_threads; ++slab)
// For each slab / processor.
for (int seq = 0; seq < k; ++seq)
{
// For each sequence.
if (slab > 0)
pieces[slab][seq].first =
std::upper_bound(
seqs_begin[seq].first,
seqs_begin[seq].second,
samples[num_samples * k * slab / num_threads],
comp)
- seqs_begin[seq].first;
else
{
// Absolute beginning.
pieces[slab][seq].first = 0;
}
if ((slab + 1) < num_threads)
pieces[slab][seq].second =
std::upper_bound(
seqs_begin[seq].first,
seqs_begin[seq].second,
samples[num_samples * k * (slab + 1) /
num_threads], comp)
- seqs_begin[seq].first;
else
pieces[slab][seq].second = _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
}
::operator delete(samples);
}
/**
* @brief Exact splitting for parallel multiway-merge routine.
*/
template<
bool stable
, typename RandomAccessIteratorIterator
, typename Comparator
, typename difference_type>
void multiway_merge_exact_splitting(
RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
Comparator comp,
difference_type length,
difference_type total_length,
std::vector<std::pair<difference_type, difference_type> > *pieces)
{
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
const bool tight = (total_length == length);
// k sequences.
const int k = static_cast<int>(seqs_end - seqs_begin);
const int num_threads = omp_get_num_threads();
// (Settings::multiway_merge_splitting == __gnu_parallel::_Settings::EXACT).
std::vector<RandomAccessIterator1>* offsets =
new std::vector<RandomAccessIterator1>[num_threads];
std::vector<
std::pair<RandomAccessIterator1, RandomAccessIterator1>
> se(k);
copy(seqs_begin, seqs_end, se.begin());
difference_type* borders =
new difference_type[num_threads + 1];
equally_split(length, num_threads, borders);
for (int s = 0; s < (num_threads - 1); ++s)
{
offsets[s].resize(k);
multiseq_partition(
se.begin(), se.end(), borders[s + 1],
offsets[s].begin(), comp);
// Last one also needed and available.
if (!tight)
{
offsets[num_threads - 1].resize(k);
multiseq_partition(se.begin(), se.end(),
difference_type(length),
offsets[num_threads - 1].begin(), comp);
}
}
for (int slab = 0; slab < num_threads; ++slab)
{
// For each slab / processor.
for (int seq = 0; seq < k; ++seq)
{
// For each sequence.
if (slab == 0)
{
// Absolute beginning.
pieces[slab][seq].first = 0;
}
else
pieces[slab][seq].first =
pieces[slab - 1][seq].second;
if (!tight || slab < (num_threads - 1))
pieces[slab][seq].second =
offsets[slab][seq] - seqs_begin[seq].first;
else
{
// slab == num_threads - 1
pieces[slab][seq].second =
_GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
}
}
}
delete[] offsets;
}
/** @brief Parallel multi-way merge routine.
*
* The _GLIBCXX_PARALLEL_DECISION if based on the branching factor
* and runtime settings.
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
* @param stable Stable merging incurs a performance penalty.
* @param sentinel Ignored.
* @return End iterator of output sequence.
* The _GLIBCXX_PARALLEL_DECISION is based on the branching factor
* and runtime settings.
*
* Must not be called if the number of sequences is 1.
*
* @param Splitter functor to split input (either exact or sampling based)
*
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
* @param stable Stable merging incurs a performance penalty.
* @param sentinel Ignored.
* @return End iterator of output sequence.
*/
template<typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
template<
bool stable,
bool sentinels,
typename RandomAccessIteratorIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Splitter,
typename Comparator
>
RandomAccessIterator3
parallel_multiway_merge(RandomAccessIteratorIterator seqs_begin,
RandomAccessIteratorIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp,
_DifferenceTp length, bool stable, bool sentinel)
RandomAccessIterator3 target,
Comparator comp,
Splitter splitter,
_DifferenceTp length)
{
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(seqs_end - seqs_begin > 1);
#endif
_GLIBCXX_CALL(length)
typedef _DifferenceTp difference_type;
typedef typename std::iterator_traits<RandomAccessIteratorIterator>
::value_type::first_type
RandomAccessIterator1;
typedef typename std::iterator_traits<RandomAccessIterator1>::value_type
value_type;
typedef typename
std::iterator_traits<RandomAccessIterator1>::value_type value_type;
// k sequences.
int k = static_cast<int>(seqs_end - seqs_begin);
......@@ -1543,13 +1317,10 @@ template<typename RandomAccessIteratorIterator,
if (total_length == 0 || k == 0)
return target;
bool tight = (total_length == length);
std::vector<std::pair<difference_type, difference_type> >* pieces;
thread_index_t num_threads = static_cast<thread_index_t>(
std::min<difference_type>(get_max_threads(), total_length));
const _Settings& __s = _Settings::get();
std::min<difference_type>(get_max_threads(), total_length));
# pragma omp parallel num_threads (num_threads)
{
......@@ -1562,126 +1333,12 @@ template<typename RandomAccessIteratorIterator,
for (int s = 0; s < num_threads; ++s)
pieces[s].resize(k);
difference_type num_samples = __s.merge_oversampling
* num_threads;
difference_type num_samples =
__gnu_parallel::_Settings::get().merge_oversampling *
num_threads;
if (__s.multiway_merge_splitting == SAMPLING)
{
value_type* samples = static_cast<value_type*>(
::operator new(sizeof(value_type) * k * num_samples));
// Sample.
for (int s = 0; s < k; ++s)
for (difference_type i = 0; i < num_samples; ++i)
{
difference_type sample_index =
static_cast<difference_type>(
_GLIBCXX_PARALLEL_LENGTH(seqs_begin[s])
* (double(i + 1) / (num_samples + 1))
* (double(length) / total_length));
::new(&(samples[s * num_samples + i]))
value_type(seqs_begin[s].first[sample_index]);
}
if (stable)
__gnu_sequential::stable_sort(samples, samples
+ (num_samples * k), comp);
else
__gnu_sequential::sort(samples, samples
+ (num_samples * k), comp);
for (int slab = 0; slab < num_threads; ++slab)
// For each slab / processor.
for (int seq = 0; seq < k; ++seq)
{
// For each sequence.
if (slab > 0)
pieces[slab][seq].first =
std::upper_bound(seqs_begin[seq].first,
seqs_begin[seq].second,
samples[num_samples * k
* slab / num_threads],
comp)
- seqs_begin[seq].first;
else
{
// Absolute beginning.
pieces[slab][seq].first = 0;
}
if ((slab + 1) < num_threads)
pieces[slab][seq].second =
std::upper_bound(seqs_begin[seq].first,
seqs_begin[seq].second,
samples[num_samples * k
* (slab + 1)
/ num_threads], comp)
- seqs_begin[seq].first;
else
pieces[slab][seq].second
= _GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
}
::operator delete(samples);
}
else
{
// (_Settings::multiway_merge_splitting == _Settings::EXACT).
std::vector<RandomAccessIterator1>* offsets =
new std::vector<RandomAccessIterator1>[num_threads];
std::vector<
std::pair<RandomAccessIterator1, RandomAccessIterator1>
> se(k);
copy(seqs_begin, seqs_end, se.begin());
difference_type* borders =
new difference_type[num_threads + 1];
equally_split(length, num_threads, borders);
for (int s = 0; s < (num_threads - 1); ++s)
{
offsets[s].resize(k);
multiseq_partition(
se.begin(), se.end(), borders[s + 1],
offsets[s].begin(), comp);
// Last one also needed and available.
if (!tight)
{
offsets[num_threads - 1].resize(k);
multiseq_partition(se.begin(), se.end(),
difference_type(length),
offsets[num_threads - 1].begin(),
comp);
}
}
for (int slab = 0; slab < num_threads; ++slab)
{
// For each slab / processor.
for (int seq = 0; seq < k; ++seq)
{
// For each sequence.
if (slab == 0)
{
// Absolute beginning.
pieces[slab][seq].first = 0;
}
else
pieces[slab][seq].first =
pieces[slab - 1][seq].second;
if (!tight || slab < (num_threads - 1))
pieces[slab][seq].second =
offsets[slab][seq] - seqs_begin[seq].first;
else
{
// slab == num_threads - 1
pieces[slab][seq].second =
_GLIBCXX_PARALLEL_LENGTH(seqs_begin[seq]);
}
}
}
delete[] offsets;
}
splitter(seqs_begin, seqs_end, comp, length, total_length,
pieces);
} //single
thread_index_t iam = omp_get_thread_num();
......@@ -1701,15 +1358,14 @@ template<typename RandomAccessIteratorIterator,
for (int s = 0; s < k; ++s)
{
chunks[s] = std::make_pair(
seqs_begin[s].first + pieces[iam][s].first,
seqs_begin[s].first + pieces[iam][s].second);
seqs_begin[s].first + pieces[iam][s].first,
seqs_begin[s].first + pieces[iam][s].second);
local_length += _GLIBCXX_PARALLEL_LENGTH(chunks[s]);
}
multiway_merge(
sequential_multiway_merge<stable, sentinels>(
chunks, chunks + k, target + target_position, comp,
std::min(local_length, length - target_position),
stable, false, sequential_tag());
std::min(local_length, length - target_position));
delete[] chunks;
}
......@@ -1727,7 +1383,7 @@ template<typename RandomAccessIteratorIterator,
(pieces[iam][1].second - pieces[iam][1].first),
comp);
}
} //parallel
} // parallel
#if _GLIBCXX_ASSERTIONS
_GLIBCXX_PARALLEL_ASSERT(is_sorted(target, target + length, comp));
......@@ -1743,88 +1399,605 @@ template<typename RandomAccessIteratorIterator,
}
/**
* @brief Multi-way merging front-end.
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
* @param stable Stable merging incurs a performance penalty.
* @return End iterator of output sequence.
* @brief Multiway Merge Frontend.
*
* Merge the sequences specified by seqs_begin and seqs_end into
* target. seqs_begin and seqs_end must point to a sequence of
* pairs. These pairs must contain an iterator to the beginning
* of a sequence in their first entry and an iterator the end of
* the same sequence in their second entry.
*
* Ties are broken arbitrarily. See stable_multiway_merge for a variant
* that breaks ties by sequence number but is slower.
*
* The first entries of the pairs (i.e. the begin iterators) will be moved
* forward.
*
* The output sequence has to provide enough space for all elements
* that are written to it.
*
* This function will merge the input sequences:
*
* - not stable
* - parallel, depending on the input size and Settings
* - using sampling for splitting
* - not using sentinels
*
* Example:
*
* <pre>
* int sequences[10][10];
* for (int i = 0; i < 10; ++i)
* for (int j = 0; i < 10; ++j)
* sequences[i][j] = j;
*
* int out[33];
* std::vector<std::pair<int*> > seqs;
* for (int i = 0; i < 10; ++i)
* { seqs.push(std::make_pair<int*>(sequences[i], sequences[i] + 10)) }
*
* multiway_merge(seqs.begin(), seqs.end(), target, std::less<int>(), 33);
* </pre>
*
* @see stable_multiway_merge
*
* @pre All input sequences must be sorted.
* @pre Target must provide enough space to merge out length elements or
* the number of elements in all sequences, whichever is smaller.
*
* @post [target, return value) contains merged elements from the
* input sequences.
* @post return value - target = min(length, number of elements in all
* sequences).
*
* @param RandomAccessIteratorPairIterator iterator over sequence
* of pairs of iterators
* @param RandomAccessIteratorOut iterator over target sequence
* @param _DifferenceTp difference type for the sequence
* @param Comparator strict weak ordering type to compare elements
* in sequences
*
* @param seqs_begin begin of sequence sequence
* @param seqs_end end of sequence sequence
* @param target target sequence to merge to.
* @param comp strict weak ordering to use for element comparison.
* @param length the number of elements to merge into target.
*
* @return end iterator of output sequence
*/
template<typename RandomAccessIteratorPairIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3
multiway_merge(RandomAccessIteratorPairIterator seqs_begin,
RandomAccessIteratorPairIterator seqs_end,
RandomAccessIterator3 target, Comparator comp,
_DifferenceTp length, bool stable)
{
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
multiway_merge(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
return target;
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ false, /* sentinels = */ false>
(seqs_begin, seqs_end, target, comp,
multiway_merge_sampling_splitting</* stable = */ false,
RandomAccessIteratorPairIterator, Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
target_end = sequential_multiway_merge
</* stable = */false, /* sentinels = */ false>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
multiway_merge(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::sequential_tag)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
return target;
// Execute multiway merge *sequentially*.
return sequential_multiway_merge
</* stable = */ false, /* sentinels = */ false>
(seqs_begin, seqs_end, target, comp, length);
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
multiway_merge(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::exact_tag)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
return target;
const _Settings& __s = _Settings::get();
RandomAccessIterator3 target_end;
if (_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >= __s.multiway_merge_minimal_k)
&& ((sequence_index_t)length >= __s.multiway_merge_minimal_n)))
target_end = parallel_multiway_merge(seqs_begin, seqs_end,
target, comp,
static_cast<difference_type>(length),
stable, false);
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ false, /* sentinels = */ false>(
seqs_begin, seqs_end,
target, comp,
multiway_merge_exact_splitting</* stable = */ false,
RandomAccessIteratorPairIterator, Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
target_end = multiway_merge(seqs_begin, seqs_end, target, comp, length,
stable, false, sequential_tag());
target_end = sequential_multiway_merge
</* stable = */ false, /* sentinels = */ false>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
}
/** @brief Multi-way merging front-end.
* @param seqs_begin Begin iterator of iterator pair input sequence.
* @param seqs_end End iterator of iterator pair input sequence.
* @param target Begin iterator out output sequence.
* @param comp Comparator.
* @param length Maximum length to merge.
* @param stable Stable merging incurs a performance penalty.
* @return End iterator of output sequence.
* @pre For each @c i, @c seqs_begin[i].second must be the end
* marker of the sequence, but also reference the one more sentinel
* element. */
template<typename RandomAccessIteratorPairIterator,
typename RandomAccessIterator3,
typename _DifferenceTp,
typename Comparator>
RandomAccessIterator3
multiway_merge_sentinel(RandomAccessIteratorPairIterator seqs_begin,
RandomAccessIteratorPairIterator seqs_end,
RandomAccessIterator3 target,
Comparator comp,
_DifferenceTp length,
bool stable)
{
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
return target;
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ true, /* sentinels = */ false>(
seqs_begin, seqs_end,
target, comp,
multiway_merge_sampling_splitting</* stable = */ true,
RandomAccessIteratorPairIterator, Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
target_end = sequential_multiway_merge
</* stable = */ true, /* sentinels = */ false>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::sequential_tag)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
{ return target; }
// Execute multiway merge *sequentially*.
return sequential_multiway_merge
</* stable = */ true, /* sentinels = */ false>
(seqs_begin, seqs_end, target, comp, length);
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
stable_multiway_merge(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::exact_tag)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
const _Settings& __s = _Settings::get();
const bool cond1 = seqs_end - seqs_begin >= __s.multiway_merge_minimal_k;
const bool cond2 = sequence_index_t(length) >= __s.multiway_merge_minimal_n;
if (_GLIBCXX_PARALLEL_CONDITION(cond1 && cond2))
return parallel_multiway_merge(seqs_begin, seqs_end, target, comp,
length, stable, true);
// catch special case: no sequences
if (seqs_begin == seqs_end)
{ return target; }
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ true, /* sentinels = */ false>(
seqs_begin, seqs_end,
target, comp,
multiway_merge_exact_splitting
</* stable = */ true, RandomAccessIteratorPairIterator,
Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable,
true, sequential_tag());
}
target_end = sequential_multiway_merge</* stable = */ true,
/* sentinels = */ false>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
/**
* @brief Multiway Merge Frontend.
*
* Merge the sequences specified by seqs_begin and seqs_end into
* target. seqs_begin and seqs_end must point to a sequence of
* pairs. These pairs must contain an iterator to the beginning
* of a sequence in their first entry and an iterator the end of
* the same sequence in their second entry.
*
* Ties are broken arbitrarily. See stable_multiway_merge for a variant
* that breaks ties by sequence number but is slower.
*
* The first entries of the pairs (i.e. the begin iterators) will be moved
* forward.
*
* The output sequence has to provide enough space for all elements
* that are written to it.
*
* This function will merge the input sequences:
*
* - not stable
* - parallel, depending on the input size and Settings
* - using sampling for splitting
* - using sentinels
*
* You have to take care that the element the end iterator points to is
* readable and contains a value that is greater than any other non-sentinel
* value in all sequences.
*
* Example:
*
* <pre>
* int sequences[10][11];
* for (int i = 0; i < 10; ++i)
* for (int j = 0; i < 11; ++j)
* sequences[i][j] = j; // last one is sentinel!
*
* int out[33];
* std::vector<std::pair<int*> > seqs;
* for (int i = 0; i < 10; ++i)
* { seqs.push(std::make_pair<int*>(sequences[i], sequences[i] + 10)) }
*
* multiway_merge(seqs.begin(), seqs.end(), target, std::less<int>(), 33);
* </pre>
*
* @pre All input sequences must be sorted.
* @pre Target must provide enough space to merge out length elements or
* the number of elements in all sequences, whichever is smaller.
* @pre For each @c i, @c seqs_begin[i].second must be the end
* marker of the sequence, but also reference the one more sentinel
* element.
*
* @post [target, return value) contains merged elements from the
* input sequences.
* @post return value - target = min(length, number of elements in all
* sequences).
*
* @see stable_multiway_merge_sentinels
*
* @param RandomAccessIteratorPairIterator iterator over sequence
* of pairs of iterators
* @param RandomAccessIteratorOut iterator over target sequence
* @param _DifferenceTp difference type for the sequence
* @param Comparator strict weak ordering type to compare elements
* in sequences
*
* @param seqs_begin begin of sequence sequence
* @param seqs_end end of sequence sequence
* @param target target sequence to merge to.
* @param comp strict weak ordering to use for element comparison.
* @param length the number of elements to merge into target.
*
* @return end iterator of output sequence
*/
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
{ return target; }
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ false, /* sentinels = */ true>
(seqs_begin, seqs_end, target, comp,
multiway_merge_sampling_splitting
</* stable = */ false, RandomAccessIteratorPairIterator,
Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
target_end = sequential_multiway_merge
</* stable = */false, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::sequential_tag)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
{ return target; }
// Execute multiway merge *sequentially*.
return sequential_multiway_merge
</* stable = */ false, /* sentinels = */ true>
(seqs_begin, seqs_end, target, comp, length);
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::exact_tag)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
{ return target; }
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ false, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp,
multiway_merge_exact_splitting
</* stable = */ false, RandomAccessIteratorPairIterator,
Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
target_end = sequential_multiway_merge
</* stable = */ false, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
{ return target; }
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ true, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp,
multiway_merge_sampling_splitting
</* stable = */ true, RandomAccessIteratorPairIterator,
Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
target_end = sequential_multiway_merge
</* stable = */ true, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::sequential_tag)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
{ return target; }
// Execute multiway merge *sequentially*.
return sequential_multiway_merge
</* stable = */ true, /* sentinels = */ true>
(seqs_begin, seqs_end, target, comp, length);
}
template<
typename RandomAccessIteratorPairIterator
, typename RandomAccessIteratorOut
, typename _DifferenceTp
, typename Comparator>
RandomAccessIteratorOut
stable_multiway_merge_sentinels(RandomAccessIteratorPairIterator seqs_begin
, RandomAccessIteratorPairIterator seqs_end
, RandomAccessIteratorOut target
, Comparator comp, _DifferenceTp length
, __gnu_parallel::exact_tag)
{
typedef _DifferenceTp difference_type;
_GLIBCXX_CALL(seqs_end - seqs_begin)
// catch special case: no sequences
if (seqs_begin == seqs_end)
{ return target; }
// Execute merge; maybe parallel, depending on the number of merged
// elements and the number of sequences and global thresholds in
// Settings.
RandomAccessIteratorOut target_end;
if ((seqs_end - seqs_begin > 1) &&
_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_k)
&& ((sequence_index_t)length >=
__gnu_parallel::_Settings::get().multiway_merge_minimal_n)))
target_end = parallel_multiway_merge
</* stable = */ true, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp,
multiway_merge_exact_splitting
</* stable = */ true, RandomAccessIteratorPairIterator,
Comparator, _DifferenceTp>,
static_cast<difference_type>(length));
else
target_end = sequential_multiway_merge
</* stable = */ true, /* sentinels = */ true>(
seqs_begin, seqs_end,
target, comp, length);
return target_end;
}
}; // namespace __gnu_parallel
#endif
......@@ -80,26 +80,9 @@ template<typename RandomAccessIterator>
/** @brief Start indices, per thread. */
difference_type* starts;
/** @brief Temporary arrays for each thread.
*
* Indirection Allows using the temporary storage in different
* ways, without code duplication.
* @see _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST */
value_type** temporaries;
#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
/** @brief Storage in which to sort. */
RandomAccessIterator* sorting_places;
value_type** temporary;
/** @brief Storage into which to merge. */
value_type** merging_places;
#else
/** @brief Storage in which to sort. */
value_type** sorting_places;
/** @brief Storage into which to merge. */
RandomAccessIterator* merging_places;
#endif
/** @brief Samples. */
value_type* samples;
......@@ -108,9 +91,6 @@ template<typename RandomAccessIterator>
/** @brief Pieces of data to merge @c [thread][sequence] */
std::vector<Piece<difference_type> >* pieces;
/** @brief Stable sorting desired. */
bool stable;
};
/**
......@@ -122,7 +102,7 @@ template<typename RandomAccessIterator>
template<typename RandomAccessIterator, typename _DifferenceTp>
void
determine_samples(PMWMSSortingData<RandomAccessIterator>* sd,
_DifferenceTp& num_samples)
_DifferenceTp num_samples)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
......@@ -130,8 +110,6 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
thread_index_t iam = omp_get_thread_num();
num_samples = _Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
difference_type* es = new difference_type[num_samples + 2];
equally_split(sd->starts[iam + 1] - sd->starts[iam],
......@@ -144,11 +122,201 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
delete[] es;
}
/** @brief Split consistently. */
template<bool exact, typename RandomAccessIterator,
typename Comparator, typename SortingPlacesIterator>
struct split_consistently
{
};
/** @brief Split by exact splitting. */
template<typename RandomAccessIterator, typename Comparator,
typename SortingPlacesIterator>
struct split_consistently
<true, RandomAccessIterator, Comparator, SortingPlacesIterator>
{
void operator()(
const thread_index_t iam,
PMWMSSortingData<RandomAccessIterator>* sd,
Comparator& comp,
const typename
std::iterator_traits<RandomAccessIterator>::difference_type
num_samples)
const
{
# pragma omp barrier
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
seqs(sd->num_threads);
for (thread_index_t s = 0; s < sd->num_threads; s++)
seqs[s] = std::make_pair(sd->temporary[s],
sd->temporary[s]
+ (sd->starts[s + 1] - sd->starts[s]));
std::vector<SortingPlacesIterator> offsets(sd->num_threads);
// if not last thread
if (iam < sd->num_threads - 1)
multiseq_partition(seqs.begin(), seqs.end(),
sd->starts[iam + 1], offsets.begin(), comp);
for (int seq = 0; seq < sd->num_threads; seq++)
{
// for each sequence
if (iam < (sd->num_threads - 1))
sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
else
// very end of this sequence
sd->pieces[iam][seq].end =
sd->starts[seq + 1] - sd->starts[seq];
}
# pragma omp barrier
for (thread_index_t seq = 0; seq < sd->num_threads; seq++)
{
// For each sequence.
if (iam > 0)
sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
else
// Absolute beginning.
sd->pieces[iam][seq].begin = 0;
}
}
};
/** @brief Split by sampling. */
template<typename RandomAccessIterator, typename Comparator,
typename SortingPlacesIterator>
struct split_consistently<false, RandomAccessIterator, Comparator,
SortingPlacesIterator>
{
void operator()(
const thread_index_t iam,
PMWMSSortingData<RandomAccessIterator>* sd,
Comparator& comp,
const typename
std::iterator_traits<RandomAccessIterator>::difference_type
num_samples)
const
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
determine_samples(sd, num_samples);
# pragma omp barrier
# pragma omp single
__gnu_sequential::sort(sd->samples,
sd->samples + (num_samples * sd->num_threads),
comp);
# pragma omp barrier
for (thread_index_t s = 0; s < sd->num_threads; ++s)
{
// For each sequence.
if (num_samples * iam > 0)
sd->pieces[iam][s].begin =
std::lower_bound(sd->temporary[s],
sd->temporary[s]
+ (sd->starts[s + 1] - sd->starts[s]),
sd->samples[num_samples * iam],
comp)
- sd->temporary[s];
else
// Absolute beginning.
sd->pieces[iam][s].begin = 0;
if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
sd->pieces[iam][s].end =
std::lower_bound(sd->temporary[s],
sd->temporary[s]
+ (sd->starts[s + 1] - sd->starts[s]),
sd->samples[num_samples * (iam + 1)],
comp)
- sd->temporary[s];
else
// Absolute end.
sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
}
}
};
template<bool stable, typename RandomAccessIterator, typename Comparator>
struct possibly_stable_sort
{
};
template<typename RandomAccessIterator, typename Comparator>
struct possibly_stable_sort<true, RandomAccessIterator, Comparator>
{
void operator()(const RandomAccessIterator& begin,
const RandomAccessIterator& end, Comparator& comp) const
{
__gnu_sequential::stable_sort(begin, end, comp);
}
};
template<typename RandomAccessIterator, typename Comparator>
struct possibly_stable_sort<false, RandomAccessIterator, Comparator>
{
void operator()(const RandomAccessIterator begin,
const RandomAccessIterator end, Comparator& comp) const
{
__gnu_sequential::sort(begin, end, comp);
}
};
template<bool stable, typename SeqRandomAccessIterator,
typename RandomAccessIterator, typename Comparator,
typename DiffType>
struct possibly_stable_multiway_merge
{
};
template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
typename Comparator, typename DiffType>
struct possibly_stable_multiway_merge
<true, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
DiffType>
{
void operator()(const SeqRandomAccessIterator& seqs_begin,
const SeqRandomAccessIterator& seqs_end,
const RandomAccessIterator& target,
Comparator& comp,
DiffType length_am) const
{
stable_multiway_merge(seqs_begin, seqs_end, target, comp,
length_am, sequential_tag());
}
};
template<typename SeqRandomAccessIterator, typename RandomAccessIterator,
typename Comparator, typename DiffType>
struct possibly_stable_multiway_merge
<false, SeqRandomAccessIterator, RandomAccessIterator, Comparator,
DiffType>
{
void operator()(const SeqRandomAccessIterator& seqs_begin,
const SeqRandomAccessIterator& seqs_end,
const RandomAccessIterator& target,
Comparator& comp,
DiffType length_am) const
{
multiway_merge(seqs_begin, seqs_end, target, comp,
length_am, sequential_tag());
}
};
/** @brief PMWMS code executed by each thread.
* @param sd Pointer to algorithm data.
* @param comp Comparator.
*/
template<typename RandomAccessIterator, typename Comparator>
template<bool stable, bool exact, typename RandomAccessIterator,
typename Comparator>
void
parallel_sort_mwms_pu(PMWMSSortingData<RandomAccessIterator>* sd,
Comparator& comp)
......@@ -162,165 +330,65 @@ template<typename RandomAccessIterator, typename Comparator>
// Length of this thread's chunk, before merging.
difference_type length_local = sd->starts[iam + 1] - sd->starts[iam];
#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
typedef RandomAccessIterator SortingPlacesIterator;
// Sort in temporary storage, leave space for sentinel.
// Sort in input storage.
sd->sorting_places[iam] = sd->source + sd->starts[iam];
#else
typedef value_type* SortingPlacesIterator;
// Sort in temporary storage, leave space for sentinel.
sd->sorting_places[iam] = sd->temporaries[iam] =
sd->temporary[iam] =
static_cast<value_type*>(
::operator new(sizeof(value_type) * (length_local + 1)));
// Copy there.
std::uninitialized_copy(sd->source + sd->starts[iam],
sd->source + sd->starts[iam] + length_local,
sd->sorting_places[iam]);
#endif
// Sort locally.
if (sd->stable)
__gnu_sequential::stable_sort(sd->sorting_places[iam],
sd->sorting_places[iam] + length_local,
comp);
else
__gnu_sequential::sort(sd->sorting_places[iam],
sd->sorting_places[iam] + length_local,
comp);
// Invariant: locally sorted subsequence in sd->sorting_places[iam],
// sd->sorting_places[iam] + length_local.
const _Settings& __s = _Settings::get();
if (__s.sort_splitting == SAMPLING)
{
difference_type num_samples;
determine_samples(sd, num_samples);
# pragma omp barrier
# pragma omp single
__gnu_sequential::sort(sd->samples,
sd->samples + (num_samples * sd->num_threads),
comp);
# pragma omp barrier
for (int s = 0; s < sd->num_threads; ++s)
{
// For each sequence.
if (num_samples * iam > 0)
sd->pieces[iam][s].begin =
std::lower_bound(sd->sorting_places[s],
sd->sorting_places[s]
+ (sd->starts[s + 1] - sd->starts[s]),
sd->samples[num_samples * iam],
comp)
- sd->sorting_places[s];
else
// Absolute beginning.
sd->pieces[iam][s].begin = 0;
if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
sd->pieces[iam][s].end =
std::lower_bound(sd->sorting_places[s],
sd->sorting_places[s]
+ (sd->starts[s + 1] - sd->starts[s]),
sd->samples[num_samples * (iam + 1)],
comp)
- sd->sorting_places[s];
else
// Absolute end.
sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
}
}
else if (__s.sort_splitting == EXACT)
{
# pragma omp barrier
sd->temporary[iam]);
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
seqs(sd->num_threads);
for (int s = 0; s < sd->num_threads; ++s)
seqs[s] = std::make_pair(sd->sorting_places[s],
sd->sorting_places[s]
+ (sd->starts[s + 1] - sd->starts[s]));
possibly_stable_sort<stable, SortingPlacesIterator, Comparator>()
(sd->temporary[iam], sd->temporary[iam] + length_local, comp);
std::vector<SortingPlacesIterator> offsets(sd->num_threads);
// Invariant: locally sorted subsequence in sd->temporary[iam],
// sd->temporary[iam] + length_local.
// if not last thread
if (iam < sd->num_threads - 1)
multiseq_partition(seqs.begin(), seqs.end(),
sd->starts[iam + 1], offsets.begin(), comp);
// No barrier here: Synchronization is done by the splitting routine.
for (int seq = 0; seq < sd->num_threads; ++seq)
{
// for each sequence
if (iam < (sd->num_threads - 1))
sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
else
// very end of this sequence
sd->pieces[iam][seq].end = (sd->starts[seq + 1]
- sd->starts[seq]);
}
# pragma omp barrier
for (int seq = 0; seq < sd->num_threads; ++seq)
{
// For each sequence.
if (iam > 0)
sd->pieces[iam][seq].begin = sd->pieces[iam - 1][seq].end;
else
// Absolute beginning.
sd->pieces[iam][seq].begin = 0;
}
}
difference_type num_samples =
_Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
split_consistently
<exact, RandomAccessIterator, Comparator, SortingPlacesIterator>()
(iam, sd, comp, num_samples);
// Offset from target begin, length after merging.
difference_type offset = 0, length_am = 0;
for (int s = 0; s < sd->num_threads; ++s)
for (thread_index_t s = 0; s < sd->num_threads; s++)
{
length_am += sd->pieces[iam][s].end - sd->pieces[iam][s].begin;
offset += sd->pieces[iam][s].begin;
}
#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
// Merge to temporary storage, uninitialized creation not possible
// since there is no multiway_merge calling the placement new
// instead of the assignment operator.
// XXX incorrect (de)construction
sd->merging_places[iam] = sd->temporaries[iam] =
static_cast<value_type*>(::operator new(sizeof(value_type)
* length_am));
#else
// Merge directly to target.
sd->merging_places[iam] = sd->source + offset;
#endif
std::vector<std::pair<SortingPlacesIterator, SortingPlacesIterator> >
seqs(sd->num_threads);
typedef std::vector<
std::pair<SortingPlacesIterator, SortingPlacesIterator> >
seq_vector_type;
seq_vector_type seqs(sd->num_threads);
for (int s = 0; s < sd->num_threads; ++s)
{
seqs[s] =
std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin,
sd->sorting_places[s] + sd->pieces[iam][s].end);
std::make_pair(sd->temporary[s] + sd->pieces[iam][s].begin,
sd->temporary[s] + sd->pieces[iam][s].end);
}
multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp,
length_am, sd->stable, false, sequential_tag());
possibly_stable_multiway_merge<
stable,
typename seq_vector_type::iterator,
RandomAccessIterator,
Comparator, difference_type>()
(seqs.begin(), seqs.end(),
sd->source + offset, comp,
length_am);
# pragma omp barrier
#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
// Write back.
std::copy(sd->merging_places[iam],
sd->merging_places[iam] + length_am,
sd->source + offset);
#endif
::operator delete(sd->temporaries[iam]);
::operator delete(sd->temporary[iam]);
}
/** @brief PMWMS main call.
......@@ -329,21 +397,22 @@ template<typename RandomAccessIterator, typename Comparator>
* @param comp Comparator.
* @param n Length of sequence.
* @param num_threads Number of threads to use.
* @param stable Stable sorting.
*/
template<typename RandomAccessIterator, typename Comparator>
template<bool stable, bool exact, typename RandomAccessIterator,
typename Comparator>
void
parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, typename
std::iterator_traits<RandomAccessIterator>::
difference_type n, int num_threads, bool stable)
Comparator comp,
thread_index_t num_threads)
{
_GLIBCXX_CALL(n)
_GLIBCXX_CALL(end - begin)
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
difference_type n = end - begin;
if (n <= 1)
return;
......@@ -354,7 +423,6 @@ template<typename RandomAccessIterator, typename Comparator>
// shared variables
PMWMSSortingData<RandomAccessIterator> sd;
difference_type* starts;
const _Settings& __s = _Settings::get();
# pragma omp parallel num_threads(num_threads)
{
......@@ -364,23 +432,16 @@ template<typename RandomAccessIterator, typename Comparator>
{
sd.num_threads = num_threads;
sd.source = begin;
sd.temporaries = new value_type*[num_threads];
#if _GLIBCXX_MULTIWAY_MERGESORT_COPY_LAST
sd.sorting_places = new RandomAccessIterator[num_threads];
sd.merging_places = new value_type*[num_threads];
#else
sd.sorting_places = new value_type*[num_threads];
sd.merging_places = new RandomAccessIterator[num_threads];
#endif
if (__s.sort_splitting == SAMPLING)
sd.temporary = new value_type*[num_threads];
if (!exact)
{
unsigned int size =
(__s.sort_mwms_oversampling * num_threads - 1)
difference_type size =
(_Settings::get().sort_mwms_oversampling * num_threads - 1)
* num_threads;
sd.samples = static_cast<value_type*>(
::operator new(size * sizeof(value_type)));
::operator new(size * sizeof(value_type)));
}
else
sd.samples = NULL;
......@@ -390,7 +451,6 @@ template<typename RandomAccessIterator, typename Comparator>
for (int s = 0; s < num_threads; ++s)
sd.pieces[s].resize(num_threads);
starts = sd.starts = new difference_type[num_threads + 1];
sd.stable = stable;
difference_type chunk_length = n / num_threads;
difference_type split = n % num_threads;
......@@ -401,18 +461,16 @@ template<typename RandomAccessIterator, typename Comparator>
pos += (i < split) ? (chunk_length + 1) : chunk_length;
}
starts[num_threads] = pos;
}
} //single
// Now sort in parallel.
parallel_sort_mwms_pu(&sd, comp);
parallel_sort_mwms_pu<stable, exact>(&sd, comp);
} //parallel
delete[] starts;
delete[] sd.temporaries;
delete[] sd.sorting_places;
delete[] sd.merging_places;
delete[] sd.temporary;
if (__s.sort_splitting == SAMPLING)
if (!exact)
::operator delete(sd.samples);
delete[] sd.offsets;
......
......@@ -71,7 +71,7 @@ namespace __gnu_parallel
template<typename RandomAccessIterator, typename Comparator>
inline void
parallel_sort(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp, bool stable)
Comparator comp, bool stable)
{
_GLIBCXX_CALL(end - begin)
typedef std::iterator_traits<RandomAccessIterator> traits_type;
......@@ -79,25 +79,43 @@ namespace __gnu_parallel
typedef typename traits_type::difference_type difference_type;
if (begin != end)
{
difference_type n = end - begin;
{
difference_type n = end - begin;
if (false) ;
if (false) ;
#if _GLIBCXX_MERGESORT
else if (stable || _Settings::get().sort_algorithm == MWMS)
parallel_sort_mwms(begin, end, comp, n, get_max_threads(), stable);
else if (stable)
{
if(_Settings::get().sort_splitting == EXACT)
parallel_sort_mwms<true, true>
(begin, end, comp, get_max_threads());
else
parallel_sort_mwms<true, false>
(begin, end, comp, get_max_threads());
}
else if (_Settings::get().sort_algorithm == MWMS)
{
if(_Settings::get().sort_splitting == EXACT)
parallel_sort_mwms<false, true>
(begin, end, comp, get_max_threads());
else
parallel_sort_mwms<false, false>
(begin, end, comp, get_max_threads());
}
#endif
#if _GLIBCXX_QUICKSORT
else if (!stable && _Settings::get().sort_algorithm == QS)
parallel_sort_qs(begin, end, comp, n, get_max_threads());
else if (!stable && _Settings::get().sort_algorithm == QS)
parallel_sort_qs(begin, end, comp, n, get_max_threads());
#endif
#if _GLIBCXX_BAL_QUICKSORT
else if (!stable && _Settings::get().sort_algorithm == QS_BALANCED)
parallel_sort_qsb(begin, end, comp, n, get_max_threads());
else if (!stable && _Settings::get().sort_algorithm == QS_BALANCED)
parallel_sort_qsb(begin, end, comp, n, get_max_threads());
#endif
else
__gnu_sequential::sort(begin, end, comp);
}
else if(stable)
__gnu_sequential::stable_sort(begin, end, comp);
else
__gnu_sequential::sort(begin, end, comp);
}
}
} // end namespace __gnu_parallel
......
......@@ -44,6 +44,9 @@ namespace __gnu_parallel
/** @brief Forces sequential execution at compile time. */
struct sequential_tag { };
/** @brief Forces exact splitting in multiway merge at compile time. */
struct exact_tag { };
/** @brief Recommends parallel execution at compile time. */
struct parallel_tag { };
......
......@@ -87,15 +87,10 @@ namespace __gnu_parallel
/// Merging algorithms:
// bubblesort-alike, loser-tree variants, enum sentinel.
enum _MultiwayMergeAlgorithm
{
BUBBLE,
LOSER_TREE_EXPLICIT,
LOSER_TREE,
LOSER_TREE_COMBINED,
LOSER_TREE_SENTINEL,
ENUM_SENTINEL
{
LOSER_TREE
};
/// Partial sum algorithms: recursive, linear.
enum _PartialSumAlgorithm
{
......
// Copyright (C) 2008 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING. If not, write to the Free
// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
// USA.
#include <algorithm>
#include <functional>
#include <tr1/functional>
// libstdc++/35588
int main()
{
using namespace std;
using namespace tr1;
using namespace placeholders;
int t[10];
sort(t, t+10, bind(less<int>(), _1, _2));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment