Commit dfbed397 by Benjamin Kosnik Committed by Benjamin Kosnik

PR libstdc++/33489 continued.

2007-10-09  Benjamin Kosnik  <bkoz@montsouris.artheist.org>

	PR libstdc++/33489 continued.		
	* include/parallel/features.h (_GLIBCXX_LOSER_TREE): Set to zero.
	(_GLIBCXX_LOSER_TREE_POINTER): Set to one.
	(_GLIBCXX_LOSER_TREE_UNGUARDED): Set to zero.
	(_GLIBCXX_LOSER_TREE_POINTER_UNGUARDED): Set to one.
	* include/parallel/multiway_merge.h (parallel_multiway_merge):
	Change array of value_type to array of value_type pointers.
 	(multiway_merge_bubble): Same.
	(multiway_merge_loser_tree): Same.
	* include/parallel/merge.h (merge_advance_movc): Change to avoid
	default construction.
	* include/parallel/multiseq_selection.h (multiseq_partition):
	Replace value_type, bool pair with value_type*, null-initialized.
	* include/parallel/multiway_mergesort.h (parallel_sort_mwms):
	Don't use array form of operator new for value_types.
	(parallel_sort_mwms_pu): Same.
	* include/parallel/quicksort.h (parallel_sort_qs_divide): Don't
	use array form to construct pointer to value_type on stack,
	instead use __builtin_alloca.	
	* include/parallel/random_shuffle.h (sequential_random_shuffle): Same,
	but use operator new.
	(parallel_random_shuffle_drs_pu): Same.
	* include/parallel/partial_sum.h ( parallel_partial_sum_linear): Same.

	* include/parallel/losertree.h: Format.
	* include/parallel/settings.h: Format.

	* include/parallel/multiway_merge.h: Move traits to....
	* include/parallel/losertree.h: ... here.

From-SVN: r129179
parent afd428f8
2007-10-09 Benjamin Kosnik <bkoz@montsouris.artheist.org>
PR libstdc++/33489 continued.
* include/parallel/features.h (_GLIBCXX_LOSER_TREE): Set to zero.
(_GLIBCXX_LOSER_TREE_POINTER): Set to one.
(_GLIBCXX_LOSER_TREE_UNGUARDED): Set to zero.
(_GLIBCXX_LOSER_TREE_POINTER_UNGUARDED): Set to one.
* include/parallel/multiway_merge.h (parallel_multiway_merge):
Change array of value_type to array of value_type pointers.
(multiway_merge_bubble): Same.
(multiway_merge_loser_tree): Same.
* include/parallel/merge.h (merge_advance_movc): Change to avoid
default construction.
* include/parallel/multiseq_selection.h (multiseq_partition):
Replace value_type, bool pair with value_type*, null-initialized.
* include/parallel/multiway_mergesort.h (parallel_sort_mwms):
Don't use array form of operator new for value_types.
(parallel_sort_mwms_pu): Same.
* include/parallel/quicksort.h (parallel_sort_qs_divide): Don't
use array form to construct pointer to value_type on stack,
instead use __builtin_alloca.
* include/parallel/random_shuffle.h (sequential_random_shuffle): Same,
but use operator new.
(parallel_random_shuffle_drs_pu): Same.
* include/parallel/partial_sum.h ( parallel_partial_sum_linear): Same.
* include/parallel/losertree.h: Format.
* include/parallel/settings.h: Format.
* include/parallel/multiway_merge.h: Move traits to....
* include/parallel/losertree.h: ... here.
2007-10-09 Paolo Carlini <pcarlini@suse.de> 2007-10-09 Paolo Carlini <pcarlini@suse.de>
* include/tr1_impl/type_traitsfwd.h (add_reference): Remove. * include/tr1_impl/type_traitsfwd.h (add_reference): Remove.
......
...@@ -66,7 +66,7 @@ ...@@ -66,7 +66,7 @@
* @brief Include guarded (sequences may run empty) loser tree, * @brief Include guarded (sequences may run empty) loser tree,
* moving objects. * moving objects.
* @see __gnu_parallel::Settings multiway_merge_algorithm */ * @see __gnu_parallel::Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE 1 #define _GLIBCXX_LOSER_TREE 0
#endif #endif
#ifndef _GLIBCXX_LOSER_TREE_EXPLICIT #ifndef _GLIBCXX_LOSER_TREE_EXPLICIT
...@@ -88,7 +88,7 @@ ...@@ -88,7 +88,7 @@
/** @def _GLIBCXX_LOSER_TREE_POINTER /** @def _GLIBCXX_LOSER_TREE_POINTER
* @brief Include some loser tree variant. * @brief Include some loser tree variant.
* @see __gnu_parallel::Settings multiway_merge_algorithm */ * @see __gnu_parallel::Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_POINTER 0 #define _GLIBCXX_LOSER_TREE_POINTER 1
#endif #endif
#ifndef _GLIBCXX_LOSER_TREE_UNGUARDED #ifndef _GLIBCXX_LOSER_TREE_UNGUARDED
...@@ -96,14 +96,14 @@ ...@@ -96,14 +96,14 @@
* @brief Include unguarded (sequences must not run empty) loser * @brief Include unguarded (sequences must not run empty) loser
* tree, moving objects. * tree, moving objects.
* @see __gnu_parallel::Settings multiway_merge_algorithm */ * @see __gnu_parallel::Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_UNGUARDED 1 #define _GLIBCXX_LOSER_TREE_UNGUARDED 0
#endif #endif
#ifndef _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED #ifndef _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
/** @def _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED /** @def _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
* @brief Include some loser tree variant. * @brief Include some loser tree variant.
* @see __gnu_parallel::Settings multiway_merge_algorithm */ * @see __gnu_parallel::Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED 0 #define _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED 1
#endif #endif
#ifndef _GLIBCXX_LOSER_TREE_COMBINED #ifndef _GLIBCXX_LOSER_TREE_COMBINED
......
...@@ -36,7 +36,7 @@ ...@@ -36,7 +36,7 @@
// Written by Johannes Singler. // Written by Johannes Singler.
#ifndef _GLIBCXX_PARALLEL_LOSERTREE_H #ifndef _GLIBCXX_PARALLEL_LOSERTREE_H
#define _GLIBCXX_PARALLEL_LOSERTREE_H #define _GLIBCXX_PARALLEL_LOSERTREE_H 1
#include <functional> #include <functional>
...@@ -133,7 +133,8 @@ namespace __gnu_parallel ...@@ -133,7 +133,8 @@ namespace __gnu_parallel
for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2) for (unsigned int pos = (offset + source) / 2; pos > 0; pos /= 2)
{ {
// The smaller one gets promoted. // The smaller one gets promoted.
if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup && comp(losers[pos].key, key)) if ((!inf && !losers[pos].inf && !sup && !losers[pos].sup
&& comp(losers[pos].key, key))
|| losers[pos].inf || sup) || losers[pos].inf || sup)
{ {
// The other one is smaller. // The other one is smaller.
...@@ -251,7 +252,8 @@ namespace __gnu_parallel ...@@ -251,7 +252,8 @@ namespace __gnu_parallel
print() print()
{ {
for (unsigned int i = 0; i < (k * 2); i++) for (unsigned int i = 0; i < (k * 2); i++)
printf("%d %d from %d, %d\n", i, losers[i].key, losers[i].source, losers[i].sup); printf("%d %d from %d, %d\n", i, losers[i].key,
losers[i].source, losers[i].sup);
} }
inline int inline int
...@@ -682,7 +684,8 @@ namespace __gnu_parallel ...@@ -682,7 +684,8 @@ namespace __gnu_parallel
init() init()
{ losers[0] = losers[init_winner(1)]; } { losers[0] = losers[init_winner(1)]; }
inline void delete_min_insert(const T& key, bool sup) inline void
delete_min_insert(const T& key, bool sup)
{ {
const T* keyp = &key; const T* keyp = &key;
int source = losers[0].source; int source = losers[0].source;
...@@ -708,7 +711,7 @@ namespace __gnu_parallel ...@@ -708,7 +711,7 @@ namespace __gnu_parallel
{ return insert_start(key, source, sup); } { return insert_start(key, source, sup); }
unsigned int unsigned int
init_winner_stable (unsigned int root) init_winner_stable(unsigned int root)
{ {
if (root >= k) if (root >= k)
{ {
...@@ -719,7 +722,8 @@ namespace __gnu_parallel ...@@ -719,7 +722,8 @@ namespace __gnu_parallel
unsigned int left = init_winner (2 * root); unsigned int left = init_winner (2 * root);
unsigned int right = init_winner (2 * root + 1); unsigned int right = init_winner (2 * root + 1);
if (losers[right].sup if (losers[right].sup
|| (!losers[left].sup && !comp(*losers[right].keyp, *losers[left].keyp))) || (!losers[left].sup && !comp(*losers[right].keyp,
*losers[left].keyp)))
{ {
// Left one is less or equal. // Left one is less or equal.
losers[root] = losers[right]; losers[root] = losers[right];
...@@ -749,7 +753,8 @@ namespace __gnu_parallel ...@@ -749,7 +753,8 @@ namespace __gnu_parallel
if ( (sup && (!losers[pos].sup || losers[pos].source < source)) || if ( (sup && (!losers[pos].sup || losers[pos].source < source)) ||
(!sup && !losers[pos].sup && (!sup && !losers[pos].sup &&
((comp(*losers[pos].keyp, *keyp)) || ((comp(*losers[pos].keyp, *keyp)) ||
(!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source)))) (!comp(*keyp, *losers[pos].keyp)
&& losers[pos].source < source))))
{ {
// The other one is smaller. // The other one is smaller.
std::swap(losers[pos].sup, sup); std::swap(losers[pos].sup, sup);
...@@ -1061,7 +1066,8 @@ namespace __gnu_parallel ...@@ -1061,7 +1066,8 @@ namespace __gnu_parallel
{ {
// The smaller one gets promoted, ties are broken by source. // The smaller one gets promoted, ties are broken by source.
if (comp(*losers[pos].keyp, *keyp) if (comp(*losers[pos].keyp, *keyp)
|| (!comp(*keyp, *losers[pos].keyp) && losers[pos].source < source)) || (!comp(*keyp, *losers[pos].keyp)
&& losers[pos].source < source))
{ {
// The other one is smaller. // The other one is smaller.
std::swap(losers[pos].source, source); std::swap(losers[pos].source, source);
...@@ -1072,6 +1078,35 @@ namespace __gnu_parallel ...@@ -1072,6 +1078,35 @@ namespace __gnu_parallel
} }
}; };
#endif #endif
template<typename _ValueTp, class Comparator>
struct loser_tree_traits
{
#if _GLIBCXX_LOSER_TREE
typedef LoserTree<_ValueTp, Comparator> LT;
#else
# if _GLIBCXX_LOSER_TREE_POINTER
typedef LoserTreePointer<_ValueTp, Comparator> LT;
# else
# error Must define some type in losertree.h.
# endif
#endif
};
template<typename _ValueTp, class Comparator>
struct loser_tree_traits_unguarded
{
#if _GLIBCXX_LOSER_TREE_UNGUARDED
typedef LoserTreeUnguarded<_ValueTp, Comparator> LT;
#else
# if _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
typedef LoserTreePointerUnguarded<_ValueTp, Comparator> LT;
# else
# error Must define some type in losertree.h.
# endif
#endif
};
} }
#endif #endif
...@@ -113,15 +113,10 @@ namespace __gnu_parallel ...@@ -113,15 +113,10 @@ namespace __gnu_parallel
while (begin1 != end1 && begin2 != end2 && max_length > 0) while (begin1 != end1 && begin2 != end2 && max_length > 0)
{ {
value_type1 element1; RandomAccessIterator1 next1 = begin1 + 1;
value_type2 element2; RandomAccessIterator2 next2 = begin2 + 1;
RandomAccessIterator1 next1; value_type1 element1 = *begin1;
RandomAccessIterator2 next2; value_type2 element2 = *begin2;
next1 = begin1 + 1;
next2 = begin2 + 1;
element1 = *begin1;
element2 = *begin2;
if (comp(element2, element1)) if (comp(element2, element1))
{ {
......
...@@ -132,10 +132,10 @@ namespace __gnu_parallel ...@@ -132,10 +132,10 @@ namespace __gnu_parallel
typedef typename std::iterator_traits<RanSeqs>::value_type::first_type It; typedef typename std::iterator_traits<RanSeqs>::value_type::first_type It;
typedef typename std::iterator_traits<It>::difference_type difference_type; typedef typename std::iterator_traits<It>::difference_type difference_type;
typedef typename std::iterator_traits<It>::value_type T; typedef typename std::iterator_traits<It>::value_type value_type;
lexicographic<T, int, Comparator> lcomp(comp); lexicographic<value_type, int, Comparator> lcomp(comp);
lexicographic_reverse<T, int, Comparator> lrcomp(comp); lexicographic_reverse<value_type, int, Comparator> lrcomp(comp);
// Number of sequences, number of elements in total (possibly // Number of sequences, number of elements in total (possibly
// including padding). // including padding).
...@@ -188,7 +188,7 @@ namespace __gnu_parallel ...@@ -188,7 +188,7 @@ namespace __gnu_parallel
#define S(i) (begin_seqs[i].first) #define S(i) (begin_seqs[i].first)
// Initial partition. // Initial partition.
std::vector<std::pair<T, int> > sample; std::vector<std::pair<value_type, int> > sample;
for (int i = 0; i < m; i++) for (int i = 0; i < m; i++)
if (n < ns[i]) //sequence long enough if (n < ns[i]) //sequence long enough
...@@ -213,7 +213,7 @@ namespace __gnu_parallel ...@@ -213,7 +213,7 @@ namespace __gnu_parallel
n /= 2; n /= 2;
int lmax_seq = -1; // to avoid warning int lmax_seq = -1; // to avoid warning
const T* lmax = NULL; // impossible to avoid the warning? const value_type* lmax = NULL; // impossible to avoid the warning?
for (int i = 0; i < m; i++) for (int i = 0; i < m; i++)
{ {
if (a[i] > 0) if (a[i] > 0)
...@@ -258,7 +258,7 @@ namespace __gnu_parallel ...@@ -258,7 +258,7 @@ namespace __gnu_parallel
if (skew > 0) if (skew > 0)
{ {
// Move to the left, find smallest. // Move to the left, find smallest.
std::priority_queue<std::pair<T, int>, std::vector<std::pair<T, int> >, lexicographic_reverse<T, int, Comparator> > pq(lrcomp); std::priority_queue<std::pair<value_type, int>, std::vector<std::pair<value_type, int> >, lexicographic_reverse<value_type, int, Comparator> > pq(lrcomp);
for (int i = 0; i < m; i++) for (int i = 0; i < m; i++)
if (b[i] < ns[i]) if (b[i] < ns[i])
...@@ -279,7 +279,7 @@ namespace __gnu_parallel ...@@ -279,7 +279,7 @@ namespace __gnu_parallel
else if (skew < 0) else if (skew < 0)
{ {
// Move to the right, find greatest. // Move to the right, find greatest.
std::priority_queue<std::pair<T, int>, std::vector<std::pair<T, int> >, lexicographic<T, int, Comparator> > pq(lcomp); std::priority_queue<std::pair<value_type, int>, std::vector<std::pair<value_type, int> >, lexicographic<value_type, int, Comparator> > pq(lcomp);
for (int i = 0; i < m; i++) for (int i = 0; i < m; i++)
if (a[i] > 0) if (a[i] > 0)
...@@ -308,36 +308,30 @@ namespace __gnu_parallel ...@@ -308,36 +308,30 @@ namespace __gnu_parallel
// Compare the keys on both edges of the border. // Compare the keys on both edges of the border.
// Maximum of left edge, minimum of right edge. // Maximum of left edge, minimum of right edge.
bool maxleftset = false, minrightset = false; value_type* maxleft = NULL;
T maxleft, minright; // Impossible to avoid the warning? value_type* minright = NULL;
for (int i = 0; i < m; i++) for (int i = 0; i < m; i++)
{ {
if (a[i] > 0) if (a[i] > 0)
{ {
if (!maxleftset) if (!maxleft)
{ maxleft = &(S(i)[a[i] - 1]);
maxleft = S(i)[a[i] - 1];
maxleftset = true;
}
else else
{ {
// Max, favor rear sequences. // Max, favor rear sequences.
if (!comp(S(i)[a[i] - 1], maxleft)) if (!comp(S(i)[a[i] - 1], *maxleft))
maxleft = S(i)[a[i] - 1]; maxleft = &(S(i)[a[i] - 1]);
} }
} }
if (b[i] < ns[i]) if (b[i] < ns[i])
{ {
if (!minrightset) if (!minright)
{ minright = &(S(i)[b[i]]);
minright = S(i)[b[i]];
minrightset = true;
}
else else
{ {
// Min, favor fore sequences. // Min, favor fore sequences.
if (comp(S(i)[b[i]], minright)) if (comp(S(i)[b[i]], *minright))
minright = S(i)[b[i]]; minright = &(S(i)[b[i]]);
} }
} }
} }
...@@ -352,7 +346,6 @@ namespace __gnu_parallel ...@@ -352,7 +346,6 @@ namespace __gnu_parallel
} }
/** /**
* @brief Selects the element at a certain global rank from several * @brief Selects the element at a certain global rank from several
* sorted sequences. * sorted sequences.
......
...@@ -346,8 +346,7 @@ namespace __gnu_parallel ...@@ -346,8 +346,7 @@ namespace __gnu_parallel
difference_type; difference_type;
// Last element in sequence. // Last element in sequence.
value_type max; value_type* max = NULL;
bool max_found = false;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++) for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
{ {
if ((*s).first == (*s).second) if ((*s).first == (*s).second)
...@@ -357,20 +356,19 @@ namespace __gnu_parallel ...@@ -357,20 +356,19 @@ namespace __gnu_parallel
value_type& v = *((*s).second - 1); value_type& v = *((*s).second - 1);
// Strictly greater. // Strictly greater.
if (!max_found || comp(max, v)) if (!max || comp(*max, v))
max = v; max = &v;
max_found = true;
} }
difference_type overhang_size = 0; difference_type overhang_size = 0;
for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++) for (RandomAccessIteratorIterator s = seqs_begin; s != seqs_end; s++)
{ {
RandomAccessIterator1 split = std::lower_bound((*s).first, (*s).second, max, comp); RandomAccessIterator1 split = std::lower_bound((*s).first, (*s).second,
*max, comp);
overhang_size += (*s).second - split; overhang_size += (*s).second - split;
// Set sentinel. // Set sentinel.
*((*s).second) = max; *((*s).second) = *max;
} }
// So many elements will be left over afterwards. // So many elements will be left over afterwards.
...@@ -722,7 +720,7 @@ namespace __gnu_parallel ...@@ -722,7 +720,7 @@ namespace __gnu_parallel
// Num remaining pieces. // Num remaining pieces.
int k = static_cast<int>(seqs_end - seqs_begin), nrp; int k = static_cast<int>(seqs_end - seqs_begin), nrp;
value_type* pl = new value_type[k]; value_type* pl = static_cast<value_type*>(::operator new(sizeof(value_type) * k));
int* source = new int[k]; int* source = new int[k];
difference_type total_length = 0; difference_type total_length = 0;
...@@ -902,19 +900,21 @@ namespace __gnu_parallel ...@@ -902,19 +900,21 @@ namespace __gnu_parallel
difference_type total_length = 0; difference_type total_length = 0;
// Default value for potentially non-default-constructible types.
value_type* defaultcons = NULL;
for (int t = 0; t < k; t++) for (int t = 0; t < k; t++)
{ {
if (stable) if (stable)
{ {
if (seqs_begin[t].first == seqs_begin[t].second) if (seqs_begin[t].first == seqs_begin[t].second)
lt.insert_start_stable(value_type(), t, true); lt.insert_start_stable(*defaultcons, t, true);
else else
lt.insert_start_stable(*seqs_begin[t].first, t, false); lt.insert_start_stable(*seqs_begin[t].first, t, false);
} }
else else
{ {
if (seqs_begin[t].first == seqs_begin[t].second) if (seqs_begin[t].first == seqs_begin[t].second)
lt.insert_start(value_type(), t, true); lt.insert_start(*defaultcons, t, true);
else else
lt.insert_start(*seqs_begin[t].first, t, false); lt.insert_start(*seqs_begin[t].first, t, false);
} }
...@@ -942,7 +942,7 @@ namespace __gnu_parallel ...@@ -942,7 +942,7 @@ namespace __gnu_parallel
// Feed. // Feed.
if (seqs_begin[source].first == seqs_begin[source].second) if (seqs_begin[source].first == seqs_begin[source].second)
lt.delete_min_insert_stable(value_type(), true); lt.delete_min_insert_stable(*defaultcons, true);
else else
// Replace from same source. // Replace from same source.
lt.delete_min_insert_stable(*seqs_begin[source].first, false); lt.delete_min_insert_stable(*seqs_begin[source].first, false);
...@@ -960,7 +960,7 @@ namespace __gnu_parallel ...@@ -960,7 +960,7 @@ namespace __gnu_parallel
// Feed. // Feed.
if (seqs_begin[source].first == seqs_begin[source].second) if (seqs_begin[source].first == seqs_begin[source].second)
lt.delete_min_insert(value_type(), true); lt.delete_min_insert(*defaultcons, true);
else else
// Replace from same source. // Replace from same source.
lt.delete_min_insert(*seqs_begin[source].first, false); lt.delete_min_insert(*seqs_begin[source].first, false);
...@@ -1083,59 +1083,6 @@ namespace __gnu_parallel ...@@ -1083,59 +1083,6 @@ namespace __gnu_parallel
return target; return target;
} }
template<typename _ValueTp, class Comparator>
struct loser_tree_traits
{
typedef LoserTree/*Pointer*/<_ValueTp, Comparator> LT;
};
/*#define NO_POINTER(T) \
template<typename Comparator> \
struct loser_tree_traits<T, Comparator> \
{ \
typedef LoserTreePointer<T, Comparator> LT; \
};*/
//
// NO_POINTER(unsigned char)
// NO_POINTER(char)
// NO_POINTER(unsigned short)
// NO_POINTER(short)
// NO_POINTER(unsigned int)
// NO_POINTER(int)
// NO_POINTER(unsigned long)
// NO_POINTER(long)
// NO_POINTER(unsigned long long)
// NO_POINTER(long long)
//
// #undef NO_POINTER
template<typename _ValueTp, class Comparator>
struct loser_tree_traits_unguarded
{
typedef LoserTreeUnguarded<_ValueTp, Comparator> LT;
};
/*#define NO_POINTER_UNGUARDED(T) \
template<typename Comparator> \
struct loser_tree_traits_unguarded<T, Comparator> \
{ \
typedef LoserTreePointerUnguarded<T, Comparator> LT; \
};*/
//
// NO_POINTER_UNGUARDED(unsigned char)
// NO_POINTER_UNGUARDED(char)
// NO_POINTER_UNGUARDED(unsigned short)
// NO_POINTER_UNGUARDED(short)
// NO_POINTER_UNGUARDED(unsigned int)
// NO_POINTER_UNGUARDED(int)
// NO_POINTER_UNGUARDED(unsigned long)
// NO_POINTER_UNGUARDED(long)
// NO_POINTER_UNGUARDED(unsigned long long)
// NO_POINTER_UNGUARDED(long long)
//
// #undef NO_POINTER_UNGUARDED
template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator> template<typename RandomAccessIteratorIterator, typename RandomAccessIterator3, typename _DifferenceTp, typename Comparator>
RandomAccessIterator3 RandomAccessIterator3
multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable) multiway_merge_loser_tree_combined(RandomAccessIteratorIterator seqs_begin, RandomAccessIteratorIterator seqs_end, RandomAccessIterator3 target, Comparator comp, _DifferenceTp length, bool stable)
...@@ -1423,7 +1370,7 @@ namespace __gnu_parallel ...@@ -1423,7 +1370,7 @@ namespace __gnu_parallel
if (Settings::multiway_merge_splitting == Settings::SAMPLING) if (Settings::multiway_merge_splitting == Settings::SAMPLING)
{ {
value_type* samples = new value_type[k * num_samples]; value_type* samples = static_cast<value_type*>(::operator new(sizeof(value_type) * k * num_samples));
// Sample. // Sample.
for (int s = 0; s < k; s++) for (int s = 0; s < k; s++)
for (int i = 0; (difference_type)i < num_samples; i++) for (int i = 0; (difference_type)i < num_samples; i++)
......
...@@ -160,7 +160,6 @@ namespace __gnu_parallel ...@@ -160,7 +160,6 @@ namespace __gnu_parallel
typedef typename traits_type::difference_type difference_type; typedef typename traits_type::difference_type difference_type;
Timing<sequential_tag> t; Timing<sequential_tag> t;
t.tic(); t.tic();
PMWMSSortingData<RandomAccessIterator>* sd = d->sd; PMWMSSortingData<RandomAccessIterator>* sd = d->sd;
...@@ -178,7 +177,7 @@ namespace __gnu_parallel ...@@ -178,7 +177,7 @@ namespace __gnu_parallel
typedef value_type* SortingPlacesIterator; typedef value_type* SortingPlacesIterator;
// Sort in temporary storage, leave space for sentinel. // Sort in temporary storage, leave space for sentinel.
sd->sorting_places[iam] = sd->temporaries[iam] = static_cast<value_type*>(::operator new(sizeof(value_type) *(length_local + 1))); sd->sorting_places[iam] = sd->temporaries[iam] = static_cast<value_type*>(::operator new(sizeof(value_type) * (length_local + 1)));
// Copy there. // Copy there.
std::uninitialized_copy(sd->source + sd->starts[iam], sd->source + sd->starts[iam] + length_local, sd->sorting_places[iam]); std::uninitialized_copy(sd->source + sd->starts[iam], sd->source + sd->starts[iam] + length_local, sd->sorting_places[iam]);
...@@ -208,7 +207,9 @@ namespace __gnu_parallel ...@@ -208,7 +207,9 @@ namespace __gnu_parallel
t.tic("sample/wait"); t.tic("sample/wait");
#pragma omp single #pragma omp single
__gnu_sequential::sort(sd->samples, sd->samples + (num_samples * d->num_threads), comp); __gnu_sequential::sort(sd->samples,
sd->samples + (num_samples * d->num_threads),
comp);
#pragma omp barrier #pragma omp barrier
...@@ -288,7 +289,7 @@ namespace __gnu_parallel ...@@ -288,7 +289,7 @@ namespace __gnu_parallel
// Merge to temporary storage, uninitialized creation not possible // Merge to temporary storage, uninitialized creation not possible
// since there is no multiway_merge calling the placement new // since there is no multiway_merge calling the placement new
// instead of the assignment operator. // instead of the assignment operator.
sd->merging_places[iam] = sd->temporaries[iam] = new value_type[length_am]; sd->merging_places[iam] = sd->temporaries[iam] = static_cast<value_type*>(::operator new(sizeof(value_type) * length_am));
#else #else
// Merge directly to target. // Merge directly to target.
sd->merging_places[iam] = sd->source + offset; sd->merging_places[iam] = sd->source + offset;
...@@ -337,7 +338,10 @@ namespace __gnu_parallel ...@@ -337,7 +338,10 @@ namespace __gnu_parallel
*/ */
template<typename RandomAccessIterator, typename Comparator> template<typename RandomAccessIterator, typename Comparator>
inline void inline void
parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end, Comparator comp, typename std::iterator_traits<RandomAccessIterator>::difference_type n, int num_threads, bool stable) parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp,
typename std::iterator_traits<RandomAccessIterator>::difference_type n,
int num_threads, bool stable)
{ {
_GLIBCXX_CALL(n) _GLIBCXX_CALL(n)
...@@ -366,7 +370,14 @@ namespace __gnu_parallel ...@@ -366,7 +370,14 @@ namespace __gnu_parallel
#endif #endif
if (Settings::sort_splitting == Settings::SAMPLING) if (Settings::sort_splitting == Settings::SAMPLING)
sd.samples = new value_type[num_threads * (Settings::sort_mwms_oversampling * num_threads - 1)]; {
unsigned int sz = Settings::sort_mwms_oversampling * num_threads - 1;
sz *= num_threads;
// Equivalent to value_type[sz], without need of default construction.
sz *= sizeof(value_type);
sd.samples = static_cast<value_type*>(::operator new(sz));
}
else else
sd.samples = NULL; sd.samples = NULL;
...@@ -377,7 +388,9 @@ namespace __gnu_parallel ...@@ -377,7 +388,9 @@ namespace __gnu_parallel
PMWMSSorterPU<RandomAccessIterator>* pus = new PMWMSSorterPU<RandomAccessIterator>[num_threads]; PMWMSSorterPU<RandomAccessIterator>* pus = new PMWMSSorterPU<RandomAccessIterator>[num_threads];
difference_type* starts = sd.starts = new difference_type[num_threads + 1]; difference_type* starts = sd.starts = new difference_type[num_threads + 1];
difference_type chunk_length = n / num_threads, split = n % num_threads, start = 0; difference_type chunk_length = n / num_threads;
difference_type split = n % num_threads;
difference_type start = 0;
for (int i = 0; i < num_threads; i++) for (int i = 0; i < num_threads; i++)
{ {
starts[i] = start; starts[i] = start;
......
...@@ -119,7 +119,7 @@ namespace __gnu_parallel ...@@ -119,7 +119,7 @@ namespace __gnu_parallel
borders[num_threads + 1] = n; borders[num_threads + 1] = n;
} }
value_type* sums = new value_type[num_threads]; value_type* sums = static_cast<value_type*>(::operator new(sizeof(value_type) * num_threads));
OutputIterator target_end; OutputIterator target_end;
#pragma omp parallel num_threads(num_threads) #pragma omp parallel num_threads(num_threads)
......
...@@ -65,10 +65,14 @@ namespace __gnu_parallel ...@@ -65,10 +65,14 @@ namespace __gnu_parallel
difference_type n = end - begin; difference_type n = end - begin;
num_samples = std::min(num_samples, n); num_samples = std::min(num_samples, n);
value_type samples[num_samples]; value_type* samples = static_cast<value_type*>(__builtin_alloca(sizeof(value_type) * num_samples));
for (difference_type s = 0; s < num_samples; s++) for (difference_type s = 0; s < num_samples; s++)
samples[s] = begin[(unsigned long long)s * n / num_samples]; {
const unsigned long long index = static_cast<unsigned long long>(s)
* n / num_samples;
samples[s] = begin[index];
}
__gnu_sequential::sort(samples, samples + num_samples, comp); __gnu_sequential::sort(samples, samples + num_samples, comp);
......
...@@ -203,7 +203,7 @@ namespace __gnu_parallel ...@@ -203,7 +203,7 @@ namespace __gnu_parallel
offset = sd->dist[s + 1][d->num_threads]; offset = sd->dist[s + 1][d->num_threads];
} }
sd->temporaries[iam] = new value_type[offset]; sd->temporaries[iam] = static_cast<value_type*>(::operator new(sizeof(value_type) * offset));
t.tic(); t.tic();
...@@ -446,7 +446,7 @@ namespace __gnu_parallel ...@@ -446,7 +446,7 @@ namespace __gnu_parallel
if (num_bins > 1) if (num_bins > 1)
{ {
value_type* target = new value_type[n]; value_type* target = static_cast<value_type*>(::operator new(sizeof(value_type) * n));
bin_index* oracles = new bin_index[n]; bin_index* oracles = new bin_index[n];
difference_type* dist0 = new difference_type[num_bins + 1], * dist1 = new difference_type[num_bins + 1]; difference_type* dist0 = new difference_type[num_bins + 1], * dist1 = new difference_type[num_bins + 1];
......
...@@ -165,8 +165,10 @@ namespace ...@@ -165,8 +165,10 @@ namespace
/** @brief Minimal input size for parallel sorting. */ /** @brief Minimal input size for parallel sorting. */
static volatile sequence_index_t sort_minimal_n; static volatile sequence_index_t sort_minimal_n;
/** @brief Oversampling factor for parallel std::sort (MWMS). */ /** @brief Oversampling factor for parallel std::sort (MWMS). */
static volatile unsigned int sort_mwms_oversampling; static volatile unsigned int sort_mwms_oversampling;
/** @brief Such many samples to take to find a good pivot /** @brief Such many samples to take to find a good pivot
(quicksort). */ (quicksort). */
static volatile unsigned int sort_qs_num_samples_preset; static volatile unsigned int sort_qs_num_samples_preset;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment