Commit 1661473b by Johannes Singler Committed by Johannes Singler

multiway_merge.h: Destruct only elements that have been constructed before.

2007-11-28  Johannes Singler  <singler@ira.uka.de>

        * include/parallel/multiway_merge.h: Destruct only elements that
        have been constructed before. Code beautifying and formatting.
        * include/parallel/losertree.h: (Copy) construct all loser tree
        item keys, so they can be destructed all at once.
        * include/parallel/quicksort.h: Fix memory leak.
        * include/parallel/random_shuffle.h: Use copy constructor instead
        of assignment. Code beautifying and formatting.
        * include/parallel/unique_copy.h: Use assignment instead of copy
        constructor.
        * include/parallel/multiway_mergesort.h: Use copy constructor
        instead of assignment. Code beautifying and formatting.
        * include/parallel/random_shuffle.h: Use copy constructor instead
        of assignment. Code beautifying.

From-SVN: r130490
parent 87300e8c
2007-11-28 Johannes Singler <singler@ira.uka.de>
* include/parallel/multiway_merge.h: Destruct only elements that
have been constructed before. Code beautifying and formatting.
* include/parallel/losertree.h: (Copy) construct all loser tree
item keys, so they can be destructed all at once.
* include/parallel/quicksort.h: Fix memory leak.
* include/parallel/random_shuffle.h: Use copy constructor instead
of assignment. Code beautifying and formatting.
* include/parallel/unique_copy.h: Use assignment instead of copy
constructor.
* include/parallel/multiway_mergesort.h: Use copy constructor
instead of assignment. Code beautifying and formatting.
* include/parallel/random_shuffle.h: Use copy constructor instead
of assignment. Code beautifying.
2007-11-27 Kaz Kojima <kkojima@gcc.gnu.org>
* testsuite/tr1/5_numerical_facilities/special_functions/
......
......@@ -230,6 +230,7 @@ template<typename T, typename Comparator = std::less<T> >
unsigned int ik, k, offset;
Loser* losers;
Comparator comp;
bool first_insert;
public:
inline LoserTree(unsigned int _k, Comparator _comp = std::less<T>())
......@@ -240,9 +241,12 @@ template<typename T, typename Comparator = std::less<T> >
// Next greater power of 2.
k = 1 << (log2(ik - 1) + 1);
offset = k;
losers = static_cast<Loser*>(::operator new(k * 2 * sizeof(Loser)));
for (unsigned int i = ik - 1; i < k; i++)
// Avoid default-constructing losers[].key
losers = static_cast<Loser*>(::operator new(2 * k * sizeof(Loser)));
for (unsigned int i = ik - 1; i < k; ++i)
losers[i + k].sup = true;
first_insert = true;
}
inline ~LoserTree()
......@@ -257,9 +261,18 @@ template<typename T, typename Comparator = std::less<T> >
{
unsigned int pos = k + source;
if(first_insert)
{
// Construct all keys, so we can easily deconstruct them.
for (unsigned int i = 0; i < (2 * k); ++i)
new(&(losers[i].key)) T(key);
first_insert = false;
}
else
new(&(losers[pos].key)) T(key);
losers[pos].sup = sup;
losers[pos].source = source;
new(&(losers[pos].key)) T(key);
}
unsigned int
......@@ -282,7 +295,8 @@ template<typename T, typename Comparator = std::less<T> >
return left;
}
else
{ // Right one is less.
{
// Right one is less.
losers[root] = losers[left];
return right;
}
......
......@@ -124,6 +124,8 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
determine_samples(PMWMSSortingData<RandomAccessIterator>* sd,
_DifferenceTp& num_samples)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef _DifferenceTp difference_type;
thread_index_t iam = omp_get_thread_num();
......@@ -137,8 +139,8 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
num_samples + 1, es);
for (difference_type i = 0; i < num_samples; i++)
sd->samples[iam * num_samples + i] =
sd->source[sd->starts[iam] + es[i + 1]];
new(&(sd->samples[iam * num_samples + i])) value_type(
sd->source[sd->starts[iam] + es[i + 1]]);
delete[] es;
}
......@@ -213,7 +215,8 @@ template<typename RandomAccessIterator, typename Comparator>
if (num_samples * iam > 0)
sd->pieces[iam][s].begin =
std::lower_bound(sd->sorting_places[s],
sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s],
sd->sorting_places[s]
+ (sd->starts[s + 1] - sd->starts[s]),
sd->samples[num_samples * iam],
comp)
- sd->sorting_places[s];
......@@ -224,8 +227,10 @@ template<typename RandomAccessIterator, typename Comparator>
if ((num_samples * (iam + 1)) < (num_samples * sd->num_threads))
sd->pieces[iam][s].end =
std::lower_bound(sd->sorting_places[s],
sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s],
sd->samples[num_samples * (iam + 1)], comp)
sd->sorting_places[s]
+ (sd->starts[s + 1] - sd->starts[s]),
sd->samples[num_samples * (iam + 1)],
comp)
- sd->sorting_places[s];
else
// Absolute end.
......@@ -240,7 +245,8 @@ template<typename RandomAccessIterator, typename Comparator>
seqs(sd->num_threads);
for (int s = 0; s < sd->num_threads; s++)
seqs[s] = std::make_pair(sd->sorting_places[s],
sd->sorting_places[s] + sd->starts[s + 1] - sd->starts[s]);
sd->sorting_places[s]
+ (sd->starts[s + 1] - sd->starts[s]));
std::vector<SortingPlacesIterator> offsets(sd->num_threads);
......@@ -256,7 +262,8 @@ template<typename RandomAccessIterator, typename Comparator>
sd->pieces[iam][seq].end = offsets[seq] - seqs[seq].first;
else
// very end of this sequence
sd->pieces[iam][seq].end = sd->starts[seq + 1] - sd->starts[seq];
sd->pieces[iam][seq].end =
sd->starts[seq + 1] - sd->starts[seq];
}
# pragma omp barrier
......@@ -284,6 +291,7 @@ template<typename RandomAccessIterator, typename Comparator>
// Merge to temporary storage, uninitialized creation not possible
// since there is no multiway_merge calling the placement new
// instead of the assignment operator.
// XXX incorrect (de)construction
sd->merging_places[iam] = sd->temporaries[iam] =
static_cast<value_type*>(
::operator new(sizeof(value_type) * length_am));
......@@ -296,11 +304,13 @@ template<typename RandomAccessIterator, typename Comparator>
for (int s = 0; s < sd->num_threads; s++)
{
seqs[s] = std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin,
seqs[s] =
std::make_pair(sd->sorting_places[s] + sd->pieces[iam][s].begin,
sd->sorting_places[s] + sd->pieces[iam][s].end);
}
multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp, length_am, sd->stable, false, sequential_tag());
multiway_merge(seqs.begin(), seqs.end(), sd->merging_places[iam], comp,
length_am, sd->stable, false, sequential_tag());
# pragma omp barrier
......@@ -326,7 +336,8 @@ template<typename RandomAccessIterator, typename Comparator>
inline void
parallel_sort_mwms(RandomAccessIterator begin, RandomAccessIterator end,
Comparator comp,
typename std::iterator_traits<RandomAccessIterator>::difference_type n,
typename std::iterator_traits<RandomAccessIterator>
::difference_type n,
int num_threads,
bool stable)
{
......@@ -368,7 +379,8 @@ template<typename RandomAccessIterator, typename Comparator>
if (Settings::sort_splitting == Settings::SAMPLING)
{
unsigned int size =
(Settings::sort_mwms_oversampling * num_threads - 1) * num_threads;
(Settings::sort_mwms_oversampling * num_threads - 1)
* num_threads;
sd.samples = static_cast<value_type*>(
::operator new(size * sizeof(value_type)));
}
......
......@@ -73,8 +73,8 @@ template<
{
value = bin_op(value, *begin);
*result = value;
result++;
begin++;
++result;
++begin;
}
return result;
}
......@@ -103,6 +103,9 @@ template<
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
if (begin == end)
return result;
thread_index_t num_threads =
std::min<difference_type>(get_max_threads(), n - 1);
......@@ -133,7 +136,7 @@ template<
((double)num_threads + Settings::partial_sum_dilatation)),
borderstart = n - num_threads * chunk_length;
borders[0] = 0;
for (int i = 1; i < (num_threads + 1); i++)
for (int i = 1; i < (num_threads + 1); ++i)
{
borders[i] = borderstart;
borderstart += chunk_length;
......@@ -146,20 +149,21 @@ template<
OutputIterator target_end;
} //single
int iam = omp_get_thread_num();
thread_index_t iam = omp_get_thread_num();
if (iam == 0)
{
*result = *begin;
parallel_partial_sum_basecase(begin + 1, begin + borders[1],
result + 1, bin_op, *begin);
sums[0] = *(result + borders[1] - 1);
new(&(sums[iam])) value_type(*(result + borders[1] - 1));
}
else
{
sums[iam] = std::accumulate(begin + borders[iam] + 1,
new(&(sums[iam])) value_type(
std::accumulate(begin + borders[iam] + 1,
begin + borders[iam + 1],
*(begin + borders[iam]),
bin_op, __gnu_parallel::sequential_tag());
bin_op, __gnu_parallel::sequential_tag()));
}
# pragma omp barrier
......
......@@ -74,13 +74,13 @@ namespace __gnu_parallel
// Allocate uninitialized, to avoid default constructor.
value_type* samples = static_cast<value_type*>(
operator new(num_samples * sizeof(value_type)));
::operator new(num_samples * sizeof(value_type)));
for (difference_type s = 0; s < num_samples; s++)
for (difference_type s = 0; s < num_samples; ++s)
{
const unsigned long long index = static_cast<unsigned long long>(s)
* n / num_samples;
new(samples + s) value_type(begin[index]);
new(&(samples[s])) value_type(begin[index]);
}
__gnu_sequential::sort(samples, samples + num_samples, comp);
......@@ -91,6 +91,8 @@ namespace __gnu_parallel
pred(comp, pivot);
difference_type split = parallel_partition(begin, end, pred, num_threads);
delete[] samples;
return split;
}
......
......@@ -144,23 +144,23 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
value_type** temporaries = new value_type*[d->num_threads];
// Compute oracles and count appearances.
for (bin_index b = 0; b < sd->num_bins + 1; b++)
for (bin_index b = 0; b < sd->num_bins + 1; ++b)
dist[b] = 0;
int num_bits = sd->num_bits;
random_number rng(d->seed);
// First main loop.
for (difference_type i = 0; i < length; i++)
for (difference_type i = 0; i < length; ++i)
{
bin_index oracle = random_number_pow2(num_bits, rng);
oracles[i] = oracle;
// To allow prefix (partial) sum.
dist[oracle + 1]++;
++(dist[oracle + 1]);
}
for (bin_index b = 0; b < sd->num_bins + 1; b++)
for (bin_index b = 0; b < sd->num_bins + 1; ++b)
sd->dist[b][iam + 1] = dist[b];
# pragma omp barrier
......@@ -169,7 +169,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
{
// Sum up bins, sd->dist[s + 1][d->num_threads] now contains the
// total number of items in bin s
for (bin_index s = 0; s < sd->num_bins; s++)
for (bin_index s = 0; s < sd->num_bins; ++s)
__gnu_sequential::partial_sum(sd->dist[s + 1],
sd->dist[s + 1] + d->num_threads + 1,
sd->dist[s + 1]);
......@@ -178,14 +178,14 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
# pragma omp barrier
sequence_index_t offset = 0, global_offset = 0;
for (bin_index s = 0; s < d->bins_begin; s++)
for (bin_index s = 0; s < d->bins_begin; ++s)
global_offset += sd->dist[s + 1][d->num_threads];
# pragma omp barrier
for (bin_index s = d->bins_begin; s < d->bins_end; s++)
for (bin_index s = d->bins_begin; s < d->bins_end; ++s)
{
for (int t = 0; t < d->num_threads + 1; t++)
for (int t = 0; t < d->num_threads + 1; ++t)
sd->dist[s + 1][t] += offset;
offset = sd->dist[s + 1][d->num_threads];
}
......@@ -196,24 +196,25 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
# pragma omp barrier
// Draw local copies to avoid false sharing.
for (bin_index b = 0; b < sd->num_bins + 1; b++)
for (bin_index b = 0; b < sd->num_bins + 1; ++b)
dist[b] = sd->dist[b][iam];
for (bin_index b = 0; b < sd->num_bins; b++)
for (bin_index b = 0; b < sd->num_bins; ++b)
bin_proc[b] = sd->bin_proc[b];
for (thread_index_t t = 0; t < d->num_threads; t++)
for (thread_index_t t = 0; t < d->num_threads; ++t)
temporaries[t] = sd->temporaries[t];
RandomAccessIterator source = sd->source;
difference_type start = sd->starts[iam];
// Distribute according to oracles, second main loop.
for (difference_type i = 0; i < length; i++)
for (difference_type i = 0; i < length; ++i)
{
bin_index target_bin = oracles[i];
thread_index_t target_p = bin_proc[target_bin];
// Last column [d->num_threads] stays unchanged.
temporaries[target_p][dist[target_bin + 1]++] = *(source + i + start);
new(&(temporaries[target_p][dist[target_bin + 1]++])) value_type(
*(source + i + start));
}
delete[] oracles;
......@@ -224,7 +225,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
# pragma omp barrier
// Shuffle bins internally.
for (bin_index b = d->bins_begin; b < d->bins_end; b++)
for (bin_index b = d->bins_begin; b < d->bins_end; ++b)
{
value_type* begin =
sd->temporaries[iam] +
......@@ -338,9 +339,9 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
sd.temporaries = new value_type*[num_threads];
sd.dist = new difference_type*[num_bins + 1];
sd.bin_proc = new thread_index_t[num_bins];
for (bin_index b = 0; b < num_bins + 1; b++)
for (bin_index b = 0; b < num_bins + 1; ++b)
sd.dist[b] = new difference_type[num_threads + 1];
for (bin_index b = 0; b < (num_bins + 1); b++)
for (bin_index b = 0; b < (num_bins + 1); ++b)
{
sd.dist[0][0] = 0;
sd.dist[b][0] = 0;
......@@ -354,7 +355,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
split = n % num_threads, start = 0;
difference_type bin_chunk_length = num_bins / num_threads,
bin_split = num_bins % num_threads;
for (thread_index_t i = 0; i < num_threads; i++)
for (thread_index_t i = 0; i < num_threads; ++i)
{
starts[i] = start;
start += (i < split) ? (chunk_length + 1) : chunk_length;
......@@ -364,7 +365,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
bin_cursor += (i < bin_split) ?
(bin_chunk_length + 1) : bin_chunk_length;
pus[i].bins_end = bin_cursor;
for (; j < bin_cursor; j++)
for (; j < bin_cursor; ++j)
sd.bin_proc[j] = i;
pus[i].num_threads = num_threads;
pus[i].seed = rng(std::numeric_limits<uint32>::max());
......@@ -378,7 +379,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
delete[] starts;
delete[] sd.bin_proc;
for (int s = 0; s < (num_bins + 1); s++)
for (int s = 0; s < (num_bins + 1); ++s)
delete[] sd.dist[s];
delete[] sd.dist;
delete[] sd.temporaries;
......@@ -455,31 +456,31 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
difference_type* dist0 = new difference_type[num_bins + 1],
* dist1 = new difference_type[num_bins + 1];
for (int b = 0; b < num_bins + 1; b++)
for (int b = 0; b < num_bins + 1; ++b)
dist0[b] = 0;
random_number bitrng(rng(0xFFFFFFFF));
for (difference_type i = 0; i < n; i++)
for (difference_type i = 0; i < n; ++i)
{
bin_index oracle = random_number_pow2(num_bits, bitrng);
oracles[i] = oracle;
// To allow prefix (partial) sum.
dist0[oracle + 1]++;
++(dist0[oracle + 1]);
}
// Sum up bins.
__gnu_sequential::partial_sum(dist0, dist0 + num_bins + 1, dist0);
for (int b = 0; b < num_bins + 1; b++)
for (int b = 0; b < num_bins + 1; ++b)
dist1[b] = dist0[b];
// Distribute according to oracles.
for (difference_type i = 0; i < n; i++)
target[(dist0[oracles[i]])++] = *(begin + i);
for (difference_type i = 0; i < n; ++i)
new(&(target[(dist0[oracles[i]])++])) value_type(*(begin + i));
for (int b = 0; b < num_bins; b++)
for (int b = 0; b < num_bins; ++b)
{
sequential_random_shuffle(target + dist1[b],
target + dist1[b + 1],
......
......@@ -100,16 +100,14 @@ template<
end = borders[iam + 1];
i++;
new (static_cast<void *>(&*out)) value_type(*first);
out++;
*out++ = *first;
for (InputIterator iter = first + begin; iter < first + end; ++iter)
{
if (!binary_pred(*iter, *(iter-1)))
{
i++;
new (static_cast<void *>(&*out)) value_type(*iter);
out++;
*out++ = *iter;
}
}
}
......@@ -153,8 +151,7 @@ template<
if (iter == first || !binary_pred(*iter, *(iter-1)))
{
i++;
new (static_cast<void *>(&*iter_out)) value_type(*iter);
iter_out++;
*iter_out++ = *iter;
}
}
......@@ -170,8 +167,7 @@ template<
{
if (!binary_pred(*iter, *(iter-1)))
{
new (static_cast<void *> (&*iter_out)) value_type(*iter);
iter_out++;
*iter_out++ = *iter;
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment