Commit 1acba85b by Johannes Singler (committed by Johannes Singler)

algobase.h: Uglify internal identifiers.

2009-09-16  Johannes Singler  <singler@ira.uka.de>

        * include/parallel/algobase.h: Uglify internal identifiers.
        * include/parallel/algo.h: Likewise.
        * include/parallel/algorithmfwd.h: Likewise.
        * include/parallel/balanced_quicksort.h: Likewise.
        * include/parallel/base.h: Likewise.
        * include/parallel/checkers.h: Likewise.
        * include/parallel/compatibility.h: Likewise.
        * include/parallel/compiletime_settings.h: Likewise.
        * include/parallel/equally_split.h: Likewise.
        * include/parallel/features.h: Likewise.
        * include/parallel/find.h: Likewise.
        * include/parallel/find_selectors.h: Likewise.
        * include/parallel/for_each.h: Likewise.
        * include/parallel/for_each_selectors.h: Likewise.
        * include/parallel/iterator.h: Likewise.
        * include/parallel/list_partition.h: Likewise.
        * include/parallel/losertree.h: Likewise.
        * include/parallel/merge.h: Likewise.
        * include/parallel/multiseq_selection.h: Likewise.
        * include/parallel/multiway_merge.h: Likewise.
        * include/parallel/multiway_mergesort.h: Likewise.
        * include/parallel/numeric: Likewise.
        * include/parallel/numericfwd.h: Likewise.
        * include/parallel/omp_loop.h: Likewise.
        * include/parallel/omp_loop_static.h: Likewise.
        * include/parallel/par_loop.h: Likewise.
        * include/parallel/partial_sum.h: Likewise.
        * include/parallel/partition.h: Likewise.
        * include/parallel/queue.h: Likewise.
        * include/parallel/quicksort.h: Likewise.
        * include/parallel/random_number.h: Likewise.
        * include/parallel/random_shuffle.h: Likewise.
        * include/parallel/search.h: Likewise.
        * include/parallel/set_operations.h: Likewise.
        * include/parallel/settings.h: Likewise.
        * include/parallel/sort.h: Likewise.
        * include/parallel/tags.h: Likewise.
        * include/parallel/types.h: Likewise.
        * include/parallel/unique_copy.h: Likewise.
        * include/parallel/workstealing.h: Likewise.

From-SVN: r151741
parent 4075e7e8
2009-09-16 Johannes Singler <singler@ira.uka.de>
* include/parallel/algobase.h: Uglify internal identifiers.
* include/parallel/algo.h: Likewise.
* include/parallel/algorithm: Likewise.
* include/parallel/algorithmfwd.h: Likewise.
* include/parallel/balanced_quicksort.h: Likewise.
* include/parallel/base.h: Likewise.
* include/parallel/basic_iterator.h: Likewise.
* include/parallel/checkers.h: Likewise.
* include/parallel/compatibility.h: Likewise.
* include/parallel/compiletime_settings.h: Likewise.
* include/parallel/equally_split.h: Likewise.
* include/parallel/features.h: Likewise.
* include/parallel/find.h: Likewise.
* include/parallel/find_selectors.h: Likewise.
* include/parallel/for_each.h: Likewise.
* include/parallel/for_each_selectors.h: Likewise.
* include/parallel/iterator.h: Likewise.
* include/parallel/list_partition.h: Likewise.
* include/parallel/losertree.h: Likewise.
* include/parallel/merge.h: Likewise.
* include/parallel/multiseq_selection.h: Likewise.
* include/parallel/multiway_merge.h: Likewise.
* include/parallel/multiway_mergesort.h: Likewise.
* include/parallel/numeric: Likewise.
* include/parallel/numericfwd.h: Likewise.
* include/parallel/omp_loop.h: Likewise.
* include/parallel/omp_loop_static.h: Likewise.
* include/parallel/parallel.h: Likewise.
* include/parallel/par_loop.h: Likewise.
* include/parallel/partial_sum.h: Likewise.
* include/parallel/partition.h: Likewise.
* include/parallel/queue.h: Likewise.
* include/parallel/quicksort.h: Likewise.
* include/parallel/random_number.h: Likewise.
* include/parallel/random_shuffle.h: Likewise.
* include/parallel/search.h: Likewise.
* include/parallel/set_operations.h: Likewise.
* include/parallel/settings.h: Likewise.
* include/parallel/sort.h: Likewise.
* include/parallel/tags.h: Likewise.
* include/parallel/types.h: Likewise.
* include/parallel/unique_copy.h: Likewise.
* include/parallel/workstealing.h: Likewise.
2009-09-14 Paolo Carlini <paolo.carlini@oracle.com>
PR libstdc++/41037
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -39,115 +39,115 @@
namespace __gnu_parallel
{
/**
* @brief Check whether @c [begin, @c end) is sorted according to @c comp.
* @param begin Begin iterator of sequence.
* @param end End iterator of sequence.
* @param comp Comparator.
* @return @c true if sorted, @c false otherwise.
* @brief Check whether @__c [__begin, @__c __end) is sorted according to @__c __comp.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __comp Comparator.
* @return @__c true if sorted, @__c false otherwise.
*/
// XXX Comparator default template argument
template<typename InputIterator, typename Comparator>
// XXX Compare default template argument
template<typename _IIter, typename _Compare>
bool
is_sorted(InputIterator begin, InputIterator end,
Comparator comp
= std::less<typename std::iterator_traits<InputIterator>::
value_type>())
__is_sorted(_IIter __begin, _IIter __end,
_Compare __comp
= std::less<typename std::iterator_traits<_IIter>::
_ValueType>())
{
if (begin == end)
if (__begin == __end)
return true;
InputIterator current(begin), recent(begin);
_IIter __current(__begin), __recent(__begin);
unsigned long long position = 1;
for (current++; current != end; current++)
unsigned long long __position = 1;
for (__current++; __current != __end; __current++)
{
if (comp(*current, *recent))
if (__comp(*__current, *__recent))
{
printf("is_sorted: check failed before position %i.\n",
position);
printf("__is_sorted: check failed before position %__i.\n",
__position);
return false;
}
recent = current;
position++;
__recent = __current;
__position++;
}
return true;
}
/**
* @brief Check whether @c [begin, @c end) is sorted according to @c comp.
* @brief Check whether @__c [__begin, @__c __end) is sorted according to @__c __comp.
* Prints the position in case an unordered pair is found.
* @param begin Begin iterator of sequence.
* @param end End iterator of sequence.
* @param first_failure The first failure is returned in this variable.
* @param comp Comparator.
* @return @c true if sorted, @c false otherwise.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __first_failure The first failure is returned in this variable.
* @param __comp Comparator.
* @return @__c true if sorted, @__c false otherwise.
*/
// XXX Comparator default template argument
template<typename InputIterator, typename Comparator>
// XXX Compare default template argument
template<typename _IIter, typename _Compare>
bool
is_sorted_failure(InputIterator begin, InputIterator end,
InputIterator& first_failure,
Comparator comp
= std::less<typename std::iterator_traits<InputIterator>::
value_type>())
is_sorted_failure(_IIter __begin, _IIter __end,
_IIter& __first_failure,
_Compare __comp
= std::less<typename std::iterator_traits<_IIter>::
_ValueType>())
{
if (begin == end)
if (__begin == __end)
return true;
InputIterator current(begin), recent(begin);
_IIter __current(__begin), __recent(__begin);
unsigned long long position = 1;
for (current++; current != end; current++)
unsigned long long __position = 1;
for (__current++; __current != __end; __current++)
{
if (comp(*current, *recent))
if (__comp(*__current, *__recent))
{
first_failure = current;
printf("is_sorted: check failed before position %lld.\n",
position);
__first_failure = __current;
printf("__is_sorted: check failed before position %lld.\n",
__position);
return false;
}
recent = current;
position++;
__recent = __current;
__position++;
}
first_failure = end;
__first_failure = __end;
return true;
}
/**
* @brief Check whether @c [begin, @c end) is sorted according to @c comp.
* @brief Check whether @__c [__begin, @__c __end) is sorted according to @__c __comp.
* Prints all unordered pair, including the surrounding two elements.
* @param begin Begin iterator of sequence.
* @param end End iterator of sequence.
* @param comp Comparator.
* @return @c true if sorted, @c false otherwise.
* @param __begin Begin iterator of sequence.
* @param __end End iterator of sequence.
* @param __comp Comparator.
* @return @__c true if sorted, @__c false otherwise.
*/
template<typename InputIterator, typename Comparator>
template<typename _IIter, typename _Compare>
bool
// XXX Comparator default template argument
is_sorted_print_failures(InputIterator begin, InputIterator end,
Comparator comp
// XXX Compare default template argument
is_sorted_print_failures(_IIter __begin, _IIter __end,
_Compare __comp
= std::less<typename std::iterator_traits
<InputIterator>::value_type>())
<_IIter>::value_type>())
{
if (begin == end)
if (__begin == __end)
return true;
InputIterator recent(begin);
bool ok = true;
_IIter __recent(__begin);
bool __ok = true;
for (InputIterator pos(begin + 1); pos != end; pos++)
for (_IIter __pos(__begin + 1); __pos != __end; __pos++)
{
if (comp(*pos, *recent))
if (__comp(*__pos, *__recent))
{
printf("%ld: %d %d %d %d\n", pos - begin, *(pos - 2),
*(pos- 1), *pos, *(pos + 1));
ok = false;
printf("%ld: %d %d %d %d\n", __pos - __begin, *(__pos - 2),
*(__pos- 1), *__pos, *(__pos + 1));
__ok = false;
}
recent = pos;
__recent = __pos;
}
return ok;
return __ok;
}
}
......
......@@ -38,15 +38,15 @@
/** @def _GLIBCXX_CALL
* @brief Macro to produce log message when entering a function.
* @param n Input size.
* @param __n Input size.
* @see _GLIBCXX_VERBOSE_LEVEL */
#if (_GLIBCXX_VERBOSE_LEVEL == 0)
#define _GLIBCXX_CALL(n)
#define _GLIBCXX_CALL(__n)
#endif
#if (_GLIBCXX_VERBOSE_LEVEL == 1)
#define _GLIBCXX_CALL(n) \
printf(" %s:\niam = %d, n = %ld, num_threads = %d\n", \
__PRETTY_FUNCTION__, omp_get_thread_num(), (n), get_max_threads());
#define _GLIBCXX_CALL(__n) \
printf(" %__s:\niam = %d, __n = %ld, __num_threads = %d\n", \
__PRETTY_FUNCTION__, omp_get_thread_num(), (__n), __get_max_threads());
#endif
#ifndef _GLIBCXX_SCALE_DOWN_FPU
......@@ -64,12 +64,12 @@
#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
* Consider the size of the L1 cache for
* __gnu_parallel::parallel_random_shuffle(). */
* gnu_parallel::__parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 0
#endif
#ifndef _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
/** @brief Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code.
* Consider the size of the TLB for
* __gnu_parallel::parallel_random_shuffle(). */
* gnu_parallel::__parallel_random_shuffle(). */
#define _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB 0
#endif
......@@ -33,54 +33,54 @@
namespace __gnu_parallel
{
/** @brief Function to split a sequence into parts of almost equal size.
/** @brief function to split a sequence into parts of almost equal size.
*
* The resulting sequence s of length num_threads+1 contains the splitting
* positions when splitting the range [0,n) into parts of almost
* The resulting sequence __s of length __num_threads+1 contains the splitting
* positions when splitting the range [0,__n) into parts of almost
* equal size (plus minus 1). The first entry is 0, the last one
* n. There may result empty parts.
* @param n Number of elements
* @param num_threads Number of parts
* @param s Splitters
* @returns End of splitter sequence, i. e. @c s+num_threads+1 */
template<typename difference_type, typename OutputIterator>
OutputIterator
equally_split(difference_type n, thread_index_t num_threads, OutputIterator s)
* n. There may result empty parts.
* @param __n Number of elements
* @param __num_threads Number of parts
* @param __s Splitters
* @returns End of splitter sequence, i.e. @__c __s+__num_threads+1 */
template<typename _DifferenceType, typename _OutputIterator>
_OutputIterator
equally_split(_DifferenceType __n, _ThreadIndex __num_threads, _OutputIterator __s)
{
difference_type chunk_length = n / num_threads;
difference_type num_longer_chunks = n % num_threads;
difference_type pos = 0;
for (thread_index_t i = 0; i < num_threads; ++i)
_DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __num_longer_chunks = __n % __num_threads;
_DifferenceType __pos = 0;
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
{
*s++ = pos;
pos += (i < num_longer_chunks) ? (chunk_length + 1) : chunk_length;
*__s++ = __pos;
__pos += (__i < __num_longer_chunks) ? (__chunk_length + 1) : __chunk_length;
}
*s++ = n;
return s;
*__s++ = __n;
return __s;
}
/** @brief Function to split a sequence into parts of almost equal size.
/** @brief function to split a sequence into parts of almost equal size.
*
* Returns the position of the splitting point between
* thread number thread_no (included) and
* thread number thread_no+1 (excluded).
* @param n Number of elements
* @param num_threads Number of parts
* @returns _SplittingAlgorithm point */
template<typename difference_type>
difference_type
equally_split_point(difference_type n,
thread_index_t num_threads,
thread_index_t thread_no)
* thread number __thread_no (included) and
* thread number __thread_no+1 (excluded).
* @param __n Number of elements
* @param __num_threads Number of parts
* @returns splitting point */
template<typename _DifferenceType>
_DifferenceType
equally_split_point(_DifferenceType __n,
_ThreadIndex __num_threads,
_ThreadIndex __thread_no)
{
difference_type chunk_length = n / num_threads;
difference_type num_longer_chunks = n % num_threads;
if (thread_no < num_longer_chunks)
return thread_no * (chunk_length + 1);
_DifferenceType __chunk_length = __n / __num_threads;
_DifferenceType __num_longer_chunks = __n % __num_threads;
if (__thread_no < __num_longer_chunks)
return __thread_no * (__chunk_length + 1);
else
return num_longer_chunks * (chunk_length + 1)
+ (thread_no - num_longer_chunks) * chunk_length;
return __num_longer_chunks * (__chunk_length + 1)
+ (__thread_no - __num_longer_chunks) * __chunk_length;
}
}
......
......@@ -78,7 +78,7 @@
#ifndef _GLIBCXX_TREE_INITIAL_SPLITTING
/** @def _GLIBCXX_TREE_INITIAL_SPLITTING
* @brief Include the initial splitting variant for
* _Rb_tree::insert_unique(InputIterator beg, InputIterator end).
* _Rb_tree::insert_unique(_IIter beg, _IIter __end).
* @see __gnu_parallel::_Rb_tree */
#define _GLIBCXX_TREE_INITIAL_SPLITTING 1
#endif
......@@ -86,7 +86,7 @@
#ifndef _GLIBCXX_TREE_DYNAMIC_BALANCING
/** @def _GLIBCXX_TREE_DYNAMIC_BALANCING
* @brief Include the dynamic balancing variant for
* _Rb_tree::insert_unique(InputIterator beg, InputIterator end).
* _Rb_tree::insert_unique(_IIter beg, _IIter __end).
* @see __gnu_parallel::_Rb_tree */
#define _GLIBCXX_TREE_DYNAMIC_BALANCING 1
#endif
......@@ -94,7 +94,7 @@
#ifndef _GLIBCXX_TREE_FULL_COPY
/** @def _GLIBCXX_TREE_FULL_COPY
* @brief In order to sort the input sequence of
* _Rb_tree::insert_unique(InputIterator beg, InputIterator end) a
* _Rb_tree::insert_unique(_IIter beg, _IIter __end) a
* full copy of the input elements is done.
* @see __gnu_parallel::_Rb_tree */
#define _GLIBCXX_TREE_FULL_COPY 1
......
......@@ -42,55 +42,55 @@
namespace __gnu_parallel
{
/** @brief Chose the desired algorithm by evaluating @c parallelism_tag.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param user_op A user-specified functor (comparator, predicate,
/** @brief Chose the desired algorithm by evaluating @__c __parallelism_tag.
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __user_op A user-specified functor (comparator, predicate,
* associative operator,...)
* @param functionality functor to "process" an element with
* user_op (depends on desired functionality, e. g. accumulate,
* @param __functionality functor to "process" an element with
* __user_op (depends on desired functionality, e. g. accumulate,
* for_each,...
* @param reduction Reduction functor.
* @param reduction_start Initial value for reduction.
* @param output Output iterator.
* @param bound Maximum number of elements processed.
* @param parallelism_tag Parallelization method */
template<typename InputIterator, typename UserOp,
typename Functionality, typename Red, typename Result>
UserOp
for_each_template_random_access(InputIterator begin, InputIterator end,
UserOp user_op,
Functionality& functionality,
Red reduction, Result reduction_start,
Result& output, typename
std::iterator_traits<InputIterator>::
difference_type bound,
_Parallelism parallelism_tag)
* @param __reduction Reduction functor.
* @param __reduction_start Initial value for reduction.
* @param __output Output iterator.
* @param __bound Maximum number of elements processed.
* @param __parallelism_tag Parallelization method */
template<typename _IIter, typename _UserOp,
typename _Functionality, typename _Red, typename _Result>
_UserOp
__for_each_template_random_access(_IIter __begin, _IIter __end,
_UserOp __user_op,
_Functionality& __functionality,
_Red __reduction, _Result __reduction_start,
_Result& __output, typename
std::iterator_traits<_IIter>::
difference_type __bound,
_Parallelism __parallelism_tag)
{
if (parallelism_tag == parallel_unbalanced)
return for_each_template_random_access_ed(begin, end, user_op,
functionality, reduction,
reduction_start,
output, bound);
else if (parallelism_tag == parallel_omp_loop)
return for_each_template_random_access_omp_loop(begin, end, user_op,
functionality,
reduction,
reduction_start,
output, bound);
else if (parallelism_tag == parallel_omp_loop_static)
return for_each_template_random_access_omp_loop(begin, end, user_op,
functionality,
reduction,
reduction_start,
output, bound);
if (__parallelism_tag == parallel_unbalanced)
return for_each_template_random_access_ed(__begin, __end, __user_op,
__functionality, __reduction,
__reduction_start,
__output, __bound);
else if (__parallelism_tag == parallel_omp_loop)
return for_each_template_random_access_omp_loop(__begin, __end, __user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
else if (__parallelism_tag == parallel_omp_loop_static)
return for_each_template_random_access_omp_loop(__begin, __end, __user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
else //e. g. parallel_balanced
return for_each_template_random_access_workstealing(begin, end,
user_op,
functionality,
reduction,
reduction_start,
output, bound);
return for_each_template_random_access_workstealing(__begin, __end,
__user_op,
__functionality,
__reduction,
__reduction_start,
__output, __bound);
}
}
......
......@@ -40,160 +40,160 @@ namespace __gnu_parallel
/** @brief A pair of iterators. The usual iterator operations are
* applied to both child iterators.
*/
template<typename Iterator1, typename Iterator2, typename IteratorCategory>
class iterator_pair : public std::pair<Iterator1, Iterator2>
template<typename _Iterator1, typename _Iterator2, typename _IteratorCategory>
class _IteratorPair : public std::pair<_Iterator1, _Iterator2>
{
private:
typedef iterator_pair<Iterator1, Iterator2, IteratorCategory> type;
typedef std::pair<Iterator1, Iterator2> base_type;
typedef _IteratorPair<_Iterator1, _Iterator2, _IteratorCategory> _Self;
typedef std::pair<_Iterator1, _Iterator2> _Base;
public:
typedef IteratorCategory iterator_category;
typedef _IteratorCategory iterator_category;
typedef void value_type;
typedef std::iterator_traits<Iterator1> traits_type;
typedef typename traits_type::difference_type difference_type;
typedef type* pointer;
typedef type& reference;
typedef std::iterator_traits<_Iterator1> _TraitsType;
typedef typename _TraitsType::difference_type difference_type;
typedef _Self* pointer;
typedef _Self& reference;
iterator_pair() { }
_IteratorPair() { }
iterator_pair(const Iterator1& first, const Iterator2& second)
: base_type(first, second) { }
_IteratorPair(const _Iterator1& __first, const _Iterator2& __second)
: _Base(__first, __second) { }
// Pre-increment operator.
type&
_Self&
operator++()
{
++base_type::first;
++base_type::second;
++_Base::first;
++_Base::second;
return *this;
}
// Post-increment operator.
const type
const _Self
operator++(int)
{ return type(base_type::first++, base_type::second++); }
{ return _Self(_Base::first++, _Base::second++); }
// Pre-decrement operator.
type&
_Self&
operator--()
{
--base_type::first;
--base_type::second;
--_Base::first;
--_Base::second;
return *this;
}
// Post-decrement operator.
const type
const _Self
operator--(int)
{ return type(base_type::first--, base_type::second--); }
{ return _Self(_Base::first--, _Base::second--); }
// Type conversion.
operator Iterator2() const
{ return base_type::second; }
operator _Iterator2() const
{ return _Base::second; }
type&
operator=(const type& other)
_Self&
operator=(const _Self& __other)
{
base_type::first = other.first;
base_type::second = other.second;
_Base::first = __other.first;
_Base::second = __other.second;
return *this;
}
type
operator+(difference_type delta) const
{ return type(base_type::first + delta, base_type::second + delta); }
_Self
operator+(difference_type __delta) const
{ return _Self(_Base::first + __delta, _Base::second + __delta); }
difference_type
operator-(const type& other) const
{ return base_type::first - other.first; }
operator-(const _Self& __other) const
{ return _Base::first - __other.first; }
};
/** @brief A triple of iterators. The usual iterator operations are
applied to all three child iterators.
*/
template<typename Iterator1, typename Iterator2, typename Iterator3,
typename IteratorCategory>
class iterator_triple
template<typename _Iterator1, typename _Iterator2, typename _Iterator3,
typename _IteratorCategory>
class _IteratorTriple
{
private:
typedef iterator_triple<Iterator1, Iterator2, Iterator3,
IteratorCategory> type;
typedef _IteratorTriple<_Iterator1, _Iterator2, _Iterator3,
_IteratorCategory> _Self;
public:
typedef IteratorCategory iterator_category;
typedef _IteratorCategory iterator_category;
typedef void value_type;
typedef typename std::iterator_traits<Iterator1>::difference_type
typedef typename std::iterator_traits<_Iterator1>::difference_type
difference_type;
typedef type* pointer;
typedef type& reference;
typedef _Self* pointer;
typedef _Self& reference;
Iterator1 first;
Iterator2 second;
Iterator3 third;
_Iterator1 __first;
_Iterator2 __second;
_Iterator3 __third;
iterator_triple() { }
_IteratorTriple() { }
iterator_triple(const Iterator1& _first, const Iterator2& _second,
const Iterator3& _third)
_IteratorTriple(const _Iterator1& _first, const _Iterator2& _second,
const _Iterator3& _third)
{
first = _first;
second = _second;
third = _third;
__first = _first;
__second = _second;
__third = _third;
}
// Pre-increment operator.
type&
_Self&
operator++()
{
++first;
++second;
++third;
++__first;
++__second;
++__third;
return *this;
}
// Post-increment operator.
const type
const _Self
operator++(int)
{ return type(first++, second++, third++); }
{ return _Self(__first++, __second++, __third++); }
// Pre-decrement operator.
type&
_Self&
operator--()
{
--first;
--second;
--third;
--__first;
--__second;
--__third;
return *this;
}
// Post-decrement operator.
const type
const _Self
operator--(int)
{ return type(first--, second--, third--); }
{ return _Self(__first--, __second--, __third--); }
// Type conversion.
operator Iterator3() const
{ return third; }
operator _Iterator3() const
{ return __third; }
type&
operator=(const type& other)
_Self&
operator=(const _Self& __other)
{
first = other.first;
second = other.second;
third = other.third;
__first = __other.__first;
__second = __other.__second;
__third = __other.__third;
return *this;
}
type
operator+(difference_type delta) const
{ return type(first + delta, second + delta, third + delta); }
_Self
operator+(difference_type __delta) const
{ return _Self(__first + __delta, __second + __delta, __third + __delta); }
difference_type
operator-(const type& other) const
{ return first - other.first; }
operator-(const _Self& __other) const
{ return __first - __other.__first; }
};
}
......
......@@ -3,12 +3,12 @@
// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the terms
// software; you can redistribute __it and/or modify __it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.
// This library is distributed in the hope that it will be useful, but
// This library is distributed in the hope that __it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// General Public License for more details.
......@@ -23,7 +23,7 @@
// <http://www.gnu.org/licenses/>.
/** @file parallel/list_partition.h
* @brief Functionality to split sequence referenced by only input
* @brief _Functionality to split __sequence referenced by only input
* iterators.
* This file is a GNU parallel extension to the Standard C++ Library.
*/
......@@ -39,137 +39,137 @@
namespace __gnu_parallel
{
/** @brief Shrinks and doubles the ranges.
* @param os_starts Start positions worked on (oversampled).
* @param count_to_two Counts up to 2.
* @param range_length Current length of a chunk.
* @param make_twice Whether the @c os_starts is allowed to be
* @param __os_starts Start positions worked on (oversampled).
* @param __count_to_two Counts up to 2.
* @param __range_length Current length of a chunk.
* @param __make_twice Whether the @__c __os_starts is allowed to be
* grown or not
*/
template<typename InputIterator>
template<typename _IIter>
void
shrink_and_double(std::vector<InputIterator>& os_starts,
size_t& count_to_two, size_t& range_length,
const bool make_twice)
__shrink_and_double(std::vector<_IIter>& __os_starts,
size_t& __count_to_two, size_t& __range_length,
const bool __make_twice)
{
++count_to_two;
if (not make_twice or count_to_two < 2)
shrink(os_starts, count_to_two, range_length);
++__count_to_two;
if (not __make_twice or __count_to_two < 2)
__shrink(__os_starts, __count_to_two, __range_length);
else
{
os_starts.resize((os_starts.size() - 1) * 2 + 1);
count_to_two = 0;
__os_starts.resize((__os_starts.size() - 1) * 2 + 1);
__count_to_two = 0;
}
}
/** @brief Combines two ranges into one and thus halves the number of ranges.
* @param os_starts Start positions worked on (oversampled).
* @param count_to_two Counts up to 2.
* @param range_length Current length of a chunk. */
template<typename InputIterator>
* @param __os_starts Start positions worked on (oversampled).
* @param __count_to_two Counts up to 2.
* @param __range_length Current length of a chunk. */
template<typename _IIter>
void
shrink(std::vector<InputIterator>& os_starts, size_t& count_to_two,
size_t& range_length)
__shrink(std::vector<_IIter>& __os_starts, size_t& __count_to_two,
size_t& __range_length)
{
for (typename std::vector<InputIterator>::size_type i = 0;
i <= (os_starts.size() / 2); ++i)
os_starts[i] = os_starts[i * 2];
range_length *= 2;
for (typename std::vector<_IIter>::size_type __i = 0;
__i <= (__os_starts.size() / 2); ++__i)
__os_starts[__i] = __os_starts[__i * 2];
__range_length *= 2;
}
/** @brief Splits a sequence given by input iterators into parts of
* almost equal size
*
* The function needs only one pass over the sequence.
* @param begin Begin iterator of input sequence.
* @param end End iterator of input sequence.
* @param starts Start iterators for the resulting parts, dimension
* @c num_parts+1. For convenience, @c starts @c [num_parts]
* @param __begin Begin iterator of input sequence.
* @param __end End iterator of input sequence.
* @param __starts Start iterators for the resulting parts, dimension
* @__c __num_parts+1. For convenience, @__c __starts @__c [__num_parts]
* contains the end iterator of the sequence.
* @param lengths Length of the resulting parts.
* @param num_parts Number of parts to split the sequence into.
* @param f Functor to be applied to each element by traversing it
* @param oversampling Oversampling factor. If 0, then the
* partitions will differ in at most @f$ \sqrt{\mathrm{end} -
* \mathrm{begin}} @f$ elements. Otherwise, the ratio between the
* longest and the shortest part is bounded by @f$
* 1/(\mathrm{oversampling} \cdot \mathrm{num\_parts}) @f$.
* @param __lengths Length of the resulting parts.
* @param __num_parts Number of parts to split the sequence into.
* @param __f Functor to be applied to each element by traversing __it
* @param __oversampling Oversampling factor. If 0, then the
* partitions will differ in at most @__f$ \sqrt{\mathrm{__end} -
* \mathrm{__begin}} @__f$ __elements. Otherwise, the ratio between the
* longest and the shortest part is bounded by @__f$
* 1/(\mathrm{__oversampling} \cdot \mathrm{num\_parts}) @__f$.
* @return Length of the whole sequence.
*/
template<typename InputIterator, typename FunctorType>
template<typename _IIter, typename _FunctorType>
size_t
list_partition(const InputIterator begin, const InputIterator end,
InputIterator* starts, size_t* lengths, const int num_parts,
FunctorType& f, int oversampling = 0)
list_partition(const _IIter __begin, const _IIter __end,
_IIter* __starts, size_t* __lengths, const int __num_parts,
_FunctorType& __f, int __oversampling = 0)
{
bool make_twice = false;
bool __make_twice = false;
// The resizing algorithm is chosen according to the oversampling factor.
if (oversampling == 0)
if (__oversampling == 0)
{
make_twice = true;
oversampling = 1;
__make_twice = true;
__oversampling = 1;
}
std::vector<InputIterator> os_starts(2 * oversampling * num_parts + 1);
std::vector<_IIter> __os_starts(2 * __oversampling * __num_parts + 1);
os_starts[0]= begin;
InputIterator prev = begin, it = begin;
size_t dist_limit = 0, dist = 0;
size_t cur = 1, next = 1;
size_t range_length = 1;
size_t count_to_two = 0;
while (it != end)
__os_starts[0]= __begin;
_IIter __prev = __begin, __it = __begin;
size_t __dist_limit = 0, __dist = 0;
size_t __cur = 1, __next = 1;
size_t __range_length = 1;
size_t __count_to_two = 0;
while (__it != __end)
{
cur = next;
for (; cur < os_starts.size() and it != end; ++cur)
__cur = __next;
for (; __cur < __os_starts.size() and __it != __end; ++__cur)
{
for (dist_limit += range_length;
dist < dist_limit and it != end; ++dist)
for (__dist_limit += __range_length;
__dist < __dist_limit and __it != __end; ++__dist)
{
f(it);
++it;
__f(__it);
++__it;
}
os_starts[cur] = it;
__os_starts[__cur] = __it;
}
// Must compare for end and not cur < os_starts.size() , because
// cur could be == os_starts.size() as well
if (it == end)
// Must compare for end and not __cur < __os_starts.size() , because
// __cur could be == __os_starts.size() as well
if (__it == __end)
break;
shrink_and_double(os_starts, count_to_two, range_length, make_twice);
next = os_starts.size() / 2 + 1;
__shrink_and_double(__os_starts, __count_to_two, __range_length, __make_twice);
__next = __os_starts.size() / 2 + 1;
}
// Calculation of the parts (one must be extracted from current
// because the partition beginning at end, consists only of
// Calculation of the parts (one must be extracted from __current
// because the partition beginning at __end, consists only of
// itself).
size_t size_part = (cur - 1) / num_parts;
int size_greater = static_cast<int>((cur - 1) % num_parts);
starts[0] = os_starts[0];
size_t __size_part = (__cur - 1) / __num_parts;
int __size_greater = static_cast<int>((__cur - 1) % __num_parts);
__starts[0] = __os_starts[0];
size_t index = 0;
size_t __index = 0;
// Smallest partitions.
for (int i = 1; i < (num_parts + 1 - size_greater); ++i)
for (int __i = 1; __i < (__num_parts + 1 - __size_greater); ++__i)
{
lengths[i - 1] = size_part * range_length;
index += size_part;
starts[i] = os_starts[index];
__lengths[__i - 1] = __size_part * __range_length;
__index += __size_part;
__starts[__i] = __os_starts[__index];
}
// Biggest partitions.
for (int i = num_parts + 1 - size_greater; i <= num_parts; ++i)
for (int __i = __num_parts + 1 - __size_greater; __i <= __num_parts; ++__i)
{
lengths[i - 1] = (size_part+1) * range_length;
index += (size_part+1);
starts[i] = os_starts[index];
__lengths[__i - 1] = (__size_part+1) * __range_length;
__index += (__size_part+1);
__starts[__i] = __os_starts[__index];
}
// Correction of the end size (the end iteration has not finished).
lengths[num_parts - 1] -= (dist_limit - dist);
__lengths[__num_parts - 1] -= (__dist_limit - __dist);
return dist;
return __dist;
}
}
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -52,7 +52,7 @@ namespace __parallel
template<typename _IIter, typename _Tp, typename _Tag>
_Tp
accumulate_switch(_IIter, _IIter, _Tp, _Tag);
__accumulate_switch(_IIter, _IIter, _Tp, _Tag);
template<typename _IIter, typename _Tp, typename _BinaryOper>
_Tp
......@@ -71,13 +71,13 @@ namespace __parallel
template<typename _IIter, typename _Tp, typename _BinaryOper,
typename _Tag>
_Tp
accumulate_switch(_IIter, _IIter, _Tp, _BinaryOper, _Tag);
__accumulate_switch(_IIter, _IIter, _Tp, _BinaryOper, _Tag);
template<typename _RAIter, typename _Tp, typename _BinaryOper>
_Tp
accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper,
__accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper,
random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_unbalanced);
template<typename _IIter, typename _OIter>
......@@ -111,15 +111,15 @@ namespace __parallel
template<typename _IIter, typename _OIter, typename _BinaryOper,
typename _Tag1, typename _Tag2>
_OIter
adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
__adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
_Tag1, _Tag2);
template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter
adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
__adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
random_access_iterator_tag,
random_access_iterator_tag,
__gnu_parallel::_Parallelism parallelism
__gnu_parallel::_Parallelism __parallelism
= __gnu_parallel::parallel_unbalanced);
template<typename _IIter1, typename _IIter2, typename _Tp>
......@@ -157,7 +157,7 @@ namespace __parallel
template<typename _RAIter1, typename _RAIter2, typename _Tp,
typename BinaryFunction1, typename BinaryFunction2>
_Tp
inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1,
__inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1,
BinaryFunction2, random_access_iterator_tag,
random_access_iterator_tag,
__gnu_parallel::_Parallelism
......@@ -167,7 +167,7 @@ namespace __parallel
typename _BinaryFunction1, typename _BinaryFunction2,
typename _Tag1, typename _Tag2>
_Tp
inner_product_switch(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1,
__inner_product_switch(_IIter1, _IIter1, _IIter2, _Tp, _BinaryFunction1,
_BinaryFunction2, _Tag1, _Tag2);
......@@ -182,7 +182,7 @@ namespace __parallel
template<typename _IIter, typename _OIter>
_OIter
partial_sum(_IIter, _IIter, _OIter result);
partial_sum(_IIter, _IIter, _OIter __result);
template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter
......@@ -191,11 +191,11 @@ namespace __parallel
template<typename _IIter, typename _OIter, typename _BinaryOper,
typename _Tag1, typename _Tag2>
_OIter
partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2);
__partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper, _Tag1, _Tag2);
template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter
partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper,
__partial_sum_switch(_IIter, _IIter, _OIter, _BinaryOper,
random_access_iterator_tag, random_access_iterator_tag);
} // end namespace
} // end namespace
......
......@@ -44,73 +44,73 @@ namespace __gnu_parallel
/** @brief Embarrassingly parallel algorithm for random access
* iterators, using an OpenMP for loop.
*
* @param begin Begin iterator of element sequence.
* @param end End iterator of element sequence.
* @param o User-supplied functor (comparator, predicate, adding
* @param __begin Begin iterator of element __sequence.
* @param __end End iterator of element __sequence.
* @param __o User-supplied functor (comparator, predicate, adding
* functor, etc.).
* @param f Functor to "process" an element with op (depends on
* @param __f Functor to "process" an element with __op (depends on
* desired functionality, e. g. for std::for_each(), ...).
* @param r Functor to "add" a single result to the already
* processed elements (depends on functionality).
* @param base Base value for reduction.
* @param output Pointer to position where final result is written to
* @param bound Maximum number of elements processed (e. g. for
* @param __r Functor to "add" a single __result to the already
* processed __elements (depends on functionality).
* @param __base Base value for reduction.
* @param __output Pointer to position where final result is written to
* @param __bound Maximum number of elements processed (e. g. for
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename RandomAccessIterator,
typename Op,
typename Fu,
typename Red,
typename Result>
Op
for_each_template_random_access_omp_loop(RandomAccessIterator begin,
RandomAccessIterator end,
Op o, Fu& f, Red r, Result base,
Result& output,
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
for_each_template_random_access_omp_loop(_RAIter __begin,
_RAIter __end,
_Op __o, _Fu& __f, _Red __r, _Result __base,
_Result& __output,
typename std::iterator_traits
<RandomAccessIterator>::
difference_type bound)
<_RAIter>::
difference_type __bound)
{
typedef typename
std::iterator_traits<RandomAccessIterator>::difference_type
difference_type;
std::iterator_traits<_RAIter>::difference_type
_DifferenceType;
difference_type length = end - begin;
thread_index_t num_threads =
__gnu_parallel::min<difference_type>(get_max_threads(), length);
_DifferenceType __length = __end - __begin;
_ThreadIndex __num_threads =
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
Result *thread_results;
_Result *__thread_results;
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
thread_results = new Result[num_threads];
__num_threads = omp_get_num_threads();
__thread_results = new _Result[__num_threads];
for (thread_index_t i = 0; i < num_threads; ++i)
thread_results[i] = Result();
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__thread_results[__i] = _Result();
}
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
# pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size)
for (difference_type pos = 0; pos < length; ++pos)
thread_results[iam] =
r(thread_results[iam], f(o, begin+pos));
for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
__thread_results[__iam] =
__r(__thread_results[__iam], __f(__o, __begin+__pos));
} //parallel
for (thread_index_t i = 0; i < num_threads; ++i)
output = r(output, thread_results[i]);
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__output = __r(__output, __thread_results[__i]);
delete [] thread_results;
delete [] __thread_results;
// Points to last element processed (needed as return value for
// some algorithms like transform).
f.finish_iterator = begin + length;
__f.finish_iterator = __begin + __length;
return o;
return __o;
}
} // end namespace
......
......@@ -44,72 +44,72 @@ namespace __gnu_parallel
/** @brief Embarrassingly parallel algorithm for random access
* iterators, using an OpenMP for loop with static scheduling.
*
* @param begin Begin iterator of element sequence.
* @param end End iterator of element sequence.
* @param o User-supplied functor (comparator, predicate, adding
* @param __begin Begin iterator of element __sequence.
* @param __end End iterator of element __sequence.
* @param __o User-supplied functor (comparator, predicate, adding
* functor, ...).
* @param f Functor to "process" an element with op (depends on
* @param __f Functor to "process" an element with __op (depends on
* desired functionality, e. g. for std::for_each(), ...).
* @param r Functor to "add" a single result to the already processed
* elements (depends on functionality).
* @param base Base value for reduction.
* @param output Pointer to position where final result is written to
* @param bound Maximum number of elements processed (e. g. for
* @param __r Functor to "add" a single __result to the already processed
* __elements (depends on functionality).
* @param __base Base value for reduction.
* @param __output Pointer to position where final result is written to
* @param __bound Maximum number of elements processed (e. g. for
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename RandomAccessIterator,
typename Op,
typename Fu,
typename Red,
typename Result>
Op
for_each_template_random_access_omp_loop_static(RandomAccessIterator begin,
RandomAccessIterator end,
Op o, Fu& f, Red r,
Result base, Result& output,
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
for_each_template_random_access_omp_loop_static(_RAIter __begin,
_RAIter __end,
_Op __o, _Fu& __f, _Red __r,
_Result __base, _Result& __output,
typename std::iterator_traits
<RandomAccessIterator>::
difference_type bound)
<_RAIter>::
difference_type __bound)
{
typedef typename
std::iterator_traits<RandomAccessIterator>::difference_type
difference_type;
std::iterator_traits<_RAIter>::difference_type
_DifferenceType;
difference_type length = end - begin;
thread_index_t num_threads =
std::min<difference_type>(get_max_threads(), length);
_DifferenceType __length = __end - __begin;
_ThreadIndex __num_threads =
std::min<_DifferenceType>(__get_max_threads(), __length);
Result *thread_results;
_Result *__thread_results;
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
thread_results = new Result[num_threads];
__num_threads = omp_get_num_threads();
__thread_results = new _Result[__num_threads];
for (thread_index_t i = 0; i < num_threads; ++i)
thread_results[i] = Result();
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__thread_results[__i] = _Result();
}
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
# pragma omp for schedule(static, _Settings::get().workstealing_chunk_size)
for (difference_type pos = 0; pos < length; ++pos)
thread_results[iam] = r(thread_results[iam], f(o, begin+pos));
for (_DifferenceType __pos = 0; __pos < __length; ++__pos)
__thread_results[__iam] = __r(__thread_results[__iam], __f(__o, __begin+__pos));
} //parallel
for (thread_index_t i = 0; i < num_threads; ++i)
output = r(output, thread_results[i]);
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
__output = __r(__output, __thread_results[__i]);
delete [] thread_results;
delete [] __thread_results;
// Points to last element processed (needed as return value for
// some algorithms like transform).
f.finish_iterator = begin + length;
__f.finish_iterator = __begin + __length;
return o;
return __o;
}
} // end namespace
......
......@@ -45,89 +45,89 @@ namespace __gnu_parallel
* iterators, using hand-crafted parallelization by equal splitting
* the work.
*
* @param begin Begin iterator of element sequence.
* @param end End iterator of element sequence.
* @param o User-supplied functor (comparator, predicate, adding
* @param __begin Begin iterator of element __sequence.
* @param __end End iterator of element __sequence.
* @param __o User-supplied functor (comparator, predicate, adding
* functor, ...)
* @param f Functor to "process" an element with op (depends on
* @param __f Functor to "process" an element with __op (depends on
* desired functionality, e. g. for std::for_each(), ...).
* @param r Functor to "add" a single result to the already
* processed elements (depends on functionality).
* @param base Base value for reduction.
* @param output Pointer to position where final result is written to
* @param bound Maximum number of elements processed (e. g. for
* @param __r Functor to "add" a single __result to the already
* processed __elements (depends on functionality).
* @param __base Base value for reduction.
* @param __output Pointer to position where final result is written to
* @param __bound Maximum number of elements processed (e. g. for
* std::count_n()).
* @return User-supplied functor (that may contain a part of the result).
*/
template<typename RandomAccessIterator,
typename Op,
typename Fu,
typename Red,
typename Result>
Op
for_each_template_random_access_ed(RandomAccessIterator begin,
RandomAccessIterator end,
Op o, Fu& f, Red r, Result base,
Result& output,
template<typename _RAIter,
typename _Op,
typename _Fu,
typename _Red,
typename _Result>
_Op
for_each_template_random_access_ed(_RAIter __begin,
_RAIter __end,
_Op __o, _Fu& __f, _Red __r, _Result __base,
_Result& __output,
typename std::iterator_traits
<RandomAccessIterator>::
difference_type bound)
<_RAIter>::
difference_type __bound)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::difference_type difference_type;
const difference_type length = end - begin;
Result *thread_results;
bool* constructed;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
const _DifferenceType __length = __end - __begin;
_Result *__thread_results;
bool* __constructed;
thread_index_t num_threads =
__gnu_parallel::min<difference_type>(get_max_threads(), length);
_ThreadIndex __num_threads =
__gnu_parallel::min<_DifferenceType>(__get_max_threads(), __length);
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
thread_results = static_cast<Result*>(
::operator new(num_threads * sizeof(Result)));
constructed = new bool[num_threads];
__num_threads = omp_get_num_threads();
__thread_results = static_cast<_Result*>(
::operator new(__num_threads * sizeof(_Result)));
__constructed = new bool[__num_threads];
}
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
// Neutral element.
Result* reduct = static_cast<Result*>(::operator new(sizeof(Result)));
_Result* __reduct = static_cast<_Result*>(::operator new(sizeof(_Result)));
difference_type
start = equally_split_point(length, num_threads, iam),
stop = equally_split_point(length, num_threads, iam + 1);
_DifferenceType
__start = equally_split_point(__length, __num_threads, __iam),
__stop = equally_split_point(__length, __num_threads, __iam + 1);
if (start < stop)
if (__start < __stop)
{
new(reduct) Result(f(o, begin + start));
++start;
constructed[iam] = true;
new(__reduct) _Result(__f(__o, __begin + __start));
++__start;
__constructed[__iam] = true;
}
else
constructed[iam] = false;
__constructed[__iam] = false;
for (; start < stop; ++start)
*reduct = r(*reduct, f(o, begin + start));
for (; __start < __stop; ++__start)
*__reduct = __r(*__reduct, __f(__o, __begin + __start));
thread_results[iam] = *reduct;
__thread_results[__iam] = *__reduct;
} //parallel
for (thread_index_t i = 0; i < num_threads; ++i)
if (constructed[i])
output = r(output, thread_results[i]);
for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
if (__constructed[__i])
__output = __r(__output, __thread_results[__i]);
// Points to last element processed (needed as return value for
// some algorithms like transform).
f.finish_iterator = begin + length;
__f.finish_iterator = __begin + __length;
delete[] thread_results;
delete[] constructed;
delete[] __thread_results;
delete[] __constructed;
return o;
return __o;
}
} // end namespace
......
......@@ -45,99 +45,99 @@ namespace __gnu_parallel
* atomic access. push_front() and pop_front() must not be called
* concurrently to each other, while pop_back() can be called
* concurrently at all times.
* @c empty(), @c size(), and @c top() are intentionally not provided.
* @__c empty(), @__c size(), and @__c top() are intentionally not provided.
* Calling them would not make sense in a concurrent setting.
* @param T Contained element type. */
template<typename T>
class RestrictedBoundedConcurrentQueue
* @param _Tp Contained element type. */
template<typename _Tp>
class _RestrictedBoundedConcurrentQueue
{
private:
/** @brief Array of elements, seen as cyclic buffer. */
T* base;
_Tp* _M_base;
/** @brief Maximal number of elements contained at the same time. */
sequence_index_t max_size;
_SequenceIndex _M_max_size;
/** @brief Cyclic begin and end pointers contained in one
/** @brief Cyclic __begin and __end pointers contained in one
atomically changeable value. */
_GLIBCXX_VOLATILE lcas_t borders;
_GLIBCXX_VOLATILE _CASable _M_borders;
public:
/** @brief Constructor. Not to be called concurrent, of course.
* @param max_size Maximal number of elements to be contained. */
RestrictedBoundedConcurrentQueue(sequence_index_t max_size)
* @param _M_max_size Maximal number of elements to be contained. */
_RestrictedBoundedConcurrentQueue(_SequenceIndex _M_max_size)
{
this->max_size = max_size;
base = new T[max_size];
borders = encode2(0, 0);
this->_M_max_size = _M_max_size;
_M_base = new _Tp[_M_max_size];
_M_borders = __encode2(0, 0);
#pragma omp flush
}
/** @brief Destructor. Not to be called concurrent, of course. */
~RestrictedBoundedConcurrentQueue()
{ delete[] base; }
~_RestrictedBoundedConcurrentQueue()
{ delete[] _M_base; }
/** @brief Pushes one element into the queue at the front end.
/** @brief Pushes one element into the queue at the front __end.
* Must not be called concurrently with pop_front(). */
void
push_front(const T& t)
push_front(const _Tp& __t)
{
lcas_t former_borders = borders;
int former_front, former_back;
decode2(former_borders, former_front, former_back);
*(base + former_front % max_size) = t;
_CASable __former_borders = _M_borders;
int __former_front, __former_back;
decode2(__former_borders, __former_front, __former_back);
*(_M_base + __former_front % _M_max_size) = __t;
#if _GLIBCXX_ASSERTIONS
// Otherwise: front - back > max_size eventually.
_GLIBCXX_PARALLEL_ASSERT(((former_front + 1) - former_back)
<= max_size);
// Otherwise: front - back > _M_max_size eventually.
_GLIBCXX_PARALLEL_ASSERT(((__former_front + 1) - __former_back)
<= _M_max_size);
#endif
fetch_and_add(&borders, encode2(1, 0));
__fetch_and_add(&_M_borders, __encode2(1, 0));
}
/** @brief Pops one element from the queue at the front end.
/** @brief Pops one element from the queue at the front __end.
* Must not be called concurrently with pop_front(). */
bool
pop_front(T& t)
pop_front(_Tp& __t)
{
int former_front, former_back;
int __former_front, __former_back;
#pragma omp flush
decode2(borders, former_front, former_back);
while (former_front > former_back)
decode2(_M_borders, __former_front, __former_back);
while (__former_front > __former_back)
{
// Chance.
lcas_t former_borders = encode2(former_front, former_back);
lcas_t new_borders = encode2(former_front - 1, former_back);
if (compare_and_swap(&borders, former_borders, new_borders))
_CASable __former_borders = __encode2(__former_front, __former_back);
_CASable __new_borders = __encode2(__former_front - 1, __former_back);
if (__compare_and_swap(&_M_borders, __former_borders, __new_borders))
{
t = *(base + (former_front - 1) % max_size);
__t = *(_M_base + (__former_front - 1) % _M_max_size);
return true;
}
#pragma omp flush
decode2(borders, former_front, former_back);
decode2(_M_borders, __former_front, __former_back);
}
return false;
}
/** @brief Pops one element from the queue at the front end.
/** @brief Pops one element from the queue at the front __end.
* Must not be called concurrently with pop_front(). */
bool
pop_back(T& t) //queue behavior
pop_back(_Tp& __t) //queue behavior
{
int former_front, former_back;
int __former_front, __former_back;
#pragma omp flush
decode2(borders, former_front, former_back);
while (former_front > former_back)
decode2(_M_borders, __former_front, __former_back);
while (__former_front > __former_back)
{
// Chance.
lcas_t former_borders = encode2(former_front, former_back);
lcas_t new_borders = encode2(former_front, former_back + 1);
if (compare_and_swap(&borders, former_borders, new_borders))
_CASable __former_borders = __encode2(__former_front, __former_back);
_CASable __new_borders = __encode2(__former_front, __former_back + 1);
if (__compare_and_swap(&_M_borders, __former_borders, __new_borders))
{
t = *(base + former_back % max_size);
__t = *(_M_base + __former_back % _M_max_size);
return true;
}
#pragma omp flush
decode2(borders, former_front, former_back);
decode2(_M_borders, __former_front, __former_back);
}
return false;
}
......
......@@ -38,140 +38,140 @@
namespace __gnu_parallel
{
/** @brief Unbalanced quicksort divide step.
* @param begin Begin iterator of subsequence.
* @param end End iterator of subsequence.
* @param comp Comparator.
* @param pivot_rank Desired rank of the pivot.
* @param num_samples Choose pivot from that many samples.
* @param num_threads Number of threads that are allowed to work on
* @param __begin Begin iterator of subsequence.
* @param __end End iterator of subsequence.
* @param __comp Comparator.
* @param __pivot_rank Desired __rank of the pivot.
* @param __num_samples Choose pivot from that many samples.
* @param __num_threads Number of threads that are allowed to work on
* this part.
*/
template<typename RandomAccessIterator, typename Comparator>
typename std::iterator_traits<RandomAccessIterator>::difference_type
parallel_sort_qs_divide(RandomAccessIterator begin,
RandomAccessIterator end,
Comparator comp, typename std::iterator_traits
<RandomAccessIterator>::difference_type pivot_rank,
template<typename _RAIter, typename _Compare>
typename std::iterator_traits<_RAIter>::difference_type
__parallel_sort_qs_divide(_RAIter __begin,
_RAIter __end,
_Compare __comp, typename std::iterator_traits
<_RAIter>::difference_type __pivot_rank,
typename std::iterator_traits
<RandomAccessIterator>::difference_type
num_samples, thread_index_t num_threads)
<_RAIter>::difference_type
__num_samples, _ThreadIndex __num_threads)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
difference_type n = end - begin;
num_samples = std::min(num_samples, n);
_DifferenceType __n = __end - __begin;
__num_samples = std::min(__num_samples, __n);
// Allocate uninitialized, to avoid default constructor.
value_type* samples =
static_cast<value_type*>(::operator new(num_samples
* sizeof(value_type)));
_ValueType* __samples =
static_cast<_ValueType*>(::operator new(__num_samples
* sizeof(_ValueType)));
for (difference_type s = 0; s < num_samples; ++s)
for (_DifferenceType __s = 0; __s < __num_samples; ++__s)
{
const unsigned long long index = static_cast<unsigned long long>(s)
* n / num_samples;
::new(&(samples[s])) value_type(begin[index]);
const unsigned long long __index = static_cast<unsigned long long>(__s)
* __n / __num_samples;
::new(&(__samples[__s])) _ValueType(__begin[__index]);
}
__gnu_sequential::sort(samples, samples + num_samples, comp);
__gnu_sequential::sort(__samples, __samples + __num_samples, __comp);
value_type& pivot = samples[pivot_rank * num_samples / n];
_ValueType& pivot = __samples[__pivot_rank * __num_samples / __n];
__gnu_parallel::binder2nd<Comparator, value_type, value_type, bool>
pred(comp, pivot);
difference_type split =
parallel_partition(begin, end, pred, num_threads);
__gnu_parallel::binder2nd<_Compare, _ValueType, _ValueType, bool>
__pred(__comp, pivot);
_DifferenceType __split =
__parallel_partition(__begin, __end, __pred, __num_threads);
::operator delete(samples);
::operator delete(__samples);
return split;
return __split;
}
/** @brief Unbalanced quicksort conquer step.
* @param begin Begin iterator of subsequence.
* @param end End iterator of subsequence.
* @param comp Comparator.
* @param num_threads Number of threads that are allowed to work on
* @param __begin Begin iterator of subsequence.
* @param __end End iterator of subsequence.
* @param __comp Comparator.
* @param __num_threads Number of threads that are allowed to work on
* this part.
*/
template<typename RandomAccessIterator, typename Comparator>
template<typename _RAIter, typename _Compare>
void
parallel_sort_qs_conquer(RandomAccessIterator begin,
RandomAccessIterator end,
Comparator comp,
thread_index_t num_threads)
__parallel_sort_qs_conquer(_RAIter __begin,
_RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
{
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
if (num_threads <= 1)
if (__num_threads <= 1)
{
__gnu_sequential::sort(begin, end, comp);
__gnu_sequential::sort(__begin, __end, __comp);
return;
}
difference_type n = end - begin, pivot_rank;
_DifferenceType __n = __end - __begin, __pivot_rank;
if (n <= 1)
if (__n <= 1)
return;
thread_index_t num_threads_left;
_ThreadIndex __num_threads_left;
if ((num_threads % 2) == 1)
num_threads_left = num_threads / 2 + 1;
if ((__num_threads % 2) == 1)
__num_threads_left = __num_threads / 2 + 1;
else
num_threads_left = num_threads / 2;
__num_threads_left = __num_threads / 2;
pivot_rank = n * num_threads_left / num_threads;
__pivot_rank = __n * __num_threads_left / __num_threads;
difference_type split =
parallel_sort_qs_divide(begin, end, comp, pivot_rank,
_DifferenceType __split =
__parallel_sort_qs_divide(__begin, __end, __comp, __pivot_rank,
_Settings::get().sort_qs_num_samples_preset,
num_threads);
__num_threads);
#pragma omp parallel sections num_threads(2)
{
#pragma omp section
parallel_sort_qs_conquer(begin, begin + split,
comp, num_threads_left);
__parallel_sort_qs_conquer(__begin, __begin + __split,
__comp, __num_threads_left);
#pragma omp section
parallel_sort_qs_conquer(begin + split, end,
comp, num_threads - num_threads_left);
__parallel_sort_qs_conquer(__begin + __split, __end,
__comp, __num_threads - __num_threads_left);
}
}
/** @brief Unbalanced quicksort main call.
* @param begin Begin iterator of input sequence.
* @param end End iterator input sequence, ignored.
* @param comp Comparator.
* @param num_threads Number of threads that are allowed to work on
* @param __begin Begin iterator of input sequence.
* @param __end End iterator input sequence, ignored.
* @param __comp Comparator.
* @param __num_threads Number of threads that are allowed to work on
* this part.
*/
template<typename RandomAccessIterator, typename Comparator>
template<typename _RAIter, typename _Compare>
void
parallel_sort_qs(RandomAccessIterator begin,
RandomAccessIterator end,
Comparator comp,
thread_index_t num_threads)
__parallel_sort_qs(_RAIter __begin,
_RAIter __end,
_Compare __comp,
_ThreadIndex __num_threads)
{
_GLIBCXX_CALL(n)
_GLIBCXX_CALL(__n)
typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_RAIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
difference_type n = end - begin;
_DifferenceType __n = __end - __begin;
// At least one element per processor.
if (num_threads > n)
num_threads = static_cast<thread_index_t>(n);
if (__num_threads > __n)
__num_threads = static_cast<_ThreadIndex>(__n);
parallel_sort_qs_conquer(begin, begin + n, comp, num_threads);
__parallel_sort_qs_conquer(__begin, __begin + __n, __comp, __num_threads);
}
} //namespace __gnu_parallel
......
......@@ -38,84 +38,84 @@
namespace __gnu_parallel
{
/** @brief Random number generator, based on the Mersenne twister. */
class random_number
class _RandomNumber
{
private:
std::tr1::mt19937 mt;
uint64 supremum;
uint64 RAND_SUP;
double supremum_reciprocal;
double RAND_SUP_REC;
std::tr1::mt19937 _M_mt;
uint64 _M_supremum;
uint64 _RAND_SUP;
double _M_supremum_reciprocal;
double _RAND_SUP_REC;
// Assumed to be twice as long as the usual random number.
uint64 cache;
uint64 __cache;
// Bit results.
int bits_left;
int __bits_left;
static uint32
scale_down(uint64 x,
__scale_down(uint64 __x,
#if _GLIBCXX_SCALE_DOWN_FPU
uint64 /*supremum*/, double supremum_reciprocal)
uint64 /*_M_supremum*/, double _M_supremum_reciprocal)
#else
uint64 supremum, double /*supremum_reciprocal*/)
uint64 _M_supremum, double /*_M_supremum_reciprocal*/)
#endif
{
#if _GLIBCXX_SCALE_DOWN_FPU
return uint32(x * supremum_reciprocal);
return uint32(__x * _M_supremum_reciprocal);
#else
return static_cast<uint32>(x % supremum);
return static_cast<uint32>(__x % _M_supremum);
#endif
}
public:
/** @brief Default constructor. Seed with 0. */
random_number()
: mt(0), supremum(0x100000000ULL),
RAND_SUP(1ULL << (sizeof(uint32) * 8)),
supremum_reciprocal(double(supremum) / double(RAND_SUP)),
RAND_SUP_REC(1.0 / double(RAND_SUP)),
cache(0), bits_left(0) { }
_RandomNumber()
: _M_mt(0), _M_supremum(0x100000000ULL),
_RAND_SUP(1ULL << (sizeof(uint32) * 8)),
_M_supremum_reciprocal(double(_M_supremum) / double(_RAND_SUP)),
_RAND_SUP_REC(1.0 / double(_RAND_SUP)),
__cache(0), __bits_left(0) { }
/** @brief Constructor.
* @param seed Random seed.
* @param supremum Generate integer random numbers in the
* interval @c [0,supremum). */
random_number(uint32 seed, uint64 supremum = 0x100000000ULL)
: mt(seed), supremum(supremum),
RAND_SUP(1ULL << (sizeof(uint32) * 8)),
supremum_reciprocal(double(supremum) / double(RAND_SUP)),
RAND_SUP_REC(1.0 / double(RAND_SUP)),
cache(0), bits_left(0) { }
* @param __seed Random __seed.
* @param _M_supremum Generate integer random numbers in the
* interval @__c [0,_M_supremum). */
_RandomNumber(uint32 __seed, uint64 _M_supremum = 0x100000000ULL)
: _M_mt(__seed), _M_supremum(_M_supremum),
_RAND_SUP(1ULL << (sizeof(uint32) * 8)),
_M_supremum_reciprocal(double(_M_supremum) / double(_RAND_SUP)),
_RAND_SUP_REC(1.0 / double(_RAND_SUP)),
__cache(0), __bits_left(0) { }
/** @brief Generate unsigned random 32-bit integer. */
uint32
operator()()
{ return scale_down(mt(), supremum, supremum_reciprocal); }
{ return __scale_down(_M_mt(), _M_supremum, _M_supremum_reciprocal); }
/** @brief Generate unsigned random 32-bit integer in the
interval @c [0,local_supremum). */
interval @__c [0,local_supremum). */
uint32
operator()(uint64 local_supremum)
{
return scale_down(mt(), local_supremum,
double(local_supremum * RAND_SUP_REC));
return __scale_down(_M_mt(), local_supremum,
double(local_supremum * _RAND_SUP_REC));
}
/** @brief Generate a number of random bits, run-time parameter.
* @param bits Number of bits to generate. */
unsigned long
genrand_bits(int bits)
__genrand_bits(int bits)
{
unsigned long res = cache & ((1 << bits) - 1);
cache = cache >> bits;
bits_left -= bits;
if (bits_left < 32)
unsigned long __res = __cache & ((1 << bits) - 1);
__cache = __cache >> bits;
__bits_left -= bits;
if (__bits_left < 32)
{
cache |= ((uint64(mt())) << bits_left);
bits_left += 32;
__cache |= ((uint64(_M_mt())) << __bits_left);
__bits_left += 32;
}
return res;
return __res;
}
};
......
......@@ -23,7 +23,7 @@
// <http://www.gnu.org/licenses/>.
/** @file parallel/search.h
* @brief Parallel implementation base for std::search() and
* @brief Parallel implementation base for std::search() and
* std::search_n().
* This file is a GNU parallel extension to the Standard C++ Library.
*/
......@@ -42,130 +42,130 @@
namespace __gnu_parallel
{
/**
* @brief Precalculate advances for Knuth-Morris-Pratt algorithm.
* @param elements Begin iterator of sequence to search for.
* @param length Length of sequence to search for.
* @param advances Returned offsets.
* @brief Precalculate advances for Knuth-Morris-Pratt algorithm.
* @param __elements Begin iterator of sequence to search for.
* @param __length Length of sequence to search for.
* @param __advances Returned offsets.
*/
template<typename RandomAccessIterator, typename _DifferenceTp>
template<typename _RAIter, typename _DifferenceTp>
void
calc_borders(RandomAccessIterator elements, _DifferenceTp length,
_DifferenceTp* off)
__calc_borders(_RAIter __elements, _DifferenceTp __length,
_DifferenceTp* __off)
{
typedef _DifferenceTp difference_type;
typedef _DifferenceTp _DifferenceType;
off[0] = -1;
if (length > 1)
off[1] = 0;
difference_type k = 0;
for (difference_type j = 2; j <= length; j++)
__off[0] = -1;
if (__length > 1)
__off[1] = 0;
_DifferenceType __k = 0;
for (_DifferenceType __j = 2; __j <= __length; __j++)
{
while ((k >= 0) && !(elements[k] == elements[j-1]))
k = off[k];
off[j] = ++k;
while ((__k >= 0) && !(__elements[__k] == __elements[__j-1]))
__k = __off[__k];
__off[__j] = ++__k;
}
}
// Generic parallel find algorithm (requires random access iterator).
/** @brief Parallel std::search.
* @param begin1 Begin iterator of first sequence.
* @param end1 End iterator of first sequence.
* @param begin2 Begin iterator of second sequence.
* @param end2 End iterator of second sequence.
* @param pred Find predicate.
* @param __begin1 Begin iterator of first sequence.
* @param __end1 End iterator of first sequence.
* @param __begin2 Begin iterator of second sequence.
* @param __end2 End iterator of second sequence.
* @param __pred Find predicate.
* @return Place of finding in first sequences. */
template<typename _RandomAccessIterator1,
typename _RandomAccessIterator2,
typename Pred>
_RandomAccessIterator1
search_template(_RandomAccessIterator1 begin1, _RandomAccessIterator1 end1,
_RandomAccessIterator2 begin2, _RandomAccessIterator2 end2,
Pred pred)
template<typename __RAIter1,
typename __RAIter2,
typename _Pred>
__RAIter1
__search_template(__RAIter1 __begin1, __RAIter1 __end1,
__RAIter2 __begin2, __RAIter2 __end2,
_Pred __pred)
{
typedef std::iterator_traits<_RandomAccessIterator1> traits_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<__RAIter1> _TraitsType;
typedef typename _TraitsType::difference_type _DifferenceType;
_GLIBCXX_CALL((end1 - begin1) + (end2 - begin2));
_GLIBCXX_CALL((__end1 - __begin1) + (__end2 - __begin2));
difference_type pattern_length = end2 - begin2;
_DifferenceType __pattern_length = __end2 - __begin2;
// Pattern too short.
if(pattern_length <= 0)
return end1;
if(__pattern_length <= 0)
return __end1;
// Last point to start search.
difference_type input_length = (end1 - begin1) - pattern_length;
_DifferenceType __input_length = (__end1 - __begin1) - __pattern_length;
// Where is first occurrence of pattern? defaults to end.
difference_type result = (end1 - begin1);
difference_type *splitters;
_DifferenceType __result = (__end1 - __begin1);
_DifferenceType *__splitters;
// Pattern too long.
if (input_length < 0)
return end1;
if (__input_length < 0)
return __end1;
omp_lock_t result_lock;
omp_init_lock(&result_lock);
omp_lock_t __result_lock;
omp_init_lock(&__result_lock);
thread_index_t num_threads =
std::max<difference_type>(1,
std::min<difference_type>(input_length, get_max_threads()));
_ThreadIndex __num_threads =
std::max<_DifferenceType>(1,
std::min<_DifferenceType>(__input_length, __get_max_threads()));
difference_type advances[pattern_length];
calc_borders(begin2, pattern_length, advances);
_DifferenceType __advances[__pattern_length];
__calc_borders(__begin2, __pattern_length, __advances);
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
splitters = new difference_type[num_threads + 1];
equally_split(input_length, num_threads, splitters);
__num_threads = omp_get_num_threads();
__splitters = new _DifferenceType[__num_threads + 1];
equally_split(__input_length, __num_threads, __splitters);
}
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
difference_type start = splitters[iam], stop = splitters[iam + 1];
_DifferenceType __start = __splitters[__iam], __stop = __splitters[__iam + 1];
difference_type pos_in_pattern = 0;
bool found_pattern = false;
_DifferenceType __pos_in_pattern = 0;
bool __found_pattern = false;
while (start <= stop && !found_pattern)
while (__start <= __stop && !__found_pattern)
{
// Get new value of result.
#pragma omp flush(result)
#pragma omp flush(__result)
// No chance for this thread to find first occurrence.
if (result < start)
if (__result < __start)
break;
while (pred(begin1[start + pos_in_pattern],
begin2[pos_in_pattern]))
while (__pred(__begin1[__start + __pos_in_pattern],
__begin2[__pos_in_pattern]))
{
++pos_in_pattern;
if (pos_in_pattern == pattern_length)
++__pos_in_pattern;
if (__pos_in_pattern == __pattern_length)
{
// Found new candidate for result.
omp_set_lock(&result_lock);
result = std::min(result, start);
omp_unset_lock(&result_lock);
omp_set_lock(&__result_lock);
__result = std::min(__result, __start);
omp_unset_lock(&__result_lock);
found_pattern = true;
__found_pattern = true;
break;
}
}
// Make safe jump.
start += (pos_in_pattern - advances[pos_in_pattern]);
pos_in_pattern =
(advances[pos_in_pattern] < 0) ? 0 : advances[pos_in_pattern];
__start += (__pos_in_pattern - __advances[__pos_in_pattern]);
__pos_in_pattern =
(__advances[__pos_in_pattern] < 0) ? 0 : __advances[__pos_in_pattern];
}
} //parallel
omp_destroy_lock(&result_lock);
omp_destroy_lock(&__result_lock);
delete[] splitters;
delete[] __splitters;
// Return iterator on found element.
return (begin1 + result);
return (__begin1 + __result);
}
} // end namespace
......
......@@ -30,7 +30,7 @@
* @section parallelization_decision
* The decision whether to run an algorithm in parallel.
*
* There are several ways the user can switch on and off the parallel
* There are several ways the user can switch on and off the parallel
* execution of an algorithm, both at compile- and run-time.
*
* Only sequential execution can be forced at compile-time. This
......@@ -46,11 +46,11 @@
*
* To force sequential execution of an algorithm ultimately at
* compile-time, the user must add the tag
* __gnu_parallel::sequential_tag() to the end of the parameter list,
* __gnu_parallel::sequential_tag() to the end of the parameter list,
* e. g.
*
* \code
* std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag());
* std::sort(v.begin(), v.end(), __gnu_parallel::sequential_tag());
* \endcode
*
* This is compatible with all overloaded algorithm variants. No
......@@ -60,18 +60,18 @@
* If the algorithm call is not forced to be executed sequentially
* at compile-time, the decision is made at run-time.
* The global variable __gnu_parallel::_Settings::algorithm_strategy
* is checked. It is a tristate variable corresponding to:
* is checked. It is a tristate variable corresponding to:
*
* a. force_sequential, meaning the sequential algorithm is executed.
* b. force_parallel, meaning the parallel algorithm is executed.
* c. heuristic
* b. force_parallel, meaning the parallel algorithm is executed.
* c. heuristic
*
* For heuristic, the parallel algorithm implementation is called
* only if the input size is sufficiently large. For most
* algorithms, the input size is the (combined) length of the input
* sequence(s). The threshold can be set by the user, individually
* sequence(s). The threshold can be set by the user, individually
* for each algorithm. The according variables are called
* __gnu_parallel::_Settings::[algorithm]_minimal_n .
* __gnu_parallel::_Settings::[algorithm]_minimal_n .
*
* For some of the algorithms, there are even more tuning options,
* e. g. the ability to choose from multiple algorithm variants. See
......@@ -88,24 +88,24 @@
/**
* @brief Determine at compile(?)-time if the parallel variant of an
* algorithm should be called.
* @param c A condition that is convertible to bool that is overruled by
* @param __c A condition that is convertible to bool that is overruled by
* __gnu_parallel::_Settings::algorithm_strategy. Usually a decision
* based on the input size.
*/
#define _GLIBCXX_PARALLEL_CONDITION(c) (__gnu_parallel::_Settings::get().algorithm_strategy != __gnu_parallel::force_sequential && ((__gnu_parallel::get_max_threads() > 1 && (c)) || __gnu_parallel::_Settings::get().algorithm_strategy == __gnu_parallel::force_parallel))
#define _GLIBCXX_PARALLEL_CONDITION(__c) (__gnu_parallel::_Settings::get().algorithm_strategy != __gnu_parallel::force_sequential && ((__gnu_parallel::__get_max_threads() > 1 && (__c)) || __gnu_parallel::_Settings::get().algorithm_strategy == __gnu_parallel::force_parallel))
/*
inline bool
parallel_condition(bool c)
parallel_condition(bool __c)
{
bool ret = false;
const _Settings& s = _Settings::get();
if (s.algorithm_strategy != force_seqential)
const _Settings& __s = _Settings::get();
if (__s.algorithm_strategy != force_seqential)
{
if (s.algorithm_strategy == force_parallel)
if (__s.algorithm_strategy == force_parallel)
ret = true;
else
ret = get_max_threads() > 1 && c;
ret = __get_max_threads() > 1 && __c;
}
return ret;
}
......@@ -131,49 +131,49 @@ namespace __gnu_parallel
// Per-algorithm settings.
/// Minimal input size for accumulate.
sequence_index_t accumulate_minimal_n;
_SequenceIndex accumulate_minimal_n;
/// Minimal input size for adjacent_difference.
unsigned int adjacent_difference_minimal_n;
/// Minimal input size for count and count_if.
sequence_index_t count_minimal_n;
_SequenceIndex count_minimal_n;
/// Minimal input size for fill.
sequence_index_t fill_minimal_n;
_SequenceIndex fill_minimal_n;
/// Block size increase factor for find.
double find_increasing_factor;
/// Initial block size for find.
sequence_index_t find_initial_block_size;
_SequenceIndex find_initial_block_size;
/// Maximal block size for find.
sequence_index_t find_maximum_block_size;
_SequenceIndex find_maximum_block_size;
/// Start with looking for this many elements sequentially, for find.
sequence_index_t find_sequential_search_size;
_SequenceIndex find_sequential_search_size;
/// Minimal input size for for_each.
sequence_index_t for_each_minimal_n;
_SequenceIndex for_each_minimal_n;
/// Minimal input size for generate.
sequence_index_t generate_minimal_n;
_SequenceIndex generate_minimal_n;
/// Minimal input size for max_element.
sequence_index_t max_element_minimal_n;
_SequenceIndex max_element_minimal_n;
/// Minimal input size for merge.
sequence_index_t merge_minimal_n;
_SequenceIndex merge_minimal_n;
/// Oversampling factor for merge.
unsigned int merge_oversampling;
/// Minimal input size for min_element.
sequence_index_t min_element_minimal_n;
_SequenceIndex min_element_minimal_n;
/// Minimal input size for multiway_merge.
sequence_index_t multiway_merge_minimal_n;
_SequenceIndex multiway_merge_minimal_n;
/// Oversampling factor for multiway_merge.
int multiway_merge_minimal_k;
......@@ -182,22 +182,22 @@ namespace __gnu_parallel
unsigned int multiway_merge_oversampling;
/// Minimal input size for nth_element.
sequence_index_t nth_element_minimal_n;
_SequenceIndex nth_element_minimal_n;
/// Chunk size for partition.
sequence_index_t partition_chunk_size;
_SequenceIndex partition_chunk_size;
/// Chunk size for partition, relative to input size. If > 0.0,
/// this value overrides partition_chunk_size.
double partition_chunk_share;
/// Minimal input size for partition.
sequence_index_t partition_minimal_n;
_SequenceIndex partition_minimal_n;
/// Minimal input size for partial_sort.
sequence_index_t partial_sort_minimal_n;
_SequenceIndex partial_sort_minimal_n;
/// Ratio for partial_sum. Assume "sum and write result" to be
/// Ratio for partial_sum. Assume "sum and write __result" to be
/// this factor slower than just "sum".
float partial_sum_dilation;
......@@ -208,22 +208,22 @@ namespace __gnu_parallel
unsigned int random_shuffle_minimal_n;
/// Minimal input size for replace and replace_if.
sequence_index_t replace_minimal_n;
_SequenceIndex replace_minimal_n;
/// Minimal input size for set_difference.
sequence_index_t set_difference_minimal_n;
_SequenceIndex set_difference_minimal_n;
/// Minimal input size for set_intersection.
sequence_index_t set_intersection_minimal_n;
_SequenceIndex set_intersection_minimal_n;
/// Minimal input size for set_symmetric_difference.
sequence_index_t set_symmetric_difference_minimal_n;
_SequenceIndex set_symmetric_difference_minimal_n;
/// Minimal input size for set_union.
sequence_index_t set_union_minimal_n;
_SequenceIndex set_union_minimal_n;
/// Minimal input size for parallel sorting.
sequence_index_t sort_minimal_n;
_SequenceIndex sort_minimal_n;
/// Oversampling factor for parallel std::sort (MWMS).
unsigned int sort_mwms_oversampling;
......@@ -231,38 +231,38 @@ namespace __gnu_parallel
/// Such many samples to take to find a good pivot (quicksort).
unsigned int sort_qs_num_samples_preset;
/// Maximal subsequence length to switch to unbalanced base case.
/// Maximal subsequence length to switch to unbalanced base case.
/// Applies to std::sort with dynamically load-balanced quicksort.
sequence_index_t sort_qsb_base_case_maximal_n;
_SequenceIndex sort_qsb_base_case_maximal_n;
/// Minimal input size for parallel std::transform.
sequence_index_t transform_minimal_n;
_SequenceIndex transform_minimal_n;
/// Minimal input size for unique_copy.
sequence_index_t unique_copy_minimal_n;
_SequenceIndex unique_copy_minimal_n;
sequence_index_t workstealing_chunk_size;
_SequenceIndex workstealing_chunk_size;
// Hardware dependent tuning parameters.
/// Size of the L1 cache in bytes (underestimation).
/// Size of the L1 cache in bytes (underestimation).
unsigned long long L1_cache_size;
/// Size of the L2 cache in bytes (underestimation).
/// Size of the L2 cache in bytes (underestimation).
unsigned long long L2_cache_size;
/// Size of the Translation Lookaside Buffer (underestimation).
/// Size of the Translation Lookaside Buffer (underestimation).
unsigned int TLB_size;
/// Overestimation of cache line size. Used to avoid false
/// sharing, i. e. elements of different threads are at least this
/// sharing, i.e. elements of different threads are at least this
/// amount apart.
unsigned int cache_line_size;
// Statistics.
/// The number of stolen ranges in load-balanced quicksort.
sequence_index_t qsb_steals;
_SequenceIndex qsb_steals;
/// Get the global settings.
_GLIBCXX_CONST static const _Settings&
......
......@@ -46,37 +46,37 @@ namespace __gnu_parallel
struct parallel_tag
{
private:
thread_index_t num_threads;
_ThreadIndex __num_threads;
public:
/** @brief Default constructor. Use default number of threads. */
parallel_tag()
{
this->num_threads = 0;
this->__num_threads = 0;
}
/** @brief Default constructor. Recommend number of threads to use.
* @param num_threads Desired number of threads. */
parallel_tag(thread_index_t num_threads)
* @param __num_threads Desired number of threads. */
parallel_tag(_ThreadIndex __num_threads)
{
this->num_threads = num_threads;
this->__num_threads = __num_threads;
}
/** @brief Find out desired number of threads.
/** @brief Find out desired number of threads.
* @return Desired number of threads. */
inline thread_index_t get_num_threads()
inline _ThreadIndex __get_num_threads()
{
if(num_threads == 0)
if(__num_threads == 0)
return omp_get_max_threads();
else
return num_threads;
return __num_threads;
}
/** @brief Set the desired number of threads.
* @param num_threads Desired number of threads. */
inline void set_num_threads(thread_index_t num_threads)
* @param __num_threads Desired number of threads. */
inline void set_num_threads(_ThreadIndex __num_threads)
{
this->num_threads = num_threads;
this->__num_threads = __num_threads;
}
};
......@@ -85,8 +85,8 @@ namespace __gnu_parallel
struct default_parallel_tag : public parallel_tag
{
default_parallel_tag() { }
default_parallel_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
default_parallel_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
/** @brief Recommends parallel execution using dynamic
......@@ -111,21 +111,21 @@ namespace __gnu_parallel
/** @brief Forces parallel merging
* with exact splitting, at compile time. */
* with exact splitting, at compile time. */
struct exact_tag : public parallel_tag
{
exact_tag() { }
exact_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
exact_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
/** @brief Forces parallel merging
* with exact splitting, at compile time. */
* with exact splitting, at compile time. */
struct sampling_tag : public parallel_tag
{
sampling_tag() { }
sampling_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
sampling_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
......@@ -134,17 +134,17 @@ namespace __gnu_parallel
struct multiway_mergesort_tag : public parallel_tag
{
multiway_mergesort_tag() { }
multiway_mergesort_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
multiway_mergesort_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
/** @brief Forces parallel sorting using multiway mergesort
* with exact splitting at compile time. */
* with exact splitting at compile time. */
struct multiway_mergesort_exact_tag : public parallel_tag
{
multiway_mergesort_exact_tag() { }
multiway_mergesort_exact_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
multiway_mergesort_exact_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
/** @brief Forces parallel sorting using multiway mergesort
......@@ -152,8 +152,8 @@ namespace __gnu_parallel
struct multiway_mergesort_sampling_tag : public parallel_tag
{
multiway_mergesort_sampling_tag() { }
multiway_mergesort_sampling_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
multiway_mergesort_sampling_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
/** @brief Forces parallel sorting using unbalanced quicksort
......@@ -161,8 +161,8 @@ namespace __gnu_parallel
struct quicksort_tag : public parallel_tag
{
quicksort_tag() { }
quicksort_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
quicksort_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
/** @brief Forces parallel sorting using balanced quicksort
......@@ -170,8 +170,8 @@ namespace __gnu_parallel
struct balanced_quicksort_tag : public parallel_tag
{
balanced_quicksort_tag() { }
balanced_quicksort_tag(thread_index_t num_threads)
: parallel_tag(num_threads) { }
balanced_quicksort_tag(_ThreadIndex __num_threads)
: parallel_tag(__num_threads) { }
};
......
......@@ -79,7 +79,7 @@ namespace __gnu_parallel
};
/// Merging algorithms:
// bubblesort-alike, loser-tree variants, enum sentinel.
// bubblesort-alike, loser-tree variants, enum sentinel.
enum _MultiwayMergeAlgorithm
{
LOSER_TREE
......@@ -92,7 +92,7 @@ namespace __gnu_parallel
LINEAR
};
/// Sorting/merging algorithms: sampling, exact.
/// Sorting/merging algorithms: sampling, exact.
enum _SplittingAlgorithm
{
SAMPLING,
......@@ -108,7 +108,7 @@ namespace __gnu_parallel
EQUAL_SPLIT
};
/// Integer Types.
/// Integer Types.
// XXX need to use <cstdint>
/** @brief 16-bit signed integer. */
typedef short int16;
......@@ -129,27 +129,27 @@ namespace __gnu_parallel
typedef unsigned long long uint64;
/**
* @brief Unsigned integer to index elements.
* @brief Unsigned integer to index elements.
* The total number of elements for each algorithm must fit into this type.
*/
typedef uint64 sequence_index_t;
typedef uint64 _SequenceIndex;
/**
* @brief Unsigned integer to index a thread number.
* The maximum thread number (for each processor) must fit into this type.
*/
typedef uint16 thread_index_t;
typedef uint16 _ThreadIndex;
// XXX atomics interface?
/// Longest compare-and-swappable integer type on this platform.
typedef int64 lcas_t;
typedef int64 _CASable;
// XXX numeric_limits::digits?
/// Number of bits of ::lcas_t.
static const int lcas_t_bits = sizeof(lcas_t) * 8;
/// Number of bits of ::_CASable.
static const int _CASable_bits = sizeof(_CASable) * 8;
/// ::lcas_t with the right half of bits set to 1.
static const lcas_t lcas_t_mask = ((lcas_t(1) << (lcas_t_bits / 2)) - 1);
/// ::_CASable with the right half of bits set to 1.
static const _CASable _CASable_mask = ((_CASable(1) << (_CASable_bits / 2)) - 1);
}
#endif /* _GLIBCXX_PARALLEL_TYPES_H */
......@@ -38,153 +38,153 @@
namespace __gnu_parallel
{
/** @brief Parallel std::unique_copy(), w/o explicit equality predicate.
* @param first Begin iterator of input sequence.
* @param last End iterator of input sequence.
* @param result Begin iterator of result sequence.
* @param binary_pred Equality predicate.
* @return End iterator of result sequence. */
template<typename InputIterator,
class OutputIterator,
class BinaryPredicate>
OutputIterator
parallel_unique_copy(InputIterator first, InputIterator last,
OutputIterator result, BinaryPredicate binary_pred)
/** @brief Parallel std::unique_copy(), w/o explicit equality predicate.
* @param __first Begin iterator of input sequence.
* @param __last End iterator of input sequence.
* @param __result Begin iterator of result sequence.
* @param __binary_pred Equality predicate.
* @return End iterator of result sequence. */
template<typename _IIter,
class _OutputIterator,
class _BinaryPredicate>
_OutputIterator
__parallel_unique_copy(_IIter __first, _IIter __last,
_OutputIterator __result, _BinaryPredicate __binary_pred)
{
_GLIBCXX_CALL(last - first)
_GLIBCXX_CALL(__last - __first)
typedef std::iterator_traits<InputIterator> traits_type;
typedef typename traits_type::value_type value_type;
typedef typename traits_type::difference_type difference_type;
typedef std::iterator_traits<_IIter> _TraitsType;
typedef typename _TraitsType::value_type _ValueType;
typedef typename _TraitsType::difference_type _DifferenceType;
difference_type size = last - first;
_DifferenceType size = __last - __first;
if (size == 0)
return result;
return __result;
// Let the first thread process two parts.
difference_type *counter;
difference_type *borders;
_DifferenceType *__counter;
_DifferenceType *__borders;
thread_index_t num_threads = get_max_threads();
_ThreadIndex __num_threads = __get_max_threads();
// First part contains at least one element.
# pragma omp parallel num_threads(num_threads)
# pragma omp parallel num_threads(__num_threads)
{
# pragma omp single
{
num_threads = omp_get_num_threads();
borders = new difference_type[num_threads + 2];
equally_split(size, num_threads + 1, borders);
counter = new difference_type[num_threads + 1];
__num_threads = omp_get_num_threads();
__borders = new _DifferenceType[__num_threads + 2];
equally_split(size, __num_threads + 1, __borders);
__counter = new _DifferenceType[__num_threads + 1];
}
thread_index_t iam = omp_get_thread_num();
_ThreadIndex __iam = omp_get_thread_num();
difference_type begin, end;
_DifferenceType __begin, __end;
// Check for length without duplicates
// Needed for position in output
difference_type i = 0;
OutputIterator out = result;
_DifferenceType __i = 0;
_OutputIterator __out = __result;
if (iam == 0)
if (__iam == 0)
{
begin = borders[0] + 1; // == 1
end = borders[iam + 1];
__begin = __borders[0] + 1; // == 1
__end = __borders[__iam + 1];
++i;
*out++ = *first;
++__i;
*__out++ = *__first;
for (InputIterator iter = first + begin; iter < first + end; ++iter)
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{
if (!binary_pred(*iter, *(iter-1)))
if (!__binary_pred(*iter, *(iter-1)))
{
++i;
*out++ = *iter;
++__i;
*__out++ = *iter;
}
}
}
else
{
begin = borders[iam]; //one part
end = borders[iam + 1];
__begin = __borders[__iam]; //one part
__end = __borders[__iam + 1];
for (InputIterator iter = first + begin; iter < first + end; ++iter)
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{
if (!binary_pred(*iter, *(iter - 1)))
++i;
if (!__binary_pred(*iter, *(iter - 1)))
++__i;
}
}
counter[iam] = i;
__counter[__iam] = __i;
// Last part still untouched.
difference_type begin_output;
_DifferenceType __begin_output;
# pragma omp barrier
// Store result in output on calculated positions.
begin_output = 0;
__begin_output = 0;
if (iam == 0)
if (__iam == 0)
{
for (int t = 0; t < num_threads; ++t)
begin_output += counter[t];
for (int __t = 0; __t < __num_threads; ++__t)
__begin_output += __counter[__t];
i = 0;
__i = 0;
OutputIterator iter_out = result + begin_output;
_OutputIterator __iter_out = __result + __begin_output;
begin = borders[num_threads];
end = size;
__begin = __borders[__num_threads];
__end = size;
for (InputIterator iter = first + begin; iter < first + end; ++iter)
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{
if (iter == first || !binary_pred(*iter, *(iter - 1)))
if (iter == __first || !__binary_pred(*iter, *(iter - 1)))
{
++i;
*iter_out++ = *iter;
++__i;
*__iter_out++ = *iter;
}
}
counter[num_threads] = i;
__counter[__num_threads] = __i;
}
else
{
for (int t = 0; t < iam; t++)
begin_output += counter[t];
for (int __t = 0; __t < __iam; __t++)
__begin_output += __counter[__t];
OutputIterator iter_out = result + begin_output;
for (InputIterator iter = first + begin; iter < first + end; ++iter)
_OutputIterator __iter_out = __result + __begin_output;
for (_IIter iter = __first + __begin; iter < __first + __end; ++iter)
{
if (!binary_pred(*iter, *(iter-1)))
*iter_out++ = *iter;
if (!__binary_pred(*iter, *(iter-1)))
*__iter_out++ = *iter;
}
}
}
difference_type end_output = 0;
for (int t = 0; t < num_threads + 1; t++)
end_output += counter[t];
_DifferenceType __end_output = 0;
for (int __t = 0; __t < __num_threads + 1; __t++)
__end_output += __counter[__t];
delete[] borders;
delete[] __borders;
return result + end_output;
return __result + __end_output;
}
/** @brief Parallel std::unique_copy(), without explicit equality predicate
* @param first Begin iterator of input sequence.
* @param last End iterator of input sequence.
* @param result Begin iterator of result sequence.
* @return End iterator of result sequence. */
template<typename InputIterator, class OutputIterator>
inline OutputIterator
parallel_unique_copy(InputIterator first, InputIterator last,
OutputIterator result)
* @param __first Begin iterator of input sequence.
* @param __last End iterator of input sequence.
* @param __result Begin iterator of result sequence.
* @return End iterator of result sequence. */
template<typename _IIter, class _OutputIterator>
inline _OutputIterator
__parallel_unique_copy(_IIter __first, _IIter __last,
_OutputIterator __result)
{
typedef typename std::iterator_traits<InputIterator>::value_type
value_type;
return parallel_unique_copy(first, last, result,
std::equal_to<value_type>());
typedef typename std::iterator_traits<_IIter>::value_type
_ValueType;
return __parallel_unique_copy(__first, __last, __result,
std::equal_to<_ValueType>());
}
}//namespace __gnu_parallel
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment