Commit ee1b5fc5 by Benjamin Kosnik Committed by Benjamin Kosnik

re PR libstdc++/34797 ([parallel mode] Settings are separated for each compilation unit)

2008-02-17  Benjamin Kosnik  <bkoz@redhat.com>

	PR libstdc++/34797
	* include/parallel/settings.h (_Settings): Reconstruct Settings class
	here, uglify, remove anonymous namespace and static
	members. Convert to datum.	
	* include/parallel/types.h: Move Settings:: enumerations here, uglify.
	* src/parallel_settings.cc: New, definition for _Settings member
	functions.	
	* include/parallel/multiway_merge.h: Same.
	* include/parallel/for_each.h: Same.
	* include/parallel/workstealing.h: Same.
	* include/parallel/base.h: Same.
	* include/parallel/numeric: Same.
	* include/parallel/features.h: Same.
	* include/parallel/quicksort.h: Same.
	* include/parallel/equally_split.h: Same.
	* include/parallel/algorithmfwd.h: Same.
	* include/parallel/omp_loop_static.h: Same.
	* include/parallel/random_shuffle.h: Same.
	* include/parallel/balanced_quicksort.h: Same.
	* include/parallel/tags.h: Same.
	* include/parallel/multiway_mergesort.h: Same.
	* include/parallel/numericfwd.h: Same.
	* include/parallel/partition.h: Same.
	* include/parallel/partial_sum.h: Same.
	* include/parallel/find.h: Same.
	* include/parallel/algo.h: Same.
	* include/parallel/omp_loop.h: Same.
	* include/parallel/sort.h: Same.

	* src/Makefile.am (parallel_sources): Add parallel_settings.cc.
	* src/Makefile.in: Regenerate.
	
	* config/abi/pre/gnu.ver: Export _Settings::get and _Settings::set.

From-SVN: r132383
parent e69044cb
2008-02-17 Benjamin Kosnik <bkoz@redhat.com>
PR libstdc++/34797
* include/parallel/settings.h (_Settings): Reconstruct Settings class
here, uglify, remove anonymous namespace and static
members. Convert to datum.
* include/parallel/types.h: Move Settings:: enumerations here, uglify.
* src/parallel_settings.cc: New, definition for _Settings member
functions.
* include/parallel/multiway_merge.h: Same.
* include/parallel/for_each.h: Same.
* include/parallel/workstealing.h: Same.
* include/parallel/base.h: Same.
* include/parallel/numeric: Same.
* include/parallel/features.h: Same.
* include/parallel/quicksort.h: Same.
* include/parallel/equally_split.h: Same.
* include/parallel/algorithmfwd.h: Same.
* include/parallel/omp_loop_static.h: Same.
* include/parallel/random_shuffle.h: Same.
* include/parallel/balanced_quicksort.h: Same.
* include/parallel/tags.h: Same.
* include/parallel/multiway_mergesort.h: Same.
* include/parallel/numericfwd.h: Same.
* include/parallel/partition.h: Same.
* include/parallel/partial_sum.h: Same.
* include/parallel/find.h: Same.
* include/parallel/algo.h: Same.
* include/parallel/omp_loop.h: Same.
* include/parallel/sort.h: Same.
* src/Makefile.am (parallel_sources): Add parallel_settings.cc.
* src/Makefile.in: Regenerate.
* config/abi/pre/gnu.ver: Export _Settings::get and _Settings::set.
2008-02-17 Paolo Carlini <pcarlini@suse.de> 2008-02-17 Paolo Carlini <pcarlini@suse.de>
PR libstdc++/35221 PR libstdc++/35221
......
...@@ -779,7 +779,15 @@ GLIBCXX_3.4.10 { ...@@ -779,7 +779,15 @@ GLIBCXX_3.4.10 {
_ZNKSt4hashISt10error_codeEclES0_; _ZNKSt4hashISt10error_codeEclES0_;
_ZNKSt4hashI[eg]EclE[eg]; _ZNKSt4hashI[eg]EclE[eg];
_ZSt17__verify_grouping*;
_ZNSt8__detail12__prime_listE;
_ZNSt3tr18__detail12__prime_listE;
# for parallel mode # for parallel mode
_ZN14__gnu_parallel9_Settings3getEv;
_ZN14__gnu_parallel9_Settings3setERS0_;
_ZNSt9__cxx199815_List_node_base4hook*; _ZNSt9__cxx199815_List_node_base4hook*;
_ZNSt9__cxx199815_List_node_base4swap*; _ZNSt9__cxx199815_List_node_base4swap*;
_ZNSt9__cxx199815_List_node_base6unhookEv; _ZNSt9__cxx199815_List_node_base6unhookEv;
...@@ -788,10 +796,6 @@ GLIBCXX_3.4.10 { ...@@ -788,10 +796,6 @@ GLIBCXX_3.4.10 {
_ZNSt15basic_streambufI[cw]St11char_traitsI[cw]EE6stosscEv; _ZNSt15basic_streambufI[cw]St11char_traitsI[cw]EE6stosscEv;
_ZSt17__verify_grouping*;
_ZNSt8__detail12__prime_listE;
_ZNSt3tr18__detail12__prime_listE;
_ZN9__gnu_cxx18stdio_sync_filebufI[cw]St11char_traitsI[cw]EE4syncEv; _ZN9__gnu_cxx18stdio_sync_filebufI[cw]St11char_traitsI[cw]EE4syncEv;
_ZN9__gnu_cxx18stdio_sync_filebufI[cw]St11char_traitsI[cw]EE[5-9CD]*; _ZN9__gnu_cxx18stdio_sync_filebufI[cw]St11char_traitsI[cw]EE[5-9CD]*;
......
...@@ -90,7 +90,7 @@ namespace __parallel ...@@ -90,7 +90,7 @@ namespace __parallel
template<typename _IIter, typename _Tp> template<typename _IIter, typename _Tp>
typename iterator_traits<_IIter>::difference_type typename iterator_traits<_IIter>::difference_type
count(_IIter, _IIter, const _Tp&, __gnu_parallel::parallelism); count(_IIter, _IIter, const _Tp&, __gnu_parallel::_Parallelism);
template<typename _IIter, typename _Tp, typename _IterTag> template<typename _IIter, typename _Tp, typename _IterTag>
typename iterator_traits<_IIter>::difference_type typename iterator_traits<_IIter>::difference_type
...@@ -99,7 +99,7 @@ namespace __parallel ...@@ -99,7 +99,7 @@ namespace __parallel
template<typename _RAIter, typename _Tp> template<typename _RAIter, typename _Tp>
typename iterator_traits<_RAIter>::difference_type typename iterator_traits<_RAIter>::difference_type
count_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag, count_switch(_RAIter, _RAIter, const _Tp&, random_access_iterator_tag,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter, typename _Predicate> template<typename _IIter, typename _Predicate>
...@@ -112,7 +112,7 @@ namespace __parallel ...@@ -112,7 +112,7 @@ namespace __parallel
template<typename _IIter, typename _Predicate> template<typename _IIter, typename _Predicate>
typename iterator_traits<_IIter>::difference_type typename iterator_traits<_IIter>::difference_type
count_if(_IIter, _IIter, _Predicate, __gnu_parallel::parallelism); count_if(_IIter, _IIter, _Predicate, __gnu_parallel::_Parallelism);
template<typename _IIter, typename _Predicate, typename _IterTag> template<typename _IIter, typename _Predicate, typename _IterTag>
typename iterator_traits<_IIter>::difference_type typename iterator_traits<_IIter>::difference_type
...@@ -121,7 +121,7 @@ namespace __parallel ...@@ -121,7 +121,7 @@ namespace __parallel
template<typename _RAIter, typename _Predicate> template<typename _RAIter, typename _Predicate>
typename iterator_traits<_RAIter>::difference_type typename iterator_traits<_RAIter>::difference_type
count_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag, count_if_switch(_RAIter, _RAIter, _Predicate, random_access_iterator_tag,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
// algobase.h // algobase.h
template<typename _IIter1, typename _IIter2> template<typename _IIter1, typename _IIter2>
...@@ -219,7 +219,7 @@ namespace __parallel ...@@ -219,7 +219,7 @@ namespace __parallel
template<typename _Iterator, typename _Function> template<typename _Iterator, typename _Function>
_Function _Function
for_each(_Iterator, _Iterator, _Function, __gnu_parallel::parallelism); for_each(_Iterator, _Iterator, _Function, __gnu_parallel::_Parallelism);
template<typename _IIter, typename _Function, typename _IterTag> template<typename _IIter, typename _Function, typename _IterTag>
_Function _Function
...@@ -228,7 +228,7 @@ namespace __parallel ...@@ -228,7 +228,7 @@ namespace __parallel
template<typename _RAIter, typename _Function> template<typename _RAIter, typename _Function>
_Function _Function
for_each_switch(_RAIter, _RAIter, _Function, random_access_iterator_tag, for_each_switch(_RAIter, _RAIter, _Function, random_access_iterator_tag,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _FIter, typename _Generator> template<typename _FIter, typename _Generator>
...@@ -241,7 +241,7 @@ namespace __parallel ...@@ -241,7 +241,7 @@ namespace __parallel
template<typename _FIter, typename _Generator> template<typename _FIter, typename _Generator>
void void
generate(_FIter, _FIter, _Generator, __gnu_parallel::parallelism); generate(_FIter, _FIter, _Generator, __gnu_parallel::_Parallelism);
template<typename _FIter, typename _Generator, typename _IterTag> template<typename _FIter, typename _Generator, typename _IterTag>
void void
...@@ -250,7 +250,7 @@ namespace __parallel ...@@ -250,7 +250,7 @@ namespace __parallel
template<typename _RAIter, typename _Generator> template<typename _RAIter, typename _Generator>
void void
generate_switch(_RAIter, _RAIter, _Generator, random_access_iterator_tag, generate_switch(_RAIter, _RAIter, _Generator, random_access_iterator_tag,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _OIter, typename _Size, typename _Generator> template<typename _OIter, typename _Size, typename _Generator>
_OIter _OIter
...@@ -262,7 +262,7 @@ namespace __parallel ...@@ -262,7 +262,7 @@ namespace __parallel
template<typename _OIter, typename _Size, typename _Generator> template<typename _OIter, typename _Size, typename _Generator>
_OIter _OIter
generate_n(_OIter, _Size, _Generator, __gnu_parallel::parallelism); generate_n(_OIter, _Size, _Generator, __gnu_parallel::_Parallelism);
template<typename _OIter, typename _Size, typename _Generator, template<typename _OIter, typename _Size, typename _Generator,
typename _IterTag> typename _IterTag>
...@@ -272,7 +272,7 @@ namespace __parallel ...@@ -272,7 +272,7 @@ namespace __parallel
template<typename _RAIter, typename _Size, typename _Generator> template<typename _RAIter, typename _Size, typename _Generator>
_RAIter _RAIter
generate_n_switch(_RAIter, _Size, _Generator, random_access_iterator_tag, generate_n_switch(_RAIter, _Size, _Generator, random_access_iterator_tag,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter1, typename _IIter2> template<typename _IIter1, typename _IIter2>
bool bool
...@@ -416,7 +416,7 @@ namespace __parallel ...@@ -416,7 +416,7 @@ namespace __parallel
template<typename _IIter, typename _OIter, typename UnaryOperation> template<typename _IIter, typename _OIter, typename UnaryOperation>
_OIter _OIter
transform(_IIter, _IIter, _OIter, UnaryOperation, transform(_IIter, _IIter, _OIter, UnaryOperation,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter, typename _OIter, typename UnaryOperation, template<typename _IIter, typename _OIter, typename UnaryOperation,
typename _IterTag1, typename _IterTag2> typename _IterTag1, typename _IterTag2>
...@@ -429,7 +429,7 @@ namespace __parallel ...@@ -429,7 +429,7 @@ namespace __parallel
_RAOIter _RAOIter
transform1_switch(_RAIIter, _RAIIter, _RAOIter, UnaryOperation, transform1_switch(_RAIIter, _RAIIter, _RAOIter, UnaryOperation,
random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter1, typename _IIter2, typename _OIter, template<typename _IIter1, typename _IIter2, typename _OIter,
...@@ -447,7 +447,7 @@ namespace __parallel ...@@ -447,7 +447,7 @@ namespace __parallel
typename _BiOperation> typename _BiOperation>
_OIter _OIter
transform(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation, transform(_IIter1, _IIter1, _IIter2, _OIter, _BiOperation,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _RAIter1, typename _RAIter2, typename _RAIter3, template<typename _RAIter1, typename _RAIter2, typename _RAIter3,
typename _BiOperation> typename _BiOperation>
...@@ -455,7 +455,7 @@ namespace __parallel ...@@ -455,7 +455,7 @@ namespace __parallel
transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation, transform2_switch(_RAIter1, _RAIter1, _RAIter2, _RAIter3, _BiOperation,
random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag, random_access_iterator_tag,
random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter1, typename _IIter2, typename _OIter, template<typename _IIter1, typename _IIter2, typename _OIter,
typename _BiOperation, typename _Tag1, typename _BiOperation, typename _Tag1,
...@@ -477,7 +477,7 @@ namespace __parallel ...@@ -477,7 +477,7 @@ namespace __parallel
template<typename _FIter, typename _Tp> template<typename _FIter, typename _Tp>
void void
replace(_FIter, _FIter, const _Tp&, const _Tp&, replace(_FIter, _FIter, const _Tp&, const _Tp&,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _FIter, typename _Tp, typename _IterTag> template<typename _FIter, typename _Tp, typename _IterTag>
void void
...@@ -486,7 +486,7 @@ namespace __parallel ...@@ -486,7 +486,7 @@ namespace __parallel
template<typename _RAIter, typename _Tp> template<typename _RAIter, typename _Tp>
void void
replace_switch(_RAIter, _RAIter, const _Tp&, const _Tp&, replace_switch(_RAIter, _RAIter, const _Tp&, const _Tp&,
random_access_iterator_tag, __gnu_parallel::parallelism); random_access_iterator_tag, __gnu_parallel::_Parallelism);
template<typename _FIter, typename _Predicate, typename _Tp> template<typename _FIter, typename _Predicate, typename _Tp>
...@@ -501,7 +501,7 @@ namespace __parallel ...@@ -501,7 +501,7 @@ namespace __parallel
template<typename _FIter, typename _Predicate, typename _Tp> template<typename _FIter, typename _Predicate, typename _Tp>
void void
replace_if(_FIter, _FIter, _Predicate, const _Tp&, replace_if(_FIter, _FIter, _Predicate, const _Tp&,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _FIter, typename _Predicate, typename _Tp, template<typename _FIter, typename _Predicate, typename _Tp,
typename _IterTag> typename _IterTag>
...@@ -512,7 +512,7 @@ namespace __parallel ...@@ -512,7 +512,7 @@ namespace __parallel
void void
replace_if_switch(_RAIter, _RAIter, _Predicate, const _Tp&, replace_if_switch(_RAIter, _RAIter, _Predicate, const _Tp&,
random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _FIter> template<typename _FIter>
...@@ -525,7 +525,7 @@ namespace __parallel ...@@ -525,7 +525,7 @@ namespace __parallel
template<typename _FIter> template<typename _FIter>
_FIter _FIter
max_element(_FIter, _FIter, __gnu_parallel::parallelism); max_element(_FIter, _FIter, __gnu_parallel::_Parallelism);
template<typename _FIter, typename _Compare> template<typename _FIter, typename _Compare>
_FIter _FIter
...@@ -537,7 +537,7 @@ namespace __parallel ...@@ -537,7 +537,7 @@ namespace __parallel
template<typename _FIter, typename _Compare> template<typename _FIter, typename _Compare>
_FIter _FIter
max_element(_FIter, _FIter, _Compare, __gnu_parallel::parallelism); max_element(_FIter, _FIter, _Compare, __gnu_parallel::_Parallelism);
template<typename _FIter, typename _Compare, typename _IterTag> template<typename _FIter, typename _Compare, typename _IterTag>
_FIter _FIter
...@@ -546,7 +546,7 @@ namespace __parallel ...@@ -546,7 +546,7 @@ namespace __parallel
template<typename _RAIter, typename _Compare> template<typename _RAIter, typename _Compare>
_RAIter _RAIter
max_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag, max_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter1, typename _IIter2, typename _OIter> template<typename _IIter1, typename _IIter2, typename _OIter>
...@@ -594,7 +594,7 @@ namespace __parallel ...@@ -594,7 +594,7 @@ namespace __parallel
template<typename _FIter> template<typename _FIter>
_FIter _FIter
min_element(_FIter, _FIter, __gnu_parallel::parallelism parallelism_tag); min_element(_FIter, _FIter, __gnu_parallel::_Parallelism parallelism_tag);
template<typename _FIter, typename _Compare> template<typename _FIter, typename _Compare>
_FIter _FIter
...@@ -606,7 +606,7 @@ namespace __parallel ...@@ -606,7 +606,7 @@ namespace __parallel
template<typename _FIter, typename _Compare> template<typename _FIter, typename _Compare>
_FIter _FIter
min_element(_FIter, _FIter, _Compare, __gnu_parallel::parallelism); min_element(_FIter, _FIter, _Compare, __gnu_parallel::_Parallelism);
template<typename _FIter, typename _Compare, typename _IterTag> template<typename _FIter, typename _Compare, typename _IterTag>
_FIter _FIter
...@@ -615,7 +615,7 @@ namespace __parallel ...@@ -615,7 +615,7 @@ namespace __parallel
template<typename _RAIter, typename _Compare> template<typename _RAIter, typename _Compare>
_RAIter _RAIter
min_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag, min_element_switch(_RAIter, _RAIter, _Compare, random_access_iterator_tag,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _RAIter> template<typename _RAIter>
void void
......
...@@ -252,7 +252,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -252,7 +252,7 @@ template<typename RandomAccessIterator, typename Comparator>
QSBThreadLocal<RandomAccessIterator>& tl = *tls[iam]; QSBThreadLocal<RandomAccessIterator>& tl = *tls[iam];
difference_type base_case_n = Settings::sort_qsb_base_case_maximal_n; difference_type base_case_n = _Settings::get().sort_qsb_base_case_maximal_n;
if (base_case_n < 2) if (base_case_n < 2)
base_case_n = 2; base_case_n = 2;
thread_index_t num_threads = tl.num_threads; thread_index_t num_threads = tl.num_threads;
......
...@@ -38,11 +38,12 @@ ...@@ -38,11 +38,12 @@
#ifndef _GLIBCXX_PARALLEL_BASE_H #ifndef _GLIBCXX_PARALLEL_BASE_H
#define _GLIBCXX_PARALLEL_BASE_H 1 #define _GLIBCXX_PARALLEL_BASE_H 1
#include <parallel/features.h> #include <cstdio>
#include <functional> #include <functional>
#include <omp.h>
#include <parallel/features.h>
#include <parallel/basic_iterator.h> #include <parallel/basic_iterator.h>
#include <parallel/parallel.h> #include <parallel/parallel.h>
#include <cstdio>
// Parallel mode namespaces. // Parallel mode namespaces.
...@@ -67,6 +68,7 @@ namespace __gnu_parallel ...@@ -67,6 +68,7 @@ namespace __gnu_parallel
*/ */
namespace __gnu_sequential namespace __gnu_sequential
{ {
// Import whatever is the serial version.
#ifdef _GLIBCXX_PARALLEL #ifdef _GLIBCXX_PARALLEL
using namespace std::__norm; using namespace std::__norm;
#else #else
...@@ -77,6 +79,22 @@ namespace __gnu_sequential ...@@ -77,6 +79,22 @@ namespace __gnu_sequential
namespace __gnu_parallel namespace __gnu_parallel
{ {
// NB: Including this file cannot produce (unresolved) symbols from
// the OpenMP runtime unless the parallel mode is actually invoked
// and active, which implies that the OpenMP runtime is actually
// going to be linked in.
inline int
get_max_threads()
{
int __i = omp_get_max_threads();
return __i > 1 ? __i : 1;
}
inline bool
is_parallel(const _Parallelism __p) { return __p != sequential; }
// XXX remove std::duplicates from here if possible, // XXX remove std::duplicates from here if possible,
// XXX but keep minimal dependencies. // XXX but keep minimal dependencies.
...@@ -175,11 +193,8 @@ template<typename _Predicate, typename argument_type> ...@@ -175,11 +193,8 @@ template<typename _Predicate, typename argument_type>
/** @brief Similar to std::binder1st, /** @brief Similar to std::binder1st,
* but giving the argument types explicitly. */ * but giving the argument types explicitly. */
template< template<typename _Operation, typename first_argument_type,
typename _Operation, typename second_argument_type, typename result_type>
typename first_argument_type,
typename second_argument_type,
typename result_type>
class binder1st class binder1st
: public std::unary_function<second_argument_type, result_type> : public std::unary_function<second_argument_type, result_type>
{ {
...@@ -207,11 +222,8 @@ template< ...@@ -207,11 +222,8 @@ template<
* @brief Similar to std::binder2nd, but giving the argument types * @brief Similar to std::binder2nd, but giving the argument types
* explicitly. * explicitly.
*/ */
template< template<typename _Operation, typename first_argument_type,
typename _Operation, typename second_argument_type, typename result_type>
typename first_argument_type,
typename second_argument_type,
typename result_type>
class binder2nd class binder2nd
: public std::unary_function<first_argument_type, result_type> : public std::unary_function<first_argument_type, result_type>
{ {
......
...@@ -51,13 +51,11 @@ namespace __gnu_parallel ...@@ -51,13 +51,11 @@ namespace __gnu_parallel
* @returns End of splitter sequence, i. e. @c s+num_threads+1 */ * @returns End of splitter sequence, i. e. @c s+num_threads+1 */
template<typename difference_type, typename OutputIterator> template<typename difference_type, typename OutputIterator>
OutputIterator OutputIterator
equally_split(difference_type n, equally_split(difference_type n, thread_index_t num_threads, OutputIterator s)
thread_index_t num_threads,
OutputIterator s)
{ {
difference_type chunk_length = n / num_threads, difference_type chunk_length = n / num_threads;
num_longer_chunks = n % num_threads, difference_type num_longer_chunks = n % num_threads;
pos = 0; difference_type pos = 0;
for (thread_index_t i = 0; i < num_threads; ++i) for (thread_index_t i = 0; i < num_threads; ++i)
{ {
*s++ = pos; *s++ = pos;
...@@ -75,17 +73,16 @@ template<typename difference_type, typename OutputIterator> ...@@ -75,17 +73,16 @@ template<typename difference_type, typename OutputIterator>
* thread number thread_no+1 (excluded). * thread number thread_no+1 (excluded).
* @param n Number of elements * @param n Number of elements
* @param num_threads Number of parts * @param num_threads Number of parts
* @returns Splitting point */ * @returns Splitting point */
template<typename difference_type> template<typename difference_type>
difference_type difference_type
equally_split_point(difference_type n, equally_split_point(difference_type n,
thread_index_t num_threads, thread_index_t num_threads,
thread_index_t thread_no) thread_index_t thread_no)
{ {
difference_type chunk_length = n / num_threads, difference_type chunk_length = n / num_threads;
num_longer_chunks = n % num_threads; difference_type num_longer_chunks = n % num_threads;
if (thread_no < num_longer_chunks)
if(thread_no < num_longer_chunks)
return thread_no * (chunk_length + 1); return thread_no * (chunk_length + 1);
else else
return num_longer_chunks * (chunk_length + 1) return num_longer_chunks * (chunk_length + 1)
......
...@@ -43,21 +43,21 @@ ...@@ -43,21 +43,21 @@
#ifndef _GLIBCXX_MERGESORT #ifndef _GLIBCXX_MERGESORT
/** @def _GLIBCXX_MERGESORT /** @def _GLIBCXX_MERGESORT
* @brief Include parallel multi-way mergesort. * @brief Include parallel multi-way mergesort.
* @see __gnu_parallel::Settings::sort_algorithm */ * @see __gnu_parallel::_Settings::sort_algorithm */
#define _GLIBCXX_MERGESORT 1 #define _GLIBCXX_MERGESORT 1
#endif #endif
#ifndef _GLIBCXX_QUICKSORT #ifndef _GLIBCXX_QUICKSORT
/** @def _GLIBCXX_QUICKSORT /** @def _GLIBCXX_QUICKSORT
* @brief Include parallel unbalanced quicksort. * @brief Include parallel unbalanced quicksort.
* @see __gnu_parallel::Settings::sort_algorithm */ * @see __gnu_parallel::_Settings::sort_algorithm */
#define _GLIBCXX_QUICKSORT 1 #define _GLIBCXX_QUICKSORT 1
#endif #endif
#ifndef _GLIBCXX_BAL_QUICKSORT #ifndef _GLIBCXX_BAL_QUICKSORT
/** @def _GLIBCXX_BAL_QUICKSORT /** @def _GLIBCXX_BAL_QUICKSORT
* @brief Include parallel dynamically load-balanced quicksort. * @brief Include parallel dynamically load-balanced quicksort.
* @see __gnu_parallel::Settings::sort_algorithm */ * @see __gnu_parallel::_Settings::sort_algorithm */
#define _GLIBCXX_BAL_QUICKSORT 1 #define _GLIBCXX_BAL_QUICKSORT 1
#endif #endif
...@@ -65,7 +65,7 @@ ...@@ -65,7 +65,7 @@
/** @def _GLIBCXX_LOSER_TREE /** @def _GLIBCXX_LOSER_TREE
* @brief Include guarded (sequences may run empty) loser tree, * @brief Include guarded (sequences may run empty) loser tree,
* moving objects. * moving objects.
* @see __gnu_parallel::Settings multiway_merge_algorithm */ * @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE 1 #define _GLIBCXX_LOSER_TREE 1
#endif #endif
...@@ -73,21 +73,21 @@ ...@@ -73,21 +73,21 @@
/** @def _GLIBCXX_LOSER_TREE_EXPLICIT /** @def _GLIBCXX_LOSER_TREE_EXPLICIT
* @brief Include standard loser tree, storing two flags for infimum * @brief Include standard loser tree, storing two flags for infimum
* and supremum. * and supremum.
* @see __gnu_parallel::Settings multiway_merge_algorithm */ * @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_EXPLICIT 0 #define _GLIBCXX_LOSER_TREE_EXPLICIT 0
#endif #endif
#ifndef _GLIBCXX_LOSER_TREE_REFERENCE #ifndef _GLIBCXX_LOSER_TREE_REFERENCE
/** @def _GLIBCXX_LOSER_TREE_REFERENCE /** @def _GLIBCXX_LOSER_TREE_REFERENCE
* @brief Include some loser tree variant. * @brief Include some loser tree variant.
* @see __gnu_parallel::Settings multiway_merge_algorithm */ * @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_REFERENCE 0 #define _GLIBCXX_LOSER_TREE_REFERENCE 0
#endif #endif
#ifndef _GLIBCXX_LOSER_TREE_POINTER #ifndef _GLIBCXX_LOSER_TREE_POINTER
/** @def _GLIBCXX_LOSER_TREE_POINTER /** @def _GLIBCXX_LOSER_TREE_POINTER
* @brief Include some loser tree variant. * @brief Include some loser tree variant.
* @see __gnu_parallel::Settings multiway_merge_algorithm */ * @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_POINTER 1 #define _GLIBCXX_LOSER_TREE_POINTER 1
#endif #endif
...@@ -95,48 +95,48 @@ ...@@ -95,48 +95,48 @@
/** @def _GLIBCXX_LOSER_TREE_UNGUARDED /** @def _GLIBCXX_LOSER_TREE_UNGUARDED
* @brief Include unguarded (sequences must not run empty) loser * @brief Include unguarded (sequences must not run empty) loser
* tree, moving objects. * tree, moving objects.
* @see __gnu_parallel::Settings multiway_merge_algorithm */ * @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_UNGUARDED 0 #define _GLIBCXX_LOSER_TREE_UNGUARDED 0
#endif #endif
#ifndef _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED #ifndef _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
/** @def _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED /** @def _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED
* @brief Include some loser tree variant. * @brief Include some loser tree variant.
* @see __gnu_parallel::Settings multiway_merge_algorithm */ * @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED 1 #define _GLIBCXX_LOSER_TREE_POINTER_UNGUARDED 1
#endif #endif
#ifndef _GLIBCXX_LOSER_TREE_COMBINED #ifndef _GLIBCXX_LOSER_TREE_COMBINED
/** @def _GLIBCXX_LOSER_TREE_COMBINED /** @def _GLIBCXX_LOSER_TREE_COMBINED
* @brief Include some loser tree variant. * @brief Include some loser tree variant.
* @see __gnu_parallel::Settings multiway_merge_algorithm */ * @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_COMBINED 0 #define _GLIBCXX_LOSER_TREE_COMBINED 0
#endif #endif
#ifndef _GLIBCXX_LOSER_TREE_SENTINEL #ifndef _GLIBCXX_LOSER_TREE_SENTINEL
/** @def _GLIBCXX_LOSER_TREE_SENTINEL /** @def _GLIBCXX_LOSER_TREE_SENTINEL
* @brief Include some loser tree variant. * @brief Include some loser tree variant.
* @see __gnu_parallel::Settings multiway_merge_algorithm */ * @see __gnu_parallel::_Settings multiway_merge_algorithm */
#define _GLIBCXX_LOSER_TREE_SENTINEL 0 #define _GLIBCXX_LOSER_TREE_SENTINEL 0
#endif #endif
#ifndef _GLIBCXX_FIND_GROWING_BLOCKS #ifndef _GLIBCXX_FIND_GROWING_BLOCKS
/** @brief Include the growing blocks variant for std::find. /** @brief Include the growing blocks variant for std::find.
* @see __gnu_parallel::Settings::find_distribution */ * @see __gnu_parallel::_Settings::find_algorithm */
#define _GLIBCXX_FIND_GROWING_BLOCKS 1 #define _GLIBCXX_FIND_GROWING_BLOCKS 1
#endif #endif
#ifndef _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS #ifndef _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS
/** @brief Include the equal-sized blocks variant for std::find. /** @brief Include the equal-sized blocks variant for std::find.
* @see __gnu_parallel::Settings::find_distribution */ * @see __gnu_parallel::_Settings::find_algorithm */
#define _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS 1 #define _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS 1
#endif #endif
#ifndef _GLIBCXX_FIND_EQUAL_SPLIT #ifndef _GLIBCXX_FIND_EQUAL_SPLIT
/** @def _GLIBCXX_FIND_EQUAL_SPLIT /** @def _GLIBCXX_FIND_EQUAL_SPLIT
* @brief Include the equal splitting variant for std::find. * @brief Include the equal splitting variant for std::find.
* @see __gnu_parallel::Settings::find_distribution */ * @see __gnu_parallel::_Settings::find_algorithm */
#define _GLIBCXX_FIND_EQUAL_SPLIT 1 #define _GLIBCXX_FIND_EQUAL_SPLIT 1
#endif #endif
......
...@@ -66,15 +66,15 @@ template<typename RandomAccessIterator1, ...@@ -66,15 +66,15 @@ template<typename RandomAccessIterator1,
find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1, find_template(RandomAccessIterator1 begin1, RandomAccessIterator1 end1,
RandomAccessIterator2 begin2, Pred pred, Selector selector) RandomAccessIterator2 begin2, Pred pred, Selector selector)
{ {
switch (Settings::find_distribution) switch (_Settings::get().find_algorithm)
{ {
case Settings::GROWING_BLOCKS: case GROWING_BLOCKS:
return find_template(begin1, end1, begin2, pred, selector, return find_template(begin1, end1, begin2, pred, selector,
growing_blocks_tag()); growing_blocks_tag());
case Settings::CONSTANT_SIZE_BLOCKS: case CONSTANT_SIZE_BLOCKS:
return find_template(begin1, end1, begin2, pred, selector, return find_template(begin1, end1, begin2, pred, selector,
constant_size_blocks_tag()); constant_size_blocks_tag());
case Settings::EQUAL_SPLIT: case EQUAL_SPLIT:
return find_template(begin1, end1, begin2, pred, selector, return find_template(begin1, end1, begin2, pred, selector,
equal_split_tag()); equal_split_tag());
default: default:
...@@ -176,10 +176,10 @@ template<typename RandomAccessIterator1, ...@@ -176,10 +176,10 @@ template<typename RandomAccessIterator1,
* @param pred Find predicate. * @param pred Find predicate.
* @param selector Functionality (e. g. std::find_if (), std::equal(),...) * @param selector Functionality (e. g. std::find_if (), std::equal(),...)
* @return Place of finding in both sequences. * @return Place of finding in both sequences.
* @see __gnu_parallel::Settings::find_sequential_search_size * @see __gnu_parallel::_Settings::find_sequential_search_size
* @see __gnu_parallel::Settings::find_initial_block_size * @see __gnu_parallel::_Settings::find_initial_block_size
* @see __gnu_parallel::Settings::find_maximum_block_size * @see __gnu_parallel::_Settings::find_maximum_block_size
* @see __gnu_parallel::Settings::find_increasing_factor * @see __gnu_parallel::_Settings::find_increasing_factor
* *
* There are two main differences between the growing blocks and * There are two main differences between the growing blocks and
* the constant-size blocks variants. * the constant-size blocks variants.
...@@ -204,10 +204,12 @@ template<typename RandomAccessIterator1, ...@@ -204,10 +204,12 @@ template<typename RandomAccessIterator1,
typedef typename traits_type::difference_type difference_type; typedef typename traits_type::difference_type difference_type;
typedef typename traits_type::value_type value_type; typedef typename traits_type::value_type value_type;
const _Settings& __s = _Settings::get();
difference_type length = end1 - begin1; difference_type length = end1 - begin1;
difference_type sequential_search_size = difference_type sequential_search_size =
std::min<difference_type>(length, Settings::find_sequential_search_size); std::min<difference_type>(length, __s.find_sequential_search_size);
// Try it sequentially first. // Try it sequentially first.
std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result = std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
...@@ -233,7 +235,7 @@ template<typename RandomAccessIterator1, ...@@ -233,7 +235,7 @@ template<typename RandomAccessIterator1,
// Not within first k elements -> start parallel. // Not within first k elements -> start parallel.
thread_index_t iam = omp_get_thread_num(); thread_index_t iam = omp_get_thread_num();
difference_type block_size = Settings::find_initial_block_size; difference_type block_size = __s.find_initial_block_size;
difference_type start = difference_type start =
fetch_and_add<difference_type>(&next_block_start, block_size); fetch_and_add<difference_type>(&next_block_start, block_size);
...@@ -269,9 +271,8 @@ template<typename RandomAccessIterator1, ...@@ -269,9 +271,8 @@ template<typename RandomAccessIterator1,
} }
block_size = block_size =
std::min<difference_type>(block_size std::min<difference_type>(block_size * __s.find_increasing_factor,
* Settings::find_increasing_factor, __s.find_maximum_block_size);
Settings::find_maximum_block_size);
// Get new block, update pointer to next block. // Get new block, update pointer to next block.
start = start =
...@@ -302,8 +303,8 @@ template<typename RandomAccessIterator1, ...@@ -302,8 +303,8 @@ template<typename RandomAccessIterator1,
* @param pred Find predicate. * @param pred Find predicate.
* @param selector Functionality (e. g. std::find_if (), std::equal(),...) * @param selector Functionality (e. g. std::find_if (), std::equal(),...)
* @return Place of finding in both sequences. * @return Place of finding in both sequences.
* @see __gnu_parallel::Settings::find_sequential_search_size * @see __gnu_parallel::_Settings::find_sequential_search_size
* @see __gnu_parallel::Settings::find_block_size * @see __gnu_parallel::_Settings::find_block_size
* There are two main differences between the growing blocks and the * There are two main differences between the growing blocks and the
* constant-size blocks variants. * constant-size blocks variants.
* 1. For GB, the block size grows; for CSB, the block size is fixed. * 1. For GB, the block size grows; for CSB, the block size is fixed.
...@@ -325,10 +326,12 @@ template<typename RandomAccessIterator1, ...@@ -325,10 +326,12 @@ template<typename RandomAccessIterator1,
typedef typename traits_type::difference_type difference_type; typedef typename traits_type::difference_type difference_type;
typedef typename traits_type::value_type value_type; typedef typename traits_type::value_type value_type;
const _Settings& __s = _Settings::get();
difference_type length = end1 - begin1; difference_type length = end1 - begin1;
difference_type sequential_search_size = std::min<difference_type>( difference_type sequential_search_size = std::min<difference_type>(
length, Settings::find_sequential_search_size); length, __s.find_sequential_search_size);
// Try it sequentially first. // Try it sequentially first.
std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result = std::pair<RandomAccessIterator1, RandomAccessIterator2> find_seq_result =
...@@ -351,7 +354,7 @@ template<typename RandomAccessIterator1, ...@@ -351,7 +354,7 @@ template<typename RandomAccessIterator1,
num_threads = omp_get_num_threads(); num_threads = omp_get_num_threads();
thread_index_t iam = omp_get_thread_num(); thread_index_t iam = omp_get_thread_num();
difference_type block_size = Settings::find_initial_block_size; difference_type block_size = __s.find_initial_block_size;
// First element of thread's current iteration. // First element of thread's current iteration.
difference_type iteration_start = sequential_search_size; difference_type iteration_start = sequential_search_size;
......
...@@ -71,7 +71,7 @@ namespace __gnu_parallel ...@@ -71,7 +71,7 @@ namespace __gnu_parallel
Result& output, typename Result& output, typename
std::iterator_traits<InputIterator>:: std::iterator_traits<InputIterator>::
difference_type bound, difference_type bound,
parallelism parallelism_tag) _Parallelism parallelism_tag)
{ {
if (parallelism_tag == parallel_unbalanced) if (parallelism_tag == parallel_unbalanced)
return for_each_template_random_access_ed(begin, end, user_op, return for_each_template_random_access_ed(begin, end, user_op,
......
...@@ -1359,11 +1359,10 @@ template<typename RandomAccessIteratorIterator, ...@@ -1359,11 +1359,10 @@ template<typename RandomAccessIteratorIterator,
RandomAccessIterator3 return_target = target; RandomAccessIterator3 return_target = target;
int k = static_cast<int>(seqs_end - seqs_begin); int k = static_cast<int>(seqs_end - seqs_begin);
Settings::MultiwayMergeAlgorithm mwma = _MultiwayMergeAlgorithm mwma = _Settings::get().multiway_merge_algorithm;
Settings::multiway_merge_algorithm;
if (!sentinel && mwma == Settings::LOSER_TREE_SENTINEL) if (!sentinel && mwma == LOSER_TREE_SENTINEL)
mwma = Settings::LOSER_TREE_COMBINED; mwma = LOSER_TREE_COMBINED;
switch (k) switch (k)
{ {
...@@ -1385,14 +1384,14 @@ template<typename RandomAccessIteratorIterator, ...@@ -1385,14 +1384,14 @@ template<typename RandomAccessIteratorIterator,
case 3: case 3:
switch (mwma) switch (mwma)
{ {
case Settings::LOSER_TREE_COMBINED: case LOSER_TREE_COMBINED:
return_target = multiway_merge_3_combined(seqs_begin, return_target = multiway_merge_3_combined(seqs_begin,
seqs_end, seqs_end,
target, target,
comp, length, comp, length,
stable); stable);
break; break;
case Settings::LOSER_TREE_SENTINEL: case LOSER_TREE_SENTINEL:
return_target = return_target =
multiway_merge_3_variant<unguarded_iterator>(seqs_begin, multiway_merge_3_variant<unguarded_iterator>(seqs_begin,
seqs_end, seqs_end,
...@@ -1413,13 +1412,13 @@ template<typename RandomAccessIteratorIterator, ...@@ -1413,13 +1412,13 @@ template<typename RandomAccessIteratorIterator,
case 4: case 4:
switch (mwma) switch (mwma)
{ {
case Settings::LOSER_TREE_COMBINED: case LOSER_TREE_COMBINED:
return_target = multiway_merge_4_combined(seqs_begin, return_target = multiway_merge_4_combined(seqs_begin,
seqs_end, seqs_end,
target, target,
comp, length, stable); comp, length, stable);
break; break;
case Settings::LOSER_TREE_SENTINEL: case LOSER_TREE_SENTINEL:
return_target = return_target =
multiway_merge_4_variant<unguarded_iterator>(seqs_begin, multiway_merge_4_variant<unguarded_iterator>(seqs_begin,
seqs_end, seqs_end,
...@@ -1440,14 +1439,14 @@ template<typename RandomAccessIteratorIterator, ...@@ -1440,14 +1439,14 @@ template<typename RandomAccessIteratorIterator,
{ {
switch (mwma) switch (mwma)
{ {
case Settings::BUBBLE: case BUBBLE:
return_target = multiway_merge_bubble(seqs_begin, return_target = multiway_merge_bubble(seqs_begin,
seqs_end, seqs_end,
target, target,
comp, length, stable); comp, length, stable);
break; break;
#if _GLIBCXX_LOSER_TREE_EXPLICIT #if _GLIBCXX_LOSER_TREE_EXPLICIT
case Settings::LOSER_TREE_EXPLICIT: case LOSER_TREE_EXPLICIT:
return_target = multiway_merge_loser_tree< return_target = multiway_merge_loser_tree<
LoserTreeExplicit<value_type, Comparator> >(seqs_begin, LoserTreeExplicit<value_type, Comparator> >(seqs_begin,
seqs_end, seqs_end,
...@@ -1457,7 +1456,7 @@ template<typename RandomAccessIteratorIterator, ...@@ -1457,7 +1456,7 @@ template<typename RandomAccessIteratorIterator,
break; break;
#endif #endif
#if _GLIBCXX_LOSER_TREE #if _GLIBCXX_LOSER_TREE
case Settings::LOSER_TREE: case LOSER_TREE:
return_target = multiway_merge_loser_tree< return_target = multiway_merge_loser_tree<
LoserTree<value_type, Comparator> >(seqs_begin, LoserTree<value_type, Comparator> >(seqs_begin,
seqs_end, seqs_end,
...@@ -1467,7 +1466,7 @@ template<typename RandomAccessIteratorIterator, ...@@ -1467,7 +1466,7 @@ template<typename RandomAccessIteratorIterator,
break; break;
#endif #endif
#if _GLIBCXX_LOSER_TREE_COMBINED #if _GLIBCXX_LOSER_TREE_COMBINED
case Settings::LOSER_TREE_COMBINED: case LOSER_TREE_COMBINED:
return_target = multiway_merge_loser_tree_combined(seqs_begin, return_target = multiway_merge_loser_tree_combined(seqs_begin,
seqs_end, seqs_end,
target, target,
...@@ -1476,7 +1475,7 @@ template<typename RandomAccessIteratorIterator, ...@@ -1476,7 +1475,7 @@ template<typename RandomAccessIteratorIterator,
break; break;
#endif #endif
#if _GLIBCXX_LOSER_TREE_SENTINEL #if _GLIBCXX_LOSER_TREE_SENTINEL
case Settings::LOSER_TREE_SENTINEL: case LOSER_TREE_SENTINEL:
return_target = multiway_merge_loser_tree_sentinel(seqs_begin, return_target = multiway_merge_loser_tree_sentinel(seqs_begin,
seqs_end, seqs_end,
target, target,
...@@ -1550,6 +1549,7 @@ template<typename RandomAccessIteratorIterator, ...@@ -1550,6 +1549,7 @@ template<typename RandomAccessIteratorIterator,
thread_index_t num_threads = static_cast<thread_index_t>( thread_index_t num_threads = static_cast<thread_index_t>(
std::min<difference_type>(get_max_threads(), total_length)); std::min<difference_type>(get_max_threads(), total_length));
const _Settings& __s = _Settings::get();
# pragma omp parallel num_threads (num_threads) # pragma omp parallel num_threads (num_threads)
{ {
...@@ -1562,10 +1562,10 @@ template<typename RandomAccessIteratorIterator, ...@@ -1562,10 +1562,10 @@ template<typename RandomAccessIteratorIterator,
for (int s = 0; s < num_threads; ++s) for (int s = 0; s < num_threads; ++s)
pieces[s].resize(k); pieces[s].resize(k);
difference_type num_samples = difference_type num_samples = __s.merge_oversampling
Settings::merge_oversampling * num_threads; * num_threads;
if (Settings::multiway_merge_splitting == Settings::SAMPLING) if (__s.multiway_merge_splitting == SAMPLING)
{ {
value_type* samples = static_cast<value_type*>( value_type* samples = static_cast<value_type*>(
::operator new(sizeof(value_type) * k * num_samples)); ::operator new(sizeof(value_type) * k * num_samples));
...@@ -1623,7 +1623,7 @@ template<typename RandomAccessIteratorIterator, ...@@ -1623,7 +1623,7 @@ template<typename RandomAccessIteratorIterator,
} }
else else
{ {
// (Settings::multiway_merge_splitting == Settings::EXACT). // (_Settings::multiway_merge_splitting == _Settings::EXACT).
std::vector<RandomAccessIterator1>* offsets = std::vector<RandomAccessIterator1>* offsets =
new std::vector<RandomAccessIterator1>[num_threads]; new std::vector<RandomAccessIterator1>[num_threads];
std::vector< std::vector<
...@@ -1768,10 +1768,12 @@ template<typename RandomAccessIteratorPairIterator, ...@@ -1768,10 +1768,12 @@ template<typename RandomAccessIteratorPairIterator,
if (seqs_begin == seqs_end) if (seqs_begin == seqs_end)
return target; return target;
const _Settings& __s = _Settings::get();
RandomAccessIterator3 target_end; RandomAccessIterator3 target_end;
if (_GLIBCXX_PARALLEL_CONDITION( if (_GLIBCXX_PARALLEL_CONDITION(
((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) ((seqs_end - seqs_begin) >= __s.multiway_merge_minimal_k)
&& ((sequence_index_t)length >= Settings::multiway_merge_minimal_n))) && ((sequence_index_t)length >= __s.multiway_merge_minimal_n)))
target_end = parallel_multiway_merge(seqs_begin, seqs_end, target_end = parallel_multiway_merge(seqs_begin, seqs_end,
target, comp, target, comp,
static_cast<difference_type>(length), static_cast<difference_type>(length),
...@@ -1813,15 +1815,14 @@ template<typename RandomAccessIteratorPairIterator, ...@@ -1813,15 +1815,14 @@ template<typename RandomAccessIteratorPairIterator,
_GLIBCXX_CALL(seqs_end - seqs_begin) _GLIBCXX_CALL(seqs_end - seqs_begin)
if (_GLIBCXX_PARALLEL_CONDITION( const _Settings& __s = _Settings::get();
((seqs_end - seqs_begin) >= Settings::multiway_merge_minimal_k) const bool cond1 = seqs_end - seqs_begin >= __s.multiway_merge_minimal_k;
&& ((sequence_index_t)length >= Settings::multiway_merge_minimal_n))) const bool cond2 = sequence_index_t(length) >= __s.multiway_merge_minimal_n;
return parallel_multiway_merge( if (_GLIBCXX_PARALLEL_CONDITION(cond1 && cond2))
seqs_begin, seqs_end, return parallel_multiway_merge(seqs_begin, seqs_end, target, comp,
target, comp, static_cast<difference_type>(length), stable, true); length, stable, true);
else else
return multiway_merge(seqs_begin, seqs_end, return multiway_merge(seqs_begin, seqs_end, target, comp, length, stable,
target, comp, length, stable,
true, sequential_tag()); true, sequential_tag());
} }
} }
......
...@@ -130,8 +130,7 @@ template<typename RandomAccessIterator, typename _DifferenceTp> ...@@ -130,8 +130,7 @@ template<typename RandomAccessIterator, typename _DifferenceTp>
thread_index_t iam = omp_get_thread_num(); thread_index_t iam = omp_get_thread_num();
num_samples = num_samples = _Settings::get().sort_mwms_oversampling * sd->num_threads - 1;
Settings::sort_mwms_oversampling * sd->num_threads - 1;
difference_type* es = new difference_type[num_samples + 2]; difference_type* es = new difference_type[num_samples + 2];
...@@ -194,8 +193,8 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -194,8 +193,8 @@ template<typename RandomAccessIterator, typename Comparator>
// Invariant: locally sorted subsequence in sd->sorting_places[iam], // Invariant: locally sorted subsequence in sd->sorting_places[iam],
// sd->sorting_places[iam] + length_local. // sd->sorting_places[iam] + length_local.
const _Settings& __s = _Settings::get();
if (Settings::sort_splitting == Settings::SAMPLING) if (__s.sort_splitting == SAMPLING)
{ {
difference_type num_samples; difference_type num_samples;
determine_samples(sd, num_samples); determine_samples(sd, num_samples);
...@@ -237,7 +236,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -237,7 +236,7 @@ template<typename RandomAccessIterator, typename Comparator>
sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s]; sd->pieces[iam][s].end = sd->starts[s + 1] - sd->starts[s];
} }
} }
else if (Settings::sort_splitting == Settings::EXACT) else if (__s.sort_splitting == EXACT)
{ {
# pragma omp barrier # pragma omp barrier
...@@ -355,6 +354,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -355,6 +354,7 @@ template<typename RandomAccessIterator, typename Comparator>
// shared variables // shared variables
PMWMSSortingData<RandomAccessIterator> sd; PMWMSSortingData<RandomAccessIterator> sd;
difference_type* starts; difference_type* starts;
const _Settings& __s = _Settings::get();
# pragma omp parallel num_threads(num_threads) # pragma omp parallel num_threads(num_threads)
{ {
...@@ -374,10 +374,10 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -374,10 +374,10 @@ template<typename RandomAccessIterator, typename Comparator>
sd.merging_places = new RandomAccessIterator[num_threads]; sd.merging_places = new RandomAccessIterator[num_threads];
#endif #endif
if (Settings::sort_splitting == Settings::SAMPLING) if (__s.sort_splitting == SAMPLING)
{ {
unsigned int size = unsigned int size =
(Settings::sort_mwms_oversampling * num_threads - 1) (__s.sort_mwms_oversampling * num_threads - 1)
* num_threads; * num_threads;
sd.samples = static_cast<value_type*>( sd.samples = static_cast<value_type*>(
::operator new(size * sizeof(value_type))); ::operator new(size * sizeof(value_type)));
...@@ -412,7 +412,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -412,7 +412,7 @@ template<typename RandomAccessIterator, typename Comparator>
delete[] sd.sorting_places; delete[] sd.sorting_places;
delete[] sd.merging_places; delete[] sd.merging_places;
if (Settings::sort_splitting == Settings::SAMPLING) if (__s.sort_splitting == SAMPLING)
::operator delete(sd.samples); ::operator delete(sd.samples);
delete[] sd.offsets; delete[] sd.offsets;
......
...@@ -91,12 +91,12 @@ namespace __parallel ...@@ -91,12 +91,12 @@ namespace __parallel
accumulate_switch(_RandomAccessIterator begin, _RandomAccessIterator end, accumulate_switch(_RandomAccessIterator begin, _RandomAccessIterator end,
T init, BinaryOperation binary_op, T init, BinaryOperation binary_op,
random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::parallelism parallelism_tag __gnu_parallel::_Parallelism parallelism_tag
= __gnu_parallel::parallel_unbalanced) = __gnu_parallel::parallel_unbalanced)
{ {
if (_GLIBCXX_PARALLEL_CONDITION( if (_GLIBCXX_PARALLEL_CONDITION(
static_cast<__gnu_parallel::sequence_index_t>(end - begin) static_cast<__gnu_parallel::sequence_index_t>(end - begin)
>= __gnu_parallel::Settings::accumulate_minimal_n >= __gnu_parallel::_Settings::get().accumulate_minimal_n
&& __gnu_parallel::is_parallel(parallelism_tag))) && __gnu_parallel::is_parallel(parallelism_tag)))
{ {
T res = init; T res = init;
...@@ -121,7 +121,7 @@ namespace __parallel ...@@ -121,7 +121,7 @@ namespace __parallel
template<typename InputIterator, typename T> template<typename InputIterator, typename T>
inline T inline T
accumulate(InputIterator begin, InputIterator end, T init, accumulate(InputIterator begin, InputIterator end, T init,
__gnu_parallel::parallelism parallelism_tag) __gnu_parallel::_Parallelism parallelism_tag)
{ {
typedef std::iterator_traits<InputIterator> iterator_traits; typedef std::iterator_traits<InputIterator> iterator_traits;
typedef typename iterator_traits::value_type value_type; typedef typename iterator_traits::value_type value_type;
...@@ -149,7 +149,7 @@ namespace __parallel ...@@ -149,7 +149,7 @@ namespace __parallel
inline T inline T
accumulate(InputIterator begin, InputIterator end, T init, accumulate(InputIterator begin, InputIterator end, T init,
BinaryOperation binary_op, BinaryOperation binary_op,
__gnu_parallel::parallelism parallelism_tag) __gnu_parallel::_Parallelism parallelism_tag)
{ {
typedef iterator_traits<InputIterator> iterator_traits; typedef iterator_traits<InputIterator> iterator_traits;
typedef typename iterator_traits::iterator_category iterator_category; typedef typename iterator_traits::iterator_category iterator_category;
...@@ -197,11 +197,11 @@ namespace __parallel ...@@ -197,11 +197,11 @@ namespace __parallel
BinaryFunction2 binary_op2, BinaryFunction2 binary_op2,
random_access_iterator_tag, random_access_iterator_tag,
random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::parallelism parallelism_tag __gnu_parallel::_Parallelism parallelism_tag
= __gnu_parallel::parallel_unbalanced) = __gnu_parallel::parallel_unbalanced)
{ {
if (_GLIBCXX_PARALLEL_CONDITION((last1 - first1) if (_GLIBCXX_PARALLEL_CONDITION((last1 - first1)
>= __gnu_parallel::Settings:: >= __gnu_parallel::_Settings::get().
accumulate_minimal_n accumulate_minimal_n
&& __gnu_parallel:: && __gnu_parallel::
is_parallel(parallelism_tag))) is_parallel(parallelism_tag)))
...@@ -241,7 +241,7 @@ namespace __parallel ...@@ -241,7 +241,7 @@ namespace __parallel
inner_product(InputIterator1 first1, InputIterator1 last1, inner_product(InputIterator1 first1, InputIterator1 last1,
InputIterator2 first2, T init, BinaryFunction1 binary_op1, InputIterator2 first2, T init, BinaryFunction1 binary_op1,
BinaryFunction2 binary_op2, BinaryFunction2 binary_op2,
__gnu_parallel::parallelism parallelism_tag) __gnu_parallel::_Parallelism parallelism_tag)
{ {
typedef iterator_traits<InputIterator1> traits1_type; typedef iterator_traits<InputIterator1> traits1_type;
typedef typename traits1_type::iterator_category iterator1_category; typedef typename traits1_type::iterator_category iterator1_category;
...@@ -276,7 +276,7 @@ namespace __parallel ...@@ -276,7 +276,7 @@ namespace __parallel
inline T inline T
inner_product(InputIterator1 first1, InputIterator1 last1, inner_product(InputIterator1 first1, InputIterator1 last1,
InputIterator2 first2, T init, InputIterator2 first2, T init,
__gnu_parallel::parallelism parallelism_tag) __gnu_parallel::_Parallelism parallelism_tag)
{ {
typedef iterator_traits<InputIterator1> traits_type1; typedef iterator_traits<InputIterator1> traits_type1;
typedef typename traits_type1::value_type value_type1; typedef typename traits_type1::value_type value_type1;
...@@ -347,7 +347,7 @@ namespace __parallel ...@@ -347,7 +347,7 @@ namespace __parallel
{ {
if (_GLIBCXX_PARALLEL_CONDITION( if (_GLIBCXX_PARALLEL_CONDITION(
static_cast<__gnu_parallel::sequence_index_t>(end - begin) static_cast<__gnu_parallel::sequence_index_t>(end - begin)
>= __gnu_parallel::Settings::partial_sum_minimal_n)) >= __gnu_parallel::_Settings::get().partial_sum_minimal_n))
return __gnu_parallel::parallel_partial_sum(begin, end, return __gnu_parallel::parallel_partial_sum(begin, end,
result, bin_op); result, bin_op);
else else
...@@ -416,12 +416,12 @@ namespace __parallel ...@@ -416,12 +416,12 @@ namespace __parallel
OutputIterator result, BinaryOperation bin_op, OutputIterator result, BinaryOperation bin_op,
random_access_iterator_tag, random_access_iterator_tag,
random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::parallelism parallelism_tag __gnu_parallel::_Parallelism parallelism_tag
= __gnu_parallel::parallel_balanced) = __gnu_parallel::parallel_balanced)
{ {
if (_GLIBCXX_PARALLEL_CONDITION( if (_GLIBCXX_PARALLEL_CONDITION(
static_cast<__gnu_parallel::sequence_index_t>(end - begin) static_cast<__gnu_parallel::sequence_index_t>(end - begin)
>= __gnu_parallel::Settings::adjacent_difference_minimal_n >= __gnu_parallel::_Settings::get().adjacent_difference_minimal_n
&& __gnu_parallel::is_parallel(parallelism_tag))) && __gnu_parallel::is_parallel(parallelism_tag)))
{ {
bool dummy = true; bool dummy = true;
...@@ -448,7 +448,7 @@ namespace __parallel ...@@ -448,7 +448,7 @@ namespace __parallel
inline OutputIterator inline OutputIterator
adjacent_difference(InputIterator begin, InputIterator end, adjacent_difference(InputIterator begin, InputIterator end,
OutputIterator result, OutputIterator result,
__gnu_parallel::parallelism parallelism_tag) __gnu_parallel::_Parallelism parallelism_tag)
{ {
typedef iterator_traits<InputIterator> traits_type; typedef iterator_traits<InputIterator> traits_type;
typedef typename traits_type::value_type value_type; typedef typename traits_type::value_type value_type;
...@@ -471,7 +471,7 @@ namespace __parallel ...@@ -471,7 +471,7 @@ namespace __parallel
inline OutputIterator inline OutputIterator
adjacent_difference(InputIterator begin, InputIterator end, adjacent_difference(InputIterator begin, InputIterator end,
OutputIterator result, BinaryOperation binary_op, OutputIterator result, BinaryOperation binary_op,
__gnu_parallel::parallelism parallelism_tag) __gnu_parallel::_Parallelism parallelism_tag)
{ {
typedef iterator_traits<InputIterator> traitsi_type; typedef iterator_traits<InputIterator> traitsi_type;
typedef typename traitsi_type::iterator_category iteratori_category; typedef typename traitsi_type::iterator_category iteratori_category;
......
...@@ -54,7 +54,7 @@ namespace __parallel ...@@ -54,7 +54,7 @@ namespace __parallel
template<typename _IIter, typename _Tp> template<typename _IIter, typename _Tp>
_Tp _Tp
accumulate(_IIter, _IIter, _Tp, __gnu_parallel::parallelism); accumulate(_IIter, _IIter, _Tp, __gnu_parallel::_Parallelism);
template<typename _IIter, typename _Tp, typename _Tag> template<typename _IIter, typename _Tp, typename _Tag>
_Tp _Tp
...@@ -72,7 +72,7 @@ namespace __parallel ...@@ -72,7 +72,7 @@ namespace __parallel
template<typename _IIter, typename _Tp, typename _BinaryOper> template<typename _IIter, typename _Tp, typename _BinaryOper>
_Tp _Tp
accumulate(_IIter, _IIter, _Tp, _BinaryOper, accumulate(_IIter, _IIter, _Tp, _BinaryOper,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter, typename _Tp, typename _BinaryOper, template<typename _IIter, typename _Tp, typename _BinaryOper,
typename _Tag> typename _Tag>
...@@ -83,7 +83,7 @@ namespace __parallel ...@@ -83,7 +83,7 @@ namespace __parallel
_Tp _Tp
accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper, accumulate_switch(_RAIter, _RAIter, _Tp, _BinaryOper,
random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter, typename _OIter> template<typename _IIter, typename _OIter>
_OIter _OIter
...@@ -106,12 +106,12 @@ namespace __parallel ...@@ -106,12 +106,12 @@ namespace __parallel
template<typename _IIter, typename _OIter> template<typename _IIter, typename _OIter>
_OIter _OIter
adjacent_difference(_IIter, _IIter, _OIter, adjacent_difference(_IIter, _IIter, _OIter,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter, typename _OIter, typename _BinaryOper> template<typename _IIter, typename _OIter, typename _BinaryOper>
_OIter _OIter
adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper, adjacent_difference(_IIter, _IIter, _OIter, _BinaryOper,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter, typename _OIter, typename _BinaryOper, template<typename _IIter, typename _OIter, typename _BinaryOper,
typename _Tag1, typename _Tag2> typename _Tag1, typename _Tag2>
...@@ -124,7 +124,7 @@ namespace __parallel ...@@ -124,7 +124,7 @@ namespace __parallel
adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper, adjacent_difference_switch(_IIter, _IIter, _OIter, _BinaryOper,
random_access_iterator_tag, random_access_iterator_tag,
random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter1, typename _IIter2, typename _Tp> template<typename _IIter1, typename _IIter2, typename _Tp>
_Tp _Tp
...@@ -138,7 +138,7 @@ namespace __parallel ...@@ -138,7 +138,7 @@ namespace __parallel
template<typename _IIter1, typename _IIter2, typename _Tp> template<typename _IIter1, typename _IIter2, typename _Tp>
_Tp _Tp
inner_product(_IIter1, _IIter1, _IIter2, _Tp, inner_product(_IIter1, _IIter1, _IIter2, _Tp,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter1, typename _IIter2, typename _Tp, template<typename _IIter1, typename _IIter2, typename _Tp,
typename _BinaryFunction1, typename _BinaryFunction2> typename _BinaryFunction1, typename _BinaryFunction2>
...@@ -156,7 +156,7 @@ namespace __parallel ...@@ -156,7 +156,7 @@ namespace __parallel
typename BinaryFunction1, typename BinaryFunction2> typename BinaryFunction1, typename BinaryFunction2>
_Tp _Tp
inner_product(_IIter1, _IIter1, _IIter2, _Tp, BinaryFunction1, inner_product(_IIter1, _IIter1, _IIter2, _Tp, BinaryFunction1,
BinaryFunction2, __gnu_parallel::parallelism); BinaryFunction2, __gnu_parallel::_Parallelism);
template<typename _RAIter1, typename _RAIter2, typename _Tp, template<typename _RAIter1, typename _RAIter2, typename _Tp,
typename BinaryFunction1, typename BinaryFunction2> typename BinaryFunction1, typename BinaryFunction2>
...@@ -164,7 +164,7 @@ namespace __parallel ...@@ -164,7 +164,7 @@ namespace __parallel
inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1, inner_product_switch(_RAIter1, _RAIter1, _RAIter2, _Tp, BinaryFunction1,
BinaryFunction2, random_access_iterator_tag, BinaryFunction2, random_access_iterator_tag,
random_access_iterator_tag, random_access_iterator_tag,
__gnu_parallel::parallelism); __gnu_parallel::_Parallelism);
template<typename _IIter1, typename _IIter2, typename _Tp, template<typename _IIter1, typename _IIter2, typename _Tp,
typename _BinaryFunction1, typename _BinaryFunction2, typename _BinaryFunction1, typename _BinaryFunction2,
......
...@@ -101,7 +101,7 @@ template<typename RandomAccessIterator, ...@@ -101,7 +101,7 @@ template<typename RandomAccessIterator,
thread_index_t iam = omp_get_thread_num(); thread_index_t iam = omp_get_thread_num();
# pragma omp for schedule(dynamic, Settings::workstealing_chunk_size) # pragma omp for schedule(dynamic, _Settings::get().workstealing_chunk_size)
for (difference_type pos = 0; pos < length; ++pos) for (difference_type pos = 0; pos < length; ++pos)
thread_results[iam] = thread_results[iam] =
r(thread_results[iam], f(o, begin+pos)); r(thread_results[iam], f(o, begin+pos));
......
...@@ -101,7 +101,7 @@ template<typename RandomAccessIterator, ...@@ -101,7 +101,7 @@ template<typename RandomAccessIterator,
thread_index_t iam = omp_get_thread_num(); thread_index_t iam = omp_get_thread_num();
# pragma omp for schedule(static, Settings::workstealing_chunk_size) # pragma omp for schedule(static, _Settings::get().workstealing_chunk_size)
for (difference_type pos = 0; pos < length; ++pos) for (difference_type pos = 0; pos < length; ++pos)
thread_results[iam] = r(thread_results[iam], f(o, begin+pos)); thread_results[iam] = r(thread_results[iam], f(o, begin+pos));
} //parallel } //parallel
......
...@@ -118,6 +118,8 @@ template<typename InputIterator, ...@@ -118,6 +118,8 @@ template<typename InputIterator,
difference_type* borders; difference_type* borders;
value_type* sums; value_type* sums;
const _Settings& __s = _Settings::get();
# pragma omp parallel num_threads(num_threads) # pragma omp parallel num_threads(num_threads)
{ {
# pragma omp single # pragma omp single
...@@ -126,14 +128,13 @@ template<typename InputIterator, ...@@ -126,14 +128,13 @@ template<typename InputIterator,
borders = new difference_type[num_threads + 2]; borders = new difference_type[num_threads + 2];
if (Settings::partial_sum_dilatation == 1.0f) if (__s.partial_sum_dilation == 1.0f)
equally_split(n, num_threads + 1, borders); equally_split(n, num_threads + 1, borders);
else else
{ {
difference_type chunk_length = difference_type chunk_length =
((double)n ((double)n
/ ((double)num_threads / ((double)num_threads + __s.partial_sum_dilation)),
+ Settings::partial_sum_dilatation)),
borderstart = n - num_threads * chunk_length; borderstart = n - num_threads * chunk_length;
borders[0] = 0; borders[0] = 0;
for (int i = 1; i < (num_threads + 1); ++i) for (int i = 1; i < (num_threads + 1); ++i)
...@@ -209,9 +210,9 @@ template<typename InputIterator, ...@@ -209,9 +210,9 @@ template<typename InputIterator,
difference_type n = end - begin; difference_type n = end - begin;
switch (Settings::partial_sum_algorithm) switch (_Settings::get().partial_sum_algorithm)
{ {
case Settings::LINEAR: case LINEAR:
// Need an initial offset. // Need an initial offset.
return parallel_partial_sum_linear(begin, end, result, bin_op, n); return parallel_partial_sum_linear(begin, end, result, bin_op, n);
default: default:
......
...@@ -69,6 +69,8 @@ template<typename RandomAccessIterator, typename Predicate> ...@@ -69,6 +69,8 @@ template<typename RandomAccessIterator, typename Predicate>
_GLIBCXX_CALL(n) _GLIBCXX_CALL(n)
const _Settings& __s = _Settings::get();
// Shared. // Shared.
_GLIBCXX_VOLATILE difference_type left = 0, right = n - 1; _GLIBCXX_VOLATILE difference_type left = 0, right = n - 1;
_GLIBCXX_VOLATILE difference_type leftover_left, leftover_right; _GLIBCXX_VOLATILE difference_type leftover_left, leftover_right;
...@@ -91,14 +93,12 @@ template<typename RandomAccessIterator, typename Predicate> ...@@ -91,14 +93,12 @@ template<typename RandomAccessIterator, typename Predicate>
reserved_left = new bool[num_threads]; reserved_left = new bool[num_threads];
reserved_right = new bool[num_threads]; reserved_right = new bool[num_threads];
if (Settings::partition_chunk_share > 0.0) if (__s.partition_chunk_share > 0.0)
chunk_size = std::max<difference_type>(Settings:: chunk_size = std::max<difference_type>(__s.partition_chunk_size,
partition_chunk_size, (double)n * __s.partition_chunk_share
(double)n * Settings::
partition_chunk_share
/ (double)num_threads); / (double)num_threads);
else else
chunk_size = Settings::partition_chunk_size; chunk_size = __s.partition_chunk_size;
} }
while (right - left + 1 >= 2 * num_threads * chunk_size) while (right - left + 1 >= 2 * num_threads * chunk_size)
...@@ -346,7 +346,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -346,7 +346,7 @@ template<typename RandomAccessIterator, typename Comparator>
random_number rng; random_number rng;
difference_type minimum_length = difference_type minimum_length =
std::max<difference_type>(2, Settings::partition_minimal_n); std::max<difference_type>(2, _Settings::get().partition_minimal_n);
// Break if input range is too small. // Break if input range is too small.
while (static_cast<sequence_index_t>(end - begin) >= minimum_length) while (static_cast<sequence_index_t>(end - begin) >= minimum_length)
...@@ -409,7 +409,7 @@ template<typename RandomAccessIterator, typename Comparator> ...@@ -409,7 +409,7 @@ template<typename RandomAccessIterator, typename Comparator>
break; break;
} }
// Only at most Settings::partition_minimal_n elements left. // Only at most _Settings::partition_minimal_n elements left.
__gnu_sequential::sort(begin, end, comp); __gnu_sequential::sort(begin, end, comp);
} }
......
...@@ -134,7 +134,7 @@ namespace __gnu_parallel ...@@ -134,7 +134,7 @@ namespace __gnu_parallel
difference_type split = difference_type split =
parallel_sort_qs_divide(begin, end, comp, pivot_rank, parallel_sort_qs_divide(begin, end, comp, pivot_rank,
Settings::sort_qs_num_samples_preset, _Settings::get().sort_qs_num_samples_preset,
num_threads); num_threads);
#pragma omp parallel sections #pragma omp parallel sections
...@@ -179,8 +179,6 @@ namespace __gnu_parallel ...@@ -179,8 +179,6 @@ namespace __gnu_parallel
if (num_threads > n) if (num_threads > n)
num_threads = static_cast<thread_index_t>(n); num_threads = static_cast<thread_index_t>(n);
Settings::sort_qs_num_samples_preset = 100;
// Hard to avoid. // Hard to avoid.
omp_set_num_threads(num_threads); omp_set_num_threads(num_threads);
......
...@@ -274,6 +274,8 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> ...@@ -274,6 +274,8 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
_GLIBCXX_CALL(n) _GLIBCXX_CALL(n)
const _Settings& __s = _Settings::get();
if (num_threads > n) if (num_threads > n)
num_threads = static_cast<thread_index_t>(n); num_threads = static_cast<thread_index_t>(n);
...@@ -284,7 +286,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> ...@@ -284,7 +286,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
// Must fit into L1. // Must fit into L1.
num_bins_cache = std::max<difference_type>( num_bins_cache = std::max<difference_type>(
1, n / (Settings::L1_cache_size_lb / sizeof(value_type))); 1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
num_bins_cache = round_up_to_pow2(num_bins_cache); num_bins_cache = round_up_to_pow2(num_bins_cache);
// No more buckets than TLB entries, power of 2 // No more buckets than TLB entries, power of 2
...@@ -293,7 +295,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> ...@@ -293,7 +295,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin. // 2 TLB entries needed per bin.
num_bins = std::min<difference_type>(Settings::TLB_size / 2, num_bins); num_bins = std::min<difference_type>(__s.TLB_size / 2, num_bins);
#endif #endif
num_bins = round_up_to_pow2(num_bins); num_bins = round_up_to_pow2(num_bins);
...@@ -303,7 +305,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> ...@@ -303,7 +305,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
// Now try the L2 cache // Now try the L2 cache
// Must fit into L2 // Must fit into L2
num_bins_cache = static_cast<bin_index>(std::max<difference_type>( num_bins_cache = static_cast<bin_index>(std::max<difference_type>(
1, n / (Settings::L2_cache_size / sizeof(value_type)))); 1, n / (__s.L2_cache_size / sizeof(value_type))));
num_bins_cache = round_up_to_pow2(num_bins_cache); num_bins_cache = round_up_to_pow2(num_bins_cache);
// No more buckets than TLB entries, power of 2. // No more buckets than TLB entries, power of 2.
...@@ -313,7 +315,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> ...@@ -313,7 +315,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin. // 2 TLB entries needed per bin.
num_bins = std::min( num_bins = std::min(
static_cast<difference_type>(Settings::TLB_size / 2), num_bins); static_cast<difference_type>(__s.TLB_size / 2), num_bins);
#endif #endif
num_bins = round_up_to_pow2(num_bins); num_bins = round_up_to_pow2(num_bins);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
...@@ -403,6 +405,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> ...@@ -403,6 +405,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
typedef typename traits_type::difference_type difference_type; typedef typename traits_type::difference_type difference_type;
difference_type n = end - begin; difference_type n = end - begin;
const _Settings& __s = _Settings::get();
bin_index num_bins, num_bins_cache; bin_index num_bins, num_bins_cache;
...@@ -410,7 +413,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> ...@@ -410,7 +413,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
// Try the L1 cache first, must fit into L1. // Try the L1 cache first, must fit into L1.
num_bins_cache = num_bins_cache =
std::max<difference_type> std::max<difference_type>
(1, n / (Settings::L1_cache_size_lb / sizeof(value_type))); (1, n / (__s.L1_cache_size_lb / sizeof(value_type)));
num_bins_cache = round_up_to_pow2(num_bins_cache); num_bins_cache = round_up_to_pow2(num_bins_cache);
// No more buckets than TLB entries, power of 2 // No more buckets than TLB entries, power of 2
...@@ -418,7 +421,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> ...@@ -418,7 +421,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
num_bins = std::min(n, (difference_type)num_bins_cache); num_bins = std::min(n, (difference_type)num_bins_cache);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin // 2 TLB entries needed per bin
num_bins = std::min((difference_type)Settings::TLB_size / 2, num_bins); num_bins = std::min((difference_type)__s.TLB_size / 2, num_bins);
#endif #endif
num_bins = round_up_to_pow2(num_bins); num_bins = round_up_to_pow2(num_bins);
...@@ -428,7 +431,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> ...@@ -428,7 +431,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
// Now try the L2 cache, must fit into L2. // Now try the L2 cache, must fit into L2.
num_bins_cache = num_bins_cache =
static_cast<bin_index>(std::max<difference_type>( static_cast<bin_index>(std::max<difference_type>(
1, n / (Settings::L2_cache_size / sizeof(value_type)))); 1, n / (__s.L2_cache_size / sizeof(value_type))));
num_bins_cache = round_up_to_pow2(num_bins_cache); num_bins_cache = round_up_to_pow2(num_bins_cache);
// No more buckets than TLB entries, power of 2 // No more buckets than TLB entries, power of 2
...@@ -439,7 +442,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator> ...@@ -439,7 +442,7 @@ template<typename RandomAccessIterator, typename RandomNumberGenerator>
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_TLB
// 2 TLB entries needed per bin // 2 TLB entries needed per bin
num_bins = num_bins =
std::min<difference_type>(Settings::TLB_size / 2, num_bins); std::min<difference_type>(__s.TLB_size / 2, num_bins);
#endif #endif
num_bins = round_up_to_pow2(num_bins); num_bins = round_up_to_pow2(num_bins);
#if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1 #if _GLIBCXX_RANDOM_SHUFFLE_CONSIDER_L1
......
...@@ -84,16 +84,15 @@ namespace __gnu_parallel ...@@ -84,16 +84,15 @@ namespace __gnu_parallel
if (false) ; if (false) ;
#if _GLIBCXX_MERGESORT #if _GLIBCXX_MERGESORT
else if (Settings::sort_algorithm == Settings::MWMS || stable) else if (stable || _Settings::get().sort_algorithm == MWMS)
parallel_sort_mwms(begin, end, comp, n, get_max_threads(), stable); parallel_sort_mwms(begin, end, comp, n, get_max_threads(), stable);
#endif #endif
#if _GLIBCXX_QUICKSORT #if _GLIBCXX_QUICKSORT
else if (Settings::sort_algorithm == Settings::QS && !stable) else if (!stable && _Settings::get().sort_algorithm == QS)
parallel_sort_qs(begin, end, comp, n, get_max_threads()); parallel_sort_qs(begin, end, comp, n, get_max_threads());
#endif #endif
#if _GLIBCXX_BAL_QUICKSORT #if _GLIBCXX_BAL_QUICKSORT
else if (Settings::sort_algorithm == Settings::QS_BALANCED else if (!stable && _Settings::get().sort_algorithm == QS_BALANCED)
&& !stable)
parallel_sort_qsb(begin, end, comp, n, get_max_threads()); parallel_sort_qsb(begin, end, comp, n, get_max_threads());
#endif #endif
else else
......
...@@ -64,18 +64,19 @@ namespace __gnu_parallel ...@@ -64,18 +64,19 @@ namespace __gnu_parallel
struct omp_loop_static_tag : public parallel_tag { }; struct omp_loop_static_tag : public parallel_tag { };
// XXX settings.h Settings::FindDistribution struct find_tag { };
/** @brief Selects the growing block size variant for std::find(). /** @brief Selects the growing block size variant for std::find().
@see _GLIBCXX_FIND_GROWING_BLOCKS */ @see _GLIBCXX_FIND_GROWING_BLOCKS */
struct growing_blocks_tag { }; struct growing_blocks_tag : public find_tag { };
/** @brief Selects the constant block size variant for std::find(). /** @brief Selects the constant block size variant for std::find().
@see _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS */ @see _GLIBCXX_FIND_CONSTANT_SIZE_BLOCKS */
struct constant_size_blocks_tag { }; struct constant_size_blocks_tag : public find_tag { };
/** @brief Selects the equal splitting variant for std::find(). /** @brief Selects the equal splitting variant for std::find().
@see _GLIBCXX_FIND_EQUAL_SPLIT */ @see _GLIBCXX_FIND_EQUAL_SPLIT */
struct equal_split_tag { }; struct equal_split_tag : public find_tag { };
} }
#endif /* _GLIBCXX_PARALLEL_TAGS_H */ #endif /* _GLIBCXX_PARALLEL_TAGS_H */
...@@ -44,8 +44,8 @@ namespace __gnu_parallel ...@@ -44,8 +44,8 @@ namespace __gnu_parallel
{ {
// Enumerated types. // Enumerated types.
/// @brief Run-time equivalents for the compile-time tags. /// Run-time equivalents for the compile-time tags.
enum parallelism enum _Parallelism
{ {
/// Not parallel. /// Not parallel.
sequential, sequential,
...@@ -66,9 +66,60 @@ namespace __gnu_parallel ...@@ -66,9 +66,60 @@ namespace __gnu_parallel
parallel_taskqueue parallel_taskqueue
}; };
inline bool /// Strategies for run-time algorithm selection:
is_parallel(const parallelism __p) { return __p != sequential; } // force_sequential, force_parallel, heuristic.
enum _AlgorithmStrategy
{
  /// Selection is left to a heuristic.
  heuristic = 0,
  /// The sequential variant is forced.
  force_sequential = 1,
  /// The parallel variant is forced.
  force_parallel = 2
};
/// Sorting algorithms: multi-way mergesort, quicksort, and
/// load-balanced quicksort.
enum _SortAlgorithm
{
  /// Multi-way mergesort.
  MWMS = 0,
  /// Quicksort.
  QS = 1,
  /// Load-balanced quicksort.
  QS_BALANCED = 2
};
/// Merging algorithms: bubblesort-alike, loser-tree variants, and
/// the enum sentinel.
enum _MultiwayMergeAlgorithm
{
  /// Bubblesort-alike merging.
  BUBBLE = 0,
  /// Loser-tree variant: explicit.
  LOSER_TREE_EXPLICIT = 1,
  /// Loser-tree variant: plain.
  LOSER_TREE = 2,
  /// Loser-tree variant: combined.
  LOSER_TREE_COMBINED = 3,
  /// Loser-tree variant: sentinel.
  LOSER_TREE_SENTINEL = 4,
  /// Enum sentinel.
  // NOTE(review): presumably an end-of-list marker rather than a
  // selectable algorithm -- confirm against the users of this enum.
  ENUM_SENTINEL = 5
};
/// Partial sum algorithms: recursive and linear.
enum _PartialSumAlgorithm
{
  /// Recursive variant.
  RECURSIVE = 0,
  /// Linear variant.
  LINEAR = 1
};
/// Splitting algorithms for sorting/merging: sampling and exact.
enum _SplittingAlgorithm
{
  /// Splitting by sampling.
  SAMPLING = 0,
  /// Exact splitting.
  EXACT = 1
};
/// Find algorithms: growing blocks, equal-sized blocks, and
/// equal splitting.
enum _FindAlgorithm
{
  /// Growing blocks.
  GROWING_BLOCKS = 0,
  /// Equal-sized (constant size) blocks.
  CONSTANT_SIZE_BLOCKS = 1,
  /// Equal splitting.
  EQUAL_SPLIT = 2
};
/// Integer Types.
// XXX need to use <cstdint> // XXX need to use <cstdint>
/** @brief 16-bit signed integer. */ /** @brief 16-bit signed integer. */
typedef short int16; typedef short int16;
...@@ -101,20 +152,14 @@ namespace __gnu_parallel ...@@ -101,20 +152,14 @@ namespace __gnu_parallel
typedef uint16 thread_index_t; typedef uint16 thread_index_t;
// XXX atomics interface? // XXX atomics interface?
/** /// Longest compare-and-swappable integer type on this platform.
* @brief Longest compare-and-swappable integer type on this platform.
*/
typedef int64 lcas_t; typedef int64 lcas_t;
// XXX numeric_limits::digits? // XXX numeric_limits::digits?
/** /// Number of bits of ::lcas_t.
* @brief Number of bits of ::lcas_t.
*/
static const int lcas_t_bits = sizeof(lcas_t) * 8; static const int lcas_t_bits = sizeof(lcas_t) * 8;
/** /// ::lcas_t with the right half of bits set to 1.
* @brief ::lcas_t with the right half of bits set to 1.
*/
static const lcas_t lcas_t_mask = ((lcas_t(1) << (lcas_t_bits / 2)) - 1); static const lcas_t lcas_t_mask = ((lcas_t(1) << (lcas_t_bits / 2)) - 1);
} }
......
...@@ -115,16 +115,15 @@ template<typename RandomAccessIterator, ...@@ -115,16 +115,15 @@ template<typename RandomAccessIterator,
typedef std::iterator_traits<RandomAccessIterator> traits_type; typedef std::iterator_traits<RandomAccessIterator> traits_type;
typedef typename traits_type::difference_type difference_type; typedef typename traits_type::difference_type difference_type;
const _Settings& __s = _Settings::get();
difference_type chunk_size = difference_type chunk_size = static_cast<difference_type>(__s.workstealing_chunk_size);
static_cast<difference_type>(Settings::workstealing_chunk_size);
// How many jobs? // How many jobs?
difference_type length = (bound < 0) ? (end - begin) : bound; difference_type length = (bound < 0) ? (end - begin) : bound;
// To avoid false sharing in a cache line. // To avoid false sharing in a cache line.
const int stride = const int stride = __s.cache_line_size * 10 / sizeof(Job<difference_type>) + 1;
Settings::cache_line_size * 10 / sizeof(Job<difference_type>) + 1;
// Total number of threads currently working. // Total number of threads currently working.
thread_index_t busy = 0; thread_index_t busy = 0;
......
...@@ -121,7 +121,7 @@ basic_file.cc: ${glibcxx_srcdir}/$(BASIC_FILE_CC) ...@@ -121,7 +121,7 @@ basic_file.cc: ${glibcxx_srcdir}/$(BASIC_FILE_CC)
$(LN_S) ${glibcxx_srcdir}/$(BASIC_FILE_CC) ./$@ || true $(LN_S) ${glibcxx_srcdir}/$(BASIC_FILE_CC) ./$@ || true
if ENABLE_PARALLEL if ENABLE_PARALLEL
parallel_sources = parallel_list.cc parallel_sources = parallel_list.cc parallel_settings.cc
else else
parallel_sources = parallel_sources =
endif endif
...@@ -221,6 +221,11 @@ parallel_list.lo: parallel_list.cc ...@@ -221,6 +221,11 @@ parallel_list.lo: parallel_list.cc
parallel_list.o: parallel_list.cc parallel_list.o: parallel_list.cc
$(CXXCOMPILE) $(PARALLEL_FLAGS) -c $< $(CXXCOMPILE) $(PARALLEL_FLAGS) -c $<
parallel_settings.lo: parallel_settings.cc
$(LTCXXCOMPILE) $(PARALLEL_FLAGS) -c $<
parallel_settings.o: parallel_settings.cc
$(CXXCOMPILE) $(PARALLEL_FLAGS) -c $<
# Use special rules for the C++0x sources so that the proper flags are passed. # Use special rules for the C++0x sources so that the proper flags are passed.
system_error.lo: system_error.cc system_error.lo: system_error.cc
$(LTCXXCOMPILE) -std=gnu++0x -c $< $(LTCXXCOMPILE) -std=gnu++0x -c $<
......
...@@ -84,12 +84,13 @@ am__libstdc___la_SOURCES_DIST = bitmap_allocator.cc pool_allocator.cc \ ...@@ -84,12 +84,13 @@ am__libstdc___la_SOURCES_DIST = bitmap_allocator.cc pool_allocator.cc \
codecvt_members.cc collate_members.cc ctype_members.cc \ codecvt_members.cc collate_members.cc ctype_members.cc \
messages_members.cc monetary_members.cc numeric_members.cc \ messages_members.cc monetary_members.cc numeric_members.cc \
time_members.cc basic_file.cc c++locale.cc \ time_members.cc basic_file.cc c++locale.cc \
compatibility-ldbl.cc parallel_list.cc compatibility-ldbl.cc parallel_list.cc parallel_settings.cc
am__objects_1 = atomicity.lo codecvt_members.lo collate_members.lo \ am__objects_1 = atomicity.lo codecvt_members.lo collate_members.lo \
ctype_members.lo messages_members.lo monetary_members.lo \ ctype_members.lo messages_members.lo monetary_members.lo \
numeric_members.lo time_members.lo numeric_members.lo time_members.lo
@GLIBCXX_LDBL_COMPAT_TRUE@am__objects_2 = compatibility-ldbl.lo @GLIBCXX_LDBL_COMPAT_TRUE@am__objects_2 = compatibility-ldbl.lo
@ENABLE_PARALLEL_TRUE@am__objects_3 = parallel_list.lo @ENABLE_PARALLEL_TRUE@am__objects_3 = parallel_list.lo \
@ENABLE_PARALLEL_TRUE@ parallel_settings.lo
am__objects_4 = basic_file.lo c++locale.lo $(am__objects_2) \ am__objects_4 = basic_file.lo c++locale.lo $(am__objects_2) \
$(am__objects_3) $(am__objects_3)
am__objects_5 = bitmap_allocator.lo pool_allocator.lo mt_allocator.lo \ am__objects_5 = bitmap_allocator.lo pool_allocator.lo mt_allocator.lo \
...@@ -359,7 +360,7 @@ host_sources_extra = \ ...@@ -359,7 +360,7 @@ host_sources_extra = \
basic_file.cc c++locale.cc ${ldbl_compat_sources} ${parallel_sources} basic_file.cc c++locale.cc ${ldbl_compat_sources} ${parallel_sources}
@ENABLE_PARALLEL_FALSE@parallel_sources = @ENABLE_PARALLEL_FALSE@parallel_sources =
@ENABLE_PARALLEL_TRUE@parallel_sources = parallel_list.cc @ENABLE_PARALLEL_TRUE@parallel_sources = parallel_list.cc parallel_settings.cc
@GLIBCXX_LDBL_COMPAT_FALSE@ldbl_compat_sources = @GLIBCXX_LDBL_COMPAT_FALSE@ldbl_compat_sources =
@GLIBCXX_LDBL_COMPAT_TRUE@ldbl_compat_sources = compatibility-ldbl.cc @GLIBCXX_LDBL_COMPAT_TRUE@ldbl_compat_sources = compatibility-ldbl.cc
...@@ -810,6 +811,11 @@ parallel_list.lo: parallel_list.cc ...@@ -810,6 +811,11 @@ parallel_list.lo: parallel_list.cc
parallel_list.o: parallel_list.cc parallel_list.o: parallel_list.cc
$(CXXCOMPILE) $(PARALLEL_FLAGS) -c $< $(CXXCOMPILE) $(PARALLEL_FLAGS) -c $<
parallel_settings.lo: parallel_settings.cc
$(LTCXXCOMPILE) $(PARALLEL_FLAGS) -c $<
parallel_settings.o: parallel_settings.cc
$(CXXCOMPILE) $(PARALLEL_FLAGS) -c $<
# Use special rules for the C++0x sources so that the proper flags are passed. # Use special rules for the C++0x sources so that the proper flags are passed.
system_error.lo: system_error.cc system_error.lo: system_error.cc
$(LTCXXCOMPILE) -std=gnu++0x -c $< $(LTCXXCOMPILE) -std=gnu++0x -c $<
......
// Default settings for parallel mode -*- C++ -*-
// Copyright (C) 2007 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 2, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING. If not, write to the Free
// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
// USA.
// As a special exception, you may use this file as part of a free software
// library without restriction. Specifically, if other files instantiate
// templates or use macros or inline functions from this file, or you compile
// this file and link it with other files to produce an executable, this
// file does not by itself cause the resulting executable to be covered by
// the GNU General Public License. This exception does not however
// invalidate any other reasons why the executable file might be covered by
// the GNU General Public License.
#include <parallel/settings.h>
namespace
{
  // The settings object handed out by _Settings::get() and replaced by
  // _Settings::set().  The unnamed namespace gives it internal linkage,
  // so it is reachable only through those two member functions.
  __gnu_parallel::_Settings settings_instance;
}

namespace __gnu_parallel
{
  /// Return a read-only reference to the current settings object.
  const _Settings&
  _Settings::get() throw()
  {
    return settings_instance;
  }

  /// Replace the current settings with a copy of @p obj.
  // XXX MT: the assignment below is performed without any locking.
  void
  _Settings::set(_Settings& obj) throw()
  {
    settings_instance = obj;
  }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment