Commit 18971f1f by Tim Shen Committed by Tim Shen

regex_executor.h: Add _TodoList class.

2013-10-08  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex_executor.h: Add _TodoList class.
	* include/bits/regex_executor.tcc (_BFSExecutor<>::_M_main): Add
	_M_match_stack and _M_stack to make everything faster. Break if
	_M_stack is empty, to reduce unnecessary idling.
	* testsuite/performance/28_regex/split.cc: New.

From-SVN: r203261
parent 59a2a4e2
2013-10-08 Tim Shen <timshen91@gmail.com>
* include/bits/regex_executor.h: Add _TodoList class.
* include/bits/regex_executor.tcc (_BFSExecutor<>::_M_main): Add
_M_match_stack and _M_stack to make everything faster. Break if
_M_stack is empty, to reduce unnecessary idling.
* testsuite/performance/28_regex/split.cc: New.
2013-10-06 Tim Shen <timshen91@gmail.com> 2013-10-06 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h: (regex_token_iterator<>::regex_token_iterator): * include/bits/regex.h: (regex_token_iterator<>::regex_token_iterator):
......
...@@ -102,22 +102,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -102,22 +102,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
bool bool
_M_search() _M_search();
{
if (_M_flags & regex_constants::match_continuous)
return _M_search_from_first();
auto __cur = _M_begin;
do
{
_M_match_mode = false;
_M_init(__cur);
if (_M_main())
return true;
}
// Continue when __cur == _M_end
while (__cur++ != _M_end);
return false;
}
bool bool
_M_is_word(_CharT __ch) const _M_is_word(_CharT __ch) const
...@@ -346,6 +331,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -346,6 +331,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}; };
typedef std::unique_ptr<_ResultsEntry> _ResultsPtr; typedef std::unique_ptr<_ResultsEntry> _ResultsPtr;
// A LIFO work list of state ids in which each id is pending at most once.
//
// _M_exists[__u] is true exactly while __u is stored in _M_states, so
// pushing an id that is already pending leaves the list unchanged.
class _TodoList
{
public:
  // __sz bounds the ids that may be pushed: every id given to _M_push
  // must be less than __sz.
  explicit
  _TodoList(size_t __sz)
  : _M_states(), _M_exists(__sz, false)
  { }

  // Make __u pending unless it already is.
  void _M_push(_StateIdT __u)
  {
    _GLIBCXX_DEBUG_ASSERT(__u < _M_exists.size());
    if (!_M_exists[__u])
      {
	_M_exists[__u] = true;
	_M_states.push_back(__u);
      }
  }

  // Remove and return the most recently pushed pending id.
  // Precondition: !_M_empty().
  _StateIdT _M_pop()
  {
    // back()/pop_back() on an empty vector is undefined; check the
    // precondition in debug mode, as _M_push does for its own.
    _GLIBCXX_DEBUG_ASSERT(!_M_states.empty());
    auto __ret = _M_states.back();
    _M_states.pop_back();
    _M_exists[__ret] = false;
    return __ret;
  }

  // True when no id is pending.
  bool _M_empty() const
  { return _M_states.empty(); }

  // Drop every pending id; the id bound given at construction is kept.
  void _M_clear()
  {
    _M_states.clear();
    _M_exists.assign(_M_exists.size(), false);
  }

private:
  // Pending ids, most recently pushed last.
  std::vector<_StateIdT> _M_states;
  // Membership flags for _M_states, indexed by state id.
  std::vector<bool> _M_exists;
};
public: public:
_BFSExecutor(_BiIter __begin, _BFSExecutor(_BiIter __begin,
_BiIter __end, _BiIter __end,
...@@ -355,6 +380,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -355,6 +380,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
: _BaseT(__begin, __end, __results, __re, __flags), : _BaseT(__begin, __end, __results, __re, __flags),
_M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>> _M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
(__re._M_automaton)), (__re._M_automaton)),
_M_match_stack(_M_nfa.size()),
_M_stack(_M_nfa.size()),
_M_start_state(_M_nfa._M_start()) _M_start_state(_M_nfa._M_start())
{ } { }
...@@ -362,14 +389,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -362,14 +389,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void void
_M_init(_BiIter __cur) _M_init(_BiIter __cur)
{ {
_GLIBCXX_DEBUG_ASSERT(this->_M_start_state != _S_invalid_state_id);
this->_M_current = __cur; this->_M_current = __cur;
_M_covered.clear(); _M_covered.clear();
_ResultsVec& __res(this->_M_results); _ResultsVec& __res(this->_M_results);
_M_covered[this->_M_start_state] = _M_covered[this->_M_start_state] =
_ResultsPtr(new _ResultsEntry(__res.size(), _ResultsPtr(new _ResultsEntry(__res.size(),
_M_nfa._M_quant_count)); _M_nfa._M_quant_count));
_M_e_closure(); _M_stack._M_push(this->_M_start_state);
} }
void void
...@@ -398,11 +424,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -398,11 +424,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
this->_M_flags)); this->_M_flags));
} }
const _NFAT& _M_nfa;
std::map<_StateIdT, _ResultsPtr> _M_covered; std::map<_StateIdT, _ResultsPtr> _M_covered;
_TodoList _M_match_stack;
_TodoList _M_stack;
_StateIdT _M_start_state;
// To record global optimal solution. // To record global optimal solution.
_ResultsPtr _M_cur_results; _ResultsPtr _M_cur_results;
const _NFAT& _M_nfa;
_StateIdT _M_start_state;
}; };
//@} regex-detail //@} regex-detail
......
...@@ -36,12 +36,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -36,12 +36,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _BiIter, typename _Alloc, template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT> typename _CharT, typename _TraitsT>
// Try to find a match starting at some position in [_M_begin, _M_end].
//
// With regex_constants::match_continuous set this defers to
// _M_search_from_first().  Otherwise each start position is tried in
// order, _M_end included, and the first one for which _M_main()
// succeeds makes the function return true.  Returns false if none does.
bool _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_search()
{
  if (_M_flags & regex_constants::match_continuous)
    return _M_search_from_first();
  auto __cur = _M_begin;
  while (true)
    {
      _M_match_mode = false;
      _M_init(__cur);
      if (_M_main())
	return true;
      // _M_end is the last start position to try.  Stop here instead of
      // incrementing first: stepping an iterator past _M_end is undefined.
      if (__cur == _M_end)
	return false;
      ++__cur;
    }
}
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_dfs(_StateIdT __i) _M_dfs(_StateIdT __i)
{ {
if (__i == _S_invalid_state_id)
// This is not that certain. Need deeper investigate.
return false;
auto& __current = this->_M_current; auto& __current = this->_M_current;
const auto& __state = _M_nfa[__i]; const auto& __state = _M_nfa[__i];
bool __ret = false; bool __ret = false;
...@@ -161,6 +178,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -161,6 +178,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_main() _M_main()
{ {
_M_e_closure();
bool __ret = false; bool __ret = false;
if (!this->_M_match_mode if (!this->_M_match_mode
&& !(this->_M_flags & regex_constants::match_not_null)) && !(this->_M_flags & regex_constants::match_not_null))
...@@ -169,6 +187,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -169,6 +187,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
_M_move(); _M_move();
++this->_M_current; ++this->_M_current;
if (_M_stack._M_empty())
break;
_M_e_closure(); _M_e_closure();
if (!this->_M_match_mode) if (!this->_M_match_mode)
// To keep regex_search greedy, no "return true" here. // To keep regex_search greedy, no "return true" here.
...@@ -178,6 +198,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -178,6 +198,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__ret = _M_includes_some(); __ret = _M_includes_some();
if (__ret) if (__ret)
this->_M_set_results(_M_cur_results->_M_get()); this->_M_set_results(_M_cur_results->_M_get());
_M_match_stack._M_clear();
_GLIBCXX_DEBUG_ASSERT(_M_stack._M_empty());
return __ret; return __ret;
} }
...@@ -186,42 +208,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -186,42 +208,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_e_closure() _M_e_closure()
{ {
std::queue<_StateIdT> __q;
std::vector<bool> __in_q(_M_nfa.size(), false);
auto& __current = this->_M_current; auto& __current = this->_M_current;
for (auto& __it : _M_covered) while (!_M_stack._M_empty())
{
__in_q[__it.first] = true;
__q.push(__it.first);
}
while (!__q.empty())
{ {
auto __u = __q.front(); auto __u = _M_stack._M_pop();
__q.pop(); _GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u));
__in_q[__u] = false;
const auto& __state = _M_nfa[__u]; const auto& __state = _M_nfa[__u];
// Can be implemented using method, but there will be too many // Can be implemented using method, but there will be too many
// arguments. I would use macro function before C++11, but lambda is // arguments. I would use macro function before C++11, but lambda is
// a better choice, since hopefully compiler can inline it. // a better choice, since hopefully compiler can inline it.
auto __add_visited_state = [&](_StateIdT __v) auto __add_visited_state = [=](_StateIdT __v)
{ {
if (__v == _S_invalid_state_id) if (_M_covered.count(__v) == 0)
return;
if (_M_covered.count(__u) != 0
&& (_M_covered.count(__v) == 0
|| *_M_covered[__u] < *_M_covered[__v]))
{ {
_M_covered[__v] = _M_covered[__v] =
_ResultsPtr(new _ResultsEntry(*_M_covered[__u])); _ResultsPtr(new _ResultsEntry(*_M_covered[__u]));
_M_stack._M_push(__v);
return;
}
auto& __cu = _M_covered[__u];
auto& __cv = _M_covered[__v];
if (*__cu < *__cv)
{
__cv = _ResultsPtr(new _ResultsEntry(*__cu));
// if a state is updated, it's outgoing neighbors should be // if a state is updated, it's outgoing neighbors should be
// reconsidered too. Push them to the queue. // reconsidered too. Push them to the queue.
if (!__in_q[__v]) _M_stack._M_push(__v);
{
__in_q[__v] = true;
__q.push(__v);
}
} }
}; };
...@@ -233,13 +247,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -233,13 +247,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
case _S_opcode_alternative: case _S_opcode_alternative:
{ {
__add_visited_state(__state._M_next); __add_visited_state(__state._M_next);
auto __back = auto& __cu = *_M_covered[__u];
_M_covered[__u]->_M_quant_keys[__state._M_quant_index]; auto __back = __cu._M_quant_keys[__state._M_quant_index];
_M_covered[__u]->_M_inc(__state._M_quant_index, __cu._M_inc(__state._M_quant_index, __state._M_neg);
__state._M_neg);
__add_visited_state(__state._M_alt); __add_visited_state(__state._M_alt);
_M_covered[__u]->_M_quant_keys[__state._M_quant_index] __cu._M_quant_keys[__state._M_quant_index] = __back;
= __back;
} }
break; break;
case _S_opcode_subexpr_begin: case _S_opcode_subexpr_begin:
...@@ -281,6 +293,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -281,6 +293,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__add_visited_state(__state._M_next); __add_visited_state(__state._M_next);
break; break;
case _S_opcode_match: case _S_opcode_match:
_M_match_stack._M_push(__u);
break; break;
case _S_opcode_accept: case _S_opcode_accept:
break; break;
...@@ -296,15 +309,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -296,15 +309,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_move() _M_move()
{ {
decltype(_M_covered) __next; decltype(_M_covered) __next;
for (auto& __it : _M_covered) while (!_M_match_stack._M_empty())
{ {
const auto& __state = _M_nfa[__it.first]; auto __u = _M_match_stack._M_pop();
if (__state._M_opcode == _S_opcode_match const auto& __state = _M_nfa[__u];
&& __state._M_matches(*this->_M_current)) auto& __cu = _M_covered[__u];
if (__state._M_next != _S_invalid_state_id) if (__state._M_matches(*this->_M_current)
if (__next.count(__state._M_next) == 0 && (__next.count(__state._M_next) == 0
|| *__it.second < *__next[__state._M_next]) || *__cu < *__next[__state._M_next]))
__next[__state._M_next] = move(__it.second); {
__next[__state._M_next] = std::move(__cu);
_M_stack._M_push(__state._M_next);
}
} }
_M_covered = move(__next); _M_covered = move(__next);
} }
...@@ -314,31 +330,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -314,31 +330,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>:: bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_includes_some() _M_includes_some()
{ {
auto& __s = _M_nfa._M_final_states();
auto& __t = _M_covered;
bool __succ = false; bool __succ = false;
if (__s.size() > 0 && __t.size() > 0) for (auto __u : _M_nfa._M_final_states())
{ if (_M_covered.count(__u))
auto __first = __s.begin(); {
auto __second = __t.begin(); __succ = true;
while (__first != __s.end() && __second != __t.end()) auto& __cu = _M_covered[__u];
{ if (_M_cur_results == nullptr || *__cu < *_M_cur_results)
if (*__first < __second->first) _M_cur_results = _ResultsPtr(new _ResultsEntry(*__cu));
++__first; }
else if (*__first > __second->first)
++__second;
else
{
if (_M_cur_results == nullptr
|| *__second->second < *_M_cur_results)
_M_cur_results =
_ResultsPtr(new _ResultsEntry(*__second->second));
__succ = true;
++__first;
++__second;
}
}
}
return __succ; return __succ;
} }
......
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 2013-10-08 Tim Shen <timshen91@gmail.com>
#include <testsuite_performance.h>
#include <regex>
using namespace __gnu_test;
using namespace std;
// Walk every piece of S that lies between runs of whitespace and throw
// each piece away; only the cost of driving the token iterator matters.
void split(string s)
{
  const regex separator("\\s+");
  const sregex_token_iterator finish;

  // Sub-match index -1 makes the iterator yield the text between matches.
  sregex_token_iterator cursor(s.begin(), s.end(), separator, -1);
  while (cursor != finish)
    ++cursor;
}
int main()
{
string source = "\
// Copyright (C) 2013 Free Software Foundation, Inc.\n\
//\n\
// This file is part of the GNU ISO C++ Library. This library is free\n\
// software; you can redistribute it and/or modify it under the\n\
// terms of the GNU General Public License as published by the\n\
// Free Software Foundation; either version 3, or (at your option)\n\
// any later version.\n\
\n\
// This library is distributed in the hope that it will be useful,\n\
// but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
// GNU General Public License for more details.\n\
\n\
// You should have received a copy of the GNU General Public License along\n\
// with this library; see the file COPYING3. If not see\n\
// <http://www.gnu.org/licenses/>.\n\
\n\
// 2013-10-08 Tim Shen <timshen91@gmail.com>\n\
\n\
#include <testsuite_performance.h>\n\
#include <regex>\n\
\n\
using namespace __gnu_test;\n\
using namespace std;\n\
\n\
void split(string s)\n\
{\n\
regex re(\"\\s+\");\n\
for (auto it = sregex_token_iterator(s.begin(), s.end(), re, -1);\n\
it != sregex_token_iterator();\n\
++it)\n\
{\n\
}\n\
}\n\
\n\
int main()\n\
{\n\
string source = \"\";\n\
time_counter time;\n\
resource_counter resource;\n\
\n\
source = source + source;\n\
source = source + source;\n\
source = source + source;\n\
source = source + source;\n\
source = source + source;\n\
source = source + source;\n\
source = source + source;\n\
source = source + source;\n\
\n\
start_counters(time, resource);\n\
split(source);\n\
stop_counters(time, resource);\n\
report_performance(__FILE__, \"\", time, resource);\n\
\n\
return 0;\n\
}\n";
time_counter time;
resource_counter resource;
source = source + source;
source = source + source;
source = source + source;
source = source + source;
source = source + source;
source = source + source;
source = source + source;
source = source + source;
start_counters(time, resource);
split(source);
stop_counters(time, resource);
report_performance(__FILE__, "", time, resource);
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment