Commit 603b781b by Tim Shen Committed by Tim Shen

regex.h: Remove virtual class _Automaton.

2013-10-20  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex.h: Remove virtual class _Automaton.
	* include/bits/regex_automaton.h: Likewise.
	* include/bits/regex.tcc: Adjust comment for policy changing.
	* include/bits/regex_executor.h: Update comments of complexity.
	* include/bits/regex_executor.tcc: Adjust executor choosing
	policy. Now DFS executor is the default one.
	* testsuite/util/testsuite_regex.h (regex_match_debug,
	regex_search_debug): Adjust for policy changing.

From-SVN: r203875
parent f5b9d5e8
2013-10-20 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h: Remove virtual class _Automaton.
* include/bits/regex_automaton.h: Likewise.
* include/bits/regex.tcc: Adjust comment for policy changing.
* include/bits/regex_executor.h: Update comments of complexity.
* include/bits/regex_executor.tcc: Adjust executor choosing
policy. Now DFS executor is the default one.
* testsuite/util/testsuite_regex.h (regex_match_debug,
regex_search_debug): Adjust for policy changing.
2013-10-20 Chris Jefferson <chris@bubblescope.net> 2013-10-20 Chris Jefferson <chris@bubblescope.net>
Paolo Carlini <paolo.carlini@oracle.com> Paolo Carlini <paolo.carlini@oracle.com>
......
...@@ -727,7 +727,7 @@ _GLIBCXX_END_NAMESPACE_VERSION ...@@ -727,7 +727,7 @@ _GLIBCXX_END_NAMESPACE_VERSION
#endif #endif
protected: protected:
typedef std::shared_ptr<__detail::_Automaton<_Ch_type, _Rx_traits>> typedef std::shared_ptr<__detail::_NFA<_Ch_type, _Rx_traits>>
_AutomatonPtr; _AutomatonPtr;
template<typename _BiIter, typename _Alloc, template<typename _BiIter, typename _Alloc,
......
...@@ -38,8 +38,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -38,8 +38,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Result of merging regex_match and regex_search. // Result of merging regex_match and regex_search.
// //
// __policy now can be _S_auto(auto dispatch by checking back-references) // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
// and _S_force_dfs(just use _DFSExecutor). // the other one if possible, for test purpose).
// //
// That __match_mode is true means regex_match, else regex_search. // That __match_mode is true means regex_match, else regex_search.
template<typename _BiIter, typename _Alloc, template<typename _BiIter, typename _Alloc,
......
...@@ -104,31 +104,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -104,31 +104,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif #endif
}; };
/// Base class for, um, automata. Could be an NFA or a DFA. Your choice.
template<typename _CharT, typename _TraitsT>
class _Automaton
{
public:
typedef size_t _SizeT;
public:
virtual
~_Automaton()
{ }
virtual _SizeT
_M_sub_count() const = 0;
#ifdef _GLIBCXX_DEBUG
virtual std::ostream&
_M_dot(std::ostream& __ostr) const = 0;
#endif
};
template<typename _CharT, typename _TraitsT> template<typename _CharT, typename _TraitsT>
class _NFA class _NFA
: public _Automaton<_CharT, _TraitsT>, : public std::vector<_State<_CharT, _TraitsT>>
public std::vector<_State<_CharT, _TraitsT>>
{ {
public: public:
typedef _State<_CharT, _TraitsT> _StateT; typedef _State<_CharT, _TraitsT> _StateT;
......
...@@ -179,8 +179,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -179,8 +179,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// TODO: This approach is exponentially slow for certain input. // TODO: This approach is exponentially slow for certain input.
// Try to compile the NFA to a DFA. // Try to compile the NFA to a DFA.
// //
// Time complexity: exponential // Time complexity: o(match_length), O(2^(_M_nfa->size()))
// Space complexity: O(__end - __begin) // Space complexity: \theta(match_results.size() + match_length)
template<typename _BiIter, typename _Alloc, template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT> typename _CharT, typename _TraitsT>
class _DFSExecutor class _DFSExecutor
...@@ -200,16 +200,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -200,16 +200,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _RegexT& __re, const _RegexT& __re,
_FlagT __flags) _FlagT __flags)
: _BaseT(__begin, __end, __results, __re, __flags), : _BaseT(__begin, __end, __results, __re, __flags),
_M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>> _M_nfa(__re._M_automaton), _M_start_state(_M_nfa->_M_start())
(__re._M_automaton)),
_M_start_state(_M_nfa._M_start())
{ } { }
private: private:
void void
_M_init(_BiIter __cur) _M_init(_BiIter __cur)
{ {
_M_cur_results.resize(_M_nfa._M_sub_count() + 2); _M_cur_results.resize(_M_nfa->_M_sub_count() + 2);
this->_M_current = __cur; this->_M_current = __cur;
} }
...@@ -235,9 +233,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -235,9 +233,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
// To record current solution. // To record current solution.
_ResultsVec _M_cur_results; std::shared_ptr<_NFAT> _M_nfa;
const _NFAT& _M_nfa; _ResultsVec _M_cur_results;
_StateIdT _M_start_state; _StateIdT _M_start_state;
}; };
// Like the DFS approach, it try every possible state transition; Unlike DFS, // Like the DFS approach, it try every possible state transition; Unlike DFS,
...@@ -251,8 +249,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -251,8 +249,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// matching head. When states transit, solutions will be compared and // matching head. When states transit, solutions will be compared and
// deduplicated(based on which greedy mode we have). // deduplicated(based on which greedy mode we have).
// //
// Time complexity: O((__end - __begin) * _M_nfa.size()) // Time complexity: o(match_length * (quantifier_number
// Space complexity: O(_M_nfa.size() * _M_nfa.mark_count()) // + match_results.size())
// O(match_length * _M_nfa->size()
// * (quantifier_number + match_results.size())
// Space complexity: o(quantifier_number + match_results.size())
// O(_M_nfa->size()
// * (quantifier_number + match_results.size())
template<typename _BiIter, typename _Alloc, template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT> typename _CharT, typename _TraitsT>
class _BFSExecutor class _BFSExecutor
...@@ -382,11 +385,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -382,11 +385,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _RegexT& __re, const _RegexT& __re,
_FlagT __flags) _FlagT __flags)
: _BaseT(__begin, __end, __results, __re, __flags), : _BaseT(__begin, __end, __results, __re, __flags),
_M_nfa(*std::static_pointer_cast<_NFA<_CharT, _TraitsT>> _M_nfa(__re._M_automaton), _M_match_stack(_M_nfa->size()),
(__re._M_automaton)), _M_stack(_M_nfa->size()), _M_start_state(_M_nfa->_M_start())
_M_match_stack(_M_nfa.size()),
_M_stack(_M_nfa.size()),
_M_start_state(_M_nfa._M_start())
{ } { }
private: private:
...@@ -398,7 +398,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -398,7 +398,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_ResultsVec& __res(this->_M_results); _ResultsVec& __res(this->_M_results);
_M_covered[this->_M_start_state] = _M_covered[this->_M_start_state] =
_ResultsPtr(new _ResultsEntry(__res.size(), _ResultsPtr(new _ResultsEntry(__res.size(),
_M_nfa._M_quant_count)); _M_nfa->_M_quant_count));
_M_stack._M_push(this->_M_start_state); _M_stack._M_push(this->_M_start_state);
} }
...@@ -428,7 +428,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -428,7 +428,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
this->_M_flags)); this->_M_flags));
} }
const _NFAT& _M_nfa; std::shared_ptr<_NFAT> _M_nfa;
std::map<_StateIdT, _ResultsPtr> _M_covered; std::map<_StateIdT, _ResultsPtr> _M_covered;
_TodoList _M_match_stack; _TodoList _M_match_stack;
_TodoList _M_stack; _TodoList _M_stack;
......
...@@ -28,6 +28,13 @@ ...@@ -28,6 +28,13 @@
* Do not attempt to use it directly. @headername{regex} * Do not attempt to use it directly. @headername{regex}
*/ */
// See below __get_executor to get what this is talking about. The default
// value 1 indicated a conservative optimization without giving up worst case
// performance.
#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT
#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1
#endif
namespace std _GLIBCXX_VISIBILITY(default) namespace std _GLIBCXX_VISIBILITY(default)
{ {
namespace __detail namespace __detail
...@@ -60,7 +67,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -60,7 +67,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_dfs(_StateIdT __i) _M_dfs(_StateIdT __i)
{ {
auto& __current = this->_M_current; auto& __current = this->_M_current;
const auto& __state = _M_nfa[__i]; const auto& __state = (*_M_nfa)[__i];
bool __ret = false; bool __ret = false;
switch (__state._M_opcode) switch (__state._M_opcode)
{ {
...@@ -216,7 +223,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -216,7 +223,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
auto __u = _M_stack._M_pop(); auto __u = _M_stack._M_pop();
_GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u)); _GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u));
const auto& __state = _M_nfa[__u]; const auto& __state = (*_M_nfa)[__u];
// Can be implemented using method, but there will be too many // Can be implemented using method, but there will be too many
// arguments. I would use macro function before C++11, but lambda is // arguments. I would use macro function before C++11, but lambda is
...@@ -314,7 +321,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -314,7 +321,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
while (!_M_match_stack._M_empty()) while (!_M_match_stack._M_empty())
{ {
auto __u = _M_match_stack._M_pop(); auto __u = _M_match_stack._M_pop();
const auto& __state = _M_nfa[__u]; const auto& __state = (*_M_nfa)[__u];
auto& __cu = _M_covered[__u]; auto& __cu = _M_covered[__u];
if (__state._M_matches(*this->_M_current) if (__state._M_matches(*this->_M_current)
&& (__next.count(__state._M_next) == 0 && (__next.count(__state._M_next) == 0
...@@ -333,7 +340,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -333,7 +340,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_includes_some() _M_includes_some()
{ {
bool __succ = false; bool __succ = false;
for (auto __u : _M_nfa._M_final_states()) for (auto __u : _M_nfa->_M_final_states())
if (_M_covered.count(__u)) if (_M_covered.count(__u))
{ {
__succ = true; __succ = true;
...@@ -380,8 +387,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -380,8 +387,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
enum class _RegexExecutorPolicy : int enum class _RegexExecutorPolicy : int
{ _S_auto, _S_force_dfs }; { _S_auto, _S_alternate };
// This function decide which executor to use under given circumstances.
// The _S_auto policy now is the following: if a NFA has no back-references
// and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT quantifiers
// (*, +, ?), the _BFSExecutor will be used, other wise _DFSExecutor. This is
// because _DFSExecutor has a exponential upper bound, but better best-case
// performace. Meanwhile, _BFSExecutor can effectively prevent from
// exponential-long time matching (which must contains many quantifiers), but
// it's slower in average.
//
// For simple regex, _BFSExecutor could be 2 or more times slower than
// _DFSExecutor.
//
// Of course, _BFSExecutor cannot handle back-references.
template<typename _BiIter, typename _Alloc, template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT, typename _CharT, typename _TraitsT,
_RegexExecutorPolicy __policy> _RegexExecutorPolicy __policy>
...@@ -396,12 +416,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -396,12 +416,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_ExecutorPtr; _ExecutorPtr;
typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT; typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT;
typedef _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _BFSExecutorT; typedef _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _BFSExecutorT;
auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>> if (!__re._M_automaton->_M_has_backref
(__re._M_automaton); && (__policy == _RegexExecutorPolicy::_S_alternate
if (__policy == _RegexExecutorPolicy::_S_force_dfs || __re._M_automaton->_M_quant_count
|| (__policy == _RegexExecutorPolicy::_S_auto && __p->_M_has_backref)) > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT))
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags)); return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags));
return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags)); return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags));
} }
_GLIBCXX_END_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION
......
...@@ -148,7 +148,7 @@ namespace __gnu_test ...@@ -148,7 +148,7 @@ namespace __gnu_test
(__s, __e, __m, __re, __flags); (__s, __e, __m, __re, __flags);
match_results<_Bi_iter, _Alloc> __mm; match_results<_Bi_iter, _Alloc> __mm;
auto __res2 = __regex_algo_impl<_Bi_iter, _Alloc, _Ch_type, _Rx_traits, auto __res2 = __regex_algo_impl<_Bi_iter, _Alloc, _Ch_type, _Rx_traits,
_RegexExecutorPolicy::_S_force_dfs, true> _RegexExecutorPolicy::_S_alternate, true>
(__s, __e, __mm, __re, __flags); (__s, __e, __mm, __re, __flags);
if (__res1 == __res2 && __m == __mm) if (__res1 == __res2 && __m == __mm)
return __res1; return __res1;
...@@ -234,7 +234,7 @@ namespace __gnu_test ...@@ -234,7 +234,7 @@ namespace __gnu_test
(__s, __e, __m, __re, __flags); (__s, __e, __m, __re, __flags);
match_results<_Bi_iter, _Alloc> __mm; match_results<_Bi_iter, _Alloc> __mm;
auto __res2 = __regex_algo_impl<_Bi_iter, _Alloc, _Ch_type, _Rx_traits, auto __res2 = __regex_algo_impl<_Bi_iter, _Alloc, _Ch_type, _Rx_traits,
_RegexExecutorPolicy::_S_force_dfs, false> _RegexExecutorPolicy::_S_alternate, false>
(__s, __e, __mm, __re, __flags); (__s, __e, __mm, __re, __flags);
if (__res1 == __res2 && __m == __mm) if (__res1 == __res2 && __m == __mm)
return __res1; return __res1;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment