Commit 097f0bcf by Jonathan Wakely Committed by Jonathan Wakely

regex_compiler.h (__detail::_BracketMatcher): Reorder members to avoid wasted…

regex_compiler.h (__detail::_BracketMatcher): Reorder members to avoid wasted space when not using a cache.

	* include/bits/regex_compiler.h (__detail::_BracketMatcher): Reorder
	members to avoid wasted space when not using a cache.
	(__detail::_BracketMatcher::_M_ready()): Sort and deduplicate set.
	* include/bits/regex_compiler.tcc
	(__detail::_BracketMatcher::_M_apply(_CharT, false_type)): Use binary
	search on set.
	* include/bits/regex_executor.h (__detail::_Executor::_Match_mode):
	New enumeration type to indicate match mode.
	(__detail::_Executor::_State_info): New type holding members only
	needed in BFS-mode. Replace unique_ptr<vector<bool>> with
	unique_ptr<bool[]>.
	(__detail::_Executor::_M_rep_once_more, __detail::_Executor::_M_dfs):
	Replace template parameter with run-time function parameter.
	(__detail::_Executor::_M_main): Likewise. Dispatch to ...
	(__detail::_Executor::_M_main_dispatch): New overloaded functions to
	implement DFS and BFS mode.
	* include/bits/regex_executor.tcc (__detail::_Executor::_M_main):
	Split implementation into ...
	(__detail::_Executor::_M_main_dispatch): New overloaded functions.
	(__detail::_Executor::_M_lookahead): Create nested executor on stack.
	(__detail::_Executor::_M_rep_once_more): Pass match mode as function
	argument instead of template argument.
	(__detail::_Executor::_M_dfs): Likewise.
	* include/bits/regex_scanner.tcc: Fix typos in comments.
	* testsuite/performance/28_regex/range.cc: New.

From-SVN: r211143
parent 0d732cca
...@@ -9,6 +9,32 @@ ...@@ -9,6 +9,32 @@
* testsuite/30_threads/async/forced_unwind.cc: New. * testsuite/30_threads/async/forced_unwind.cc: New.
* testsuite/30_threads/packaged_task/forced_unwind.cc: New. * testsuite/30_threads/packaged_task/forced_unwind.cc: New.
* include/bits/regex_compiler.h (__detail::_BracketMatcher): Reorder
members to avoid wasted space when not using a cache.
(__detail::_BracketMatcher::_M_ready()): Sort and deduplicate set.
* include/bits/regex_compiler.tcc
(__detail::_BracketMatcher::_M_apply(_CharT, false_type)): Use binary
search on set.
* include/bits/regex_executor.h (__detail::_Executor::_Match_mode):
New enumeration type to indicate match mode.
(__detail::_Executor::_State_info): New type holding members only
needed in BFS-mode. Replace unique_ptr<vector<bool>> with
unique_ptr<bool[]>.
(__detail::_Executor::_M_rep_once_more, __detail::_Executor::_M_dfs):
Replace template parameter with run-time function parameter.
(__detail::_Executor::_M_main): Likewise. Dispatch to ...
(__detail::_Executor::_M_main_dispatch): New overloaded functions to
implement DFS and BFS mode.
* include/bits/regex_executor.tcc (__detail::_Executor::_M_main):
Split implementation into ...
(__detail::_Executor::_M_main_dispatch): New overloaded functions.
(__detail::_Executor::_M_lookahead): Create nested executor on stack.
(__detail::_Executor::_M_rep_once_more): Pass match mode as function
argument instead of template argument.
(__detail::_Executor::_M_dfs): Likewise.
* include/bits/regex_scanner.tcc: Fix typos in comments.
* testsuite/performance/28_regex/range.cc: New.
2014-06-02 Rüdiger Sonderfeld <ruediger@c-plusplus.de> 2014-06-02 Rüdiger Sonderfeld <ruediger@c-plusplus.de>
Jonathan Wakely <jwakely@redhat.com> Jonathan Wakely <jwakely@redhat.com>
......
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
// If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the thompson NFA // If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the thompson NFA
// algorithm will be used. This algorithm is not enabled by default, // algorithm will be used. This algorithm is not enabled by default,
// and cannot be used if the regex contains back-references, but has better // and cannot be used if the regex contains back-references, but has better
// (polynomial instead of exponential) worst case performace. // (polynomial instead of exponential) worst case performance.
// See __regex_algo_impl below. // See __regex_algo_impl below.
namespace std _GLIBCXX_VISIBILITY(default) namespace std _GLIBCXX_VISIBILITY(default)
......
...@@ -43,7 +43,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -43,7 +43,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
struct _BracketMatcher; struct _BracketMatcher;
/** /**
* @brief Builds an NFA from an input iterator interval. * @brief Builds an NFA from an input iterator range.
* *
* The %_TraitsT type should fulfill requirements [28.3]. * The %_TraitsT type should fulfill requirements [28.3].
*/ */
...@@ -329,7 +329,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -329,7 +329,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
operator()(_CharT __ch) const operator()(_CharT __ch) const
{ {
_GLIBCXX_DEBUG_ASSERT(_M_is_ready); _GLIBCXX_DEBUG_ASSERT(_M_is_ready);
return _M_apply(__ch, _IsChar()); return _M_apply(__ch, _UseCache());
} }
void void
...@@ -400,21 +400,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -400,21 +400,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void void
_M_ready() _M_ready()
{ {
_M_make_cache(_IsChar()); std::sort(_M_char_set.begin(), _M_char_set.end());
auto __end = std::unique(_M_char_set.begin(), _M_char_set.end());
_M_char_set.erase(__end, _M_char_set.end());
_M_make_cache(_UseCache());
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
_M_is_ready = true; _M_is_ready = true;
#endif #endif
} }
private: private:
typedef typename is_same<_CharT, char>::type _IsChar; // Currently we only use the cache for char
typedef typename std::is_same<_CharT, char>::type _UseCache;
static constexpr size_t
_S_cache_size() { return 1ul << (sizeof(_CharT) * __CHAR_BIT__); }
struct _Dummy { }; struct _Dummy { };
typedef typename conditional<_IsChar::value, typedef typename std::conditional<_UseCache::value,
std::bitset<1 << (8 * sizeof(_CharT))>, std::bitset<_S_cache_size()>,
_Dummy>::type _CacheT; _Dummy>::type _CacheT;
typedef typename make_unsigned<_CharT>::type _UnsignedCharT; typedef typename std::make_unsigned<_CharT>::type _UnsignedCharT;
private:
bool bool
_M_apply(_CharT __ch, false_type) const; _M_apply(_CharT __ch, false_type) const;
...@@ -425,9 +432,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -425,9 +432,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void void
_M_make_cache(true_type) _M_make_cache(true_type)
{ {
for (size_t __i = 0; __i < _M_cache.size(); __i++) for (unsigned __i = 0; __i < _M_cache.size(); __i++)
_M_cache[static_cast<_UnsignedCharT>(__i)] = _M_cache[__i] = _M_apply(static_cast<_CharT>(__i), false_type());
_M_apply(__i, false_type());
} }
void void
...@@ -435,7 +441,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -435,7 +441,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ } { }
private: private:
_CacheT _M_cache;
std::vector<_CharT> _M_char_set; std::vector<_CharT> _M_char_set;
std::vector<_StringT> _M_equiv_set; std::vector<_StringT> _M_equiv_set;
std::vector<pair<_StrTransT, _StrTransT>> _M_range_set; std::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
...@@ -444,6 +449,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -444,6 +449,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_TransT _M_translator; _TransT _M_translator;
const _TraitsT& _M_traits; const _TraitsT& _M_traits;
bool _M_is_non_matching; bool _M_is_non_matching;
_CacheT _M_cache;
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
bool _M_is_ready; bool _M_is_ready;
#endif #endif
......
...@@ -37,9 +37,9 @@ ...@@ -37,9 +37,9 @@
// When compiling, states are *chained* instead of tree- or graph-constructed. // When compiling, states are *chained* instead of tree- or graph-constructed.
// It's more like structured programs: there's if statement and loop statement. // It's more like structured programs: there's if statement and loop statement.
// //
// For alternative structure(say "a|b"), aka "if statement", two branchs should // For alternative structure (say "a|b"), aka "if statement", two branches
// be constructed. However, these two shall merge to an "end_tag" at the end of // should be constructed. However, these two shall merge to an "end_tag" at
// this operator: // the end of this operator:
// //
// branch1 // branch1
// / \ // / \
...@@ -151,7 +151,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -151,7 +151,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else if (_M_match_token(_ScannerT::_S_token_line_end)) else if (_M_match_token(_ScannerT::_S_token_line_end))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_line_end())); _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_line_end()));
else if (_M_match_token(_ScannerT::_S_token_word_bound)) else if (_M_match_token(_ScannerT::_S_token_word_bound))
// _M_value[0] == 'n' means it's negtive, say "not word boundary". // _M_value[0] == 'n' means it's negative, say "not word boundary".
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa. _M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
_M_insert_word_bound(_M_value[0] == 'n'))); _M_insert_word_bound(_M_value[0] == 'n')));
else if (_M_match_token(_ScannerT::_S_token_subexpr_lookahead_begin)) else if (_M_match_token(_ScannerT::_S_token_subexpr_lookahead_begin))
...@@ -256,7 +256,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -256,7 +256,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __end = _M_nfa._M_insert_dummy(); auto __end = _M_nfa._M_insert_dummy();
// _M_alt is the "match more" branch, and _M_next is the // _M_alt is the "match more" branch, and _M_next is the
// "match less" one. Switch _M_alt and _M_next of all created // "match less" one. Switch _M_alt and _M_next of all created
// nodes. This is a hacking but IMO works well. // nodes. This is a hack but IMO works well.
std::stack<_StateIdT> __stack; std::stack<_StateIdT> __stack;
for (long __i = 0; __i < __n; ++__i) for (long __i = 0; __i < __n; ++__i)
{ {
...@@ -511,12 +511,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -511,12 +511,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_BracketMatcher<_TraitsT, __icase, __collate>:: _BracketMatcher<_TraitsT, __icase, __collate>::
_M_apply(_CharT __ch, false_type) const _M_apply(_CharT __ch, false_type) const
{ {
bool __ret = false; bool __ret = std::binary_search(_M_char_set.begin(), _M_char_set.end(),
if (std::find(_M_char_set.begin(), _M_char_set.end(), _M_translator._M_translate(__ch));
_M_translator._M_translate(__ch)) if (!__ret)
!= _M_char_set.end())
__ret = true;
else
{ {
auto __s = _M_translator._M_transform(__ch); auto __s = _M_translator._M_transform(__ch);
for (auto& __it : _M_range_set) for (auto& __it : _M_range_set)
......
...@@ -42,8 +42,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -42,8 +42,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/ */
/** /**
* @brief Takes a regex and an input string in and * @brief Takes a regex and an input string and does the matching.
* do the matching.
* *
* The %_Executor class has two modes: DFS mode and BFS mode, controlled * The %_Executor class has two modes: DFS mode and BFS mode, controlled
* by the template parameter %__dfs_mode. * by the template parameter %__dfs_mode.
...@@ -52,6 +51,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -52,6 +51,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool __dfs_mode> bool __dfs_mode>
class _Executor class _Executor
{ {
using __search_mode = integral_constant<bool, __dfs_mode>;
using __dfs = true_type;
using __bfs = false_type;
enum class _Match_mode : unsigned char { _Exact, _Prefix };
public: public:
typedef typename iterator_traits<_BiIter>::value_type _CharT; typedef typename iterator_traits<_BiIter>::value_type _CharT;
typedef basic_regex<_CharT, _TraitsT> _RegexT; typedef basic_regex<_CharT, _TraitsT> _RegexT;
...@@ -71,24 +76,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -71,24 +76,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_re(__re), _M_re(__re),
_M_nfa(*__re._M_automaton), _M_nfa(*__re._M_automaton),
_M_results(__results), _M_results(__results),
_M_match_queue(__dfs_mode ? nullptr
: new vector<pair<_StateIdT, _ResultsVec>>()),
_M_rep_count(_M_nfa.size()), _M_rep_count(_M_nfa.size()),
_M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())), _M_states(_M_nfa._M_start(), _M_nfa.size()),
_M_flags((__flags & regex_constants::match_prev_avail) _M_flags((__flags & regex_constants::match_prev_avail)
? (__flags ? (__flags
& ~regex_constants::match_not_bol & ~regex_constants::match_not_bol
& ~regex_constants::match_not_bow) & ~regex_constants::match_not_bow)
: __flags), : __flags)
_M_start_state(_M_nfa._M_start())
{ } { }
// Set matched when string exactly match the pattern. // Set matched when string exactly matches the pattern.
bool bool
_M_match() _M_match()
{ {
_M_current = _M_begin; _M_current = _M_begin;
return _M_main<true>(); return _M_main(_Match_mode::_Exact);
} }
// Set matched when some prefix of the string matches the pattern. // Set matched when some prefix of the string matches the pattern.
...@@ -96,24 +98,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -96,24 +98,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_search_from_first() _M_search_from_first()
{ {
_M_current = _M_begin; _M_current = _M_begin;
return _M_main<false>(); return _M_main(_Match_mode::_Prefix);
} }
bool bool
_M_search(); _M_search();
private: private:
template<bool __match_mode>
void void
_M_rep_once_more(_StateIdT); _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
template<bool __match_mode>
void void
_M_dfs(_StateIdT __start); _M_dfs(_Match_mode __match_mode, _StateIdT __start);
bool
_M_main(_Match_mode __match_mode)
{ return _M_main_dispatch(__match_mode, __search_mode{}); }
template<bool __match_mode>
bool bool
_M_main(); _M_main_dispatch(_Match_mode __match_mode, __dfs);
bool
_M_main_dispatch(_Match_mode __match_mode, __bfs);
bool bool
_M_is_word(_CharT __ch) const _M_is_word(_CharT __ch) const
...@@ -144,6 +150,53 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -144,6 +150,53 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool bool
_M_lookahead(_State<_TraitsT> __state); _M_lookahead(_State<_TraitsT> __state);
// Holds additional information used in BFS-mode.
template<typename _SearchMode, typename _ResultsVec>
struct _State_info;
template<typename _ResultsVec>
struct _State_info<__bfs, _ResultsVec>
{
explicit
_State_info(_StateIdT __start, size_t __n)
: _M_start(__start), _M_visited_states(new bool[__n]())
{ }
bool _M_visited(_StateIdT __i)
{
if (_M_visited_states[__i])
return true;
_M_visited_states[__i] = true;
return false;
}
void _M_queue(_StateIdT __i, const _ResultsVec& __res)
{ _M_match_queue.emplace_back(__i, __res); }
// Saves states that need to be considered for the next character.
vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
// Indicates which states are already visited.
unique_ptr<bool[]> _M_visited_states;
// To record current solution.
_StateIdT _M_start;
};
template<typename _ResultsVec>
struct _State_info<__dfs, _ResultsVec>
{
explicit
_State_info(_StateIdT __start, size_t) : _M_start(__start)
{ }
// Dummy implementations for DFS mode.
bool _M_visited(_StateIdT) const { return false; }
void _M_queue(_StateIdT, const _ResultsVec&) { }
// To record current solution.
_StateIdT _M_start;
};
public: public:
_ResultsVec _M_cur_results; _ResultsVec _M_cur_results;
_BiIter _M_current; _BiIter _M_current;
...@@ -152,15 +205,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -152,15 +205,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _RegexT& _M_re; const _RegexT& _M_re;
const _NFAT& _M_nfa; const _NFAT& _M_nfa;
_ResultsVec& _M_results; _ResultsVec& _M_results;
// Used in BFS, saving states that need to be considered for the next
// character.
unique_ptr<vector<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
// Used in BFS, indicating that which state is already visited.
vector<pair<_BiIter, int>> _M_rep_count; vector<pair<_BiIter, int>> _M_rep_count;
unique_ptr<vector<bool>> _M_visited; _State_info<__search_mode, _ResultsVec> _M_states;
_FlagT _M_flags; _FlagT _M_flags;
// To record current solution.
_StateIdT _M_start_state;
// Do we have a solution so far? // Do we have a solution so far?
bool _M_has_sol; bool _M_has_sol;
}; };
......
...@@ -45,7 +45,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -45,7 +45,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
do do
{ {
_M_current = __cur; _M_current = __cur;
if (_M_main<false>()) if (_M_main(_Match_mode::_Prefix))
return true; return true;
} }
// Continue when __cur == _M_end // Continue when __cur == _M_end
...@@ -53,8 +53,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -53,8 +53,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false; return false;
} }
// This function operates in different modes, DFS mode or BFS mode, indicated // The _M_main function operates in different modes, DFS mode or BFS mode,
// by template parameter __dfs_mode. See _M_main for details. // indicated by template parameter __dfs_mode, and dispatches to one of the
// _M_main_dispatch overloads.
// //
// ------------------------------------------------------------ // ------------------------------------------------------------
// //
...@@ -62,12 +63,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -62,12 +63,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// //
// It applies a Depth-First-Search (aka backtracking) on given NFA and input // It applies a Depth-First-Search (aka backtracking) on given NFA and input
// string. // string.
// At the very beginning the executor stands in the start state, then it tries // At the very beginning the executor stands in the start state, then it
// every possible state transition in current state recursively. Some state // tries every possible state transition in current state recursively. Some
// transitions consume input string, say, a single-char-matcher or a // state transitions consume input string, say, a single-char-matcher or a
// back-reference matcher; some don't, like assertion or other anchor nodes. // back-reference matcher; some don't, like assertion or other anchor nodes.
// When the input is exhausted and/or the current state is an accepting state, // When the input is exhausted and/or the current state is an accepting
// the whole executor returns true. // state, the whole executor returns true.
// //
// TODO: This approach is exponentially slow for certain input. // TODO: This approach is exponentially slow for certain input.
// Try to compile the NFA to a DFA. // Try to compile the NFA to a DFA.
...@@ -75,6 +76,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -75,6 +76,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Time complexity: \Omega(match_length), O(2^(_M_nfa.size())) // Time complexity: \Omega(match_length), O(2^(_M_nfa.size()))
// Space complexity: \theta(match_results.size() + match_length) // Space complexity: \theta(match_results.size() + match_length)
// //
template<typename _BiIter, typename _Alloc, typename _TraitsT,
bool __dfs_mode>
bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_main_dispatch(_Match_mode __match_mode, __dfs)
{
_M_has_sol = false;
_M_cur_results = _M_results;
_M_dfs(__match_mode, _M_states._M_start);
return _M_has_sol;
}
// ------------------------------------------------------------ // ------------------------------------------------------------
// //
// BFS mode: // BFS mode:
...@@ -84,11 +96,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -84,11 +96,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// //
// It first computes epsilon closure (states that can be achieved without // It first computes epsilon closure (states that can be achieved without
// consuming characters) for every state that's still matching, // consuming characters) for every state that's still matching,
// using the same DFS algorithm, but doesn't re-enter states (find a true in // using the same DFS algorithm, but doesn't re-enter states (using
// _M_visited), nor follows _S_opcode_match. // _M_states._M_visited to check), nor follow _S_opcode_match.
// //
// Then apply DFS using every _S_opcode_match (in _M_match_queue) as the start // Then apply DFS using every _S_opcode_match (in _M_states._M_match_queue)
// state. // as the start state.
// //
// It significantly reduces potential duplicate states, so has a better // It significantly reduces potential duplicate states, so has a better
// upper bound; but it requires more overhead. // upper bound; but it requires more overhead.
...@@ -99,44 +111,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -99,44 +111,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// O(_M_nfa.size() * match_results.size()) // O(_M_nfa.size() * match_results.size())
template<typename _BiIter, typename _Alloc, typename _TraitsT, template<typename _BiIter, typename _Alloc, typename _TraitsT,
bool __dfs_mode> bool __dfs_mode>
template<bool __match_mode>
bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: bool _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_main() _M_main_dispatch(_Match_mode __match_mode, __bfs)
{
if (__dfs_mode)
{
_M_has_sol = false;
_M_cur_results = _M_results;
_M_dfs<__match_mode>(_M_start_state);
return _M_has_sol;
}
else
{ {
_M_match_queue->push_back(make_pair(_M_start_state, _M_results)); _M_states._M_queue(_M_states._M_start, _M_results);
bool __ret = false; bool __ret = false;
while (1) while (1)
{ {
_M_has_sol = false; _M_has_sol = false;
if (_M_match_queue->empty()) if (_M_states._M_match_queue.empty())
break; break;
_M_visited->assign(_M_visited->size(), false); std::fill_n(_M_states._M_visited_states.get(), _M_nfa.size(), false);
auto _M_old_queue = std::move(*_M_match_queue); auto __old_queue = std::move(_M_states._M_match_queue);
for (auto __task : _M_old_queue) for (auto& __task : __old_queue)
{ {
_M_cur_results = __task.second; _M_cur_results = std::move(__task.second);
_M_dfs<__match_mode>(__task.first); _M_dfs(__match_mode, __task.first);
} }
if (!__match_mode) if (__match_mode == _Match_mode::_Prefix)
__ret |= _M_has_sol; __ret |= _M_has_sol;
if (_M_current == _M_end) if (_M_current == _M_end)
break; break;
++_M_current; ++_M_current;
} }
if (__match_mode) if (__match_mode == _Match_mode::_Exact)
__ret = _M_has_sol; __ret = _M_has_sol;
return __ret; return __ret;
} }
}
// Return whether now match the given sub-NFA. // Return whether now match the given sub-NFA.
template<typename _BiIter, typename _Alloc, typename _TraitsT, template<typename _BiIter, typename _Alloc, typename _TraitsT,
...@@ -145,13 +146,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -145,13 +146,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_lookahead(_State<_TraitsT> __state) _M_lookahead(_State<_TraitsT> __state)
{ {
_ResultsVec __what(_M_cur_results.size()); _ResultsVec __what(_M_cur_results.size());
auto __sub = std::unique_ptr<_Executor>(new _Executor(_M_current, _Executor __sub(_M_current, _M_end, __what, _M_re, _M_flags);
_M_end, __sub._M_states._M_start = __state._M_alt;
__what, if (__sub._M_search_from_first())
_M_re,
_M_flags));
__sub->_M_start_state = __state._M_alt;
if (__sub->_M_search_from_first())
{ {
for (size_t __i = 0; __i < __what.size(); __i++) for (size_t __i = 0; __i < __what.size(); __i++)
if (__what[__i].matched) if (__what[__i].matched)
...@@ -169,9 +166,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -169,9 +166,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// we need to spare one more time for potential group capture. // we need to spare one more time for potential group capture.
template<typename _BiIter, typename _Alloc, typename _TraitsT, template<typename _BiIter, typename _Alloc, typename _TraitsT,
bool __dfs_mode> bool __dfs_mode>
template<bool __match_mode>
void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_rep_once_more(_StateIdT __i) _M_rep_once_more(_Match_mode __match_mode, _StateIdT __i)
{ {
const auto& __state = _M_nfa[__i]; const auto& __state = _M_nfa[__i];
auto& __rep_count = _M_rep_count[__i]; auto& __rep_count = _M_rep_count[__i];
...@@ -180,7 +176,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -180,7 +176,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __back = __rep_count; auto __back = __rep_count;
__rep_count.first = _M_current; __rep_count.first = _M_current;
__rep_count.second = 1; __rep_count.second = 1;
_M_dfs<__match_mode>(__state._M_alt); _M_dfs(__match_mode, __state._M_alt);
__rep_count = __back; __rep_count = __back;
} }
else else
...@@ -188,7 +184,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -188,7 +184,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__rep_count.second < 2) if (__rep_count.second < 2)
{ {
__rep_count.second++; __rep_count.second++;
_M_dfs<__match_mode>(__state._M_alt); _M_dfs(__match_mode, __state._M_alt);
__rep_count.second--; __rep_count.second--;
} }
} }
...@@ -196,16 +192,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -196,16 +192,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _BiIter, typename _Alloc, typename _TraitsT, template<typename _BiIter, typename _Alloc, typename _TraitsT,
bool __dfs_mode> bool __dfs_mode>
template<bool __match_mode>
void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>:: void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_dfs(_StateIdT __i) _M_dfs(_Match_mode __match_mode, _StateIdT __i)
{ {
if (!__dfs_mode) if (_M_states._M_visited(__i))
{
if ((*_M_visited)[__i])
return; return;
(*_M_visited)[__i] = true;
}
const auto& __state = _M_nfa[__i]; const auto& __state = _M_nfa[__i];
// Every change on _M_cur_results and _M_current will be rolled back after // Every change on _M_cur_results and _M_current will be rolled back after
...@@ -221,33 +212,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -221,33 +212,33 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Greedy. // Greedy.
if (!__state._M_neg) if (!__state._M_neg)
{ {
_M_rep_once_more<__match_mode>(__i); _M_rep_once_more(__match_mode, __i);
// If it's DFS executor and already accepted, we're done. // If it's DFS executor and already accepted, we're done.
if (!__dfs_mode || !_M_has_sol) if (!__dfs_mode || !_M_has_sol)
_M_dfs<__match_mode>(__state._M_next); _M_dfs(__match_mode, __state._M_next);
} }
else // Non-greedy mode else // Non-greedy mode
{ {
if (__dfs_mode) if (__dfs_mode)
{ {
// vice-versa. // vice-versa.
_M_dfs<__match_mode>(__state._M_next); _M_dfs(__match_mode, __state._M_next);
if (!_M_has_sol) if (!_M_has_sol)
_M_rep_once_more<__match_mode>(__i); _M_rep_once_more(__match_mode, __i);
} }
else else
{ {
// DON'T attempt anything, because there's already another // DON'T attempt anything, because there's already another
// state with higher priority accepted. This state cannot be // state with higher priority accepted. This state cannot
// better by attempting its next node. // be better by attempting its next node.
if (!_M_has_sol) if (!_M_has_sol)
{ {
_M_dfs<__match_mode>(__state._M_next); _M_dfs(__match_mode, __state._M_next);
// DON'T attempt anything if it's already accepted. An // DON'T attempt anything if it's already accepted. An
// accepted state *must* be better than a solution that // accepted state *must* be better than a solution that
// matches a non-greedy quantifier one more time. // matches a non-greedy quantifier one more time.
if (!_M_has_sol) if (!_M_has_sol)
_M_rep_once_more<__match_mode>(__i); _M_rep_once_more(__match_mode, __i);
} }
} }
} }
...@@ -258,7 +249,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -258,7 +249,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto& __res = _M_cur_results[__state._M_subexpr]; auto& __res = _M_cur_results[__state._M_subexpr];
auto __back = __res.first; auto __back = __res.first;
__res.first = _M_current; __res.first = _M_current;
_M_dfs<__match_mode>(__state._M_next); _M_dfs(__match_mode, __state._M_next);
__res.first = __back; __res.first = __back;
} }
break; break;
...@@ -268,27 +259,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -268,27 +259,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __back = __res; auto __back = __res;
__res.second = _M_current; __res.second = _M_current;
__res.matched = true; __res.matched = true;
_M_dfs<__match_mode>(__state._M_next); _M_dfs(__match_mode, __state._M_next);
__res = __back; __res = __back;
} }
break; break;
case _S_opcode_line_begin_assertion: case _S_opcode_line_begin_assertion:
if (_M_at_begin()) if (_M_at_begin())
_M_dfs<__match_mode>(__state._M_next); _M_dfs(__match_mode, __state._M_next);
break; break;
case _S_opcode_line_end_assertion: case _S_opcode_line_end_assertion:
if (_M_at_end()) if (_M_at_end())
_M_dfs<__match_mode>(__state._M_next); _M_dfs(__match_mode, __state._M_next);
break; break;
case _S_opcode_word_boundary: case _S_opcode_word_boundary:
if (_M_word_boundary(__state) == !__state._M_neg) if (_M_word_boundary(__state) == !__state._M_neg)
_M_dfs<__match_mode>(__state._M_next); _M_dfs(__match_mode, __state._M_next);
break; break;
// Here __state._M_alt offers a single start node for a sub-NFA. // Here __state._M_alt offers a single start node for a sub-NFA.
// We recursively invoke our algorithm to match the sub-NFA. // We recursively invoke our algorithm to match the sub-NFA.
case _S_opcode_subexpr_lookahead: case _S_opcode_subexpr_lookahead:
if (_M_lookahead(__state) == !__state._M_neg) if (_M_lookahead(__state) == !__state._M_neg)
_M_dfs<__match_mode>(__state._M_next); _M_dfs(__match_mode, __state._M_next);
break; break;
case _S_opcode_match: case _S_opcode_match:
if (__dfs_mode) if (__dfs_mode)
...@@ -296,14 +287,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -296,14 +287,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (_M_current != _M_end && __state._M_matches(*_M_current)) if (_M_current != _M_end && __state._M_matches(*_M_current))
{ {
++_M_current; ++_M_current;
_M_dfs<__match_mode>(__state._M_next); _M_dfs(__match_mode, __state._M_next);
--_M_current; --_M_current;
} }
} }
else else
if (__state._M_matches(*_M_current)) if (__state._M_matches(*_M_current))
_M_match_queue->push_back(make_pair(__state._M_next, _M_states._M_queue(__state._M_next, _M_cur_results);
_M_cur_results));
break; break;
// First fetch the matched result from _M_cur_results as __submatch; // First fetch the matched result from _M_cur_results as __submatch;
// then compare it with // then compare it with
...@@ -328,11 +318,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -328,11 +318,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
auto __backup = _M_current; auto __backup = _M_current;
_M_current = __last; _M_current = __last;
_M_dfs<__match_mode>(__state._M_next); _M_dfs(__match_mode, __state._M_next);
_M_current = __backup; _M_current = __backup;
} }
else else
_M_dfs<__match_mode>(__state._M_next); _M_dfs(__match_mode, __state._M_next);
} }
} }
break; break;
...@@ -340,7 +330,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -340,7 +330,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__dfs_mode) if (__dfs_mode)
{ {
_GLIBCXX_DEBUG_ASSERT(!_M_has_sol); _GLIBCXX_DEBUG_ASSERT(!_M_has_sol);
if (__match_mode) if (__match_mode == _Match_mode::_Exact)
_M_has_sol = _M_current == _M_end; _M_has_sol = _M_current == _M_end;
else else
_M_has_sol = true; _M_has_sol = true;
...@@ -355,7 +345,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -355,7 +345,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (_M_current == _M_begin if (_M_current == _M_begin
&& (_M_flags & regex_constants::match_not_null)) && (_M_flags & regex_constants::match_not_null))
break; break;
if (!__match_mode || _M_current == _M_end) if (__match_mode == _Match_mode::_Prefix || _M_current == _M_end)
if (!_M_has_sol) if (!_M_has_sol)
{ {
_M_has_sol = true; _M_has_sol = true;
...@@ -364,9 +354,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -364,9 +354,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
break; break;
case _S_opcode_alternative: case _S_opcode_alternative:
_M_dfs<__match_mode>(__state._M_alt); _M_dfs(__match_mode, __state._M_alt);
if (!__dfs_mode || !_M_has_sol) if (!__dfs_mode || !_M_has_sol)
_M_dfs<__match_mode>(__state._M_next); _M_dfs(__match_mode, __state._M_next);
break; break;
default: default:
_GLIBCXX_DEBUG_ASSERT(false); _GLIBCXX_DEBUG_ASSERT(false);
......
...@@ -227,14 +227,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -227,14 +227,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
} }
// In POSIX, when encountering "[]" or "[^]", the ']' is interpreted // In POSIX, when encountering "[]" or "[^]", the ']' is interpreted
// literally. So "[]]" or "[^]]" is valid regex. See the testcases // literally. So "[]]" and "[^]]" are valid regexes. See the testcases
// `*/empty_range.cc`. // `*/empty_range.cc`.
else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start)) else if (__c == ']' && (_M_is_ecma() || !_M_at_bracket_start))
{ {
_M_token = _S_token_bracket_end; _M_token = _S_token_bracket_end;
_M_state = _S_state_normal; _M_state = _S_state_normal;
} }
// ECMAScirpt and awk permmits escaping in bracket. // ECMAScript and awk permits escaping in bracket.
else if (__c == '\\' && (_M_is_ecma() || _M_is_awk())) else if (__c == '\\' && (_M_is_ecma() || _M_is_awk()))
(this->*_M_eat_escape)(); (this->*_M_eat_escape)();
else else
...@@ -344,7 +344,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -344,7 +344,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
_M_token = _S_token_hex_num; _M_token = _S_token_hex_num;
} }
// ECMAScript recongnizes multi-digit back-references. // ECMAScript recognizes multi-digit back-references.
else if (_M_ctype.is(_CtypeT::digit, __c)) else if (_M_ctype.is(_CtypeT::digit, __c))
{ {
_M_value.assign(1, __c); _M_value.assign(1, __c);
...@@ -392,6 +392,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -392,6 +392,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else else
{ {
#ifdef __STRICT_ANSI__ #ifdef __STRICT_ANSI__
// POSIX says it is undefined to escape ordinary characters
__throw_regex_error(regex_constants::error_escape); __throw_regex_error(regex_constants::error_escape);
#else #else
_M_token = _S_token_ord_char; _M_token = _S_token_ord_char;
...@@ -435,8 +436,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -435,8 +436,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__throw_regex_error(regex_constants::error_escape); __throw_regex_error(regex_constants::error_escape);
} }
// Eats a character class or throwns an exception. // Eats a character class or throws an exception.
// __ch cound be ':', '.' or '=', _M_current is the char after ']' when // __ch could be ':', '.' or '=', _M_current is the char after ']' when
// returning. // returning.
template<typename _CharT> template<typename _CharT>
void void
......
// Copyright (C) 2014 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
#include <regex>
#include <testsuite_performance.h>
using namespace __gnu_test;
int main()
{
time_counter time;
resource_counter resource;
start_counters(time, resource);
// this should get compiled to just L"[abcd]"
auto re = std::wregex(L'[' + std::wstring(300, L'a') + L"bc"
+ std::wstring(1000, 'a') + L"d]");
bool ok = true;
for (int i = 0; i < 100000; ++i)
ok = ok && (std::regex_match(L"b", re) && std::regex_match(L"d", re));
stop_counters(time, resource);
report_performance(__FILE__, "", time, resource);
return ok ? 0 : 1;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment