Commit a670a9bb by Tim Shen Committed by Tim Shen

regex_automaton.h (_NFA<>::_M_insert_repeat): Add _S_opcode_repeat support to…

regex_automaton.h (_NFA<>::_M_insert_repeat): Add _S_opcode_repeat support to distinguish a loop from _S_opcode_alternative.

2014-04-27  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex_automaton.h (_NFA<>::_M_insert_repeat):
	Add _S_opcode_repeat support to distinguish a loop from
	_S_opcode_alternative.
	* include/bits/regex_automaton.tcc (_State_base::_M_print,
	_State_base::_M_dot, _NFA<>::_M_eliminate_dummy,
	_StateSeq<>::_M_clone): Likewise.
	* include/bits/regex_compiler.tcc (_Compiler<>::_M_quantifier):
	Likewise.
	* include/bits/regex_executor.tcc (_Executor<>::_M_dfs): Likewise.
	* include/bits/regex_scanner.tcc (_Scanner<>::_M_eat_escape_ecma):
	Uglify local variable __i.
	* include/bits/regex_compiler.h (_BracketMatcher<>::_M_make_cache):
	Use size_t instead of int to compare with vector::size().

2014-04-27  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex_executor.h: Add _M_rep_count to track how
	many times this repeat node is visited.
	* include/bits/regex_executor.tcc (_Executor<>::_M_rep_once_more,
	_Executor<>::_M_dfs): Use _M_rep_count to prevent entering
	infinite loop.

2014-04-27  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex.tcc (__regex_algo_impl<>): Remove
	_GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT and use
	_GLIBCXX_REGEX_USE_THOMPSON_NFA instead.
	* include/bits/regex_automaton.h: Remove quantifier counting variable.
	* include/bits/regex_automaton.tcc (_State_base::_M_dot):
	Adjust debug NFA dump.

From-SVN: r209844
parent f956adb9
2014-04-27 Tim Shen <timshen91@gmail.com>
* include/bits/regex_automaton.h (_NFA<>::_M_insert_repeat):
Add _S_opcode_repeat support to distinguish a loop from
_S_opcode_alternative.
* include/bits/regex_automaton.tcc (_State_base::_M_print,
_State_base::_M_dot, _NFA<>::_M_eliminate_dummy,
_StateSeq<>::_M_clone): Likewise.
* include/bits/regex_compiler.tcc (_Compiler<>::_M_quantifier):
Likewise.
* include/bits/regex_executor.tcc (_Executor<>::_M_dfs): Likewise.
* include/bits/regex_scanner.tcc (_Scanner<>::_M_eat_escape_ecma):
Uglify local variable __i.
* include/bits/regex_compiler.h (_BracketMatcher<>::_M_make_cache):
Use size_t instead of int to compare with vector::size().
2014-04-27 Tim Shen <timshen91@gmail.com>
* include/bits/regex_executor.h: Add _M_rep_count to track how
many times this repeat node is visited.
* include/bits/regex_executor.tcc (_Executor<>::_M_rep_once_more,
_Executor<>::_M_dfs): Use _M_rep_count to prevent entering
infinite loop.
2014-04-27 Tim Shen <timshen91@gmail.com>
* include/bits/regex.tcc (__regex_algo_impl<>): Remove
_GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT and use
_GLIBCXX_REGEX_USE_THOMPSON_NFA instead.
* include/bits/regex_automaton.h: Remove quantifier counting variable.
* include/bits/regex_automaton.tcc (_State_base::_M_dot):
Adjust debug NFA dump.
2014-04-25 Lars Gullik Bjønnes <larsbj@gullik.org> 2014-04-25 Lars Gullik Bjønnes <larsbj@gullik.org>
PR libstdc++/60710 PR libstdc++/60710
......
...@@ -28,12 +28,12 @@ ...@@ -28,12 +28,12 @@
* Do not attempt to use it directly. @headername{regex} * Do not attempt to use it directly. @headername{regex}
*/ */
// See below __regex_algo_impl to get what this is talking about. The default // A non-standard switch to let the user pick the matching algorithm.
// value 1 indicated a conservative optimization without giving up worst case // If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the thompson NFA
// performance. // algorithm will be used. This algorithm is not enabled by default,
#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT // and cannot be used if the regex contains back-references, but has better
#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1 // (polynomial instead of exponential) worst case performance.
#endif // See __regex_algo_impl below.
namespace std _GLIBCXX_VISIBILITY(default) namespace std _GLIBCXX_VISIBILITY(default)
{ {
...@@ -66,24 +66,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -66,24 +66,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
for (auto& __it : __res) for (auto& __it : __res)
__it.matched = false; __it.matched = false;
// This function decide which executor to use under given circumstances. // __policy is used by testsuites so that they can use Thompson NFA
// The _S_auto policy now is the following: if a NFA has no // without defining a macro. Users should define
// back-references and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT // _GLIBCXX_REGEX_USE_THOMPSON_NFA if they need to use this approach.
// quantifiers (*, +, ?), the BFS executor will be used, other wise
// DFS executor. This is because DFS executor has a exponential upper
// bound, but better best-case performace. Meanwhile, BFS executor can
// effectively prevent from exponential-long time matching (which must
// contains many quantifiers), but it's slower in average.
//
// For simple regex, BFS executor could be 2 or more times slower than
// DFS executor.
//
// Of course, BFS executor cannot handle back-references.
bool __ret; bool __ret;
if (!__re._M_automaton->_M_has_backref if (!__re._M_automaton->_M_has_backref
&& (__policy == _RegexExecutorPolicy::_S_alternate #ifndef _GLIBCXX_REGEX_USE_THOMPSON_NFA
|| __re._M_automaton->_M_quant_count && __policy == _RegexExecutorPolicy::_S_alternate
> _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT)) #endif
)
{ {
_Executor<_BiIter, _Alloc, _TraitsT, false> _Executor<_BiIter, _Alloc, _TraitsT, false>
__executor(__s, __e, __m, __re, __flags); __executor(__s, __e, __m, __re, __flags);
......
...@@ -52,6 +52,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -52,6 +52,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
_S_opcode_unknown, _S_opcode_unknown,
_S_opcode_alternative, _S_opcode_alternative,
_S_opcode_repeat,
_S_opcode_backref, _S_opcode_backref,
_S_opcode_line_begin_assertion, _S_opcode_line_begin_assertion,
_S_opcode_line_end_assertion, _S_opcode_line_end_assertion,
...@@ -74,9 +75,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -74,9 +75,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
size_t _M_backref_index; // for _S_opcode_backref size_t _M_backref_index; // for _S_opcode_backref
struct struct
{ {
// for _S_opcode_alternative. // for _S_opcode_alternative, _S_opcode_repeat and
_StateIdT _M_quant_index; // _S_opcode_subexpr_lookahead
// for _S_opcode_alternative or _S_opcode_subexpr_lookahead
_StateIdT _M_alt; _StateIdT _M_alt;
// for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or // for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
// quantifiers (ungreedy if set true) // quantifiers (ungreedy if set true)
...@@ -120,7 +120,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -120,7 +120,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
explicit explicit
_NFA_base(_FlagT __f) _NFA_base(_FlagT __f)
: _M_flags(__f), _M_start_state(0), _M_subexpr_count(0), : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
_M_quant_count(0), _M_has_backref(false) _M_has_backref(false)
{ } { }
_NFA_base(_NFA_base&&) = default; _NFA_base(_NFA_base&&) = default;
...@@ -145,7 +145,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -145,7 +145,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_FlagT _M_flags; _FlagT _M_flags;
_StateIdT _M_start_state; _StateIdT _M_start_state;
_SizeT _M_subexpr_count; _SizeT _M_subexpr_count;
_SizeT _M_quant_count;
bool _M_has_backref; bool _M_has_backref;
}; };
...@@ -175,7 +174,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -175,7 +174,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateT __tmp(_S_opcode_alternative); _StateT __tmp(_S_opcode_alternative);
// It labels every quantifier to make greedy comparison easier in BFS // It labels every quantifier to make greedy comparison easier in BFS
// approach. // approach.
__tmp._M_quant_index = this->_M_quant_count++; __tmp._M_next = __next;
__tmp._M_alt = __alt;
return _M_insert_state(std::move(__tmp));
}
_StateIdT
_M_insert_repeat(_StateIdT __next, _StateIdT __alt, bool __neg)
{
_StateT __tmp(_S_opcode_repeat);
// It labels every quantifier to make greedy comparison easier in BFS
// approach.
__tmp._M_next = __next; __tmp._M_next = __next;
__tmp._M_alt = __alt; __tmp._M_alt = __alt;
__tmp._M_neg = __neg; __tmp._M_neg = __neg;
......
...@@ -41,6 +41,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -41,6 +41,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
switch (_M_opcode) switch (_M_opcode)
{ {
case _S_opcode_alternative: case _S_opcode_alternative:
case _S_opcode_repeat:
ostr << "alt next=" << _M_next << " alt=" << _M_alt; ostr << "alt next=" << _M_next << " alt=" << _M_alt;
break; break;
case _S_opcode_subexpr_begin: case _S_opcode_subexpr_begin:
...@@ -72,11 +73,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -72,11 +73,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
switch (_M_opcode) switch (_M_opcode)
{ {
case _S_opcode_alternative: case _S_opcode_alternative:
case _S_opcode_repeat:
__ostr << __id << " [label=\"" << __id << "\\nALT\"];\n" __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n"
<< __id << " -> " << _M_next << __id << " -> " << _M_next
<< " [label=\"epsilon\", tailport=\"s\"];\n" << " [label=\"next\", tailport=\"s\"];\n"
<< __id << " -> " << _M_alt << __id << " -> " << _M_alt
<< " [label=\"epsilon\", tailport=\"n\"];\n"; << " [label=\"alt\", tailport=\"n\"];\n";
break; break;
case _S_opcode_backref: case _S_opcode_backref:
__ostr << __id << " [label=\"" << __id << "\\nBACKREF " __ostr << __id << " [label=\"" << __id << "\\nBACKREF "
...@@ -174,6 +176,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -174,6 +176,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
== _S_opcode_dummy) == _S_opcode_dummy)
__it._M_next = (*this)[__it._M_next]._M_next; __it._M_next = (*this)[__it._M_next]._M_next;
if (__it._M_opcode == _S_opcode_alternative if (__it._M_opcode == _S_opcode_alternative
|| __it._M_opcode == _S_opcode_repeat
|| __it._M_opcode == _S_opcode_subexpr_lookahead) || __it._M_opcode == _S_opcode_subexpr_lookahead)
while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode
== _S_opcode_dummy) == _S_opcode_dummy)
...@@ -198,6 +201,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -198,6 +201,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __id = _M_nfa._M_insert_state(__dup); auto __id = _M_nfa._M_insert_state(__dup);
__m[__u] = __id; __m[__u] = __id;
if (__dup._M_opcode == _S_opcode_alternative if (__dup._M_opcode == _S_opcode_alternative
|| __dup._M_opcode == _S_opcode_repeat
|| __dup._M_opcode == _S_opcode_subexpr_lookahead) || __dup._M_opcode == _S_opcode_subexpr_lookahead)
if (__dup._M_alt != _S_invalid_state_id && __m[__dup._M_alt] == -1) if (__dup._M_alt != _S_invalid_state_id && __m[__dup._M_alt] == -1)
__stack.push(__dup._M_alt); __stack.push(__dup._M_alt);
...@@ -217,6 +221,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -217,6 +221,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__ref._M_next = __m[__ref._M_next]; __ref._M_next = __m[__ref._M_next];
} }
if (__ref._M_opcode == _S_opcode_alternative if (__ref._M_opcode == _S_opcode_alternative
|| __ref._M_opcode == _S_opcode_repeat
|| __ref._M_opcode == _S_opcode_subexpr_lookahead) || __ref._M_opcode == _S_opcode_subexpr_lookahead)
if (__ref._M_alt != _S_invalid_state_id) if (__ref._M_alt != _S_invalid_state_id)
{ {
......
...@@ -421,7 +421,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -421,7 +421,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void void
_M_make_cache(true_type) _M_make_cache(true_type)
{ {
for (int __i = 0; __i < _M_cache.size(); __i++) for (size_t __i = 0; __i < _M_cache.size(); __i++)
_M_cache[static_cast<_UnsignedCharT>(__i)] = _M_cache[static_cast<_UnsignedCharT>(__i)] =
_M_apply(__i, false_type()); _M_apply(__i, false_type());
} }
......
...@@ -188,8 +188,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -188,8 +188,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
__init(); __init();
auto __e = _M_pop(); auto __e = _M_pop();
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id, _StateSeqT __r(_M_nfa, _M_nfa._M_insert_repeat(_S_invalid_state_id,
__e._M_start, __neg)); __e._M_start, __neg));
__e._M_append(__r); __e._M_append(__r);
_M_stack.push(__r); _M_stack.push(__r);
} }
...@@ -197,8 +197,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -197,8 +197,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
__init(); __init();
auto __e = _M_pop(); auto __e = _M_pop();
__e._M_append(_M_nfa._M_insert_alt(_S_invalid_state_id, __e._M_start, __e._M_append(_M_nfa._M_insert_repeat(_S_invalid_state_id,
__neg)); __e._M_start, __neg));
_M_stack.push(__e); _M_stack.push(__e);
} }
else if (_M_match_token(_ScannerT::_S_token_opt)) else if (_M_match_token(_ScannerT::_S_token_opt))
...@@ -206,8 +206,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -206,8 +206,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__init(); __init();
auto __e = _M_pop(); auto __e = _M_pop();
auto __end = _M_nfa._M_insert_dummy(); auto __end = _M_nfa._M_insert_dummy();
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id, _StateSeqT __r(_M_nfa, _M_nfa._M_insert_repeat(_S_invalid_state_id,
__e._M_start, __neg)); __e._M_start, __neg));
__e._M_append(__end); __e._M_append(__end);
__r._M_append(__end); __r._M_append(__end);
_M_stack.push(__r); _M_stack.push(__r);
...@@ -244,8 +244,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -244,8 +244,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
auto __tmp = __r._M_clone(); auto __tmp = __r._M_clone();
_StateSeqT __s(_M_nfa, _StateSeqT __s(_M_nfa,
_M_nfa._M_insert_alt(_S_invalid_state_id, _M_nfa._M_insert_repeat(_S_invalid_state_id,
__tmp._M_start, __neg)); __tmp._M_start, __neg));
__tmp._M_append(__s); __tmp._M_append(__s);
__e._M_append(__s); __e._M_append(__s);
} }
...@@ -261,8 +261,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -261,8 +261,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
for (long __i = 0; __i < __n; ++__i) for (long __i = 0; __i < __n; ++__i)
{ {
auto __tmp = __r._M_clone(); auto __tmp = __r._M_clone();
auto __alt = _M_nfa._M_insert_alt(__tmp._M_start, auto __alt = _M_nfa._M_insert_repeat(__tmp._M_start,
__end, __neg); __end, __neg);
__stack.push(__alt); __stack.push(__alt);
__e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end)); __e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end));
} }
......
...@@ -73,6 +73,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -73,6 +73,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_results(__results), _M_results(__results),
_M_match_queue(__dfs_mode ? nullptr _M_match_queue(__dfs_mode ? nullptr
: new vector<pair<_StateIdT, _ResultsVec>>()), : new vector<pair<_StateIdT, _ResultsVec>>()),
_M_rep_count(_M_nfa.size()),
_M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())), _M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())),
_M_flags((__flags & regex_constants::match_prev_avail) _M_flags((__flags & regex_constants::match_prev_avail)
? (__flags ? (__flags
...@@ -104,6 +105,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -104,6 +105,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
private: private:
template<bool __match_mode> template<bool __match_mode>
void void
_M_rep_once_more(_StateIdT);
template<bool __match_mode>
void
_M_dfs(_StateIdT __start); _M_dfs(_StateIdT __start);
template<bool __match_mode> template<bool __match_mode>
...@@ -149,9 +154,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -149,9 +154,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_ResultsVec& _M_results; _ResultsVec& _M_results;
// Used in BFS, saving states that need to be considered for the next // Used in BFS, saving states that need to be considered for the next
// character. // character.
std::unique_ptr<vector<pair<_StateIdT, _ResultsVec>>> _M_match_queue; unique_ptr<vector<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
// Used in BFS, indicating that which state is already visited. // Used in BFS, indicating that which state is already visited.
std::unique_ptr<vector<bool>> _M_visited; vector<pair<_BiIter, int>> _M_rep_count;
unique_ptr<vector<bool>> _M_visited;
_FlagT _M_flags; _FlagT _M_flags;
// To record current solution. // To record current solution.
_StateIdT _M_start_state; _StateIdT _M_start_state;
......
...@@ -161,7 +161,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -161,7 +161,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false; return false;
} }
// TODO: Use a function vector to dispatch, instead of using switch-case. // __rep_count records how many times (__rep_count.second)
// this node is visited under certain input iterator
// (__rep_count.first). This prevents the executor from entering an
// infinite loop by refusing to continue when it's already been
// visited more than twice. It's `twice` instead of `once` because
// we need to spare one more time for potential group capture.
// Follow the _M_alt edge of state __i, but only while the visit
// bookkeeping in _M_rep_count[__i] permits it.
//
// __i indexes both _M_nfa (to fetch the state) and _M_rep_count (to fetch
// that state's pair: .first is an input iterator, .second is a counter).
// Nothing is returned; the only effects are the recursive _M_dfs call and
// the temporary update of the pair, which is undone before returning.
template<typename _BiIter, typename _Alloc, typename _TraitsT,
bool __dfs_mode>
template<bool __match_mode>
void _Executor<_BiIter, _Alloc, _TraitsT, __dfs_mode>::
_M_rep_once_more(_StateIdT __i)
{
const auto& __state = _M_nfa[__i];
auto& __rep_count = _M_rep_count[__i];
// Case 1: the counter is zero, or the recorded iterator differs from the
// current input position.  Start a fresh record for this position.
if (__rep_count.second == 0 || __rep_count.first != _M_current)
{
// Keep the previous pair so it can be put back after the recursion.
auto __back = __rep_count;
__rep_count.first = _M_current;
__rep_count.second = 1;
_M_dfs<__match_mode>(__state._M_alt);
__rep_count = __back;
}
else
{
// Case 2: same input position as recorded.  Recurse only if the counter
// is still below 2; otherwise do nothing at all.
if (__rep_count.second < 2)
{
// Bump the counter for the duration of the recursive call only.
__rep_count.second++;
_M_dfs<__match_mode>(__state._M_alt);
__rep_count.second--;
}
}
// NOTE(review): the ';' after the closing brace is an empty declaration;
// it looks unnecessary -- confirm before removing.
};
template<typename _BiIter, typename _Alloc, typename _TraitsT, template<typename _BiIter, typename _Alloc, typename _TraitsT,
bool __dfs_mode> bool __dfs_mode>
template<bool __match_mode> template<bool __match_mode>
...@@ -184,69 +216,61 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -184,69 +216,61 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// of this quantifier". Executing _M_next first or _M_alt first don't // of this quantifier". Executing _M_next first or _M_alt first don't
// mean the same thing, and we need to choose the correct order under // mean the same thing, and we need to choose the correct order under
// given greedy mode. // given greedy mode.
case _S_opcode_alternative: case _S_opcode_repeat:
// Greedy. {
if (!__state._M_neg) // Greedy.
{ if (!__state._M_neg)
// "Once more" is preferred in greedy mode. {
_M_dfs<__match_mode>(__state._M_alt); _M_rep_once_more<__match_mode>(__i);
// If it's DFS executor and already accepted, we're done. // If it's DFS executor and already accepted, we're done.
if (!__dfs_mode || !_M_has_sol) if (!__dfs_mode || !_M_has_sol)
_M_dfs<__match_mode>(__state._M_next);
}
else // Non-greedy mode
{
if (__dfs_mode)
{
// vice-versa.
_M_dfs<__match_mode>(__state._M_next); _M_dfs<__match_mode>(__state._M_next);
if (!_M_has_sol) }
_M_dfs<__match_mode>(__state._M_alt); else // Non-greedy mode
} {
else if (__dfs_mode)
{ {
// DON'T attempt anything, because there's already another // vice-versa.
// state with higher priority accepted. This state cannot be _M_dfs<__match_mode>(__state._M_next);
// better by attempting its next node. if (!_M_has_sol)
if (!_M_has_sol) _M_rep_once_more<__match_mode>(__i);
{ }
_M_dfs<__match_mode>(__state._M_next); else
// DON'T attempt anything if it's already accepted. An {
// accepted state *must* be better than a solution that // DON'T attempt anything, because there's already another
// matches a non-greedy quantifier one more time. // state with higher priority accepted. This state cannot be
if (!_M_has_sol) // better by attempting its next node.
_M_dfs<__match_mode>(__state._M_alt); if (!_M_has_sol)
} {
} _M_dfs<__match_mode>(__state._M_next);
// DON'T attempt anything if it's already accepted. An
// accepted state *must* be better than a solution that
// matches a non-greedy quantifier one more time.
if (!_M_has_sol)
_M_rep_once_more<__match_mode>(__i);
}
}
}
} }
break; break;
case _S_opcode_subexpr_begin: case _S_opcode_subexpr_begin:
// If there's nothing changed since last visit, do NOT continue. {
// This prevents the executor from get into infinite loop when using auto& __res = _M_cur_results[__state._M_subexpr];
// "()*" to match "". auto __back = __res.first;
if (!_M_cur_results[__state._M_subexpr].matched __res.first = _M_current;
|| _M_cur_results[__state._M_subexpr].first != _M_current) _M_dfs<__match_mode>(__state._M_next);
{ __res.first = __back;
auto& __res = _M_cur_results[__state._M_subexpr]; }
auto __back = __res.first;
__res.first = _M_current;
_M_dfs<__match_mode>(__state._M_next);
__res.first = __back;
}
break; break;
case _S_opcode_subexpr_end: case _S_opcode_subexpr_end:
if (_M_cur_results[__state._M_subexpr].second != _M_current {
|| _M_cur_results[__state._M_subexpr].matched != true) auto& __res = _M_cur_results[__state._M_subexpr];
{ auto __back = __res;
auto& __res = _M_cur_results[__state._M_subexpr]; __res.second = _M_current;
auto __back = __res; __res.matched = true;
__res.second = _M_current;
__res.matched = true;
_M_dfs<__match_mode>(__state._M_next);
__res = __back;
}
else
_M_dfs<__match_mode>(__state._M_next); _M_dfs<__match_mode>(__state._M_next);
__res = __back;
}
break; break;
case _S_opcode_line_begin_assertion: case _S_opcode_line_begin_assertion:
if (_M_at_begin()) if (_M_at_begin())
...@@ -339,6 +363,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -339,6 +363,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
} }
break; break;
case _S_opcode_alternative:
_M_dfs<__match_mode>(__state._M_alt);
if (!__dfs_mode || !_M_has_sol)
_M_dfs<__match_mode>(__state._M_next);
break;
default: default:
_GLIBCXX_DEBUG_ASSERT(false); _GLIBCXX_DEBUG_ASSERT(false);
} }
......
...@@ -335,7 +335,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -335,7 +335,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
else if (__c == 'x' || __c == 'u') else if (__c == 'x' || __c == 'u')
{ {
_M_value.erase(); _M_value.erase();
for (int i = 0; i < (__c == 'x' ? 2 : 4); i++) for (int __i = 0; __i < (__c == 'x' ? 2 : 4); __i++)
{ {
if (_M_current == _M_end if (_M_current == _M_end
|| !_M_ctype.is(_CtypeT::xdigit, *_M_current)) || !_M_ctype.is(_CtypeT::xdigit, *_M_current))
......
...@@ -50,6 +50,7 @@ test01() ...@@ -50,6 +50,7 @@ test01()
const char s[] = ""; const char s[] = "";
VERIFY( regex_match_debug(s, m, re) ); VERIFY( regex_match_debug(s, m, re) );
} }
VERIFY(regex_match_debug("", regex("(?:)*")));
} }
int int
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment