Commit ddf41e9d by Tim Shen Committed by Tim Shen

regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not use std::map.

2014-01-17  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not
	use std::map.
	* include/bits/regex_automaton.h: Do not use std::set.
	* include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_char(),
	_BracketMatcher<>::_M_add_collating_element(),
	_BracketMatcher<>::_M_add_equivalence_class(),
	_BracketMatcher<>::_M_make_range()): Likewise.
	* include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply()):
	Likewise.
	* include/bits/regex_executor.h: Do not use std::queue.
	* include/bits/regex_executor.tcc (_Executor<>::_M_main(),
	_Executor<>::_M_dfs()): Likewise.
	* include/std/regex: Remove <map>, <set> and <queue>.

2014-01-17  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex.h (__compile_nfa<>(), basic_regex<>::basic_regex(),
	basic_regex<>::assign()): Change __compile_nfa to accept
	const _CharT* only.
	* include/bits/regex_compiler.h: Change _Compiler's template
	argument from <_FwdIter, _TraitsT> to <_TraitsT>.
	* include/bits/regex_compiler.tcc: Likewise.

2014-01-17  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex_compiler.h: Change _ScannerT into char-type
	templated.
	* include/bits/regex_scanner.h (_Scanner<>::_Scanner()): Separate
	_ScannerBase from _Scanner; Change _Scanner's template argument from
	_FwdIter to _CharT. Avoid use of std::map and std::set by using arrays
	instead.
	* include/bits/regex_scanner.tcc (_Scanner<>::_Scanner(),
	_Scanner<>::_M_scan_normal(), _Scanner<>::_M_eat_escape_ecma(),
	_Scanner<>::_M_eat_escape_posix(), _Scanner<>::_M_eat_escape_awk()):
	Likewise.
	* include/std/regex: Add <cstring> for using strchr.

2014-01-17  Tim Shen  <timshen91@gmail.com>

	* bits/regex_automaton.tcc: Indentation fix.
	* bits/regex_compiler.h (__compile_nfa<>(), _Compiler<>,
	_RegexTranslator<>, _AnyMatcher<>, _CharMatcher<>,
	_BracketMatcher<>): Add bool option template parameters and
	specializations to make matching more efficient and space saving.
	* bits/regex_compiler.tcc: Likewise.

From-SVN: r206690
parent 9e6f9ad6
2014-01-17 Tim Shen <timshen91@gmail.com>
* include/bits/regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not
use std::map.
* include/bits/regex_automaton.h: Do not use std::set.
* include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_char(),
_BracketMatcher<>::_M_add_collating_element(),
_BracketMatcher<>::_M_add_equivalence_class(),
_BracketMatcher<>::_M_make_range()): Likewise.
* include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply()):
Likewise.
* include/bits/regex_executor.h: Do not use std::queue.
* include/bits/regex_executor.tcc (_Executor<>::_M_main(),
_Executor<>::_M_dfs()): Likewise.
* include/std/regex: Remove <map>, <set> and <queue>.
2014-01-17 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h (__compile_nfa<>(), basic_regex<>::basic_regex(),
basic_regex<>::assign()): Change __compile_nfa to accept
const _CharT* only.
* include/bits/regex_compiler.h: Change _Compiler's template
argument from <_FwdIter, _TraitsT> to <_TraitsT>.
* include/bits/regex_compiler.tcc: Likewise.
2014-01-17 Tim Shen <timshen91@gmail.com>
* include/bits/regex_compiler.h: Change _ScannerT into char-type
templated.
* include/bits/regex_scanner.h (_Scanner<>::_Scanner()): Separate
_ScannerBase from _Scanner; Change _Scanner's template argument from
_FwdIter to _CharT. Avoid use of std::map and std::set by using arrays
instead.
* include/bits/regex_scanner.tcc (_Scanner<>::_Scanner(),
_Scanner<>::_M_scan_normal(), _Scanner<>::_M_eat_escape_ecma(),
_Scanner<>::_M_eat_escape_posix(), _Scanner<>::_M_eat_escape_awk()):
Likewise.
* include/std/regex: Add <cstring> for using strchr.
2014-01-17 Tim Shen <timshen91@gmail.com>
* bits/regex_automaton.tcc: Indentation fix.
* bits/regex_compiler.h (__compile_nfa<>(), _Compiler<>,
_RegexTranslator<>, _AnyMatcher<>, _CharMatcher<>,
_BracketMatcher<>): Add bool option template parameters and
specializations to make matching more efficient and space saving.
* bits/regex_compiler.tcc: Likewise.
2014-01-15 François Dumont <fdumont@gcc.gnu.org>
PR libstdc++/59712
......
......@@ -60,51 +60,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename, typename, typename, bool>
class _Executor;
template<typename _Tp>
struct __has_contiguous_iter : std::false_type { };
template<typename _Ch, typename _Tr, typename _Alloc>
struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>>
: std::true_type // string<Ch> storage is contiguous
{ };
template<typename _Tp, typename _Alloc>
struct __has_contiguous_iter<std::vector<_Tp, _Alloc>>
: std::true_type // vector<Tp> storage is contiguous
{ };
template<typename _Alloc>
struct __has_contiguous_iter<std::vector<bool, _Alloc>>
: std::false_type // vector<bool> storage is not contiguous
{ };
template<typename _Tp>
struct __is_contiguous_normal_iter : std::false_type { };
template<typename _Tp, typename _Cont>
struct
__is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>>
: __has_contiguous_iter<_Cont>::type
{ };
template<typename _Iter, typename _TraitsT>
using __enable_if_contiguous_normal_iter
= typename enable_if< __is_contiguous_normal_iter<_Iter>::value,
std::shared_ptr<_NFA<_TraitsT>> >::type;
template<typename _Iter, typename _TraitsT>
using __disable_if_contiguous_normal_iter
= typename enable_if< !__is_contiguous_normal_iter<_Iter>::value,
std::shared_ptr<_NFA<_TraitsT>> >::type;
template<typename _FwdIter, typename _TraitsT>
__disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
__compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits,
regex_constants::syntax_option_type __flags);
template<typename _Iter, typename _TraitsT>
__enable_if_contiguous_normal_iter<_Iter, _TraitsT>
__compile_nfa(_Iter __first, _Iter __last, const _TraitsT& __traits,
template<typename _TraitsT>
inline std::shared_ptr<_NFA<_TraitsT>>
__compile_nfa(const typename _TraitsT::char_type* __first,
const typename _TraitsT::char_type* __last,
const _TraitsT& __traits,
regex_constants::syntax_option_type __flags);
_GLIBCXX_END_NAMESPACE_VERSION
......@@ -561,7 +521,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
flag_type __f = ECMAScript)
: _M_flags(__f),
_M_original_str(__first, __last),
_M_automaton(__detail::__compile_nfa(__first, __last, _M_traits,
_M_automaton(__detail::__compile_nfa(_M_original_str.c_str(),
_M_original_str.c_str()
+ _M_original_str.size(),
_M_traits,
_M_flags))
{ }
......@@ -698,7 +661,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
_M_flags = __flags;
_M_original_str.assign(__s.begin(), __s.end());
_M_automaton = __detail::__compile_nfa(__s.begin(), __s.end(),
auto __p = _M_original_str.c_str();
_M_automaton = __detail::__compile_nfa(__p,
__p + _M_original_str.size(),
_M_traits, _M_flags);
return *this;
}
......
......@@ -41,7 +41,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/
typedef long _StateIdT;
typedef std::set<_StateIdT> _StateSet;
static const _StateIdT _S_invalid_state_id = -1;
template<typename _CharT>
......@@ -138,16 +137,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_start() const
{ return _M_start_state; }
const _StateSet&
_M_final_states() const
{ return _M_accepting_states; }
_SizeT
_M_sub_count() const
{ return _M_subexpr_count; }
std::vector<size_t> _M_paren_stack;
_StateSet _M_accepting_states;
_FlagT _M_flags;
_StateIdT _M_start_state;
_SizeT _M_subexpr_count;
......@@ -172,7 +166,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_insert_accept()
{
auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
this->_M_accepting_states.insert(__ret);
return __ret;
}
......
......@@ -186,7 +186,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateSeq<_TraitsT>
_StateSeq<_TraitsT>::_M_clone()
{
std::map<_StateIdT, _StateIdT> __m;
std::vector<_StateIdT> __m(_M_nfa.size(), -1);
std::stack<_StateIdT> __stack;
__stack.push(_M_start);
while (!__stack.empty())
......@@ -194,30 +194,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __u = __stack.top();
__stack.pop();
auto __dup = _M_nfa[__u];
// _M_insert_state() never returns -1
auto __id = _M_nfa._M_insert_state(__dup);
__m[__u] = __id;
if (__u == _M_end)
continue;
if (__m.count(__dup._M_next) == 0)
if (__dup._M_next != _S_invalid_state_id && __m[__dup._M_next] == -1)
__stack.push(__dup._M_next);
if (__dup._M_opcode == _S_opcode_alternative
|| __dup._M_opcode == _S_opcode_subexpr_lookahead)
if (__m.count(__dup._M_alt) == 0)
if (__dup._M_alt != _S_invalid_state_id && __m[__dup._M_alt] == -1)
__stack.push(__dup._M_alt);
}
for (auto __it : __m)
long __size = static_cast<long>(__m.size());
for (long __k = 0; __k < __size; __k++)
{
auto& __ref = _M_nfa[__it.second];
if (__ref._M_next != -1)
long __v;
if ((__v = __m[__k]) == -1)
continue;
auto& __ref = _M_nfa[__v];
if (__ref._M_next != _S_invalid_state_id)
{
_GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next));
_GLIBCXX_DEBUG_ASSERT(__m[__ref._M_next] != -1);
__ref._M_next = __m[__ref._M_next];
}
if (__ref._M_opcode == _S_opcode_alternative
|| __ref._M_opcode == _S_opcode_subexpr_lookahead)
if (__ref._M_alt != -1)
if (__ref._M_alt != _S_invalid_state_id)
{
_GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt));
_GLIBCXX_DEBUG_ASSERT(__m[__ref._M_alt] != -1);
__ref._M_alt = __m[__ref._M_alt];
}
}
......
......@@ -59,9 +59,9 @@ namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _FwdIter, typename _TraitsT>
_Compiler<_FwdIter, _TraitsT>::
_Compiler(_FwdIter __b, _FwdIter __e,
template<typename _TraitsT>
_Compiler<_TraitsT>::
_Compiler(_IterT __b, _IterT __e,
const _TraitsT& __traits, _FlagT __flags)
: _M_flags((__flags
& (regex_constants::ECMAScript
......@@ -89,9 +89,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_nfa._M_eliminate_dummy();
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
void
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_disjunction()
{
this->_M_alternative();
......@@ -110,9 +110,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
void
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_alternative()
{
if (this->_M_term())
......@@ -126,9 +126,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
bool
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_term()
{
if (this->_M_assertion())
......@@ -141,9 +141,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false;
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
bool
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_assertion()
{
if (_M_match_token(_ScannerT::_S_token_line_begin))
......@@ -172,9 +172,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return true;
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
void
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_quantifier()
{
bool __neg = (_M_flags & regex_constants::ECMAScript);
......@@ -278,52 +278,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
}
template<typename _FwdIter, typename _TraitsT>
// Expands to a call of __func<icase, collate>(args), where the two bool
// template arguments are chosen at run time from _M_flags: the first is true
// when regex_constants::icase is set, the second when
// regex_constants::collate is set.  The do ... while (false) wrapper lets
// the expansion be used as a single statement followed by ';'.
// Note: `args...` is the GNU named-variadic macro form.
#define __INSERT_REGEX_MATCHER(__func, args...)\
do\
if (!(_M_flags & regex_constants::icase))\
if (!(_M_flags & regex_constants::collate))\
__func<false, false>(args);\
else\
__func<false, true>(args);\
else\
if (!(_M_flags & regex_constants::collate))\
__func<true, false>(args);\
else\
__func<true, true>(args);\
while (false)
template<typename _TraitsT>
bool
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_atom()
{
if (_M_match_token(_ScannerT::_S_token_anychar))
{
if (_M_flags & regex_constants::ECMAScript)
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_AnyMatcher<_TraitsT,
true>(_M_traits))));
if (!(_M_flags & regex_constants::ECMAScript))
__INSERT_REGEX_MATCHER(_M_insert_any_matcher_posix);
else
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_AnyMatcher<_TraitsT,
false>(_M_traits))));
__INSERT_REGEX_MATCHER(_M_insert_any_matcher_ecma);
}
else if (_M_try_char())
{
if (_M_flags & regex_constants::icase)
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_CharMatcher<_TraitsT,
true>(_M_value[0],
_M_traits))));
else
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_CharMatcher<_TraitsT,
false>(_M_value[0],
_M_traits))));
}
__INSERT_REGEX_MATCHER(_M_insert_char_matcher);
else if (_M_match_token(_ScannerT::_S_token_backref))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
_M_insert_backref(_M_cur_int_value(10))));
else if (_M_match_token(_ScannerT::_S_token_quoted_class))
{
_GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
_BMatcherT __matcher(_M_ctype.is(_CtypeT::upper, _M_value[0]),
_M_traits, _M_flags);
__matcher._M_add_character_class(_M_value);
__matcher._M_ready();
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher(std::move(__matcher))));
}
__INSERT_REGEX_MATCHER(_M_insert_character_class_matcher);
else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin))
{
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_dummy());
......@@ -348,28 +335,90 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return true;
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
bool
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_bracket_expression()
{
bool __neg =
_M_match_token(_ScannerT::_S_token_bracket_neg_begin);
if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin)))
return false;
_BMatcherT __matcher(__neg, _M_traits, _M_flags);
__INSERT_REGEX_MATCHER(_M_insert_bracket_matcher, __neg);
return true;
}
#undef __INSERT_REGEX_MATCHER
// Inserts a matcher state built from
// _AnyMatcher<_TraitsT, true, __icase, __collate> into _M_nfa and pushes a
// state sequence referring to it onto _M_stack.  _M_atom() selects this
// variant for the "any character" token when the ECMAScript flag is set.
template<typename _TraitsT>
template<bool __icase, bool __collate>
void
_Compiler<_TraitsT>::
_M_insert_any_matcher_ecma()
{
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_AnyMatcher<_TraitsT, true, __icase, __collate>
(_M_traits))));
}
// Inserts a matcher state built from
// _AnyMatcher<_TraitsT, false, __icase, __collate> into _M_nfa and pushes a
// state sequence referring to it onto _M_stack.  _M_atom() selects this
// variant for the "any character" token when the ECMAScript flag is NOT set.
template<typename _TraitsT>
template<bool __icase, bool __collate>
void
_Compiler<_TraitsT>::
_M_insert_any_matcher_posix()
{
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_AnyMatcher<_TraitsT, false, __icase, __collate>
(_M_traits))));
}
// Inserts a single-character matcher state into _M_nfa and pushes a state
// sequence referring to it onto _M_stack.  The matcher is a
// _CharMatcher<_TraitsT, __icase, __collate> constructed from the first
// character of _M_value and from _M_traits.
template<typename _TraitsT>
template<bool __icase, bool __collate>
void
_Compiler<_TraitsT>::
_M_insert_char_matcher()
{
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_CharMatcher<_TraitsT, __icase, __collate>
(_M_value[0], _M_traits))));
}
// Builds a _BracketMatcher for the one-character class name held in _M_value,
// inserts it into _M_nfa as a matcher state and pushes a state sequence
// referring to it onto _M_stack.
// NOTE(review): the first constructor argument is the slot that
// _M_insert_bracket_matcher() fills with __neg, so an upper-case class
// letter presumably produces a negated class -- confirm against
// _BracketMatcher's constructor.
template<typename _TraitsT>
template<bool __icase, bool __collate>
void
_Compiler<_TraitsT>::
_M_insert_character_class_matcher()
{
// Debug-mode check only: the class name is expected to be a single character.
_GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
_BracketMatcher<_TraitsT, __icase, __collate> __matcher
(_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits);
__matcher._M_add_character_class(_M_value);
__matcher._M_ready();
// The matcher is moved, not copied, into the NFA.
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher(std::move(__matcher))));
}
template<typename _TraitsT>
template<bool __icase, bool __collate>
void
_Compiler<_TraitsT>::
_M_insert_bracket_matcher(bool __neg)
{
_BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits);
while (!_M_match_token(_ScannerT::_S_token_bracket_end))
_M_expression_term(__matcher);
__matcher._M_ready();
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher(std::move(__matcher))));
return true;
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
template<bool __icase, bool __collate>
void
_Compiler<_FwdIter, _TraitsT>::
_M_expression_term(_BMatcherT& __matcher)
_Compiler<_TraitsT>::
_M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
{
if (_M_match_token(_ScannerT::_S_token_collsymbol))
__matcher._M_add_collating_element(_M_value);
......@@ -403,9 +452,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__throw_regex_error(regex_constants::error_brack);
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
bool
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_try_char()
{
bool __is_char = false;
......@@ -424,9 +473,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __is_char;
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
bool
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_match_token(_TokenT token)
{
if (token == _M_scanner._M_get_token())
......@@ -438,9 +487,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false;
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
int
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_cur_int_value(int __radix)
{
long __v = 0;
......@@ -450,25 +499,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __v;
}
template<typename _TraitsT>
template<typename _TraitsT, bool __icase, bool __collate>
bool
_BracketMatcher<_TraitsT>::_M_apply(_CharT __ch, false_type) const
_BracketMatcher<_TraitsT, __icase, __collate>::
_M_apply(_CharT __ch, false_type) const
{
bool __ret = false;
if (_M_traits.isctype(__ch, _M_class_set)
|| _M_char_set.count(_M_translate(__ch))
|| _M_equiv_set.count(_M_traits.transform_primary(&__ch, &__ch+1)))
if (std::find(_M_char_set.begin(), _M_char_set.end(),
_M_translator._M_translate(__ch))
!= _M_char_set.end())
__ret = true;
else
{
_StringT __s = _M_get_str(_M_flags & regex_constants::collate
? _M_translate(__ch) : __ch);
auto __s = _M_translator._M_transform(__ch);
for (auto& __it : _M_range_set)
if (__it.first <= __s && __s <= __it.second)
{
__ret = true;
break;
}
if (_M_traits.isctype(__ch, _M_class_set))
__ret = true;
else if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(),
_M_traits.transform_primary(&__ch, &__ch+1))
!= _M_equiv_set.end())
__ret = true;
}
if (_M_is_non_matching)
return !__ret;
......
......@@ -65,7 +65,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_nfa(*__re._M_automaton),
_M_results(__results),
_M_match_queue(__dfs_mode ? nullptr
: new queue<pair<_StateIdT, _ResultsVec>>()),
: new vector<pair<_StateIdT, _ResultsVec>>()),
_M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())),
_M_flags((__flags & regex_constants::match_prev_avail)
? (__flags
......@@ -142,7 +142,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_ResultsVec& _M_results;
// Used in BFS, saving states that need to be considered for the next
// character.
std::unique_ptr<queue<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
std::unique_ptr<vector<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
// Used in BFS, indicating which states have already been visited.
std::unique_ptr<vector<bool>> _M_visited;
_FlagT _M_flags;
......
......@@ -111,7 +111,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
else
{
_M_match_queue->push(make_pair(_M_start_state, _M_results));
_M_match_queue->push_back(make_pair(_M_start_state, _M_results));
bool __ret = false;
while (1)
{
......@@ -120,10 +120,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
break;
_M_visited->assign(_M_visited->size(), false);
auto _M_old_queue = std::move(*_M_match_queue);
while (!_M_old_queue.empty())
for (auto __task : _M_old_queue)
{
auto __task = _M_old_queue.front();
_M_old_queue.pop();
_M_cur_results = __task.second;
_M_dfs<__match_mode>(__task.first);
}
......@@ -279,7 +277,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
else
if (__state._M_matches(*_M_current))
_M_match_queue->push(make_pair(__state._M_next, _M_cur_results));
_M_match_queue->push_back(make_pair(__state._M_next,
_M_cur_results));
break;
// First fetch the matched result from _M_cur_results as __submatch;
// then compare it with
......
......@@ -39,25 +39,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{
*/
/**
* @brief struct _Scanner. Scans an input range for regex tokens.
*
* The %_Scanner class interprets the regular expression pattern in
* the input range passed to its constructor as a sequence of parse
* tokens passed to the regular expression compiler. The sequence
* of tokens provided depends on the flag settings passed to the
* constructor: different regular expression grammars will interpret
* the same input pattern in syntactically different ways.
*/
template<typename _FwdIter>
class _Scanner
struct _ScannerBase
{
public:
typedef typename std::iterator_traits<_FwdIter>::value_type _CharT;
typedef std::basic_string<_CharT> _StringT;
typedef regex_constants::syntax_option_type _FlagT;
typedef const std::ctype<_CharT> _CtypeT;
/// Token types returned from the scanner.
enum _TokenT
{
......@@ -93,7 +77,137 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_unknown
};
_Scanner(_FwdIter __begin, _FwdIter __end,
protected:
typedef regex_constants::syntax_option_type _FlagT;
enum _StateT
{
_S_state_normal,
_S_state_in_brace,
_S_state_in_bracket,
};
protected:
// Initializes the scanner state shared by all character types.
// __flags: syntax options; they select which escape table and which set of
// special characters the scanner consults.
//  - escape table: the ECMAScript table when the ECMAScript flag is set,
//    otherwise the awk table;
//  - special characters: the ECMAScript set, else the basic set when
//    _M_is_basic() holds, else the extended set.
// The scanner starts in _S_state_normal with _M_at_bracket_start cleared.
_ScannerBase(_FlagT __flags)
: _M_state(_S_state_normal),
_M_flags(__flags),
_M_escape_tbl(_M_is_ecma()
? _M_ecma_escape_tbl
: _M_awk_escape_tbl),
_M_spec_char(_M_is_ecma()
? _M_ecma_spec_char
: _M_is_basic()
? _M_basic_spec_char
: _M_extended_spec_char),
_M_at_bracket_start(false)
{ }
protected:
// Looks up __c in the currently selected escape table (_M_escape_tbl).
// Returns a pointer to the character the escape stands for, or nullptr when
// __c has no entry.  The scan is linear and stops at the table's sentinel
// entry, whose key is '\0'; consequently a '\0' argument always yields
// nullptr.
const char*
_M_find_escape(char __c)
{
auto __it = _M_escape_tbl;
for (; __it->first != '\0'; ++__it)
if (__it->first == __c)
return &__it->second;
return nullptr;
}
// True when the ECMAScript bit is set in _M_flags.
bool
_M_is_ecma() const
{ return _M_flags & regex_constants::ECMAScript; }
// True when either the basic or the grep bit is set in _M_flags.
bool
_M_is_basic() const
{ return _M_flags & (regex_constants::basic | regex_constants::grep); }
// True when any of the extended, egrep or awk bits is set in _M_flags.
bool
_M_is_extended() const
{
return _M_flags & (regex_constants::extended
| regex_constants::egrep
| regex_constants::awk);
}
// True when either the grep or the egrep bit is set in _M_flags.
bool
_M_is_grep() const
{ return _M_flags & (regex_constants::grep | regex_constants::egrep); }
// True when the awk bit is set in _M_flags.
bool
_M_is_awk() const
{ return _M_flags & regex_constants::awk; }
protected:
// Maps a single pattern character to the token it produces.  The table is
// searched linearly; the final entry, whose key is '\0', is a sentinel that
// terminates the search and is not a real mapping.
const std::pair<char, _TokenT> _M_token_tbl[9] =
{
{'^', _S_token_line_begin},
{'$', _S_token_line_end},
{'.', _S_token_anychar},
{'*', _S_token_closure0},
{'+', _S_token_closure1},
{'?', _S_token_opt},
{'|', _S_token_or},
{'\n', _S_token_or}, // grep and egrep
{'\0', _S_token_or},
};
// Escape table selected when the ECMAScript flag is set: maps the character
// following a backslash to the character it denotes.  The last entry (key
// '\0') is the sentinel that _M_find_escape() stops at.
const std::pair<char, char> _M_ecma_escape_tbl[8] =
{
{'0', '\0'},
{'b', '\b'},
{'f', '\f'},
{'n', '\n'},
{'r', '\r'},
{'t', '\t'},
{'v', '\v'},
{'\0', '\0'},
};
// Escape table selected when the ECMAScript flag is not set: maps the
// character following a backslash to the character it denotes.  The last
// entry (key '\0') is the sentinel that _M_find_escape() stops at.
const std::pair<char, char> _M_awk_escape_tbl[11] =
{
{'"', '"'},
{'/', '/'},
{'\\', '\\'},
{'a', '\a'},
{'b', '\b'},
{'f', '\f'},
{'n', '\n'},
{'r', '\r'},
{'t', '\t'},
{'v', '\v'},
{'\0', '\0'},
};
// Special-character sets, one per grammar family, stored as NUL-terminated
// strings; the constructor points _M_spec_char at one of them according to
// the syntax flags.
const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
const char* _M_basic_spec_char = ".[\\*^$";
const char* _M_extended_spec_char = ".[\\()*+?{|^$";
_StateT _M_state;
_FlagT _M_flags;
_TokenT _M_token;
const std::pair<char, char>* _M_escape_tbl;
const char* _M_spec_char;
bool _M_at_bracket_start;
};
/**
* @brief struct _Scanner. Scans an input range for regex tokens.
*
* The %_Scanner class interprets the regular expression pattern in
* the input range passed to its constructor as a sequence of parse
* tokens passed to the regular expression compiler. The sequence
* of tokens provided depends on the flag settings passed to the
* constructor: different regular expression grammars will interpret
* the same input pattern in syntactically different ways.
*/
template<typename _CharT>
class _Scanner
: public _ScannerBase
{
public:
typedef const _CharT* _IterT;
typedef std::basic_string<_CharT> _StringT;
typedef regex_constants::syntax_option_type _FlagT;
typedef const std::ctype<_CharT> _CtypeT;
_Scanner(_IterT __begin, _IterT __end,
_FlagT __flags, std::locale __loc);
void
......@@ -113,13 +227,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif
private:
enum _StateT
{
_S_state_normal,
_S_state_in_brace,
_S_state_in_bracket,
};
void
_M_scan_normal();
......@@ -141,49 +248,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_eat_class(char);
constexpr bool
_M_is_ecma()
{ return _M_flags & regex_constants::ECMAScript; }
constexpr bool
_M_is_basic()
{ return _M_flags & (regex_constants::basic | regex_constants::grep); }
constexpr bool
_M_is_extended()
{
return _M_flags & (regex_constants::extended
| regex_constants::egrep
| regex_constants::awk);
}
constexpr bool
_M_is_grep()
{ return _M_flags & (regex_constants::grep | regex_constants::egrep); }
constexpr bool
_M_is_awk()
{ return _M_flags & regex_constants::awk; }
_StateT _M_state;
_FwdIter _M_current;
_FwdIter _M_end;
_FlagT _M_flags;
_IterT _M_current;
_IterT _M_end;
_CtypeT& _M_ctype;
_TokenT _M_token;
_StringT _M_value;
bool _M_at_bracket_start;
public:
// FIXME: make them static when this file is stable.
const std::map<char, _TokenT> _M_token_map;
const std::map<char, char> _M_ecma_escape_map;
const std::map<char, char> _M_awk_escape_map;
const std::set<char> _M_ecma_spec_char;
const std::set<char> _M_basic_spec_char;
const std::set<char> _M_extended_spec_char;
const std::map<char, char>& _M_escape_map;
const std::set<char>& _M_spec_char;
void (_Scanner::* _M_eat_escape)();
};
......
......@@ -52,106 +52,22 @@ namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _FwdIter>
_Scanner<_FwdIter>::
_Scanner(_FwdIter __begin, _FwdIter __end,
template<typename _CharT>
_Scanner<_CharT>::
_Scanner(typename _Scanner::_IterT __begin,
typename _Scanner::_IterT __end,
_FlagT __flags, std::locale __loc)
: _M_state(_S_state_normal), _M_current(__begin), _M_end(__end),
_M_flags(__flags),
: _ScannerBase(__flags),
_M_current(__begin), _M_end(__end),
_M_ctype(std::use_facet<_CtypeT>(__loc)),
_M_at_bracket_start(false),
_M_token_map
{
{'^', _S_token_line_begin},
{'$', _S_token_line_end},
{'.', _S_token_anychar},
{'*', _S_token_closure0},
{'+', _S_token_closure1},
{'?', _S_token_opt},
{'|', _S_token_or},
// grep and egrep
{'\n', _S_token_or},
},
_M_ecma_escape_map
{
{'0', '\0'},
{'b', '\b'},
{'f', '\f'},
{'n', '\n'},
{'r', '\r'},
{'t', '\t'},
{'v', '\v'},
},
_M_awk_escape_map
{
{'"', '"'},
{'/', '/'},
{'\\', '\\'},
{'a', '\a'},
{'b', '\b'},
{'f', '\f'},
{'n', '\n'},
{'r', '\r'},
{'t', '\t'},
{'v', '\v'},
},
_M_ecma_spec_char
{
'^',
'$',
'\\',
'.',
'*',
'+',
'?',
'(',
')',
'[',
']',
'{',
'}',
'|',
},
_M_basic_spec_char
{
'.',
'[',
'\\',
'*',
'^',
'$',
},
_M_extended_spec_char
{
'.',
'[',
'\\',
'(',
')',
'*',
'+',
'?',
'{',
'|',
'^',
'$',
},
_M_escape_map(_M_is_ecma()
? _M_ecma_escape_map
: _M_awk_escape_map),
_M_spec_char(_M_is_ecma()
? _M_ecma_spec_char
: _M_is_basic()
? _M_basic_spec_char
: _M_extended_spec_char),
_M_eat_escape(_M_is_ecma()
? &_Scanner::_M_eat_escape_ecma
: &_Scanner::_M_eat_escape_posix)
{ _M_advance(); }
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_advance()
{
if (_M_current == _M_end)
......@@ -173,12 +89,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles:
// 1) "\(", "\)", "\{" in basic. It's not escaping.
// 2) "(?:", "(?=", "(?!" in ECMAScript.
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_scan_normal()
{
auto __c = *_M_current++;
const char* __pos;
if (__c == '\\')
{
......@@ -244,11 +161,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_state = _S_state_in_brace;
_M_token = _S_token_interval_begin;
}
else if ((_M_spec_char.count(_M_ctype.narrow(__c, '\0'))
else if (((__pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')))
!= nullptr
&& *__pos != '\0'
&& __c != ']'
&& __c != '}')
|| (_M_is_grep() && __c == '\n'))
_M_token = _M_token_map.at(__c);
{
auto __it = _M_token_tbl;
auto __narrowc = _M_ctype.narrow(__c, '\0');
for (; __it->first != '\0'; ++__it)
if (__it->first == __narrowc)
{
_M_token = __it->second;
return;
}
_GLIBCXX_DEBUG_ASSERT(false);
}
else
{
_M_token = _S_token_ord_char;
......@@ -259,9 +188,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles:
// 1) different semantics of "[]" and "[^]".
// 2) Escaping in bracket expr.
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_scan_in_bracket()
{
if (_M_current == _M_end)
......@@ -316,9 +245,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles:
// 1) "\}" in basic style.
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_scan_in_brace()
{
if (_M_current == _M_end)
......@@ -357,21 +286,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__throw_regex_error(regex_constants::error_badbrace);
}
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_eat_escape_ecma()
{
if (_M_current == _M_end)
__throw_regex_error(regex_constants::error_escape);
auto __c = *_M_current++;
auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
if (_M_escape_map.count(_M_ctype.narrow(__c, '\0'))
&& (__c != 'b' || _M_state == _S_state_in_bracket))
if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket))
{
_M_token = _S_token_ord_char;
_M_value.assign(1, _M_escape_map.at(__c));
_M_value.assign(1, *__pos);
}
else if (__c == 'b')
{
......@@ -431,17 +360,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles:
// 1) Extended doesn't support backref, but basic does.
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_eat_escape_posix()
{
if (_M_current == _M_end)
__throw_regex_error(regex_constants::error_escape);
auto __c = *_M_current;
auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
if (_M_spec_char.count(_M_ctype.narrow(__c, '\0')))
if (__pos != nullptr && *__pos != '\0')
{
_M_token = _S_token_ord_char;
_M_value.assign(1, __c);
......@@ -469,17 +399,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
++_M_current;
}
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_eat_escape_awk()
{
auto __c = *_M_current++;
auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
if (_M_escape_map.count(_M_ctype.narrow(__c, '\0')))
if (__pos != nullptr)
{
_M_token = _S_token_ord_char;
_M_value.assign(1, _M_escape_map.at(__c));
_M_value.assign(1, *__pos);
}
// \ddd for oct representation
else if (_M_ctype.is(_CtypeT::digit, __c)
......@@ -505,9 +436,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Eats a character class or throws an exception.
// __ch could be ':', '.' or '=', _M_current is the char after ']' when
// returning.
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_eat_class(char __ch)
{
for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;)
......@@ -525,9 +456,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
#ifdef _GLIBCXX_DEBUG
template<typename _FwdIter>
template<typename _CharT>
std::ostream&
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_print(std::ostream& ostr)
{
switch (_M_token)
......
......@@ -44,15 +44,13 @@
#include <iterator>
#include <locale>
#include <memory>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <cstring>
#include <bits/regex_constants.h>
#include <bits/regex_error.h>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment