Commit ddf41e9d by Tim Shen Committed by Tim Shen

regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not use std::map.

2014-01-17  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not
	use std::map.
	* include/bits/regex_automaton.h: Do not use std::set.
	* include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_char(),
	_BracketMatcher<>::_M_add_collating_element(),
	_BracketMatcher<>::_M_add_equivalence_class(),
	_BracketMatcher<>::_M_make_range()): Likewise.
	* include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply()):
	Likewise.
	* include/bits/regex_executor.h: Do not use std::queue.
	* include/bits/regex_executor.tcc (_Executor<>::_M_main(),
	_Executor<>::_M_dfs()): Likewise.
	* include/std/regex: Remove <map>, <set> and <queue>.

2014-01-17  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex.h (__compile_nfa<>(), basic_regex<>::basic_regex(),
	basic_regex<>::assign()): Change __compile_nfa to accept
	const _CharT* only.
	* include/bits/regex_compiler.h: Change _Compiler's template
	argument from <_FwdIter, _TraitsT> to <_TraitsT>.
	* include/bits/regex_compiler.tcc: Likewise.

2014-01-17  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex_compiler.h: Template _ScannerT on the character
	type instead of the iterator type.
	* include/bits/regex_scanner.h (_Scanner<>::_Scanner()): Separate
	_ScannerBase from _Scanner; Change _Scanner's template argument from
	_FwdIter to _CharT. Avoid use of std::map and std::set by using arrays
	instead.
	* include/bits/regex_scanner.tcc (_Scanner<>::_Scanner(),
	_Scanner<>::_M_scan_normal(), _Scanner<>::_M_eat_escape_ecma(),
	_Scanner<>::_M_eat_escape_posix(), _Scanner<>::_M_eat_escape_awk()):
	Likewise.
	* include/std/regex: Add <cstring> for using strchr.

2014-01-17  Tim Shen  <timshen91@gmail.com>

	* bits/regex_automaton.tcc: Indentation fix.
	* bits/regex_compiler.h (__compile_nfa<>(), _Compiler<>,
	_RegexTranslator<>, _AnyMatcher<>, _CharMatcher<>,
	_BracketMatcher<>): Add bool option template parameters and
	specializations to make matching faster and more space-efficient.
	* bits/regex_compiler.tcc: Likewise.

From-SVN: r206690
parent 9e6f9ad6
2014-01-17 Tim Shen <timshen91@gmail.com>
* include/bits/regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not
use std::map.
* include/bits/regex_automaton.h: Do not use std::set.
* include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_char(),
_BracketMatcher<>::_M_add_collating_element(),
_BracketMatcher<>::_M_add_equivalence_class(),
_BracketMatcher<>::_M_make_range()): Likewise.
* include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply()):
Likewise.
* include/bits/regex_executor.h: Do not use std::queue.
* include/bits/regex_executor.tcc (_Executor<>::_M_main(),
_Executor<>::_M_dfs()): Likewise.
* include/std/regex: Remove <map>, <set> and <queue>.
2014-01-17 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h (__compile_nfa<>(), basic_regex<>::basic_regex(),
basic_regex<>::assign()): Change __compile_nfa to accept
const _CharT* only.
* include/bits/regex_compiler.h: Change _Compiler's template
argument from <_FwdIter, _TraitsT> to <_TraitsT>.
* include/bits/regex_compiler.tcc: Likewise.
2014-01-17 Tim Shen <timshen91@gmail.com>
* include/bits/regex_compiler.h: Template _ScannerT on the character
type instead of the iterator type.
* include/bits/regex_scanner.h (_Scanner<>::_Scanner()): Separate
_ScannerBase from _Scanner; Change _Scanner's template argument from
_FwdIter to _CharT. Avoid use of std::map and std::set by using arrays
instead.
* include/bits/regex_scanner.tcc (_Scanner<>::_Scanner(),
_Scanner<>::_M_scan_normal(), _Scanner<>::_M_eat_escape_ecma(),
_Scanner<>::_M_eat_escape_posix(), _Scanner<>::_M_eat_escape_awk()):
Likewise.
* include/std/regex: Add <cstring> for using strchr.
2014-01-17 Tim Shen <timshen91@gmail.com>
* bits/regex_automaton.tcc: Indentation fix.
* bits/regex_compiler.h (__compile_nfa<>(), _Compiler<>,
_RegexTranslator<>, _AnyMatcher<>, _CharMatcher<>,
_BracketMatcher<>): Add bool option template parameters and
specializations to make matching faster and more space-efficient.
* bits/regex_compiler.tcc: Likewise.
2014-01-15 François Dumont <fdumont@gcc.gnu.org> 2014-01-15 François Dumont <fdumont@gcc.gnu.org>
PR libstdc++/59712 PR libstdc++/59712
......
...@@ -60,51 +60,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -60,51 +60,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename, typename, typename, bool> template<typename, typename, typename, bool>
class _Executor; class _Executor;
template<typename _Tp> template<typename _TraitsT>
struct __has_contiguous_iter : std::false_type { }; inline std::shared_ptr<_NFA<_TraitsT>>
__compile_nfa(const typename _TraitsT::char_type* __first,
template<typename _Ch, typename _Tr, typename _Alloc> const typename _TraitsT::char_type* __last,
struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>> const _TraitsT& __traits,
: std::true_type // string<Ch> storage is contiguous
{ };
template<typename _Tp, typename _Alloc>
struct __has_contiguous_iter<std::vector<_Tp, _Alloc>>
: std::true_type // vector<Tp> storage is contiguous
{ };
template<typename _Alloc>
struct __has_contiguous_iter<std::vector<bool, _Alloc>>
: std::false_type // vector<bool> storage is not contiguous
{ };
template<typename _Tp>
struct __is_contiguous_normal_iter : std::false_type { };
template<typename _Tp, typename _Cont>
struct
__is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>>
: __has_contiguous_iter<_Cont>::type
{ };
template<typename _Iter, typename _TraitsT>
using __enable_if_contiguous_normal_iter
= typename enable_if< __is_contiguous_normal_iter<_Iter>::value,
std::shared_ptr<_NFA<_TraitsT>> >::type;
template<typename _Iter, typename _TraitsT>
using __disable_if_contiguous_normal_iter
= typename enable_if< !__is_contiguous_normal_iter<_Iter>::value,
std::shared_ptr<_NFA<_TraitsT>> >::type;
template<typename _FwdIter, typename _TraitsT>
__disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
__compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits,
regex_constants::syntax_option_type __flags);
template<typename _Iter, typename _TraitsT>
__enable_if_contiguous_normal_iter<_Iter, _TraitsT>
__compile_nfa(_Iter __first, _Iter __last, const _TraitsT& __traits,
regex_constants::syntax_option_type __flags); regex_constants::syntax_option_type __flags);
_GLIBCXX_END_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION
...@@ -561,7 +521,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -561,7 +521,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
flag_type __f = ECMAScript) flag_type __f = ECMAScript)
: _M_flags(__f), : _M_flags(__f),
_M_original_str(__first, __last), _M_original_str(__first, __last),
_M_automaton(__detail::__compile_nfa(__first, __last, _M_traits, _M_automaton(__detail::__compile_nfa(_M_original_str.c_str(),
_M_original_str.c_str()
+ _M_original_str.size(),
_M_traits,
_M_flags)) _M_flags))
{ } { }
...@@ -698,7 +661,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -698,7 +661,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
_M_flags = __flags; _M_flags = __flags;
_M_original_str.assign(__s.begin(), __s.end()); _M_original_str.assign(__s.begin(), __s.end());
_M_automaton = __detail::__compile_nfa(__s.begin(), __s.end(), auto __p = _M_original_str.c_str();
_M_automaton = __detail::__compile_nfa(__p,
__p + _M_original_str.size(),
_M_traits, _M_flags); _M_traits, _M_flags);
return *this; return *this;
} }
......
...@@ -41,7 +41,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -41,7 +41,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/ */
typedef long _StateIdT; typedef long _StateIdT;
typedef std::set<_StateIdT> _StateSet;
static const _StateIdT _S_invalid_state_id = -1; static const _StateIdT _S_invalid_state_id = -1;
template<typename _CharT> template<typename _CharT>
...@@ -138,16 +137,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -138,16 +137,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_start() const _M_start() const
{ return _M_start_state; } { return _M_start_state; }
const _StateSet&
_M_final_states() const
{ return _M_accepting_states; }
_SizeT _SizeT
_M_sub_count() const _M_sub_count() const
{ return _M_subexpr_count; } { return _M_subexpr_count; }
std::vector<size_t> _M_paren_stack; std::vector<size_t> _M_paren_stack;
_StateSet _M_accepting_states;
_FlagT _M_flags; _FlagT _M_flags;
_StateIdT _M_start_state; _StateIdT _M_start_state;
_SizeT _M_subexpr_count; _SizeT _M_subexpr_count;
...@@ -172,7 +166,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -172,7 +166,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_insert_accept() _M_insert_accept()
{ {
auto __ret = _M_insert_state(_StateT(_S_opcode_accept)); auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
this->_M_accepting_states.insert(__ret);
return __ret; return __ret;
} }
......
...@@ -186,7 +186,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -186,7 +186,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateSeq<_TraitsT> _StateSeq<_TraitsT>
_StateSeq<_TraitsT>::_M_clone() _StateSeq<_TraitsT>::_M_clone()
{ {
std::map<_StateIdT, _StateIdT> __m; std::vector<_StateIdT> __m(_M_nfa.size(), -1);
std::stack<_StateIdT> __stack; std::stack<_StateIdT> __stack;
__stack.push(_M_start); __stack.push(_M_start);
while (!__stack.empty()) while (!__stack.empty())
...@@ -194,30 +194,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -194,30 +194,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __u = __stack.top(); auto __u = __stack.top();
__stack.pop(); __stack.pop();
auto __dup = _M_nfa[__u]; auto __dup = _M_nfa[__u];
// _M_insert_state() never returns -1
auto __id = _M_nfa._M_insert_state(__dup); auto __id = _M_nfa._M_insert_state(__dup);
__m[__u] = __id; __m[__u] = __id;
if (__u == _M_end) if (__u == _M_end)
continue; continue;
if (__m.count(__dup._M_next) == 0) if (__dup._M_next != _S_invalid_state_id && __m[__dup._M_next] == -1)
__stack.push(__dup._M_next); __stack.push(__dup._M_next);
if (__dup._M_opcode == _S_opcode_alternative if (__dup._M_opcode == _S_opcode_alternative
|| __dup._M_opcode == _S_opcode_subexpr_lookahead) || __dup._M_opcode == _S_opcode_subexpr_lookahead)
if (__m.count(__dup._M_alt) == 0) if (__dup._M_alt != _S_invalid_state_id && __m[__dup._M_alt] == -1)
__stack.push(__dup._M_alt); __stack.push(__dup._M_alt);
} }
for (auto __it : __m) long __size = static_cast<long>(__m.size());
for (long __k = 0; __k < __size; __k++)
{ {
auto& __ref = _M_nfa[__it.second]; long __v;
if (__ref._M_next != -1) if ((__v = __m[__k]) == -1)
continue;
auto& __ref = _M_nfa[__v];
if (__ref._M_next != _S_invalid_state_id)
{ {
_GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next)); _GLIBCXX_DEBUG_ASSERT(__m[__ref._M_next] != -1);
__ref._M_next = __m[__ref._M_next]; __ref._M_next = __m[__ref._M_next];
} }
if (__ref._M_opcode == _S_opcode_alternative if (__ref._M_opcode == _S_opcode_alternative
|| __ref._M_opcode == _S_opcode_subexpr_lookahead) || __ref._M_opcode == _S_opcode_subexpr_lookahead)
if (__ref._M_alt != -1) if (__ref._M_alt != _S_invalid_state_id)
{ {
_GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt)); _GLIBCXX_DEBUG_ASSERT(__m[__ref._M_alt] != -1);
__ref._M_alt = __m[__ref._M_alt]; __ref._M_alt = __m[__ref._M_alt];
} }
} }
......
...@@ -39,19 +39,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -39,19 +39,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{ * @{
*/ */
template<typename _TraitsT> template<typename, bool, bool>
struct _BracketMatcher; struct _BracketMatcher;
/// Builds an NFA from an input iterator interval. /// Builds an NFA from an input iterator interval.
template<typename _FwdIter, typename _TraitsT> template<typename _TraitsT>
class _Compiler class _Compiler
{ {
public: public:
typedef typename _TraitsT::string_type _StringT; typedef typename _TraitsT::char_type _CharT;
typedef const _CharT* _IterT;
typedef _NFA<_TraitsT> _RegexT; typedef _NFA<_TraitsT> _RegexT;
typedef regex_constants::syntax_option_type _FlagT; typedef regex_constants::syntax_option_type _FlagT;
_Compiler(_FwdIter __b, _FwdIter __e, _Compiler(_IterT __b, _IterT __e,
const _TraitsT& __traits, _FlagT __flags); const _TraitsT& __traits, _FlagT __flags);
std::shared_ptr<_RegexT> std::shared_ptr<_RegexT>
...@@ -59,12 +60,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -59,12 +60,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ return make_shared<_RegexT>(std::move(_M_nfa)); } { return make_shared<_RegexT>(std::move(_M_nfa)); }
private: private:
typedef _Scanner<_FwdIter> _ScannerT; typedef _Scanner<_CharT> _ScannerT;
typedef typename _TraitsT::string_type _StringT;
typedef typename _ScannerT::_TokenT _TokenT; typedef typename _ScannerT::_TokenT _TokenT;
typedef _StateSeq<_TraitsT> _StateSeqT; typedef _StateSeq<_TraitsT> _StateSeqT;
typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT; typedef std::stack<_StateSeqT> _StackT;
typedef _BracketMatcher<_TraitsT> _BMatcherT; typedef std::ctype<_CharT> _CtypeT;
typedef std::ctype<typename _TraitsT::char_type> _CtypeT;
// accepts a specific token or returns false. // accepts a specific token or returns false.
bool bool
...@@ -91,20 +92,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -91,20 +92,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool bool
_M_bracket_expression(); _M_bracket_expression();
template<bool __icase, bool __collate>
void void
_M_expression_term(_BMatcherT& __matcher); _M_insert_any_matcher_ecma();
bool template<bool __icase, bool __collate>
_M_range_expression(_BMatcherT& __matcher); void
_M_insert_any_matcher_posix();
bool template<bool __icase, bool __collate>
_M_collating_symbol(_BMatcherT& __matcher); void
_M_insert_char_matcher();
bool template<bool __icase, bool __collate>
_M_equivalence_class(_BMatcherT& __matcher); void
_M_insert_character_class_matcher();
bool template<bool __icase, bool __collate>
_M_character_class(_BMatcherT& __matcher); void
_M_insert_bracket_matcher(bool __neg);
template<bool __icase, bool __collate>
void
_M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>&
__matcher);
int int
_M_cur_int_value(int __radix); _M_cur_int_value(int __radix);
...@@ -129,33 +140,119 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -129,33 +140,119 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StackT _M_stack; _StackT _M_stack;
}; };
template<typename _FwdIter, typename _TraitsT> template<typename _TraitsT>
inline __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT> inline std::shared_ptr<_NFA<_TraitsT>>
__compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits, __compile_nfa(const typename _TraitsT::char_type* __first,
const typename _TraitsT::char_type* __last,
const _TraitsT& __traits,
regex_constants::syntax_option_type __flags) regex_constants::syntax_option_type __flags)
{ {
using _Cmplr = _Compiler<_FwdIter, _TraitsT>; using _Cmplr = _Compiler<_TraitsT>;
return _Cmplr(__first, __last, __traits, __flags)._M_get_nfa(); return _Cmplr(__first, __last, __traits, __flags)._M_get_nfa();
} }
template<typename _Iter, typename _TraitsT> // [28.13.14]
inline __enable_if_contiguous_normal_iter<_Iter, _TraitsT> template<typename _TraitsT, bool __icase, bool __collate>
__compile_nfa(_Iter __first, _Iter __last, const _TraitsT& __traits, class _RegexTranslator
regex_constants::syntax_option_type __flags) {
public:
typedef typename _TraitsT::char_type _CharT;
typedef typename _TraitsT::string_type _StringT;
typedef typename std::conditional<__collate,
_StringT,
_CharT>::type _StrTransT;
explicit
_RegexTranslator(const _TraitsT& __traits)
: _M_traits(__traits)
{ }
_CharT
_M_translate(_CharT __ch) const
{ {
size_t __len = __last - __first; if (__icase)
const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr; return _M_traits.translate_nocase(__ch);
return __compile_nfa(__cfirst, __cfirst + __len, __traits, __flags); else if (__collate)
return _M_traits.translate(__ch);
else
return __ch;
} }
template<typename _TraitsT, bool __is_ecma> _StrTransT
struct _AnyMatcher _M_transform(_CharT __ch) const
{ {
return _M_transform_impl(__ch, typename integral_constant<bool,
__collate>::type());
}
private:
_StrTransT
_M_transform_impl(_CharT __ch, false_type) const
{ return __ch; }
_StrTransT
_M_transform_impl(_CharT __ch, true_type) const
{
_StrTransT __str = _StrTransT(1, _M_translate(__ch));
return _M_traits.transform(__str.begin(), __str.end());
}
const _TraitsT& _M_traits;
};
template<typename _TraitsT>
class _RegexTranslator<_TraitsT, false, false>
{
public:
typedef typename _TraitsT::char_type _CharT; typedef typename _TraitsT::char_type _CharT;
typedef _CharT _StrTransT;
explicit
_RegexTranslator(const _TraitsT& __traits)
{ }
_CharT
_M_translate(_CharT __ch) const
{ return __ch; }
_StrTransT
_M_transform(_CharT __ch) const
{ return __ch; }
};
template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
struct _AnyMatcher;
template<typename _TraitsT, bool __icase, bool __collate>
struct _AnyMatcher<_TraitsT, false, __icase, __collate>
{
typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
typedef typename _TransT::_CharT _CharT;
explicit explicit
_AnyMatcher(const _TraitsT& __traits) _AnyMatcher(const _TraitsT& __traits)
: _M_traits(__traits) : _M_translator(__traits)
{ }
bool
operator()(_CharT __ch) const
{
static auto __nul = _M_translator._M_translate('\0');
return _M_translator._M_translate(__ch) != __nul;
}
_TransT _M_translator;
};
template<typename _TraitsT, bool __icase, bool __collate>
struct _AnyMatcher<_TraitsT, true, __icase, __collate>
{
typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
typedef typename _TransT::_CharT _CharT;
explicit
_AnyMatcher(const _TraitsT& __traits)
: _M_translator(__traits)
{ } { }
bool bool
...@@ -165,92 +262,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -165,92 +262,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool bool
_M_apply(_CharT __ch, true_type) const _M_apply(_CharT __ch, true_type) const
{ {
auto __c = _M_traits.translate(__ch); auto __c = _M_translator._M_translate(__ch);
if (__is_ecma) auto __n = _M_translator._M_translate('\n');
{ auto __r = _M_translator._M_translate('\r');
static auto __n = _M_traits.translate('\n');
static auto __r = _M_traits.translate('\r');
return __c != __n && __c != __r; return __c != __n && __c != __r;
} }
else
{
static auto __nul = _M_traits.translate('\0');
return __c != __nul;
}
}
bool bool
_M_apply(_CharT __ch, false_type) const _M_apply(_CharT __ch, false_type) const
{ {
auto __c = _M_traits.translate(__ch); auto __c = _M_translator._M_translate(__ch);
if (__is_ecma) auto __n = _M_translator._M_translate('\n');
{ auto __r = _M_translator._M_translate('\r');
static auto __n = _M_traits.translate('\n'); auto __u2028 = _M_translator._M_translate(u'\u2028');
static auto __r = _M_traits.translate('\r'); auto __u2029 = _M_translator._M_translate(u'\u2029');
static auto __u2028 = _M_traits.translate(u'\u2028'); return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
static auto __u2029 = _M_traits.translate(u'\u2029');
return __c != __n && __c != __r && __c != __u2028
&& __c != __u2029;
}
else
{
static auto __nul = _M_traits.translate('\0');
return __c != __nul;
}
} }
const _TraitsT& _M_traits; _TransT _M_translator;
}; };
template<typename _TraitsT, bool __icase> template<typename _TraitsT, bool __icase, bool __collate>
struct _CharMatcher struct _CharMatcher
{ {
typedef typename _TraitsT::char_type _CharT; typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
typedef typename _TransT::_CharT _CharT;
_CharMatcher(_CharT __ch, const _TraitsT& __traits) _CharMatcher(_CharT __ch, const _TraitsT& __traits)
: _M_traits(__traits), _M_ch(_M_translate(__ch)) : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch))
{ } { }
bool bool
operator()(_CharT __ch) const operator()(_CharT __ch) const
{ return _M_ch == _M_translate(__ch); } { return _M_ch == _M_translator._M_translate(__ch); }
_CharT _TransT _M_translator;
_M_translate(_CharT __ch) const
{
if (__icase)
return _M_traits.translate_nocase(__ch);
else
return _M_traits.translate(__ch);
}
const _TraitsT& _M_traits;
_CharT _M_ch; _CharT _M_ch;
}; };
/// Matches a character range (bracket expression) /// Matches a character range (bracket expression)
// TODO: Convert used _M_flags fields to template parameters, including template<typename _TraitsT, bool __icase, bool __collate>
// collate and icase. Avoid using std::set, could use flat_set
// (sorted vector and binary search) instead.
template<typename _TraitsT>
struct _BracketMatcher struct _BracketMatcher
{ {
public: public:
typedef typename _TraitsT::char_type _CharT; typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
typedef typename _TraitsT::char_class_type _CharClassT; typedef typename _TransT::_CharT _CharT;
typedef typename _TransT::_StrTransT _StrTransT;
typedef typename _TraitsT::string_type _StringT; typedef typename _TraitsT::string_type _StringT;
typedef regex_constants::syntax_option_type _FlagT; typedef typename _TraitsT::char_class_type _CharClassT;
public: public:
_BracketMatcher(bool __is_non_matching, _BracketMatcher(bool __is_non_matching,
const _TraitsT& __traits, const _TraitsT& __traits)
_FlagT __flags) : _M_class_set(0), _M_translator(__traits), _M_traits(__traits),
: _M_is_non_matching(__is_non_matching)
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
_M_is_ready(false), , _M_is_ready(false)
#endif #endif
_M_traits(__traits), _M_class_set(0), _M_flags(__flags),
_M_is_non_matching(__is_non_matching)
{ } { }
bool bool
...@@ -263,7 +331,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -263,7 +331,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void void
_M_add_char(_CharT __c) _M_add_char(_CharT __c)
{ {
_M_char_set.insert(_M_translate(__c)); _M_char_set.push_back(_M_translator._M_translate(__c));
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
_M_is_ready = false; _M_is_ready = false;
#endif #endif
...@@ -276,7 +344,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -276,7 +344,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__s.data() + __s.size()); __s.data() + __s.size());
if (__st.empty()) if (__st.empty())
__throw_regex_error(regex_constants::error_collate); __throw_regex_error(regex_constants::error_collate);
_M_char_set.insert(_M_translate(__st[0])); _M_char_set.push_back(_M_translator._M_translate(__st[0]));
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
_M_is_ready = false; _M_is_ready = false;
#endif #endif
...@@ -291,7 +359,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -291,7 +359,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__throw_regex_error(regex_constants::error_collate); __throw_regex_error(regex_constants::error_collate);
__st = _M_traits.transform_primary(__st.data(), __st = _M_traits.transform_primary(__st.data(),
__st.data() + __st.size()); __st.data() + __st.size());
_M_equiv_set.insert(__st); _M_equiv_set.push_back(__st);
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
_M_is_ready = false; _M_is_ready = false;
#endif #endif
...@@ -302,7 +370,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -302,7 +370,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
auto __mask = _M_traits.lookup_classname(__s.data(), auto __mask = _M_traits.lookup_classname(__s.data(),
__s.data() + __s.size(), __s.data() + __s.size(),
_M_is_icase()); __icase);
if (__mask == 0) if (__mask == 0)
__throw_regex_error(regex_constants::error_ctype); __throw_regex_error(regex_constants::error_ctype);
_M_class_set |= __mask; _M_class_set |= __mask;
...@@ -314,12 +382,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -314,12 +382,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void void
_M_make_range(_CharT __l, _CharT __r) _M_make_range(_CharT __l, _CharT __r)
{ {
if (_M_flags & regex_constants::collate) _M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
_M_range_set.insert( _M_translator._M_transform(__r)));
make_pair(_M_get_str(_M_translate(__l)),
_M_get_str(_M_translate(__r))));
else
_M_range_set.insert(make_pair(_M_get_str(__l), _M_get_str(__r)));
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
_M_is_ready = false; _M_is_ready = false;
#endif #endif
...@@ -350,26 +414,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -350,26 +414,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_apply(_CharT __ch, true_type) const _M_apply(_CharT __ch, true_type) const
{ return _M_cache[static_cast<_UnsignedCharT>(__ch)]; } { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
_CharT
_M_translate(_CharT __c) const
{
if (_M_is_icase())
return _M_traits.translate_nocase(__c);
else
return _M_traits.translate(__c);
}
bool
_M_is_icase() const
{ return _M_flags & regex_constants::icase; }
_StringT
_M_get_str(_CharT __c) const
{
_StringT __s(1, __c);
return _M_traits.transform(__s.begin(), __s.end());
}
void void
_M_make_cache(true_type) _M_make_cache(true_type)
{ {
...@@ -384,12 +428,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -384,12 +428,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
private: private:
_CacheT _M_cache; _CacheT _M_cache;
std::set<_CharT> _M_char_set; std::vector<_CharT> _M_char_set;
std::set<_StringT> _M_equiv_set; std::vector<_StringT> _M_equiv_set;
std::set<pair<_StringT, _StringT>> _M_range_set; std::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
const _TraitsT& _M_traits;
_CharClassT _M_class_set; _CharClassT _M_class_set;
_FlagT _M_flags; _TransT _M_translator;
const _TraitsT& _M_traits;
bool _M_is_non_matching; bool _M_is_non_matching;
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
bool _M_is_ready; bool _M_is_ready;
......
...@@ -59,9 +59,9 @@ namespace __detail ...@@ -59,9 +59,9 @@ namespace __detail
{ {
_GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _FwdIter, typename _TraitsT> template<typename _TraitsT>
_Compiler<_FwdIter, _TraitsT>:: _Compiler<_TraitsT>::
_Compiler(_FwdIter __b, _FwdIter __e, _Compiler(_IterT __b, _IterT __e,
const _TraitsT& __traits, _FlagT __flags) const _TraitsT& __traits, _FlagT __flags)
: _M_flags((__flags : _M_flags((__flags
& (regex_constants::ECMAScript & (regex_constants::ECMAScript
...@@ -89,9 +89,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -89,9 +89,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_nfa._M_eliminate_dummy(); _M_nfa._M_eliminate_dummy();
} }
template<typename _FwdIter, typename _TraitsT> template<typename _TraitsT>
void void
_Compiler<_FwdIter, _TraitsT>:: _Compiler<_TraitsT>::
_M_disjunction() _M_disjunction()
{ {
this->_M_alternative(); this->_M_alternative();
...@@ -110,9 +110,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -110,9 +110,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
} }
template<typename _FwdIter, typename _TraitsT> template<typename _TraitsT>
void void
_Compiler<_FwdIter, _TraitsT>:: _Compiler<_TraitsT>::
_M_alternative() _M_alternative()
{ {
if (this->_M_term()) if (this->_M_term())
...@@ -126,9 +126,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -126,9 +126,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy())); _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
} }
template<typename _FwdIter, typename _TraitsT> template<typename _TraitsT>
bool bool
_Compiler<_FwdIter, _TraitsT>:: _Compiler<_TraitsT>::
_M_term() _M_term()
{ {
if (this->_M_assertion()) if (this->_M_assertion())
...@@ -141,9 +141,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -141,9 +141,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false; return false;
} }
template<typename _FwdIter, typename _TraitsT> template<typename _TraitsT>
bool bool
_Compiler<_FwdIter, _TraitsT>:: _Compiler<_TraitsT>::
_M_assertion() _M_assertion()
{ {
if (_M_match_token(_ScannerT::_S_token_line_begin)) if (_M_match_token(_ScannerT::_S_token_line_begin))
...@@ -172,9 +172,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -172,9 +172,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return true; return true;
} }
template<typename _FwdIter, typename _TraitsT> template<typename _TraitsT>
void void
_Compiler<_FwdIter, _TraitsT>:: _Compiler<_TraitsT>::
_M_quantifier() _M_quantifier()
{ {
bool __neg = (_M_flags & regex_constants::ECMAScript); bool __neg = (_M_flags & regex_constants::ECMAScript);
...@@ -278,52 +278,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -278,52 +278,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
} }
template<typename _FwdIter, typename _TraitsT> #define __INSERT_REGEX_MATCHER(__func, args...)\
do\
if (!(_M_flags & regex_constants::icase))\
if (!(_M_flags & regex_constants::collate))\
__func<false, false>(args);\
else\
__func<false, true>(args);\
else\
if (!(_M_flags & regex_constants::collate))\
__func<true, false>(args);\
else\
__func<true, true>(args);\
while (false)
template<typename _TraitsT>
bool bool
_Compiler<_FwdIter, _TraitsT>:: _Compiler<_TraitsT>::
_M_atom() _M_atom()
{ {
if (_M_match_token(_ScannerT::_S_token_anychar)) if (_M_match_token(_ScannerT::_S_token_anychar))
{ {
if (_M_flags & regex_constants::ECMAScript) if (!(_M_flags & regex_constants::ECMAScript))
_M_stack.push(_StateSeqT(_M_nfa, __INSERT_REGEX_MATCHER(_M_insert_any_matcher_posix);
_M_nfa._M_insert_matcher
(_AnyMatcher<_TraitsT,
true>(_M_traits))));
else else
_M_stack.push(_StateSeqT(_M_nfa, __INSERT_REGEX_MATCHER(_M_insert_any_matcher_ecma);
_M_nfa._M_insert_matcher
(_AnyMatcher<_TraitsT,
false>(_M_traits))));
} }
else if (_M_try_char()) else if (_M_try_char())
{ __INSERT_REGEX_MATCHER(_M_insert_char_matcher);
if (_M_flags & regex_constants::icase)
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_CharMatcher<_TraitsT,
true>(_M_value[0],
_M_traits))));
else
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_CharMatcher<_TraitsT,
false>(_M_value[0],
_M_traits))));
}
else if (_M_match_token(_ScannerT::_S_token_backref)) else if (_M_match_token(_ScannerT::_S_token_backref))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa. _M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
_M_insert_backref(_M_cur_int_value(10)))); _M_insert_backref(_M_cur_int_value(10))));
else if (_M_match_token(_ScannerT::_S_token_quoted_class)) else if (_M_match_token(_ScannerT::_S_token_quoted_class))
{ __INSERT_REGEX_MATCHER(_M_insert_character_class_matcher);
_GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
_BMatcherT __matcher(_M_ctype.is(_CtypeT::upper, _M_value[0]),
_M_traits, _M_flags);
__matcher._M_add_character_class(_M_value);
__matcher._M_ready();
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher(std::move(__matcher))));
}
else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin)) else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin))
{ {
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_dummy()); _StateSeqT __r(_M_nfa, _M_nfa._M_insert_dummy());
...@@ -348,28 +335,90 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -348,28 +335,90 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return true; return true;
} }
template<typename _FwdIter, typename _TraitsT> template<typename _TraitsT>
bool bool
_Compiler<_FwdIter, _TraitsT>:: _Compiler<_TraitsT>::
_M_bracket_expression() _M_bracket_expression()
{ {
bool __neg = bool __neg =
_M_match_token(_ScannerT::_S_token_bracket_neg_begin); _M_match_token(_ScannerT::_S_token_bracket_neg_begin);
if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin))) if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin)))
return false; return false;
_BMatcherT __matcher(__neg, _M_traits, _M_flags); __INSERT_REGEX_MATCHER(_M_insert_bracket_matcher, __neg);
return true;
}
#undef __INSERT_REGEX_MATCHER
template<typename _TraitsT>
template<bool __icase, bool __collate>
void
_Compiler<_TraitsT>::
_M_insert_any_matcher_ecma()
{
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_AnyMatcher<_TraitsT, true, __icase, __collate>
(_M_traits))));
}
template<typename _TraitsT>
template<bool __icase, bool __collate>
void
_Compiler<_TraitsT>::
_M_insert_any_matcher_posix()
{
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_AnyMatcher<_TraitsT, false, __icase, __collate>
(_M_traits))));
}
template<typename _TraitsT>
template<bool __icase, bool __collate>
void
_Compiler<_TraitsT>::
_M_insert_char_matcher()
{
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_CharMatcher<_TraitsT, __icase, __collate>
(_M_value[0], _M_traits))));
}
template<typename _TraitsT>
template<bool __icase, bool __collate>
void
_Compiler<_TraitsT>::
_M_insert_character_class_matcher()
{
_GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
_BracketMatcher<_TraitsT, __icase, __collate> __matcher
(_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits);
__matcher._M_add_character_class(_M_value);
__matcher._M_ready();
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher(std::move(__matcher))));
}
template<typename _TraitsT>
template<bool __icase, bool __collate>
void
_Compiler<_TraitsT>::
_M_insert_bracket_matcher(bool __neg)
{
_BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits);
while (!_M_match_token(_ScannerT::_S_token_bracket_end)) while (!_M_match_token(_ScannerT::_S_token_bracket_end))
_M_expression_term(__matcher); _M_expression_term(__matcher);
__matcher._M_ready(); __matcher._M_ready();
_M_stack.push(_StateSeqT(_M_nfa, _M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher(std::move(__matcher)))); _M_nfa._M_insert_matcher(std::move(__matcher))));
return true;
} }
template<typename _FwdIter, typename _TraitsT> template<typename _TraitsT>
template<bool __icase, bool __collate>
void void
_Compiler<_FwdIter, _TraitsT>:: _Compiler<_TraitsT>::
_M_expression_term(_BMatcherT& __matcher) _M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
{ {
if (_M_match_token(_ScannerT::_S_token_collsymbol)) if (_M_match_token(_ScannerT::_S_token_collsymbol))
__matcher._M_add_collating_element(_M_value); __matcher._M_add_collating_element(_M_value);
...@@ -403,9 +452,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -403,9 +452,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__throw_regex_error(regex_constants::error_brack); __throw_regex_error(regex_constants::error_brack);
} }
template<typename _FwdIter, typename _TraitsT> template<typename _TraitsT>
bool bool
_Compiler<_FwdIter, _TraitsT>:: _Compiler<_TraitsT>::
_M_try_char() _M_try_char()
{ {
bool __is_char = false; bool __is_char = false;
...@@ -424,9 +473,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -424,9 +473,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __is_char; return __is_char;
} }
template<typename _FwdIter, typename _TraitsT> template<typename _TraitsT>
bool bool
_Compiler<_FwdIter, _TraitsT>:: _Compiler<_TraitsT>::
_M_match_token(_TokenT token) _M_match_token(_TokenT token)
{ {
if (token == _M_scanner._M_get_token()) if (token == _M_scanner._M_get_token())
...@@ -438,9 +487,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -438,9 +487,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false; return false;
} }
template<typename _FwdIter, typename _TraitsT> template<typename _TraitsT>
int int
_Compiler<_FwdIter, _TraitsT>:: _Compiler<_TraitsT>::
_M_cur_int_value(int __radix) _M_cur_int_value(int __radix)
{ {
long __v = 0; long __v = 0;
...@@ -450,25 +499,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -450,25 +499,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __v; return __v;
} }
template<typename _TraitsT> template<typename _TraitsT, bool __icase, bool __collate>
bool bool
_BracketMatcher<_TraitsT>::_M_apply(_CharT __ch, false_type) const _BracketMatcher<_TraitsT, __icase, __collate>::
_M_apply(_CharT __ch, false_type) const
{ {
bool __ret = false; bool __ret = false;
if (_M_traits.isctype(__ch, _M_class_set) if (std::find(_M_char_set.begin(), _M_char_set.end(),
|| _M_char_set.count(_M_translate(__ch)) _M_translator._M_translate(__ch))
|| _M_equiv_set.count(_M_traits.transform_primary(&__ch, &__ch+1))) != _M_char_set.end())
__ret = true; __ret = true;
else else
{ {
_StringT __s = _M_get_str(_M_flags & regex_constants::collate auto __s = _M_translator._M_transform(__ch);
? _M_translate(__ch) : __ch);
for (auto& __it : _M_range_set) for (auto& __it : _M_range_set)
if (__it.first <= __s && __s <= __it.second) if (__it.first <= __s && __s <= __it.second)
{ {
__ret = true; __ret = true;
break; break;
} }
if (_M_traits.isctype(__ch, _M_class_set))
__ret = true;
else if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(),
_M_traits.transform_primary(&__ch, &__ch+1))
!= _M_equiv_set.end())
__ret = true;
} }
if (_M_is_non_matching) if (_M_is_non_matching)
return !__ret; return !__ret;
......
...@@ -65,7 +65,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -65,7 +65,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_nfa(*__re._M_automaton), _M_nfa(*__re._M_automaton),
_M_results(__results), _M_results(__results),
_M_match_queue(__dfs_mode ? nullptr _M_match_queue(__dfs_mode ? nullptr
: new queue<pair<_StateIdT, _ResultsVec>>()), : new vector<pair<_StateIdT, _ResultsVec>>()),
_M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())), _M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())),
_M_flags((__flags & regex_constants::match_prev_avail) _M_flags((__flags & regex_constants::match_prev_avail)
? (__flags ? (__flags
...@@ -142,7 +142,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -142,7 +142,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_ResultsVec& _M_results; _ResultsVec& _M_results;
// Used in BFS, saving states that need to be considered for the next // Used in BFS, saving states that need to be considered for the next
// character. // character.
std::unique_ptr<queue<pair<_StateIdT, _ResultsVec>>> _M_match_queue; std::unique_ptr<vector<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
// Used in BFS, indicating which states have already been visited. // Used in BFS, indicating which states have already been visited.
std::unique_ptr<vector<bool>> _M_visited; std::unique_ptr<vector<bool>> _M_visited;
_FlagT _M_flags; _FlagT _M_flags;
......
...@@ -111,7 +111,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -111,7 +111,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
else else
{ {
_M_match_queue->push(make_pair(_M_start_state, _M_results)); _M_match_queue->push_back(make_pair(_M_start_state, _M_results));
bool __ret = false; bool __ret = false;
while (1) while (1)
{ {
...@@ -120,10 +120,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -120,10 +120,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
break; break;
_M_visited->assign(_M_visited->size(), false); _M_visited->assign(_M_visited->size(), false);
auto _M_old_queue = std::move(*_M_match_queue); auto _M_old_queue = std::move(*_M_match_queue);
while (!_M_old_queue.empty()) for (auto __task : _M_old_queue)
{ {
auto __task = _M_old_queue.front();
_M_old_queue.pop();
_M_cur_results = __task.second; _M_cur_results = __task.second;
_M_dfs<__match_mode>(__task.first); _M_dfs<__match_mode>(__task.first);
} }
...@@ -279,7 +277,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -279,7 +277,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
else else
if (__state._M_matches(*_M_current)) if (__state._M_matches(*_M_current))
_M_match_queue->push(make_pair(__state._M_next, _M_cur_results)); _M_match_queue->push_back(make_pair(__state._M_next,
_M_cur_results));
break; break;
// First fetch the matched result from _M_cur_results as __submatch; // First fetch the matched result from _M_cur_results as __submatch;
// then compare it with // then compare it with
......
...@@ -39,25 +39,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -39,25 +39,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{ * @{
*/ */
/** struct _ScannerBase
* @brief struct _Scanner. Scans an input range for regex tokens.
*
* The %_Scanner class interprets the regular expression pattern in
* the input range passed to its constructor as a sequence of parse
* tokens passed to the regular expression compiler. The sequence
* of tokens provided depends on the flag settings passed to the
* constructor: different regular expression grammars will interpret
* the same input pattern in syntactically different ways.
*/
template<typename _FwdIter>
class _Scanner
{ {
public: public:
typedef typename std::iterator_traits<_FwdIter>::value_type _CharT;
typedef std::basic_string<_CharT> _StringT;
typedef regex_constants::syntax_option_type _FlagT;
typedef const std::ctype<_CharT> _CtypeT;
/// Token types returned from the scanner. /// Token types returned from the scanner.
enum _TokenT enum _TokenT
{ {
...@@ -93,7 +77,137 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -93,7 +77,137 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_unknown _S_token_unknown
}; };
_Scanner(_FwdIter __begin, _FwdIter __end, protected:
typedef regex_constants::syntax_option_type _FlagT;
enum _StateT
{
_S_state_normal,
_S_state_in_brace,
_S_state_in_bracket,
};
protected:
// Initializes the scanner state from the syntax flags: starts in
// _S_state_normal, stores __flags, and selects the escape table and the
// special-character string that match the grammar.
//
// The escape table is the ECMAScript one when _M_is_ecma() holds, otherwise
// the awk one.  The special-character string is the ECMAScript one, else the
// basic one when _M_is_basic() holds, else the extended one.
//
// _M_is_ecma() and _M_is_basic() read _M_flags, so this relies on _M_flags
// being declared (and therefore initialized) before _M_escape_tbl and
// _M_spec_char.
_ScannerBase(_FlagT __flags)
: _M_state(_S_state_normal),
_M_flags(__flags),
_M_escape_tbl(_M_is_ecma()
? _M_ecma_escape_tbl
: _M_awk_escape_tbl),
_M_spec_char(_M_is_ecma()
? _M_ecma_spec_char
: _M_is_basic()
? _M_basic_spec_char
: _M_extended_spec_char),
_M_at_bracket_start(false)
{ }
protected:
// Looks up __c among the keys of the active escape table (_M_escape_tbl).
// Returns a pointer to the mapped character, or nullptr when no key matches.
// The scan stops at the first entry whose key is '\0', so '\0' itself is
// never reported as found.
const char*
_M_find_escape(char __c)
{
  for (auto __entry = _M_escape_tbl; __entry->first != '\0'; ++__entry)
    {
      if (__entry->first == __c)
	return &__entry->second;
    }
  return nullptr;
}
// True when the ECMAScript bit is set in the stored syntax flags.
bool
_M_is_ecma() const
{
  return static_cast<bool>(_M_flags & regex_constants::ECMAScript);
}
// True when either the basic or the grep bit is set in the stored flags.
bool
_M_is_basic() const
{
  const auto __mask = regex_constants::basic | regex_constants::grep;
  return static_cast<bool>(_M_flags & __mask);
}
// True when any of the extended, egrep or awk bits is set in the stored
// flags.
bool
_M_is_extended() const
{
  const auto __mask = regex_constants::extended
		      | regex_constants::egrep
		      | regex_constants::awk;
  return static_cast<bool>(_M_flags & __mask);
}
// True when either the grep or the egrep bit is set in the stored flags.
bool
_M_is_grep() const
{
  const auto __mask = regex_constants::grep | regex_constants::egrep;
  return static_cast<bool>(_M_flags & __mask);
}
// True when the awk bit is set in the stored syntax flags.
bool
_M_is_awk() const
{
  return static_cast<bool>(_M_flags & regex_constants::awk);
}
protected:
// Maps a single pattern character to the token it produces.  The table is
// scanned linearly by _Scanner::_M_scan_normal(), which stops at the first
// entry whose key is '\0'; the last row is therefore a sentinel and its
// token value is never used.
const std::pair<char, _TokenT> _M_token_tbl[9] =
{
{'^', _S_token_line_begin},
{'$', _S_token_line_end},
{'.', _S_token_anychar},
{'*', _S_token_closure0},
{'+', _S_token_closure1},
{'?', _S_token_opt},
{'|', _S_token_or},
{'\n', _S_token_or}, // grep and egrep
{'\0', _S_token_or},
};
// Escape table selected by the constructor when _M_is_ecma() holds: each row
// pairs an escape letter with the character it stands for.  The final
// {'\0', '\0'} row is a sentinel; _M_find_escape() stops scanning when it
// reaches a key of '\0'.
const std::pair<char, char> _M_ecma_escape_tbl[8] =
{
{'0', '\0'},
{'b', '\b'},
{'f', '\f'},
{'n', '\n'},
{'r', '\r'},
{'t', '\t'},
{'v', '\v'},
{'\0', '\0'},
};
// Escape table selected by the constructor when _M_is_ecma() does not hold:
// each row pairs an escape letter with the character it stands for.  The
// final {'\0', '\0'} row is a sentinel; _M_find_escape() stops scanning when
// it reaches a key of '\0'.
const std::pair<char, char> _M_awk_escape_tbl[11] =
{
{'"', '"'},
{'/', '/'},
{'\\', '\\'},
{'a', '\a'},
{'b', '\b'},
{'f', '\f'},
{'n', '\n'},
{'r', '\r'},
{'t', '\t'},
{'v', '\v'},
{'\0', '\0'},
};
const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
const char* _M_basic_spec_char = ".[\\*^$";
const char* _M_extended_spec_char = ".[\\()*+?{|^$";
_StateT _M_state;
_FlagT _M_flags;
_TokenT _M_token;
const std::pair<char, char>* _M_escape_tbl;
const char* _M_spec_char;
bool _M_at_bracket_start;
};
/**
* @brief struct _Scanner. Scans an input range for regex tokens.
*
* The %_Scanner class interprets the regular expression pattern in
* the input range passed to its constructor as a sequence of parse
* tokens passed to the regular expression compiler. The sequence
* of tokens provided depends on the flag settings passed to the
* constructor: different regular expression grammars will interpret
* the same input pattern in syntactically different ways.
*/
template<typename _CharT>
class _Scanner
: public _ScannerBase
{
public:
typedef const _CharT* _IterT;
typedef std::basic_string<_CharT> _StringT;
typedef regex_constants::syntax_option_type _FlagT;
typedef const std::ctype<_CharT> _CtypeT;
_Scanner(_IterT __begin, _IterT __end,
_FlagT __flags, std::locale __loc); _FlagT __flags, std::locale __loc);
void void
...@@ -113,13 +227,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -113,13 +227,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif #endif
private: private:
enum _StateT
{
_S_state_normal,
_S_state_in_brace,
_S_state_in_bracket,
};
void void
_M_scan_normal(); _M_scan_normal();
...@@ -141,49 +248,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -141,49 +248,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void void
_M_eat_class(char); _M_eat_class(char);
constexpr bool _IterT _M_current;
_M_is_ecma() _IterT _M_end;
{ return _M_flags & regex_constants::ECMAScript; }
constexpr bool
_M_is_basic()
{ return _M_flags & (regex_constants::basic | regex_constants::grep); }
constexpr bool
_M_is_extended()
{
return _M_flags & (regex_constants::extended
| regex_constants::egrep
| regex_constants::awk);
}
constexpr bool
_M_is_grep()
{ return _M_flags & (regex_constants::grep | regex_constants::egrep); }
constexpr bool
_M_is_awk()
{ return _M_flags & regex_constants::awk; }
_StateT _M_state;
_FwdIter _M_current;
_FwdIter _M_end;
_FlagT _M_flags;
_CtypeT& _M_ctype; _CtypeT& _M_ctype;
_TokenT _M_token;
_StringT _M_value; _StringT _M_value;
bool _M_at_bracket_start;
public:
// FIXME: make them static when this file is stable.
const std::map<char, _TokenT> _M_token_map;
const std::map<char, char> _M_ecma_escape_map;
const std::map<char, char> _M_awk_escape_map;
const std::set<char> _M_ecma_spec_char;
const std::set<char> _M_basic_spec_char;
const std::set<char> _M_extended_spec_char;
const std::map<char, char>& _M_escape_map;
const std::set<char>& _M_spec_char;
void (_Scanner::* _M_eat_escape)(); void (_Scanner::* _M_eat_escape)();
}; };
......
...@@ -52,106 +52,22 @@ namespace __detail ...@@ -52,106 +52,22 @@ namespace __detail
{ {
_GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _FwdIter> template<typename _CharT>
_Scanner<_FwdIter>:: _Scanner<_CharT>::
_Scanner(_FwdIter __begin, _FwdIter __end, _Scanner(typename _Scanner::_IterT __begin,
typename _Scanner::_IterT __end,
_FlagT __flags, std::locale __loc) _FlagT __flags, std::locale __loc)
: _M_state(_S_state_normal), _M_current(__begin), _M_end(__end), : _ScannerBase(__flags),
_M_flags(__flags), _M_current(__begin), _M_end(__end),
_M_ctype(std::use_facet<_CtypeT>(__loc)), _M_ctype(std::use_facet<_CtypeT>(__loc)),
_M_at_bracket_start(false),
_M_token_map
{
{'^', _S_token_line_begin},
{'$', _S_token_line_end},
{'.', _S_token_anychar},
{'*', _S_token_closure0},
{'+', _S_token_closure1},
{'?', _S_token_opt},
{'|', _S_token_or},
// grep and egrep
{'\n', _S_token_or},
},
_M_ecma_escape_map
{
{'0', '\0'},
{'b', '\b'},
{'f', '\f'},
{'n', '\n'},
{'r', '\r'},
{'t', '\t'},
{'v', '\v'},
},
_M_awk_escape_map
{
{'"', '"'},
{'/', '/'},
{'\\', '\\'},
{'a', '\a'},
{'b', '\b'},
{'f', '\f'},
{'n', '\n'},
{'r', '\r'},
{'t', '\t'},
{'v', '\v'},
},
_M_ecma_spec_char
{
'^',
'$',
'\\',
'.',
'*',
'+',
'?',
'(',
')',
'[',
']',
'{',
'}',
'|',
},
_M_basic_spec_char
{
'.',
'[',
'\\',
'*',
'^',
'$',
},
_M_extended_spec_char
{
'.',
'[',
'\\',
'(',
')',
'*',
'+',
'?',
'{',
'|',
'^',
'$',
},
_M_escape_map(_M_is_ecma()
? _M_ecma_escape_map
: _M_awk_escape_map),
_M_spec_char(_M_is_ecma()
? _M_ecma_spec_char
: _M_is_basic()
? _M_basic_spec_char
: _M_extended_spec_char),
_M_eat_escape(_M_is_ecma() _M_eat_escape(_M_is_ecma()
? &_Scanner::_M_eat_escape_ecma ? &_Scanner::_M_eat_escape_ecma
: &_Scanner::_M_eat_escape_posix) : &_Scanner::_M_eat_escape_posix)
{ _M_advance(); } { _M_advance(); }
template<typename _FwdIter> template<typename _CharT>
void void
_Scanner<_FwdIter>:: _Scanner<_CharT>::
_M_advance() _M_advance()
{ {
if (_M_current == _M_end) if (_M_current == _M_end)
...@@ -173,12 +89,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -173,12 +89,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles: // Differences between styles:
// 1) "\(", "\)", "\{" in basic. It's not escaping. // 1) "\(", "\)", "\{" in basic. It's not escaping.
// 2) "(?:", "(?=", "(?!" in ECMAScript. // 2) "(?:", "(?=", "(?!" in ECMAScript.
template<typename _FwdIter> template<typename _CharT>
void void
_Scanner<_FwdIter>:: _Scanner<_CharT>::
_M_scan_normal() _M_scan_normal()
{ {
auto __c = *_M_current++; auto __c = *_M_current++;
const char* __pos;
if (__c == '\\') if (__c == '\\')
{ {
...@@ -244,11 +161,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -244,11 +161,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_state = _S_state_in_brace; _M_state = _S_state_in_brace;
_M_token = _S_token_interval_begin; _M_token = _S_token_interval_begin;
} }
else if ((_M_spec_char.count(_M_ctype.narrow(__c, '\0')) else if (((__pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')))
!= nullptr
&& *__pos != '\0'
&& __c != ']' && __c != ']'
&& __c != '}') && __c != '}')
|| (_M_is_grep() && __c == '\n')) || (_M_is_grep() && __c == '\n'))
_M_token = _M_token_map.at(__c); {
auto __it = _M_token_tbl;
auto __narrowc = _M_ctype.narrow(__c, '\0');
for (; __it->first != '\0'; ++__it)
if (__it->first == __narrowc)
{
_M_token = __it->second;
return;
}
_GLIBCXX_DEBUG_ASSERT(false);
}
else else
{ {
_M_token = _S_token_ord_char; _M_token = _S_token_ord_char;
...@@ -259,9 +188,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -259,9 +188,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles: // Differences between styles:
// 1) different semantics of "[]" and "[^]". // 1) different semantics of "[]" and "[^]".
// 2) Escaping in bracket expr. // 2) Escaping in bracket expr.
template<typename _FwdIter> template<typename _CharT>
void void
_Scanner<_FwdIter>:: _Scanner<_CharT>::
_M_scan_in_bracket() _M_scan_in_bracket()
{ {
if (_M_current == _M_end) if (_M_current == _M_end)
...@@ -316,9 +245,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -316,9 +245,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles: // Differences between styles:
// 1) "\}" in basic style. // 1) "\}" in basic style.
template<typename _FwdIter> template<typename _CharT>
void void
_Scanner<_FwdIter>:: _Scanner<_CharT>::
_M_scan_in_brace() _M_scan_in_brace()
{ {
if (_M_current == _M_end) if (_M_current == _M_end)
...@@ -357,21 +286,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -357,21 +286,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__throw_regex_error(regex_constants::error_badbrace); __throw_regex_error(regex_constants::error_badbrace);
} }
template<typename _FwdIter> template<typename _CharT>
void void
_Scanner<_FwdIter>:: _Scanner<_CharT>::
_M_eat_escape_ecma() _M_eat_escape_ecma()
{ {
if (_M_current == _M_end) if (_M_current == _M_end)
__throw_regex_error(regex_constants::error_escape); __throw_regex_error(regex_constants::error_escape);
auto __c = *_M_current++; auto __c = *_M_current++;
auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
if (_M_escape_map.count(_M_ctype.narrow(__c, '\0')) if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket))
&& (__c != 'b' || _M_state == _S_state_in_bracket))
{ {
_M_token = _S_token_ord_char; _M_token = _S_token_ord_char;
_M_value.assign(1, _M_escape_map.at(__c)); _M_value.assign(1, *__pos);
} }
else if (__c == 'b') else if (__c == 'b')
{ {
...@@ -431,17 +360,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -431,17 +360,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles: // Differences between styles:
// 1) Extended doesn't support backref, but basic does. // 1) Extended doesn't support backref, but basic does.
template<typename _FwdIter> template<typename _CharT>
void void
_Scanner<_FwdIter>:: _Scanner<_CharT>::
_M_eat_escape_posix() _M_eat_escape_posix()
{ {
if (_M_current == _M_end) if (_M_current == _M_end)
__throw_regex_error(regex_constants::error_escape); __throw_regex_error(regex_constants::error_escape);
auto __c = *_M_current; auto __c = *_M_current;
auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
if (_M_spec_char.count(_M_ctype.narrow(__c, '\0'))) if (__pos != nullptr && *__pos != '\0')
{ {
_M_token = _S_token_ord_char; _M_token = _S_token_ord_char;
_M_value.assign(1, __c); _M_value.assign(1, __c);
...@@ -469,17 +399,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -469,17 +399,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
++_M_current; ++_M_current;
} }
template<typename _FwdIter> template<typename _CharT>
void void
_Scanner<_FwdIter>:: _Scanner<_CharT>::
_M_eat_escape_awk() _M_eat_escape_awk()
{ {
auto __c = *_M_current++; auto __c = *_M_current++;
auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
if (_M_escape_map.count(_M_ctype.narrow(__c, '\0'))) if (__pos != nullptr)
{ {
_M_token = _S_token_ord_char; _M_token = _S_token_ord_char;
_M_value.assign(1, _M_escape_map.at(__c)); _M_value.assign(1, *__pos);
} }
// \ddd for oct representation // \ddd for oct representation
else if (_M_ctype.is(_CtypeT::digit, __c) else if (_M_ctype.is(_CtypeT::digit, __c)
...@@ -505,9 +436,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -505,9 +436,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Eats a character class or throws an exception. // Eats a character class or throws an exception.
// __ch could be ':', '.' or '=', _M_current is the char after ']' when // __ch could be ':', '.' or '=', _M_current is the char after ']' when
// returning. // returning.
template<typename _FwdIter> template<typename _CharT>
void void
_Scanner<_FwdIter>:: _Scanner<_CharT>::
_M_eat_class(char __ch) _M_eat_class(char __ch)
{ {
for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;) for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;)
...@@ -525,9 +456,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -525,9 +456,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
template<typename _FwdIter> template<typename _CharT>
std::ostream& std::ostream&
_Scanner<_FwdIter>:: _Scanner<_CharT>::
_M_print(std::ostream& ostr) _M_print(std::ostream& ostr)
{ {
switch (_M_token) switch (_M_token)
......
...@@ -44,15 +44,13 @@ ...@@ -44,15 +44,13 @@
#include <iterator> #include <iterator>
#include <locale> #include <locale>
#include <memory> #include <memory>
#include <map>
#include <queue>
#include <set>
#include <sstream> #include <sstream>
#include <stack> #include <stack>
#include <stdexcept> #include <stdexcept>
#include <string> #include <string>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <cstring>
#include <bits/regex_constants.h> #include <bits/regex_constants.h>
#include <bits/regex_error.h> #include <bits/regex_error.h>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment