Commit 6cb784b6 by Tim Shen Committed by Tim Shen

Makefile.am: Adjust to new files.

2013-08-07  Tim Shen  <timshen91@gmail.com>

	* include/Makefile.am: Adjust to new files.
	* include/Makefile.in: Regenerate.
	* include/bits/regex.h: Adjust to new interfaces.
	* include/bits/regex_automaton.h: New.
	* include/bits/regex_automaton.tcc: New.
	* include/bits/regex_compiler.h: Adjust to new files.
	* include/bits/regex_compiler.tcc: New.
	* include/bits/regex_constants.h: Tail spaces.
	* include/bits/regex_error.h: Likewise.
	* include/bits/regex_executor.h: New.
	* include/bits/regex_executor.tcc: New.
	* include/std/regex: Adjust to new files.
	* testsuite/28_regex/algorithms/regex_match/extended/
	string_dispatch_01.cc: Adjust to new interfaces.

From-SVN: r201573
parent 5ee5b32c
2013-08-07 Tim Shen <timshen91@gmail.com>
* include/Makefile.am: Adjust to new files.
* include/Makefile.in: Regenerate.
* include/bits/regex.h: Adjust to new interfaces.
* include/bits/regex_automaton.h: New.
* include/bits/regex_automaton.tcc: New.
* include/bits/regex_compiler.h: Adjust to new files.
* include/bits/regex_compiler.tcc: New.
* include/bits/regex_constants.h: Tail spaces.
* include/bits/regex_error.h: Likewise.
* include/bits/regex_executor.h: New.
* include/bits/regex_executor.tcc: New.
* include/std/regex: Adjust to new files.
* testsuite/28_regex/algorithms/regex_match/extended/
string_dispatch_01.cc: Adjust to new interfaces.
2013-08-07 Paolo Carlini <paolo.carlini@oracle.com> 2013-08-07 Paolo Carlini <paolo.carlini@oracle.com>
* include/ext/atomicity.h: Add #pragma GCC system_header. * include/ext/atomicity.h: Add #pragma GCC system_header.
......
...@@ -126,14 +126,14 @@ bits_headers = \ ...@@ -126,14 +126,14 @@ bits_headers = \
${bits_srcdir}/random.tcc \ ${bits_srcdir}/random.tcc \
${bits_srcdir}/range_access.h \ ${bits_srcdir}/range_access.h \
${bits_srcdir}/regex.h \ ${bits_srcdir}/regex.h \
${bits_srcdir}/regex_compiler.h \
${bits_srcdir}/regex_constants.h \ ${bits_srcdir}/regex_constants.h \
${bits_srcdir}/regex_cursor.h \
${bits_srcdir}/regex_error.h \ ${bits_srcdir}/regex_error.h \
${bits_srcdir}/regex_grep_matcher.h \ ${bits_srcdir}/regex_automaton.h \
${bits_srcdir}/regex_grep_matcher.tcc \ ${bits_srcdir}/regex_automaton.tcc \
${bits_srcdir}/regex_nfa.h \ ${bits_srcdir}/regex_compiler.h \
${bits_srcdir}/regex_nfa.tcc \ ${bits_srcdir}/regex_compiler.tcc \
${bits_srcdir}/regex_executor.h \
${bits_srcdir}/regex_executor.tcc \
${bits_srcdir}/stream_iterator.h \ ${bits_srcdir}/stream_iterator.h \
${bits_srcdir}/streambuf_iterator.h \ ${bits_srcdir}/streambuf_iterator.h \
${bits_srcdir}/shared_ptr.h \ ${bits_srcdir}/shared_ptr.h \
......
...@@ -393,14 +393,14 @@ bits_headers = \ ...@@ -393,14 +393,14 @@ bits_headers = \
${bits_srcdir}/random.tcc \ ${bits_srcdir}/random.tcc \
${bits_srcdir}/range_access.h \ ${bits_srcdir}/range_access.h \
${bits_srcdir}/regex.h \ ${bits_srcdir}/regex.h \
${bits_srcdir}/regex_compiler.h \
${bits_srcdir}/regex_constants.h \ ${bits_srcdir}/regex_constants.h \
${bits_srcdir}/regex_cursor.h \
${bits_srcdir}/regex_error.h \ ${bits_srcdir}/regex_error.h \
${bits_srcdir}/regex_grep_matcher.h \ ${bits_srcdir}/regex_automaton.h \
${bits_srcdir}/regex_grep_matcher.tcc \ ${bits_srcdir}/regex_automaton.tcc \
${bits_srcdir}/regex_nfa.h \ ${bits_srcdir}/regex_compiler.h \
${bits_srcdir}/regex_nfa.tcc \ ${bits_srcdir}/regex_compiler.tcc \
${bits_srcdir}/regex_executor.h \
${bits_srcdir}/regex_executor.tcc \
${bits_srcdir}/stream_iterator.h \ ${bits_srcdir}/stream_iterator.h \
${bits_srcdir}/streambuf_iterator.h \ ${bits_srcdir}/streambuf_iterator.h \
${bits_srcdir}/shared_ptr.h \ ${bits_srcdir}/shared_ptr.h \
......
...@@ -61,7 +61,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -61,7 +61,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_BaseType _M_base; _BaseType _M_base;
unsigned char _M_extended; unsigned char _M_extended;
static constexpr unsigned char _S_under = 1 << 0; static constexpr unsigned char _S_under = 1 << 0;
// FIXME: _S_blank should be removed in the future, when locale's complete. // FIXME: _S_blank should be removed in the future,
// when locale's complete.
static constexpr unsigned char _S_blank = 1 << 1; static constexpr unsigned char _S_blank = 1 << 1;
static constexpr unsigned char _S_valid_mask = 0x3; static constexpr unsigned char _S_valid_mask = 0x3;
...@@ -579,7 +580,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -579,7 +580,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__s == __it->first) if (__s == __it->first)
{ {
if (__icase if (__icase
&& ((__it->second & (ctype_base::lower | ctype_base::upper)) != 0)) && ((__it->second
& (ctype_base::lower | ctype_base::upper)) != 0))
return ctype_base::alpha; return ctype_base::alpha;
return __it->second; return __it->second;
} }
...@@ -662,9 +664,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -662,9 +664,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* character sequence. * character sequence.
*/ */
basic_regex() basic_regex()
: _M_flags(ECMAScript), : _M_flags(ECMAScript), _M_automaton(nullptr)
_M_automaton(__detail::__compile<const _Ch_type*, _Rx_traits>(0, 0,
_M_traits, _M_flags))
{ } { }
/** /**
...@@ -680,9 +680,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -680,9 +680,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/ */
explicit explicit
basic_regex(const _Ch_type* __p, flag_type __f = ECMAScript) basic_regex(const _Ch_type* __p, flag_type __f = ECMAScript)
: _M_flags(__f), : basic_regex(__p, __p + _Rx_traits::length(__p), __f)
_M_automaton(__detail::__compile(__p, __p + _Rx_traits::length(__p),
_M_traits, _M_flags))
{ } { }
/** /**
...@@ -697,9 +695,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -697,9 +695,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* *
* @throws regex_error if @p __p is not a valid regular expression. * @throws regex_error if @p __p is not a valid regular expression.
*/ */
basic_regex(const _Ch_type* __p, std::size_t __len, flag_type __f) basic_regex(const _Ch_type* __p,
: _M_flags(__f), std::size_t __len, flag_type __f = ECMAScript)
_M_automaton(__detail::__compile(__p, __p + __len, _M_traits, _M_flags)) : basic_regex(__p, __p + __len, __f)
{ } { }
/** /**
...@@ -707,10 +705,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -707,10 +705,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* *
* @param __rhs A @p regex object. * @param __rhs A @p regex object.
*/ */
basic_regex(const basic_regex& __rhs) basic_regex(const basic_regex& __rhs) = default;
: _M_flags(__rhs._M_flags), _M_traits(__rhs._M_traits),
_M_automaton(__rhs._M_automaton)
{ }
/** /**
* @brief Move-constructs a basic regular expression. * @brief Move-constructs a basic regular expression.
...@@ -736,9 +731,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -736,9 +731,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
basic_regex(const std::basic_string<_Ch_type, _Ch_traits, basic_regex(const std::basic_string<_Ch_type, _Ch_traits,
_Ch_alloc>& __s, _Ch_alloc>& __s,
flag_type __f = ECMAScript) flag_type __f = ECMAScript)
: _M_flags(__f), : basic_regex(__s.begin(), __s.end(), __f)
_M_automaton(__detail::__compile(__s.begin(), __s.end(),
_M_traits, _M_flags))
{ } { }
/** /**
...@@ -758,7 +751,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -758,7 +751,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
basic_regex(_InputIterator __first, _InputIterator __last, basic_regex(_InputIterator __first, _InputIterator __last,
flag_type __f = ECMAScript) flag_type __f = ECMAScript)
: _M_flags(__f), : _M_flags(__f),
_M_automaton(__detail::__compile(__first, __last, _M_traits, _M_flags)) _M_automaton(__detail::_Compiler<_InputIterator, _Ch_type, _Rx_traits>
(__first, __last, _M_traits, _M_flags)._M_get_nfa())
{ } { }
/** /**
...@@ -771,9 +765,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -771,9 +765,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/ */
basic_regex(initializer_list<_Ch_type> __l, basic_regex(initializer_list<_Ch_type> __l,
flag_type __f = ECMAScript) flag_type __f = ECMAScript)
: _M_flags(__f), : basic_regex(__l.begin(), __l.end(), __f)
_M_automaton(__detail::__compile(__l.begin(), __l.end(),
_M_traits, _M_flags))
{ } { }
/** /**
...@@ -989,14 +981,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -989,14 +981,37 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ _M_automaton->_M_dot(__ostr); } { _M_automaton->_M_dot(__ostr); }
#endif #endif
const __detail::_AutomatonPtr&
_M_get_automaton() const
{ return _M_automaton; }
protected: protected:
typedef std::shared_ptr<__detail::_Automaton<_Ch_type, _Rx_traits>>
_AutomatonPtr;
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
friend std::unique_ptr<
__detail::_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
__detail::__get_executor(_BiIter,
_BiIter,
match_results<_BiIter, _Alloc>&,
const basic_regex<_CharT, _TraitsT>&,
regex_constants::match_flag_type);
template<typename _B, typename _A, typename _C, typename _R>
friend bool
regex_match(_B, _B,
match_results<_B, _A>&,
const basic_regex<_C, _R>&,
regex_constants::match_flag_type);
template<typename _B, typename _A, typename _C, typename _R>
friend bool
regex_search(_B, _B,
match_results<_B, _A>&,
const basic_regex<_C, _R>&,
regex_constants::match_flag_type);
flag_type _M_flags; flag_type _M_flags;
_Rx_traits _M_traits; _Rx_traits _M_traits;
__detail::_AutomatonPtr _M_automaton; _AutomatonPtr _M_automaton;
}; };
/** @brief Standard regular expressions. */ /** @brief Standard regular expressions. */
...@@ -2051,6 +2066,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2051,6 +2066,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
string_type __result; string_type __result;
format(std::back_inserter(__result), format(std::back_inserter(__result),
__fmt,
__fmt + char_traits<char_type>::length(__fmt), __fmt + char_traits<char_type>::length(__fmt),
__flags); __flags);
return __result; return __result;
...@@ -2086,7 +2102,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2086,7 +2102,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
//@} //@}
private: private:
friend class __detail::_SpecializedResults<_Bi_iter, _Alloc>; template<typename, typename, typename, typename>
friend class __detail::_Executor;
template<typename, typename, typename, typename>
friend class __detail::_DFSExecutor;
template<typename, typename, typename, typename>
friend class __detail::_BFSExecutor;
template<typename _B, typename _A, typename _Ch_type, typename _Rx_traits>
friend bool
regex_match(_B, _B, match_results<_B, _A>&,
const basic_regex<_Ch_type,
_Rx_traits>&,
regex_constants::match_flag_type);
template<typename _B, typename _A, typename _Ch_type, typename _Rx_traits>
friend bool
regex_search(_B, _B, match_results<_B, _A>&,
const basic_regex<_Ch_type,
_Rx_traits>&,
regex_constants::match_flag_type);
}; };
typedef match_results<const char*> cmatch; typedef match_results<const char*> cmatch;
...@@ -2179,11 +2216,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2179,11 +2216,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
regex_constants::match_flag_type __flags regex_constants::match_flag_type __flags
= regex_constants::match_default) = regex_constants::match_default)
{ {
__detail::_AutomatonPtr __a = __re._M_get_automaton(); if (__re._M_automaton == nullptr)
__detail::_Automaton::_SizeT __sz = __a->_M_sub_count(); return false;
__detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e); if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match())
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m); {
return __a->_M_get_matcher(__cs, __r, __a, __flags)->_M_match(); for (auto __it : __m)
if (!__it.matched)
__it.first = __it.second = __e;
__m.at(__m.size()).matched = false;
__m.at(__m.size()).first = __s;
__m.at(__m.size()).second = __s;
__m.at(__m.size()+1).matched = false;
__m.at(__m.size()+1).first = __e;
__m.at(__m.size()+1).second = __e;
return true;
}
return false;
} }
/** /**
...@@ -2327,29 +2375,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2327,29 +2375,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
regex_constants::match_flag_type __flags regex_constants::match_flag_type __flags
= regex_constants::match_default) = regex_constants::match_default)
{ {
__detail::_AutomatonPtr __a = __re._M_get_automaton(); if (__re._M_automaton == nullptr)
__detail::_Automaton::_SizeT __sz = __a->_M_sub_count(); return false;
__detail::_SpecializedCursor<_Bi_iter> __cs(__first, __last);
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo? for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo?
{ if (__detail::__get_executor(__cur, __last, __m, __re, __flags)
__detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last); ->_M_search_from_first())
auto __matcher = __a->_M_get_matcher(__curs, __r, __a, __flags); {
if (__matcher->_M_search_from_first()) for (auto __it : __m)
{ if (!__it.matched)
__r._M_set_range(__m.size(), __it.first = __it.second = __last;
__detail::_SpecializedCursor<_Bi_iter> __m.at(__m.size()).first = __first;
{__first, __m[0].first}); __m.at(__m.size()).second = __m[0].first;
__r._M_set_range(__m.size()+1, __m.at(__m.size()+1).first = __m[0].second;
__detail::_SpecializedCursor<_Bi_iter> __m.at(__m.size()+1).second = __last;
{__m[0].second, __last}); __m.at(__m.size()).matched =
__r._M_set_matched(__m.size(), (__m.prefix().first != __m.prefix().second);
__m.prefix().first != __m.prefix().second); __m.at(__m.size()+1).matched =
__r._M_set_matched(__m.size()+1, (__m.suffix().first != __m.suffix().second);
__m.suffix().first != __m.suffix().second);
return true; return true;
} }
}
return false; return false;
} }
......
// class template regex -*- C++ -*-
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
/**
* @file bits/regex_automaton.h
* This is an internal header file, included by other library headers.
* Do not attempt to use it directly. @headername{regex}
*/
namespace std _GLIBCXX_VISIBILITY(default)
{
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
/**
* @defgroup regex-detail Base and Implementation Classes
* @ingroup regex
* @{
*/
/// Identifies a state by its position in the automaton's state vector.
using _StateIdT = int;

/// An ordered set of state identifiers.
using _StateSet = std::set<_StateIdT>;

/// Sentinel identifier meaning "no state".
static const _StateIdT _S_invalid_state_id = -1;

/// Predicate that decides whether a single character is accepted.
template<typename _CharT>
  using _Matcher = std::function<bool (_CharT)>;
/// Operation codes that define the type of transitions within the base NFA
/// that represents the regular expression.
///
/// The opcode stored in a state selects which of that state's other members
/// are meaningful.
enum _Opcode
{
// Placeholder value; no constructor in this header selects it implicitly.
_S_opcode_unknown = 0,
// Two-way branch: the state built from a (next, alt) pair of ids.
_S_opcode_alternative = 1,
// Start of a sub-expression; the state carries the sub-expression index.
_S_opcode_subexpr_begin = 4,
// End of a sub-expression; the state carries the sub-expression index.
_S_opcode_subexpr_end = 5,
// Character test; the state carries a matcher predicate.
_S_opcode_match = 100,
// Accepting state; _NFA::_M_insert_accept records these as final states.
_S_opcode_accept = 255
};
template<typename _CharT, typename _TraitsT>
class _State
{
public:
typedef int _OpcodeT;
typedef _Matcher<_CharT> _MatcherT;
_OpcodeT _M_opcode; // type of outgoing transition
_StateIdT _M_next; // outgoing transition
_StateIdT _M_alt; // for _S_opcode_alternative
unsigned int _M_subexpr; // for _S_opcode_subexpr_*
_MatcherT _M_matches; // for _S_opcode_match
explicit _State(_OpcodeT __opcode)
: _M_opcode(__opcode), _M_next(_S_invalid_state_id)
{ }
_State(const _MatcherT& __m)
: _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id),
_M_matches(__m)
{ }
_State(_OpcodeT __opcode, unsigned __index)
: _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__index)
{ }
_State(_StateIdT __next, _StateIdT __alt)
: _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
{ }
#ifdef _GLIBCXX_DEBUG
std::ostream&
_M_print(std::ostream& ostr) const;
// Prints graphviz dot commands for state.
std::ostream&
_M_dot(std::ostream& __ostr, _StateIdT __id) const;
#endif
};
/// Base class for, um, automata.  Could be an NFA or a DFA.  Your choice.
///
/// This is an abstract interface: concrete automata derive from it and are
/// handled through pointers to this base.
template<typename _CharT, typename _TraitsT>
  class _Automaton
  {
  public:
    typedef unsigned int _SizeT;

  public:
    // The class is polymorphic and is owned through base-class pointers, so
    // destroying a derived automaton through this type must be well defined.
    virtual
    ~_Automaton() { }

    /// Returns the number of sub-expressions known to the automaton.
    virtual _SizeT
    _M_sub_count() const = 0;

#ifdef _GLIBCXX_DEBUG
    /// Writes a graphviz dot description of the automaton to @p __ostr.
    virtual std::ostream&
    _M_dot(std::ostream& __ostr) const = 0;
#endif
  };
/// Automaton stored as a vector of states.
///
/// The class *is* the state vector (it derives from std::vector), so a
/// state id is simply an index into it.  Each _M_insert_* member appends one
/// state and returns the id of the state it just appended.
template<typename _CharT, typename _TraitsT>
  class _NFA
  : public _Automaton<_CharT, _TraitsT>,
    public std::vector<_State<_CharT, _TraitsT>>
  {
  public:
    using _StateT   = _State<_CharT, _TraitsT>;
    using _MatcherT = const _Matcher<_CharT>&;
    using _SizeT    = unsigned int;
    using _FlagT    = regex_constants::syntax_option_type;

    /// Creates an empty automaton compiled under syntax flags @p __f.
    _NFA(_FlagT __f)
    : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
      _M_has_backref(false)
    { }

    /// Syntax flags given at construction.
    _FlagT
    _M_options() const
    { return _M_flags; }

    /// Id of the start state.
    _StateIdT
    _M_start() const
    { return _M_start_state; }

    /// Ids of every accepting state inserted so far.
    const _StateSet&
    _M_final_states() const
    { return _M_accepting_states; }

    /// Number of sub-expressions opened so far.
    _SizeT
    _M_sub_count() const
    { return _M_subexpr_count; }

    /// Appends an accepting state and records it as final.
    _StateIdT
    _M_insert_accept()
    {
      this->push_back(_StateT(_S_opcode_accept));
      const _StateIdT __new_id = this->size() - 1;
      _M_accepting_states.insert(__new_id);
      return __new_id;
    }

    /// Appends a branch state leading to @p __next and @p __alt.
    _StateIdT
    _M_insert_alt(_StateIdT __next, _StateIdT __alt)
    {
      const _StateT __branch(__next, __alt);
      this->push_back(__branch);
      return this->size() - 1;
    }

    /// Appends a state that tests one character with @p __m.
    _StateIdT
    _M_insert_matcher(_MatcherT __m)
    {
      const _StateT __test(__m);
      this->push_back(__test);
      return this->size() - 1;
    }

    /// Opens a new sub-expression: takes the next free index, remembers it
    /// on the parenthesis stack and appends the matching begin state.
    _StateIdT
    _M_insert_subexpr_begin()
    {
      const _SizeT __index = _M_subexpr_count;
      ++_M_subexpr_count;
      _M_paren_stack.push(__index);
      this->push_back(_StateT(_S_opcode_subexpr_begin, __index));
      return this->size() - 1;
    }

    /// Closes the innermost open sub-expression.  The parenthesis stack is
    /// read without an emptiness check, so a matching begin must exist.
    _StateIdT
    _M_insert_subexpr_end()
    {
      const unsigned int __index = _M_paren_stack.top();
      this->push_back(_StateT(_S_opcode_subexpr_end, __index));
      _M_paren_stack.pop();
      return this->size() - 1;
    }

    /// Records whether the expression contains a back-reference.
    void
    _M_set_backref(bool __b)
    { _M_has_backref = __b; }

#ifdef _GLIBCXX_DEBUG
    std::ostream&
    _M_dot(std::ostream& __ostr) const;
#endif

    _FlagT                   _M_flags;
    _StateIdT                _M_start_state;
    _StateSet                _M_accepting_states;
    _SizeT                   _M_subexpr_count;
    bool                     _M_has_backref;
    std::stack<unsigned int> _M_paren_stack;
  };
/// Describes a sequence of one or more %_State, its current start
/// and end(s). This structure contains fragments of an NFA during
/// construction.
///
/// A sequence does not own any states: it holds a reference to the NFA
/// being built plus the ids of its start state and of up to two end states.
/// An end id equal to _S_invalid_state_id means "no such end".
template<typename _CharT, typename _TraitsT>
class _StateSeq
{
public:
typedef _NFA<_CharT, _TraitsT> _RegexT;
public:
// Constructs a single-node sequence
// The node __s is both the start and the first end; __e is an optional
// second end.
_StateSeq(_RegexT& __ss, _StateIdT __s,
_StateIdT __e = _S_invalid_state_id)
: _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
{ }
// Constructs a split sequence from two other sequences
// Side effect: inserts a new alternative state into the NFA that branches
// to the starts of __e1 and __e2.  The initializer of _M_start goes
// through _M_nfa, which is safe only because _M_nfa is declared (and
// therefore initialized) before _M_start.
_StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
: _M_nfa(__e1._M_nfa),
_M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
_M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
{ }
// Constructs a split sequence from a single sequence
// Side effect: inserts a new alternative state into the NFA that branches
// to __id and to the start of __e; the same member-order dependency on
// _M_nfa applies.
_StateSeq(const _StateSeq& __e, _StateIdT __id)
: _M_nfa(__e._M_nfa),
_M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
_M_end1(__id), _M_end2(__e._M_end1)
{ }
// Constructs a copy of a %_StateSeq
// The copy refers to the same NFA as __rhs.
_StateSeq(const _StateSeq& __rhs)
: _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
_M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
{ }
// Assignment is defined out of line.  _M_nfa is a reference and cannot be
// reseated, so only the state ids can be taken over from __rhs.
_StateSeq& operator=(const _StateSeq& __rhs);
// Returns the id of the first state of the sequence.
_StateIdT
_M_front() const
{ return _M_start; }
// Extends a sequence by one.
void
_M_push_back(_StateIdT __id);
// Extends and maybe joins a sequence.
void
_M_append(_StateIdT __id);
void
_M_append(_StateSeq& __rhs);
// Clones an entire sequence.
_StateIdT
_M_clone();
private:
// Declaration order matters: _M_nfa must stay first because the split
// constructors use it while initializing _M_start.
_RegexT& _M_nfa;
_StateIdT _M_start;
_StateIdT _M_end1;
_StateIdT _M_end2;
};
//@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
} // namespace std
#include <bits/regex_automaton.tcc>
// class template regex -*- C++ -*- // class template regex -*- C++ -*-
// Copyright (C) 2010-2013 Free Software Foundation, Inc. // Copyright (C) 2013 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the // software; you can redistribute it and/or modify it under the
...@@ -23,11 +23,10 @@ ...@@ -23,11 +23,10 @@
// <http://www.gnu.org/licenses/>. // <http://www.gnu.org/licenses/>.
/** /**
* @file bits/regex_nfa.tcc * @file bits/regex_automaton.tcc
* This is an internal header file, included by other library headers. * This is an internal header file, included by other library headers.
* Do not attempt to use it directly. @headername{regex} * Do not attempt to use it directly. @headername{regex}
*/ */
#include <regex>
namespace std _GLIBCXX_VISIBILITY(default) namespace std _GLIBCXX_VISIBILITY(default)
{ {
...@@ -35,68 +34,11 @@ namespace __detail ...@@ -35,68 +34,11 @@ namespace __detail
{ {
_GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION
/// Tests the character under the cursor against a bracket expression.
///
/// The character is accepted when it equals one of the literal characters,
/// or belongs to the accumulated character class mask, or lies inside one
/// of the ranges.  The three tests are tried in that order and later ones
/// are skipped once an earlier one succeeds.  For a non-matching bracket
/// the outcome is inverted.
///
/// @param __pc  Cursor positioned at the character to test; it is treated
///              as a _SpecializedCursor<_InIterT>.
/// @return true if the bracket expression accepts the character.
template<typename _InIterT, typename _TraitsT>
  bool _BracketMatcher<_InIterT, _TraitsT>::
  operator()(const _PatternCursor& __pc) const
  {
    typedef const _SpecializedCursor<_InIterT>& _CursorT;
    _CursorT __cursor = static_cast<_CursorT>(__pc);
    const _CharT __ch = __cursor._M_current();

    // 1. Literal characters.  (The loop variable no longer shadows the
    //    cursor.)
    bool __ret = false;
    for (auto __elem : _M_char_set)
      if (this->_M_equ(__elem, __ch))
        {
          __ret = true;
          break;
        }

    // 2. Character classes, only if nothing matched yet.
    if (!__ret)
      __ret = _M_traits.isctype(__ch, _M_class_set);

    // 3. Ranges, only if nothing matched yet; scanning them after a hit
    //    could not change the result.
    if (!__ret)
      for (auto& __range : _M_range_set)
        if (this->_M_le(__range.first, __ch)
            && this->_M_le(__ch, __range.second))
          {
            __ret = true;
            break;
          }

    return _M_is_non_matching ? !__ret : __ret;
  }
/// Compares two characters for equality under the active syntax flags.
///
/// With icase set, both characters are passed through translate_nocase;
/// otherwise, with collate set, through translate; otherwise they are
/// compared directly.  icase is checked first, so it wins when both flags
/// are set.
template<typename _InIterT, typename _TraitsT>
  bool _Comparator<_InIterT, _TraitsT>::
  _M_equ(_CharT __a, _CharT __b) const
  {
    bool __same;
    if (_M_flags & regex_constants::icase)
      __same = (_M_traits.translate_nocase(__a)
                == _M_traits.translate_nocase(__b));
    else if (_M_flags & regex_constants::collate)
      __same = (_M_traits.translate(__a) == _M_traits.translate(__b));
    else
      __same = (__a == __b);
    return __same;
  }
/// Returns true if @p __a does not sort after @p __b.
///
/// Each character is first translated (case-insensitively when icase is
/// set), wrapped in a one-character string and turned into a sort key with
/// the traits' transform(); the two keys are then compared with <=.
template<typename _InIterT, typename _TraitsT>
  bool _Comparator<_InIterT, _TraitsT>::
  _M_le(_CharT __a, _CharT __b) const
  {
    const auto __lhs_ch = (_M_flags & regex_constants::icase)
                          ? _M_traits.translate_nocase(__a)
                          : _M_traits.translate(__a);
    _StringT __lhs(1, __lhs_ch);

    const auto __rhs_ch = (_M_flags & regex_constants::icase)
                          ? _M_traits.translate_nocase(__b)
                          : _M_traits.translate(__b);
    _StringT __rhs(1, __rhs_ch);

    const auto __lhs_key = _M_traits.transform(__lhs.begin(), __lhs.end());
    const auto __rhs_key = _M_traits.transform(__rhs.begin(), __rhs.end());
    return __lhs_key <= __rhs_key;
  }
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
inline std::ostream& _State:: template<typename _CharT, typename _TraitsT>
_M_print(std::ostream& ostr) const std::ostream& _State<_CharT, _TraitsT>::
{ _M_print(std::ostream& ostr) const
{
switch (_M_opcode) switch (_M_opcode)
{ {
case _S_opcode_alternative: case _S_opcode_alternative:
...@@ -119,12 +61,13 @@ _M_print(std::ostream& ostr) const ...@@ -119,12 +61,13 @@ _M_print(std::ostream& ostr) const
break; break;
} }
return ostr; return ostr;
} }
// Prints graphviz dot commands for state. // Prints graphviz dot commands for state.
inline std::ostream& _State:: template<typename _CharT, typename _TraitsT>
_M_dot(std::ostream& __ostr, _StateIdT __id) const std::ostream& _State<_CharT, _TraitsT>::
{ _M_dot(std::ostream& __ostr, _StateIdT __id) const
{
switch (_M_opcode) switch (_M_opcode)
{ {
case _S_opcode_alternative: case _S_opcode_alternative:
...@@ -157,40 +100,44 @@ _M_dot(std::ostream& __ostr, _StateIdT __id) const ...@@ -157,40 +100,44 @@ _M_dot(std::ostream& __ostr, _StateIdT __id) const
break; break;
} }
return __ostr; return __ostr;
} }
inline std::ostream& _Nfa:: template<typename _CharT, typename _TraitsT>
_M_dot(std::ostream& __ostr) const std::ostream& _NFA<_CharT, _TraitsT>::
{ _M_dot(std::ostream& __ostr) const
{
__ostr << "digraph _Nfa {\n" __ostr << "digraph _Nfa {\n"
<< " rankdir=LR;\n"; << " rankdir=LR;\n";
for (unsigned int __i = 0; __i < this->size(); ++__i) for (unsigned int __i = 0; __i < this->size(); ++__i)
{ this->at(__i)._M_dot(__ostr, __i); } { this->at(__i)._M_dot(__ostr, __i); }
__ostr << "}\n"; __ostr << "}\n";
return __ostr; return __ostr;
} }
#endif #endif
inline _StateSeq& _StateSeq:: template<typename _CharT, typename _TraitsT>
operator=(const _StateSeq& __rhs) _StateSeq<_CharT, _TraitsT>& _StateSeq<_CharT, _TraitsT>::
{ operator=(const _StateSeq& __rhs)
{
_M_start = __rhs._M_start; _M_start = __rhs._M_start;
_M_end1 = __rhs._M_end1; _M_end1 = __rhs._M_end1;
_M_end2 = __rhs._M_end2; _M_end2 = __rhs._M_end2;
return *this; return *this;
} }
inline void _StateSeq:: template<typename _CharT, typename _TraitsT>
_M_push_back(_StateIdT __id) void _StateSeq<_CharT, _TraitsT>::
{ _M_push_back(_StateIdT __id)
{
if (_M_end1 != _S_invalid_state_id) if (_M_end1 != _S_invalid_state_id)
_M_nfa[_M_end1]._M_next = __id; _M_nfa[_M_end1]._M_next = __id;
_M_end1 = __id; _M_end1 = __id;
} }
inline void _StateSeq:: template<typename _CharT, typename _TraitsT>
_M_append(_StateIdT __id) void _StateSeq<_CharT, _TraitsT>::
{ _M_append(_StateIdT __id)
{
if (_M_end2 != _S_invalid_state_id) if (_M_end2 != _S_invalid_state_id)
{ {
if (_M_end2 == _M_end1) if (_M_end2 == _M_end1)
...@@ -202,11 +149,12 @@ _M_append(_StateIdT __id) ...@@ -202,11 +149,12 @@ _M_append(_StateIdT __id)
if (_M_end1 != _S_invalid_state_id) if (_M_end1 != _S_invalid_state_id)
_M_nfa[_M_end1]._M_next = __id; _M_nfa[_M_end1]._M_next = __id;
_M_end1 = __id; _M_end1 = __id;
} }
inline void _StateSeq:: template<typename _CharT, typename _TraitsT>
_M_append(_StateSeq& __rhs) void _StateSeq<_CharT, _TraitsT>::
{ _M_append(_StateSeq& __rhs)
{
if (_M_end2 != _S_invalid_state_id) if (_M_end2 != _S_invalid_state_id)
{ {
if (_M_end2 == _M_end1) if (_M_end2 == _M_end1)
...@@ -220,12 +168,13 @@ _M_append(_StateSeq& __rhs) ...@@ -220,12 +168,13 @@ _M_append(_StateSeq& __rhs)
if (_M_end1 != _S_invalid_state_id) if (_M_end1 != _S_invalid_state_id)
_M_nfa[_M_end1]._M_next = __rhs._M_start; _M_nfa[_M_end1]._M_next = __rhs._M_start;
_M_end1 = __rhs._M_end1; _M_end1 = __rhs._M_end1;
} }
// @todo implement this function. // @todo implement this function.
inline _StateIdT _StateSeq:: template<typename _CharT, typename _TraitsT>
_M_clone() _StateIdT _StateSeq<_CharT, _TraitsT>::
{ return 0; } _M_clone()
{ return 0; }
_GLIBCXX_END_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail } // namespace __detail
......
...@@ -39,15 +39,88 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -39,15 +39,88 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{ * @{
*/ */
/// Base class for scanner. /// Matches a character range (bracket expression)
struct _Scanner_base template<typename _CharT, typename _TraitsT>
struct _BracketMatcher
{ {
typedef unsigned int _StateT; typedef typename _TraitsT::char_class_type _CharClassT;
typedef typename _TraitsT::string_type _StringT;
typedef regex_constants::syntax_option_type _FlagT;
static constexpr _StateT _S_state_in_brace = 1 << 0; explicit
static constexpr _StateT _S_state_in_bracket = 1 << 1; _BracketMatcher(bool __is_non_matching,
const _TraitsT& __t,
_FlagT __flags)
: _M_is_non_matching(__is_non_matching), _M_traits(__t),
_M_flags(__flags), _M_class_set(0)
{ }
bool
operator()(_CharT) const;
void
_M_add_char(_CharT __c)
{
if (_M_flags & regex_constants::collate)
if (_M_is_icase())
_M_char_set.push_back(_M_traits.translate_nocase(__c));
else
_M_char_set.push_back(_M_traits.translate(__c));
else
_M_char_set.push_back(__c);
}
void
_M_add_collating_element(const _StringT& __s)
{
auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end());
if (__st.empty())
__throw_regex_error(regex_constants::error_collate);
// TODO: digraph
_M_char_set.push_back(__st[0]);
}
void
_M_add_equivalence_class(const _StringT& __s)
{
_M_add_character_class(
_M_traits.transform_primary(&*__s.begin(), &*__s.end()));
}
void
_M_add_character_class(const _StringT& __s)
{
auto __st = _M_traits.
lookup_classname(&*__s.begin(), &*__s.end(), _M_is_icase());
if (__st == 0)
__throw_regex_error(regex_constants::error_ctype);
_M_class_set |= __st;
}
void
_M_make_range(_CharT __l, _CharT __r)
{ _M_range_set.push_back(make_pair(_M_get_str(__l), _M_get_str(__r))); }
bool
_M_is_icase() const
{ return _M_flags & regex_constants::icase; }
virtual ~_Scanner_base() { }; _StringT
_M_get_str(_CharT __c) const
{
auto __s = _StringT(1,
_M_is_icase()
? _M_traits.translate_nocase(__c)
: _M_traits.translate(__c));
return _M_traits.transform(__s.begin(), __s.end());
}
_TraitsT _M_traits;
_FlagT _M_flags;
bool _M_is_non_matching;
std::vector<_CharT> _M_char_set;
std::vector<pair<_StringT, _StringT>> _M_range_set;
_CharClassT _M_class_set;
}; };
/** /**
...@@ -60,12 +133,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -60,12 +133,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* constructor: different regular expression grammars will interpret * constructor: different regular expression grammars will interpret
* the same input pattern in syntactically different ways. * the same input pattern in syntactically different ways.
*/ */
template<typename _InputIterator> template<typename _InputIter>
class _Scanner: public _Scanner_base class _Scanner
{ {
public: public:
typedef _InputIterator _IteratorT; typedef unsigned int _StateT;
typedef typename std::iterator_traits<_IteratorT>::value_type _CharT; typedef typename std::iterator_traits<_InputIter>::value_type _CharT;
typedef std::basic_string<_CharT> _StringT; typedef std::basic_string<_CharT> _StringT;
typedef regex_constants::syntax_option_type _FlagT; typedef regex_constants::syntax_option_type _FlagT;
typedef const std::ctype<_CharT> _CtypeT; typedef const std::ctype<_CharT> _CtypeT;
...@@ -103,8 +176,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -103,8 +176,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_unknown _S_token_unknown
}; };
_Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags, _Scanner(_InputIter __begin, _InputIter __end,
std::locale __loc) _FlagT __flags, std::locale __loc)
: _M_current(__begin) , _M_end(__end) , _M_flags(__flags), : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
_M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0) _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0)
{ _M_advance(); } { _M_advance(); }
...@@ -144,8 +217,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -144,8 +217,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void void
_M_eat_collsymbol(); _M_eat_collsymbol();
_IteratorT _M_current; static constexpr _StateT _S_state_in_brace = 1 << 0;
_IteratorT _M_end; static constexpr _StateT _S_state_in_bracket = 1 << 1;
_InputIter _M_current;
_InputIter _M_end;
_FlagT _M_flags; _FlagT _M_flags;
_CtypeT& _M_ctype; _CtypeT& _M_ctype;
_TokenT _M_curToken; _TokenT _M_curToken;
...@@ -153,520 +228,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -153,520 +228,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateT _M_state; _StateT _M_state;
}; };
/// Reads the next token from the pattern into _M_curToken (and, for
/// ordinary characters, its text into _M_curValue).
///
/// - At the end of the pattern the token is _S_token_eof.
/// - While inside a bracket expression or an interval, scanning is
///   delegated to _M_scan_in_bracket() / _M_scan_in_brace().
/// - Otherwise the current character is classified.  '(' , ')' and '{' are
///   only special when neither the basic nor the grep grammar is selected.
/// - Anything not recognised becomes _S_token_ord_char.
template<typename _InputIterator>
  void
  _Scanner<_InputIterator>::
  _M_advance()
  {
    if (_M_current == _M_end)
      {
        _M_curToken = _S_token_eof;
        return;
      }

    _CharT __c = *_M_current;

    if (_M_state & _S_state_in_bracket)
      {
        _M_scan_in_bracket();
        return;
      }
    if (_M_state & _S_state_in_brace)
      {
        _M_scan_in_brace();
        return;
      }
#if 0
    // TODO: re-enable line anchors when _M_assertion is implemented.
    // See PR libstdc++/47724
    else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
      {
        _M_curToken = _S_token_line_begin;
        ++_M_current;
        return;
      }
    else if (__c == _M_ctype.widen('$'))
      {
        _M_curToken = _S_token_line_end;
        ++_M_current;
        return;
      }
#endif
    else if (__c == _M_ctype.widen('.'))
      {
        _M_curToken = _S_token_anychar;
        ++_M_current;
        return;
      }
    else if (__c == _M_ctype.widen('*'))
      {
        _M_curToken = _S_token_closure0;
        ++_M_current;
        return;
      }
    else if (__c == _M_ctype.widen('+'))
      {
        _M_curToken = _S_token_closure1;
        ++_M_current;
        return;
      }
    else if (__c == _M_ctype.widen('|'))
      {
        _M_curToken = _S_token_or;
        ++_M_current;
        return;
      }
    else if (__c == _M_ctype.widen('['))
      {
        // Step past '[' and only then peek at the following character.
        // The pattern may end right after '[', in which case _M_current
        // is now the end iterator and must not be dereferenced; the next
        // call then reports _S_token_eof while still in bracket state.
        ++_M_current;
        if (_M_current != _M_end && *_M_current == _M_ctype.widen('^'))
          {
            _M_curToken = _S_token_bracket_inverse_begin;
            ++_M_current;
          }
        else
          _M_curToken = _S_token_bracket_begin;
        _M_state |= _S_state_in_bracket;
        return;
      }
    else if (__c == _M_ctype.widen('\\'))
      {
        _M_eat_escape();
        return;
      }
    else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
      {
        if (__c == _M_ctype.widen('('))
          {
            _M_curToken = _S_token_subexpr_begin;
            ++_M_current;
            return;
          }
        else if (__c == _M_ctype.widen(')'))
          {
            _M_curToken = _S_token_subexpr_end;
            ++_M_current;
            return;
          }
        else if (__c == _M_ctype.widen('{'))
          {
            _M_curToken = _S_token_interval_begin;
            _M_state |= _S_state_in_brace;
            ++_M_current;
            return;
          }
      }

    // Not a metacharacter in the selected grammar: an ordinary character.
    _M_curToken = _S_token_ord_char;
    _M_curValue.assign(1, __c);
    ++_M_current;
  }
// Scans one token while inside an interval expression: a run of digits
// becomes _S_token_dup_count, ',' becomes _S_token_comma.  The closing
// delimiter is '}' outside basic/grep syntax; in basic/grep syntax a
// backslash is handed to _M_eat_escape().  Any other character leaves the
// token and the position untouched.
template<typename _InputIterator>
  void
  _Scanner<_InputIterator>::
  _M_scan_in_brace()
  {
    if (_M_ctype.is(_CtypeT::digit, *_M_current))
      {
        // Collect the whole digit run as the repeat count text.
        _M_curToken = _S_token_dup_count;
        _M_curValue.assign(1, *_M_current);
        for (++_M_current;
             _M_current != _M_end
               && _M_ctype.is(_CtypeT::digit, *_M_current);
             ++_M_current)
          _M_curValue += *_M_current;
        return;
      }

    if (*_M_current == _M_ctype.widen(','))
      {
        _M_curToken = _S_token_comma;
        ++_M_current;
        return;
      }

    if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
      {
        if (*_M_current == _M_ctype.widen('}'))
          {
            _M_curToken = _S_token_interval_end;
            _M_state &= ~_S_state_in_brace;
            ++_M_current;
          }
        return;
      }

    // basic/grep: only an escape can continue from here.
    if (*_M_current == _M_ctype.widen('\\'))
      _M_eat_escape();
  }
// Scans one token while inside a bracket expression.  "[." "[:" and "[="
// start a collating symbol, character class name and equivalence class
// respectively; '-' and ']' map to dash / bracket-end; a backslash is
// handed to _M_eat_escape().  Everything else is a single collating
// element.
template<typename _InputIterator>
  void
  _Scanner<_InputIterator>::
  _M_scan_in_bracket()
  {
    if (*_M_current == _M_ctype.widen('['))
      {
        ++_M_current;
        if (_M_current == _M_end)
          {
            _M_curToken = _S_token_eof;
            return;
          }
        if (*_M_current == _M_ctype.widen('.'))
          {
            _M_curToken = _S_token_collsymbol;
            _M_eat_collsymbol();
            return;
          }
        if (*_M_current == _M_ctype.widen(':'))
          {
            _M_curToken = _S_token_char_class_name;
            _M_eat_charclass();
            return;
          }
        if (*_M_current == _M_ctype.widen('='))
          {
            _M_curToken = _S_token_equiv_class_name;
            _M_eat_equivclass();
            return;
          }
        // No class introducer: the character after '[' is reported below
        // as a single element (the '[' itself has already been skipped).
      }
    else
      {
        if (*_M_current == _M_ctype.widen('-'))
          {
            _M_curToken = _S_token_dash;
            ++_M_current;
            return;
          }
        if (*_M_current == _M_ctype.widen(']'))
          {
            _M_curToken = _S_token_bracket_end;
            _M_state &= ~_S_state_in_bracket;
            ++_M_current;
            return;
          }
        if (*_M_current == _M_ctype.widen('\\'))
          {
            _M_eat_escape();
            return;
          }
      }

    _M_curToken = _S_token_collelem_single;
    _M_curValue.assign(1, *_M_current);
    ++_M_current;
  }
// Scans an escape sequence.  On entry _M_current refers to the backslash;
// the backslash and the escaped character are consumed.  Sets
// _S_token_eof if the pattern ends after the backslash and throws
// regex_error(error_escape) for an unrecognised escape.
// TODO: implement the remaining escapes (the hex escape is unfinished).
template<typename _InputIterator>
  void
  _Scanner<_InputIterator>::
  _M_eat_escape()
  {
    ++_M_current; // skip the backslash
    if (_M_current == _M_end)
      {
        _M_curToken = _S_token_eof;
        return;
      }
    _CharT __c = *_M_current;
    ++_M_current;

    // In basic/grep syntax \( \) \{ \} are the operators; in every other
    // syntax escaping them yields the ordinary character.
    const bool __escape_means_literal
      = !(_M_flags & (regex_constants::basic | regex_constants::grep));

    if (__c == _M_ctype.widen('('))
      {
        if (__escape_means_literal)
          {
            _M_curToken = _S_token_ord_char;
            _M_curValue.assign(1, __c);
          }
        else
          _M_curToken = _S_token_subexpr_begin;
      }
    else if (__c == _M_ctype.widen(')'))
      {
        if (__escape_means_literal)
          {
            _M_curToken = _S_token_ord_char;
            _M_curValue.assign(1, __c);
          }
        else
          _M_curToken = _S_token_subexpr_end;
      }
    else if (__c == _M_ctype.widen('{'))
      {
        if (__escape_means_literal)
          {
            _M_curToken = _S_token_ord_char;
            _M_curValue.assign(1, __c);
          }
        else
          {
            _M_curToken = _S_token_interval_begin;
            _M_state |= _S_state_in_brace;
          }
      }
    else if (__c == _M_ctype.widen('}'))
      {
        if (__escape_means_literal)
          {
            _M_curToken = _S_token_ord_char;
            _M_curValue.assign(1, __c);
          }
        else
          {
            // Test the in-brace bit specifically.  A logical AND here
            // would accept "\}" whenever any state bit is set, e.g. while
            // only inside a bracket expression.
            if (!(_M_state & _S_state_in_brace))
              __throw_regex_error(regex_constants::error_badbrace);
            _M_state &= ~_S_state_in_brace;
            _M_curToken = _S_token_interval_end;
          }
      }
    else if (__c == _M_ctype.widen('x'))
      {
        // NOTE(review): 'x' was already consumed above, so this extra
        // increment skips one more character, and no path in this branch
        // assigns _M_curToken.  Looks unfinished (see TODO); behaviour is
        // intentionally left unchanged here.
        ++_M_current;
        if (_M_current == _M_end)
          {
            _M_curToken = _S_token_eof;
            return;
          }
        if (_M_ctype.is(_CtypeT::digit, *_M_current))
          {
            _M_curValue.assign(1, *_M_current);
            ++_M_current;
            if (_M_current == _M_end)
              {
                _M_curToken = _S_token_eof;
                return;
              }
            if (_M_ctype.is(_CtypeT::digit, *_M_current))
              {
                _M_curValue += *_M_current;
                ++_M_current;
                return;
              }
          }
      }
    else if (__c == _M_ctype.widen('^')
             || __c == _M_ctype.widen('.')
             || __c == _M_ctype.widen('*')
             || __c == _M_ctype.widen('$')
             || __c == _M_ctype.widen('\\'))
      {
        // Escaped metacharacter: taken literally.
        _M_curToken = _S_token_ord_char;
        _M_curValue.assign(1, __c);
      }
    else if (_M_ctype.is(_CtypeT::digit, __c))
      {
        _M_curToken = _S_token_backref;
        _M_curValue.assign(1, __c);
      }
    else if (_M_state & _S_state_in_bracket)
      {
        if (__c == _M_ctype.widen('-')
            || __c == _M_ctype.widen('[')
            || __c == _M_ctype.widen(']'))
          {
            _M_curToken = _S_token_ord_char;
            _M_curValue.assign(1, __c);
          }
        else if ((_M_flags & regex_constants::ECMAScript)
                 && __c == _M_ctype.widen('b'))
          {
            // NOTE(review): "\b" inside brackets is mapped to a space
            // character here -- confirm that this is the intended value.
            _M_curToken = _S_token_ord_char;
            _M_curValue.assign(1, _M_ctype.widen(' '));
          }
        else
          __throw_regex_error(regex_constants::error_escape);
      }
    else
      __throw_regex_error(regex_constants::error_escape);
  }
// Eats a character class name or throws regex_error(error_ctype).
// _M_current points to the opening ':' delimiter on entry and to the
// character after the closing ']' on return.  The name between the two
// ':' delimiters is stored in _M_curValue.
template<typename _InputIterator>
  void
  _Scanner<_InputIterator>::
  _M_eat_charclass()
  {
    ++_M_current; // skip ':'
    if (_M_current == _M_end)
      __throw_regex_error(regex_constants::error_ctype);
    for (_M_curValue.clear();
         _M_current != _M_end && *_M_current != _M_ctype.widen(':');
         ++_M_current)
      _M_curValue += *_M_current;
    if (_M_current == _M_end)
      __throw_regex_error(regex_constants::error_ctype);
    ++_M_current; // skip ':'
    // The pattern may end right after the closing ':'; check for the end
    // before looking at the expected ']'.
    if (_M_current == _M_end || *_M_current != _M_ctype.widen(']'))
      __throw_regex_error(regex_constants::error_ctype);
    ++_M_current; // skip ']'
  }
// Eats an equivalence class or throws regex_error(error_collate).
// _M_current points to the opening '=' delimiter on entry and to the
// character after the closing ']' on return.  The text between the two
// '=' delimiters is stored in _M_curValue.
template<typename _InputIterator>
  void
  _Scanner<_InputIterator>::
  _M_eat_equivclass()
  {
    ++_M_current; // skip '='
    if (_M_current == _M_end)
      __throw_regex_error(regex_constants::error_collate);
    for (_M_curValue.clear();
         _M_current != _M_end && *_M_current != _M_ctype.widen('=');
         ++_M_current)
      _M_curValue += *_M_current;
    if (_M_current == _M_end)
      __throw_regex_error(regex_constants::error_collate);
    ++_M_current; // skip '='
    // The pattern may end right after the closing '='; check for the end
    // before looking at the expected ']'.
    if (_M_current == _M_end || *_M_current != _M_ctype.widen(']'))
      __throw_regex_error(regex_constants::error_collate);
    ++_M_current; // skip ']'
  }
// Eats a collating symbol or throws regex_error(error_collate).
// _M_current points to the opening '.' delimiter on entry and to the
// character after the closing ']' on return.  The text between the two
// '.' delimiters is stored in _M_curValue.
template<typename _InputIterator>
  void
  _Scanner<_InputIterator>::
  _M_eat_collsymbol()
  {
    ++_M_current; // skip '.'
    if (_M_current == _M_end)
      __throw_regex_error(regex_constants::error_collate);
    for (_M_curValue.clear();
         _M_current != _M_end && *_M_current != _M_ctype.widen('.');
         ++_M_current)
      _M_curValue += *_M_current;
    if (_M_current == _M_end)
      __throw_regex_error(regex_constants::error_collate);
    ++_M_current; // skip '.'
    // The pattern may end right after the closing '.'; check for the end
    // before looking at the expected ']'.
    if (_M_current == _M_end || *_M_current != _M_ctype.widen(']'))
      __throw_regex_error(regex_constants::error_collate);
    ++_M_current; // skip ']'
  }
#ifdef _GLIBCXX_DEBUG
// Debug helper: writes a one-line description of the current token to
// ostr and returns the stream.  Tokens that carry text also print it.
template<typename _InputIterator>
  std::ostream&
  _Scanner<_InputIterator>::
  _M_print(std::ostream& ostr)
  {
    // Tokens without a value only select a fixed text, which is written
    // once after the switch; tokens with a value are written in place.
    const char* __fixed_text = nullptr;
    switch (_M_curToken)
      {
      case _S_token_anychar:
        __fixed_text = "any-character\n"; break;
      case _S_token_backref:
        __fixed_text = "backref\n"; break;
      case _S_token_bracket_begin:
        __fixed_text = "bracket-begin\n"; break;
      case _S_token_bracket_inverse_begin:
        __fixed_text = "bracket-inverse-begin\n"; break;
      case _S_token_bracket_end:
        __fixed_text = "bracket-end\n"; break;
      case _S_token_closure0:
        __fixed_text = "closure0\n"; break;
      case _S_token_closure1:
        __fixed_text = "closure1\n"; break;
      case _S_token_comma:
        __fixed_text = "comma\n"; break;
      case _S_token_dash:
        __fixed_text = "dash\n"; break;
      case _S_token_eof:
        __fixed_text = "EOF\n"; break;
      case _S_token_interval_begin:
        __fixed_text = "interval begin\n"; break;
      case _S_token_interval_end:
        __fixed_text = "interval end\n"; break;
      case _S_token_line_begin:
        __fixed_text = "line begin\n"; break;
      case _S_token_line_end:
        __fixed_text = "line end\n"; break;
      case _S_token_opt:
        __fixed_text = "opt\n"; break;
      case _S_token_or:
        __fixed_text = "or\n"; break;
      case _S_token_subexpr_begin:
        __fixed_text = "subexpr begin\n"; break;
      case _S_token_subexpr_end:
        __fixed_text = "subexpr end\n"; break;
      case _S_token_word_begin:
        __fixed_text = "word begin\n"; break;
      case _S_token_word_end:
        __fixed_text = "word end\n"; break;
      case _S_token_unknown:
        __fixed_text = "-- unknown token --\n"; break;

      case _S_token_char_class_name:
        ostr << "char-class-name \"" << _M_curValue << "\"\n";
        break;
      case _S_token_collelem_multi:
        ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
        break;
      case _S_token_collelem_single:
        ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
        break;
      case _S_token_collsymbol:
        ostr << "collsymbol \"" << _M_curValue << "\"\n";
        break;
      case _S_token_dup_count:
        ostr << "dup count: " << _M_curValue << "\n";
        break;
      case _S_token_equiv_class_name:
        ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
        break;
      case _S_token_ord_char:
        ostr << "ordinary character: \"" << _M_value() << "\"\n";
        break;

      default:
        _GLIBCXX_DEBUG_ASSERT(false);
      }
    if (__fixed_text)
      ostr << __fixed_text;
    return ostr;
  }
#endif
/// Builds an NFA from an input iterator interval. /// Builds an NFA from an input iterator interval.
template<typename _InIter, typename _TraitsT> template<typename _InputIter, typename _CharT, typename _TraitsT>
class _Compiler class _Compiler
{ {
public: public:
typedef _InIter _IterT; typedef typename _TraitsT::string_type _StringT;
typedef typename std::iterator_traits<_InIter>::value_type _CharT; typedef _NFA<_CharT, _TraitsT> _RegexT;
typedef std::basic_string<_CharT> _StringT;
typedef regex_constants::syntax_option_type _FlagT; typedef regex_constants::syntax_option_type _FlagT;
_Compiler(const _InIter& __b, const _InIter& __e, _Compiler(_InputIter __b, _InputIter __e,
_TraitsT& __traits, _FlagT __flags); const _TraitsT& __traits, _FlagT __flags);
const _Nfa& std::shared_ptr<_RegexT>
_M_nfa() const _M_get_nfa() const
{ return _M_state_store; } { return std::shared_ptr<_RegexT>(new _RegexT(_M_state_store)); }
private: private:
typedef _Scanner<_InIter> _ScannerT; typedef _Scanner<_InputIter> _ScannerT;
typedef typename _ScannerT::_TokenT _TokenT; typedef typename _ScannerT::_TokenT _TokenT;
typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT; typedef _StateSeq<_CharT, _TraitsT> _StateSeqT;
typedef _BracketMatcher<_InIter, _TraitsT> _BMatcherT; typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT;
typedef _BracketMatcher<_CharT, _TraitsT> _BMatcherT;
// accepts a specific token or returns false. // accepts a specific token or returns false.
bool bool
...@@ -720,345 +303,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -720,345 +303,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
int int
_M_cur_int_value(int __radix); _M_cur_int_value(int __radix);
_TraitsT& _M_traits; const _TraitsT& _M_traits;
_ScannerT _M_scanner; _ScannerT _M_scanner;
_StringT _M_cur_value; _StringT _M_cur_value;
_Nfa _M_state_store; _RegexT _M_state_store;
_StackT _M_stack; _StackT _M_stack;
_FlagT _M_flags; _FlagT _M_flags;
}; };
// Compiles the pattern [__b, __e) into _M_state_store.  The whole
// expression is wrapped as sub-expression 0 and terminated by an accept
// state.
template<typename _InIter, typename _TraitsT>
  _Compiler<_InIter, _TraitsT>::
  _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
            _Compiler<_InIter, _TraitsT>::_FlagT __flags)
  : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
    _M_state_store(__flags), _M_flags(__flags)
  {
    // Opening tag of sub-expression 0.
    _StateSeq __whole(_M_state_store,
                      _M_state_store._M_insert_subexpr_begin
                      (_StartTagger<_InIter, _TraitsT>(0)));

    // Parse the pattern; the result, if any, is left on _M_stack.
    _M_disjunction();
    if (!_M_stack.empty())
      {
        __whole._M_append(_M_stack.top());
        _M_stack.pop();
      }

    // Closing tag of sub-expression 0, then the accepting state.
    __whole._M_append(_M_state_store._M_insert_subexpr_end
                      (0, _EndTagger<_InIter, _TraitsT>(0)));
    __whole._M_append(_M_state_store._M_insert_accept());
  }
// Accepts the given token: if it is the scanner's current token, its
// value is saved in _M_cur_value, the scanner advances and true is
// returned.  Otherwise nothing changes and false is returned.
template<typename _InIter, typename _TraitsT>
  bool
  _Compiler<_InIter, _TraitsT>::
  _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
  {
    if (!(token == _M_scanner._M_token()))
      return false;

    _M_cur_value = _M_scanner._M_value();
    _M_scanner._M_advance();
    return true;
  }
// Parses  alternative ( '|' disjunction )?  and, when an '|' was seen,
// replaces the two operands on _M_stack by their combined sequence.
template<typename _InIter, typename _TraitsT>
  void
  _Compiler<_InIter, _TraitsT>::
  _M_disjunction()
  {
    this->_M_alternative();
    if (!_M_match_token(_ScannerT::_S_token_or))
      return;

    // NOTE(review): both top() calls assume the operand was pushed, but
    // _M_alternative() only pushes when _M_term() succeeds -- confirm
    // that an empty operand cannot reach this point.
    _StateSeq __lhs(_M_stack.top());
    _M_stack.pop();

    this->_M_disjunction();

    _StateSeq __rhs(_M_stack.top());
    _M_stack.pop();

    _M_stack.push(_StateSeq(__lhs, __rhs));
  }
// Parses a (possibly empty) run of terms and leaves their concatenation
// on _M_stack.  Nothing is pushed when no term is found.
template<typename _InIter, typename _TraitsT>
  void
  _Compiler<_InIter, _TraitsT>::
  _M_alternative()
  {
    if (!this->_M_term())
      return;

    _StateSeq __head(_M_stack.top());
    _M_stack.pop();

    // Parse the remaining terms, then chain them after the first one.
    this->_M_alternative();
    if (!_M_stack.empty())
      {
        __head._M_append(_M_stack.top());
        _M_stack.pop();
      }
    _M_stack.push(__head);
  }
// Parses one term: either an assertion, or an atom followed by an
// optional quantifier.  Returns false when neither is present.
template<typename _InIter, typename _TraitsT>
  bool
  _Compiler<_InIter, _TraitsT>::
  _M_term()
  {
    if (this->_M_assertion())
      return true;
    if (!this->_M_atom())
      return false;
    this->_M_quantifier();
    return true;
  }
// Consumes a line-begin, line-end, word-begin or word-end token, tried
// in that order, and reports whether one was found.  No NFA state is
// generated for any of them yet; the intended opcodes were
// _S_opcode_line_begin, _S_opcode_line_end, _S_opcode_word_begin and
// _S_opcode_word_end respectively.
template<typename _InIter, typename _TraitsT>
  bool
  _Compiler<_InIter, _TraitsT>::
  _M_assertion()
  {
    return _M_match_token(_ScannerT::_S_token_line_begin)
      || _M_match_token(_ScannerT::_S_token_line_end)
      || _M_match_token(_ScannerT::_S_token_word_begin)
      || _M_match_token(_ScannerT::_S_token_word_end);
  }
// Applies an optional quantifier to the sequence on top of _M_stack.
// Handles the closure0, closure1, opt and interval tokens; returns
// without touching the stack when the current token is none of these.
// Throws regex_error(error_badrepeat) when there is nothing on the stack
// to repeat, and error_badbrace / error_brace for malformed intervals.
template<typename _InIter, typename _TraitsT>
void
_Compiler<_InIter, _TraitsT>::
_M_quantifier()
{
// closure0: the top sequence is rebuilt and looped back to its own front.
if (_M_match_token(_ScannerT::_S_token_closure0))
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
_StateSeq __r(_M_stack.top(), -1);
__r._M_append(__r._M_front());
_M_stack.pop();
_M_stack.push(__r);
return;
}
// closure1: an alternative state pointing back to the front of the top
// sequence is appended to it in place.
if (_M_match_token(_ScannerT::_S_token_closure1))
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
_StateSeq __r(_M_state_store,
_M_state_store.
_M_insert_alt(_S_invalid_state_id,
_M_stack.top()._M_front()));
_M_stack.top()._M_append(__r);
return;
}
// opt: same rebuild as closure0 but without the loop back.
if (_M_match_token(_ScannerT::_S_token_opt))
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
_StateSeq __r(_M_stack.top(), -1);
_M_stack.pop();
_M_stack.push(__r);
return;
}
// Interval: a dup_count, optionally followed by a comma and a second
// dup_count, then the interval end token.
if (_M_match_token(_ScannerT::_S_token_interval_begin))
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
if (!_M_match_token(_ScannerT::_S_token_dup_count))
__throw_regex_error(regex_constants::error_badbrace);
_StateSeq __r(_M_stack.top());
int __min_rep = _M_cur_int_value(10);
// One copy is already on the stack, so append __min_rep - 1 clones.
// NOTE(review): a minimum of 0 appends nothing here -- confirm that the
// zero-repetition case is handled as intended.
for (int __i = 1; __i < __min_rep; ++__i)
_M_stack.top()._M_append(__r._M_clone());
if (_M_match_token(_ScannerT::_S_token_comma))
if (_M_match_token(_ScannerT::_S_token_dup_count))
{
// Comma followed by a second count: __n extra alternatives are appended.
int __n = _M_cur_int_value(10) - __min_rep;
if (__n < 0)
__throw_regex_error(regex_constants::error_badbrace);
for (int __i = 0; __i < __n; ++__i)
{
_StateSeq __r(_M_state_store,
_M_state_store.
_M_insert_alt(_S_invalid_state_id,
_M_stack.top()._M_front()));
_M_stack.top()._M_append(__r);
}
}
// This else pairs with the inner if: it runs when a comma was seen but
// no second count followed.
else
{
_StateSeq __r(_M_stack.top(), -1);
__r._M_push_back(__r._M_front());
_M_stack.pop();
_M_stack.push(__r);
}
if (!_M_match_token(_ScannerT::_S_token_interval_end))
__throw_regex_error(regex_constants::error_brace);
return;
}
}
// Parses one atom and pushes its state sequence onto _M_stack: an
// any-character token, an ordinary character, a parenthesised
// sub-expression, or (as the last resort) a bracket expression.
// Returns true when an atom was recognised.
template<typename _InIter, typename _TraitsT>
bool
_Compiler<_InIter, _TraitsT>::
_M_atom()
{
typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
typedef _StartTagger<_InIter, _TraitsT> _Start;
typedef _EndTagger<_InIter, _TraitsT> _End;
if (_M_match_token(_ScannerT::_S_token_anychar))
{
_M_stack.push(_StateSeq(_M_state_store,
_M_state_store._M_insert_matcher
(_AnyMatcher)));
return true;
}
if (_M_match_token(_ScannerT::_S_token_ord_char))
{
// Only the first character of the token value is matched.
_M_stack.push(_StateSeq(_M_state_store,
_M_state_store._M_insert_matcher
(_CMatcher(_M_cur_value[0], _M_flags, _M_traits))));
return true;
}
if (_M_match_token(_ScannerT::_S_token_backref))
{
// The back-reference token is consumed and flagged on the automaton, but
// no state is pushed; because the return below is commented out, control
// falls through to the checks that follow.
// __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
_M_state_store._M_set_back_ref(true);
//return true;
}
if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
{
// Wrap the nested disjunction between begin/end tags for this mark.
int __mark = _M_state_store._M_sub_count();
_StateSeq __r(_M_state_store,
_M_state_store.
_M_insert_subexpr_begin(_Start(__mark)));
this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
__throw_regex_error(regex_constants::error_paren);
if (!_M_stack.empty())
{
__r._M_append(_M_stack.top());
_M_stack.pop();
}
__r._M_append(_M_state_store._M_insert_subexpr_end
(__mark, _End(__mark)));
_M_stack.push(__r);
return true;
}
return _M_bracket_expression();
}
// Parses a bracket expression and pushes a matcher state for it.
// Returns false when the current token does not open a bracket
// expression.  An empty list is only accepted with the ECMAScript flag;
// otherwise regex_error(error_brack) is thrown.
template<typename _InIter, typename _TraitsT>
  bool
  _Compiler<_InIter, _TraitsT>::
  _M_bracket_expression()
  {
    bool __negated
      = _M_match_token(_ScannerT::_S_token_bracket_inverse_begin);
    if (!__negated && !_M_match_token(_ScannerT::_S_token_bracket_begin))
      return false;

    _BMatcherT __set(__negated, _M_flags, _M_traits);

    // special case: only if _not_ chr first after
    // '[' or '[^' or if ECMAscript
    const bool __has_items = _M_bracket_list(__set);
    if (!__has_items && !(_M_flags & regex_constants::ECMAScript))
      __throw_regex_error(regex_constants::error_brack);

    _M_stack.push(_StateSeq(_M_state_store,
                            _M_state_store._M_insert_matcher(__set)));
    return true;
  }
// Parses the items of a bracket expression up to and including the
// bracket-end token, adding each one to __matcher.  Returns whether the
// list is non-empty.
template<typename _InIter, typename _TraitsT>
  bool // list is non-empty
  _Compiler<_InIter, _TraitsT>::
  _M_bracket_list(_BMatcherT& __matcher)
  {
    if (_M_match_token(_ScannerT::_S_token_bracket_end))
      return false;

    // At least one item is present; keep reading items until the closing
    // token has been consumed.
    do
      _M_expression_term(__matcher);
    while (!_M_match_token(_ScannerT::_S_token_bracket_end));
    return true;
  }
// Parses one item of a bracket expression and adds it to __matcher: a
// collating symbol, an equivalence class, a character class, a single
// character, or a character range.  Throws regex_error(error_brack) when
// the current token is none of these and error_range for a malformed
// range.
template<typename _InIter, typename _TraitsT>
void
_Compiler<_InIter, _TraitsT>::
_M_expression_term(_BMatcherT& __matcher)
{
if (_M_match_token(_ScannerT::_S_token_collsymbol))
{
__matcher._M_add_collating_element(_M_cur_value);
return;
}
if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
{
__matcher._M_add_equivalence_class(_M_cur_value);
return;
}
if (_M_match_token(_ScannerT::_S_token_char_class_name))
{
__matcher._M_add_character_class(_M_cur_value);
return;
}
if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a
{
// Remember the first character; _M_cur_value is overwritten by the next
// successful _M_match_token().
auto __ch = _M_cur_value[0];
if (_M_match_token(_ScannerT::_S_token_dash)) // [a-
{
// If the dash is the last character in the bracket expression,
// it is not special.
// NOTE(review): _M_cur_value was reassigned by the dash match just above;
// whether it holds the dash or still the previous character depends on
// what the scanner reports as the value of a dash token -- confirm that
// both __ch and '-' end up in the matcher as the comment below claims.
if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end)
__matcher._M_add_char(_M_cur_value[0]); // [a-] <=> [a\-]
else // [a-z]
{
if (!_M_match_token(_ScannerT::_S_token_collelem_single))
__throw_regex_error(regex_constants::error_range);
__matcher._M_make_range(__ch, _M_cur_value[0]);
}
}
else // [a]
__matcher._M_add_char(__ch);
return;
}
__throw_regex_error(regex_constants::error_brack);
}
// Converts _M_cur_value to an integer in the given radix, taking each
// digit's value from _M_traits.value().  No overflow or invalid-digit
// checking is done here.
template<typename _InIter, typename _TraitsT>
  int
  _Compiler<_InIter, _TraitsT>::
  _M_cur_int_value(int __radix)
  {
    int __result = 0;
    typename _StringT::size_type __pos = 0;
    while (__pos < _M_cur_value.length())
      {
        __result = __result * __radix
          + _M_traits.value(_M_cur_value[__pos], __radix);
        ++__pos;
      }
    return __result;
  }
// Compiles the pattern [__b, __e) with traits __t and flags __f, and
// returns a heap-allocated copy of the resulting NFA wrapped in an
// _AutomatonPtr.
template<typename _InIter, typename _TraitsT>
  _AutomatonPtr
  __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
            regex_constants::syntax_option_type __f)
  {
    typedef _Compiler<_InIter, _TraitsT> _CompilerT;
    _CompilerT __compiler(__b, __e, __t, __f);
    return _AutomatonPtr(new _Nfa(__compiler._M_nfa()));
  }
//@} regex-detail //@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail } // namespace __detail
} // namespace std } // namespace std
#include <bits/regex_compiler.tcc>
// class template regex -*- C++ -*-
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
/**
* @file bits/regex_compiler.tcc
* This is an internal header file, included by other library headers.
* Do not attempt to use it directly. @headername{regex}
*/
namespace std _GLIBCXX_VISIBILITY(default)
{
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
// Scans the next token of the pattern into _M_curToken (and _M_curValue
// for tokens that carry text).  Sets _S_token_eof once _M_current has
// reached _M_end.  While inside a bracket or brace expression the work is
// delegated to _M_scan_in_bracket() / _M_scan_in_brace().
template<typename _BiIter>
  void
  _Scanner<_BiIter>::
  _M_advance()
  {
    if (_M_current == _M_end)
      {
        _M_curToken = _S_token_eof;
        return;
      }

    _CharT __c = *_M_current;
    if (_M_state & _S_state_in_bracket)
      {
        _M_scan_in_bracket();
        return;
      }
    if (_M_state & _S_state_in_brace)
      {
        _M_scan_in_brace();
        return;
      }
#if 0
    // TODO: re-enable line anchors when _M_assertion is implemented.
    // See PR libstdc++/47724
    else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
      {
        _M_curToken = _S_token_line_begin;
        ++_M_current;
        return;
      }
    else if (__c == _M_ctype.widen('$'))
      {
        _M_curToken = _S_token_line_end;
        ++_M_current;
        return;
      }
#endif
    else if (__c == _M_ctype.widen('.'))
      {
        _M_curToken = _S_token_anychar;
        ++_M_current;
        return;
      }
    else if (__c == _M_ctype.widen('*'))
      {
        _M_curToken = _S_token_closure0;
        ++_M_current;
        return;
      }
    else if (__c == _M_ctype.widen('+'))
      {
        _M_curToken = _S_token_closure1;
        ++_M_current;
        return;
      }
    else if (__c == _M_ctype.widen('|'))
      {
        _M_curToken = _S_token_or;
        ++_M_current;
        return;
      }
    else if (__c == _M_ctype.widen('['))
      {
        ++_M_current;
        // The pattern may end right after '[', so the look-ahead for '^'
        // must not dereference _M_end.  In that case a plain bracket-begin
        // is reported and the following call yields _S_token_eof.
        if (_M_current != _M_end && *_M_current == _M_ctype.widen('^'))
          {
            _M_curToken = _S_token_bracket_inverse_begin;
            ++_M_current;
          }
        else
          _M_curToken = _S_token_bracket_begin;
        _M_state |= _S_state_in_bracket;
        return;
      }
    else if (__c == _M_ctype.widen('\\'))
      {
        _M_eat_escape();
        return;
      }
    else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
      {
        // Outside basic/grep syntax the unescaped characters ( ) { are
        // operators.
        if (__c == _M_ctype.widen('('))
          {
            _M_curToken = _S_token_subexpr_begin;
            ++_M_current;
            return;
          }
        else if (__c == _M_ctype.widen(')'))
          {
            _M_curToken = _S_token_subexpr_end;
            ++_M_current;
            return;
          }
        else if (__c == _M_ctype.widen('{'))
          {
            _M_curToken = _S_token_interval_begin;
            _M_state |= _S_state_in_brace;
            ++_M_current;
            return;
          }
      }

    // Anything not recognised above is an ordinary character.
    _M_curToken = _S_token_ord_char;
    _M_curValue.assign(1, __c);
    ++_M_current;
  }
// Scans one token while inside an interval expression: a run of digits
// becomes _S_token_dup_count, ',' becomes _S_token_comma.  The closing
// delimiter is '}' outside basic/grep syntax; in basic/grep syntax a
// backslash is handed to _M_eat_escape().  Any other character leaves the
// token and the position untouched.
template<typename _BiIter>
  void
  _Scanner<_BiIter>::
  _M_scan_in_brace()
  {
    if (_M_ctype.is(_CtypeT::digit, *_M_current))
      {
        // Collect the whole digit run as the repeat count text.
        _M_curToken = _S_token_dup_count;
        _M_curValue.assign(1, *_M_current);
        for (++_M_current;
             _M_current != _M_end
               && _M_ctype.is(_CtypeT::digit, *_M_current);
             ++_M_current)
          _M_curValue += *_M_current;
        return;
      }

    if (*_M_current == _M_ctype.widen(','))
      {
        _M_curToken = _S_token_comma;
        ++_M_current;
        return;
      }

    if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
      {
        if (*_M_current == _M_ctype.widen('}'))
          {
            _M_curToken = _S_token_interval_end;
            _M_state &= ~_S_state_in_brace;
            ++_M_current;
          }
        return;
      }

    // basic/grep: only an escape can continue from here.
    if (*_M_current == _M_ctype.widen('\\'))
      _M_eat_escape();
  }
// Scans one token while inside a bracket expression.  "[." "[:" and "[="
// start a collating symbol, character class name and equivalence class
// respectively; '-' and ']' map to dash / bracket-end; a backslash is
// handed to _M_eat_escape().  Everything else is a single collating
// element.
template<typename _BiIter>
  void
  _Scanner<_BiIter>::
  _M_scan_in_bracket()
  {
    if (*_M_current == _M_ctype.widen('['))
      {
        ++_M_current;
        if (_M_current == _M_end)
          {
            _M_curToken = _S_token_eof;
            return;
          }
        if (*_M_current == _M_ctype.widen('.'))
          {
            _M_curToken = _S_token_collsymbol;
            _M_eat_collsymbol();
            return;
          }
        if (*_M_current == _M_ctype.widen(':'))
          {
            _M_curToken = _S_token_char_class_name;
            _M_eat_charclass();
            return;
          }
        if (*_M_current == _M_ctype.widen('='))
          {
            _M_curToken = _S_token_equiv_class_name;
            _M_eat_equivclass();
            return;
          }
        // No class introducer: the character after '[' is reported below
        // as a single element (the '[' itself has already been skipped).
      }
    else
      {
        if (*_M_current == _M_ctype.widen('-'))
          {
            _M_curToken = _S_token_dash;
            ++_M_current;
            return;
          }
        if (*_M_current == _M_ctype.widen(']'))
          {
            _M_curToken = _S_token_bracket_end;
            _M_state &= ~_S_state_in_bracket;
            ++_M_current;
            return;
          }
        if (*_M_current == _M_ctype.widen('\\'))
          {
            _M_eat_escape();
            return;
          }
      }

    _M_curToken = _S_token_collelem_single;
    _M_curValue.assign(1, *_M_current);
    ++_M_current;
  }
// Scans an escape sequence.  On entry _M_current refers to the backslash;
// the backslash and the escaped character are consumed.  Sets
// _S_token_eof if the pattern ends after the backslash and throws
// regex_error(error_escape) for an unrecognised escape.
// TODO: complete the remaining escapes (the hex escape is unfinished).
template<typename _BiIter>
  void
  _Scanner<_BiIter>::
  _M_eat_escape()
  {
    ++_M_current; // skip the backslash
    if (_M_current == _M_end)
      {
        _M_curToken = _S_token_eof;
        return;
      }
    _CharT __c = *_M_current;
    ++_M_current;

    // In basic/grep syntax \( \) \{ \} are the operators; in every other
    // syntax escaping them yields the ordinary character.
    const bool __escape_means_literal
      = !(_M_flags & (regex_constants::basic | regex_constants::grep));

    if (__c == _M_ctype.widen('('))
      {
        if (__escape_means_literal)
          {
            _M_curToken = _S_token_ord_char;
            _M_curValue.assign(1, __c);
          }
        else
          _M_curToken = _S_token_subexpr_begin;
      }
    else if (__c == _M_ctype.widen(')'))
      {
        if (__escape_means_literal)
          {
            _M_curToken = _S_token_ord_char;
            _M_curValue.assign(1, __c);
          }
        else
          _M_curToken = _S_token_subexpr_end;
      }
    else if (__c == _M_ctype.widen('{'))
      {
        if (__escape_means_literal)
          {
            _M_curToken = _S_token_ord_char;
            _M_curValue.assign(1, __c);
          }
        else
          {
            _M_curToken = _S_token_interval_begin;
            _M_state |= _S_state_in_brace;
          }
      }
    else if (__c == _M_ctype.widen('}'))
      {
        if (__escape_means_literal)
          {
            _M_curToken = _S_token_ord_char;
            _M_curValue.assign(1, __c);
          }
        else
          {
            // Test the in-brace bit specifically.  A logical AND here
            // would accept "\}" whenever any state bit is set, e.g. while
            // only inside a bracket expression.
            if (!(_M_state & _S_state_in_brace))
              __throw_regex_error(regex_constants::error_badbrace);
            _M_state &= ~_S_state_in_brace;
            _M_curToken = _S_token_interval_end;
          }
      }
    else if (__c == _M_ctype.widen('x'))
      {
        // NOTE(review): 'x' was already consumed above, so this extra
        // increment skips one more character, and no path in this branch
        // assigns _M_curToken.  Looks unfinished (see TODO); behaviour is
        // intentionally left unchanged here.
        ++_M_current;
        if (_M_current == _M_end)
          {
            _M_curToken = _S_token_eof;
            return;
          }
        if (_M_ctype.is(_CtypeT::digit, *_M_current))
          {
            _M_curValue.assign(1, *_M_current);
            ++_M_current;
            if (_M_current == _M_end)
              {
                _M_curToken = _S_token_eof;
                return;
              }
            if (_M_ctype.is(_CtypeT::digit, *_M_current))
              {
                _M_curValue += *_M_current;
                ++_M_current;
                return;
              }
          }
      }
    else if (__c == _M_ctype.widen('^')
             || __c == _M_ctype.widen('.')
             || __c == _M_ctype.widen('*')
             || __c == _M_ctype.widen('$')
             || __c == _M_ctype.widen('\\'))
      {
        // Escaped metacharacter: taken literally.
        _M_curToken = _S_token_ord_char;
        _M_curValue.assign(1, __c);
      }
    else if (_M_ctype.is(_CtypeT::digit, __c))
      {
        _M_curToken = _S_token_backref;
        _M_curValue.assign(1, __c);
      }
    else if (_M_state & _S_state_in_bracket)
      {
        if (__c == _M_ctype.widen('-')
            || __c == _M_ctype.widen('[')
            || __c == _M_ctype.widen(']'))
          {
            _M_curToken = _S_token_ord_char;
            _M_curValue.assign(1, __c);
          }
        else if ((_M_flags & regex_constants::ECMAScript)
                 && __c == _M_ctype.widen('b'))
          {
            // NOTE(review): "\b" inside brackets is mapped to a space
            // character here -- confirm that this is the intended value.
            _M_curToken = _S_token_ord_char;
            _M_curValue.assign(1, _M_ctype.widen(' '));
          }
        else
          __throw_regex_error(regex_constants::error_escape);
      }
    else
      __throw_regex_error(regex_constants::error_escape);
  }
// Eats a character class name or throws regex_error(error_ctype).
// _M_current points to the opening ':' delimiter on entry and to the
// character after the closing ']' on return.  The name between the two
// ':' delimiters is stored in _M_curValue.
template<typename _BiIter>
  void
  _Scanner<_BiIter>::
  _M_eat_charclass()
  {
    ++_M_current; // skip ':'
    if (_M_current == _M_end)
      __throw_regex_error(regex_constants::error_ctype);
    for (_M_curValue.clear();
         _M_current != _M_end && *_M_current != _M_ctype.widen(':');
         ++_M_current)
      _M_curValue += *_M_current;
    if (_M_current == _M_end)
      __throw_regex_error(regex_constants::error_ctype);
    ++_M_current; // skip ':'
    // The pattern may end right after the closing ':'; check for the end
    // before looking at the expected ']'.
    if (_M_current == _M_end || *_M_current != _M_ctype.widen(']'))
      __throw_regex_error(regex_constants::error_ctype);
    ++_M_current; // skip ']'
  }
// Eats an equivalence class or throws regex_error(error_collate).
// _M_current points to the opening '=' delimiter on entry and to the
// character after the closing ']' on return.  The text between the two
// '=' delimiters is stored in _M_curValue.
template<typename _BiIter>
  void
  _Scanner<_BiIter>::
  _M_eat_equivclass()
  {
    ++_M_current; // skip '='
    if (_M_current == _M_end)
      __throw_regex_error(regex_constants::error_collate);
    for (_M_curValue.clear();
         _M_current != _M_end && *_M_current != _M_ctype.widen('=');
         ++_M_current)
      _M_curValue += *_M_current;
    if (_M_current == _M_end)
      __throw_regex_error(regex_constants::error_collate);
    ++_M_current; // skip '='
    // The pattern may end right after the closing '='; check for the end
    // before looking at the expected ']'.
    if (_M_current == _M_end || *_M_current != _M_ctype.widen(']'))
      __throw_regex_error(regex_constants::error_collate);
    ++_M_current; // skip ']'
  }
// Eats a collating symbol or throws regex_error(error_collate).
// _M_current points to the opening '.' delimiter on entry and to the
// character after the closing ']' on return.  The text between the two
// '.' delimiters is stored in _M_curValue.
template<typename _BiIter>
  void
  _Scanner<_BiIter>::
  _M_eat_collsymbol()
  {
    ++_M_current; // skip '.'
    if (_M_current == _M_end)
      __throw_regex_error(regex_constants::error_collate);
    for (_M_curValue.clear();
         _M_current != _M_end && *_M_current != _M_ctype.widen('.');
         ++_M_current)
      _M_curValue += *_M_current;
    if (_M_current == _M_end)
      __throw_regex_error(regex_constants::error_collate);
    ++_M_current; // skip '.'
    // The pattern may end right after the closing '.'; check for the end
    // before looking at the expected ']'.
    if (_M_current == _M_end || *_M_current != _M_ctype.widen(']'))
      __throw_regex_error(regex_constants::error_collate);
    ++_M_current; // skip ']'
  }
#ifdef _GLIBCXX_DEBUG
// Debug helper: writes a one-line description of the current token to
// ostr and returns the stream.  Tokens that carry text also print it.
template<typename _BiIter>
  std::ostream&
  _Scanner<_BiIter>::
  _M_print(std::ostream& ostr)
  {
    // Tokens without a value only select a fixed text, which is written
    // once after the switch; tokens with a value are written in place.
    const char* __fixed_text = nullptr;
    switch (_M_curToken)
      {
      case _S_token_anychar:
        __fixed_text = "any-character\n"; break;
      case _S_token_backref:
        __fixed_text = "backref\n"; break;
      case _S_token_bracket_begin:
        __fixed_text = "bracket-begin\n"; break;
      case _S_token_bracket_inverse_begin:
        __fixed_text = "bracket-inverse-begin\n"; break;
      case _S_token_bracket_end:
        __fixed_text = "bracket-end\n"; break;
      case _S_token_closure0:
        __fixed_text = "closure0\n"; break;
      case _S_token_closure1:
        __fixed_text = "closure1\n"; break;
      case _S_token_comma:
        __fixed_text = "comma\n"; break;
      case _S_token_dash:
        __fixed_text = "dash\n"; break;
      case _S_token_eof:
        __fixed_text = "EOF\n"; break;
      case _S_token_interval_begin:
        __fixed_text = "interval begin\n"; break;
      case _S_token_interval_end:
        __fixed_text = "interval end\n"; break;
      case _S_token_line_begin:
        __fixed_text = "line begin\n"; break;
      case _S_token_line_end:
        __fixed_text = "line end\n"; break;
      case _S_token_opt:
        __fixed_text = "opt\n"; break;
      case _S_token_or:
        __fixed_text = "or\n"; break;
      case _S_token_subexpr_begin:
        __fixed_text = "subexpr begin\n"; break;
      case _S_token_subexpr_end:
        __fixed_text = "subexpr end\n"; break;
      case _S_token_word_begin:
        __fixed_text = "word begin\n"; break;
      case _S_token_word_end:
        __fixed_text = "word end\n"; break;
      case _S_token_unknown:
        __fixed_text = "-- unknown token --\n"; break;

      case _S_token_char_class_name:
        ostr << "char-class-name \"" << _M_curValue << "\"\n";
        break;
      case _S_token_collelem_multi:
        ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
        break;
      case _S_token_collelem_single:
        ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
        break;
      case _S_token_collsymbol:
        ostr << "collsymbol \"" << _M_curValue << "\"\n";
        break;
      case _S_token_dup_count:
        ostr << "dup count: " << _M_curValue << "\n";
        break;
      case _S_token_equiv_class_name:
        ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
        break;
      case _S_token_ord_char:
        ostr << "ordinary character: \"" << _M_value() << "\"\n";
        break;

      default:
        _GLIBCXX_DEBUG_ASSERT(false);
      }
    if (__fixed_text)
      ostr << __fixed_text;
    return ostr;
  }
#endif
// Compiles the pattern [__b, __e) into _M_state_store.  The whole
// expression is wrapped between a sub-expression begin and end state and
// terminated by an accept state.
template<typename _InputIter, typename _CharT, typename _TraitsT>
  _Compiler<_InputIter, _CharT, _TraitsT>::
  _Compiler(_InputIter __b, _InputIter __e,
            const _TraitsT& __traits, _FlagT __flags)
  : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
    _M_state_store(__flags), _M_flags(__flags)
  {
    // Opening marker of the outermost sub-expression.
    _StateSeqT __whole(_M_state_store,
                       _M_state_store._M_insert_subexpr_begin());

    // Parse the pattern; the result, if any, is left on _M_stack.
    _M_disjunction();
    if (!_M_stack.empty())
      {
        __whole._M_append(_M_stack.top());
        _M_stack.pop();
      }

    // Closing marker, then the accepting state.
    __whole._M_append(_M_state_store._M_insert_subexpr_end());
    __whole._M_append(_M_state_store._M_insert_accept());
  }
// Accepts the given token: if it is the scanner's current token, its
// value is saved in _M_cur_value, the scanner advances and true is
// returned.  Otherwise nothing changes and false is returned.
template<typename _InputIter, typename _CharT, typename _TraitsT>
  bool
  _Compiler<_InputIter, _CharT, _TraitsT>::
  _M_match_token(_Compiler<_InputIter, _CharT, _TraitsT>::_TokenT token)
  {
    if (!(token == _M_scanner._M_token()))
      return false;

    _M_cur_value = _M_scanner._M_value();
    _M_scanner._M_advance();
    return true;
  }
// Parses  alternative ( '|' disjunction )?  and, when an '|' was seen,
// replaces the two operands on _M_stack by their combined sequence.
template<typename _InputIter, typename _CharT, typename _TraitsT>
  void
  _Compiler<_InputIter, _CharT, _TraitsT>::
  _M_disjunction()
  {
    this->_M_alternative();
    if (!_M_match_token(_ScannerT::_S_token_or))
      return;

    // NOTE(review): both top() calls assume the operand was pushed, but
    // _M_alternative() only pushes when _M_term() succeeds -- confirm
    // that an empty operand cannot reach this point.
    _StateSeqT __lhs(_M_stack.top());
    _M_stack.pop();

    this->_M_disjunction();

    _StateSeqT __rhs(_M_stack.top());
    _M_stack.pop();

    _M_stack.push(_StateSeqT(__lhs, __rhs));
  }
// Parses a (possibly empty) run of terms and leaves their concatenation
// on _M_stack.  Nothing is pushed when no term is found.
template<typename _InputIter, typename _CharT, typename _TraitsT>
  void
  _Compiler<_InputIter, _CharT, _TraitsT>::
  _M_alternative()
  {
    if (!this->_M_term())
      return;

    _StateSeqT __head(_M_stack.top());
    _M_stack.pop();

    // Parse the remaining terms, then chain them after the first one.
    this->_M_alternative();
    if (!_M_stack.empty())
      {
        __head._M_append(_M_stack.top());
        _M_stack.pop();
      }
    _M_stack.push(__head);
  }
/**
 * @brief Parses one term: an assertion, or an atom with an optional
 * quantifier.
 *
 * @return true if a term was parsed, false otherwise.
 */
template<typename _InputIter, typename _CharT, typename _TraitsT>
  bool
  _Compiler<_InputIter, _CharT, _TraitsT>::
  _M_term()
  {
    // Assertions are tried first and take no quantifier.
    if (this->_M_assertion())
      return true;

    if (!this->_M_atom())
      return false;

    // An atom may be followed by a quantifier.
    this->_M_quantifier();
    return true;
  }
/**
 * @brief Consumes an assertion token, if one is next.
 *
 * The tokens are tried in this order: line begin, line end, word begin,
 * word end.  No automaton state is emitted for any of them here; the
 * corresponding opcode pushes (_S_opcode_line_begin, _S_opcode_line_end,
 * _S_opcode_word_begin, _S_opcode_word_end) were only present as
 * commented-out code.
 *
 * @return true if an assertion token was consumed, false otherwise.
 */
template<typename _InputIter, typename _CharT, typename _TraitsT>
  bool
  _Compiler<_InputIter, _CharT, _TraitsT>::
  _M_assertion()
  {
    // Short-circuit evaluation stops at the first token that matches, so
    // at most one token is consumed.
    return _M_match_token(_ScannerT::_S_token_line_begin)
           || _M_match_token(_ScannerT::_S_token_line_end)
           || _M_match_token(_ScannerT::_S_token_word_begin)
           || _M_match_token(_ScannerT::_S_token_word_end);
  }
/**
 * @brief Parses an optional quantifier and applies it to the sequence on
 * top of _M_stack.
 *
 * Handles the closure0, closure1, opt and interval tokens (presumably
 * '*', '+', '?' and '{...}' -- confirm against the scanner).  If none of
 * them is next, the function returns without changing anything.
 *
 * Throws regex_constants::error_badrepeat when a quantifier is found but
 * the stack is empty, error_badbrace for a missing or inconsistent
 * repetition count, and error_brace when the interval is not closed.
 */
template<typename _InputIter, typename _CharT, typename _TraitsT>
void
_Compiler<_InputIter, _CharT, _TraitsT>::
_M_quantifier()
{
// closure0: replace the top sequence by one built from it and -1, with
// its own front appended (presumably a loop back to the start -- confirm
// against _StateSeqT).
if (_M_match_token(_ScannerT::_S_token_closure0))
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
_StateSeqT __r(_M_stack.top(), -1);
__r._M_append(__r._M_front());
_M_stack.pop();
_M_stack.push(__r);
return;
}
// closure1: append, in place, an alternative state whose second operand
// is the front of the top sequence.
if (_M_match_token(_ScannerT::_S_token_closure1))
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
_StateSeqT __r(_M_state_store,
_M_state_store.
_M_insert_alt(_S_invalid_state_id,
_M_stack.top()._M_front()));
_M_stack.top()._M_append(__r);
return;
}
// opt: replace the top sequence by one built from it and -1; unlike
// closure0 nothing is appended afterwards.
if (_M_match_token(_ScannerT::_S_token_opt))
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
_StateSeqT __r(_M_stack.top(), -1);
_M_stack.pop();
_M_stack.push(__r);
return;
}
// interval: a repetition count, optionally followed by a comma and an
// upper bound.
if (_M_match_token(_ScannerT::_S_token_interval_begin))
{
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
// The lower bound is mandatory.
if (!_M_match_token(_ScannerT::_S_token_dup_count))
__throw_regex_error(regex_constants::error_badbrace);
// Copy of the atom taken before anything is appended to the top.
_StateSeqT __r(_M_stack.top());
int __min_rep = _M_cur_int_value(10);
// The top already holds one copy, so __min_rep - 1 clones are appended.
// NOTE(review): for __min_rep == 0 the loop does not run and the single
// existing copy stays in place -- confirm this is intended.
for (int __i = 1; __i < __min_rep; ++__i)
_M_stack.top()._M_append(__r._M_clone());
// NOTE: the `else` below pairs with the inner `if` (dup_count), so it is
// taken when a comma is present but no upper bound follows.
if (_M_match_token(_ScannerT::_S_token_comma))
if (_M_match_token(_ScannerT::_S_token_dup_count))
{
// Bounded range: __n is the upper bound minus the lower bound.
int __n = _M_cur_int_value(10) - __min_rep;
if (__n < 0)
__throw_regex_error(regex_constants::error_badbrace);
for (int __i = 0; __i < __n; ++__i)
{
// Same construction as the closure1 branch; this __r shadows the
// outer copy of the atom.
_StateSeqT __r(_M_state_store,
_M_state_store.
_M_insert_alt(_S_invalid_state_id,
_M_stack.top()._M_front()));
_M_stack.top()._M_append(__r);
}
}
else
{
// No upper bound.  NOTE(review): the closure0 branch uses _M_append()
// where this one uses _M_push_back() -- confirm the difference is
// intentional.
_StateSeqT __r(_M_stack.top(), -1);
__r._M_push_back(__r._M_front());
_M_stack.pop();
_M_stack.push(__r);
}
// The interval must be closed.
if (!_M_match_token(_ScannerT::_S_token_interval_end))
__throw_regex_error(regex_constants::error_brace);
return;
}
}
/**
 * @brief Parses one atom and pushes its state sequence onto _M_stack.
 *
 * Recognised forms, in order: the any-character token, an ordinary
 * character, a back-reference token, a parenthesised subexpression, and
 * finally a bracket expression.
 *
 * A back-reference token is consumed and flagged on the state store, but
 * no state is pushed for it and control falls through to the remaining
 * checks (the early return was commented out).
 *
 * Throws regex_constants::error_paren if a subexpression is not closed.
 *
 * @return true if an atom was parsed, false otherwise.
 */
template<typename _InputIter, typename _CharT, typename _TraitsT>
  bool
  _Compiler<_InputIter, _CharT, _TraitsT>::
  _M_atom()
  {
    if (_M_match_token(_ScannerT::_S_token_anychar))
      {
        // Predicate that accepts every character.
        const static auto&
          __any_matcher = [](_CharT) -> bool
          { return true; };
        _M_stack.push(_StateSeqT(_M_state_store,
                                 _M_state_store._M_insert_matcher
                                 (__any_matcher)));
        return true;
      }
    if (_M_match_token(_ScannerT::_S_token_ord_char))
      {
        auto __c = _M_cur_value[0];
        // Reserved name: a plain `f` could clash with a user macro.
        __detail::_Matcher<_CharT> __fn;
        if (_M_flags & regex_constants::icase)
          {
            // Compare case-insensitively: fold the pattern character once
            // here and fold each input character when matching.
            auto __traits = this->_M_traits;
            __c = __traits.translate_nocase(__c);
            __fn = [__traits, __c](_CharT __ch) -> bool
              { return __traits.translate_nocase(__ch) == __c; };
          }
        else
          __fn = [__c](_CharT __ch) -> bool
            { return __ch == __c; };
        _M_stack.push(_StateSeqT(_M_state_store,
                                 _M_state_store._M_insert_matcher(__fn)));
        return true;
      }
    if (_M_match_token(_ScannerT::_S_token_backref))
      {
        // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
        _M_state_store._M_set_backref(true);
        //return true;
      }
    if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
      {
        // Wrap the nested disjunction in subexpression begin/end states.
        _StateSeqT __r(_M_state_store,
                       _M_state_store.
                       _M_insert_subexpr_begin());
        this->_M_disjunction();
        if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
          __throw_regex_error(regex_constants::error_paren);
        if (!_M_stack.empty())
          {
            __r._M_append(_M_stack.top());
            _M_stack.pop();
          }
        __r._M_append(_M_state_store._M_insert_subexpr_end());
        _M_stack.push(__r);
        return true;
      }
    return _M_bracket_expression();
  }
/**
 * @brief Parses a bracket expression and pushes a matcher state for it.
 *
 * Throws regex_constants::error_brack when the list between the brackets
 * is empty and the ECMAScript flag is not set.
 *
 * @return false if the next token does not open a bracket expression,
 * true once the expression has been parsed and pushed.
 */
template<typename _InputIter, typename _CharT, typename _TraitsT>
  bool
  _Compiler<_InputIter, _CharT, _TraitsT>::
  _M_bracket_expression()
  {
    // The inverse opener is tried first; the plain opener only if that
    // one did not match.
    const bool __negated =
      _M_match_token(_ScannerT::_S_token_bracket_inverse_begin);
    if (!__negated && !_M_match_token(_ScannerT::_S_token_bracket_begin))
      return false;

    _BMatcherT __matcher(__negated, _M_traits, _M_flags);

    // An empty list is only tolerated in ECMAScript mode.
    const bool __nonempty = _M_bracket_list(__matcher);
    if (!__nonempty && !(_M_flags & regex_constants::ECMAScript))
      __throw_regex_error(regex_constants::error_brack);

    _M_stack.push(_StateSeqT(_M_state_store,
                             _M_state_store._M_insert_matcher(__matcher)));
    return true;
  }
/**
 * @brief Parses the terms of a bracket expression up to and including
 * the closing bracket token.
 *
 * @param __matcher  Bracket matcher that receives every parsed term.
 * @return true if at least one term was parsed, false if the closing
 * bracket came first.
 */
template<typename _InputIter, typename _CharT, typename _TraitsT>
  bool // list is non-empty
  _Compiler<_InputIter, _CharT, _TraitsT>::
  _M_bracket_list(_BMatcherT& __matcher)
  {
    if (_M_match_token(_ScannerT::_S_token_bracket_end))
      return false;

    // One term per iteration; the loop ends once the closing bracket has
    // been consumed.
    do
      _M_expression_term(__matcher);
    while (!_M_match_token(_ScannerT::_S_token_bracket_end));

    return true;
  }
/**
 * @brief Parses one term of a bracket expression and records it in
 * @p __matcher.
 *
 * A term is a collating symbol, an equivalence class, a character class,
 * a single element, or a range of two single elements separated by a
 * dash.  A dash directly before the closing bracket is not special: both
 * the element in front of it and the dash itself are added.
 *
 * Throws regex_constants::error_range when the dash is followed by
 * something other than a single element or the closing bracket, and
 * regex_constants::error_brack when no term can be parsed.
 *
 * @param __matcher  Bracket matcher that receives the term.
 */
template<typename _InputIter, typename _CharT, typename _TraitsT>
  void
  _Compiler<_InputIter, _CharT, _TraitsT>::
  _M_expression_term(_BMatcherT& __matcher)
  {
    if (_M_match_token(_ScannerT::_S_token_collsymbol))
      {
        __matcher._M_add_collating_element(_M_cur_value);
        return;
      }
    if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
      {
        __matcher._M_add_equivalence_class(_M_cur_value);
        return;
      }
    if (_M_match_token(_ScannerT::_S_token_char_class_name))
      {
        __matcher._M_add_character_class(_M_cur_value);
        return;
      }
    if (_M_match_token(_ScannerT::_S_token_collelem_single)) // [a
      {
        auto __ch = _M_cur_value[0];
        if (!_M_match_token(_ScannerT::_S_token_dash)) // [a]
          {
            __matcher._M_add_char(__ch);
            return;
          }
        // [a-
        if (_M_scanner._M_token() == _ScannerT::_S_token_bracket_end)
          {
            // [a-] <=> [a\-]: the dash is the last character, so it is
            // not special.  The leading element must be kept as well; it
            // was previously dropped.  _M_cur_value was refreshed when the
            // dash token was consumed (presumably it now holds '-').
            __matcher._M_add_char(__ch);
            __matcher._M_add_char(_M_cur_value[0]);
          }
        else // [a-z]
          {
            if (!_M_match_token(_ScannerT::_S_token_collelem_single))
              __throw_regex_error(regex_constants::error_range);
            __matcher._M_make_range(__ch, _M_cur_value[0]);
          }
        return;
      }
    __throw_regex_error(regex_constants::error_brack);
  }
/**
 * @brief Converts _M_cur_value to an integer in the given radix.
 *
 * Each character is converted with _M_traits.value() and accumulated
 * from left to right.  No overflow or invalid-digit checking is done.
 *
 * @param __radix  Numeric base of the digits.
 * @return The accumulated value; 0 for an empty string.
 */
template<typename _InputIter, typename _CharT, typename _TraitsT>
  int
  _Compiler<_InputIter, _CharT, _TraitsT>::
  _M_cur_int_value(int __radix)
  {
    int __result = 0;
    const auto __len = _M_cur_value.length();
    typename _StringT::size_type __pos = 0;
    while (__pos < __len)
      {
        const auto __digit = _M_traits.value(_M_cur_value[__pos], __radix);
        __result = __result * __radix + __digit;
        ++__pos;
      }
    return __result;
  }
/**
 * @brief Tests whether @p __ch is accepted by this bracket expression.
 *
 * The character is looked up in the explicit character set first, then
 * in the character classes, then in the ranges; the search stops at the
 * first hit.  The result is inverted when _M_is_non_matching is set.
 *
 * @param __ch  The input character to test.
 * @return true if the character matches, false otherwise.
 */
template<typename _CharT, typename _TraitsT>
  bool _BracketMatcher<_CharT, _TraitsT>::
  operator()(_CharT __ch) const
  {
    // The class test below uses the untranslated character.
    const auto __oldch = __ch;

    // NOTE(review): translation only happens when the collate flag is set;
    // without it the character is compared as-is even when _M_is_icase()
    // holds -- confirm this is intended.
    if (_M_flags & regex_constants::collate)
      {
        if (_M_is_icase())
          __ch = _M_traits.translate_nocase(__ch);
        else
          __ch = _M_traits.translate(__ch);
      }

    bool __ret = false;
    for (auto __c : _M_char_set)
      if (__c == __ch)
        {
          __ret = true;
          break;
        }

    // The class and range tests only run when the character set did not
    // match; a hit there can no longer change the result, so the extra
    // string conversion and range scan are skipped.
    if (!__ret)
      {
        if (_M_traits.isctype(__oldch, _M_class_set))
          __ret = true;
        else
          {
            _StringT __s = _M_get_str(__ch);
            for (auto& __it : _M_range_set)
              if (__it.first <= __s && __s <= __it.second)
                {
                  __ret = true;
                  break;
                }
          }
      }

    if (_M_is_non_matching)
      __ret = !__ret;
    return __ret;
  }
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
} // namespace
// class template regex -*- C++ -*-
// Copyright (C) 2010-2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
/**
* @file bits/regex_cursor.h
* This is an internal header file, included by other library headers.
* Do not attempt to use it directly. @headername{regex}
*/
namespace std _GLIBCXX_VISIBILITY(default)
{
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
/**
* @defgroup regex-detail Base and Implementation Classes
* @ingroup regex
* @{
*/
/// Abstract interface of a cursor that can step forwards and backwards
/// and report whether it has reached the end.
struct _PatternCursor
{
  virtual
  ~_PatternCursor()
  { }

  /// Moves the cursor one position forward.
  virtual void
  _M_next() = 0;

  /// Moves the cursor one position backward.
  virtual void
  _M_prev() = 0;

  /// Returns true when the cursor is at the end.
  virtual bool
  _M_at_end() const = 0;
};
/// Provides a cursor into the specific target string.
///
/// Holds three iterators: the begin of the range, the current position
/// (initially the begin) and the end of the range.
template<typename _FwdIterT>
  class _SpecializedCursor
  : public _PatternCursor
  {
  public:
    /// Creates a cursor positioned at @p __b over the range [__b, __e).
    /// Both iterators are taken by const reference; each is copied
    /// exactly once, into its member.
    _SpecializedCursor(const _FwdIterT& __b, const _FwdIterT& __e)
    : _M_b(__b), _M_c(__b), _M_e(__e)
    { }

    /// Returns the element at the current position.
    typename std::iterator_traits<_FwdIterT>::value_type
    _M_current() const
    { return *_M_c; }

    /// Advances the current position by one.
    void
    _M_next()
    { ++_M_c; }

    /// Moves the current position back by one.  This needs an iterator
    /// that supports decrement, despite the _FwdIterT name.
    void
    _M_prev()
    { --_M_c; }

    /// Returns a copy of the current position.
    _FwdIterT
    _M_pos() const
    { return _M_c; }

    /// Returns the begin of the range.
    const _FwdIterT&
    _M_begin() const
    { return _M_b; }

    /// Returns the end of the range.
    const _FwdIterT&
    _M_end() const
    { return _M_e; }

    /// Returns true when the current position equals the end.
    bool
    _M_at_end() const
    { return _M_c == _M_e; }

  private:
    _FwdIterT _M_b;
    _FwdIterT _M_c;
    _FwdIterT _M_e;
  };
// Factory that deduces the iterator type and returns a cursor over
// [__b, __e), positioned at __b.
template<typename _FwdIterT>
  inline _SpecializedCursor<_FwdIterT>
  __cursor(const _FwdIterT& __b, const _FwdIterT __e)
  {
    typedef _SpecializedCursor<_FwdIterT> _CursorT;
    return _CursorT(__b, __e);
  }
//@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
} // namespace
// class template regex -*- C++ -*-
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
/**
* @file bits/regex_executor.h
* This is an internal header file, included by other library headers.
* Do not attempt to use it directly. @headername{regex}
*/
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename, typename>
class basic_regex;
template<typename, typename>
class match_results;
_GLIBCXX_END_NAMESPACE_VERSION
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
/**
* @addtogroup regex-detail
* @{
*/
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
class _Executor
{
public:
typedef match_results<_BiIter, _Alloc> _ResultsT;
typedef regex_constants::match_flag_type _FlagT;
virtual
~_Executor()
{ }
// Set matched when string exactly match the pattern.
virtual bool
_M_match() = 0;
// Set matched when some prefix of the string matches the pattern.
virtual bool
_M_search_from_first() = 0;
protected:
typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT;
_Executor(_BiIter __begin,
_BiIter __end,
_ResultsT& __results,
_FlagT __flags,
_SizeT __size)
: _M_current(__begin), _M_end(__end),
_M_results(__results), _M_flags(__flags)
{
__results.resize(__size + 2);
for (auto __it : __results)
__it.matched = false;
}
_BiIter _M_current;
_BiIter _M_end;
_ResultsT& _M_results;
_FlagT _M_flags;
};
/**
 * @brief Executor that walks the NFA recursively via _M_dfs(), starting
 * from the automaton's start state.
 *
 * The NFA is held by reference and must outlive the executor.
 */
template<typename _BiIter, typename _Alloc,
         typename _CharT, typename _TraitsT>
  class _DFSExecutor
  : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
  {
  public:
    typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
    typedef _NFA<_CharT, _TraitsT>                       _RegexT;
    typedef typename _BaseT::_ResultsT                   _ResultsT;
    typedef regex_constants::match_flag_type             _FlagT;

    /// Sizes the results from the NFA's subexpression count and keeps a
    /// reference to the NFA.
    _DFSExecutor(_BiIter        __begin,
                 _BiIter        __end,
                 _ResultsT&     __results,
                 const _RegexT& __nfa,
                 _FlagT         __flags)
    : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
      _M_nfa(__nfa)
    { }

    /// Runs the search in match mode.
    bool
    _M_match()
    {
      const auto __start = _M_nfa._M_start();
      return _M_dfs<true>(__start);
    }

    /// Runs the search in non-match (search) mode.
    bool
    _M_search_from_first()
    {
      const auto __start = _M_nfa._M_start();
      return _M_dfs<false>(__start);
    }

  private:
    // Recursive worker; __match_mode selects between the two entry points.
    template<bool __match_mode>
      bool
      _M_dfs(_StateIdT __i);

    const _RegexT& _M_nfa;
  };
// This is essentially a variant of the Single-Source-Shortest-Path problem,
// where the matching result is the final distance and should be minimized.
// Instead of Dijkstra's algorithm, it uses the queue-optimized (BFS-like)
// Bellman-Ford algorithm, SPFA
// (http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
//
// Every entry of _M_covered saves the solution (grouping status) for one
// matching head. When states transfer, solutions are compared and
// deduplicated (based on which greedy mode we have).
//
// Time complexity: O(length of the input range * _M_nfa.size())
// Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
/**
 * @brief Executor that advances a set of NFA states in parallel.
 *
 * _M_covered maps each state currently reached to its own copy of the
 * match results.  The NFA is held by reference and must outlive the
 * executor.
 */
template<typename _BiIter, typename _Alloc,
         typename _CharT, typename _TraitsT>
  class _BFSExecutor
  : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
  {
  public:
    typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
    typedef _NFA<_CharT, _TraitsT>                       _RegexT;
    typedef typename _BaseT::_ResultsT                   _ResultsT;
    typedef std::unique_ptr<_ResultsT>                   _ResultsPtr;
    typedef regex_constants::match_flag_type             _FlagT;

    /// Seeds the covered set with the start state (when it is valid),
    /// giving it a copy of the initial results, then runs _M_e_closure().
    _BFSExecutor(_BiIter        __begin,
                 _BiIter        __end,
                 _ResultsT&     __results,
                 const _RegexT& __nfa,
                 _FlagT         __flags)
    : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
      _M_nfa(__nfa)
    {
      const auto __start = _M_nfa._M_start();
      if (__start != _S_invalid_state_id)
        _M_covered[__start] = _ResultsPtr(new _ResultsT(this->_M_results));
      _M_e_closure();
    }

    /// Runs the main loop in match mode.
    bool
    _M_match()
    { return _M_main_loop<true>(); }

    /// Runs the main loop in non-match (search) mode.
    bool
    _M_search_from_first()
    { return _M_main_loop<false>(); }

  private:
    template<bool __match_mode>
      bool
      _M_main_loop();

    void
    _M_e_closure();

    void
    _M_move();

    bool
    _M_match_less_than(_StateIdT __u, _StateIdT __v) const;

    bool
    _M_includes_some() const;

    // State id -> results recorded for that state.
    std::map<_StateIdT, _ResultsPtr> _M_covered;
    const _RegexT&                   _M_nfa;
  };
//@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
} // namespace std
#include <bits/regex_executor.tcc>
// class template regex -*- C++ -*- // class template regex -*- C++ -*-
// Copyright (C) 2010-2013 Free Software Foundation, Inc. // Copyright (C) 2013 Free Software Foundation, Inc.
// //
// This file is part of the GNU ISO C++ Library. This library is free // This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the // software; you can redistribute it and/or modify it under the
...@@ -23,13 +23,11 @@ ...@@ -23,13 +23,11 @@
// <http://www.gnu.org/licenses/>. // <http://www.gnu.org/licenses/>.
/** /**
* @file bits/regex_grep_matcher.tcc * @file bits/regex_executor.tcc
* This is an internal header file, included by other library headers. * This is an internal header file, included by other library headers.
* Do not attempt to use it directly. @headername{regex} * Do not attempt to use it directly. @headername{regex}
*/ */
#include <regex>
namespace std _GLIBCXX_VISIBILITY(default) namespace std _GLIBCXX_VISIBILITY(default)
{ {
namespace __detail namespace __detail
...@@ -37,14 +35,19 @@ namespace __detail ...@@ -37,14 +35,19 @@ namespace __detail
_GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION
// TODO: This is too slow. Try to compile the NFA to a DFA. // TODO: This is too slow. Try to compile the NFA to a DFA.
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
template<bool __match_mode> template<bool __match_mode>
bool _DFSMatcher:: bool _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_dfs(_StateIdT __i) _M_dfs(_StateIdT __i)
{ {
auto& __current = this->_M_current;
auto& __end = this->_M_end;
auto& __results = this->_M_results;
if (__i == _S_invalid_state_id) if (__i == _S_invalid_state_id)
// This is not that certain. Need deeper investigate. // This is not that certain. Need deeper investigate.
return false; return false;
const auto& __state = (*_M_nfa)[__i]; const auto& __state = _M_nfa[__i];
bool __ret = false; bool __ret = false;
switch (__state._M_opcode) switch (__state._M_opcode)
{ {
...@@ -56,25 +59,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -56,25 +59,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|| _M_dfs<__match_mode>(__state._M_next); || _M_dfs<__match_mode>(__state._M_next);
break; break;
case _S_opcode_subexpr_begin: case _S_opcode_subexpr_begin:
__state._M_tagger(_M_str_cur, _M_results); __results.at(__state._M_subexpr).first = __current;
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs<__match_mode>(__state._M_next);
break; break;
case _S_opcode_subexpr_end: case _S_opcode_subexpr_end:
__state._M_tagger(_M_str_cur, _M_results);
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs<__match_mode>(__state._M_next);
_M_results._M_set_matched(__state._M_subexpr, __ret); __results.at(__state._M_subexpr).second = __current;
__results.at(__state._M_subexpr).matched = __ret;
break; break;
case _S_opcode_match: case _S_opcode_match:
if (!_M_str_cur._M_at_end() && __state._M_matches(_M_str_cur)) if (__current != __end && __state._M_matches(*__current))
{ {
_M_str_cur._M_next(); ++__current;
__ret = _M_dfs<__match_mode>(__state._M_next); __ret = _M_dfs<__match_mode>(__state._M_next);
_M_str_cur._M_prev(); --__current;
} }
break; break;
case _S_opcode_accept: case _S_opcode_accept:
if (__match_mode) if (__match_mode)
__ret = _M_str_cur._M_at_end(); __ret = __current == __end;
else else
__ret = true; __ret = true;
break; break;
...@@ -84,31 +87,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -84,31 +87,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __ret; return __ret;
} }
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
template<bool __match_mode> template<bool __match_mode>
bool _BFSMatcher:: bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_main_loop() _M_main_loop()
{ {
while (!_M_str_cur._M_at_end()) while (this->_M_current != this->_M_end)
{ {
if (!__match_mode) if (!__match_mode)
if (_M_includes_some()) if (_M_includes_some())
return true; return true;
_M_move(); _M_move();
_M_str_cur._M_next(); ++this->_M_current;
_M_e_closure(); _M_e_closure();
} }
return _M_includes_some(); return _M_includes_some();
} }
// The SPFA approach. // The SPFA approach.
// FIXME: move it to src/c++11 when it's stable, and make it not inlined. template<typename _BiIter, typename _Alloc,
inline typename _CharT, typename _TraitsT>
void _BFSMatcher:: void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_e_closure() _M_e_closure()
{ {
auto& __current = this->_M_current;
std::queue<_StateIdT> __q; std::queue<_StateIdT> __q;
std::vector<bool> __in_q(_M_nfa->size(), false); std::vector<bool> __in_q(_M_nfa.size(), false);
for (auto& __it : _M_current) for (auto& __it : _M_covered)
{ {
__in_q[__it.first] = true; __in_q[__it.first] = true;
__q.push(__it.first); __q.push(__it.first);
...@@ -118,7 +124,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -118,7 +124,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __u = __q.front(); auto __u = __q.front();
__q.pop(); __q.pop();
__in_q[__u] = false; __in_q[__u] = false;
const auto& __state = (*_M_nfa)[__u]; const auto& __state = _M_nfa[__u];
// Can be implemented using method, but there're too much arguments. // Can be implemented using method, but there're too much arguments.
auto __add_visited_state = [&](_StateIdT __v) auto __add_visited_state = [&](_StateIdT __v)
...@@ -127,7 +133,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -127,7 +133,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return; return;
if (_M_match_less_than(__u, __v)) if (_M_match_less_than(__u, __v))
{ {
_M_current[__v] = _M_current[__u]->_M_clone(); _M_covered[__v] = _ResultsPtr(new _ResultsT(*_M_covered[__u]));
// if a state is updated, it's outgoing neighbors should be // if a state is updated, it's outgoing neighbors should be
// reconsidered too. Push them to the queue. // reconsidered too. Push them to the queue.
if (!__in_q[__v]) if (!__in_q[__v])
...@@ -145,12 +151,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -145,12 +151,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__add_visited_state(__state._M_alt); __add_visited_state(__state._M_alt);
break; break;
case _S_opcode_subexpr_begin: case _S_opcode_subexpr_begin:
__state._M_tagger(_M_str_cur, *_M_current[__u]); _M_covered[__u]->at(__state._M_subexpr).first = __current;
__add_visited_state(__state._M_next); __add_visited_state(__state._M_next);
break; break;
case _S_opcode_subexpr_end: case _S_opcode_subexpr_end:
__state._M_tagger(_M_str_cur, *_M_current[__u]); _M_covered[__u]->at(__state._M_subexpr).second = __current;
_M_current[__u]->_M_set_matched(__state._M_subexpr, true); _M_covered[__u]->at(__state._M_subexpr).matched = true;
__add_visited_state(__state._M_next); __add_visited_state(__state._M_next);
break; break;
case _S_opcode_match: case _S_opcode_match:
...@@ -164,44 +170,44 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -164,44 +170,44 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
} }
} }
// FIXME: move it to src/c++11 when it's stable, and make it not inlined. template<typename _BiIter, typename _Alloc,
inline typename _CharT, typename _TraitsT>
void _BFSMatcher:: void _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_move() _M_move()
{ {
decltype(_M_current) __next; decltype(_M_covered) __next;
for (auto& __it : _M_current) for (auto& __it : _M_covered)
{ {
const auto& __state = (*_M_nfa)[__it.first]; const auto& __state = _M_nfa[__it.first];
if (__state._M_opcode == _S_opcode_match if (__state._M_opcode == _S_opcode_match
&& __state._M_matches(_M_str_cur)) && __state._M_matches(*this->_M_current))
if (_M_match_less_than(__it.first, __state._M_next) if (_M_match_less_than(__it.first, __state._M_next)
&& __state._M_next != _S_invalid_state_id) && __state._M_next != _S_invalid_state_id)
__next[__state._M_next] = __it.second->_M_clone(); __next[__state._M_next] = move(__it.second);
} }
_M_current = move(__next); _M_covered = move(__next);
} }
// FIXME: move it to src/c++11 when it's stable, and make it not inlined. template<typename _BiIter, typename _Alloc,
inline typename _CharT, typename _TraitsT>
bool _BFSMatcher:: bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_match_less_than(_StateIdT __u, _StateIdT __v) const _M_match_less_than(_StateIdT __u, _StateIdT __v) const
{ {
if (_M_current.count(__u) == 0) if (_M_covered.count(__u) == 0)
return false; return false;
if (_M_current.count(__v) > 0) if (_M_covered.count(__v) > 0)
return true; return true;
// TODO: Greedy and Non-greedy support // TODO: Greedy and Non-greedy support
return true; return true;
} }
// FIXME: move it to src/c++11 when it's stable, and make it not inlined. template<typename _BiIter, typename _Alloc,
inline typename _CharT, typename _TraitsT>
bool _BFSMatcher:: bool _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT>::
_M_includes_some() const _M_includes_some() const
{ {
auto& __s = _M_nfa->_M_final_states(); auto& __s = _M_nfa._M_final_states();
auto& __t = _M_current; auto& __t = _M_covered;
if (__s.size() > 0 && __t.size() > 0) if (__s.size() > 0 && __t.size() > 0)
{ {
auto __first = __s.begin(); auto __first = __s.begin();
...@@ -214,7 +220,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -214,7 +220,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
++__second; ++__second;
else else
{ {
_M_results._M_assign(*__second->second); this->_M_results = *__second->second;
return true; return true;
} }
} }
...@@ -222,20 +228,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -222,20 +228,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false; return false;
} }
// FIXME: move it to src/c++11 when it's stable, and make it not inlined. template<typename _BiIter, typename _Alloc,
inline typename _CharT, typename _TraitsT>
std::unique_ptr<_Grep_matcher> _Nfa:: std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
_M_get_matcher(_PatternCursor& __p, __get_executor(_BiIter __b,
_Results& __r, _BiIter __e,
const _AutomatonPtr& __a, match_results<_BiIter, _Alloc>& __m,
const basic_regex<_CharT, _TraitsT>& __re,
regex_constants::match_flag_type __flags) regex_constants::match_flag_type __flags)
{ {
if (_M_has_back_ref) typedef std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
return unique_ptr<_Grep_matcher>( _ExecutorPtr;
new _DFSMatcher(__p, __r, __a, __flags)); typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT;
else auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>>
return unique_ptr<_Grep_matcher>( (__re._M_automaton);
new _BFSMatcher(__p, __r, __a, __flags)); if (__p->_M_has_backref)
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, __flags));
return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, *__p, __flags));
} }
_GLIBCXX_END_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION
......
// class template regex -*- C++ -*-
// Copyright (C) 2010-2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
/**
* @file bits/regex_grep_matcher.h
* This is an internal header file, included by other library headers.
* Do not attempt to use it directly. @headername{regex}
*/
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _BiIter>
class sub_match;
template<typename _Bi_iter, typename _Allocator>
class match_results;
_GLIBCXX_END_NAMESPACE_VERSION
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
/**
* @defgroup regex-detail Base and Implementation Classes
* @ingroup regex
* @{
*/
/// A _Results facade specialized for wrapping a templated match_results.
template<typename _FwdIterT, typename _Alloc>
class _SpecializedResults
: public _Results
{
public:
_SpecializedResults(const _Automaton::_SizeT __size,
const _SpecializedCursor<_FwdIterT>& __cursor,
match_results<_FwdIterT, _Alloc>& __m);
~_SpecializedResults()
{
if (_M_managed)
delete &_M_results;
}
private:
_SpecializedResults(const _SpecializedResults& __rhs)
: _M_results(*new match_results<_FwdIterT, _Alloc>(__rhs._M_results)),
_M_managed(true)
{ }
public:
void
_M_set_pos(int __i, int __j, const _PatternCursor& __pc);
void
_M_set_range(int __i, const _PatternCursor& __pc)
{
typedef const _SpecializedCursor<_FwdIterT>& _CursorT;
_CursorT __c = static_cast<_CursorT>(__pc);
_M_results.at(__i).first = __c._M_begin();
_M_results.at(__i).second = __c._M_end();
}
void
_M_set_matched(int __i, bool __is_matched)
{ _M_results.at(__i).matched = __is_matched; }
std::unique_ptr<_Results>
_M_clone() const
{ return unique_ptr<_Results>(new _SpecializedResults(*this)); }
void
_M_assign(const _Results& __rhs)
{
auto __r = static_cast<const _SpecializedResults*>(&__rhs);
_M_results = __r->_M_results;
}
private:
match_results<_FwdIterT, _Alloc>& _M_results;
bool _M_managed;
};
/**
 * @brief Wraps @p __m without taking ownership and resets its contents.
 *
 * After construction the results hold __size default entries followed
 * by two extra ones: an empty range at the cursor's begin and an empty
 * range at the cursor's end.
 */
template<typename _FwdIterT, typename _Alloc>
  _SpecializedResults<_FwdIterT, _Alloc>::
  _SpecializedResults(const _Automaton::_SizeT __size,
                      const _SpecializedCursor<_FwdIterT>& __cursor,
                      match_results<_FwdIterT, _Alloc>& __m)
  : _M_results(__m), _M_managed(false)
  {
    typedef typename match_results<_FwdIterT, _Alloc>::value_type _SubT;

    _M_results.clear();
    // Reserve room for the two trailing entries up front.
    _M_results.reserve(__size + 2);
    _M_results.resize(__size);

    _SubT __at_begin;
    __at_begin.first = __at_begin.second = __cursor._M_begin();
    _M_results.push_back(__at_begin);

    _SubT __at_end;
    __at_end.first = __at_end.second = __cursor._M_end();
    _M_results.push_back(__at_end);
  }
/**
 * @brief Stores the cursor's current position in entry @p __i.
 *
 * @param __i   Index of the entry to update.
 * @param __j   0 selects the entry's `first` iterator, any other value
 *              selects `second`.
 * @param __pc  Cursor supplying the position; it must refer to a
 *              _SpecializedCursor<_FwdIterT> (the cast is unchecked).
 */
template<typename _FwdIterT, typename _Alloc>
  void
  _SpecializedResults<_FwdIterT, _Alloc>::
  _M_set_pos(int __i, int __j, const _PatternCursor& __pc)
  {
    const _SpecializedCursor<_FwdIterT>& __cur
      = static_cast<const _SpecializedCursor<_FwdIterT>&>(__pc);
    auto& __entry = _M_results.at(__i);
    if (__j != 0)
      __entry.second = __cur._M_pos();
    else
      __entry.first = __cur._M_pos();
  }
/// Executes a regular expression NFA/DFA over a range using a
/// variant of the parallel execution algorithm featured in the grep
/// utility, modified to use Laurikari tags.
///
/// This base class only stores the automaton (downcast to _Nfa), the
/// string cursor and the results facade; the matching itself is done by
/// derived classes.
class _Grep_matcher
{
public:
  /// @param __p          Cursor over the target string, held by reference.
  /// @param __r          Results facade to fill, held by reference.
  /// @param __automaton  Automaton to execute; it is cast to _Nfa.
  /// @param __flags      Match flags; not used by this constructor.
  _Grep_matcher(_PatternCursor&                   __p,
                _Results&                         __r,
                const _AutomatonPtr&              __automaton,
                regex_constants::match_flag_type  __flags)
  : _M_nfa(std::static_pointer_cast<_Nfa>(__automaton)),
    _M_str_cur(__p), _M_results(__r)
  { }

  virtual
  ~_Grep_matcher()
  { }

  // Set matched when the string exactly matches the pattern.
  virtual bool
  _M_match() = 0;

  // Set matched when some prefix of the string matches the pattern.
  virtual bool
  _M_search_from_first() = 0;

protected:
  const std::shared_ptr<_Nfa> _M_nfa;
  _PatternCursor&             _M_str_cur;
  _Results&                   _M_results;
};
// Time complexity: exponential
// Space complexity: O(_M_str_cur.size())
// _M_dfs() takes a state and, together with the current string cursor
// (_M_str_cur), tries to match that state against the current character.
// Only _S_opcode_match consumes a character.
class _DFSMatcher
: public _Grep_matcher
{
public:
  /// Forwards every argument to _Grep_matcher.
  _DFSMatcher(_PatternCursor&                   __p,
              _Results&                         __r,
              const _AutomatonPtr&              __automaton,
              regex_constants::match_flag_type  __flags)
  : _Grep_matcher(__p, __r, __automaton, __flags)
  { }

  /// Runs the search from the start state in match mode.
  bool
  _M_match()
  {
    const auto __start = _M_nfa->_M_start();
    return _M_dfs<true>(__start);
  }

  /// Runs the search from the start state in non-match (search) mode.
  bool
  _M_search_from_first()
  {
    const auto __start = _M_nfa->_M_start();
    return _M_dfs<false>(__start);
  }

private:
  // Recursive worker; __match_mode selects between the two entry points.
  template<bool __match_mode>
    bool
    _M_dfs(_StateIdT __i);
};
// This is essentially a variant of the Single-Source-Shortest-Path problem,
// where the matching result is the final distance and should be minimized.
// Instead of Dijkstra's algorithm, it uses the queue-optimized (BFS-like)
// Bellman-Ford algorithm, SPFA
// (http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
//
// Every entry of _M_current saves the solution (grouping status) for one
// matching head. When states transfer, solutions are compared and
// deduplicated (based on which greedy mode we have).
//
// Time complexity: O(_M_str_cur.size() * _M_nfa.size())
// Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
/// Matcher that advances a set of NFA states in parallel.  _M_current
/// maps each state currently reached to its own clone of the results.
class _BFSMatcher
: public _Grep_matcher
{
public:
  /// Seeds the state set with the start state (when it is valid), giving
  /// it a clone of the initial results, then runs _M_e_closure().
  _BFSMatcher(_PatternCursor&                   __p,
              _Results&                         __r,
              const _AutomatonPtr&              __automaton,
              regex_constants::match_flag_type  __flags)
  : _Grep_matcher(__p, __r, __automaton, __flags)
  {
    const auto __start = _M_nfa->_M_start();
    if (__start != _S_invalid_state_id)
      _M_current[__start] = _M_results._M_clone();
    _M_e_closure();
  }

  /// Runs the main loop in match mode.
  bool
  _M_match()
  { return _M_main_loop<true>(); }

  /// Runs the main loop in non-match (search) mode.
  bool
  _M_search_from_first()
  { return _M_main_loop<false>(); }

private:
  template<bool __match_mode>
    bool
    _M_main_loop();

  void
  _M_e_closure();

  void
  _M_move();

  bool
  _M_match_less_than(_StateIdT __u, _StateIdT __v) const;

  bool
  _M_includes_some() const;

  // State id -> results recorded for that state.
  std::map<_StateIdT, std::unique_ptr<_Results>> _M_current;
};
//@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
} // namespace std
#include <bits/regex_grep_matcher.tcc>
// class template regex -*- C++ -*-
// Copyright (C) 2010-2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
/**
* @file bits/regex_nfa.h
* This is an internal header file, included by other library headers.
* Do not attempt to use it directly. @headername{regex}
*/
namespace std _GLIBCXX_VISIBILITY(default)
{
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
/**
* @addtogroup regex-detail
* @{
*/
/// Abstract, non-templated facade over a templated match_results.
struct _Results
{
  virtual
  ~_Results()
  { }

  /// Records a position for sub-match __i taken from cursor __p.
  /// _StartTagger passes __j == 0 and _EndTagger passes __j == 1.
  virtual void
  _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;

  /// Sets the matched flag of sub-match __i to __is_matched.
  virtual void
  _M_set_matched(int __i, bool __is_matched) = 0;

  /// Returns a newly allocated copy owned by the caller.
  virtual std::unique_ptr<_Results>
  _M_clone() const = 0;

  /// Assigns from __rhs.
  virtual void
  _M_assign(const _Results& __rhs) = 0;
};
class _Grep_matcher;
class _Automaton;

/// Generic shared pointer to an automaton.
using _AutomatonPtr = std::shared_ptr<_Automaton>;

/// Abstract base class for automata; could be an NFA or a DFA.
class _Automaton
{
public:
  using _SizeT = unsigned int;

public:
  virtual
  ~_Automaton()
  { }

  /// Sub-expression count of the automaton (see the implementing class).
  virtual _SizeT
  _M_sub_count() const = 0;

  /// Creates a matcher for this automaton. The arguments are the string
  /// cursor, the results sink, a shared pointer to the automaton and the
  /// match flags.
  virtual std::unique_ptr<_Grep_matcher>
  _M_get_matcher(_PatternCursor&                  __p,
                 _Results&                        __r,
                 const _AutomatonPtr&             __automaton,
                 regex_constants::match_flag_type __flags) = 0;

#ifdef _GLIBCXX_DEBUG
  /// Debug-only: writes the automaton to __ostr (graphviz dot output).
  virtual std::ostream&
  _M_dot(std::ostream& __ostr) const = 0;
#endif
};
/// Operation codes that define the type of transitions within the base NFA
/// that represents the regular expression.
enum _Opcode
{
  _S_opcode_unknown       = 0x00,
  _S_opcode_alternative   = 0x01, // state with two outgoing transitions
  _S_opcode_subexpr_begin = 0x04, // opens a sub-expression
  _S_opcode_subexpr_end   = 0x05, // closes a sub-expression
  _S_opcode_match         = 0x64, // state carrying a character matcher
  _S_opcode_accept        = 0xff  // accepting state
};
/// Callable that tags the current state (for subexpr begin/end): it is
/// given the cursor and the results object to update.
using _Tagger = std::function<void (const _PatternCursor&, _Results&)>;
/// Start state tag: records the cursor as position 0 of sub-match _M_index.
template<typename _FwdIterT, typename _TraitsT>
  struct _StartTagger
  {
    /// Index of the sub-match this tagger writes to.
    int _M_index;

    explicit
    _StartTagger(int __i)
    : _M_index(__i)
    { }

    void
    operator()(const _PatternCursor& __pc, _Results& __r)
    {
      // Slot 0 is the one the start tagger fills (the end tagger uses 1).
      const int __slot = 0;
      __r._M_set_pos(_M_index, __slot, __pc);
    }
  };
/// End state tag: records the cursor as position 1 of sub-match _M_index.
template<typename _FwdIterT, typename _TraitsT>
  struct _EndTagger
  {
    /// Index of the sub-match this tagger writes to.
    int _M_index;

    explicit
    _EndTagger(int __i)
    : _M_index(__i)
    { }

    void
    operator()(const _PatternCursor& __pc, _Results& __r)
    {
      // Slot 1 is the one the end tagger fills (the start tagger uses 0).
      const int __slot = 1;
      __r._M_set_pos(_M_index, __slot, __pc);
    }
  };
/// Character comparison helper shared by the character matchers.
///
/// TODO: For now this is an all-in-one comparator. In the future there may
/// be optimizations based on regex_traits::translate and regex_transform.
template<typename _InIterT, typename _TraitsT>
  struct _Comparator
  {
    using _FlagT   = regex_constants::syntax_option_type;
    using _CharT   = typename _TraitsT::char_type;
    using _StringT = std::basic_string<_CharT>;

    /// Stores the syntax flags and a copy of the traits object.
    _Comparator(_FlagT __flags, const _TraitsT& __traits)
    : _M_flags(__flags), _M_traits(__traits)
    { }

    // Both comparisons are defined out of line.
    // NOTE(review): presumably "equal" and "less-or-equal" under the
    // current flags/traits -- confirm against the .tcc definitions.
    bool
    _M_equ(_CharT __a, _CharT __b) const;

    bool
    _M_le(_CharT __a, _CharT __b) const;

    _FlagT   _M_flags;
    _TraitsT _M_traits;
  };
/// Predicate telling whether the current state matches the character at
/// the cursor's current position.
using _Matcher = std::function<bool (const _PatternCursor&)>;
/// Matches any character: the cursor is ignored and the result is always
/// true.
inline bool
_AnyMatcher(const _PatternCursor& /* unused */)
{
  return true;
}
/// Matches a single character, compared through _Comparator::_M_equ.
template<typename _InIterT, typename _TraitsT>
  struct _CharMatcher
  : public _Comparator<_InIterT, _TraitsT>
  {
    using _BaseT = _Comparator<_InIterT, _TraitsT>;
    using _CharT = typename _TraitsT::char_type;
    using _FlagT = regex_constants::syntax_option_type;

    /// __c is the character to match; __flags and __t go to the comparator.
    explicit
    _CharMatcher(_CharT __c, _FlagT __flags, const _TraitsT& __t)
    : _BaseT(__flags, __t), _M_c(__c)
    { }

    /// True when the character under the cursor compares equal to _M_c.
    bool
    operator()(const _PatternCursor& __pc) const
    {
      // The generic cursor is downcast to the iterator-specific cursor in
      // order to read the current character.
      const _SpecializedCursor<_InIterT>& __cursor
        = static_cast<const _SpecializedCursor<_InIterT>&>(__pc);
      return this->_M_equ(__cursor._M_current(), _M_c);
    }

    _CharT _M_c;
  };
/// Matches a character range (bracket expression).
///
/// The matcher is filled incrementally through the _M_add_* and
/// _M_make_range members, which collect single characters, character
/// ranges and a character-class mask. The actual test, operator(), is
/// defined out of line.
template<typename _InIterT, typename _TraitsT>
  struct _BracketMatcher
  : public _Comparator<_InIterT, _TraitsT>
  {
    typedef _Comparator<_InIterT, _TraitsT> _BaseT;
    typedef typename _TraitsT::char_class_type _CharClassT;
    typedef regex_constants::syntax_option_type _FlagT;
    typedef typename _TraitsT::char_type _CharT;
    typedef std::basic_string<_CharT> _StringT;

    /// __is_non_matching is stored in _M_is_non_matching; __flags and __t
    /// are stored here and also forwarded to the comparator base.
    explicit
    _BracketMatcher(bool __is_non_matching,
                    _FlagT __flags,
                    const _TraitsT& __t)
    : _BaseT(__flags, __t), _M_flags(__flags), _M_traits(__t),
      _M_is_non_matching(__is_non_matching), _M_class_set(0)
    { }

    bool
    operator()(const _PatternCursor& __pc) const;

    /// Adds a single character to the set.
    void
    _M_add_char(_CharT __c)
    { _M_char_set.push_back(__c); }

    /// Looks up the collating element named __s and adds its first
    /// character. Throws error_collate when the name is unknown.
    void
    _M_add_collating_element(const _StringT& __s)
    {
      // Use data()/size() rather than &*begin() / &*end(): dereferencing
      // the past-the-end iterator (or begin() of an empty string) is
      // undefined behaviour.
      const _CharT* __first = __s.data();
      const _CharT* __last = __first + __s.size();
      auto __st = _M_traits.lookup_collatename(__first, __last);
      if (__st.empty())
        __throw_regex_error(regex_constants::error_collate);
      // TODO: digraph
      _M_char_set.push_back(__st[0]);
    }

    /// Passes the primary transform of __s to _M_add_character_class().
    void
    _M_add_equivalence_class(const _StringT& __s)
    {
      const _CharT* __first = __s.data();
      const _CharT* __last = __first + __s.size();
      _M_add_character_class(_M_traits.transform_primary(__first, __last));
    }

    /// Looks up the character class named __s and ORs it into the class
    /// mask. Throws error_ctype when the name is unknown.
    void
    _M_add_character_class(const _StringT& __s)
    {
      const _CharT* __first = __s.data();
      const _CharT* __last = __first + __s.size();
      auto __st = _M_traits.lookup_classname(
        __first, __last, (_M_flags & regex_constants::icase));
      if (__st == 0)
        __throw_regex_error(regex_constants::error_ctype);
      _M_class_set |= __st;
    }

    /// Adds the range [__l, __r]. Throws error_range unless
    /// _M_le(__l, __r) holds.
    void
    _M_make_range(_CharT __l, _CharT __r)
    {
      if (!this->_M_le(__l, __r))
        __throw_regex_error(regex_constants::error_range);
      _M_range_set.push_back(make_pair(__l, __r));
    }

    _FlagT                            _M_flags;
    _TraitsT                          _M_traits;
    bool                              _M_is_non_matching;
    std::vector<_CharT>               _M_char_set;
    std::vector<pair<_CharT, _CharT>> _M_range_set;
    _CharClassT                       _M_class_set;
  };
/// Identifies a state in the NFA.
using _StateIdT = int;

/// The special case in which a state identifier is not an index.
static const _StateIdT _S_invalid_state_id = _StateIdT(-1);
/**
* @brief struct _State
*
* An individual state in an NFA
*
* In this case a "state" is an entry in the NFA definition coupled
* with its outgoing transition(s). All states have a single outgoing
* transition, except for accepting states (which have no outgoing
* transitions) and alt states, which have two outgoing transitions.
*/
struct _State
{
typedef int _OpcodeT;
_OpcodeT _M_opcode; // type of outgoing transition
_StateIdT _M_next; // outgoing transition
_StateIdT _M_alt; // for _S_opcode_alternative
unsigned int _M_subexpr; // for _S_opcode_subexpr_*
_Tagger _M_tagger; // for _S_opcode_subexpr_*
_Matcher _M_matches; // for _S_opcode_match
explicit _State(_OpcodeT __opcode)
: _M_opcode(__opcode), _M_next(_S_invalid_state_id)
{ }
_State(const _Matcher& __m)
: _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m)
{ }
_State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t)
: _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s),
_M_tagger(__t)
{ }
_State(_StateIdT __next, _StateIdT __alt)
: _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
{ }
#ifdef _GLIBCXX_DEBUG
std::ostream&
_M_print(std::ostream& ostr) const;
// Prints graphviz dot commands for state.
std::ostream&
_M_dot(std::ostream& __ostr, _StateIdT __id) const;
#endif
};
/// The Grep Matcher works on sets of states; this is the set type it uses.
using _StateSet = std::set<_StateIdT>;
/**
 * @brief struct _Nfa
 *
 * A collection of all states making up an NFA.
 *
 * An NFA is a 4-tuple M = (K, S, s, F), where
 *    K is a finite set of states,
 *    S is the alphabet of the NFA,
 *    s is the initial state,
 *    F is a set of final (accepting) states.
 *
 * This NFA class is templated on S, a type that will hold values of the
 * underlying alphabet (without regard to semantics of that alphabet). The
 * other elements of the tuple are generated during construction of the NFA
 * and are available through accessor member functions.
 *
 * The states themselves live in the std::vector<_State> base; a state id
 * is the index of the state in that vector.
 */
class _Nfa
: public _Automaton, public std::vector<_State>
{
public:
  using _StateT = _State;
  using _SizeT  = unsigned int;
  using _FlagT  = regex_constants::syntax_option_type;

  /// Creates an empty NFA with syntax flags __f. The start state id is 0.
  _Nfa(_FlagT __f)
  : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
    // TODO: BFS by default. Your choice. Need to be set by the compiler.
    _M_has_back_ref(false)
  { }

  ~_Nfa()
  { }

  /// Syntax flags given at construction.
  _FlagT
  _M_options() const
  { return _M_flags; }

  /// Id of the start state.
  _StateIdT
  _M_start() const
  { return _M_start_state; }

  /// Ids of all accepting states inserted so far.
  const _StateSet&
  _M_final_states() const
  { return _M_accepting_states; }

  /// Number of sub-expressions begun so far.
  _SizeT
  _M_sub_count() const
  { return _M_subexpr_count; }

  /// Appends an accepting state, records it as final and returns its id.
  _StateIdT
  _M_insert_accept()
  {
    // The new state lands at the current end of the vector.
    const _StateIdT __id = this->size();
    this->push_back(_StateT(_S_opcode_accept));
    _M_accepting_states.insert(__id);
    return __id;
  }

  /// Appends an alternative state branching to __next and __alt.
  _StateIdT
  _M_insert_alt(_StateIdT __next, _StateIdT __alt)
  {
    const _StateIdT __id = this->size();
    this->push_back(_StateT(__next, __alt));
    return __id;
  }

  /// Appends a matching state that uses __m.
  _StateIdT
  _M_insert_matcher(_Matcher __m)
  {
    const _StateIdT __id = this->size();
    this->push_back(_StateT(__m));
    return __id;
  }

  /// Appends a sub-expression begin state tagged by __t. The state gets
  /// the current sub-expression count as its index and the count is then
  /// incremented.
  _StateIdT
  _M_insert_subexpr_begin(const _Tagger& __t)
  {
    const _StateIdT __id = this->size();
    this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++,
                            __t));
    return __id;
  }

  /// Appends a sub-expression end state for sub-expression __i, tagged
  /// by __t.
  _StateIdT
  _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t)
  {
    const _StateIdT __id = this->size();
    this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t));
    return __id;
  }

  /// Sets the flag recording whether the pattern has a back reference.
  void
  _M_set_back_ref(bool __b)
  { _M_has_back_ref = __b; }

  /// Creates a matcher for this NFA; defined out of line.
  std::unique_ptr<_Grep_matcher>
  _M_get_matcher(_PatternCursor&                  __p,
                 _Results&                        __r,
                 const _AutomatonPtr&             __automaton,
                 regex_constants::match_flag_type __flags);

#ifdef _GLIBCXX_DEBUG
  std::ostream&
  _M_dot(std::ostream& __ostr) const;
#endif

private:
  _FlagT    _M_flags;
  _StateIdT _M_start_state;
  _StateSet _M_accepting_states;
  _SizeT    _M_subexpr_count;
  bool      _M_has_back_ref;
};
/// Describes a sequence of one or more %_State, its current start
/// and end(s). This structure contains fragments of an NFA during
/// construction.
class _StateSeq
{
public:
  /// Single-node sequence: starts and ends at __s, with an optional second
  /// end __e.
  _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id)
  : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
  { }

  /// Split sequence built from two other sequences: a new alternative
  /// state branching to both starts is inserted into the NFA and becomes
  /// the start; the two ends are the first ends of __e1 and __e2.
  _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
  : _M_nfa(__e1._M_nfa),
    _M_start(__e1._M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
    _M_end1(__e1._M_end1),
    _M_end2(__e2._M_end1)
  { }

  /// Split sequence built from a single sequence: a new alternative state
  /// branching to __id and to the start of __e becomes the start; the ends
  /// are __id and the first end of __e.
  _StateSeq(const _StateSeq& __e, _StateIdT __id)
  : _M_nfa(__e._M_nfa),
    _M_start(__e._M_nfa._M_insert_alt(__id, __e._M_start)),
    _M_end1(__id),
    _M_end2(__e._M_end1)
  { }

  /// Copy of a %_StateSeq; refers to the same NFA as __rhs.
  _StateSeq(const _StateSeq& __rhs)
  : _M_nfa(__rhs._M_nfa),
    _M_start(__rhs._M_start),
    _M_end1(__rhs._M_end1),
    _M_end2(__rhs._M_end2)
  { }

  _StateSeq&
  operator=(const _StateSeq& __rhs);

  /// Id of the first state of the sequence.
  _StateIdT
  _M_front() const
  { return _M_start; }

  // Extends a sequence by one.
  void
  _M_push_back(_StateIdT __id);

  // Extends and maybe joins a sequence.
  void
  _M_append(_StateIdT __id);

  void
  _M_append(_StateSeq& __rhs);

  // Clones an entire sequence.
  _StateIdT
  _M_clone();

private:
  _Nfa&     _M_nfa;
  _StateIdT _M_start;
  _StateIdT _M_end1;
  _StateIdT _M_end2;
};
//@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
} // namespace std
#include <bits/regex_nfa.tcc>
...@@ -54,13 +54,11 @@ ...@@ -54,13 +54,11 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <bits/range_access.h>
#include <bits/regex_constants.h> #include <bits/regex_constants.h>
#include <bits/regex_error.h> #include <bits/regex_error.h>
#include <bits/regex_cursor.h> #include <bits/regex_automaton.h>
#include <bits/regex_nfa.h>
#include <bits/regex_compiler.h> #include <bits/regex_compiler.h>
#include <bits/regex_grep_matcher.h> #include <bits/regex_executor.h>
#include <bits/regex.h> #include <bits/regex.h>
#endif // C++11 #endif // C++11
......
...@@ -38,12 +38,10 @@ template<typename _Bi_iter, typename _Alloc, ...@@ -38,12 +38,10 @@ template<typename _Bi_iter, typename _Alloc,
regex_constants::match_flag_type __flags regex_constants::match_flag_type __flags
= regex_constants::match_default) = regex_constants::match_default)
{ {
__detail::_AutomatonPtr __a = __re._M_get_automaton(); VERIFY( (dynamic_cast
__detail::_Automaton::_SizeT __sz = __a->_M_sub_count(); <__detail::_DFSExecutor<_Bi_iter, _Alloc, _Ch_type, _Rx_traits>*>
__detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e); (&*__detail::__get_executor(__s, __e, __m, __re, __flags))
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m); != nullptr) );
VERIFY( dynamic_cast<__detail::_DFSMatcher *>(
&*__a->_M_get_matcher(__cs, __r, __a, __flags)) != nullptr );
} }
void void
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment