Commit 6cb784b6 by Tim Shen Committed by Tim Shen

Makefile.am: Adjust to new files.

2013-08-07  Tim Shen  <timshen91@gmail.com>

	* include/Makefile.am: Adjust to new files.
	* include/Makefile.in: Regenerate.
	* include/bits/regex.h: Adjust to new interfaces.
	* include/bits/regex_automaton.h: New.
	* include/bits/regex_automaton.tcc: New.
	* include/bits/regex_compiler.h: Adjust to new files.
	* include/bits/regex_compiler.tcc: New.
	* include/bits/regex_constants.h: Tail spaces.
	* include/bits/regex_error.h: Likewise.
	* include/bits/regex_executor.h: New.
	* include/bits/regex_executor.tcc: New.
	* include/std/regex: Adjust to new files.
	* testsuite/28_regex/algorithms/regex_match/extended/
	string_dispatch_01.cc: Adjust to new interfaces.

From-SVN: r201573
parent 5ee5b32c
2013-08-07 Tim Shen <timshen91@gmail.com>
* include/Makefile.am: Adjust to new files.
* include/Makefile.in: Regenerate.
* include/bits/regex.h: Adjust to new interfaces.
* include/bits/regex_automaton.h: New.
* include/bits/regex_automaton.tcc: New.
* include/bits/regex_compiler.h: Adjust to new files.
* include/bits/regex_compiler.tcc: New.
* include/bits/regex_constants.h: Tail spaces.
* include/bits/regex_error.h: Likewise.
* include/bits/regex_executor.h: New.
* include/bits/regex_executor.tcc: New.
* include/std/regex: Adjust to new files.
* testsuite/28_regex/algorithms/regex_match/extended/
string_dispatch_01.cc: Adjust to new interfaces.
2013-08-07 Paolo Carlini <paolo.carlini@oracle.com>
* include/ext/atomicity.h: Add #pragma GCC system_header.
......
......@@ -126,14 +126,14 @@ bits_headers = \
${bits_srcdir}/random.tcc \
${bits_srcdir}/range_access.h \
${bits_srcdir}/regex.h \
${bits_srcdir}/regex_compiler.h \
${bits_srcdir}/regex_constants.h \
${bits_srcdir}/regex_cursor.h \
${bits_srcdir}/regex_error.h \
${bits_srcdir}/regex_grep_matcher.h \
${bits_srcdir}/regex_grep_matcher.tcc \
${bits_srcdir}/regex_nfa.h \
${bits_srcdir}/regex_nfa.tcc \
${bits_srcdir}/regex_automaton.h \
${bits_srcdir}/regex_automaton.tcc \
${bits_srcdir}/regex_compiler.h \
${bits_srcdir}/regex_compiler.tcc \
${bits_srcdir}/regex_executor.h \
${bits_srcdir}/regex_executor.tcc \
${bits_srcdir}/stream_iterator.h \
${bits_srcdir}/streambuf_iterator.h \
${bits_srcdir}/shared_ptr.h \
......
......@@ -393,14 +393,14 @@ bits_headers = \
${bits_srcdir}/random.tcc \
${bits_srcdir}/range_access.h \
${bits_srcdir}/regex.h \
${bits_srcdir}/regex_compiler.h \
${bits_srcdir}/regex_constants.h \
${bits_srcdir}/regex_cursor.h \
${bits_srcdir}/regex_error.h \
${bits_srcdir}/regex_grep_matcher.h \
${bits_srcdir}/regex_grep_matcher.tcc \
${bits_srcdir}/regex_nfa.h \
${bits_srcdir}/regex_nfa.tcc \
${bits_srcdir}/regex_automaton.h \
${bits_srcdir}/regex_automaton.tcc \
${bits_srcdir}/regex_compiler.h \
${bits_srcdir}/regex_compiler.tcc \
${bits_srcdir}/regex_executor.h \
${bits_srcdir}/regex_executor.tcc \
${bits_srcdir}/stream_iterator.h \
${bits_srcdir}/streambuf_iterator.h \
${bits_srcdir}/shared_ptr.h \
......
// class template regex -*- C++ -*-
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
/**
* @file bits/regex_automaton.h
* This is an internal header file, included by other library headers.
* Do not attempt to use it directly. @headername{regex}
*/
namespace std _GLIBCXX_VISIBILITY(default)
{
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
/**
* @defgroup regex-detail Base and Implementation Classes
* @ingroup regex
* @{
*/
/// Identifier of a state inside an automaton.
using _StateIdT = int;

/// Ordered set of state identifiers.
using _StateSet = std::set<_StateIdT>;

/// Sentinel identifier meaning "no state".
static const _StateIdT _S_invalid_state_id = -1;

/// Callable deciding whether a single character is accepted.
template<typename _CharT>
  using _Matcher = std::function<bool (_CharT)>;
/// Operation codes that define the type of transitions within the base NFA
/// that represents the regular expression.
enum _Opcode
{
// Zero value; not assigned by any constructor visible in this header.
_S_opcode_unknown = 0,
// Branching state: carries both a _M_next and a _M_alt successor.
_S_opcode_alternative = 1,
// Start of a sub-expression; the state stores its index in _M_subexpr.
_S_opcode_subexpr_begin = 4,
// End of a sub-expression; the state stores its index in _M_subexpr.
_S_opcode_subexpr_end = 5,
// State that holds a character predicate in _M_matches.
_S_opcode_match = 100,
// Accepting state; _NFA records these in its accepting-state set.
_S_opcode_accept = 255
};
template<typename _CharT, typename _TraitsT>
class _State
{
public:
typedef int _OpcodeT;
typedef _Matcher<_CharT> _MatcherT;
_OpcodeT _M_opcode; // type of outgoing transition
_StateIdT _M_next; // outgoing transition
_StateIdT _M_alt; // for _S_opcode_alternative
unsigned int _M_subexpr; // for _S_opcode_subexpr_*
_MatcherT _M_matches; // for _S_opcode_match
explicit _State(_OpcodeT __opcode)
: _M_opcode(__opcode), _M_next(_S_invalid_state_id)
{ }
_State(const _MatcherT& __m)
: _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id),
_M_matches(__m)
{ }
_State(_OpcodeT __opcode, unsigned __index)
: _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__index)
{ }
_State(_StateIdT __next, _StateIdT __alt)
: _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
{ }
#ifdef _GLIBCXX_DEBUG
std::ostream&
_M_print(std::ostream& ostr) const;
// Prints graphviz dot commands for state.
std::ostream&
_M_dot(std::ostream& __ostr, _StateIdT __id) const;
#endif
};
/// Base class for, um, automata.  Could be an NFA or a DFA.  Your choice.
///
/// The class is a polymorphic interface, so it has a virtual destructor:
/// destroying a derived automaton through a pointer to this base is then
/// well defined.
template<typename _CharT, typename _TraitsT>
  class _Automaton
  {
  public:
    typedef unsigned int _SizeT;

  public:
    virtual
    ~_Automaton()
    { }

    /// Returns the sub-expression count reported by the concrete automaton.
    virtual _SizeT
    _M_sub_count() const = 0;

#ifdef _GLIBCXX_DEBUG
    /// Writes a description of the automaton to @p __ostr.
    virtual std::ostream&
    _M_dot(std::ostream& __ostr) const = 0;
#endif
  };
/// Automaton whose states live in the inherited std::vector; a state is
/// addressed by its index in that vector.
template<typename _CharT, typename _TraitsT>
  class _NFA
  : public _Automaton<_CharT, _TraitsT>,
    public std::vector<_State<_CharT, _TraitsT>>
  {
  public:
    typedef _State<_CharT, _TraitsT>            _StateT;
    typedef const _Matcher<_CharT>&             _MatcherT;
    typedef unsigned int                        _SizeT;
    typedef regex_constants::syntax_option_type _FlagT;

    /// Creates an automaton with no states, remembering the flags @p __f.
    _NFA(_FlagT __f)
    : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
      _M_has_backref(false)
    { }

    /// Flags passed at construction.
    _FlagT
    _M_options() const
    { return _M_flags; }

    /// Identifier of the start state.
    _StateIdT
    _M_start() const
    { return _M_start_state; }

    /// Identifiers of all states added through _M_insert_accept().
    const _StateSet&
    _M_final_states() const
    { return _M_accepting_states; }

    /// Number of sub-expressions opened so far.
    _SizeT
    _M_sub_count() const
    { return _M_subexpr_count; }

    /// Appends an accepting state, records it, and returns its identifier.
    _StateIdT
    _M_insert_accept()
    {
      this->push_back(_StateT(_S_opcode_accept));
      const _StateIdT __new_id = this->size() - 1;
      _M_accepting_states.insert(__new_id);
      return __new_id;
    }

    /// Appends a branching state and returns its identifier.
    _StateIdT
    _M_insert_alt(_StateIdT __next, _StateIdT __alt)
    {
      this->push_back(_StateT(__next, __alt));
      const _StateIdT __new_id = this->size() - 1;
      return __new_id;
    }

    /// Appends a character-matching state and returns its identifier.
    _StateIdT
    _M_insert_matcher(_MatcherT __m)
    {
      this->push_back(_StateT(__m));
      const _StateIdT __new_id = this->size() - 1;
      return __new_id;
    }

    /// Opens a new sub-expression: allocates the next index, remembers it
    /// on the parenthesis stack, and appends the matching begin state.
    _StateIdT
    _M_insert_subexpr_begin()
    {
      _SizeT __index = _M_subexpr_count;
      ++_M_subexpr_count;
      _M_paren_stack.push(__index);
      this->push_back(_StateT(_S_opcode_subexpr_begin, __index));
      return this->size() - 1;
    }

    /// Closes the innermost open sub-expression: appends an end state
    /// carrying the index on top of the parenthesis stack, then pops it.
    _StateIdT
    _M_insert_subexpr_end()
    {
      this->push_back(_StateT(_S_opcode_subexpr_end,
			      _M_paren_stack.top()));
      _M_paren_stack.pop();
      const _StateIdT __new_id = this->size() - 1;
      return __new_id;
    }

    /// Records whether the expression contains a back-reference.
    void
    _M_set_backref(bool __b)
    { _M_has_backref = __b; }

#ifdef _GLIBCXX_DEBUG
    std::ostream&
    _M_dot(std::ostream& __ostr) const;
#endif

    _FlagT                   _M_flags;
    _StateIdT                _M_start_state;
    _StateSet                _M_accepting_states;
    _SizeT                   _M_subexpr_count;
    bool                     _M_has_backref;
    std::stack<unsigned int> _M_paren_stack;
  };
/// Describes a sequence of one or more %_State, its current start
/// and end(s). This structure contains fragments of an NFA during
/// construction.
///
/// The sequence does not own any state: it keeps a reference to the NFA
/// plus three state identifiers (one start, two ends).
template<typename _CharT, typename _TraitsT>
class _StateSeq
{
public:
typedef _NFA<_CharT, _TraitsT> _RegexT;
public:
// Constructs a single-node sequence
// (start and first end are both __s; the second end defaults to the
// invalid identifier).
_StateSeq(_RegexT& __ss, _StateIdT __s,
_StateIdT __e = _S_invalid_state_id)
: _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
{ }
// Constructs a split sequence from two other sequences
// A new alternative state is inserted into the NFA and becomes the start.
// _M_nfa is declared before _M_start, so it is already bound when the
// _M_start initializer calls _M_insert_alt() on it.
// Only the _M_end1 of each argument is carried over.
_StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
: _M_nfa(__e1._M_nfa),
_M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
_M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
{ }
// Constructs a split sequence from a single sequence
// The inserted alternative state branches to __id and to __e's start;
// the ends are __id and __e's first end.
_StateSeq(const _StateSeq& __e, _StateIdT __id)
: _M_nfa(__e._M_nfa),
_M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
_M_end1(__id), _M_end2(__e._M_end1)
{ }
// Constructs a copy of a %_StateSeq
// (member-wise; the copy refers to the same NFA).
_StateSeq(const _StateSeq& __rhs)
: _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
_M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
{ }
// Declared here only; the definition is not part of this header text.
_StateSeq& operator=(const _StateSeq& __rhs);
// Returns the identifier of the first state of the sequence.
_StateIdT
_M_front() const
{ return _M_start; }
// Extends a sequence by one.
void
_M_push_back(_StateIdT __id);
// Extends and maybe joins a sequence.
void
_M_append(_StateIdT __id);
void
_M_append(_StateSeq& __rhs);
// Clones an entire sequence.
_StateIdT
_M_clone();
private:
_RegexT& _M_nfa; // automaton the identifiers below refer to
_StateIdT _M_start; // first state of the sequence
_StateIdT _M_end1; // first end state
_StateIdT _M_end2; // second end state, or _S_invalid_state_id
};
//@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
} // namespace std
#include <bits/regex_automaton.tcc>
......@@ -79,7 +79,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/
typedef unsigned int syntax_option_type;
/**
/**
* Specifies that the matching of regular expressions against a character
* sequence shall be performed without regard to case.
*/
......@@ -139,7 +139,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* identical to syntax_option_type extended, except that C-style escape
* sequences are supported. These sequences are:
* \\\\, \\a, \\b, \\f, \\n, \\r, \\t , \\v, \\&apos;, &apos;,
* and \\ddd (where ddd is one, two, or three octal digits).
* and \\ddd (where ddd is one, two, or three octal digits).
*/
constexpr syntax_option_type awk = 1 << _S_awk;
......@@ -154,7 +154,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
/**
* Specifies that the grammar recognized by the regular expression engine is
* that used by POSIX utility grep when given the -E option in
* IEEE Std 1003.1-2001. This option is identical to syntax_option_type
* IEEE Std 1003.1-2001. This option is identical to syntax_option_type
* extended, except that newlines are treated as whitespace.
*/
constexpr syntax_option_type egrep = 1 << _S_egrep;
......@@ -215,35 +215,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* expression shall not match [last, last).
*/
constexpr match_flag_type match_not_eol = 1 << _S_not_eol;
/**
* The expression \\b is not matched against the sub-sequence
* [first,first).
*/
constexpr match_flag_type match_not_bow = 1 << _S_not_bow;
/**
* The expression \\b should not be matched against the sub-sequence
* [last,last).
*/
constexpr match_flag_type match_not_eow = 1 << _S_not_eow;
/**
* If more than one match is possible then any match is an acceptable
* result.
*/
constexpr match_flag_type match_any = 1 << _S_any;
/**
* The expression does not match an empty sequence.
*/
constexpr match_flag_type match_not_null = 1 << _S_not_null;
/**
* The expression only matches a sub-sequence that begins at first .
*/
constexpr match_flag_type match_continuous = 1 << _S_continuous;
/**
* --first is a valid iterator position. When this flag is set then the
* flags match_not_bol and match_not_bow are ignored by the regular
......@@ -260,7 +260,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* operations all non-overlapping occurrences of the regular expression
* are located and replaced, and sections of the input that did not match
* the expression are copied unchanged to the output string.
*
*
* Format strings (from ECMA-262 [15.5.4.11]):
* @li $$ The dollar-sign itself ($)
* @li $& The matched substring.
......
// class template regex -*- C++ -*-
// Copyright (C) 2010-2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
/**
* @file bits/regex_cursor.h
* This is an internal header file, included by other library headers.
* Do not attempt to use it directly. @headername{regex}
*/
namespace std _GLIBCXX_VISIBILITY(default)
{
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
/**
* @defgroup regex-detail Base and Implementation Classes
* @ingroup regex
* @{
*/
/// Abstract interface of a cursor that can step forwards and backwards
/// and report when it has reached the end.
struct _PatternCursor
{
  virtual
  ~_PatternCursor()
  { }

  /// Advances the cursor by one position.
  virtual void
  _M_next() = 0;

  /// Moves the cursor back by one position.
  virtual void
  _M_prev() = 0;

  /// Tells whether the cursor is at the end.
  virtual bool
  _M_at_end() const = 0;
};
/// Cursor over the iterator range [__b, __e).  It stores the two bounds
/// and a current position that starts at the beginning of the range.
template<typename _FwdIterT>
  class _SpecializedCursor
  : public _PatternCursor
  {
  public:
    _SpecializedCursor(const _FwdIterT& __b, const _FwdIterT __e)
    : _M_b(__b), _M_c(__b), _M_e(__e)
    { }

    /// Value at the current position.
    typename std::iterator_traits<_FwdIterT>::value_type
    _M_current() const
    {
      return *_M_c;
    }

    /// Steps the current position forward.
    void
    _M_next()
    {
      ++_M_c;
    }

    /// Steps the current position backward.
    void
    _M_prev()
    {
      --_M_c;
    }

    /// Copy of the current position.
    _FwdIterT
    _M_pos() const
    {
      return _M_c;
    }

    /// Lower bound of the range.
    const _FwdIterT&
    _M_begin() const
    {
      return _M_b;
    }

    /// Upper bound of the range.
    const _FwdIterT&
    _M_end() const
    {
      return _M_e;
    }

    /// True when the current position equals the upper bound.
    bool
    _M_at_end() const
    {
      return _M_c == _M_e;
    }

  private:
    _FwdIterT _M_b; // start of the range
    _FwdIterT _M_c; // current position
    _FwdIterT _M_e; // end of the range
  };
// Factory that deduces the iterator type and returns a
// _SpecializedCursor over [__b, __e).
template<typename _FwdIterT>
  inline _SpecializedCursor<_FwdIterT>
  __cursor(const _FwdIterT& __b, const _FwdIterT __e)
  {
    typedef _SpecializedCursor<_FwdIterT> _CursorT;
    return _CursorT(__b, __e);
  }
//@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
} // namespace
......@@ -45,7 +45,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @name 5.3 Error Types
*/
//@{
enum error_type
{
_S_error_collate,
......
// class template regex -*- C++ -*-
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
/**
* @file bits/regex_executor.h
* This is an internal header file, included by other library headers.
* Do not attempt to use it directly. @headername{regex}
*/
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
// Forward declarations only; neither template is defined in this header.
template<typename, typename>
class basic_regex;
template<typename, typename>
class match_results;
_GLIBCXX_END_NAMESPACE_VERSION
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
/**
* @addtogroup regex-detail
* @{
*/
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
class _Executor
{
public:
typedef match_results<_BiIter, _Alloc> _ResultsT;
typedef regex_constants::match_flag_type _FlagT;
virtual
~_Executor()
{ }
// Set matched when string exactly match the pattern.
virtual bool
_M_match() = 0;
// Set matched when some prefix of the string matches the pattern.
virtual bool
_M_search_from_first() = 0;
protected:
typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT;
_Executor(_BiIter __begin,
_BiIter __end,
_ResultsT& __results,
_FlagT __flags,
_SizeT __size)
: _M_current(__begin), _M_end(__end),
_M_results(__results), _M_flags(__flags)
{
__results.resize(__size + 2);
for (auto __it : __results)
__it.matched = false;
}
_BiIter _M_current;
_BiIter _M_end;
_ResultsT& _M_results;
_FlagT _M_flags;
};
/// Executor that runs the private _M_dfs() routine from the start state
/// of the NFA it refers to.
template<typename _BiIter, typename _Alloc,
	 typename _CharT, typename _TraitsT>
  class _DFSExecutor
  : public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
  {
  public:
    typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
    typedef _NFA<_CharT, _TraitsT>                       _RegexT;
    typedef typename _BaseT::_ResultsT                   _ResultsT;
    typedef regex_constants::match_flag_type             _FlagT;

    /// Forwards the range, results and flags to the base class (sized by
    /// the NFA's sub-expression count) and keeps a reference to @p __nfa.
    _DFSExecutor(_BiIter        __begin,
		 _BiIter        __end,
		 _ResultsT&     __results,
		 const _RegexT& __nfa,
		 _FlagT         __flags)
    : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
      _M_nfa(__nfa)
    { }

    /// Runs _M_dfs with __match_mode == true from the start state.
    bool
    _M_match()
    {
      return _M_dfs<true>(_M_nfa._M_start());
    }

    /// Runs _M_dfs with __match_mode == false from the start state.
    bool
    _M_search_from_first()
    {
      return _M_dfs<false>(_M_nfa._M_start());
    }

  private:
    template<bool __match_mode>
      bool
      _M_dfs(_StateIdT __i);

    const _RegexT& _M_nfa;
  };
// It's essentially a variant of Single-Source-Shortest-Path problem, where,
// the matching results is the final distance and should be minimized.
// Instead of using Dijkstra Algorithm, I pick up the queue-optimized
// (BFS-like) Bellman-Ford algorithm,
// SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
//
// Every entry of _M_covered saves the solution(grouping status) for every
// matching head. When states transfer, solutions will be compared and
// deduplicated(based on which greedy mode we have).
//
// Time complexity: O(_M_str_cur.size() * _M_nfa.size())
// Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
// NOTE(review): _M_str_cur and mark_count() are not members visible in this
// header; presumably they mean the input length and the sub-expression
// count -- confirm.
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
class _BFSExecutor
: public _Executor<_BiIter, _Alloc, _CharT, _TraitsT>
{
public:
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
typedef _NFA<_CharT, _TraitsT> _RegexT;
typedef typename _BaseT::_ResultsT _ResultsT;
typedef std::unique_ptr<_ResultsT> _ResultsPtr;
typedef regex_constants::match_flag_type _FlagT;
// Initialises the base class, keeps a reference to the NFA, and, when the
// NFA has a valid start state, seeds _M_covered with a heap-allocated copy
// of the current results for that state. _M_e_closure() is then called
// unconditionally.
_BFSExecutor(_BiIter __begin,
_BiIter __end,
_ResultsT& __results,
const _RegexT& __nfa,
_FlagT __flags)
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
_M_nfa(__nfa)
{
if (_M_nfa._M_start() != _S_invalid_state_id)
_M_covered[_M_nfa._M_start()] =
_ResultsPtr(new _ResultsT(this->_M_results));
_M_e_closure();
}
// Runs _M_main_loop with __match_mode == true.
bool
_M_match()
{ return _M_main_loop<true>(); }
// Runs _M_main_loop with __match_mode == false.
bool
_M_search_from_first()
{ return _M_main_loop<false>(); }
private:
// The helpers below are only declared here; their definitions are expected
// in the .tcc file included at the end of this header.
template<bool __match_mode>
bool
_M_main_loop();
void
_M_e_closure();
void
_M_move();
bool
_M_match_less_than(_StateIdT __u, _StateIdT __v) const;
bool
_M_includes_some() const;
// Maps a state identifier to the results object owned for that state.
std::map<_StateIdT, _ResultsPtr> _M_covered;
const _RegexT& _M_nfa;
};
//@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
} // namespace std
#include <bits/regex_executor.tcc>
// class template regex -*- C++ -*-
// Copyright (C) 2010-2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.
/**
* @file bits/regex_grep_matcher.h
* This is an internal header file, included by other library headers.
* Do not attempt to use it directly. @headername{regex}
*/
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
// Forward declarations only; neither template is defined in this header.
template<typename _BiIter>
class sub_match;
template<typename _Bi_iter, typename _Allocator>
class match_results;
_GLIBCXX_END_NAMESPACE_VERSION
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
/**
* @defgroup regex-detail Base and Implementation Classes
* @ingroup regex
* @{
*/
/// A _Results facade specialized for wrapping a templated match_results.
///
/// The wrapped match_results is held by reference.  _M_managed records
/// whether this object allocated it (private copy constructor) and must
/// therefore delete it in the destructor.
template<typename _FwdIterT, typename _Alloc>
class _SpecializedResults
: public _Results
{
public:
// Wraps the caller-provided match_results __m without taking ownership
// (_M_managed is set to false); defined after the class.
_SpecializedResults(const _Automaton::_SizeT __size,
const _SpecializedCursor<_FwdIterT>& __cursor,
match_results<_FwdIterT, _Alloc>& __m);
// Frees the wrapped object only when it was allocated by the copy
// constructor below.
~_SpecializedResults()
{
if (_M_managed)
delete &_M_results;
}
private:
// Copying allocates a new match_results on the heap and marks it as
// managed.  Private: used by _M_clone().
_SpecializedResults(const _SpecializedResults& __rhs)
: _M_results(*new match_results<_FwdIterT, _Alloc>(__rhs._M_results)),
_M_managed(true)
{ }
public:
// Stores the cursor's current position into entry __i: into .first when
// __j == 0, into .second otherwise (see the definition after the class).
void
_M_set_pos(int __i, int __j, const _PatternCursor& __pc);
// Sets entry __i to the full range of the cursor (begin .. end).
// The static_cast assumes __pc really is a _SpecializedCursor<_FwdIterT>;
// nothing here checks that.
void
_M_set_range(int __i, const _PatternCursor& __pc)
{
typedef const _SpecializedCursor<_FwdIterT>& _CursorT;
_CursorT __c = static_cast<_CursorT>(__pc);
_M_results.at(__i).first = __c._M_begin();
_M_results.at(__i).second = __c._M_end();
}
// Sets the matched flag of entry __i.
void
_M_set_matched(int __i, bool __is_matched)
{ _M_results.at(__i).matched = __is_matched; }
// Returns a heap-allocated copy that owns its own match_results.
std::unique_ptr<_Results>
_M_clone() const
{ return unique_ptr<_Results>(new _SpecializedResults(*this)); }
// Copies the wrapped match_results from __rhs.
// The static_cast assumes __rhs is a _SpecializedResults of the same
// instantiation; nothing here checks that.
void
_M_assign(const _Results& __rhs)
{
auto __r = static_cast<const _SpecializedResults*>(&__rhs);
_M_results = __r->_M_results;
}
private:
match_results<_FwdIterT, _Alloc>& _M_results; // wrapped results object
bool _M_managed; // true when the destructor must delete _M_results
};
// Binds to the caller's match_results (not owned), empties it, and fills
// it with __size default entries followed by two extra entries: an empty
// range at the cursor's begin and an empty range at the cursor's end.
template<typename _FwdIterT, typename _Alloc>
  _SpecializedResults<_FwdIterT, _Alloc>::
  _SpecializedResults(const _Automaton::_SizeT __size,
		      const _SpecializedCursor<_FwdIterT>& __cursor,
		      match_results<_FwdIterT, _Alloc>& __m)
  : _M_results(__m), _M_managed(false)
  {
    typedef typename match_results<_FwdIterT, _Alloc>::value_type _SubT;

    _M_results.clear();
    // Room for the two extra entries is reserved up front.
    _M_results.reserve(__size + 2);
    _M_results.resize(__size);

    _SubT __at_begin;
    __at_begin.first = __at_begin.second = __cursor._M_begin();
    _M_results.push_back(__at_begin);

    _SubT __at_end;
    __at_end.first = __at_end.second = __cursor._M_end();
    _M_results.push_back(__at_end);
  }
// Writes the cursor's current position into entry __i of the results:
// __j == 0 selects the .first member, any other value selects .second.
template<typename _FwdIterT, typename _Alloc>
  void
  _SpecializedResults<_FwdIterT, _Alloc>::
  _M_set_pos(int __i, int __j, const _PatternCursor& __pc)
  {
    typedef const _SpecializedCursor<_FwdIterT>& _CursorT;
    _CursorT __typed = static_cast<_CursorT>(__pc);

    if (__j != 0)
      {
	_M_results.at(__i).second = __typed._M_pos();
	return;
      }
    _M_results.at(__i).first = __typed._M_pos();
  }
/// Executes a regular expression NFA/DFA over a range using a
/// variant of the parallel execution algorithm featured in the grep
/// utility, modified to use Laurikari tags.
///
/// This base class only stores the automaton, the cursor and the results;
/// the matching entry points are pure virtual.
class _Grep_matcher
{
public:
  // __flags is accepted but not stored by this constructor.
  _Grep_matcher(_PatternCursor&                  __p,
		_Results&                        __r,
		const _AutomatonPtr&             __automaton,
		regex_constants::match_flag_type __flags)
  : _M_nfa(static_pointer_cast<_Nfa>(__automaton)),
    _M_str_cur(__p), _M_results(__r)
  { }

  virtual
  ~_Grep_matcher()
  { }

  // Set matched when string exactly match the pattern.
  virtual bool
  _M_match() = 0;

  // Set matched when some prefix of the string matches the pattern.
  virtual bool
  _M_search_from_first() = 0;

protected:
  const std::shared_ptr<_Nfa> _M_nfa;      // automaton, downcast to _Nfa
  _PatternCursor&             _M_str_cur;  // cursor into the input
  _Results&                   _M_results;  // where results are written
};
// Time complexity: exponential
// Space complexity: O(_M_str_cur.size())
// _M_dfs() take a state, along with current string cursor(_M_str_cur),
// trying to match current state with current character.
// Only _S_opcode_match will consume a character.
class _DFSMatcher
: public _Grep_matcher
{
public:
  // All arguments are forwarded unchanged to _Grep_matcher.
  _DFSMatcher(_PatternCursor&                  __p,
	      _Results&                        __r,
	      const _AutomatonPtr&             __automaton,
	      regex_constants::match_flag_type __flags)
  : _Grep_matcher(__p, __r, __automaton, __flags)
  { }

  // Runs _M_dfs with __match_mode == true from the NFA's start state.
  bool
  _M_match()
  {
    return _M_dfs<true>(_M_nfa->_M_start());
  }

  // Runs _M_dfs with __match_mode == false from the NFA's start state.
  bool
  _M_search_from_first()
  {
    return _M_dfs<false>(_M_nfa->_M_start());
  }

private:
  template<bool __match_mode>
    bool
    _M_dfs(_StateIdT __i);
};
// It's essentially a variant of Single-Source-Shortest-Path problem, where,
// the matching results is the final distance and should be minimized.
// Instead of using Dijkstra Algorithm, I pick up the queue-optimized
// (BFS-like) Bellman-Ford algorithm,
// SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
//
// Every entry of _M_current saves the solution(grouping status) for every
// matching head. When states transfer, solutions will be compared and
// deduplicated(based on which greedy mode we have).
//
// Time complexity: O(_M_str_cur.size() * _M_nfa.size())
// Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
class _BFSMatcher
: public _Grep_matcher
{
public:
// Forwards all arguments to _Grep_matcher. When the NFA has a valid start
// state, _M_current is seeded with a clone of the results for that state.
// _M_e_closure() is then called unconditionally.
_BFSMatcher(_PatternCursor& __p,
_Results& __r,
const _AutomatonPtr& __automaton,
regex_constants::match_flag_type __flags)
: _Grep_matcher(__p, __r, __automaton, __flags)
{
if (_M_nfa->_M_start() != _S_invalid_state_id)
_M_current[_M_nfa->_M_start()] = _M_results._M_clone();
_M_e_closure();
}
// Runs _M_main_loop with __match_mode == true.
bool
_M_match()
{ return _M_main_loop<true>(); }
// Runs _M_main_loop with __match_mode == false.
bool
_M_search_from_first()
{ return _M_main_loop<false>(); }
private:
// The helpers below are only declared here; their definitions are expected
// in the .tcc file included at the end of this header.
template<bool __match_mode>
bool
_M_main_loop();
void
_M_e_closure();
void
_M_move();
bool
_M_match_less_than(_StateIdT __u, _StateIdT __v) const;
bool
_M_includes_some() const;
// Maps a state identifier to the results object owned for that state.
std::map<_StateIdT, std::unique_ptr<_Results>> _M_current;
};
//@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
} // namespace std
#include <bits/regex_grep_matcher.tcc>
......@@ -54,13 +54,11 @@
#include <utility>
#include <vector>
#include <bits/range_access.h>
#include <bits/regex_constants.h>
#include <bits/regex_error.h>
#include <bits/regex_cursor.h>
#include <bits/regex_nfa.h>
#include <bits/regex_automaton.h>
#include <bits/regex_compiler.h>
#include <bits/regex_grep_matcher.h>
#include <bits/regex_executor.h>
#include <bits/regex.h>
#endif // C++11
......
......@@ -38,12 +38,10 @@ template<typename _Bi_iter, typename _Alloc,
regex_constants::match_flag_type __flags
= regex_constants::match_default)
{
__detail::_AutomatonPtr __a = __re._M_get_automaton();
__detail::_Automaton::_SizeT __sz = __a->_M_sub_count();
__detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
VERIFY( dynamic_cast<__detail::_DFSMatcher *>(
&*__a->_M_get_matcher(__cs, __r, __a, __flags)) != nullptr );
VERIFY( (dynamic_cast
<__detail::_DFSExecutor<_Bi_iter, _Alloc, _Ch_type, _Rx_traits>*>
(&*__detail::__get_executor(__s, __e, __m, __re, __flags))
!= nullptr) );
}
void
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment