Commit 1b488e33 by Tim Shen Committed by Tim Shen

regex.h: Executor caller.

2013-08-22  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex.h: Executor caller.
	* include/bits/regex_executor.h: Fix empty grouping problem.
	* include/bits/regex_executor.tcc: Same.
	* testsuite/28_regex/algorithms/regex_match/ecma/cstring_emptygroup.cc:
	  New.

From-SVN: r201914
parent 9ad30113
2013-08-22 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h: Executor caller.
* include/bits/regex_executor.h: Fix empty grouping problem.
* include/bits/regex_executor.tcc: Same.
* testsuite/28_regex/algorithms/regex_match/ecma/cstring_emptygroup.cc:
New.
2013-08-20 Phil Muldoon <pmuldoon@redhat.com> 2013-08-20 Phil Muldoon <pmuldoon@redhat.com>
PR libstdc++/53477 PR libstdc++/53477
......
...@@ -2211,7 +2211,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2211,7 +2211,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
if (__re._M_automaton == nullptr) if (__re._M_automaton == nullptr)
return false; return false;
if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match()) __detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match();
if (__m.size() > 0 && __m[0].matched)
{ {
for (auto __it : __m) for (auto __it : __m)
if (!__it.matched) if (!__it.matched)
...@@ -2371,22 +2372,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2371,22 +2372,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__re._M_automaton == nullptr) if (__re._M_automaton == nullptr)
return false; return false;
for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo? for (auto __cur = __first; __cur != __last; ++__cur) // Any KMP-like algo?
if (__detail::__get_executor(__cur, __last, __m, __re, __flags) {
->_M_search_from_first()) __detail::__get_executor(__cur, __last, __m, __re, __flags)
{ ->_M_search_from_first();
for (auto __it : __m) if (__m.size() > 0 && __m[0].matched)
if (!__it.matched) {
__it.first = __it.second = __last; for (auto __it : __m)
__m.at(__m.size()).first = __first; if (!__it.matched)
__m.at(__m.size()).second = __m[0].first; __it.first = __it.second = __last;
__m.at(__m.size()+1).first = __m[0].second; __m.at(__m.size()).first = __first;
__m.at(__m.size()+1).second = __last; __m.at(__m.size()).second = __m[0].first;
__m.at(__m.size()).matched = __m.at(__m.size()+1).first = __m[0].second;
(__m.prefix().first != __m.prefix().second); __m.at(__m.size()+1).second = __last;
__m.at(__m.size()+1).matched = __m.at(__m.size()).matched =
(__m.suffix().first != __m.suffix().second); (__m.prefix().first != __m.prefix().second);
return true; __m.at(__m.size()+1).matched =
} (__m.suffix().first != __m.suffix().second);
return true;
}
}
return false; return false;
} }
......
...@@ -28,12 +28,17 @@ ...@@ -28,12 +28,17 @@
* Do not attempt to use it directly. @headername{regex} * Do not attempt to use it directly. @headername{regex}
*/ */
// TODO: convert comments to doxygen format.
namespace std _GLIBCXX_VISIBILITY(default) namespace std _GLIBCXX_VISIBILITY(default)
{ {
_GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename, typename> template<typename, typename>
class basic_regex; class basic_regex;
template<typename>
class sub_match;
template<typename, typename> template<typename, typename>
class match_results; class match_results;
_GLIBCXX_END_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION
...@@ -52,19 +57,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -52,19 +57,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
class _Executor class _Executor
{ {
public: public:
typedef match_results<_BiIter, _Alloc> _ResultsT; typedef match_results<_BiIter, _Alloc> _ResultsT;
typedef regex_constants::match_flag_type _FlagT; typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
typedef regex_constants::match_flag_type _FlagT;
virtual virtual
~_Executor() ~_Executor()
{ } { }
// Set matched when string exactly match the pattern. // Set matched when string exactly match the pattern.
virtual bool virtual void
_M_match() = 0; _M_match() = 0;
// Set matched when some prefix of the string matches the pattern. // Set matched when some prefix of the string matches the pattern.
virtual bool virtual void
_M_search_from_first() = 0; _M_search_from_first() = 0;
protected: protected:
...@@ -74,20 +80,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -74,20 +80,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_ResultsT& __results, _ResultsT& __results,
_FlagT __flags, _FlagT __flags,
_SizeT __size) _SizeT __size)
: _M_current(__begin), _M_end(__end), : _M_current(__begin), _M_end(__end), _M_results(__results),
_M_results(__results), _M_flags(__flags) _M_flags(__flags)
{ {
__results.resize(__size + 2); __size += 2;
for (auto __it : __results) _M_results.resize(__size);
__it.matched = false; for (auto __i = 0; __i < __size; __i++)
_M_results[__i].matched = false;
} }
_BiIter _M_current; _BiIter _M_current;
_BiIter _M_end; _BiIter _M_end;
_ResultsT& _M_results; _ResultsVec& _M_results;
_FlagT _M_flags; _FlagT _M_flags;
}; };
// A _DFSExecutor performs a DFS on the given NFA and input string. At the
// very beginning the executor stands in the start state, then it tries every
// possible state transition from the current state recursively. Some state
// transitions consume input, say, a single-char matcher or a back-reference
// matcher; some do not, like assertions or other anchor nodes. When the
// input is exhausted and the current state is an accepting state, the whole
// executor returns true.
//
// TODO: This approach is exponentially slow for certain input.
// Try to compile the NFA to a DFA.
//
// Time complexity: exponential
// Space complexity: O(__end - __begin)
template<typename _BiIter, typename _Alloc, template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT> typename _CharT, typename _TraitsT>
class _DFSExecutor class _DFSExecutor
...@@ -97,6 +117,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -97,6 +117,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT; typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
typedef _NFA<_CharT, _TraitsT> _RegexT; typedef _NFA<_CharT, _TraitsT> _RegexT;
typedef typename _BaseT::_ResultsT _ResultsT; typedef typename _BaseT::_ResultsT _ResultsT;
typedef typename _BaseT::_ResultsVec _ResultsVec;
typedef regex_constants::match_flag_type _FlagT; typedef regex_constants::match_flag_type _FlagT;
_DFSExecutor(_BiIter __begin, _DFSExecutor(_BiIter __begin,
...@@ -105,37 +126,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -105,37 +126,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _RegexT& __nfa, const _RegexT& __nfa,
_FlagT __flags) _FlagT __flags)
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()), : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
_M_traits(_TraitsT()), _M_nfa(__nfa) _M_traits(_TraitsT()), _M_nfa(__nfa), _M_results_ret(this->_M_results)
{ } { }
bool void
_M_match() _M_match()
{ return _M_dfs<true>(_M_nfa._M_start()); } { _M_dfs<true>(_M_nfa._M_start()); }
bool void
_M_search_from_first() _M_search_from_first()
{ return _M_dfs<false>(_M_nfa._M_start()); } { _M_dfs<false>(_M_nfa._M_start()); }
private: private:
template<bool __match_mode> template<bool __match_mode>
bool bool
_M_dfs(_StateIdT __i); _M_dfs(_StateIdT __i);
_ResultsVec _M_results_ret;
_TraitsT _M_traits; _TraitsT _M_traits;
const _RegexT& _M_nfa; const _RegexT& _M_nfa;
}; };
// It's essentially a variant of Single-Source-Shortest-Path problem, where, // Like the DFS approach, it try every possible state transition; Unlike DFS,
// the matching results is the final distance and should be minimized. // it uses a queue instead of a stack to store matching states. It's a BFS
// Instead of using Dijkstra Algorithm, I pick up the queue-optimizaed // approach.
// (BFS-like) Bellman-Ford algorithm, //
// SPFA(http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm). // Russ Cox's article(http://swtch.com/~rsc/regexp/regexp1.html) explained
// this algorithm clearly.
// //
// Every entry of _M_covered saves the solution(grouping status) for every // Every entry of _M_covered saves the solution(grouping status) for every
// matching head. When states transfer, solutions will be compared and // matching head. When states transit, solutions will be compared and
// deduplicated(based on which greedy mode we have). // deduplicated(based on which greedy mode we have).
// //
// Time complexity: O(_M_str_cur.size() * _M_nfa.size()) // Time complexity: O((__end - __begin) * _M_nfa.size())
// Space complexity: O(_M_nfa.size() * _M_nfa.mark_count()) // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
template<typename _BiIter, typename _Alloc, template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT> typename _CharT, typename _TraitsT>
...@@ -146,12 +169,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -146,12 +169,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT; typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
typedef _NFA<_CharT, _TraitsT> _RegexT; typedef _NFA<_CharT, _TraitsT> _RegexT;
typedef typename _BaseT::_ResultsT _ResultsT; typedef typename _BaseT::_ResultsT _ResultsT;
typedef std::unique_ptr<_ResultsT> _ResultsPtr; typedef typename _BaseT::_ResultsVec _ResultsVec;
typedef std::unique_ptr<_ResultsVec> _ResultsPtr;
typedef regex_constants::match_flag_type _FlagT; typedef regex_constants::match_flag_type _FlagT;
_BFSExecutor(_BiIter __begin, _BFSExecutor(_BiIter __begin,
_BiIter __end, _BiIter __end,
_ResultsT& __results, _ResultsT& __results,
const _RegexT& __nfa, const _RegexT& __nfa,
_FlagT __flags) _FlagT __flags)
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()), : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()),
...@@ -159,21 +183,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -159,21 +183,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
if (_M_nfa._M_start() != _S_invalid_state_id) if (_M_nfa._M_start() != _S_invalid_state_id)
_M_covered[_M_nfa._M_start()] = _M_covered[_M_nfa._M_start()] =
_ResultsPtr(new _ResultsT(this->_M_results)); _ResultsPtr(new _ResultsVec(this->_M_results));
_M_e_closure(); _M_e_closure();
} }
bool void
_M_match() _M_match()
{ return _M_main_loop<true>(); } { _M_main_loop<true>(); }
bool void
_M_search_from_first() _M_search_from_first()
{ return _M_main_loop<false>(); } { _M_main_loop<false>(); }
private: private:
template<bool __match_mode> template<bool __match_mode>
bool void
_M_main_loop(); _M_main_loop();
void void
...@@ -183,13 +207,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -183,13 +207,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_move(); _M_move();
bool bool
_M_match_less_than(_StateIdT __u, _StateIdT __v) const; _M_match_less_than(const _ResultsVec& __u, const _ResultsVec& __v) const;
bool bool
_M_includes_some() const; _M_includes_some() const;
std::map<_StateIdT, _ResultsPtr> _M_covered; std::map<_StateIdT, _ResultsPtr> _M_covered;
const _RegexT& _M_nfa; const _RegexT& _M_nfa;
}; };
//@} regex-detail //@} regex-detail
......
// { dg-options "-std=gnu++11" }
//
// 2013-08-22 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.2 regex_match
// Tests ECMAScript empty-grouping against a C-string.
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
// Checks that ECMAScript patterns built around an empty capture group
// match the empty C-string.
void
test01()
{
  bool test __attribute__((unused)) = true;

  // An empty group under '*', followed by a back-reference to that group.
  // Both the whole match and sub-match 1 must be flagged as matched.
  {
    std::regex pattern("()*\\1");
    std::cmatch results;
    const char subject[] = "";
    VERIFY( std::regex_match(subject, results, pattern) );
    VERIFY( results.size() == 2 );
    VERIFY( results[0].matched );
    VERIFY( results[1].matched );
  }

  // An empty group under '*' on its own; only the overall match is checked.
  {
    std::regex pattern("()*");
    std::cmatch results;
    const char subject[] = "";
    VERIFY( std::regex_match(subject, results, pattern) );
  }
}
// Test driver: runs the empty-group checks. Falling off the end of main
// yields exit status 0.
int
main()
{
  test01();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment