Commit 37c74e28 by Tim Shen Committed by Tim Shen

Reimplment matcher using Depth-first search(backtracking).

2013-07-24  Tim Shen  <timshen91@gmail.com>

	Reimplment matcher using Depth-first search(backtracking).
	PR libstdc++/53622
	PR libstdc++/57173
	* include/bits/regex.h: regex_match() and regex_search().
	* include/bits/regex_cursor.h: Fix _M_set_pos().
	* include/bits/regex_grep_matcher.h: add _M_dfs_match().
	* include/bits/regex_grep_matcher.tcc: Implement it.
	* testsuite/28_regex/algorithms/regex_match/extended/string_group_01.cc:
	New.
	* testsuite/28_regex/algorithms/regex_match/extended/string_group_02.cc:
	New.
	* testsuite/28_regex/algorithms/regex_search/basic/string_01.cc:
	Remove xfail.

From-SVN: r201213
parent c0f01192
2013-07-24 Tim Shen <timshen91@gmail.com>
Reimplment matcher using Depth-first search(backtracking).
PR libstdc++/53622
PR libstdc++/57173
* include/bits/regex.h: regex_match() and regex_search().
* include/bits/regex_cursor.h: Fix _M_set_pos().
* include/bits/regex_grep_matcher.h: add _M_dfs_match().
* include/bits/regex_grep_matcher.tcc: Implement it.
* testsuite/28_regex/algorithms/regex_match/extended/string_group_01.cc:
New.
* testsuite/28_regex/algorithms/regex_match/extended/string_group_02.cc:
New.
* testsuite/28_regex/algorithms/regex_search/basic/string_01.cc:
Remove xfail.
2013-07-23 Tim Shen <timshen91@gmail.com>
Implement regex_iterator and regex_token_iterator.
......
......@@ -2185,8 +2185,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__detail::_SpecializedCursor<_Bi_iter> __cs(__s, __e);
__detail::_SpecializedResults<_Bi_iter, _Alloc> __r(__sz, __cs, __m);
__detail::_Grep_matcher __matcher(__cs, __r, __a, __flags);
__matcher._M_match();
return __m[0].matched;
return __matcher._M_dfs_match();
}
/**
......@@ -2338,8 +2337,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
__detail::_SpecializedCursor<_Bi_iter> __curs(__cur, __last);
__detail::_Grep_matcher __matcher(__curs, __r, __a, __flags);
__matcher._M_search_from_first();
if (__m[0].matched)
if (__matcher._M_dfs_search_from_first())
{
__r._M_set_range(__m.size(),
__detail::_SpecializedCursor<_Bi_iter>
......
......@@ -45,6 +45,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
virtual ~_PatternCursor() { };
virtual void _M_next() = 0;
virtual void _M_prev() = 0;
virtual bool _M_at_end() const = 0;
};
......@@ -66,6 +67,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_next()
{ ++_M_c; }
void
_M_prev()
{ --_M_c; }
_FwdIterT
_M_pos() const
{ return _M_c; }
......
......@@ -107,7 +107,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__j == 0)
_M_results.at(__i).first = __c._M_pos();
else
_M_results.at(__i).second = __c._M_pos()+1;
_M_results.at(__i).second = __c._M_pos();
}
/// A stack of states used in evaluating the NFA.
......@@ -127,9 +127,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_pattern(__p), _M_results(__r)
{ }
void _M_match();
void
_M_match();
void
_M_search_from_first();
void _M_search_from_first();
bool
_M_dfs_match()
{ return _M_dfs<true>(_M_nfa->_M_start()); }
bool
_M_dfs_search_from_first()
{ return _M_dfs<false>(_M_nfa->_M_start()); }
private:
_StateSet
......@@ -141,6 +151,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateSet
_M_e_closure(_StateStack& __stack, const _StateSet& __s);
template<bool __match_mode>
bool
_M_dfs(_StateIdT __i);
const std::shared_ptr<_Nfa> _M_nfa;
_PatternCursor& _M_pattern;
_Results& _M_results;
......
......@@ -103,6 +103,56 @@ namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
// _M_dfs() take a state, along with current string cursor(_M_pattern),
// trying to match current state with current character.
// Only _S_opcode_match will consume a character.
// TODO: This is too slow. Try to compile the NFA to a DFA.
template<bool __match_mode>
bool _Grep_matcher::
_M_dfs(_StateIdT __i)
{
if (__i == _S_invalid_state_id)
// This is not that certain. Need deeper investigate.
return false;
const auto& __state = (*_M_nfa)[__i];
bool __ret = false;
switch (__state._M_opcode)
{
case _S_opcode_alternative:
// Greedy mode by default. For non-greedy mode,
// swap _M_alt and _M_next.
__ret = _M_dfs<__match_mode>(__state._M_alt)
|| _M_dfs<__match_mode>(__state._M_next);
break;
case _S_opcode_subexpr_begin:
__state._M_tagger(_M_pattern, _M_results);
__ret = _M_dfs<__match_mode>(__state._M_next);
break;
case _S_opcode_subexpr_end:
__state._M_tagger(_M_pattern, _M_results);
__ret = _M_dfs<__match_mode>(__state._M_next);
_M_results._M_set_matched(__state._M_subexpr, __ret);
break;
case _S_opcode_match:
if (!_M_pattern._M_at_end() && __state._M_matches(_M_pattern))
{
_M_pattern._M_next();
__ret = _M_dfs<__match_mode>(__state._M_next);
_M_pattern._M_prev();
}
break;
case _S_opcode_accept:
if (__match_mode)
__ret = _M_pattern._M_at_end();
else
__ret = true;
break;
default:
_GLIBCXX_DEBUG_ASSERT( false );
}
return __ret;
}
inline void _Grep_matcher::
_M_match()
{
......
// { dg-options "-std=gnu++11" }
//
// 2013-07-23 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.2 regex_match
// Tests Extended grouping against a std::string target.
#include <regex>
#include <testsuite_hooks.h>
// libstdc++/53622
void
test01()
{
bool test __attribute__((unused)) = true;
std::regex re("zxcv/(one.*)abc", std::regex::extended);
std::string target("zxcv/onetwoabc");
std::smatch m;
VERIFY( std::regex_search(target, m, re) );
VERIFY( m.size() == 2 );
VERIFY( m[0].matched == true );
VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
VERIFY( m[1].matched == true );
VERIFY( std::string(m[1].first, m[1].second) == "onetwo" );
}
int
main()
{
test01();
return 0;
}
// { dg-options "-std=gnu++11" }
//
// 2013-07-23 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.3 regex_search
// Tests Extended against a std::string target.
#include <regex>
#include <testsuite_hooks.h>
#include <iostream>
// libstdc++/57173
void
test01()
{
bool test __attribute__((unused)) = true;
std::regex re("/asdf(/.*)", std::regex::extended);
std::string target("/asdf/qwerty");
std::smatch m;
VERIFY( std::regex_match(target, m, re) );
VERIFY( m.size() == 2 );
VERIFY( std::string(m[1].first, m[1].second) == "/qwerty");
}
int
main()
{
test01();
return 0;
}
// { dg-options "-std=gnu++11" }
// { dg-do run { xfail *-*-* } }
//
// 2013-07-17 Tim Shen <timshen91@gmail.com>
......@@ -32,7 +31,7 @@ test01()
{
bool test __attribute__((unused)) = true;
std::regex re("as(df)", std::regex::basic);
std::regex re("as\\(df\\)", std::regex::basic);
std::string target("xxasdfyy");
std::smatch m;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment