Commit c2669da9 by Tim Shen Committed by Tim Shen

Makefile.am: Add regex.tcc.

2013-09-24  Tim Shen  <timshen91@gmail.com>

	* include/Makefile.am: Add regex.tcc.
	* include/Makefile.in: Regenerate.
	* include/bits/regex.h: Move definitions to regex.tcc.
	* include/bits/regex.tcc: New.
	(match_results::format, regex_replace): Implement.
	* include/bits/regex_compiler.h: Move _M_flags to the top of class
	member list, because other members' initialization depends on it.
	* include/bits/regex_compiler.tcc
	(_Compiler<>::_Compiler): Adjust member initializations.
	(_Compiler<>::_M_quantifier): Fix ungreedy interval quantifier.
	* include/bits/regex_executor.h: Remove _RegexT from _*Executor classes.
	In the future, all regex classes may refactor to *Impl style.
	* include/bits/regex_executor.tcc (_Executor::_M_set_results):
	Merge identical code from _*Executor classes.
	* testsuite/28_regex/algorithms/regex_match/extended/
	string_dispatch_01.cc (fake_match<>): Adjust the hacking-style testcase
	caller for new __get_executors interface.
	* testsuite/28_regex/algorithms/regex_replace/char/basic_replace.cc:
	New.
	* testsuite/28_regex/match_results/format.cc: New.
	* testsuite/28_regex/traits/char/lookup_collatename.cc: Remove digraph
	testcase.
	* testsuite/28_regex/traits/wchar_t/lookup_collatename.cc: Likewise.

From-SVN: r202858
parent 5704e022
2013-09-24 Tim Shen <timshen91@gmail.com>
* include/Makefile.am: Add regex.tcc.
* include/Makefile.in: Regenerate.
* include/bits/regex.h: Move definitions to regex.tcc.
* include/bits/regex.tcc: New.
(match_results::format, regex_replace): Implement.
* include/bits/regex_compiler.h: Move _M_flags to the top of class
member list, because other members' initialization depends on it.
* include/bits/regex_compiler.tcc
(_Compiler<>::_Compiler): Adjust member initializations.
(_Compiler<>::_M_quantifier): Fix ungreedy interval quantifier.
* include/bits/regex_executor.h: Remove _RegexT from _*Executor classes.
In the future, all regex classes may refactor to *Impl style.
* include/bits/regex_executor.tcc (_Executor::_M_set_results):
Merge identical code from _*Executor classes.
* testsuite/28_regex/algorithms/regex_match/extended/
string_dispatch_01.cc (fake_match<>): Adjust the hacking-style testcase
caller for new __get_executors interface.
* testsuite/28_regex/algorithms/regex_replace/char/basic_replace.cc:
New.
* testsuite/28_regex/match_results/format.cc: New.
* testsuite/28_regex/traits/char/lookup_collatename.cc: Remove digraph
testcase.
* testsuite/28_regex/traits/wchar_t/lookup_collatename.cc: Likewise.
2013-09-23 Paul Pluzhnikov <ppluzhnikov@google.com>
* src/c++11/snprintf_lite.cc (__concat_size_t): Use
......
......@@ -126,6 +126,7 @@ bits_headers = \
${bits_srcdir}/random.tcc \
${bits_srcdir}/range_access.h \
${bits_srcdir}/regex.h \
${bits_srcdir}/regex.tcc \
${bits_srcdir}/regex_constants.h \
${bits_srcdir}/regex_error.h \
${bits_srcdir}/regex_scanner.h \
......
......@@ -393,6 +393,7 @@ bits_headers = \
${bits_srcdir}/random.tcc \
${bits_srcdir}/range_access.h \
${bits_srcdir}/regex.h \
${bits_srcdir}/regex.tcc \
${bits_srcdir}/regex_constants.h \
${bits_srcdir}/regex_error.h \
${bits_srcdir}/regex_scanner.h \
......
......@@ -120,13 +120,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return ret;
}
_FlagT _M_flags;
const _TraitsT& _M_traits;
const _CtypeT& _M_ctype;
_ScannerT _M_scanner;
_RegexT _M_nfa;
_StringT _M_value;
_StackT _M_stack;
_FlagT _M_flags;
};
template<typename _CharT, typename _TraitsT>
......@@ -207,7 +207,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__s.data() + __s.size());
if (__st.empty())
__throw_regex_error(regex_constants::error_collate);
// TODO: digraph
_M_char_set.insert(_M_translate(__st[0]));
}
......
......@@ -63,9 +63,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_Compiler<_FwdIter, _CharT, _TraitsT>::
_Compiler(_FwdIter __b, _FwdIter __e,
const _TraitsT& __traits, _FlagT __flags)
: _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
_M_ctype(std::use_facet<std::ctype<_CharT>>(_M_traits.getloc())),
_M_nfa(__flags), _M_flags(__flags)
: _M_flags((__flags
& (regex_constants::ECMAScript
| regex_constants::basic
| regex_constants::extended
| regex_constants::grep
| regex_constants::egrep
| regex_constants::awk))
? __flags
: __flags | regex_constants::ECMAScript),
_M_traits(__traits),
_M_scanner(__b, __e, _M_flags, _M_traits.getloc()),
_M_ctype(std::use_facet<std::ctype<_CharT>>(_M_traits.getloc())),
_M_nfa(_M_flags)
{
_StateSeqT __r(_M_nfa, _M_nfa._M_start());
__r._M_append(_M_nfa._M_insert_subexpr_begin());
......@@ -85,7 +95,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_disjunction()
{
this->_M_alternative();
// TODO empty alternative like, um, "(|asdf)"
while (_M_match_token(_ScannerT::_S_token_or))
{
_StateSeqT __alt1 = _M_pop();
......@@ -170,7 +179,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_Compiler<_FwdIter, _CharT, _TraitsT>::
_M_quantifier()
{
bool __neg = regex_constants::ECMAScript;
bool __neg = (_M_flags & regex_constants::ECMAScript);
auto __init = [this, &__neg]()
{
if (_M_stack.empty())
......@@ -207,53 +216,66 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
else if (_M_match_token(_ScannerT::_S_token_interval_begin))
{
__init();
if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat);
if (!_M_match_token(_ScannerT::_S_token_dup_count))
__throw_regex_error(regex_constants::error_badbrace);
_StateSeqT __r(_M_pop());
_StateSeqT __e(_M_nfa, _M_nfa._M_insert_dummy());
int __min_rep = _M_cur_int_value(10);
bool __infi = false;
int __n;
// {3
for (int __i = 0; __i < __min_rep; ++__i)
__e._M_append(__r._M_clone());
if (_M_match_token(_ScannerT::_S_token_comma))
if (_M_match_token(_ScannerT::_S_token_dup_count)) // {3,7}
{
int __n = _M_cur_int_value(10) - __min_rep;
if (__n < 0)
__throw_regex_error(regex_constants::error_badbrace);
auto __end = _M_nfa._M_insert_dummy();
// _M_alt is the "match more" branch, and _M_next is the
// "match less" one. Switch _M_alt and _M_next of all created
// nodes. This is a hacking but IMO works well.
std::stack<_StateIdT> __stack;
for (int __i = 0; __i < __n; ++__i)
{
auto __tmp = __r._M_clone();
auto __alt = _M_nfa._M_insert_alt(__tmp._M_start,
__end, __neg);
__stack.push(__alt);
__e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end));
}
__e._M_append(__end);
while (!__stack.empty())
{
auto& __tmp = _M_nfa[__stack.top()];
__stack.pop();
swap(__tmp._M_next, __tmp._M_alt);
}
}
else // {3,}
{
auto __tmp = __r._M_clone();
_StateSeqT __s(_M_nfa,
_M_nfa._M_insert_alt(_S_invalid_state_id,
__tmp._M_start, __neg));
__tmp._M_append(__s);
__e._M_append(__s);
}
__n = _M_cur_int_value(10) - __min_rep;
else
__infi = true;
else
__n = 0;
if (!_M_match_token(_ScannerT::_S_token_interval_end))
__throw_regex_error(regex_constants::error_brace);
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
for (int __i = 0; __i < __min_rep; ++__i)
__e._M_append(__r._M_clone());
if (__infi)
{
auto __tmp = __r._M_clone();
_StateSeqT __s(_M_nfa,
_M_nfa._M_insert_alt(_S_invalid_state_id,
__tmp._M_start, __neg));
__tmp._M_append(__s);
__e._M_append(__s);
}
else
{
if (__n < 0)
__throw_regex_error(regex_constants::error_badbrace);
auto __end = _M_nfa._M_insert_dummy();
// _M_alt is the "match more" branch, and _M_next is the
// "match less" one. Switch _M_alt and _M_next of all created
// nodes. This is a hacking but IMO works well.
std::stack<_StateIdT> __stack;
for (int __i = 0; __i < __n; ++__i)
{
auto __tmp = __r._M_clone();
auto __alt = _M_nfa._M_insert_alt(__tmp._M_start,
__end, __neg);
__stack.push(__alt);
__e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end));
}
__e._M_append(__end);
while (!__stack.empty())
{
auto& __tmp = _M_nfa[__stack.top()];
__stack.pop();
swap(__tmp._M_next, __tmp._M_alt);
}
}
_M_stack.push(__e);
}
}
......
......@@ -62,7 +62,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
public:
typedef basic_regex<_CharT, _TraitsT> _RegexT;
typedef match_results<_BiIter, _Alloc> _ResultsT;
typedef std::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
typedef regex_constants::match_flag_type _FlagT;
typedef typename _TraitsT::char_class_type _ClassT;
......@@ -70,14 +69,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
public:
_Executor(_BiIter __begin,
_BiIter __end,
_ResultsT& __results,
_ResultsVec& __results,
const _RegexT& __re,
_FlagT __flags)
: _M_begin(__begin),
_M_end(__end),
_M_results(__results),
_M_re(__re),
_M_flags(__flags)
_M_flags((__flags & regex_constants::match_prev_avail)
? (__flags
& ~regex_constants::match_not_bol
& ~regex_constants::match_not_bow)
: __flags)
{ }
// Set matched when string exactly match the pattern.
......@@ -145,6 +148,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool
_M_lookahead(_State<_CharT, _TraitsT> __state) const;
void
_M_set_results(_ResultsVec& __cur_results);
public:
virtual void
_M_init(_BiIter __cur) = 0;
......@@ -159,8 +165,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _BiIter _M_begin;
const _BiIter _M_end;
const _RegexT& _M_re;
_ResultsT& _M_results;
const _FlagT _M_flags;
_ResultsVec& _M_results;
_FlagT _M_flags;
bool _M_match_mode;
};
......@@ -186,14 +192,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
typedef _NFA<_CharT, _TraitsT> _NFAT;
typedef typename _BaseT::_RegexT _RegexT;
typedef typename _BaseT::_ResultsT _ResultsT;
typedef typename _BaseT::_ResultsVec _ResultsVec;
typedef typename _BaseT::_FlagT _FlagT;
public:
_DFSExecutor(_BiIter __begin,
_BiIter __end,
_ResultsT& __results,
_ResultsVec& __results,
const _RegexT& __re,
_FlagT __flags)
: _BaseT(__begin, __end, __results, __re, __flags),
......@@ -249,7 +254,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
typedef _NFA<_CharT, _TraitsT> _NFAT;
typedef typename _BaseT::_RegexT _RegexT;
typedef typename _BaseT::_ResultsT _ResultsT;
typedef typename _BaseT::_ResultsVec _ResultsVec;
typedef typename _BaseT::_FlagT _FlagT;
// Here's a solution for greedy/ungreedy mode in BFS approach. We need to
......@@ -314,7 +318,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_inc(unsigned int __idx, bool __neg)
{ _M_quant_keys[__idx] += __neg ? 1 : -1; }
_ResultsVec
_ResultsVec&
_M_get()
{ return *this; }
......@@ -326,7 +330,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
public:
_BFSExecutor(_BiIter __begin,
_BiIter __end,
_ResultsT& __results,
_ResultsVec& __results,
const _RegexT& __re,
_FlagT __flags)
: _BaseT(__begin, __end, __results, __re, __flags),
......@@ -377,7 +381,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
__get_executor(_BiIter __b,
_BiIter __e,
match_results<_BiIter, _Alloc>& __m,
std::vector<sub_match<_BiIter>, _Alloc>& __m,
const basic_regex<_CharT, _TraitsT>& __re,
regex_constants::match_flag_type __flags);
......
......@@ -148,17 +148,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
&& (this->_M_flags & regex_constants::match_not_null))
__ret = false;
if (__ret)
{
_ResultsVec& __res(this->_M_results);
if (this->_M_re.flags() & regex_constants::nosubs)
{
_M_cur_results.resize(3); // truncate
__res.resize(3);
}
for (unsigned int __i = 0; __i < _M_cur_results.size(); ++__i)
if (_M_cur_results[__i].matched)
__res[__i] = _M_cur_results[__i];
}
this->_M_set_results(_M_cur_results);
break;
default:
_GLIBCXX_DEBUG_ASSERT(false);
......@@ -187,18 +177,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (this->_M_match_mode)
__ret = _M_includes_some();
if (__ret)
{
_ResultsVec& __res(this->_M_results);
if (this->_M_re.flags() & regex_constants::nosubs)
{
// truncate
_M_cur_results->resize(3);
__res.resize(3);
}
for (unsigned int __i = 0; __i < _M_cur_results->size(); ++__i)
if ((*_M_cur_results)[__i].matched)
__res[__i] = (*_M_cur_results)[__i];
}
this->_M_set_results(_M_cur_results->_M_get());
return __ret;
}
......@@ -403,10 +382,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Copy every matched sub-match from __cur_results into the executor's
// result vector, slot for slot.  Entries that did not match are skipped,
// so the corresponding slots of _M_results keep their previous contents.
template<typename _BiIter, typename _Alloc,
         typename _CharT, typename _TraitsT>
  void _Executor<_BiIter, _Alloc, _CharT, _TraitsT>::
  _M_set_results(_ResultsVec& __cur_results)
  {
    // With nosubs both vectors are cut down to three entries before the
    // copy.  NOTE(review): presumably the whole match plus two trailing
    // bookkeeping entries -- confirm against match_results.
    if (this->_M_re.flags() & regex_constants::nosubs)
      {
        __cur_results.resize(3);
        this->_M_results.resize(3);
      }

    for (unsigned int __idx = 0; __idx < __cur_results.size(); ++__idx)
      {
        const auto& __sub = __cur_results[__idx];
        if (!__sub.matched)
          continue;
        this->_M_results[__idx] = __sub;
      }
  }
template<typename _BiIter, typename _Alloc,
typename _CharT, typename _TraitsT>
std::unique_ptr<_Executor<_BiIter, _Alloc, _CharT, _TraitsT>>
__get_executor(_BiIter __b,
_BiIter __e,
match_results<_BiIter, _Alloc>& __m,
std::vector<sub_match<_BiIter>, _Alloc>& __m,
const basic_regex<_CharT, _TraitsT>& __re,
regex_constants::match_flag_type __flags)
{
......
......@@ -38,9 +38,10 @@ template<typename _Bi_iter, typename _Alloc,
regex_constants::match_flag_type __flags
= regex_constants::match_default)
{
auto& __res = (vector<sub_match<_Bi_iter>, _Alloc>&)(__m);
VERIFY( (dynamic_cast
<__detail::_DFSExecutor<_Bi_iter, _Alloc, _Ch_type, _Rx_traits>*>
(&*__detail::__get_executor(__s, __e, __m, __re, __flags))
(&*__detail::__get_executor(__s, __e, __res, __re, __flags))
!= nullptr) );
}
......
// { dg-options "-std=gnu++11" }
//
// 2013-09-24 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.4 regex_replace
// Tests ECMAScript regex_replace.
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
void
test01()
{
bool test __attribute__((unused)) = true;
VERIFY(regex_replace(string("This is a string"), regex("\\b\\w*\\b"), "|$0|")
== "|This||| |is||| |a||| |string|||");
VERIFY(regex_replace(string("This is a string"), regex("\\b\\w*\\b"), "|$0|",
regex_constants::format_no_copy)
== "|This||||is||||a||||string|||");
VERIFY(regex_replace(string("This is a string"), regex("\\b\\w*\\b"), "|$0|",
regex_constants::format_first_only)
== "|This| is a string");
}
// Test driver: runs test01() and then returns 0.
int
main()
{
test01();
return 0;
}
// { dg-options "-std=gnu++11" }
//
// 2013-09-24 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.10.5 formatting
// Tests ECMAScript format()
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
void
test01()
{
bool test __attribute__((unused)) = true;
cmatch m;
VERIFY(regex_search("*** this is a string !!!", m,
regex("(\\w+) (\\w+) (\\w+) (\\w+)")));
VERIFY(m.format("$&|$`|$3|$4|$2|$1|$'$$$")
== "this is a string|*** |a|string|is|this| !!!$$");
VERIFY(m.format("&|\\3|\\4|\\2|\\1|\\",
regex_constants::format_sed)
== "this is a string|a|string|is|this|\\");
}
// Test driver: runs test01() and then returns 0.
int
main()
{
test01();
return 0;
}
......@@ -35,12 +35,9 @@ test01()
typedef char CharT;
typedef std::regex_traits<CharT> traits;
char name[] = "ll";
traits t;
traits::string_type sname = t.lookup_collatename(name, name+sizeof(name)-1);
VERIFY( !sname.empty() );
traits t;
CharT name[] = "tilde";
VERIFY(t.lookup_collatename(name, name+sizeof(name)-1) == "~");
}
int main()
......
......@@ -33,13 +33,9 @@ test01()
typedef wchar_t CharT;
typedef std::regex_traits<CharT> traits;
wchar_t name[] = L"ll";
traits t;
traits::string_type sname =
t.lookup_collatename(name, name+sizeof(name)/sizeof(*name)-1);
VERIFY( !sname.empty() );
traits t;
CharT name[] = L"tilde";
VERIFY(t.lookup_collatename(name, name+sizeof(name)/sizeof(*name)-1) == L"~");
}
int main()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment