Commit b21abcee by Tim Shen Committed by Tim Shen

regex.h: Add friend classes.

2013-09-18  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex.h: Add friend classes.
	(match_results<>::position, regex_iterator<>::operator++):
	Implement position specification in regex_iterator.
	(regex_match<>, regex_search<>):
	Move match_results initializations to these functions. Remove `todo`.
	* include/bits/regex_compiler.tcc:
	(_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching.
	* include/bits/regex_constants.h:
	Fix indentation. Change match_flag_type to enum type.
	* include/bits/regex_executor.h:
	Merge identical code to the base class _Executor.
	Support flags in regex_constants.
	* include/bits/regex_executor.tcc: Likewise.
	* include/bits/regex_scanner.h: Add comments.
	* include/bits/regex_scanner.tcc: Same.
	* testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc:
	Add a testcase.
	* testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New.
	* testsuite/28_regex/iterators/regex_iterator/char/
	string_position_01.cc: Remove `xfail`.
	* testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc:
	Remove `xfail` and make the case really work.

From-SVN: r202706
parent 64bc8861
2013-09-18 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h: Add friend classes.
(match_results<>::position, regex_iterator<>::operator++):
Implement position specification in regex_iterator.
(regex_match<>, regex_search<>):
Move match_results initializations to these functions. Remove `todo`.
* include/bits/regex_compiler.tcc:
(_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching.
* include/bits/regex_constants.h:
Fix indentation. Change match_flag_type to enum type.
* include/bits/regex_executor.h:
Merge identical code to the base class _Executor.
Support flags in regex_constants.
* include/bits/regex_executor.tcc: Likewise.
* include/bits/regex_scanner.h: Add comments.
* include/bits/regex_scanner.tcc: Same.
* testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc:
Add a testcase.
* testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New.
* testsuite/28_regex/iterators/regex_iterator/char/
string_position_01.cc: Remove `xfail`.
* testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc:
Remove `xfail` and make the case really work.
2013-09-18 Paolo Carlini <paolo.carlini@oracle.com> 2013-09-18 Paolo Carlini <paolo.carlini@oracle.com>
* testsuite/performance/25_algorithms/search_n.cc: Fix typo. * testsuite/performance/25_algorithms/search_n.cc: Fix typo.
......
...@@ -1004,6 +1004,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -1004,6 +1004,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const basic_regex<_Cp, _Rp>&, const basic_regex<_Cp, _Rp>&,
regex_constants::match_flag_type); regex_constants::match_flag_type);
template<typename, typename, typename, typename>
friend class __detail::_Executor;
template<typename, typename, typename, typename>
friend class __detail::_DFSExecutor;
template<typename, typename, typename, typename>
friend class __detail::_BFSExecutor;
flag_type _M_flags; flag_type _M_flags;
_Rx_traits _M_traits; _Rx_traits _M_traits;
_AutomatonPtr _M_automaton; _AutomatonPtr _M_automaton;
...@@ -1783,21 +1792,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -1783,21 +1792,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/ */
explicit explicit
match_results(const _Alloc& __a = _Alloc()) match_results(const _Alloc& __a = _Alloc())
: _Base_type(__a) : _Base_type(__a), _M_in_iterator(false)
{ } { }
/** /**
* @brief Copy constructs a %match_results. * @brief Copy constructs a %match_results.
*/ */
match_results(const match_results& __rhs) match_results(const match_results& __rhs)
: _Base_type(__rhs) : _Base_type(__rhs), _M_in_iterator(false)
{ } { }
/** /**
* @brief Move constructs a %match_results. * @brief Move constructs a %match_results.
*/ */
match_results(match_results&& __rhs) noexcept match_results(match_results&& __rhs) noexcept
: _Base_type(std::move(__rhs)) : _Base_type(std::move(__rhs)), _M_in_iterator(false)
{ } { }
/** /**
...@@ -1905,8 +1914,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -1905,8 +1914,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
difference_type difference_type
position(size_type __sub = 0) const position(size_type __sub = 0) const
{ {
return __sub < size() ? std::distance(this->prefix().first, // [28.12.1.4.5]
(*this)[__sub].first) : -1; if (_M_in_iterator)
return __sub < size() ? std::distance(_M_begin,
(*this)[__sub].first) : -1;
else
return __sub < size() ? std::distance(this->prefix().first,
(*this)[__sub].first) : -1;
} }
/** /**
...@@ -2106,6 +2120,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2106,6 +2120,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename, typename, typename, typename> template<typename, typename, typename, typename>
friend class __detail::_BFSExecutor; friend class __detail::_BFSExecutor;
template<typename, typename, typename>
friend class regex_iterator;
template<typename _Bp, typename _Ap, template<typename _Bp, typename _Ap,
typename _Ch_type, typename _Rx_traits> typename _Ch_type, typename _Rx_traits>
friend bool friend bool
...@@ -2121,6 +2138,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2121,6 +2138,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const basic_regex<_Ch_type, const basic_regex<_Ch_type,
_Rx_traits>&, _Rx_traits>&,
regex_constants::match_flag_type); regex_constants::match_flag_type);
_Bi_iter _M_begin;
bool _M_in_iterator;
}; };
typedef match_results<const char*> cmatch; typedef match_results<const char*> cmatch;
...@@ -2200,8 +2220,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2200,8 +2220,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @retval false Otherwise. * @retval false Otherwise.
* *
* @throws an exception of type regex_error. * @throws an exception of type regex_error.
*
* @todo Implement this function.
*/ */
template<typename _Bi_iter, typename _Alloc, template<typename _Bi_iter, typename _Alloc,
typename _Ch_type, typename _Rx_traits> typename _Ch_type, typename _Rx_traits>
...@@ -2215,6 +2233,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2215,6 +2233,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
if (__re._M_automaton == nullptr) if (__re._M_automaton == nullptr)
return false; return false;
auto __size = __re._M_automaton->_M_sub_count();
__size += 2;
__m.resize(__size);
for (decltype(__size) __i = 0; __i < __size; ++__i)
__m.at(__i).matched = false;
if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match()) if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match())
{ {
for (auto __it : __m) for (auto __it : __m)
...@@ -2360,8 +2385,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2360,8 +2385,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* undefined. * undefined.
* *
* @throws an exception of type regex_error. * @throws an exception of type regex_error.
*
* @todo Implement this function.
*/ */
template<typename _Bi_iter, typename _Alloc, template<typename _Bi_iter, typename _Alloc,
typename _Ch_type, typename _Rx_traits> typename _Ch_type, typename _Rx_traits>
...@@ -2374,6 +2397,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2374,6 +2397,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
if (__re._M_automaton == nullptr) if (__re._M_automaton == nullptr)
return false; return false;
auto __size = __re._M_automaton->_M_sub_count();
__size += 2;
__m.resize(__size);
for (decltype(__size) __i = 0; __i < __size; ++__i)
__m.at(__i).matched = false;
if (__detail::__get_executor(__first, __last, __m, __re, __flags) if (__detail::__get_executor(__first, __last, __m, __re, __flags)
->_M_search()) ->_M_search())
{ {
...@@ -2677,7 +2707,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2677,7 +2707,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator++() operator++()
{ {
// FIXME: In all cases in which the call to regex_search returns true, // In all cases in which the call to regex_search returns true,
// match.prefix().first shall be equal to the previous value of // match.prefix().first shall be equal to the previous value of
// match[0].second, and for each index i in the half-open range // match[0].second, and for each index i in the half-open range
// [0, match.size()) for which match[i].matched is true, // [0, match.size()) for which match[i].matched is true,
...@@ -2697,12 +2727,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2697,12 +2727,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags
| regex_constants::match_not_null | regex_constants::match_not_null
| regex_constants::match_continuous)) | regex_constants::match_continuous))
return *this; {
_M_match._M_in_iterator = true;
_M_match._M_begin = _M_begin;
return *this;
}
else else
++__start; ++__start;
} }
_M_flags |= regex_constants::match_prev_avail; _M_flags |= regex_constants::match_prev_avail;
if (!regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
{
_M_match._M_in_iterator = true;
_M_match._M_begin = _M_begin;
}
else
_M_match = value_type(); _M_match = value_type();
} }
return *this; return *this;
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
* Do not attempt to use it directly. @headername{regex} * Do not attempt to use it directly. @headername{regex}
*/ */
// TODO make comments doxygen format. // FIXME make comments doxygen format.
// This compiler refers to "Regular Expression Matching Can Be Simple And Fast" // This compiler refers to "Regular Expression Matching Can Be Simple And Fast"
// (http://swtch.com/~rsc/regexp/regexp1.html"), // (http://swtch.com/~rsc/regexp/regexp1.html"),
...@@ -223,16 +223,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -223,16 +223,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__n < 0) if (__n < 0)
__throw_regex_error(regex_constants::error_badbrace); __throw_regex_error(regex_constants::error_badbrace);
auto __end = _M_nfa._M_insert_dummy(); auto __end = _M_nfa._M_insert_dummy();
// _M_alt is the "match more" branch, and _M_next is the
// "match less" one. Switch _M_alt and _M_next of all created
// nodes. This is a hack, but IMO it works well.
std::stack<_StateIdT> __stack;
for (int __i = 0; __i < __n; ++__i) for (int __i = 0; __i < __n; ++__i)
{ {
auto __tmp = __r._M_clone(); auto __tmp = __r._M_clone();
__e._M_append auto __alt = _M_nfa._M_insert_alt(__tmp._M_start,
(_StateSeqT(_M_nfa, __end, __neg);
_M_nfa._M_insert_alt(__tmp._M_start, __stack.push(__alt);
__end, __neg), __e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end));
__tmp._M_end));
} }
__e._M_append(__end); __e._M_append(__end);
while (!__stack.empty())
{
auto& __tmp = _M_nfa[__stack.top()];
__stack.pop();
swap(__tmp._M_next, __tmp._M_alt);
}
} }
else // {3,} else // {3,}
{ {
......
...@@ -68,7 +68,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -68,7 +68,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_backref, _S_token_backref,
_S_token_subexpr_begin, _S_token_subexpr_begin,
_S_token_subexpr_no_group_begin, _S_token_subexpr_no_group_begin,
_S_token_subexpr_lookahead_begin, _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
_S_token_subexpr_end, _S_token_subexpr_end,
_S_token_bracket_begin, _S_token_bracket_begin,
_S_token_bracket_neg_begin, _S_token_bracket_neg_begin,
...@@ -86,7 +86,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -86,7 +86,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_ungreedy, _S_token_ungreedy,
_S_token_line_begin, _S_token_line_begin,
_S_token_line_end, _S_token_line_end,
_S_token_word_bound, _S_token_word_bound, // neg if _M_value[0] == 'n'
_S_token_comma, _S_token_comma,
_S_token_dup_count, _S_token_dup_count,
_S_token_eof, _S_token_eof,
...@@ -174,7 +174,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -174,7 +174,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StringT _M_value; _StringT _M_value;
bool _M_at_bracket_start; bool _M_at_bracket_start;
public: public:
// TODO: make them static when this file is stable. // FIXME: make them static when this file is stable.
const std::map<char, _TokenT> _M_token_map; const std::map<char, _TokenT> _M_token_map;
const std::map<char, char> _M_ecma_escape_map; const std::map<char, char> _M_ecma_escape_map;
const std::map<char, char> _M_awk_escape_map; const std::map<char, char> _M_awk_escape_map;
......
...@@ -28,7 +28,7 @@ ...@@ -28,7 +28,7 @@
* Do not attempt to use it directly. @headername{regex} * Do not attempt to use it directly. @headername{regex}
*/ */
// TODO make comments doxygen format. // FIXME make comments doxygen format.
// N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep // N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep
// and awk // and awk
......
// { dg-options "-std=gnu++11" } // { dg-options "-std=gnu++11" }
// { dg-do run { xfail *-*-* } }
// //
// 2013-09-14 Tim Shen <timshen91@gmail.com> // 2013-09-14 Tim Shen <timshen91@gmail.com>
...@@ -54,22 +53,37 @@ test01() ...@@ -54,22 +53,37 @@ test01()
string sol[] = string sol[] =
{ {
"This", "This",
"",
"is", "is",
"",
"a", "a",
"",
"regular", "regular",
"",
"expression", "expression",
"",
}; };
regex re("\\b\\w*\\b"); regex re("\\b\\w*\\b");
int i = 0; int i = 0;
for (auto it = sregex_iterator(s.begin(), s.end(), re); for (auto it = sregex_iterator(s.begin(), s.end(), re);
it != sregex_iterator() && i < 5; it != sregex_iterator();
++it) ++it)
{ {
string s((*it)[0].first, (*it)[0].second); string s((*it)[0].first, (*it)[0].second);
VERIFY(s == sol[i++]); VERIFY(s == sol[i++]);
} }
VERIFY(i == 5); VERIFY(i == 10);
{
cmatch m;
regex re("(?=(as)df)as(df)");
regex_search("asdf", m, re);
VERIFY(m.size() == 3);
VERIFY(m[0].matched && string(m[0].first, m[0].second) == "asdf");
VERIFY(m[1].matched && string(m[1].first, m[1].second) == "as");
VERIFY(m[2].matched && string(m[2].first, m[2].second) == "df");
}
} }
int int
......
// { dg-options "-std=gnu++11" }
//
// 2013-09-18 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.3 regex_search
// Tests ECMAScript flags.
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
// Checks regex_search against the ECMAScript grammar: first the `nosubs`
// syntax option, then each match flag in turn.  Every flag is exercised as
// a pair where possible -- the search succeeds without the flag and is
// expected to fail (or change its result) once the flag is supplied.
void
test01()
{
  // NOTE(review): presumably referenced by the VERIFY macro from
  // testsuite_hooks.h -- keep the name `test` unchanged.
  bool test __attribute__((unused)) = true;

  namespace rc = std::regex_constants;

  std::cmatch match;

  // nosubs: the pattern has three groups, yet only the whole match is
  // expected in the results.
  const std::regex grouped("((as)(df))", rc::ECMAScript | rc::nosubs);
  VERIFY(std::regex_search("asdf", match, grouped));
  VERIFY(match.size() == 1);
  VERIFY(match[0].matched && match[0].str() == "asdf");

  // match_not_bol: "^a" must stop matching "a".
  const std::regex at_line_begin("^a");
  VERIFY( std::regex_search("a", at_line_begin));
  VERIFY(!std::regex_search("a", at_line_begin, rc::match_not_bol));

  // match_not_eol: "a$" must stop matching "a".
  const std::regex at_line_end("a$");
  VERIFY( std::regex_search("a", at_line_end));
  VERIFY(!std::regex_search("a", at_line_end, rc::match_not_eol));

  // match_not_bow: a leading word boundary is rejected.
  const std::regex at_word_begin("\\ba");
  VERIFY( std::regex_search("a", at_word_begin));
  VERIFY(!std::regex_search("a", at_word_begin, rc::match_not_bow));

  // match_not_eow: a trailing word boundary is rejected.
  const std::regex at_word_end("a\\b");
  VERIFY( std::regex_search("a", at_word_end));
  VERIFY(!std::regex_search("a", at_word_end, rc::match_not_eow));

  // match_not_null: empty matches are rejected, both for the empty
  // pattern and for "^$" on an empty target.
  const std::regex empty_pattern("");
  VERIFY( std::regex_search("", empty_pattern));
  VERIFY(!std::regex_search("", empty_pattern, rc::match_not_null));

  const std::regex empty_line("^$");
  VERIFY( std::regex_search("", empty_line));
  VERIFY(!std::regex_search("", empty_line, rc::match_not_null));

  // match_not_null with a non-greedy star: the result must be the single
  // character "a" rather than the empty string.
  const std::regex lazy_star("a*?");
  VERIFY( std::regex_search("aaa", match, lazy_star, rc::match_not_null));
  VERIFY(match[0].matched && match[0].str() == "a");

  // match_continuous: "sdf" occurs in "asdf" but not at its first
  // character, so the search must fail with the flag set.
  const std::regex tail("sdf");
  VERIFY( std::regex_search("asdf", tail));
  VERIFY(!std::regex_search("asdf", tail, rc::match_continuous));

  // match_prev_avail: the search starts one character into the literal,
  // so the character before the start is available for \b and \B.
  const char* const after_space = " a";
  const char* const after_letter = "ba";
  VERIFY( std::regex_search(after_space + 1, std::regex("\\ba"),
			    rc::match_prev_avail));
  VERIFY( std::regex_search(after_letter + 1, std::regex("\\Ba"),
			    rc::match_prev_avail));
}
// Test driver: runs the single test case in this file.
int
main()
{
  test01();
  // Reaching the end of main is equivalent to `return 0;`.
}
// { dg-options "-std=gnu++11" } // { dg-options "-std=gnu++11" }
// { dg-do run { xfail *-*-* } }
// //
// 2013-07-25 Tim Shen <timshen91@gmail.com> // 2013-07-25 Tim Shen <timshen91@gmail.com>
......
// { dg-options "-std=gnu++11" } // { dg-options "-std=gnu++11" }
// { dg-require-namedlocale "en_US.UTF-8" } // { dg-require-namedlocale "en_US.UTF-8" }
// { dg-do run { xfail *-*-* } }
// //
// 2013-09-05 Tim Shen <timshen91@gmail.com> // 2013-09-05 Tim Shen <timshen91@gmail.com>
...@@ -42,13 +41,19 @@ test01() ...@@ -42,13 +41,19 @@ test01()
re2.assign(L"([[:lower:]]{0,1}[[:space:]]{0,1}[[:upper:]]{0,1})"); re2.assign(L"([[:lower:]]{0,1}[[:space:]]{0,1}[[:upper:]]{0,1})");
std::wsregex_iterator p(str2.begin(), str2.end(), re2); std::wstring sol[] =
auto a = p; {
++p; L"ä\u2009Ä",
VERIFY(a != p); L"\u2009",
//for (std::wsregex_iterator p(str2.begin(), str2.end(), re2); L"ö\u2009Ö",
// p != std::wsregex_iterator{}; ++p) L"\u2009",
// std::wcout << (*p)[1] << std::endl; L"ü\u2009Ü",
L"",
};
int i = 0;
for (std::wsregex_iterator p(str2.begin(), str2.end(), re2);
p != std::wsregex_iterator{}; ++p)
VERIFY(std::wstring((*p)[1].first, (*p)[1].second) == sol[i++]);
} }
int int
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment