Commit b21abcee by Tim Shen Committed by Tim Shen

regex.h: Add friend classes.

2013-09-18  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex.h: Add friend classes.
	(match_results<>::position, regex_iterator<>::operator++):
	Implement position specification in regex_iterator.
	(regex_match<>, regex_search<>):
	Move match_results initializations to these functions. Remove `todo`.
	* include/bits/regex_compiler.tcc:
	(_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching.
	* include/bits/regex_constants.h:
	Fix indentation. Change match_flag_type to enum type.
	* include/bits/regex_executor.h:
	Merge identical code to the base class _Executor.
	Support flags in regex_constants.
	* include/bits/regex_executor.tcc: Likewise.
	* include/bits/regex_scanner.h: Add comments.
	* include/bits/regex_scanner.tcc: Same.
	* testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc:
	Add a testcase.
	* testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New.
	* testsuite/28_regex/iterators/regex_iterator/char/
	string_position_01.cc: Remove `xfail`.
	* testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc:
	Remove `xfail` and make the case really work.

From-SVN: r202706
parent 64bc8861
2013-09-18 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h: Add friend classes.
(match_results<>::position, regex_iterator<>::operator++):
Implement position specification in regex_iterator.
(regex_match<>, regex_search<>):
Move match_results initializations to these functions. Remove `todo`.
* include/bits/regex_compiler.tcc:
(_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching.
* include/bits/regex_constants.h:
Fix indentation. Change match_flag_type to enum type.
* include/bits/regex_executor.h:
Merge identical code to the base class _Executor.
Support flags in regex_constants.
* include/bits/regex_executor.tcc: Likewise.
* include/bits/regex_scanner.h: Add comments.
* include/bits/regex_scanner.tcc: Same.
* testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc:
Add a testcase.
* testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New.
* testsuite/28_regex/iterators/regex_iterator/char/
string_position_01.cc: Remove `xfail`.
* testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc:
Remove `xfail` and make the case really work.
2013-09-18 Paolo Carlini <paolo.carlini@oracle.com>
* testsuite/performance/25_algorithms/search_n.cc: Fix typo.
......
......@@ -1004,6 +1004,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const basic_regex<_Cp, _Rp>&,
regex_constants::match_flag_type);
template<typename, typename, typename, typename>
friend class __detail::_Executor;
template<typename, typename, typename, typename>
friend class __detail::_DFSExecutor;
template<typename, typename, typename, typename>
friend class __detail::_BFSExecutor;
flag_type _M_flags;
_Rx_traits _M_traits;
_AutomatonPtr _M_automaton;
......@@ -1783,21 +1792,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/
explicit
match_results(const _Alloc& __a = _Alloc())
: _Base_type(__a)
: _Base_type(__a), _M_in_iterator(false)
{ }
/**
* @brief Copy constructs a %match_results.
*/
match_results(const match_results& __rhs)
: _Base_type(__rhs)
: _Base_type(__rhs), _M_in_iterator(false)
{ }
/**
* @brief Move constructs a %match_results.
*/
match_results(match_results&& __rhs) noexcept
: _Base_type(std::move(__rhs))
: _Base_type(std::move(__rhs)), _M_in_iterator(false)
{ }
/**
......@@ -1905,8 +1914,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
difference_type
position(size_type __sub = 0) const
{
return __sub < size() ? std::distance(this->prefix().first,
(*this)[__sub].first) : -1;
// [28.12.1.4.5]
if (_M_in_iterator)
return __sub < size() ? std::distance(_M_begin,
(*this)[__sub].first) : -1;
else
return __sub < size() ? std::distance(this->prefix().first,
(*this)[__sub].first) : -1;
}
/**
......@@ -2106,6 +2120,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename, typename, typename, typename>
friend class __detail::_BFSExecutor;
template<typename, typename, typename>
friend class regex_iterator;
template<typename _Bp, typename _Ap,
typename _Ch_type, typename _Rx_traits>
friend bool
......@@ -2121,6 +2138,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const basic_regex<_Ch_type,
_Rx_traits>&,
regex_constants::match_flag_type);
_Bi_iter _M_begin;
bool _M_in_iterator;
};
typedef match_results<const char*> cmatch;
......@@ -2200,8 +2220,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @retval false Otherwise.
*
* @throws an exception of type regex_error.
*
* @todo Implement this function.
*/
template<typename _Bi_iter, typename _Alloc,
typename _Ch_type, typename _Rx_traits>
......@@ -2215,6 +2233,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (__re._M_automaton == nullptr)
return false;
auto __size = __re._M_automaton->_M_sub_count();
__size += 2;
__m.resize(__size);
for (decltype(__size) __i = 0; __i < __size; ++__i)
__m.at(__i).matched = false;
if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match())
{
for (auto __it : __m)
......@@ -2360,8 +2385,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* undefined.
*
* @throws an exception of type regex_error.
*
* @todo Implement this function.
*/
template<typename _Bi_iter, typename _Alloc,
typename _Ch_type, typename _Rx_traits>
......@@ -2374,6 +2397,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
if (__re._M_automaton == nullptr)
return false;
auto __size = __re._M_automaton->_M_sub_count();
__size += 2;
__m.resize(__size);
for (decltype(__size) __i = 0; __i < __size; ++__i)
__m.at(__i).matched = false;
if (__detail::__get_executor(__first, __last, __m, __re, __flags)
->_M_search())
{
......@@ -2677,7 +2707,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator++()
{
// FIXME: In all cases in which the call to regex_search returns true,
// In all cases in which the call to regex_search returns true,
// match.prefix().first shall be equal to the previous value of
// match[0].second, and for each index i in the half-open range
// [0, match.size()) for which match[i].matched is true,
......@@ -2697,12 +2727,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags
| regex_constants::match_not_null
| regex_constants::match_continuous))
return *this;
{
_M_match._M_in_iterator = true;
_M_match._M_begin = _M_begin;
return *this;
}
else
++__start;
}
_M_flags |= regex_constants::match_prev_avail;
if (!regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
{
_M_match._M_in_iterator = true;
_M_match._M_begin = _M_begin;
}
else
_M_match = value_type();
}
return *this;
......
......@@ -28,7 +28,7 @@
* Do not attempt to use it directly. @headername{regex}
*/
// TODO make comments doxygen format.
// FIXME make comments doxygen format.
// This compiler refers to "Regular Expression Matching Can Be Simple And Fast"
// (http://swtch.com/~rsc/regexp/regexp1.html),
......@@ -223,16 +223,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__n < 0)
__throw_regex_error(regex_constants::error_badbrace);
auto __end = _M_nfa._M_insert_dummy();
// _M_alt is the "match more" branch, and _M_next is the
// "match less" one. Switch _M_alt and _M_next of all created
// nodes. This is a hack, but IMO it works well.
std::stack<_StateIdT> __stack;
for (int __i = 0; __i < __n; ++__i)
{
auto __tmp = __r._M_clone();
__e._M_append
(_StateSeqT(_M_nfa,
_M_nfa._M_insert_alt(__tmp._M_start,
__end, __neg),
__tmp._M_end));
auto __alt = _M_nfa._M_insert_alt(__tmp._M_start,
__end, __neg);
__stack.push(__alt);
__e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end));
}
__e._M_append(__end);
while (!__stack.empty())
{
auto& __tmp = _M_nfa[__stack.top()];
__stack.pop();
swap(__tmp._M_next, __tmp._M_alt);
}
}
else // {3,}
{
......
......@@ -68,7 +68,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_backref,
_S_token_subexpr_begin,
_S_token_subexpr_no_group_begin,
_S_token_subexpr_lookahead_begin,
_S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
_S_token_subexpr_end,
_S_token_bracket_begin,
_S_token_bracket_neg_begin,
......@@ -86,7 +86,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_ungreedy,
_S_token_line_begin,
_S_token_line_end,
_S_token_word_bound,
_S_token_word_bound, // neg if _M_value[0] == 'n'
_S_token_comma,
_S_token_dup_count,
_S_token_eof,
......@@ -174,7 +174,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StringT _M_value;
bool _M_at_bracket_start;
public:
// TODO: make them static when this file is stable.
// FIXME: make them static when this file is stable.
const std::map<char, _TokenT> _M_token_map;
const std::map<char, char> _M_ecma_escape_map;
const std::map<char, char> _M_awk_escape_map;
......
......@@ -28,7 +28,7 @@
* Do not attempt to use it directly. @headername{regex}
*/
// TODO make comments doxygen format.
// FIXME make comments doxygen format.
// N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep
// and awk
......
// { dg-options "-std=gnu++11" }
// { dg-do run { xfail *-*-* } }
//
// 2013-09-14 Tim Shen <timshen91@gmail.com>
......@@ -54,22 +53,37 @@ test01()
string sol[] =
{
"This",
"",
"is",
"",
"a",
"",
"regular",
"",
"expression",
"",
};
regex re("\\b\\w*\\b");
int i = 0;
for (auto it = sregex_iterator(s.begin(), s.end(), re);
it != sregex_iterator() && i < 5;
it != sregex_iterator();
++it)
{
string s((*it)[0].first, (*it)[0].second);
VERIFY(s == sol[i++]);
}
VERIFY(i == 5);
VERIFY(i == 10);
{
cmatch m;
regex re("(?=(as)df)as(df)");
regex_search("asdf", m, re);
VERIFY(m.size() == 3);
VERIFY(m[0].matched && string(m[0].first, m[0].second) == "asdf");
VERIFY(m[1].matched && string(m[1].first, m[1].second) == "as");
VERIFY(m[2].matched && string(m[2].first, m[2].second) == "df");
}
}
int
......
// { dg-options "-std=gnu++11" }
//
// 2013-09-18 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.3 regex_search
// Tests ECMAScript flags.
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
void
test01()
{
bool test __attribute__((unused)) = true;
cmatch m;
regex re("((as)(df))", regex_constants::ECMAScript | regex_constants::nosubs);
VERIFY(regex_search("asdf", m, re));
VERIFY(m.size() == 1);
VERIFY(m[0].matched && string(m[0].first, m[0].second) == "asdf");
VERIFY( regex_search("a", regex("^a")));
VERIFY(!regex_search("a", regex("^a"), regex_constants::match_not_bol));
VERIFY( regex_search("a", regex("a$")));
VERIFY(!regex_search("a", regex("a$"), regex_constants::match_not_eol));
VERIFY( regex_search("a", regex("\\ba")));
VERIFY(!regex_search("a", regex("\\ba"), regex_constants::match_not_bow));
VERIFY( regex_search("a", regex("a\\b")));
VERIFY(!regex_search("a", regex("a\\b"), regex_constants::match_not_eow));
VERIFY( regex_search("", regex("")));
VERIFY(!regex_search("", regex(""), regex_constants::match_not_null));
VERIFY( regex_search("", regex("^$")));
VERIFY(!regex_search("", regex("^$"), regex_constants::match_not_null));
VERIFY( regex_search("aaa", m, regex("a*?"),
regex_constants::match_not_null));
VERIFY(m[0].matched && string(m[0].first, m[0].second) == "a");
VERIFY( regex_search("asdf", regex("sdf")));
VERIFY(!regex_search("asdf", regex("sdf"),
regex_constants::match_continuous));
VERIFY( regex_search(" a"+1, regex("\\ba"),
regex_constants::match_prev_avail));
VERIFY( regex_search("ba"+1, regex("\\Ba"),
regex_constants::match_prev_avail));
}
// Test driver: runs test01() and then returns 0.
int
main()
{
test01();
return 0;
}
// { dg-options "-std=gnu++11" }
// { dg-do run { xfail *-*-* } }
//
// 2013-07-25 Tim Shen <timshen91@gmail.com>
......
// { dg-options "-std=gnu++11" }
// { dg-require-namedlocale "en_US.UTF-8" }
// { dg-do run { xfail *-*-* } }
//
// 2013-09-05 Tim Shen <timshen91@gmail.com>
......@@ -42,13 +41,19 @@ test01()
re2.assign(L"([[:lower:]]{0,1}[[:space:]]{0,1}[[:upper:]]{0,1})");
std::wsregex_iterator p(str2.begin(), str2.end(), re2);
auto a = p;
++p;
VERIFY(a != p);
//for (std::wsregex_iterator p(str2.begin(), str2.end(), re2);
// p != std::wsregex_iterator{}; ++p)
// std::wcout << (*p)[1] << std::endl;
std::wstring sol[] =
{
L"ä\u2009Ä",
L"\u2009",
L"ö\u2009Ö",
L"\u2009",
L"ü\u2009Ü",
L"",
};
int i = 0;
for (std::wsregex_iterator p(str2.begin(), str2.end(), re2);
p != std::wsregex_iterator{}; ++p)
VERIFY(std::wstring((*p)[1].first, (*p)[1].second) == sol[i++]);
}
int
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment