regex.h: Add friend classes.

2013-09-18 Tim Shen <timshen91@gmail.com> * include/bits/regex.h: Add friend classes. (match_results<>::position, regex_iterator<>::operator++): Implement position specification in regex_iterator. (regex_match<>, regex_search<>): Move match_results initializations to these functions. Remove `todo`. * include/bits/regex_compiler.tcc: (_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching. * include/bits/regex_constants.h: Fix indentation. Change match_flag_type to enum type. * include/bits/regex_executor.h: Merge identical code to the base class _Executor. Support flags in regex_constants. * include/bits/regex_executor.tcc: Likewise. * include/bits/regex_scanner.h: Add comments. * include/bits/regex_scanner.tcc: Same. * testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc: Add a testcase. * testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New. * testsuite/28_regex/iterators/regex_iterator/char/string_position_01.cc: Remove `xfail`. * testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc: Remove `xfail` and make the case really work. From-SVN: r202706

regex.h: Add friend classes.
2013-09-18 Tim Shen <timshen91@gmail.com> * include/bits/regex.h: Add friend classes. (match_results<>::position, regex_iterator<>::operator++): Implement position specification in regex_iterator. (regex_match<>, regex_search<>): Move match_results initializations to these functions. Remove `todo`. * include/bits/regex_compiler.tcc: (_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching. * include/bits/regex_constants.h: Fix indentation. Change match_flag_type to enum type. * include/bits/regex_executor.h: Merge identical code to the base class _Executor. Support flags in regex_constants. * include/bits/regex_executor.tcc: Likewise. * include/bits/regex_scanner.h: Add comments. * include/bits/regex_scanner.tcc: Same. * testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc: Add a testcase. * testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New. * testsuite/28_regex/iterators/regex_iterator/char/string_position_01.cc: Remove `xfail`. * testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc: Remove `xfail` and make the case really work. From-SVN: r202706
b21abcee · Tim Shen · Tim Shen · 64bc8861 · b21abcee · b21abcee
Commit b21abcee authored Sep 18, 2013 by Tim Shen Committed by Tim Shen Sep 18, 2013
12 changed files
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
+2013-09-18  Tim Shen  <timshen91@gmail.com>
+	* include/bits/regex.h: Add friend classes.
+	(match_results<>::position, regex_iterator<>::operator++):
+	Implement position specification in regex_iterator.
+	(regex_match<>, regex_search<>):
+	Move match_results initializations to these functions. Remove `todo`.
+	* include/bits/regex_compiler.tcc:
+	(_Compiler<>::_M_quantifier): Fix greedy/ungreedy of interval matching.
+	* include/bits/regex_constants.h:
+	Fix indentation. Change match_flag_type to enum type.
+	* include/bits/regex_executor.h:
+	Merge identical code to the base class _Executor.
+	Support flags in regex_constants.
+	* include/bits/regex_executor.tcc: Likewise.
+	* include/bits/regex_scanner.h: Add comments.
+	* include/bits/regex_scanner.tcc: Same.
+	* testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc:
+	Add a testcase.
+	* testsuite/28_regex/algorithms/regex_search/ecma/flags.cc: New.
+	* testsuite/28_regex/iterators/regex_iterator/char/
+	string_position_01.cc: Remove `xfail`.
+	* testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc:
+	Remove `xfail` and make the case really work.
 2013-09-18  Paolo Carlini  <paolo.carlini@oracle.com>
 	* testsuite/performance/25_algorithms/search_n.cc: Fix typo.

--- a/libstdc++-v3/include/bits/regex.h
+++ b/libstdc++-v3/include/bits/regex.h
@@ -1004,6 +1004,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		     const basic_regex<_Cp, _Rp>&,
 		     regex_constants::match_flag_type);
+      template<typename, typename, typename, typename>
+	friend class __detail::_Executor;
+      template<typename, typename, typename, typename>
+	friend class __detail::_DFSExecutor;
+      template<typename, typename, typename, typename>
+	friend class __detail::_BFSExecutor;
      flag_type     _M_flags;
      _Rx_traits    _M_traits;
      _AutomatonPtr _M_automaton;
@@ -1783,21 +1792,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       */
      explicit
      match_results(const _Alloc& __a = _Alloc())
-      : _Base_type(__a)
+      : _Base_type(__a), _M_in_iterator(false)
      { }
      /**
       * @brief Copy constructs a %match_results.
       */
      match_results(const match_results& __rhs)
-      : _Base_type(__rhs)
+      : _Base_type(__rhs), _M_in_iterator(false)
      { }
      /**
       * @brief Move constructs a %match_results.
       */
      match_results(match_results&& __rhs) noexcept
-      : _Base_type(std::move(__rhs))
+      : _Base_type(std::move(__rhs)), _M_in_iterator(false)
      { }
      /**
@@ -1905,8 +1914,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      difference_type
      position(size_type __sub = 0) const
      {
-	return __sub < size() ? std::distance(this->prefix().first,
+	// [28.12.1.4.5]
-					      (*this)[__sub].first) : -1;
+	if (_M_in_iterator)
+	  return __sub < size() ? std::distance(_M_begin,
+						(*this)[__sub].first) : -1;
+	else
+	  return __sub < size() ? std::distance(this->prefix().first,
+						(*this)[__sub].first) : -1;
      }
      /**
@@ -2106,6 +2120,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      template<typename, typename, typename, typename>
 	friend class __detail::_BFSExecutor;
+      template<typename, typename, typename>
+	friend class regex_iterator;
      template<typename _Bp, typename _Ap,
 	typename _Ch_type, typename _Rx_traits>
 	friend bool
@@ -2121,6 +2138,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		     const basic_regex<_Ch_type,
 		     _Rx_traits>&,
 		     regex_constants::match_flag_type);
+      _Bi_iter _M_begin;
+      bool     _M_in_iterator;
    };
  typedef match_results<const char*>             cmatch;
@@ -2200,8 +2220,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   * @retval false Otherwise.
   *
   * @throws an exception of type regex_error.
-   *
-   * @todo Implement this function.
   */
  template<typename _Bi_iter, typename _Alloc,
 	   typename _Ch_type, typename _Rx_traits>
@@ -2215,6 +2233,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    {
      if (__re._M_automaton == nullptr)
 	return false;
+      auto __size = __re._M_automaton->_M_sub_count();
+      __size += 2;
+      __m.resize(__size);
+      for (decltype(__size) __i = 0; __i < __size; ++__i)
+	__m.at(__i).matched = false;
      if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match())
 	{
 	  for (auto __it : __m)
@@ -2360,8 +2385,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   *               undefined.
   *
   * @throws an exception of type regex_error.
-   *
-   * @todo Implement this function.
   */
  template<typename _Bi_iter, typename _Alloc,
 	   typename _Ch_type, typename _Rx_traits>
@@ -2374,6 +2397,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    {
      if (__re._M_automaton == nullptr)
 	return false;
+      auto __size = __re._M_automaton->_M_sub_count();
+      __size += 2;
+      __m.resize(__size);
+      for (decltype(__size) __i = 0; __i < __size; ++__i)
+	__m.at(__i).matched = false;
      if (__detail::__get_executor(__first, __last, __m, __re, __flags)
 	  ->_M_search())
 	{
@@ -2677,7 +2707,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
    regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
    operator++()
    {
-      // FIXME: In all cases in which the call to regex_search returns true,
+      // In all cases in which the call to regex_search returns true,
      // match.prefix().first shall be equal to the previous value of
      // match[0].second, and for each index i in the half-open range
      // [0, match.size()) for which match[i].matched is true,
@@ -2697,12 +2727,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags
 				 | regex_constants::match_not_null
 				 | regex_constants::match_continuous))
-		  return *this;
+		  {
+		    _M_match._M_in_iterator = true;
+		    _M_match._M_begin = _M_begin;
+		    return *this;
+		  }
 		else
 		  ++__start;
 	      }
 	  _M_flags |= regex_constants::match_prev_avail;
-	  if (!regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
+	  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
+	    {
+	      _M_match._M_in_iterator = true;
+	      _M_match._M_begin = _M_begin;
+	    }
+	  else
 	    _M_match = value_type();
 	}
      return *this;

--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -28,7 +28,7 @@
 *  Do not attempt to use it directly. @headername{regex}
 */
-// TODO make comments doxygen format.
+// FIXME make comments doxygen format.
 // This compiler refers to "Regular Expression Matching Can Be Simple And Fast"
 // (http://swtch.com/~rsc/regexp/regexp1.html"),
@@ -223,16 +223,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		if (__n < 0)
 		  __throw_regex_error(regex_constants::error_badbrace);
 		auto __end = _M_nfa._M_insert_dummy();
+		// _M_alt is the "match more" branch, and _M_next is the
+		// "match less" one. Switch _M_alt and _M_next of all created
+		// nodes. This is a hack, but IMO it works well.
+		std::stack<_StateIdT> __stack;
 		for (int __i = 0; __i < __n; ++__i)
 		  {
 		    auto __tmp = __r._M_clone();
-		    __e._M_append
+		    auto __alt = _M_nfa._M_insert_alt(__tmp._M_start,
-		      (_StateSeqT(_M_nfa,
+						      __end, __neg);
-				  _M_nfa._M_insert_alt(__tmp._M_start,
+		    __stack.push(__alt);
-						       __end, __neg),
+		    __e._M_append(_StateSeqT(_M_nfa, __alt, __tmp._M_end));
-				  __tmp._M_end));
 		  }
 		__e._M_append(__end);
+		while (!__stack.empty())
+		  {
+		    auto& __tmp = _M_nfa[__stack.top()];
+		    __stack.pop();
+		    swap(__tmp._M_next, __tmp._M_alt);
+		  }
 	      }
 	    else // {3,}
 	      {

--- a/libstdc++-v3/include/bits/regex_constants.h
+++ b/libstdc++-v3/include/bits/regex_constants.h
--- a/libstdc++-v3/include/bits/regex_executor.h
+++ b/libstdc++-v3/include/bits/regex_executor.h
--- a/libstdc++-v3/include/bits/regex_executor.tcc
+++ b/libstdc++-v3/include/bits/regex_executor.tcc
--- a/libstdc++-v3/include/bits/regex_scanner.h
+++ b/libstdc++-v3/include/bits/regex_scanner.h
@@ -68,7 +68,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	_S_token_backref,
 	_S_token_subexpr_begin,
 	_S_token_subexpr_no_group_begin,
-	_S_token_subexpr_lookahead_begin,
+	_S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
 	_S_token_subexpr_end,
 	_S_token_bracket_begin,
 	_S_token_bracket_neg_begin,
@@ -86,7 +86,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	_S_token_ungreedy,
 	_S_token_line_begin,
 	_S_token_line_end,
-	_S_token_word_bound,
+	_S_token_word_bound, // neg if _M_value[0] == 'n'
 	_S_token_comma,
 	_S_token_dup_count,
 	_S_token_eof,
@@ -174,7 +174,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      _StringT                      _M_value;
      bool                          _M_at_bracket_start;
    public:
-      // TODO: make them static when this file is stable.
+      // FIXME: make them static when this file is stable.
      const std::map<char, _TokenT> _M_token_map;
      const std::map<char, char>    _M_ecma_escape_map;
      const std::map<char, char>    _M_awk_escape_map;

--- a/libstdc++-v3/include/bits/regex_scanner.tcc
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@@ -28,7 +28,7 @@
 *  Do not attempt to use it directly. @headername{regex}
 */
-// TODO make comments doxygen format.
+// FIXME make comments doxygen format.
 // N3376 specified 6 regex styles: ECMAScript, basic, extended, grep, egrep
 // and awk

--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc
 // { dg-options "-std=gnu++11" }
-// { dg-do run { xfail *-*-* } }
 //
 // 2013-09-14  Tim Shen <timshen91@gmail.com>
@@ -54,22 +53,37 @@ test01()
  string sol[] =
    {
      "This",
+      "",
      "is",
+      "",
      "a",
+      "",
      "regular",
+      "",
      "expression",
+      "",
    };
  regex re("\\b\\w*\\b");
  int i = 0;
  for (auto it = sregex_iterator(s.begin(), s.end(), re);
-       it != sregex_iterator() && i < 5;
+       it != sregex_iterator();
       ++it)
    {
      string s((*it)[0].first, (*it)[0].second);
      VERIFY(s == sol[i++]);
    }
-  VERIFY(i == 5);
+  VERIFY(i == 10);
+  {
+    cmatch m;
+    regex re("(?=(as)df)as(df)");
+    regex_search("asdf", m, re);
+    VERIFY(m.size() == 3);
+    VERIFY(m[0].matched && string(m[0].first, m[0].second) == "asdf");
+    VERIFY(m[1].matched && string(m[1].first, m[1].second) == "as");
+    VERIFY(m[2].matched && string(m[2].first, m[2].second) == "df");
+  }
 }
 int

--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/flags.cc
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_search/ecma/flags.cc
+// { dg-options "-std=gnu++11" }
+//
+// 2013-09-18  Tim Shen <timshen91@gmail.com>
+//
+// Copyright (C) 2013 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+// 28.11.3 regex_search
+// Tests ECMAScript flags.
+#include <regex>
+#include <testsuite_hooks.h>
+using namespace std;
+void
+test01()
+{
+  bool test __attribute__((unused)) = true;
+  cmatch m;
+  regex re("((as)(df))", regex_constants::ECMAScript | regex_constants::nosubs);
+  VERIFY(regex_search("asdf", m, re));
+  VERIFY(m.size() == 1);
+  VERIFY(m[0].matched && string(m[0].first, m[0].second) == "asdf");
+  VERIFY( regex_search("a", regex("^a")));
+  VERIFY(!regex_search("a", regex("^a"), regex_constants::match_not_bol));
+  VERIFY( regex_search("a", regex("a$")));
+  VERIFY(!regex_search("a", regex("a$"), regex_constants::match_not_eol));
+  VERIFY( regex_search("a", regex("\\ba")));
+  VERIFY(!regex_search("a", regex("\\ba"), regex_constants::match_not_bow));
+  VERIFY( regex_search("a", regex("a\\b")));
+  VERIFY(!regex_search("a", regex("a\\b"), regex_constants::match_not_eow));
+  VERIFY( regex_search("", regex("")));
+  VERIFY(!regex_search("", regex(""), regex_constants::match_not_null));
+  VERIFY( regex_search("", regex("^$")));
+  VERIFY(!regex_search("", regex("^$"), regex_constants::match_not_null));
+  VERIFY( regex_search("aaa", m, regex("a*?"),
+		       regex_constants::match_not_null));
+  VERIFY(m[0].matched && string(m[0].first, m[0].second) == "a");
+  VERIFY( regex_search("asdf", regex("sdf")));
+  VERIFY(!regex_search("asdf", regex("sdf"),
+		       regex_constants::match_continuous));
+  VERIFY( regex_search(" a"+1, regex("\\ba"),
+		       regex_constants::match_prev_avail));
+  VERIFY( regex_search("ba"+1, regex("\\Ba"),
+		       regex_constants::match_prev_avail));
+}
+int
+main()
+{
+  test01();
+  return 0;
+}
--- a/libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/char/string_position_01.cc
+++ b/libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/char/string_position_01.cc
 // { dg-options "-std=gnu++11" }
-// { dg-do run { xfail *-*-* } }
 //
 // 2013-07-25  Tim Shen <timshen91@gmail.com>

--- a/libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc
+++ b/libstdc++-v3/testsuite/28_regex/iterators/regex_iterator/wchar_t/string_02.cc
 // { dg-options "-std=gnu++11" }
 // { dg-require-namedlocale "en_US.UTF-8" }
-// { dg-do run { xfail *-*-* } }
 //
 // 2013-09-05  Tim Shen <timshen91@gmail.com>
@@ -42,13 +41,19 @@ test01()
  re2.assign(L"([[:lower:]]{0,1}[[:space:]]{0,1}[[:upper:]]{0,1})");
-  std::wsregex_iterator p(str2.begin(), str2.end(), re2);
+  std::wstring sol[] =
-  auto a = p;
+    {
-  ++p;
+      L"ä\u2009Ä",
-  VERIFY(a != p);
+      L"\u2009",
-  //for (std::wsregex_iterator p(str2.begin(), str2.end(), re2);
+      L"ö\u2009Ö",
-  //    p != std::wsregex_iterator{}; ++p)
+      L"\u2009",
-  //  std::wcout << (*p)[1] << std::endl;
+      L"ü\u2009Ü",
+      L"",
+    };
+  int i = 0;
+  for (std::wsregex_iterator p(str2.begin(), str2.end(), re2);
+      p != std::wsregex_iterator{}; ++p)
+    VERIFY(std::wstring((*p)[1].first, (*p)[1].second) == sol[i++]);
 }
 int