Commit 399eeef9 by Tim Shen Committed by Tim Shen

Implement bracket expression.

2013-08-03  Tim Shen  <timshen91@gmail.com>

	Implement bracket expression.
	* include/bits/regex.h: Remove constexpr from "|=", etc.
	* include/bits/regex_compiler.h: Parse bracket expression.
	* include/bits/regex_nfa.h: Add _Comparator; rename _RangeMatcher to
	_BracketMatcher.
	* include/bits/regex_nfa.tcc: Implement them.
	* testsuite/28_regex/algorithms/regex_match/extended/53622.cc:
	Use regex_match instead of regex_search.
	* testsuite/28_regex/algorithms/regex_match/extended/
	cstring_bracket_01.cc: New.

From-SVN: r201465
parent cdaa808a
2013-08-03 Tim Shen <timshen91@gmail.com>
Implement bracket expression.
* include/bits/regex.h: Remove constexpr from "|=", etc.
* include/bits/regex_compiler.h: Parse bracket expression.
* include/bits/regex_nfa.h: Add _Comparator; rename _RangeMatcher to
_BracketMatcher.
* include/bits/regex_nfa.tcc: Implement them.
* testsuite/28_regex/algorithms/regex_match/extended/53622.cc:
Use regex_match instead of regex_search.
* testsuite/28_regex/algorithms/regex_match/extended/
cstring_bracket_01.cc: New.
2013-08-02 Paolo Carlini <paolo.carlini@oracle.com>
* include/debug/functions.h (__foreign_iterator_aux4):
......
......@@ -95,15 +95,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
operator~() const
{ return _RegexMask(~_M_base, ~_M_extended); }
constexpr _RegexMask&
_RegexMask&
operator&=(_RegexMask __other)
{ return *this = (*this) & __other; }
constexpr _RegexMask&
_RegexMask&
operator|=(_RegexMask __other)
{ return *this = (*this) | __other; }
constexpr _RegexMask&
_RegexMask&
operator^=(_RegexMask __other)
{ return *this = (*this) ^ __other; }
......@@ -228,7 +228,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__fctyp.tolower(&*__v.begin(), &*__v.end());
return this->transform(&*__v.begin(), &*__v.end());
}
__catch (...)
__catch (std::bad_cast)
{
}
return string_type();
......@@ -519,7 +519,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
};
std::string __s(__last - __first, '?');
string_type a(__first, __last);
__fctyp.narrow(__first, __last, '?', &*__s.begin());
for (unsigned int __i = 0; *__collatenames[__i]; __i++)
......
......@@ -129,6 +129,29 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
int _M_index;
};
// TODO For now we use an all-in-one comparator. In the future there may be
// optimizations based on regex_traits::translate and regex_transform.
/// Character comparison helper used as the base class of _CharMatcher and
/// _BracketMatcher.  It stores the syntax flags and its own copy of the
/// traits object, and declares the two predicates that are defined out of
/// line.
template<typename _InIterT, typename _TraitsT>
  struct _Comparator
  {
    using _FlagT   = regex_constants::syntax_option_type;
    using _CharT   = typename _TraitsT::char_type;
    using _StringT = std::basic_string<_CharT>;

    // Syntax options supplied at construction.
    _FlagT   _M_flags;
    // Traits object, held by value (copied from the constructor argument).
    _TraitsT _M_traits;

    _Comparator(_FlagT __flags, const _TraitsT& __traits)
    : _M_flags(__flags), _M_traits(__traits)
    { }

    /// Equality test between two characters.
    bool _M_equ(_CharT __a, _CharT __b) const;

    /// "Less than or equal" test between two characters.
    bool _M_le(_CharT __a, _CharT __b) const;
  };
/// Indicates if current state matches cursor current.
typedef std::function<bool (const _PatternCursor&)> _Matcher;
......@@ -140,12 +163,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
/// Matches a single character
template<typename _InIterT, typename _TraitsT>
struct _CharMatcher
: public _Comparator<_InIterT, _TraitsT>
{
typedef typename _TraitsT::char_type char_type;
typedef _Comparator<_InIterT, _TraitsT> _BaseT;
typedef typename _TraitsT::char_type _CharT;
typedef regex_constants::syntax_option_type _FlagT;
explicit
_CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT())
: _M_traits(__t), _M_c(_M_traits.translate(__c))
_CharMatcher(_CharT __c, _FlagT __flags, const _TraitsT& __t)
: _BaseT(__flags, __t), _M_c(__c)
{ }
bool
......@@ -153,55 +179,79 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
typedef const _SpecializedCursor<_InIterT>& _CursorT;
_CursorT __c = static_cast<_CursorT>(__pc);
return _M_traits.translate(__c._M_current()) == _M_c;
return this->_M_equ(__c._M_current(), _M_c);
}
const _TraitsT& _M_traits;
char_type _M_c;
_CharT _M_c;
};
/// Matches a character range (bracket expression)
template<typename _InIterT, typename _TraitsT>
struct _RangeMatcher
struct _BracketMatcher
: public _Comparator<_InIterT, _TraitsT>
{
typedef typename _TraitsT::char_type _CharT;
typedef std::basic_string<_CharT> _StringT;
typedef _Comparator<_InIterT, _TraitsT> _BaseT;
typedef typename _TraitsT::char_class_type _CharClassT;
typedef regex_constants::syntax_option_type _FlagT;
typedef typename _TraitsT::char_type _CharT;
typedef std::basic_string<_CharT> _StringT;
explicit
_RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT())
: _M_traits(__t), _M_is_non_matching(__is_non_matching)
_BracketMatcher(bool __is_non_matching,
_FlagT __flags,
const _TraitsT& __t)
: _BaseT(__flags, __t), _M_flags(__flags), _M_traits(__t),
_M_is_non_matching(__is_non_matching), _M_class_set(0)
{ }
bool
operator()(const _PatternCursor& __pc) const
{
typedef const _SpecializedCursor<_InIterT>& _CursorT;
_CursorT __c = static_cast<_CursorT>(__pc);
return true;
}
operator()(const _PatternCursor& __pc) const;
void
_M_add_char(_CharT __c)
{ }
{ _M_char_set.push_back(__c); }
void
_M_add_collating_element(const _StringT& __s)
{ }
{
auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end());
if (__st.empty())
__throw_regex_error(regex_constants::error_collate);
// TODO: digraph
_M_char_set.push_back(__st[0]);
}
void
_M_add_equivalence_class(const _StringT& __s)
{ }
{
_M_add_character_class(
_M_traits.transform_primary(&*__s.begin(), &*__s.end()));
}
void
_M_add_character_class(const _StringT& __s)
{ }
{
auto __st = _M_traits.lookup_classname(
&*__s.begin(), &*__s.end(), (_M_flags & regex_constants::icase));
if (__st == 0)
__throw_regex_error(regex_constants::error_ctype);
_M_class_set |= __st;
}
void
_M_make_range()
{ }
_M_make_range(_CharT __l, _CharT __r)
{
if (!this->_M_le(__l, __r))
__throw_regex_error(regex_constants::error_range);
_M_range_set.push_back(make_pair(__l, __r));
}
const _TraitsT& _M_traits;
bool _M_is_non_matching;
_FlagT _M_flags;
_TraitsT _M_traits;
bool _M_is_non_matching;
std::vector<_CharT> _M_char_set;
std::vector<pair<_CharT, _CharT>> _M_range_set;
_CharClassT _M_class_set;
};
/// Identifies a state in the NFA.
......
......@@ -35,6 +35,64 @@ namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
/// Tests the character under the cursor against this bracket expression.
///
/// The character matches when any of the following holds, checked in this
/// order and stopping at the first success:
///   1. it compares equal (via _M_equ) to an entry of _M_char_set;
///   2. _M_traits.isctype() accepts it for _M_class_set;
///   3. it lies inside one of the closed ranges in _M_range_set, using
///      _M_le for both bounds.
/// The outcome is inverted when _M_is_non_matching is set.
///
/// @param __pc  Pattern cursor; it is downcast to
///              _SpecializedCursor<_InIterT> to read the current character.
/// @return true if the current character is accepted.
template<typename _InIterT, typename _TraitsT>
  bool _BracketMatcher<_InIterT, _TraitsT>::
  operator()(const _PatternCursor& __pc) const
  {
    typedef const _SpecializedCursor<_InIterT>& _CursorT;
    _CursorT __cursor = static_cast<_CursorT>(__pc);
    const _CharT __ch = __cursor._M_current();

    bool __ret = false;

    // Stage 1: individually listed characters.  The loop variable has its
    // own name so that it no longer shadows the cursor reference.
    for (auto __elem : _M_char_set)
      if (this->_M_equ(__elem, __ch))
	{
	  __ret = true;
	  break;
	}

    // Stage 2: character classes, only consulted if nothing matched yet.
    if (!__ret)
      __ret = _M_traits.isctype(__ch, _M_class_set);

    // Stage 3: ranges, likewise skipped once a match is known.
    if (!__ret)
      for (auto& __range : _M_range_set)
	if (this->_M_le(__range.first, __ch)
	    && this->_M_le(__ch, __range.second))
	  {
	    __ret = true;
	    break;
	  }

    return _M_is_non_matching ? !__ret : __ret;
  }
/// Reports whether two characters are equal under the active syntax flags.
///
/// With icase set both characters go through translate_nocase(); otherwise,
/// with collate set, both go through translate(); with neither flag the raw
/// characters are compared.
template<typename _InIterT, typename _TraitsT>
  bool _Comparator<_InIterT, _TraitsT>::
  _M_equ(_CharT __a, _CharT __b) const
  {
    _CharT __lhs = __a;
    _CharT __rhs = __b;

    // icase takes precedence over collate when both flags are present.
    if (_M_flags & regex_constants::icase)
      {
	__lhs = _M_traits.translate_nocase(__a);
	__rhs = _M_traits.translate_nocase(__b);
      }
    else if (_M_flags & regex_constants::collate)
      {
	__lhs = _M_traits.translate(__a);
	__rhs = _M_traits.translate(__b);
      }

    return __lhs == __rhs;
  }
/// Reports whether @a __a sorts before or equal to @a __b.
///
/// Each character is first folded with translate_nocase() when icase is
/// set and with translate() otherwise, then wrapped in a one-character
/// string and passed to _M_traits.transform(); the two transformed results
/// are compared with <=.
template<typename _InIterT, typename _TraitsT>
  bool _Comparator<_InIterT, _TraitsT>::
  _M_le(_CharT __a, _CharT __b) const
  {
    _CharT __lhs = __a;
    _CharT __rhs = __b;
    if (_M_flags & regex_constants::icase)
      {
	__lhs = _M_traits.translate_nocase(__a);
	__rhs = _M_traits.translate_nocase(__b);
      }
    else
      {
	__lhs = _M_traits.translate(__a);
	__rhs = _M_traits.translate(__b);
      }

    // transform() works on character ranges, hence the 1-char strings.
    _StringT __lkey(1, __lhs);
    _StringT __rkey(1, __rhs);
    return _M_traits.transform(__lkey.begin(), __lkey.end())
	   <= _M_traits.transform(__rkey.begin(), __rkey.end());
  }
#ifdef _GLIBCXX_DEBUG
inline std::ostream& _State::
_M_print(std::ostream& ostr) const
......
......@@ -37,7 +37,7 @@ test01()
std::string target("zxcv/onetwoabc");
std::smatch m;
VERIFY( std::regex_search(target, m, re) );
VERIFY( std::regex_match(target, m, re) );
VERIFY( m.size() == 2 );
VERIFY( m[0].matched == true );
VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
......@@ -50,7 +50,7 @@ test01()
std::string target("zxcv/onetwoabc");
std::smatch m;
VERIFY( std::regex_search(target, m, re) );
VERIFY( std::regex_match(target, m, re) );
VERIFY( m.size() == 3 );
VERIFY( m[0].matched == true );
VERIFY( std::string(m[0].first, m[0].second) == "zxcv/onetwoabc" );
......
// { dg-options "-std=gnu++11" }
//
// 2013-08-01 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.2 regex_match
// Tests Extended bracket expression against a C-string.
#include <regex>
#include <testsuite_hooks.h>
// Exercises extended-syntax bracket expressions through regex_match on
// C-string subjects: a literal plus a range, named character classes
// (with and without icase), and a collating element.
void
test01()
{
  bool test __attribute__((unused)) = true;

  // One listed character ('z') combined with the range a-x; 'y' is in
  // neither and must be rejected.
  {
    const std::regex char_and_range("pre/[za-x]", std::regex::extended);
    VERIFY( std::regex_match("pre/z", char_and_range) );
    VERIFY( std::regex_match("pre/a", char_and_range) );
    VERIFY( !std::regex_match("pre/y", char_and_range) );
  }

  // Class name written in mixed case; only the upper-case letter is
  // expected to match.
  {
    const std::regex upper_class("pre/[[:uPPer:]]", std::regex::extended);
    VERIFY( std::regex_match("pre/Z", upper_class) );
    VERIFY( !std::regex_match("pre/_", upper_class) );
    VERIFY( !std::regex_match("pre/a", upper_class) );
    VERIFY( !std::regex_match("pre/0", upper_class) );
  }

  // With icase, the lower class is expected to accept letters of either
  // case.
  {
    const std::regex lower_icase("pre/[[:lOWer:]]",
				 std::regex::extended | std::regex::icase);
    VERIFY( std::regex_match("pre/Z", lower_icase) );
    VERIFY( std::regex_match("pre/a", lower_icase) );
  }

  // The "w" class together with the collating element named "tilde".
  {
    const std::regex word_or_tilde("pre/[[:w:][.tilde.]]",
				   std::regex::extended);
    VERIFY( std::regex_match("pre/~", word_or_tilde) );
    VERIFY( std::regex_match("pre/_", word_or_tilde) );
    VERIFY( std::regex_match("pre/a", word_or_tilde) );
    VERIFY( std::regex_match("pre/0", word_or_tilde) );
  }
}
// Test driver: runs the bracket-expression checks.  Falling off the end of
// main is equivalent to returning 0.
int
main()
{
  test01();
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment