Commit 974afa58 by Tim Shen Committed by Tim Shen

re PR libstdc++/71500 (regex::icase only works on first character in a range)

	PR libstdc++/71500
	* include/bits/regex.h (basic_regex::basic_regex): Use ECMAScript
	when the syntax is not specified.
	* include/bits/regex_compiler.h (_RegexTranslator,
	_RegexTranslatorBase): Partially support icase in ranges.
	* include/bits/regex_compiler.tcc (_BracketMatcher::_M_apply):
	Refactor _M_apply to make the control flow easier to follow, and
	call _M_translator._M_match_range as added previously.
	* testsuite/28_regex/traits/char/icase.cc: Add new tests.
	* testsuite/28_regex/traits/char/user_defined.cc: Add new tests.

From-SVN: r243093
parent 0f091278
2016-11-31 Tim Shen <timshen@google.com>
PR libstdc++/71500
* include/bits/regex.h (basic_regex::basic_regex): Use ECMAScript
when the syntax is not specified.
* include/bits/regex_compiler.h (_RegexTranslator,
_RegexTranslatorBase): Partially support icase in ranges.
* include/bits/regex_compiler.tcc (_BracketMatcher::_M_apply):
Refactor _M_apply to make the control flow easier to follow, and
call _M_translator._M_match_range as added previously.
* testsuite/28_regex/traits/char/icase.cc: Add new tests.
* testsuite/28_regex/traits/char/user_defined.cc: Add new tests.
2016-11-30 Ville Voutilainen <ville.voutilainen@gmail.com> 2016-11-30 Ville Voutilainen <ville.voutilainen@gmail.com>
Fix testsuite failures caused by the patch implementing LWG 2534. Fix testsuite failures caused by the patch implementing LWG 2534.
......
...@@ -762,7 +762,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11 ...@@ -762,7 +762,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
template<typename _FwdIter> template<typename _FwdIter>
basic_regex(_FwdIter __first, _FwdIter __last, locale_type __loc, basic_regex(_FwdIter __first, _FwdIter __last, locale_type __loc,
flag_type __f) flag_type __f)
: _M_flags(__f), _M_loc(std::move(__loc)), : _M_flags((__f & (ECMAScript | basic | extended | awk | grep | egrep))
? __f : (__f | ECMAScript)),
_M_loc(std::move(__loc)),
_M_automaton(__detail::__compile_nfa<_FwdIter, _Rx_traits>( _M_automaton(__detail::__compile_nfa<_FwdIter, _Rx_traits>(
std::move(__first), std::move(__last), _M_loc, _M_flags)) std::move(__first), std::move(__last), _M_loc, _M_flags))
{ } { }
......
...@@ -30,6 +30,15 @@ ...@@ -30,6 +30,15 @@
namespace std _GLIBCXX_VISIBILITY(default) namespace std _GLIBCXX_VISIBILITY(default)
{ {
_GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_CXX11
// Forward declaration of std::regex_traits.  The _RegexTranslator partial
// specialization for std::regex_traits<_CharType> in this header names it.
template<typename>
class regex_traits;
_GLIBCXX_END_NAMESPACE_CXX11
_GLIBCXX_END_NAMESPACE_VERSION
namespace __detail namespace __detail
{ {
_GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_BEGIN_NAMESPACE_VERSION
...@@ -207,17 +216,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -207,17 +216,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// [28.13.14] // [28.13.14]
template<typename _TraitsT, bool __icase, bool __collate> template<typename _TraitsT, bool __icase, bool __collate>
class _RegexTranslator class _RegexTranslatorBase
{ {
public: public:
typedef typename _TraitsT::char_type _CharT; typedef typename _TraitsT::char_type _CharT;
typedef typename _TraitsT::string_type _StringT; typedef typename _TraitsT::string_type _StringT;
typedef typename std::conditional<__collate, typedef _StringT _StrTransT;
_StringT,
_CharT>::type _StrTransT;
explicit explicit
_RegexTranslator(const _TraitsT& __traits) _RegexTranslatorBase(const _TraitsT& __traits)
: _M_traits(__traits) : _M_traits(__traits)
{ } { }
...@@ -235,23 +242,86 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -235,23 +242,86 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StrTransT _StrTransT
_M_transform(_CharT __ch) const _M_transform(_CharT __ch) const
{ {
return _M_transform_impl(__ch, typename integral_constant<bool, _StrTransT __str(1, __ch);
__collate>::type()); return _M_traits.transform(__str.begin(), __str.end());
} }
private: // See LWG 523. It's not efficiently implementable when _TraitsT is not
// std::regex_traits<>, and __collate is true. See specializations for
// implementations of other cases.
bool
_M_match_range(const _StrTransT& __first, const _StrTransT& __last,
const _StrTransT& __s) const
{ return __first <= __s && __s <= __last; }
protected:
bool _M_in_range_icase(_CharT __first, _CharT __last, _CharT __ch) const
{
typedef std::ctype<_CharT> __ctype_type;
const auto& __fctyp = use_facet<__ctype_type>(this->_M_traits.getloc());
auto __lower = __fctyp.tolower(__ch);
auto __upper = __fctyp.toupper(__ch);
return (__first <= __lower && __lower <= __last)
|| (__first <= __upper && __upper <= __last);
}
const _TraitsT& _M_traits;
};
// Primary template: adds nothing to _RegexTranslatorBase beyond inheriting
// its constructors, so every member function, including _M_match_range,
// comes from the base class.
template<typename _TraitsT, bool __icase, bool __collate>
class _RegexTranslator
: public _RegexTranslatorBase<_TraitsT, __icase, __collate>
{
public:
typedef _RegexTranslatorBase<_TraitsT, __icase, __collate> _Base;
// Inherit the base class constructors.
using _Base::_Base;
};
template<typename _TraitsT, bool __icase>
class _RegexTranslator<_TraitsT, __icase, false>
: public _RegexTranslatorBase<_TraitsT, __icase, false>
{
public:
typedef _RegexTranslatorBase<_TraitsT, __icase, false> _Base;
typedef typename _Base::_CharT _CharT;
typedef _CharT _StrTransT;
using _Base::_Base;
_StrTransT _StrTransT
_M_transform_impl(_CharT __ch, false_type) const _M_transform(_CharT __ch) const
{ return __ch; } { return __ch; }
_StrTransT bool
_M_transform_impl(_CharT __ch, true_type) const _M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
{ {
_StrTransT __str = _StrTransT(1, _M_translate(__ch)); if (!__icase)
return _M_traits.transform(__str.begin(), __str.end()); return __first <= __ch && __ch <= __last;
return this->_M_in_range_icase(__first, __last, __ch);
} }
};
const _TraitsT& _M_traits; template<typename _CharType>
class _RegexTranslator<std::regex_traits<_CharType>, true, true>
: public _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
{
public:
typedef _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
_Base;
typedef typename _Base::_CharT _CharT;
typedef typename _Base::_StrTransT _StrTransT;
using _Base::_Base;
bool
_M_match_range(const _StrTransT& __first, const _StrTransT& __last,
const _StrTransT& __str) const
{
__glibcxx_assert(__first.size() == 1);
__glibcxx_assert(__last.size() == 1);
__glibcxx_assert(__str.size() == 1);
return this->_M_in_range_icase(__first[0], __last[0], __str[0]);
}
}; };
template<typename _TraitsT> template<typename _TraitsT>
...@@ -272,6 +342,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -272,6 +342,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StrTransT _StrTransT
_M_transform(_CharT __ch) const _M_transform(_CharT __ch) const
{ return __ch; } { return __ch; }
bool
_M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
{ return __first <= __ch && __ch <= __last; }
}; };
template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate> template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
......
...@@ -612,37 +612,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -612,37 +612,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_BracketMatcher<_TraitsT, __icase, __collate>:: _BracketMatcher<_TraitsT, __icase, __collate>::
_M_apply(_CharT __ch, false_type) const _M_apply(_CharT __ch, false_type) const
{ {
bool __ret = std::binary_search(_M_char_set.begin(), _M_char_set.end(), return [this, __ch]
_M_translator._M_translate(__ch)); {
if (!__ret) if (std::binary_search(_M_char_set.begin(), _M_char_set.end(),
{ _M_translator._M_translate(__ch)))
auto __s = _M_translator._M_transform(__ch); return true;
for (auto& __it : _M_range_set) auto __s = _M_translator._M_transform(__ch);
if (__it.first <= __s && __s <= __it.second) for (auto& __it : _M_range_set)
{ if (_M_translator._M_match_range(__it.first, __it.second, __s))
__ret = true; return true;
break; if (_M_traits.isctype(__ch, _M_class_set))
} return true;
if (_M_traits.isctype(__ch, _M_class_set)) if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(),
__ret = true; _M_traits.transform_primary(&__ch, &__ch+1))
else if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(), != _M_equiv_set.end())
_M_traits.transform_primary(&__ch, &__ch+1)) return true;
!= _M_equiv_set.end()) for (auto& __it : _M_neg_class_set)
__ret = true; if (!_M_traits.isctype(__ch, __it))
else return true;
{ return false;
for (auto& __it : _M_neg_class_set) }() ^ _M_is_non_matching;
if (!_M_traits.isctype(__ch, __it))
{
__ret = true;
break;
}
}
}
if (_M_is_non_matching)
return !__ret;
else
return __ret;
} }
_GLIBCXX_END_NAMESPACE_VERSION _GLIBCXX_END_NAMESPACE_VERSION
......
// { dg-do run { target c++11 } }
//
// Copyright (C) 2016 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.7 Class template regex_traits [re.traits]
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
void
test01()
{
{
regex re("[T-f]", regex::icase);
VERIFY(regex_match("A", re));
VERIFY(regex_match("F", re));
VERIFY(regex_match("a", re));
VERIFY(regex_match("f", re));
VERIFY(!regex_match("G", re));
VERIFY(!regex_match("S", re));
VERIFY(!regex_match("g", re));
VERIFY(!regex_match("s", re));
VERIFY(regex_match("T", re));
VERIFY(regex_match("Z", re));
VERIFY(regex_match("t", re));
VERIFY(regex_match("z", re));
}
// icase works with std::regex_traits<>, because we know how it's implemented.
{
regex re("[T-f]", regex::icase | regex::collate);
VERIFY(regex_match("A", re));
VERIFY(regex_match("F", re));
VERIFY(regex_match("a", re));
VERIFY(regex_match("f", re));
VERIFY(!regex_match("G", re));
VERIFY(!regex_match("S", re));
VERIFY(!regex_match("g", re));
VERIFY(!regex_match("s", re));
VERIFY(regex_match("T", re));
VERIFY(regex_match("Z", re));
VERIFY(regex_match("t", re));
VERIFY(regex_match("z", re));
}
}
// Test entry point: runs the icase range checks in test01.
int
main()
{
  test01();
}
...@@ -30,6 +30,9 @@ ...@@ -30,6 +30,9 @@
using namespace std; using namespace std;
bool called_transform = false;
bool called_nocase = false;
template<typename CharT> template<typename CharT>
class MyRegexTraits class MyRegexTraits
: public regex_traits<CharT> : public regex_traits<CharT>
...@@ -40,14 +43,71 @@ template<typename CharT> ...@@ -40,14 +43,71 @@ template<typename CharT>
{ {
return c+1; return c+1;
} }
CharT
translate_nocase(CharT c) const
{
called_nocase = true;
return regex_traits<CharT>::translate_nocase(c);
}
template<typename FwdIt>
basic_string<CharT>
transform(FwdIt begin, FwdIt end) const
{
called_transform = true;
return regex_traits<CharT>::transform(begin, end);
}
}; };
void void
test01() test01()
{ {
basic_regex<char, MyRegexTraits<char>> re("."); {
VERIFY(!regex_match("\n", re)); basic_regex<char, MyRegexTraits<char>> re(".");
VERIFY(!regex_match("\r", re)); VERIFY(!regex_match("\n", re));
VERIFY(!regex_match("\r", re));
}
{
VERIFY(!called_transform);
basic_regex<char, MyRegexTraits<char>> re("[a]", regex::collate);
VERIFY(regex_match("a", re));
VERIFY(exchange(called_transform, false));
}
{
VERIFY(!called_nocase);
basic_regex<char, MyRegexTraits<char>> re("[a]", regex::icase);
VERIFY(regex_match("A", re));
VERIFY(exchange(called_nocase, false));
}
{
basic_regex<char, MyRegexTraits<char>> re("[T-f]", regex::icase);
VERIFY(regex_match("A", re));
VERIFY(regex_match("F", re));
VERIFY(regex_match("a", re));
VERIFY(regex_match("f", re));
VERIFY(!regex_match("G", re));
VERIFY(!regex_match("S", re));
VERIFY(!regex_match("g", re));
VERIFY(!regex_match("s", re));
VERIFY(regex_match("T", re));
VERIFY(regex_match("Z", re));
VERIFY(regex_match("t", re));
VERIFY(regex_match("z", re));
}
// icase has no effect on ranges when collate is combined with user-defined traits.
{
basic_regex<char, MyRegexTraits<char>> re("[T-f]", regex::icase | regex::collate);
VERIFY(!regex_match("A", re));
VERIFY(!regex_match("S", re));
VERIFY(regex_match("T", re));
VERIFY(regex_match("Z", re));
VERIFY(regex_match("a", re));
VERIFY(regex_match("f", re));
VERIFY(!regex_match("g", re));
}
} }
int main() int main()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment