Commit 974afa58 by Tim Shen Committed by Tim Shen

re PR libstdc++/71500 (regex::icase only works on first character in a range)

	PR libstdc++/71500
	* include/bits/regex.h (basic_regex::basic_regex): Use ECMAScript
	when the syntax is not specified.
	* include/bits/regex_compiler.h (_RegexTranslator,
	_RegexTranslatorBase): Partially support icase in ranges.
	* include/bits/regex_compiler.tcc (_BracketMatcher::_M_apply):
	Refactor _M_apply to make the control flow easier to follow, and
	call _M_translator._M_match_range as added previously.
	* testsuite/28_regex/traits/char/icase.cc: Add new tests.
	* testsuite/28_regex/traits/char/user_defined.cc: Add new tests.

From-SVN: r243093
parent 0f091278
2016-12-01 Tim Shen <timshen@google.com>
PR libstdc++/71500
* include/bits/regex.h (basic_regex::basic_regex): Use ECMAScript
when the syntax is not specified.
* include/bits/regex_compiler.h (_RegexTranslator,
_RegexTranslatorBase): Partially support icase in ranges.
* include/bits/regex_compiler.tcc (_BracketMatcher::_M_apply):
Refactor _M_apply to make the control flow easier to follow, and
call _M_translator._M_match_range as added previously.
* testsuite/28_regex/traits/char/icase.cc: Add new tests.
* testsuite/28_regex/traits/char/user_defined.cc: Add new tests.
2016-11-30 Ville Voutilainen <ville.voutilainen@gmail.com>
Fix testsuite failures caused by the patch implementing LWG 2534.
......
......@@ -762,7 +762,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
// Constructs the regex from the iterator pair __first/__last, taking the
// locale __loc by value and moving it into _M_loc.
// When __f selects none of the grammars (ECMAScript, basic, extended, awk,
// grep, egrep), ECMAScript is OR-ed into the stored flags, so the value
// handed to the NFA compiler always names a grammar.
template<typename _FwdIter>
basic_regex(_FwdIter __first, _FwdIter __last, locale_type __loc,
flag_type __f)
: _M_flags(__f), _M_loc(std::move(__loc)),
: _M_flags((__f & (ECMAScript | basic | extended | awk | grep | egrep))
? __f : (__f | ECMAScript)),
_M_loc(std::move(__loc)),
_M_automaton(__detail::__compile_nfa<_FwdIter, _Rx_traits>(
std::move(__first), std::move(__last), _M_loc, _M_flags))
{ }
......
......@@ -30,6 +30,15 @@
namespace std _GLIBCXX_VISIBILITY(default)
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_CXX11
// Forward declaration of std::regex_traits, so that code later in this
// header can name it (a _RegexTranslator partial specialization is keyed
// on it).  NOTE(review): presumably the full definition is not yet
// available at this point of inclusion -- confirm.
template<typename>
class regex_traits;
_GLIBCXX_END_NAMESPACE_CXX11
_GLIBCXX_END_NAMESPACE_VERSION
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
......@@ -207,17 +216,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// [28.13.14]
template<typename _TraitsT, bool __icase, bool __collate>
class _RegexTranslator
class _RegexTranslatorBase
{
public:
typedef typename _TraitsT::char_type _CharT;
typedef typename _TraitsT::string_type _StringT;
typedef typename std::conditional<__collate,
_StringT,
_CharT>::type _StrTransT;
typedef _StringT _StrTransT;
explicit
_RegexTranslator(const _TraitsT& __traits)
_RegexTranslatorBase(const _TraitsT& __traits)
: _M_traits(__traits)
{ }
......@@ -235,23 +242,86 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StrTransT
_M_transform(_CharT __ch) const
{
return _M_transform_impl(__ch, typename integral_constant<bool,
__collate>::type());
_StrTransT __str(1, __ch);
return _M_traits.transform(__str.begin(), __str.end());
}
private:
// Inclusive range test on transformed keys, using operator<= only; no
// case folding is attempted here.  See LWG 523: doing so is not
// efficiently implementable when _TraitsT is not std::regex_traits<> and
// __collate is true.  The remaining cases are implemented by the
// specializations.
bool
_M_match_range(const _StrTransT& __first, const _StrTransT& __last,
               const _StrTransT& __s) const
{
  // Below the lower bound: reject without looking at the upper bound.
  if (!(__first <= __s))
    return false;
  return __s <= __last;
}
protected:
// Case-insensitive inclusive range test.  Both the lower-case and the
// upper-case form of __ch are obtained from the std::ctype facet of the
// traits' locale; the result is true when either form lies between
// __first and __last.
bool
_M_in_range_icase(_CharT __first, _CharT __last, _CharT __ch) const
{
  const auto& __ct
    = use_facet<std::ctype<_CharT>>(this->_M_traits.getloc());

  // Compute both case forms up front, then test them in turn.
  const auto __lo = __ct.tolower(__ch);
  const auto __up = __ct.toupper(__ch);

  if (__first <= __lo && __lo <= __last)
    return true;
  return __first <= __up && __up <= __last;
}
const _TraitsT& _M_traits;
};
// Primary translator template.  It adds nothing of its own: every
// operation, and every constructor, is inherited from
// _RegexTranslatorBase instantiated with the same arguments.
template<typename _TraitsT, bool __icase, bool __collate>
  class _RegexTranslator
  : public _RegexTranslatorBase<_TraitsT, __icase, __collate>
  {
  public:
    using _Base = _RegexTranslatorBase<_TraitsT, __icase, __collate>;

    // Inherit the base-class constructors.
    using _Base::_Base;
  };
// Partial specialization for __collate == false.  Here the transformed key
// type (_StrTransT) is the character type itself, so _M_transform returns
// its argument and ranges are compared character by character.
template<typename _TraitsT, bool __icase>
class _RegexTranslator<_TraitsT, __icase, false>
: public _RegexTranslatorBase<_TraitsT, __icase, false>
{
public:
typedef _RegexTranslatorBase<_TraitsT, __icase, false> _Base;
typedef typename _Base::_CharT _CharT;
// Keys are single characters in this specialization, not strings.
typedef _CharT _StrTransT;
using _Base::_Base;
_StrTransT
_M_transform_impl(_CharT __ch, false_type) const
_M_transform(_CharT __ch) const
{ return __ch; }
_StrTransT
_M_transform_impl(_CharT __ch, true_type) const
bool
_M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
{
_StrTransT __str = _StrTransT(1, _M_translate(__ch));
return _M_traits.transform(__str.begin(), __str.end());
// Without icase the test is a direct inclusive comparison; with icase it
// is delegated to the base-class helper _M_in_range_icase.
if (!__icase)
return __first <= __ch && __ch <= __last;
return this->_M_in_range_icase(__first, __last, __ch);
}
};
const _TraitsT& _M_traits;
// Partial specialization for std::regex_traits with both icase and collate
// enabled.  The range endpoints and the candidate arrive as transformed
// strings; each is asserted to hold exactly one character, and those
// characters are then compared case-insensitively via _M_in_range_icase.
template<typename _CharType>
  class _RegexTranslator<std::regex_traits<_CharType>, true, true>
  : public _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>
  {
  public:
    using _Base
      = _RegexTranslatorBase<std::regex_traits<_CharType>, true, true>;
    using _CharT = typename _Base::_CharT;
    using _StrTransT = typename _Base::_StrTransT;

    // Inherit the base-class constructors.
    using _Base::_Base;

    bool
    _M_match_range(const _StrTransT& __first, const _StrTransT& __last,
                   const _StrTransT& __str) const
    {
      // Each key is expected to be a one-character string.
      __glibcxx_assert(__first.size() == 1);
      __glibcxx_assert(__last.size() == 1);
      __glibcxx_assert(__str.size() == 1);

      const _CharT __lo = __first[0];
      const _CharT __hi = __last[0];
      const _CharT __c = __str[0];
      return this->_M_in_range_icase(__lo, __hi, __c);
    }
  };
template<typename _TraitsT>
......@@ -272,6 +342,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Identity transform: the character is returned unchanged and serves as
// its own comparison key.
_StrTransT
_M_transform(_CharT __ch) const
{ return __ch; }
// Inclusive range test on characters using operator<= only; no case
// folding or string transformation is applied.
bool
_M_match_range(_CharT __first, _CharT __last, _CharT __ch) const
{
  // Below the lower bound: reject without looking at the upper bound.
  if (!(__first <= __ch))
    return false;
  return __ch <= __last;
}
};
template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
......
......@@ -612,37 +612,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_BracketMatcher<_TraitsT, __icase, __collate>::
_M_apply(_CharT __ch, false_type) const
{
bool __ret = std::binary_search(_M_char_set.begin(), _M_char_set.end(),
_M_translator._M_translate(__ch));
if (!__ret)
{
auto __s = _M_translator._M_transform(__ch);
for (auto& __it : _M_range_set)
if (__it.first <= __s && __s <= __it.second)
{
__ret = true;
break;
}
if (_M_traits.isctype(__ch, _M_class_set))
__ret = true;
else if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(),
_M_traits.transform_primary(&__ch, &__ch+1))
!= _M_equiv_set.end())
__ret = true;
else
{
for (auto& __it : _M_neg_class_set)
if (!_M_traits.isctype(__ch, __it))
{
__ret = true;
break;
}
}
}
if (_M_is_non_matching)
return !__ret;
else
return __ret;
return [this, __ch]
{
if (std::binary_search(_M_char_set.begin(), _M_char_set.end(),
_M_translator._M_translate(__ch)))
return true;
auto __s = _M_translator._M_transform(__ch);
for (auto& __it : _M_range_set)
if (_M_translator._M_match_range(__it.first, __it.second, __s))
return true;
if (_M_traits.isctype(__ch, _M_class_set))
return true;
if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(),
_M_traits.transform_primary(&__ch, &__ch+1))
!= _M_equiv_set.end())
return true;
for (auto& __it : _M_neg_class_set)
if (!_M_traits.isctype(__ch, __it))
return true;
return false;
}() ^ _M_is_non_matching;
}
_GLIBCXX_END_NAMESPACE_VERSION
......
// { dg-do run { target c++11 } }
//
// Copyright (C) 2016 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.7 Class template regex_traits [re.traits]
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
void
test01()
{
{
regex re("[T-f]", regex::icase);
VERIFY(regex_match("A", re));
VERIFY(regex_match("F", re));
VERIFY(regex_match("a", re));
VERIFY(regex_match("f", re));
VERIFY(!regex_match("G", re));
VERIFY(!regex_match("S", re));
VERIFY(!regex_match("g", re));
VERIFY(!regex_match("s", re));
VERIFY(regex_match("T", re));
VERIFY(regex_match("Z", re));
VERIFY(regex_match("t", re));
VERIFY(regex_match("z", re));
}
// icase works with std::regex_traits<>, because we know how it's implemented.
{
regex re("[T-f]", regex::icase | regex::collate);
VERIFY(regex_match("A", re));
VERIFY(regex_match("F", re));
VERIFY(regex_match("a", re));
VERIFY(regex_match("f", re));
VERIFY(!regex_match("G", re));
VERIFY(!regex_match("S", re));
VERIFY(!regex_match("g", re));
VERIFY(!regex_match("s", re));
VERIFY(regex_match("T", re));
VERIFY(regex_match("Z", re));
VERIFY(regex_match("t", re));
VERIFY(regex_match("z", re));
}
}
// Test driver: runs the single test case.  Falling off the end of main
// reports success, exactly as an explicit "return 0" would.
int
main()
{
  test01();
}
......@@ -30,6 +30,9 @@
using namespace std;
bool called_transform = false;
bool called_nocase = false;
template<typename CharT>
class MyRegexTraits
: public regex_traits<CharT>
......@@ -40,14 +43,71 @@ template<typename CharT>
{
return c+1;
}
// Sets the called_nocase flag so the test can observe that the regex
// engine used this hook, then forwards to the regex_traits<CharT> version.
CharT
translate_nocase(CharT c) const
{
  called_nocase = true;
  const CharT folded = regex_traits<CharT>::translate_nocase(c);
  return folded;
}
// Sets the called_transform flag so the test can observe that the regex
// engine used this hook, then forwards to the regex_traits<CharT> version.
template<typename FwdIt>
  basic_string<CharT>
  transform(FwdIt begin, FwdIt end) const
  {
    called_transform = true;
    basic_string<CharT> key = regex_traits<CharT>::transform(begin, end);
    return key;
  }
};
// Exercises basic_regex instantiated with the user-defined MyRegexTraits:
// the '.' wildcard, the collate and icase hooks of the traits class, and
// bracket ranges with icase alone and with icase combined with collate.
void
test01()
{
// '.' must not match the line terminators "\n" and "\r".
basic_regex<char, MyRegexTraits<char>> re(".");
VERIFY(!regex_match("\n", re));
VERIFY(!regex_match("\r", re));
{
basic_regex<char, MyRegexTraits<char>> re(".");
VERIFY(!regex_match("\n", re));
VERIFY(!regex_match("\r", re));
}
// With regex::collate, the traits' transform() must have been called by
// the time the match succeeds; exchange() reads the flag and resets it.
{
VERIFY(!called_transform);
basic_regex<char, MyRegexTraits<char>> re("[a]", regex::collate);
VERIFY(regex_match("a", re));
VERIFY(exchange(called_transform, false));
}
// With regex::icase, the traits' translate_nocase() must have been called
// by the time "A" is matched against "[a]".
{
VERIFY(!called_nocase);
basic_regex<char, MyRegexTraits<char>> re("[a]", regex::icase);
VERIFY(regex_match("A", re));
VERIFY(exchange(called_nocase, false));
}
// icase alone with user-defined traits: the range "[T-f]" accepts both
// case forms of A, F, T and Z and rejects both case forms of G and S.
{
basic_regex<char, MyRegexTraits<char>> re("[T-f]", regex::icase);
VERIFY(regex_match("A", re));
VERIFY(regex_match("F", re));
VERIFY(regex_match("a", re));
VERIFY(regex_match("f", re));
VERIFY(!regex_match("G", re));
VERIFY(!regex_match("S", re));
VERIFY(!regex_match("g", re));
VERIFY(!regex_match("s", re));
VERIFY(regex_match("T", re));
VERIFY(regex_match("Z", re));
VERIFY(regex_match("t", re));
VERIFY(regex_match("z", re));
}
// icase is not applied to ranges when collate is combined with user-defined
// traits: "A" and "S" are rejected while "T", "Z", "a" and "f" are accepted.
{
basic_regex<char, MyRegexTraits<char>> re("[T-f]", regex::icase | regex::collate);
VERIFY(!regex_match("A", re));
VERIFY(!regex_match("S", re));
VERIFY(regex_match("T", re));
VERIFY(regex_match("Z", re));
VERIFY(regex_match("a", re));
VERIFY(regex_match("f", re));
VERIFY(!regex_match("g", re));
}
}
int main()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment