Commit f9ce3c16 by Tim Shen Committed by Tim Shen

re PR libstdc++/67015 ("^[a-z0-9][a-z0-9-]*$", std::regex::extended is miscompiled)

	PR libstdc++/67015
	* include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
	_BracketMatcher<>::_M_add_collating_element): Change signature
	to make checking the end of bracket expression easier.
	* include/bits/regex_compiler.tcc (_Compiler<>::_M_expression_term):
	Treat '-' as a valid literal if it's at the end of bracket expression.
	* testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
	New testcases.

From-SVN: r226336
parent 3d61d875
2015-07-29 Tim Shen <timshen@google.com>
PR libstdc++/67015
* include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
_BracketMatcher<>::_M_add_collating_element): Change signature
to make checking the end of bracket expression easier.
* include/bits/regex_compiler.tcc (_Compiler<>::_M_expression_term):
Treat '-' as a valid literal if it's at the end of bracket expression.
* testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
New testcases.
2015-07-24 Jonathan Wakely <jwakely@redhat.com> 2015-07-24 Jonathan Wakely <jwakely@redhat.com>
* include/bits/atomic_futex.h [_GLIBCXX_HAVE_LINUX_FUTEX] * include/bits/atomic_futex.h [_GLIBCXX_HAVE_LINUX_FUTEX]
......
...@@ -116,8 +116,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -116,8 +116,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void void
_M_insert_bracket_matcher(bool __neg); _M_insert_bracket_matcher(bool __neg);
// Returns true if successfully matched one term and should continue.
// Returns false if the compiler should move on.
template<bool __icase, bool __collate> template<bool __icase, bool __collate>
void bool
_M_expression_term(pair<bool, _CharT>& __last_char, _M_expression_term(pair<bool, _CharT>& __last_char,
_BracketMatcher<_TraitsT, __icase, __collate>& _BracketMatcher<_TraitsT, __icase, __collate>&
__matcher); __matcher);
...@@ -389,8 +391,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -389,8 +391,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif #endif
} }
void _StringT
_M_add_collating_element(const _StringT& __s) _M_add_collate_element(const _StringT& __s)
{ {
auto __st = _M_traits.lookup_collatename(__s.data(), auto __st = _M_traits.lookup_collatename(__s.data(),
__s.data() + __s.size()); __s.data() + __s.size());
...@@ -400,6 +402,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -400,6 +402,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
_M_is_ready = false; _M_is_ready = false;
#endif #endif
return __st;
} }
void void
......
...@@ -424,8 +424,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -424,8 +424,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__last_char.first = true; __last_char.first = true;
__last_char.second = _M_value[0]; __last_char.second = _M_value[0];
} }
while (!_M_match_token(_ScannerT::_S_token_bracket_end)) while (_M_expression_term(__last_char, __matcher));
_M_expression_term(__last_char, __matcher);
__matcher._M_ready(); __matcher._M_ready();
_M_stack.push(_StateSeqT( _M_stack.push(_StateSeqT(
*_M_nfa, *_M_nfa,
...@@ -434,21 +433,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -434,21 +433,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _TraitsT> template<typename _TraitsT>
template<bool __icase, bool __collate> template<bool __icase, bool __collate>
void bool
_Compiler<_TraitsT>:: _Compiler<_TraitsT>::
_M_expression_term(pair<bool, _CharT>& __last_char, _M_expression_term(pair<bool, _CharT>& __last_char,
_BracketMatcher<_TraitsT, __icase, __collate>& __matcher) _BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
{ {
if (_M_match_token(_ScannerT::_S_token_bracket_end))
return false;
if (_M_match_token(_ScannerT::_S_token_collsymbol)) if (_M_match_token(_ScannerT::_S_token_collsymbol))
__matcher._M_add_collating_element(_M_value); {
auto __symbol = __matcher._M_add_collate_element(_M_value);
if (__symbol.size() == 1)
{
__last_char.first = true;
__last_char.second = __symbol[0];
}
}
else if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
__matcher._M_add_equivalence_class(_M_value); __matcher._M_add_equivalence_class(_M_value);
else if (_M_match_token(_ScannerT::_S_token_char_class_name)) else if (_M_match_token(_ScannerT::_S_token_char_class_name))
__matcher._M_add_character_class(_M_value, false); __matcher._M_add_character_class(_M_value, false);
// POSIX doesn't permit '-' as a start-range char (say [a-z--0]), // POSIX doesn't allow '-' as a start-range char (say [a-z--0]),
// except when the '-' is the first character in the bracket expression // except when the '-' is the first or last character in the bracket
// ([--0]). ECMAScript treats all '-' after a range as a normal character. // expression ([--0]). ECMAScript treats all '-' after a range as a
// Also see above, where _M_expression_term gets called. // normal character. Also see above, where _M_expression_term gets called.
// //
// As a result, POSIX rejects [-----], but ECMAScript doesn't. // As a result, POSIX rejects [-----], but ECMAScript doesn't.
// Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax. // Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax.
...@@ -459,10 +468,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -459,10 +468,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
if (!__last_char.first) if (!__last_char.first)
{ {
__matcher._M_add_char(_M_value[0]);
if (_M_value[0] == '-' if (_M_value[0] == '-'
&& !(_M_flags & regex_constants::ECMAScript)) && !(_M_flags & regex_constants::ECMAScript))
{
if (_M_match_token(_ScannerT::_S_token_bracket_end))
return false;
__throw_regex_error(regex_constants::error_range); __throw_regex_error(regex_constants::error_range);
__matcher._M_add_char(_M_value[0]); }
__last_char.first = true; __last_char.first = true;
__last_char.second = _M_value[0]; __last_char.second = _M_value[0];
} }
...@@ -496,6 +509,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -496,6 +509,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_value[0])); _M_value[0]));
else else
__throw_regex_error(regex_constants::error_brack); __throw_regex_error(regex_constants::error_brack);
return true;
} }
template<typename _TraitsT> template<typename _TraitsT>
......
...@@ -82,6 +82,22 @@ test02() ...@@ -82,6 +82,22 @@ test02()
VERIFY(e.code() == std::regex_constants::error_range); VERIFY(e.code() == std::regex_constants::error_range);
} }
std::regex re("[-----]", std::regex::ECMAScript); std::regex re("[-----]", std::regex::ECMAScript);
VERIFY(!regex_match("b", regex("[-ac]", regex_constants::extended)));
VERIFY(!regex_match("b", regex("[ac-]", regex_constants::extended)));
VERIFY(regex_match("b", regex("[^-ac]", regex_constants::extended)));
VERIFY(regex_match("b", regex("[^ac-]", regex_constants::extended)));
VERIFY(regex_match("&", regex("[%--]", regex_constants::extended)));
VERIFY(regex_match(".", regex("[--@]", regex_constants::extended)));
try
{
regex("[a--@]", regex_constants::extended);
VERIFY(false);
}
catch (const std::regex_error& e)
{
}
VERIFY(regex_match("].", regex("[][.hyphen.]-0]*", regex_constants::extended)));
} }
void void
...@@ -115,6 +131,44 @@ test04() ...@@ -115,6 +131,44 @@ test04()
VERIFY(regex_match_debug("w", re)); VERIFY(regex_match_debug("w", re));
} }
// libstdc++/67015
// With the POSIX extended grammar, a '-' that is the first or the last
// character of a bracket expression must be accepted as a literal '-'.
void
test05()
{
  bool test __attribute__((unused)) = true;

  // These three patterns only have to compile.  The syntax flag is passed
  // as the constructor's second argument; if it is left inside the pattern
  // text it becomes part of the regex and the pattern is compiled with the
  // default grammar instead of the extended one.
  regex lanana_namespace("^[a-z0-9]+$", regex::extended);
  regex lsb_namespace("^_?([a-z0-9_.]+-)+[a-z0-9]+$", regex::extended);
  regex debian_dpkg_conffile_cruft("dpkg-(old|dist|new|tmp)$",
				   regex::extended);

  // '-' as the last character of the bracket expression.
  regex debian_cron_namespace("^[a-z0-9][a-z0-9-]*$", regex::extended);
  VERIFY(regex_match("test", debian_cron_namespace));
  VERIFY(!regex_match("-a", debian_cron_namespace));
  VERIFY(regex_match("a-", debian_cron_namespace));

  // '-' as the first character of the bracket expression.
  regex debian_cron_namespace_ok("^[a-z0-9][-a-z0-9]*$", regex::extended);
  VERIFY(regex_match("test", debian_cron_namespace_ok));
  VERIFY(!regex_match("-a", debian_cron_namespace_ok));
  VERIFY(regex_match("a-", debian_cron_namespace_ok));
}
// libstdc++/67015
void
test06()
{
bool test __attribute__((unused)) = true;
regex lanana_namespace("^[a-z0-9]+$");
regex lsb_namespace("^_?([a-z0-9_.]+-)+[a-z0-9]+$");
regex debian_dpkg_conffile_cruft("dpkg-(old|dist|new|tmp)$");
regex debian_cron_namespace("^[a-z0-9][a-z0-9-]*$");
VERIFY(regex_match("test", debian_cron_namespace));
VERIFY(!regex_match("-a", debian_cron_namespace));
VERIFY(regex_match("a-", debian_cron_namespace));
regex debian_cron_namespace_ok("^[a-z0-9][-a-z0-9]*$");
VERIFY(regex_match("test", debian_cron_namespace_ok));
VERIFY(!regex_match("-a", debian_cron_namespace_ok));
VERIFY(regex_match("a-", debian_cron_namespace_ok));
}
int int
main() main()
{ {
...@@ -122,5 +176,8 @@ main() ...@@ -122,5 +176,8 @@ main()
test02(); test02();
test03(); test03();
test04(); test04();
test05();
test06();
return 0; return 0;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment