Commit 79b576cc by Tim Shen Committed by Tim Shen

re PR libstdc++/63775 ([C++11] Regex range with leading dash (-) not working)

	PR libstdc++/63775
	* include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
	_BracketMatcher<>::_M_make_range): Throw regex_error on invalid range
	like [z-a]. Change _M_expression_term interface.
	* include/bits/regex_compiler.tcc (
	_Compiler<>::_M_insert_bracket_matcher,
	_Compiler<>::_M_expression_term): Rewrite bracket expression parsing.
	* testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
	Add testcases and move file out of extended.

From-SVN: r217461
parent 0a134b2a
2014-11-13 Tim Shen <timshen@google.com>
PR libstdc++/63775
* include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
_BracketMatcher<>::_M_make_range): Throw regex_error on invalid range
like [z-a]. Change _M_expression_term interface.
* include/bits/regex_compiler.tcc (
_Compiler<>::_M_insert_bracket_matcher,
_Compiler<>::_M_expression_term): Rewrite bracket expression parsing.
* testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
Add testcases and move file out of extended.
2014-11-12 Jonathan Wakely <jwakely@redhat.com> 2014-11-12 Jonathan Wakely <jwakely@redhat.com>
PR libstdc++/57250 PR libstdc++/57250
......
...@@ -118,7 +118,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -118,7 +118,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<bool __icase, bool __collate> template<bool __icase, bool __collate>
void void
_M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& _M_expression_term(pair<bool, _CharT>& __last_char,
_BracketMatcher<_TraitsT, __icase, __collate>&
__matcher); __matcher);
int int
...@@ -390,6 +391,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -390,6 +391,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void void
_M_make_range(_CharT __l, _CharT __r) _M_make_range(_CharT __l, _CharT __r)
{ {
if (__l > __r)
__throw_regex_error(regex_constants::error_range);
_M_range_set.push_back(make_pair(_M_translator._M_transform(__l), _M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
_M_translator._M_transform(__r))); _M_translator._M_transform(__r)));
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
......
...@@ -415,8 +415,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -415,8 +415,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_insert_bracket_matcher(bool __neg) _M_insert_bracket_matcher(bool __neg)
{ {
_BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits); _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits);
pair<bool, _CharT> __last_char; // Optional<_CharT>
__last_char.first = false;
if (!(_M_flags & regex_constants::ECMAScript))
if (_M_try_char())
{
__matcher._M_add_char(_M_value[0]);
__last_char.first = true;
__last_char.second = _M_value[0];
}
while (!_M_match_token(_ScannerT::_S_token_bracket_end)) while (!_M_match_token(_ScannerT::_S_token_bracket_end))
_M_expression_term(__matcher); _M_expression_term(__last_char, __matcher);
__matcher._M_ready(); __matcher._M_ready();
_M_stack.push(_StateSeqT( _M_stack.push(_StateSeqT(
*_M_nfa, *_M_nfa,
...@@ -427,7 +436,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -427,7 +436,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<bool __icase, bool __collate> template<bool __icase, bool __collate>
void void
_Compiler<_TraitsT>:: _Compiler<_TraitsT>::
_M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& __matcher) _M_expression_term(pair<bool, _CharT>& __last_char,
_BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
{ {
if (_M_match_token(_ScannerT::_S_token_collsymbol)) if (_M_match_token(_ScannerT::_S_token_collsymbol))
__matcher._M_add_collating_element(_M_value); __matcher._M_add_collating_element(_M_value);
...@@ -435,27 +445,50 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -435,27 +445,50 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__matcher._M_add_equivalence_class(_M_value); __matcher._M_add_equivalence_class(_M_value);
else if (_M_match_token(_ScannerT::_S_token_char_class_name)) else if (_M_match_token(_ScannerT::_S_token_char_class_name))
__matcher._M_add_character_class(_M_value, false); __matcher._M_add_character_class(_M_value, false);
else if (_M_try_char()) // [a // POSIX doesn't permit '-' as a start-range char (say [a-z--0]),
// except when the '-' is the first character in the bracket expression
// ([--0]). ECMAScript treats all '-' after a range as a normal character.
// Also see above, where _M_expression_term gets called.
//
// As a result, POSIX rejects [-----], but ECMAScript doesn't.
// Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax.
// Clang (3.5) always uses ECMAScript style even in its POSIX syntax.
//
// It turns out that no one reads BNFs ;)
else if (_M_try_char())
{ {
auto __ch = _M_value[0]; if (!__last_char.first)
if (_M_try_char()) {
if (_M_value[0] == '-'
&& !(_M_flags & regex_constants::ECMAScript))
__throw_regex_error(regex_constants::error_range);
__matcher._M_add_char(_M_value[0]);
__last_char.first = true;
__last_char.second = _M_value[0];
}
else
{ {
if (_M_value[0] == '-') // [a- if (_M_value[0] == '-')
{ {
if (_M_try_char()) // [a-z] if (_M_try_char())
{ {
__matcher._M_make_range(__ch, _M_value[0]); __matcher._M_make_range(__last_char.second , _M_value[0]);
return; __last_char.first = false;
} }
// If the dash is the last character in the bracket else
// expression, it is not special. {
if (_M_scanner._M_get_token() if (_M_scanner._M_get_token()
!= _ScannerT::_S_token_bracket_end) != _ScannerT::_S_token_bracket_end)
__throw_regex_error(regex_constants::error_range); __throw_regex_error(regex_constants::error_range);
__matcher._M_add_char(_M_value[0]);
}
} }
else
{
__matcher._M_add_char(_M_value[0]); __matcher._M_add_char(_M_value[0]);
__last_char.second = _M_value[0];
}
} }
__matcher._M_add_char(__ch);
} }
else if (_M_match_token(_ScannerT::_S_token_quoted_class)) else if (_M_match_token(_ScannerT::_S_token_quoted_class))
__matcher._M_add_character_class(_M_value, __matcher._M_add_character_class(_M_value,
......
...@@ -67,9 +67,60 @@ test01() ...@@ -67,9 +67,60 @@ test01()
} }
} }
// Checks how a bracket expression made only of dashes, "[-----]", is
// handled under two grammars:
//  - with std::regex::extended, construction must throw std::regex_error
//    whose code() is error_range;
//  - with std::regex::ECMAScript, construction is performed outside any
//    try block, so an exception there would escape this function.
void
test02()
{
// NOTE(review): `test` is presumably consumed by the VERIFY macro in some
// build configurations -- confirm against the testsuite hooks header.
bool test __attribute__((unused)) = true;
try
{
std::regex re("[-----]", std::regex::extended);
// Reached only if the constructor above did not throw.
VERIFY(false);
}
catch (const std::regex_error& e)
{
VERIFY(e.code() == std::regex_constants::error_range);
}
// Same pattern, ECMAScript grammar: expected to construct without throwing.
std::regex re("[-----]", std::regex::ECMAScript);
}
// Checks that a reversed range, "[z-a]", is rejected under
// std::regex::extended: construction must throw std::regex_error whose
// code() is error_range.
void
test03()
{
// NOTE(review): `test` is presumably consumed by the VERIFY macro in some
// build configurations -- confirm against the testsuite hooks header.
bool test __attribute__((unused)) = true;
try
{
std::regex re("[z-a]", std::regex::extended);
// Reached only if the constructor above did not throw.
VERIFY(false);
}
catch (const std::regex_error& e)
{
VERIFY(e.code() == std::regex_constants::error_range);
}
}
// Checks that "[-0-9a-z]" (a dash as the first bracket character, followed
// by two ranges) matches "-", "1" and "w", first with the regex built using
// default flags and then after re-assigning it with regex_constants::basic.
void
test04()
{
// NOTE(review): `test` is presumably consumed by the VERIFY macro in some
// build configurations -- confirm against the testsuite hooks header.
bool test __attribute__((unused)) = true;
std::regex re("[-0-9a-z]");
VERIFY(regex_match_debug("-", re));
VERIFY(regex_match_debug("1", re));
VERIFY(regex_match_debug("w", re));
// Repeat the same three matches with the basic grammar.
re.assign("[-0-9a-z]", regex_constants::basic);
VERIFY(regex_match_debug("-", re));
VERIFY(regex_match_debug("1", re));
VERIFY(regex_match_debug("w", re));
}
int int
main() main()
{ {
test01(); test01();
test02();
test03();
test04();
return 0; return 0;
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment