Commit 7b86458e by Tim Shen Committed by Tim Shen

regex.h (regex_match<>, [...]): Change regex_executor caller.

2013-09-14  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex.h (regex_match<>, regex_search<>):
	Change regex_executor caller. Now use their return value instead
	of checking __m[0].matched to find out if it's successful.
	(regex_search<>): Move the search logic to regex_executor.
	* include/bits/regex_automaton.h: Add some new _Opcode. Refactor
	_NFA::_M_insert_*.
	* include/bits/regex_automaton.tcc: Add DEBUG dump for new
	_Opcode. Refactor _NFA::_M_insert_*.
	* include/bits/regex_compiler.h (_Compiler<>::_M_get_nfa):
	Use make_shared instead of constructing by hand.
	* include/bits/regex_compiler.tcc: Implement _Compiler<>::_M_assertion.
	* include/bits/regex_constants.h: Fix indentation and line breaking.
	* include/bits/regex_executor.h: Add _ResultsEntry to support
	greedy/ungreedy mode. Move regex_search logic here.
	* include/bits/regex_executor.tcc: Implement assertions and
	greedy/ungreedy matching.
	* include/bits/regex_scanner.h: Add a new token _S_token_ungreedy.
	* include/bits/regex_scanner.tcc: Parse a new token _S_token_ungreedy.
	* testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc: New.
	* testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc: New.
	* testsuite/28_regex/algorithms/regex_search/ecma/string_01.cc:
	Fix comment.

From-SVN: r202591
parent 492d1e0a
2013-09-14 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h (regex_match<>, regex_search<>):
Change regex_executor caller. Now use their return value instead
of checking __m[0].matched to find out if it's successful.
(regex_search<>): Move the search logic to regex_executor.
* include/bits/regex_automaton.h: Add some new _Opcode. Refactor
_NFA::_M_insert_*.
* include/bits/regex_automaton.tcc: Add DEBUG dump for new
_Opcode. Refactor _NFA::_M_insert_*.
* include/bits/regex_compiler.h (_Compiler<>::_M_get_nfa):
Use make_shared instead of constructing by hand.
* include/bits/regex_compiler.tcc: Implement _Compiler<>::_M_assertion.
* include/bits/regex_constants.h: Fix indentation and line breaking.
* include/bits/regex_executor.h: Add _ResultsEntry to support
greedy/ungreedy mode. Move regex_search logic here.
* include/bits/regex_executor.tcc: Implement assertions and
greedy/ungreedy matching.
* include/bits/regex_scanner.h: Add a new token _S_token_ungreedy.
* include/bits/regex_scanner.tcc: Parse a new token _S_token_ungreedy.
* testsuite/28_regex/algorithms/regex_search/ecma/assertion.cc: New.
* testsuite/28_regex/algorithms/regex_search/ecma/greedy.cc: New.
* testsuite/28_regex/algorithms/regex_search/ecma/string_01.cc:
Fix comment.
2013-09-13 Paolo Carlini <paolo.carlini@oracle.com> 2013-09-13 Paolo Carlini <paolo.carlini@oracle.com>
PR libstdc++/58415 PR libstdc++/58415
......
...@@ -2106,14 +2106,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2106,14 +2106,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename, typename, typename, typename> template<typename, typename, typename, typename>
friend class __detail::_BFSExecutor; friend class __detail::_BFSExecutor;
template<typename _Bp, typename _Ap, typename _Ch_type, typename _Rx_traits> template<typename _Bp, typename _Ap,
typename _Ch_type, typename _Rx_traits>
friend bool friend bool
regex_match(_Bp, _Bp, match_results<_Bp, _Ap>&, regex_match(_Bp, _Bp, match_results<_Bp, _Ap>&,
const basic_regex<_Ch_type, const basic_regex<_Ch_type,
_Rx_traits>&, _Rx_traits>&,
regex_constants::match_flag_type); regex_constants::match_flag_type);
template<typename _Bp, typename _Ap, typename _Ch_type, typename _Rx_traits> template<typename _Bp, typename _Ap,
typename _Ch_type, typename _Rx_traits>
friend bool friend bool
regex_search(_Bp, _Bp, match_results<_Bp, _Ap>&, regex_search(_Bp, _Bp, match_results<_Bp, _Ap>&,
const basic_regex<_Ch_type, const basic_regex<_Ch_type,
...@@ -2213,8 +2215,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2213,8 +2215,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
if (__re._M_automaton == nullptr) if (__re._M_automaton == nullptr)
return false; return false;
__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match(); if (__detail::__get_executor(__s, __e, __m, __re, __flags)->_M_match())
if (__m.size() > 0 && __m[0].matched)
{ {
for (auto __it : __m) for (auto __it : __m)
if (!__it.matched) if (!__it.matched)
...@@ -2373,29 +2374,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -2373,29 +2374,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
if (__re._M_automaton == nullptr) if (__re._M_automaton == nullptr)
return false; return false;
auto __cur = __first; if (__detail::__get_executor(__first, __last, __m, __re, __flags)
// Continue when __cur == __last ->_M_search())
do
{ {
__detail::__get_executor(__cur, __last, __m, __re, __flags) for (auto __it : __m)
->_M_search_from_first(); if (!__it.matched)
if (__m.size() > 0 && __m[0].matched) __it.first = __it.second = __last;
{ __m.at(__m.size()).first = __first;
for (auto __it : __m) __m.at(__m.size()).second = __m[0].first;
if (!__it.matched) __m.at(__m.size()+1).first = __m[0].second;
__it.first = __it.second = __last; __m.at(__m.size()+1).second = __last;
__m.at(__m.size()).first = __first; __m.at(__m.size()).matched =
__m.at(__m.size()).second = __m[0].first; (__m.prefix().first != __m.prefix().second);
__m.at(__m.size()+1).first = __m[0].second; __m.at(__m.size()+1).matched =
__m.at(__m.size()+1).second = __last; (__m.suffix().first != __m.suffix().second);
__m.at(__m.size()).matched = return true;
(__m.prefix().first != __m.prefix().second);
__m.at(__m.size()+1).matched =
(__m.suffix().first != __m.suffix().second);
return true;
}
} }
while (__cur++ != __last);
return false; return false;
} }
......
...@@ -51,14 +51,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -51,14 +51,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
/// that represents the regular expression. /// that represents the regular expression.
enum _Opcode enum _Opcode
{ {
_S_opcode_unknown = 0, _S_opcode_unknown,
_S_opcode_alternative = 1, _S_opcode_alternative,
_S_opcode_backref = 2, _S_opcode_backref,
_S_opcode_subexpr_begin = 4, _S_opcode_line_begin_assertion,
_S_opcode_subexpr_end = 5, _S_opcode_line_end_assertion,
_S_opcode_dummy = 6, _S_opcode_word_boundry,
_S_opcode_match = 100, _S_opcode_subexpr_lookahead,
_S_opcode_accept = 255 _S_opcode_subexpr_begin,
_S_opcode_subexpr_end,
_S_opcode_dummy,
_S_opcode_match,
_S_opcode_accept,
}; };
template<typename _CharT, typename _TraitsT> template<typename _CharT, typename _TraitsT>
...@@ -72,35 +76,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -72,35 +76,25 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateIdT _M_next; // outgoing transition _StateIdT _M_next; // outgoing transition
union // Since they are mutually exclusive. union // Since they are mutually exclusive.
{ {
_StateIdT _M_alt; // for _S_opcode_alternative
unsigned int _M_subexpr; // for _S_opcode_subexpr_* unsigned int _M_subexpr; // for _S_opcode_subexpr_*
unsigned int _M_backref_index; // for _S_opcode_backref unsigned int _M_backref_index; // for _S_opcode_backref
struct
{
// for _S_opcode_alternative.
_StateIdT _M_quant_index;
// for _S_opcode_alternative or _S_opcode_subexpr_lookahead
_StateIdT _M_alt;
// for _S_opcode_word_boundry or _S_opcode_subexpr_lookahead or
// quantifiers(ungreedy if set true)
bool _M_neg;
};
}; };
_MatcherT _M_matches; // for _S_opcode_match _MatcherT _M_matches; // for _S_opcode_match
explicit _State(_OpcodeT __opcode) explicit _State(_OpcodeT __opcode)
: _M_opcode(__opcode), _M_next(_S_invalid_state_id) : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
{ } { }
_State(const _MatcherT& __m)
: _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id),
_M_matches(__m)
{ }
_State(_OpcodeT __opcode, unsigned __index)
: _M_opcode(__opcode), _M_next(_S_invalid_state_id)
{
if (__opcode == _S_opcode_subexpr_begin
|| __opcode == _S_opcode_subexpr_end)
_M_subexpr = __index;
else if (__opcode == _S_opcode_backref)
_M_backref_index = __index;
}
_State(_StateIdT __next, _StateIdT __alt)
: _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
{ }
#ifdef _GLIBCXX_DEBUG #ifdef _GLIBCXX_DEBUG
std::ostream& std::ostream&
_M_print(std::ostream& ostr) const; _M_print(std::ostream& ostr) const;
...@@ -141,7 +135,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -141,7 +135,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_NFA(_FlagT __f) _NFA(_FlagT __f)
: _M_flags(__f), _M_start_state(0), _M_subexpr_count(0), : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
_M_has_backref(false) _M_has_backref(false), _M_quant_count(0)
{ } { }
_FlagT _FlagT
...@@ -163,23 +157,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -163,23 +157,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateIdT _StateIdT
_M_insert_accept() _M_insert_accept()
{ {
this->push_back(_StateT(_S_opcode_accept)); auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
_M_accepting_states.insert(this->size()-1); _M_accepting_states.insert(__ret);
return this->size()-1; return __ret;
} }
_StateIdT _StateIdT
_M_insert_alt(_StateIdT __next, _StateIdT __alt) _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg)
{ {
this->push_back(_StateT(__next, __alt)); _StateT __tmp(_S_opcode_alternative);
return this->size()-1; // It labels every quantifier to make greedy comparison easier in BFS
// approach.
__tmp._M_quant_index = _M_quant_count++;
__tmp._M_next = __next;
__tmp._M_alt = __alt;
__tmp._M_neg = __neg;
return _M_insert_state(__tmp);
} }
_StateIdT _StateIdT
_M_insert_matcher(_MatcherT __m) _M_insert_matcher(_MatcherT __m)
{ {
this->push_back(_StateT(__m)); _StateT __tmp(_S_opcode_match);
return this->size()-1; __tmp._M_matches = __m;
return _M_insert_state(__tmp);
} }
_StateIdT _StateIdT
...@@ -187,29 +188,53 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -187,29 +188,53 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
auto __id = _M_subexpr_count++; auto __id = _M_subexpr_count++;
_M_paren_stack.push_back(__id); _M_paren_stack.push_back(__id);
this->push_back(_StateT(_S_opcode_subexpr_begin, __id)); _StateT __tmp(_S_opcode_subexpr_begin);
return this->size()-1; __tmp._M_subexpr = __id;
return _M_insert_state(__tmp);
} }
_StateIdT _StateIdT
_M_insert_subexpr_end() _M_insert_subexpr_end()
{ {
this->push_back(_StateT(_S_opcode_subexpr_end, _M_paren_stack.back())); _StateT __tmp(_S_opcode_subexpr_end);
__tmp._M_subexpr = _M_paren_stack.back();
_M_paren_stack.pop_back(); _M_paren_stack.pop_back();
return this->size()-1; return _M_insert_state(__tmp);
} }
_StateIdT _StateIdT
_M_insert_backref(unsigned int __index); _M_insert_backref(unsigned int __index);
_StateIdT _StateIdT
_M_insert_dummy() _M_insert_line_begin()
{ return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); }
_StateIdT
_M_insert_line_end()
{ return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); }
_StateIdT
_M_insert_word_bound(bool __neg)
{ {
this->push_back(_StateT(_S_opcode_dummy)); _StateT __tmp(_S_opcode_word_boundry);
return this->size()-1; __tmp._M_neg = __neg;
return _M_insert_state(__tmp);
} }
_StateIdT _StateIdT
_M_insert_lookahead(_StateIdT __alt, bool __neg)
{
_StateT __tmp(_S_opcode_subexpr_lookahead);
__tmp._M_alt = __alt;
__tmp._M_neg = __neg;
return _M_insert_state(__tmp);
}
_StateIdT
_M_insert_dummy()
{ return _M_insert_state(_StateT(_S_opcode_dummy)); }
_StateIdT
_M_insert_state(_StateT __s) _M_insert_state(_StateT __s)
{ {
this->push_back(__s); this->push_back(__s);
...@@ -230,6 +255,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -230,6 +255,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_FlagT _M_flags; _FlagT _M_flags;
_StateIdT _M_start_state; _StateIdT _M_start_state;
_SizeT _M_subexpr_count; _SizeT _M_subexpr_count;
_SizeT _M_quant_count;
bool _M_has_backref; bool _M_has_backref;
}; };
......
...@@ -80,6 +80,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -80,6 +80,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
<< __id << " -> " << _M_alt << __id << " -> " << _M_alt
<< " [label=\"epsilon\", tailport=\"n\"];\n"; << " [label=\"epsilon\", tailport=\"n\"];\n";
break; break;
case _S_opcode_backref:
__ostr << __id << " [label=\"" << __id << "\\nBACKREF "
<< _M_subexpr << "\"];\n"
<< __id << " -> " << _M_next << " [label=\"<match>\"];\n";
break;
case _S_opcode_line_begin_assertion:
__ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n"
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
break;
case _S_opcode_line_end_assertion:
__ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n"
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
break;
case _S_opcode_word_boundry:
__ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY "
<< _M_neg << "\"];\n"
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
break;
case _S_opcode_subexpr_lookahead:
__ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n"
<< __id << " -> " << _M_next
<< " [label=\"epsilon\", tailport=\"s\"];\n"
<< __id << " -> " << _M_alt
<< " [label=\"<assert>\", tailport=\"n\"];\n";
break;
case _S_opcode_subexpr_begin: case _S_opcode_subexpr_begin:
__ostr << __id << " [label=\"" << __id << "\\nSBEGIN " __ostr << __id << " [label=\"" << __id << "\\nSBEGIN "
<< _M_subexpr << "\"];\n" << _M_subexpr << "\"];\n"
...@@ -90,10 +115,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -90,10 +115,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
<< _M_subexpr << "\"];\n" << _M_subexpr << "\"];\n"
<< __id << " -> " << _M_next << " [label=\"epsilon\"];\n"; << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
break; break;
case _S_opcode_backref: case _S_opcode_dummy:
__ostr << __id << " [label=\"" << __id << "\\nBACKREF "
<< _M_subexpr << "\"];\n"
<< __id << " -> " << _M_next << " [label=\"<match>\"];\n";
break; break;
case _S_opcode_match: case _S_opcode_match:
__ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n" __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
...@@ -102,8 +124,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -102,8 +124,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
case _S_opcode_accept: case _S_opcode_accept:
__ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ; __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
break; break;
case _S_opcode_dummy:
break;
default: default:
_GLIBCXX_DEBUG_ASSERT(false); _GLIBCXX_DEBUG_ASSERT(false);
break; break;
...@@ -141,8 +161,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -141,8 +161,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (__index == __it) if (__index == __it)
__throw_regex_error(regex_constants::error_backref); __throw_regex_error(regex_constants::error_backref);
_M_has_backref = true; _M_has_backref = true;
this->push_back(_StateT(_S_opcode_backref, __index)); _StateT __tmp(_S_opcode_backref);
return this->size()-1; __tmp._M_backref_index = __index;
return _M_insert_state(__tmp);
} }
template<typename _CharT, typename _TraitsT> template<typename _CharT, typename _TraitsT>
...@@ -152,7 +173,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -152,7 +173,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
for (auto& __it : *this) for (auto& __it : *this)
{ {
while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode
== _S_opcode_dummy) == _S_opcode_dummy)
__it._M_next = (*this)[__it._M_next]._M_next; __it._M_next = (*this)[__it._M_next]._M_next;
if (__it._M_opcode == _S_opcode_alternative) if (__it._M_opcode == _S_opcode_alternative)
while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode
......
...@@ -56,7 +56,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -56,7 +56,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
std::shared_ptr<_RegexT> std::shared_ptr<_RegexT>
_M_get_nfa() const _M_get_nfa() const
{ return std::shared_ptr<_RegexT>(new _RegexT(_M_nfa)); } { return make_shared<_RegexT>(_M_nfa); }
private: private:
typedef _Scanner<_FwdIter> _ScannerT; typedef _Scanner<_FwdIter> _ScannerT;
......
...@@ -96,7 +96,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -96,7 +96,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__alt2._M_append(__end); __alt2._M_append(__end);
_M_stack.push(_StateSeqT(_M_nfa, _M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_alt(__alt1._M_start, _M_nfa._M_insert_alt(__alt1._M_start,
__alt2._M_start), __alt2._M_start, false),
__end)); __end));
} }
} }
...@@ -132,25 +132,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -132,25 +132,34 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false; return false;
} }
// TODO Implement it.
template<typename _FwdIter, typename _CharT, typename _TraitsT> template<typename _FwdIter, typename _CharT, typename _TraitsT>
bool bool
_Compiler<_FwdIter, _CharT, _TraitsT>:: _Compiler<_FwdIter, _CharT, _TraitsT>::
_M_assertion() _M_assertion()
{ {
// temporary place holders.
if (_M_match_token(_ScannerT::_S_token_line_begin)) if (_M_match_token(_ScannerT::_S_token_line_begin))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy())); _M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
_M_insert_line_begin()));
else if (_M_match_token(_ScannerT::_S_token_line_end)) else if (_M_match_token(_ScannerT::_S_token_line_end))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy())); _M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
_M_insert_line_end()));
else if (_M_match_token(_ScannerT::_S_token_word_bound)) else if (_M_match_token(_ScannerT::_S_token_word_bound))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy())); // _M_value[0] == 'n' means it's negtive, say "not word boundary".
else if (_M_match_token(_ScannerT::_S_token_neg_word_bound)) _M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy())); _M_insert_word_bound(_M_value[0] == 'n')));
else if (_M_match_token(_ScannerT::_S_token_subexpr_lookahead_begin)) else if (_M_match_token(_ScannerT::_S_token_subexpr_lookahead_begin))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy())); {
else if (_M_match_token(_ScannerT::_S_token_subexpr_neg_lookahead_begin)) auto __neg = _M_value[0] == 'n';
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy())); this->_M_disjunction();
if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
__throw_regex_error(regex_constants::error_paren);
auto __tmp = _M_pop();
__tmp._M_append(_M_nfa._M_insert_accept());
_M_stack.push(
_StateSeqT(
_M_nfa,
_M_nfa._M_insert_lookahead(__tmp._M_start, __neg)));
}
else else
return false; return false;
return true; return true;
...@@ -161,40 +170,44 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -161,40 +170,44 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_Compiler<_FwdIter, _CharT, _TraitsT>:: _Compiler<_FwdIter, _CharT, _TraitsT>::
_M_quantifier() _M_quantifier()
{ {
if (_M_match_token(_ScannerT::_S_token_closure0)) bool __neg = regex_constants::ECMAScript;
auto __init = [this, &__neg]()
{ {
if (_M_stack.empty()) if (_M_stack.empty())
__throw_regex_error(regex_constants::error_badrepeat); __throw_regex_error(regex_constants::error_badrepeat);
__neg = __neg && _M_match_token(_ScannerT::_S_token_opt);
};
if (_M_match_token(_ScannerT::_S_token_closure0))
{
__init();
auto __e = _M_pop(); auto __e = _M_pop();
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id, _StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id,
__e._M_start)); __e._M_start, __neg));
__e._M_append(__r); __e._M_append(__r);
_M_stack.push(__r); _M_stack.push(__r);
} }
else if (_M_match_token(_ScannerT::_S_token_closure1)) else if (_M_match_token(_ScannerT::_S_token_closure1))
{ {
if (_M_stack.empty()) __init();
__throw_regex_error(regex_constants::error_badrepeat);
auto __e = _M_pop(); auto __e = _M_pop();
__e._M_append(_M_nfa._M_insert_alt(_S_invalid_state_id, __e._M_start)); __e._M_append(_M_nfa._M_insert_alt(_S_invalid_state_id, __e._M_start,
__neg));
_M_stack.push(__e); _M_stack.push(__e);
} }
else if (_M_match_token(_ScannerT::_S_token_opt)) else if (_M_match_token(_ScannerT::_S_token_opt))
{ {
if (_M_stack.empty()) __init();
__throw_regex_error(regex_constants::error_badrepeat);
auto __e = _M_pop(); auto __e = _M_pop();
auto __end = _M_nfa._M_insert_dummy(); auto __end = _M_nfa._M_insert_dummy();
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id, _StateSeqT __r(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id,
__e._M_start)); __e._M_start, __neg));
__e._M_append(__end); __e._M_append(__end);
__r._M_append(__end); __r._M_append(__end);
_M_stack.push(__r); _M_stack.push(__r);
} }
else if (_M_match_token(_ScannerT::_S_token_interval_begin)) else if (_M_match_token(_ScannerT::_S_token_interval_begin))
{ {
if (_M_stack.empty()) __init();
__throw_regex_error(regex_constants::error_badrepeat);
if (!_M_match_token(_ScannerT::_S_token_dup_count)) if (!_M_match_token(_ScannerT::_S_token_dup_count))
__throw_regex_error(regex_constants::error_badbrace); __throw_regex_error(regex_constants::error_badbrace);
_StateSeqT __r(_M_pop()); _StateSeqT __r(_M_pop());
...@@ -206,23 +219,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -206,23 +219,27 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
if (_M_match_token(_ScannerT::_S_token_comma)) if (_M_match_token(_ScannerT::_S_token_comma))
if (_M_match_token(_ScannerT::_S_token_dup_count)) // {3,7} if (_M_match_token(_ScannerT::_S_token_dup_count)) // {3,7}
{ {
int __n = _M_cur_int_value(10) - __min_rep; int __n = _M_cur_int_value(10) - __min_rep;
if (__n < 0) if (__n < 0)
__throw_regex_error(regex_constants::error_badbrace); __throw_regex_error(regex_constants::error_badbrace);
auto __end = _M_nfa._M_insert_dummy(); auto __end = _M_nfa._M_insert_dummy();
for (int __i = 0; __i < __n; ++__i) for (int __i = 0; __i < __n; ++__i)
{ {
auto __tmp = __r._M_clone(); auto __tmp = __r._M_clone();
__e._M_append(_StateSeqT(_M_nfa, _M_nfa. __e._M_append
_M_insert_alt(__tmp._M_start, __end), __tmp._M_end)); (_StateSeqT(_M_nfa,
} _M_nfa._M_insert_alt(__tmp._M_start,
__end, __neg),
__tmp._M_end));
}
__e._M_append(__end); __e._M_append(__end);
} }
else // {3,} else // {3,}
{ {
auto __tmp = __r._M_clone(); auto __tmp = __r._M_clone();
_StateSeqT __s(_M_nfa, _M_nfa._M_insert_alt(_S_invalid_state_id, _StateSeqT __s(_M_nfa,
__tmp._M_start)); _M_nfa._M_insert_alt(_S_invalid_state_id,
__tmp._M_start, __neg));
__tmp._M_append(__s); __tmp._M_append(__s);
__e._M_append(__s); __e._M_append(__s);
} }
......
...@@ -78,87 +78,87 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -78,87 +78,87 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* %set. * %set.
*/ */
enum syntax_option_type : unsigned int enum syntax_option_type : unsigned int
{ {
/** /**
* Specifies that the matching of regular expressions against a character * Specifies that the matching of regular expressions against a character
* sequence shall be performed without regard to case. * sequence shall be performed without regard to case.
*/ */
icase = 1 << _S_icase, icase = 1 << _S_icase,
/** /**
* Specifies that when a regular expression is matched against a character * Specifies that when a regular expression is matched against a character
* container sequence, no sub-expression matches are to be stored in the * container sequence, no sub-expression matches are to be stored in the
* supplied match_results structure. * supplied match_results structure.
*/ */
nosubs = 1 << _S_nosubs, nosubs = 1 << _S_nosubs,
/** /**
* Specifies that the regular expression engine should pay more attention to * Specifies that the regular expression engine should pay more attention to
* the speed with which regular expressions are matched, and less to the * the speed with which regular expressions are matched, and less to the
* speed with which regular expression objects are constructed. Otherwise * speed with which regular expression objects are constructed. Otherwise
* it has no detectable effect on the program output. * it has no detectable effect on the program output.
*/ */
optimize = 1 << _S_optimize, optimize = 1 << _S_optimize,
/** /**
* Specifies that character ranges of the form [a-b] should be locale * Specifies that character ranges of the form [a-b] should be locale
* sensitive. * sensitive.
*/ */
collate = 1 << _S_collate, collate = 1 << _S_collate,
/** /**
* Specifies that the grammar recognized by the regular expression engine is * Specifies that the grammar recognized by the regular expression engine is
* that used by ECMAScript in ECMA-262 [Ecma International, ECMAScript * that used by ECMAScript in ECMA-262 [Ecma International, ECMAScript
* Language Specification, Standard Ecma-262, third edition, 1999], as * Language Specification, Standard Ecma-262, third edition, 1999], as
* modified in section [28.13]. This grammar is similar to that defined * modified in section [28.13]. This grammar is similar to that defined
* in the PERL scripting language but extended with elements found in the * in the PERL scripting language but extended with elements found in the
* POSIX regular expression grammar. * POSIX regular expression grammar.
*/ */
ECMAScript = 1 << _S_ECMAScript, ECMAScript = 1 << _S_ECMAScript,
/** /**
* Specifies that the grammar recognized by the regular expression engine is * Specifies that the grammar recognized by the regular expression engine is
* that used by POSIX basic regular expressions in IEEE Std 1003.1-2001, * that used by POSIX basic regular expressions in IEEE Std 1003.1-2001,
* Portable Operating System Interface (POSIX), Base Definitions and * Portable Operating System Interface (POSIX), Base Definitions and
* Headers, Section 9, Regular Expressions [IEEE, Information Technology -- * Headers, Section 9, Regular Expressions [IEEE, Information Technology --
* Portable Operating System Interface (POSIX), IEEE Standard 1003.1-2001]. * Portable Operating System Interface (POSIX), IEEE Standard 1003.1-2001].
*/ */
basic = 1 << _S_basic, basic = 1 << _S_basic,
/** /**
* Specifies that the grammar recognized by the regular expression engine is * Specifies that the grammar recognized by the regular expression engine is
* that used by POSIX extended regular expressions in IEEE Std 1003.1-2001, * that used by POSIX extended regular expressions in IEEE Std 1003.1-2001,
* Portable Operating System Interface (POSIX), Base Definitions and Headers, * Portable Operating System Interface (POSIX), Base Definitions and
* Section 9, Regular Expressions. * Headers, Section 9, Regular Expressions.
*/ */
extended = 1 << _S_extended, extended = 1 << _S_extended,
/** /**
* Specifies that the grammar recognized by the regular expression engine is * Specifies that the grammar recognized by the regular expression engine is
* that used by POSIX utility awk in IEEE Std 1003.1-2001. This option is * that used by POSIX utility awk in IEEE Std 1003.1-2001. This option is
* identical to syntax_option_type extended, except that C-style escape * identical to syntax_option_type extended, except that C-style escape
* sequences are supported. These sequences are: * sequences are supported. These sequences are:
* \\\\, \\a, \\b, \\f, \\n, \\r, \\t , \\v, \\&apos,, &apos,, * \\\\, \\a, \\b, \\f, \\n, \\r, \\t , \\v, \\&apos,, &apos,,
* and \\ddd (where ddd is one, two, or three octal digits). * and \\ddd (where ddd is one, two, or three octal digits).
*/ */
awk = 1 << _S_awk, awk = 1 << _S_awk,
/** /**
* Specifies that the grammar recognized by the regular expression engine is * Specifies that the grammar recognized by the regular expression engine is
* that used by POSIX utility grep in IEEE Std 1003.1-2001. This option is * that used by POSIX utility grep in IEEE Std 1003.1-2001. This option is
* identical to syntax_option_type basic, except that newlines are treated * identical to syntax_option_type basic, except that newlines are treated
* as whitespace. * as whitespace.
*/ */
grep = 1 << _S_grep, grep = 1 << _S_grep,
/** /**
* Specifies that the grammar recognized by the regular expression engine is * Specifies that the grammar recognized by the regular expression engine is
* that used by POSIX utility grep when given the -E option in * that used by POSIX utility grep when given the -E option in
* IEEE Std 1003.1-2001. This option is identical to syntax_option_type * IEEE Std 1003.1-2001. This option is identical to syntax_option_type
* extended, except that newlines are treated as whitespace. * extended, except that newlines are treated as whitespace.
*/ */
egrep = 1 << _S_egrep, egrep = 1 << _S_egrep,
}; };
constexpr inline syntax_option_type constexpr inline syntax_option_type
operator&(syntax_option_type __a, syntax_option_type __b) operator&(syntax_option_type __a, syntax_option_type __b)
......
...@@ -66,33 +66,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -66,33 +66,46 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ } { }
// Set matched when string exactly match the pattern. // Set matched when string exactly match the pattern.
virtual void virtual bool
_M_match() = 0; _M_match() = 0;
// Set matched when some prefix of the string matches the pattern. // Set matched when some prefix of the string matches the pattern.
virtual void virtual bool
_M_search_from_first() = 0; _M_search() = 0;
protected: protected:
typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT; typedef typename _NFA<_CharT, _TraitsT>::_SizeT _SizeT;
_Executor(_BiIter __begin, typedef typename _TraitsT::char_class_type _ClassT;
_BiIter __end,
_ResultsT& __results, _Executor(_BiIter __begin,
_FlagT __flags, _BiIter __end,
_SizeT __size) _ResultsT& __results,
: _M_current(__begin), _M_end(__end), _M_results(__results), _FlagT __flags,
_M_flags(__flags) _SizeT __size,
const _TraitsT& __traits)
: _M_current(__begin), _M_begin(__begin), _M_end(__end),
_M_results(__results), _M_flags(__flags), _M_traits(__traits)
{ {
__size += 2; __size += 2;
_M_results.resize(__size); _M_results.resize(__size);
for (auto __i = 0; __i < __size; __i++) for (_SizeT __i = 0; __i < __size; ++__i)
_M_results[__i].matched = false; _M_results[__i].matched = false;
} }
_BiIter _M_current; bool
_BiIter _M_end; _M_is_word(_CharT __ch)
_ResultsVec& _M_results; {
_FlagT _M_flags; static const _CharT __s = 'w';
return _M_traits.isctype(__ch,
_M_traits.lookup_classname(&__s, &__s+1));
}
_BiIter _M_current;
const _BiIter _M_begin;
const _BiIter _M_end;
_ResultsVec& _M_results;
const _TraitsT& _M_traits;
_FlagT _M_flags;
}; };
// A _DFSExecutor perform a DFS on given NFA and input string. At the very // A _DFSExecutor perform a DFS on given NFA and input string. At the very
...@@ -126,26 +139,51 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -126,26 +139,51 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _RegexT& __nfa, const _RegexT& __nfa,
const _TraitsT& __traits, const _TraitsT& __traits,
_FlagT __flags) _FlagT __flags)
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()), : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count(),
_M_traits(__traits), _M_nfa(__nfa), _M_results_ret(this->_M_results) __traits),
_M_traits(__traits), _M_nfa(__nfa), _M_cur_results(this->_M_results),
_M_start_state(__nfa._M_start())
{ } { }
void bool
_M_match() _M_match()
{ _M_dfs<true>(_M_nfa._M_start()); } {
this->_M_current = this->_M_begin;
return _M_dfs<true>(_M_start_state);
}
void bool
_M_search_from_first() _M_search_from_first()
{ _M_dfs<false>(_M_nfa._M_start()); } {
this->_M_current = this->_M_begin;
return _M_dfs<false>(_M_start_state);
}
bool
_M_search()
{
auto __cur = this->_M_begin;
do
{
this->_M_current = __cur;
if (_M_dfs<false>(_M_start_state))
return true;
}
// Continue when __cur == _M_end
while (__cur++ != this->_M_end);
return false;
}
private: private:
template<bool __match_mode> template<bool __match_mode>
bool bool
_M_dfs(_StateIdT __i); _M_dfs(_StateIdT __i);
_ResultsVec _M_results_ret; // To record current solution.
_ResultsVec _M_cur_results;
const _TraitsT& _M_traits; const _TraitsT& _M_traits;
const _RegexT& _M_nfa; const _RegexT& _M_nfa;
_StateIdT _M_start_state;
}; };
// Like the DFS approach, it try every possible state transition; Unlike DFS, // Like the DFS approach, it try every possible state transition; Unlike DFS,
...@@ -170,35 +208,129 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -170,35 +208,129 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT; typedef _Executor<_BiIter, _Alloc, _CharT, _TraitsT> _BaseT;
typedef _NFA<_CharT, _TraitsT> _RegexT; typedef _NFA<_CharT, _TraitsT> _RegexT;
typedef typename _BaseT::_ResultsT _ResultsT; typedef typename _BaseT::_ResultsT _ResultsT;
typedef typename _BaseT::_ResultsVec _ResultsVec; // Here's a solution for greedy/ungreedy mode in BFS approach. We need to
typedef std::unique_ptr<_ResultsVec> _ResultsPtr; // carefully work out how to compare to conflict matching states.
//
// A matching state is a pair (where, when): `where` is an NFA node and
// `when` is a _BiIter indicating the next char to be matched. Two
// matching states conflict when they have equivalent `where` and
// `when`.
//
// When two states conflict we must drop one and keep the other, because
// at most one of them can be the final optimal solution. Which one
// survives is decided by the greedy policy.
//
// The definition of `greedy`:
// Take the sequence of quantifiers in the NFA, sorted by their start
// position. Each matching state maintains a vector of the same length
// as that sequence, recording the repeat count of every quantifier. To
// compare two matching states, we just lexicographically compare these
// two vectors. To win the comparison (to survive), a matching state
// needs larger counts for its greedy quantifiers and smaller counts for
// its ungreedy quantifiers.
//
// In the implementation, we record negative numbers for greedy
// quantifiers and positive numbers for ungreedy ones. Now a simple
// operator<() using lexicographical_compare will give the answer.
//
// When two vectors are equal, the `where`, `when` and quantifier
// counts are all identical, which indicates the same answer, so just
// return false.
struct _ResultsEntry
: private _BaseT::_ResultsVec
{
public:
_ResultsEntry(unsigned int __res_sz, unsigned int __sz)
: _BaseT::_ResultsVec(__res_sz), _M_quant_keys(__sz)
{ }
sub_match<_BiIter>&
operator[](unsigned int __idx)
{ return this->_BaseT::_ResultsVec::operator[](__idx); }
bool
operator<(const _ResultsEntry& __rhs) const
{
_GLIBCXX_DEBUG_ASSERT(_M_quant_keys.size()
== __rhs._M_quant_keys.size());
return lexicographical_compare(_M_quant_keys.begin(),
_M_quant_keys.end(),
__rhs._M_quant_keys.begin(),
__rhs._M_quant_keys.end());
}
void
_M_inc(unsigned int __idx, bool __neg)
{ _M_quant_keys[__idx] += __neg ? 1 : -1; }
typename _BaseT::_ResultsVec
_M_get()
{ return *this; }
public:
std::vector<int> _M_quant_keys;
};
typedef std::unique_ptr<_ResultsEntry> _ResultsPtr;
typedef regex_constants::match_flag_type _FlagT; typedef regex_constants::match_flag_type _FlagT;
_BFSExecutor(_BiIter __begin, _BFSExecutor(_BiIter __begin,
_BiIter __end, _BiIter __end,
_ResultsT& __results, _ResultsT& __results,
const _RegexT& __nfa, const _RegexT& __nfa,
_FlagT __flags) const _TraitsT& __traits,
: _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count()), _FlagT __flags)
_M_nfa(__nfa) : _BaseT(__begin, __end, __results, __flags, __nfa._M_sub_count(),
{ __traits),
if (_M_nfa._M_start() != _S_invalid_state_id) _M_nfa(__nfa),
_M_covered[_M_nfa._M_start()] = _M_cur_results(nullptr),
_ResultsPtr(new _ResultsVec(this->_M_results)); _M_start_state(__nfa._M_start())
_M_e_closure(); { }
}
void bool
_M_match() _M_match()
{ _M_main_loop<true>(); } {
_M_init(this->_M_begin);
return _M_main_loop<true>();
}
void bool
_M_search_from_first() _M_search_from_first()
{ _M_main_loop<false>(); } {
_M_init(this->_M_begin);
return _M_main_loop<false>();
}
// Try to find a match beginning at each position of the input in turn,
// from _M_begin up to and including _M_end.  The executor state is
// re-initialised for every start position.  Returns true as soon as one
// run of the main loop succeeds, false when every start position has
// been tried without success.
bool
_M_search()
{
  auto __cur = this->_M_begin;
  while (true)
    {
      _M_init(__cur);
      if (_M_main_loop<false>())
        return true;
      // The position __cur == _M_end is tried as well; stop right after
      // it.  Testing before the increment guarantees the iterator is
      // never advanced beyond _M_end.
      if (__cur == this->_M_end)
        return false;
      ++__cur;
    }
}
private: private:
// Prepare a fresh run that starts matching at __cur: set the current
// position, discard every covered state, seed the start state with a
// new results entry sized from _M_results and the NFA's quantifier
// count, then call _M_e_closure().
void
_M_init(_BiIter __cur)
{
  _GLIBCXX_DEBUG_ASSERT(_M_start_state != _S_invalid_state_id);

  this->_M_current = __cur;
  _M_covered.clear();

  _ResultsPtr& __seed = _M_covered[_M_start_state];
  __seed.reset(new _ResultsEntry(this->_M_results.size(),
                                 _M_nfa._M_quant_count));

  _M_e_closure();
}
template<bool __match_mode> template<bool __match_mode>
void bool
_M_main_loop(); _M_main_loop();
void void
...@@ -208,13 +340,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -208,13 +340,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_move(); _M_move();
bool bool
_M_match_less_than(const _ResultsVec& __u, const _ResultsVec& __v) const; _M_includes_some();
bool
_M_includes_some() const;
std::map<_StateIdT, _ResultsPtr> _M_covered; std::map<_StateIdT, _ResultsPtr> _M_covered;
const _RegexT& _M_nfa; // To record global optimal solution.
_ResultsPtr _M_cur_results;
const _RegexT& _M_nfa;
_StateIdT _M_start_state;
}; };
//@} regex-detail //@} regex-detail
......
...@@ -69,7 +69,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -69,7 +69,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_subexpr_begin, _S_token_subexpr_begin,
_S_token_subexpr_no_group_begin, _S_token_subexpr_no_group_begin,
_S_token_subexpr_lookahead_begin, _S_token_subexpr_lookahead_begin,
_S_token_subexpr_neg_lookahead_begin,
_S_token_subexpr_end, _S_token_subexpr_end,
_S_token_bracket_begin, _S_token_bracket_begin,
_S_token_bracket_neg_begin, _S_token_bracket_neg_begin,
...@@ -84,10 +83,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -84,10 +83,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_S_token_or, _S_token_or,
_S_token_closure0, _S_token_closure0,
_S_token_closure1, _S_token_closure1,
_S_token_ungreedy,
_S_token_line_begin, _S_token_line_begin,
_S_token_line_end, _S_token_line_end,
_S_token_word_bound, _S_token_word_bound,
_S_token_neg_word_bound,
_S_token_comma, _S_token_comma,
_S_token_dup_count, _S_token_dup_count,
_S_token_eof, _S_token_eof,
......
...@@ -210,11 +210,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -210,11 +210,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ {
++_M_current; ++_M_current;
_M_token = _S_token_subexpr_lookahead_begin; _M_token = _S_token_subexpr_lookahead_begin;
_M_value.assign(1, 'p');
} }
else if (*_M_current == '!') else if (*_M_current == '!')
{ {
++_M_current; ++_M_current;
_M_token = _S_token_subexpr_neg_lookahead_begin; _M_token = _S_token_subexpr_lookahead_begin;
_M_value.assign(1, 'n');
} }
else else
__throw_regex_error(regex_constants::error_paren); __throw_regex_error(regex_constants::error_paren);
...@@ -371,9 +373,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -371,9 +373,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_value.assign(1, _M_escape_map.at(__c)); _M_value.assign(1, _M_escape_map.at(__c));
} }
else if (__c == 'b') else if (__c == 'b')
_M_token = _S_token_word_bound; {
_M_token = _S_token_word_bound;
_M_value.assign(1, 'p');
}
else if (__c == 'B') else if (__c == 'B')
_M_token = _S_token_neg_word_bound; {
_M_token = _S_token_word_bound;
_M_value.assign(1, 'n');
}
// N3376 28.13 // N3376 28.13
else if (__c == 'd' else if (__c == 'd'
|| __c == 'D' || __c == 'D'
...@@ -581,9 +589,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION ...@@ -581,9 +589,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
case _S_token_subexpr_lookahead_begin: case _S_token_subexpr_lookahead_begin:
ostr << "lookahead subexpr begin\n"; ostr << "lookahead subexpr begin\n";
break; break;
case _S_token_subexpr_neg_lookahead_begin:
ostr << "neg lookahead subexpr begin\n";
break;
case _S_token_subexpr_end: case _S_token_subexpr_end:
ostr << "subexpr end\n"; ostr << "subexpr end\n";
break; break;
......
// { dg-options "-std=gnu++11" }
// { dg-do run { xfail *-*-* } }
//
// 2013-09-14 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.3 regex_search
// Tests ECMAScript assertion.
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
void
test01()
{
bool test __attribute__((unused)) = true;
VERIFY(!regex_search("2123456", regex("^1234")));
VERIFY(regex_search("123456", regex("^1234")));
VERIFY(regex_search("123456", regex("(5|^)1234")));
VERIFY(regex_search("5123456", regex("(5|^)1234")));
VERIFY(!regex_search("1234562", regex("3456$")));
VERIFY(regex_search("123456", regex("3456$")));
VERIFY(!regex_search("123456", regex("(?=1234)56")));
VERIFY(regex_search("123456", regex("(?=1234)123456")));
VERIFY(regex_search("123456", regex("(?!1234)56")));
VERIFY(!regex_search("123456", regex("(?!1234)123456")));
VERIFY(regex_search("a-", regex("a\\b-")));
VERIFY(!regex_search("ab", regex("a\\bb")));
VERIFY(!regex_search("a-", regex("a\\B-")));
VERIFY(regex_search("ab", regex("a\\Bb")));
string s("This is a regular expression");
string sol[] =
{
"This",
"is",
"a",
"regular",
"expression",
};
regex re("\\b\\w*\\b");
int i = 0;
for (auto it = sregex_iterator(s.begin(), s.end(), re);
it != sregex_iterator() && i < 5;
++it)
{
string s((*it)[0].first, (*it)[0].second);
VERIFY(s == sol[i++]);
}
VERIFY(i == 5);
}
// Entry point: runs test01() and returns 0.
int
main()
{
  test01();
  return 0;
}
// { dg-options "-std=gnu++11" }
//
// 2013-09-14 Tim Shen <timshen91@gmail.com>
//
// Copyright (C) 2013 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
// 28.11.3 regex_search
// Tests ECMAScript greedy and ungreedy quantifiers.
#include <regex>
#include <testsuite_hooks.h>
using namespace std;
// Checks greedy and ungreedy (lazy) quantifiers: first each quantifier
// on its own, then every greedy/lazy pairing of two adjacent capture
// groups.
void
test01()
{
  bool test __attribute__((unused)) = true;
  cmatch m;
// TEST(i, s): sub-match i of the last search matched and equals s.
#define TEST(i, s) VERIFY(m[i].matched && string(m[i].first, m[i].second) == s)

  // `*` versus `*?`.
  VERIFY(regex_search("aaaa", m, regex("a*")));
  TEST(0, "aaaa");
  VERIFY(regex_search("aaaa", m, regex("a*?")));
  TEST(0, "");

  // `+` versus `+?`.
  VERIFY(regex_search("aaaa", m, regex("a+")));
  TEST(0, "aaaa");
  VERIFY(regex_search("aaaa", m, regex("a+?")));
  TEST(0, "a");

  // `?` versus `??`.
  VERIFY(regex_search("a", m, regex("a?")));
  TEST(0, "a");
  VERIFY(regex_search("a", m, regex("a??")));
  TEST(0, "");
  VERIFY(regex_search("", m, regex("a??")));
  TEST(0, "");

  // Two adjacent groups: greedy/greedy.
  VERIFY(regex_search("aaaa", m, regex("(a+)(a+)")));
  TEST(1, "aaa");
  TEST(2, "a");
  // lazy/greedy.
  VERIFY(regex_search("aaaa", m, regex("(a+?)(a+)")));
  TEST(1, "a");
  TEST(2, "aaa");
  // greedy/lazy.  This slot used to repeat the lazy/greedy case
  // verbatim, leaving this combination uncovered.
  VERIFY(regex_search("aaaa", m, regex("(a+)(a+?)")));
  TEST(1, "aaa");
  TEST(2, "a");
  // lazy/lazy.
  VERIFY(regex_search("aaaa", m, regex("(a+?)(a+?)")));
  TEST(1, "a");
  TEST(2, "a");
}
// Entry point: runs test01() and returns 0.
int
main()
{
  test01();
  return 0;
}
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
// <http://www.gnu.org/licenses/>. // <http://www.gnu.org/licenses/>.
// 28.11.3 regex_search // 28.11.3 regex_search
// Tests BRE against a std::string target. // Tests ECMAScript against a std::string target.
#include <regex> #include <regex>
#include <testsuite_hooks.h> #include <testsuite_hooks.h>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment