regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not use std::map.

2014-01-17  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not
	use std::map.
	* include/bits/regex_automaton.h: Do not use std::set.
	* include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_char(),
	_BracketMatcher<>::_M_add_collating_element(),
	_BracketMatcher<>::_M_add_equivalence_class(),
	_BracketMatcher<>::_M_make_range()): Likewise.
	* include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply()):
	Likewise.
	* include/bits/regex_executor.h: Do not use std::queue.
	* include/bits/regex_executor.tcc (_Executor<>::_M_main(),
	_Executor<>::_M_dfs()): Likewise.
	* include/std/regex: Remove <map>, <set> and <queue>.

2014-01-17  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex.h (__compile_nfa<>(), basic_regex<>::basic_regex(),
	basic_regex<>::assign()): Change __compile_nfa to accept
	const _CharT* only.
	* include/bits/regex_compiler.h: Change _Compiler's template
	argument from <_FwdIter, _TraitsT> to <_TraitsT>.
	* include/bits/regex_compiler.tcc: Likewise.

2014-01-17  Tim Shen  <timshen91@gmail.com>

	* include/bits/regex_compiler.h: Make _ScannerT a template on the
	character type.
	* include/bits/regex_scanner.h (_Scanner<>::_Scanner()): Separate
	_ScannerBase from _Scanner; Change _Scanner's template argument from
	_FwdIter to _CharT. Avoid use of std::map and std::set by using arrays
	instead.
	* include/bits/regex_scanner.tcc (_Scanner<>::_Scanner(),
	_Scanner<>::_M_scan_normal(), _Scanner<>::_M_eat_escape_ecma(),
	_Scanner<>::_M_eat_escape_posix(), _Scanner<>::_M_eat_escape_awk()):
	Likewise.
	* include/std/regex: Add <cstring> for using strchr.

2014-01-17  Tim Shen  <timshen91@gmail.com>

	* bits/regex_automaton.tcc: Indentation fix.
	* bits/regex_compiler.h (__compile_nfa<>(), _Compiler<>,
	_RegexTranslator<>, _AnyMatcher<>, _CharMatcher<>,
	_BracketMatcher<>): Add bool option template parameters and
	specializations to make matching more efficient and space saving.
	* bits/regex_compiler.tcc: Likewise.

From-SVN: r206690
This commit is contained in:
Tim Shen 2014-01-16 23:35:21 +00:00 committed by Tim Shen
parent 9e6f9ad62c
commit ddf41e9db6
11 changed files with 594 additions and 488 deletions

View file

@ -1,3 +1,51 @@
2014-01-17 Tim Shen <timshen91@gmail.com>
* include/bits/regex_automaton.tcc (_StateSeq<>::_M_clone()): Do not
use std::map.
* include/bits/regex_automaton.h: Do not use std::set.
* include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_char(),
_BracketMatcher<>::_M_add_collating_element(),
_BracketMatcher<>::_M_add_equivalence_class(),
_BracketMatcher<>::_M_make_range()): Likewise.
* include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply()):
Likewise.
* include/bits/regex_executor.h: Do not use std::queue.
* include/bits/regex_executor.tcc (_Executor<>::_M_main(),
_Executor<>::_M_dfs()): Likewise.
* include/std/regex: Remove <map>, <set> and <queue>.
2014-01-17 Tim Shen <timshen91@gmail.com>
* include/bits/regex.h (__compile_nfa<>(), basic_regex<>::basic_regex(),
basic_regex<>::assign()): Change __compile_nfa to accept
const _CharT* only.
* include/bits/regex_compiler.h: Change _Compiler's template
argument from <_FwdIter, _TraitsT> to <_TraitsT>.
* include/bits/regex_compiler.tcc: Likewise.
2014-01-17 Tim Shen <timshen91@gmail.com>
* include/bits/regex_compiler.h: Make _ScannerT a template on the
character type.
* include/bits/regex_scanner.h (_Scanner<>::_Scanner()): Separate
_ScannerBase from _Scanner; Change _Scanner's template argument from
_FwdIter to _CharT. Avoid use of std::map and std::set by using arrays
instead.
* include/bits/regex_scanner.tcc (_Scanner<>::_Scanner(),
_Scanner<>::_M_scan_normal(), _Scanner<>::_M_eat_escape_ecma(),
_Scanner<>::_M_eat_escape_posix(), _Scanner<>::_M_eat_escape_awk()):
Likewise.
* include/std/regex: Add <cstring> for using strchr.
2014-01-17 Tim Shen <timshen91@gmail.com>
* bits/regex_automaton.tcc: Indentation fix.
* bits/regex_compiler.h (__compile_nfa<>(), _Compiler<>,
_RegexTranslator<>, _AnyMatcher<>, _CharMatcher<>,
_BracketMatcher<>): Add bool option template parameters and
specializations to make matching more efficient and space saving.
* bits/regex_compiler.tcc: Likewise.
2014-01-15 François Dumont <fdumont@gcc.gnu.org>
PR libstdc++/59712

View file

@ -60,51 +60,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename, typename, typename, bool>
class _Executor;
template<typename _Tp>
struct __has_contiguous_iter : std::false_type { };
template<typename _Ch, typename _Tr, typename _Alloc>
struct __has_contiguous_iter<std::basic_string<_Ch, _Tr, _Alloc>>
: std::true_type // string<Ch> storage is contiguous
{ };
template<typename _Tp, typename _Alloc>
struct __has_contiguous_iter<std::vector<_Tp, _Alloc>>
: std::true_type // vector<Tp> storage is contiguous
{ };
template<typename _Alloc>
struct __has_contiguous_iter<std::vector<bool, _Alloc>>
: std::false_type // vector<bool> storage is not contiguous
{ };
template<typename _Tp>
struct __is_contiguous_normal_iter : std::false_type { };
template<typename _Tp, typename _Cont>
struct
__is_contiguous_normal_iter<__gnu_cxx::__normal_iterator<_Tp, _Cont>>
: __has_contiguous_iter<_Cont>::type
{ };
template<typename _Iter, typename _TraitsT>
using __enable_if_contiguous_normal_iter
= typename enable_if< __is_contiguous_normal_iter<_Iter>::value,
std::shared_ptr<_NFA<_TraitsT>> >::type;
template<typename _Iter, typename _TraitsT>
using __disable_if_contiguous_normal_iter
= typename enable_if< !__is_contiguous_normal_iter<_Iter>::value,
std::shared_ptr<_NFA<_TraitsT>> >::type;
template<typename _FwdIter, typename _TraitsT>
__disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
__compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits,
regex_constants::syntax_option_type __flags);
template<typename _Iter, typename _TraitsT>
__enable_if_contiguous_normal_iter<_Iter, _TraitsT>
__compile_nfa(_Iter __first, _Iter __last, const _TraitsT& __traits,
template<typename _TraitsT>
inline std::shared_ptr<_NFA<_TraitsT>>
__compile_nfa(const typename _TraitsT::char_type* __first,
const typename _TraitsT::char_type* __last,
const _TraitsT& __traits,
regex_constants::syntax_option_type __flags);
_GLIBCXX_END_NAMESPACE_VERSION
@ -561,7 +521,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
flag_type __f = ECMAScript)
: _M_flags(__f),
_M_original_str(__first, __last),
_M_automaton(__detail::__compile_nfa(__first, __last, _M_traits,
_M_automaton(__detail::__compile_nfa(_M_original_str.c_str(),
_M_original_str.c_str()
+ _M_original_str.size(),
_M_traits,
_M_flags))
{ }
@ -698,7 +661,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
_M_flags = __flags;
_M_original_str.assign(__s.begin(), __s.end());
_M_automaton = __detail::__compile_nfa(__s.begin(), __s.end(),
auto __p = _M_original_str.c_str();
_M_automaton = __detail::__compile_nfa(__p,
__p + _M_original_str.size(),
_M_traits, _M_flags);
return *this;
}

View file

@ -41,7 +41,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*/
typedef long _StateIdT;
typedef std::set<_StateIdT> _StateSet;
static const _StateIdT _S_invalid_state_id = -1;
template<typename _CharT>
@ -138,16 +137,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_start() const
{ return _M_start_state; }
const _StateSet&
_M_final_states() const
{ return _M_accepting_states; }
_SizeT
_M_sub_count() const
{ return _M_subexpr_count; }
std::vector<size_t> _M_paren_stack;
_StateSet _M_accepting_states;
_FlagT _M_flags;
_StateIdT _M_start_state;
_SizeT _M_subexpr_count;
@ -172,7 +166,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_insert_accept()
{
auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
this->_M_accepting_states.insert(__ret);
return __ret;
}

View file

@ -134,9 +134,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const
{
__ostr << "digraph _Nfa {\n"
" rankdir=LR;\n";
" rankdir=LR;\n";
for (size_t __i = 0; __i < this->size(); ++__i)
(*this)[__i]._M_dot(__ostr, __i);
(*this)[__i]._M_dot(__ostr, __i);
__ostr << "}\n";
return __ostr;
}
@ -186,7 +186,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StateSeq<_TraitsT>
_StateSeq<_TraitsT>::_M_clone()
{
std::map<_StateIdT, _StateIdT> __m;
std::vector<_StateIdT> __m(_M_nfa.size(), -1);
std::stack<_StateIdT> __stack;
__stack.push(_M_start);
while (!__stack.empty())
@ -194,30 +194,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
auto __u = __stack.top();
__stack.pop();
auto __dup = _M_nfa[__u];
// _M_insert_state() never returns -1
auto __id = _M_nfa._M_insert_state(__dup);
__m[__u] = __id;
if (__u == _M_end)
continue;
if (__m.count(__dup._M_next) == 0)
if (__dup._M_next != _S_invalid_state_id && __m[__dup._M_next] == -1)
__stack.push(__dup._M_next);
if (__dup._M_opcode == _S_opcode_alternative
|| __dup._M_opcode == _S_opcode_subexpr_lookahead)
if (__m.count(__dup._M_alt) == 0)
if (__dup._M_alt != _S_invalid_state_id && __m[__dup._M_alt] == -1)
__stack.push(__dup._M_alt);
}
for (auto __it : __m)
long __size = static_cast<long>(__m.size());
for (long __k = 0; __k < __size; __k++)
{
auto& __ref = _M_nfa[__it.second];
if (__ref._M_next != -1)
long __v;
if ((__v = __m[__k]) == -1)
continue;
auto& __ref = _M_nfa[__v];
if (__ref._M_next != _S_invalid_state_id)
{
_GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next));
_GLIBCXX_DEBUG_ASSERT(__m[__ref._M_next] != -1);
__ref._M_next = __m[__ref._M_next];
}
if (__ref._M_opcode == _S_opcode_alternative
|| __ref._M_opcode == _S_opcode_subexpr_lookahead)
if (__ref._M_alt != -1)
if (__ref._M_alt != _S_invalid_state_id)
{
_GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt));
_GLIBCXX_DEBUG_ASSERT(__m[__ref._M_alt] != -1);
__ref._M_alt = __m[__ref._M_alt];
}
}

View file

@ -39,19 +39,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{
*/
template<typename _TraitsT>
template<typename, bool, bool>
struct _BracketMatcher;
/// Builds an NFA from an input iterator interval.
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
class _Compiler
{
public:
typedef typename _TraitsT::string_type _StringT;
typedef typename _TraitsT::char_type _CharT;
typedef const _CharT* _IterT;
typedef _NFA<_TraitsT> _RegexT;
typedef regex_constants::syntax_option_type _FlagT;
_Compiler(_FwdIter __b, _FwdIter __e,
_Compiler(_IterT __b, _IterT __e,
const _TraitsT& __traits, _FlagT __flags);
std::shared_ptr<_RegexT>
@ -59,12 +60,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ return make_shared<_RegexT>(std::move(_M_nfa)); }
private:
typedef _Scanner<_FwdIter> _ScannerT;
typedef typename _ScannerT::_TokenT _TokenT;
typedef _StateSeq<_TraitsT> _StateSeqT;
typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT;
typedef _BracketMatcher<_TraitsT> _BMatcherT;
typedef std::ctype<typename _TraitsT::char_type> _CtypeT;
typedef _Scanner<_CharT> _ScannerT;
typedef typename _TraitsT::string_type _StringT;
typedef typename _ScannerT::_TokenT _TokenT;
typedef _StateSeq<_TraitsT> _StateSeqT;
typedef std::stack<_StateSeqT> _StackT;
typedef std::ctype<_CharT> _CtypeT;
// accepts a specific token or returns false.
bool
@ -91,20 +92,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool
_M_bracket_expression();
void
_M_expression_term(_BMatcherT& __matcher);
template<bool __icase, bool __collate>
void
_M_insert_any_matcher_ecma();
bool
_M_range_expression(_BMatcherT& __matcher);
template<bool __icase, bool __collate>
void
_M_insert_any_matcher_posix();
bool
_M_collating_symbol(_BMatcherT& __matcher);
template<bool __icase, bool __collate>
void
_M_insert_char_matcher();
bool
_M_equivalence_class(_BMatcherT& __matcher);
template<bool __icase, bool __collate>
void
_M_insert_character_class_matcher();
bool
_M_character_class(_BMatcherT& __matcher);
template<bool __icase, bool __collate>
void
_M_insert_bracket_matcher(bool __neg);
template<bool __icase, bool __collate>
void
_M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>&
__matcher);
int
_M_cur_int_value(int __radix);
@ -129,33 +140,119 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_StackT _M_stack;
};
template<typename _FwdIter, typename _TraitsT>
inline __disable_if_contiguous_normal_iter<_FwdIter, _TraitsT>
__compile_nfa(_FwdIter __first, _FwdIter __last, const _TraitsT& __traits,
template<typename _TraitsT>
inline std::shared_ptr<_NFA<_TraitsT>>
__compile_nfa(const typename _TraitsT::char_type* __first,
const typename _TraitsT::char_type* __last,
const _TraitsT& __traits,
regex_constants::syntax_option_type __flags)
{
using _Cmplr = _Compiler<_FwdIter, _TraitsT>;
using _Cmplr = _Compiler<_TraitsT>;
return _Cmplr(__first, __last, __traits, __flags)._M_get_nfa();
}
template<typename _Iter, typename _TraitsT>
inline __enable_if_contiguous_normal_iter<_Iter, _TraitsT>
__compile_nfa(_Iter __first, _Iter __last, const _TraitsT& __traits,
regex_constants::syntax_option_type __flags)
// [28.13.14]
template<typename _TraitsT, bool __icase, bool __collate>
class _RegexTranslator
{
size_t __len = __last - __first;
const auto* __cfirst = __len ? std::__addressof(*__first) : nullptr;
return __compile_nfa(__cfirst, __cfirst + __len, __traits, __flags);
}
public:
typedef typename _TraitsT::char_type _CharT;
typedef typename _TraitsT::string_type _StringT;
typedef typename std::conditional<__collate,
_StringT,
_CharT>::type _StrTransT;
template<typename _TraitsT, bool __is_ecma>
struct _AnyMatcher
explicit
_RegexTranslator(const _TraitsT& __traits)
: _M_traits(__traits)
{ }
_CharT
_M_translate(_CharT __ch) const
{
if (__icase)
return _M_traits.translate_nocase(__ch);
else if (__collate)
return _M_traits.translate(__ch);
else
return __ch;
}
_StrTransT
_M_transform(_CharT __ch) const
{
return _M_transform_impl(__ch, typename integral_constant<bool,
__collate>::type());
}
private:
_StrTransT
_M_transform_impl(_CharT __ch, false_type) const
{ return __ch; }
_StrTransT
_M_transform_impl(_CharT __ch, true_type) const
{
_StrTransT __str = _StrTransT(1, _M_translate(__ch));
return _M_traits.transform(__str.begin(), __str.end());
}
const _TraitsT& _M_traits;
};
template<typename _TraitsT>
class _RegexTranslator<_TraitsT, false, false>
{
typedef typename _TraitsT::char_type _CharT;
public:
typedef typename _TraitsT::char_type _CharT;
typedef _CharT _StrTransT;
explicit
_RegexTranslator(const _TraitsT& __traits)
{ }
_CharT
_M_translate(_CharT __ch) const
{ return __ch; }
_StrTransT
_M_transform(_CharT __ch) const
{ return __ch; }
};
template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
struct _AnyMatcher;
template<typename _TraitsT, bool __icase, bool __collate>
struct _AnyMatcher<_TraitsT, false, __icase, __collate>
{
typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
typedef typename _TransT::_CharT _CharT;
explicit
_AnyMatcher(const _TraitsT& __traits)
: _M_traits(__traits)
: _M_translator(__traits)
{ }
bool
operator()(_CharT __ch) const
{
static auto __nul = _M_translator._M_translate('\0');
return _M_translator._M_translate(__ch) != __nul;
}
_TransT _M_translator;
};
template<typename _TraitsT, bool __icase, bool __collate>
struct _AnyMatcher<_TraitsT, true, __icase, __collate>
{
typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
typedef typename _TransT::_CharT _CharT;
explicit
_AnyMatcher(const _TraitsT& __traits)
: _M_translator(__traits)
{ }
bool
@ -165,92 +262,63 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool
_M_apply(_CharT __ch, true_type) const
{
auto __c = _M_traits.translate(__ch);
if (__is_ecma)
{
static auto __n = _M_traits.translate('\n');
static auto __r = _M_traits.translate('\r');
return __c != __n && __c != __r;
}
else
{
static auto __nul = _M_traits.translate('\0');
return __c != __nul;
}
auto __c = _M_translator._M_translate(__ch);
auto __n = _M_translator._M_translate('\n');
auto __r = _M_translator._M_translate('\r');
return __c != __n && __c != __r;
}
bool
_M_apply(_CharT __ch, false_type) const
{
auto __c = _M_traits.translate(__ch);
if (__is_ecma)
{
static auto __n = _M_traits.translate('\n');
static auto __r = _M_traits.translate('\r');
static auto __u2028 = _M_traits.translate(u'\u2028');
static auto __u2029 = _M_traits.translate(u'\u2029');
return __c != __n && __c != __r && __c != __u2028
&& __c != __u2029;
}
else
{
static auto __nul = _M_traits.translate('\0');
return __c != __nul;
}
auto __c = _M_translator._M_translate(__ch);
auto __n = _M_translator._M_translate('\n');
auto __r = _M_translator._M_translate('\r');
auto __u2028 = _M_translator._M_translate(u'\u2028');
auto __u2029 = _M_translator._M_translate(u'\u2029');
return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
}
const _TraitsT& _M_traits;
_TransT _M_translator;
};
template<typename _TraitsT, bool __icase>
template<typename _TraitsT, bool __icase, bool __collate>
struct _CharMatcher
{
typedef typename _TraitsT::char_type _CharT;
typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
typedef typename _TransT::_CharT _CharT;
_CharMatcher(_CharT __ch, const _TraitsT& __traits)
: _M_traits(__traits), _M_ch(_M_translate(__ch))
: _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch))
{ }
bool
operator()(_CharT __ch) const
{ return _M_ch == _M_translate(__ch); }
{ return _M_ch == _M_translator._M_translate(__ch); }
_CharT
_M_translate(_CharT __ch) const
{
if (__icase)
return _M_traits.translate_nocase(__ch);
else
return _M_traits.translate(__ch);
}
const _TraitsT& _M_traits;
_CharT _M_ch;
_TransT _M_translator;
_CharT _M_ch;
};
/// Matches a character range (bracket expression)
// TODO: Convert used _M_flags fields to template parameters, including
// collate and icase. Avoid using std::set, could use flat_set
// (sorted vector and binary search) instead.
template<typename _TraitsT>
template<typename _TraitsT, bool __icase, bool __collate>
struct _BracketMatcher
{
public:
typedef typename _TraitsT::char_type _CharT;
typedef typename _TraitsT::char_class_type _CharClassT;
typedef typename _TraitsT::string_type _StringT;
typedef regex_constants::syntax_option_type _FlagT;
typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
typedef typename _TransT::_CharT _CharT;
typedef typename _TransT::_StrTransT _StrTransT;
typedef typename _TraitsT::string_type _StringT;
typedef typename _TraitsT::char_class_type _CharClassT;
public:
_BracketMatcher(bool __is_non_matching,
const _TraitsT& __traits,
_FlagT __flags)
:
#ifdef _GLIBCXX_DEBUG
_M_is_ready(false),
#endif
_M_traits(__traits), _M_class_set(0), _M_flags(__flags),
const _TraitsT& __traits)
: _M_class_set(0), _M_translator(__traits), _M_traits(__traits),
_M_is_non_matching(__is_non_matching)
#ifdef _GLIBCXX_DEBUG
, _M_is_ready(false)
#endif
{ }
bool
@ -263,7 +331,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_add_char(_CharT __c)
{
_M_char_set.insert(_M_translate(__c));
_M_char_set.push_back(_M_translator._M_translate(__c));
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
#endif
@ -276,7 +344,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__s.data() + __s.size());
if (__st.empty())
__throw_regex_error(regex_constants::error_collate);
_M_char_set.insert(_M_translate(__st[0]));
_M_char_set.push_back(_M_translator._M_translate(__st[0]));
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
#endif
@ -291,7 +359,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__throw_regex_error(regex_constants::error_collate);
__st = _M_traits.transform_primary(__st.data(),
__st.data() + __st.size());
_M_equiv_set.insert(__st);
_M_equiv_set.push_back(__st);
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
#endif
@ -302,7 +370,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
auto __mask = _M_traits.lookup_classname(__s.data(),
__s.data() + __s.size(),
_M_is_icase());
__icase);
if (__mask == 0)
__throw_regex_error(regex_constants::error_ctype);
_M_class_set |= __mask;
@ -314,12 +382,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_make_range(_CharT __l, _CharT __r)
{
if (_M_flags & regex_constants::collate)
_M_range_set.insert(
make_pair(_M_get_str(_M_translate(__l)),
_M_get_str(_M_translate(__r))));
else
_M_range_set.insert(make_pair(_M_get_str(__l), _M_get_str(__r)));
_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
_M_translator._M_transform(__r)));
#ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
#endif
@ -350,26 +414,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_apply(_CharT __ch, true_type) const
{ return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
_CharT
_M_translate(_CharT __c) const
{
if (_M_is_icase())
return _M_traits.translate_nocase(__c);
else
return _M_traits.translate(__c);
}
bool
_M_is_icase() const
{ return _M_flags & regex_constants::icase; }
_StringT
_M_get_str(_CharT __c) const
{
_StringT __s(1, __c);
return _M_traits.transform(__s.begin(), __s.end());
}
void
_M_make_cache(true_type)
{
@ -383,16 +427,16 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ }
private:
_CacheT _M_cache;
std::set<_CharT> _M_char_set;
std::set<_StringT> _M_equiv_set;
std::set<pair<_StringT, _StringT>> _M_range_set;
const _TraitsT& _M_traits;
_CharClassT _M_class_set;
_FlagT _M_flags;
bool _M_is_non_matching;
_CacheT _M_cache;
std::vector<_CharT> _M_char_set;
std::vector<_StringT> _M_equiv_set;
std::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
_CharClassT _M_class_set;
_TransT _M_translator;
const _TraitsT& _M_traits;
bool _M_is_non_matching;
#ifdef _GLIBCXX_DEBUG
bool _M_is_ready;
bool _M_is_ready;
#endif
};

View file

@ -59,9 +59,9 @@ namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _FwdIter, typename _TraitsT>
_Compiler<_FwdIter, _TraitsT>::
_Compiler(_FwdIter __b, _FwdIter __e,
template<typename _TraitsT>
_Compiler<_TraitsT>::
_Compiler(_IterT __b, _IterT __e,
const _TraitsT& __traits, _FlagT __flags)
: _M_flags((__flags
& (regex_constants::ECMAScript
@ -89,9 +89,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_nfa._M_eliminate_dummy();
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
void
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_disjunction()
{
this->_M_alternative();
@ -110,9 +110,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
void
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_alternative()
{
if (this->_M_term())
@ -126,9 +126,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_dummy()));
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
bool
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_term()
{
if (this->_M_assertion())
@ -141,9 +141,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false;
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
bool
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_assertion()
{
if (_M_match_token(_ScannerT::_S_token_line_begin))
@ -172,9 +172,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return true;
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
void
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_quantifier()
{
bool __neg = (_M_flags & regex_constants::ECMAScript);
@ -278,52 +278,39 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
}
template<typename _FwdIter, typename _TraitsT>
// Dispatches to the member template __func<__icase, __collate>, choosing
// the two bool template arguments at run time from the icase and collate
// bits of _M_flags.  Any macro arguments after __func are forwarded as
// the call arguments.
//
// Standard variadic-macro syntax (... / __VA_ARGS__) is used instead of
// the GNU-only named form (args...), and every branch is braced so the
// if/else pairing does not depend on dangling-else binding.  The whole
// body is one do { } while (false) statement, so an invocation followed
// by ';' behaves as a single statement.
#define __INSERT_REGEX_MATCHER(__func, ...)\
  do {\
    if (!(_M_flags & regex_constants::icase))\
      {\
        if (!(_M_flags & regex_constants::collate))\
          __func<false, false>(__VA_ARGS__);\
        else\
          __func<false, true>(__VA_ARGS__);\
      }\
    else\
      {\
        if (!(_M_flags & regex_constants::collate))\
          __func<true, false>(__VA_ARGS__);\
        else\
          __func<true, true>(__VA_ARGS__);\
      }\
  } while (false)
template<typename _TraitsT>
bool
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_atom()
{
if (_M_match_token(_ScannerT::_S_token_anychar))
{
if (_M_flags & regex_constants::ECMAScript)
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_AnyMatcher<_TraitsT,
true>(_M_traits))));
if (!(_M_flags & regex_constants::ECMAScript))
__INSERT_REGEX_MATCHER(_M_insert_any_matcher_posix);
else
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_AnyMatcher<_TraitsT,
false>(_M_traits))));
__INSERT_REGEX_MATCHER(_M_insert_any_matcher_ecma);
}
else if (_M_try_char())
{
if (_M_flags & regex_constants::icase)
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_CharMatcher<_TraitsT,
true>(_M_value[0],
_M_traits))));
else
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher
(_CharMatcher<_TraitsT,
false>(_M_value[0],
_M_traits))));
}
__INSERT_REGEX_MATCHER(_M_insert_char_matcher);
else if (_M_match_token(_ScannerT::_S_token_backref))
_M_stack.push(_StateSeqT(_M_nfa, _M_nfa.
_M_insert_backref(_M_cur_int_value(10))));
else if (_M_match_token(_ScannerT::_S_token_quoted_class))
{
_GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
_BMatcherT __matcher(_M_ctype.is(_CtypeT::upper, _M_value[0]),
_M_traits, _M_flags);
__matcher._M_add_character_class(_M_value);
__matcher._M_ready();
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher(std::move(__matcher))));
}
__INSERT_REGEX_MATCHER(_M_insert_character_class_matcher);
else if (_M_match_token(_ScannerT::_S_token_subexpr_no_group_begin))
{
_StateSeqT __r(_M_nfa, _M_nfa._M_insert_dummy());
@ -348,28 +335,90 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return true;
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
bool
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_bracket_expression()
{
bool __neg =
_M_match_token(_ScannerT::_S_token_bracket_neg_begin);
if (!(__neg || _M_match_token(_ScannerT::_S_token_bracket_begin)))
return false;
_BMatcherT __matcher(__neg, _M_traits, _M_flags);
__INSERT_REGEX_MATCHER(_M_insert_bracket_matcher, __neg);
return true;
}
#undef __INSERT_REGEX_MATCHER
// Inserts an _AnyMatcher instantiated with __is_ecma == true into the NFA
// and pushes a _StateSeqT built from the newly inserted state onto
// _M_stack.  __icase and __collate are forwarded to the matcher type.
template<typename _TraitsT>
  template<bool __icase, bool __collate>
    void
    _Compiler<_TraitsT>::
    _M_insert_any_matcher_ecma()
    {
      typedef _AnyMatcher<_TraitsT, true, __icase, __collate> _MatcherT;
      auto __id = _M_nfa._M_insert_matcher(_MatcherT(_M_traits));
      _M_stack.push(_StateSeqT(_M_nfa, __id));
    }
// Inserts an _AnyMatcher instantiated with __is_ecma == false into the
// NFA and pushes a _StateSeqT built from the newly inserted state onto
// _M_stack.  __icase and __collate are forwarded to the matcher type.
template<typename _TraitsT>
  template<bool __icase, bool __collate>
    void
    _Compiler<_TraitsT>::
    _M_insert_any_matcher_posix()
    {
      typedef _AnyMatcher<_TraitsT, false, __icase, __collate> _MatcherT;
      auto __id = _M_nfa._M_insert_matcher(_MatcherT(_M_traits));
      _M_stack.push(_StateSeqT(_M_nfa, __id));
    }
// Inserts a _CharMatcher for the first character of _M_value into the
// NFA and pushes a _StateSeqT built from the newly inserted state onto
// _M_stack.  __icase and __collate are forwarded to the matcher type.
template<typename _TraitsT>
  template<bool __icase, bool __collate>
    void
    _Compiler<_TraitsT>::
    _M_insert_char_matcher()
    {
      typedef _CharMatcher<_TraitsT, __icase, __collate> _MatcherT;
      auto __id = _M_nfa._M_insert_matcher(_MatcherT(_M_value[0], _M_traits));
      _M_stack.push(_StateSeqT(_M_nfa, __id));
    }
// Builds a _BracketMatcher for the one-character class name held in
// _M_value, inserts it into the NFA and pushes a _StateSeqT built from
// the newly inserted state onto _M_stack.  The matcher is constructed as
// non-matching when _M_ctype classifies that character as upper case.
template<typename _TraitsT>
  template<bool __icase, bool __collate>
    void
    _Compiler<_TraitsT>::
    _M_insert_character_class_matcher()
    {
      typedef _BracketMatcher<_TraitsT, __icase, __collate> _MatcherT;

      _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1);
      const bool __negated = _M_ctype.is(_CtypeT::upper, _M_value[0]);
      _MatcherT __bm(__negated, _M_traits);
      __bm._M_add_character_class(_M_value);
      __bm._M_ready();

      auto __id = _M_nfa._M_insert_matcher(std::move(__bm));
      _M_stack.push(_StateSeqT(_M_nfa, __id));
    }
template<typename _TraitsT>
template<bool __icase, bool __collate>
void
_Compiler<_TraitsT>::
_M_insert_bracket_matcher(bool __neg)
{
_BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits);
while (!_M_match_token(_ScannerT::_S_token_bracket_end))
_M_expression_term(__matcher);
__matcher._M_ready();
_M_stack.push(_StateSeqT(_M_nfa,
_M_nfa._M_insert_matcher(std::move(__matcher))));
return true;
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
template<bool __icase, bool __collate>
void
_Compiler<_FwdIter, _TraitsT>::
_M_expression_term(_BMatcherT& __matcher)
_Compiler<_TraitsT>::
_M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
{
if (_M_match_token(_ScannerT::_S_token_collsymbol))
__matcher._M_add_collating_element(_M_value);
@ -403,9 +452,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__throw_regex_error(regex_constants::error_brack);
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
bool
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_try_char()
{
bool __is_char = false;
@ -424,9 +473,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __is_char;
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
bool
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_match_token(_TokenT token)
{
if (token == _M_scanner._M_get_token())
@ -438,9 +487,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return false;
}
template<typename _FwdIter, typename _TraitsT>
template<typename _TraitsT>
int
_Compiler<_FwdIter, _TraitsT>::
_Compiler<_TraitsT>::
_M_cur_int_value(int __radix)
{
long __v = 0;
@ -450,25 +499,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
return __v;
}
template<typename _TraitsT>
template<typename _TraitsT, bool __icase, bool __collate>
bool
_BracketMatcher<_TraitsT>::_M_apply(_CharT __ch, false_type) const
_BracketMatcher<_TraitsT, __icase, __collate>::
_M_apply(_CharT __ch, false_type) const
{
bool __ret = false;
if (_M_traits.isctype(__ch, _M_class_set)
|| _M_char_set.count(_M_translate(__ch))
|| _M_equiv_set.count(_M_traits.transform_primary(&__ch, &__ch+1)))
if (std::find(_M_char_set.begin(), _M_char_set.end(),
_M_translator._M_translate(__ch))
!= _M_char_set.end())
__ret = true;
else
{
_StringT __s = _M_get_str(_M_flags & regex_constants::collate
? _M_translate(__ch) : __ch);
auto __s = _M_translator._M_transform(__ch);
for (auto& __it : _M_range_set)
if (__it.first <= __s && __s <= __it.second)
{
__ret = true;
break;
}
if (_M_traits.isctype(__ch, _M_class_set))
__ret = true;
else if (std::find(_M_equiv_set.begin(), _M_equiv_set.end(),
_M_traits.transform_primary(&__ch, &__ch+1))
!= _M_equiv_set.end())
__ret = true;
}
if (_M_is_non_matching)
return !__ret;

View file

@ -65,7 +65,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_nfa(*__re._M_automaton),
_M_results(__results),
_M_match_queue(__dfs_mode ? nullptr
: new queue<pair<_StateIdT, _ResultsVec>>()),
: new vector<pair<_StateIdT, _ResultsVec>>()),
_M_visited(__dfs_mode ? nullptr : new vector<bool>(_M_nfa.size())),
_M_flags((__flags & regex_constants::match_prev_avail)
? (__flags
@ -133,23 +133,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_lookahead(_State<_TraitsT> __state);
public:
_ResultsVec _M_cur_results;
_BiIter _M_current;
const _BiIter _M_begin;
const _BiIter _M_end;
const _RegexT& _M_re;
const _NFAT& _M_nfa;
_ResultsVec& _M_results;
_ResultsVec _M_cur_results;
_BiIter _M_current;
const _BiIter _M_begin;
const _BiIter _M_end;
const _RegexT& _M_re;
const _NFAT& _M_nfa;
_ResultsVec& _M_results;
// Used in BFS, saving states that need to be considered for the next
// character.
std::unique_ptr<queue<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
std::unique_ptr<vector<pair<_StateIdT, _ResultsVec>>> _M_match_queue;
// Used in BFS, indicating which states have already been visited.
std::unique_ptr<vector<bool>> _M_visited;
_FlagT _M_flags;
std::unique_ptr<vector<bool>> _M_visited;
_FlagT _M_flags;
// To record current solution.
_StateIdT _M_start_state;
_StateIdT _M_start_state;
// Do we have a solution so far?
bool _M_has_sol;
bool _M_has_sol;
};
//@} regex-detail

View file

@ -111,7 +111,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
else
{
_M_match_queue->push(make_pair(_M_start_state, _M_results));
_M_match_queue->push_back(make_pair(_M_start_state, _M_results));
bool __ret = false;
while (1)
{
@ -120,10 +120,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
break;
_M_visited->assign(_M_visited->size(), false);
auto _M_old_queue = std::move(*_M_match_queue);
while (!_M_old_queue.empty())
for (auto __task : _M_old_queue)
{
auto __task = _M_old_queue.front();
_M_old_queue.pop();
_M_cur_results = __task.second;
_M_dfs<__match_mode>(__task.first);
}
@ -279,7 +277,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
else
if (__state._M_matches(*_M_current))
_M_match_queue->push(make_pair(__state._M_next, _M_cur_results));
_M_match_queue->push_back(make_pair(__state._M_next,
_M_cur_results));
break;
// First fetch the matched result from _M_cur_results as __submatch;
// then compare it with

View file

@ -39,6 +39,154 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{
*/
struct _ScannerBase
{
public:
/// Token types returned from the scanner.
enum _TokenT
{
_S_token_anychar,
_S_token_ord_char,
_S_token_oct_num,
_S_token_hex_num,
_S_token_backref,
_S_token_subexpr_begin,
_S_token_subexpr_no_group_begin,
_S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
_S_token_subexpr_end,
_S_token_bracket_begin,
_S_token_bracket_neg_begin,
_S_token_bracket_end,
_S_token_interval_begin,
_S_token_interval_end,
_S_token_quoted_class,
_S_token_char_class_name,
_S_token_collsymbol,
_S_token_equiv_class_name,
_S_token_opt,
_S_token_or,
_S_token_closure0,
_S_token_closure1,
_S_token_ungreedy,
_S_token_line_begin,
_S_token_line_end,
_S_token_word_bound, // neg if _M_value[0] == 'n'
_S_token_comma,
_S_token_dup_count,
_S_token_eof,
_S_token_unknown
};
protected:
typedef regex_constants::syntax_option_type _FlagT;
enum _StateT
{
_S_state_normal,
_S_state_in_brace,
_S_state_in_bracket,
};
protected:
_ScannerBase(_FlagT __flags)
: _M_state(_S_state_normal),
_M_flags(__flags),
_M_escape_tbl(_M_is_ecma()
? _M_ecma_escape_tbl
: _M_awk_escape_tbl),
_M_spec_char(_M_is_ecma()
? _M_ecma_spec_char
: _M_is_basic()
? _M_basic_spec_char
: _M_extended_spec_char),
_M_at_bracket_start(false)
{ }
protected:
const char*
_M_find_escape(char __c)
{
auto __it = _M_escape_tbl;
for (; __it->first != '\0'; ++__it)
if (__it->first == __c)
return &__it->second;
return nullptr;
}
bool
_M_is_ecma() const
{ return _M_flags & regex_constants::ECMAScript; }
bool
_M_is_basic() const
{ return _M_flags & (regex_constants::basic | regex_constants::grep); }
bool
_M_is_extended() const
{
return _M_flags & (regex_constants::extended
| regex_constants::egrep
| regex_constants::awk);
}
bool
_M_is_grep() const
{ return _M_flags & (regex_constants::grep | regex_constants::egrep); }
bool
_M_is_awk() const
{ return _M_flags & regex_constants::awk; }
protected:
const std::pair<char, _TokenT> _M_token_tbl[9] =
{
{'^', _S_token_line_begin},
{'$', _S_token_line_end},
{'.', _S_token_anychar},
{'*', _S_token_closure0},
{'+', _S_token_closure1},
{'?', _S_token_opt},
{'|', _S_token_or},
{'\n', _S_token_or}, // grep and egrep
{'\0', _S_token_or},
};
const std::pair<char, char> _M_ecma_escape_tbl[8] =
{
{'0', '\0'},
{'b', '\b'},
{'f', '\f'},
{'n', '\n'},
{'r', '\r'},
{'t', '\t'},
{'v', '\v'},
{'\0', '\0'},
};
const std::pair<char, char> _M_awk_escape_tbl[11] =
{
{'"', '"'},
{'/', '/'},
{'\\', '\\'},
{'a', '\a'},
{'b', '\b'},
{'f', '\f'},
{'n', '\n'},
{'r', '\r'},
{'t', '\t'},
{'v', '\v'},
{'\0', '\0'},
};
const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
const char* _M_basic_spec_char = ".[\\*^$";
const char* _M_extended_spec_char = ".[\\()*+?{|^$";
_StateT _M_state;
_FlagT _M_flags;
_TokenT _M_token;
const std::pair<char, char>* _M_escape_tbl;
const char* _M_spec_char;
bool _M_at_bracket_start;
};
/**
* @brief struct _Scanner. Scans an input range for regex tokens.
*
@ -49,51 +197,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* constructor: different regular expression grammars will interpret
* the same input pattern in syntactically different ways.
*/
template<typename _FwdIter>
template<typename _CharT>
class _Scanner
: public _ScannerBase
{
public:
typedef typename std::iterator_traits<_FwdIter>::value_type _CharT;
typedef const _CharT* _IterT;
typedef std::basic_string<_CharT> _StringT;
typedef regex_constants::syntax_option_type _FlagT;
typedef const std::ctype<_CharT> _CtypeT;
/// Token types returned from the scanner.
enum _TokenT
{
_S_token_anychar,
_S_token_ord_char,
_S_token_oct_num,
_S_token_hex_num,
_S_token_backref,
_S_token_subexpr_begin,
_S_token_subexpr_no_group_begin,
_S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
_S_token_subexpr_end,
_S_token_bracket_begin,
_S_token_bracket_neg_begin,
_S_token_bracket_end,
_S_token_interval_begin,
_S_token_interval_end,
_S_token_quoted_class,
_S_token_char_class_name,
_S_token_collsymbol,
_S_token_equiv_class_name,
_S_token_opt,
_S_token_or,
_S_token_closure0,
_S_token_closure1,
_S_token_ungreedy,
_S_token_line_begin,
_S_token_line_end,
_S_token_word_bound, // neg if _M_value[0] == 'n'
_S_token_comma,
_S_token_dup_count,
_S_token_eof,
_S_token_unknown
};
_Scanner(_FwdIter __begin, _FwdIter __end,
_Scanner(_IterT __begin, _IterT __end,
_FlagT __flags, std::locale __loc);
void
@ -113,13 +227,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif
private:
enum _StateT
{
_S_state_normal,
_S_state_in_brace,
_S_state_in_bracket,
};
void
_M_scan_normal();
@ -141,49 +248,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_eat_class(char);
constexpr bool
_M_is_ecma()
{ return _M_flags & regex_constants::ECMAScript; }
constexpr bool
_M_is_basic()
{ return _M_flags & (regex_constants::basic | regex_constants::grep); }
constexpr bool
_M_is_extended()
{
return _M_flags & (regex_constants::extended
| regex_constants::egrep
| regex_constants::awk);
}
constexpr bool
_M_is_grep()
{ return _M_flags & (regex_constants::grep | regex_constants::egrep); }
constexpr bool
_M_is_awk()
{ return _M_flags & regex_constants::awk; }
_StateT _M_state;
_FwdIter _M_current;
_FwdIter _M_end;
_FlagT _M_flags;
_IterT _M_current;
_IterT _M_end;
_CtypeT& _M_ctype;
_TokenT _M_token;
_StringT _M_value;
bool _M_at_bracket_start;
public:
// FIXME: make them static when this file is stable.
const std::map<char, _TokenT> _M_token_map;
const std::map<char, char> _M_ecma_escape_map;
const std::map<char, char> _M_awk_escape_map;
const std::set<char> _M_ecma_spec_char;
const std::set<char> _M_basic_spec_char;
const std::set<char> _M_extended_spec_char;
const std::map<char, char>& _M_escape_map;
const std::set<char>& _M_spec_char;
void (_Scanner::* _M_eat_escape)();
};

View file

@ -52,106 +52,22 @@ namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION
template<typename _FwdIter>
_Scanner<_FwdIter>::
_Scanner(_FwdIter __begin, _FwdIter __end,
template<typename _CharT>
_Scanner<_CharT>::
_Scanner(typename _Scanner::_IterT __begin,
typename _Scanner::_IterT __end,
_FlagT __flags, std::locale __loc)
: _M_state(_S_state_normal), _M_current(__begin), _M_end(__end),
_M_flags(__flags),
: _ScannerBase(__flags),
_M_current(__begin), _M_end(__end),
_M_ctype(std::use_facet<_CtypeT>(__loc)),
_M_at_bracket_start(false),
_M_token_map
{
{'^', _S_token_line_begin},
{'$', _S_token_line_end},
{'.', _S_token_anychar},
{'*', _S_token_closure0},
{'+', _S_token_closure1},
{'?', _S_token_opt},
{'|', _S_token_or},
// grep and egrep
{'\n', _S_token_or},
},
_M_ecma_escape_map
{
{'0', '\0'},
{'b', '\b'},
{'f', '\f'},
{'n', '\n'},
{'r', '\r'},
{'t', '\t'},
{'v', '\v'},
},
_M_awk_escape_map
{
{'"', '"'},
{'/', '/'},
{'\\', '\\'},
{'a', '\a'},
{'b', '\b'},
{'f', '\f'},
{'n', '\n'},
{'r', '\r'},
{'t', '\t'},
{'v', '\v'},
},
_M_ecma_spec_char
{
'^',
'$',
'\\',
'.',
'*',
'+',
'?',
'(',
')',
'[',
']',
'{',
'}',
'|',
},
_M_basic_spec_char
{
'.',
'[',
'\\',
'*',
'^',
'$',
},
_M_extended_spec_char
{
'.',
'[',
'\\',
'(',
')',
'*',
'+',
'?',
'{',
'|',
'^',
'$',
},
_M_escape_map(_M_is_ecma()
? _M_ecma_escape_map
: _M_awk_escape_map),
_M_spec_char(_M_is_ecma()
? _M_ecma_spec_char
: _M_is_basic()
? _M_basic_spec_char
: _M_extended_spec_char),
_M_eat_escape(_M_is_ecma()
? &_Scanner::_M_eat_escape_ecma
: &_Scanner::_M_eat_escape_posix)
{ _M_advance(); }
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_advance()
{
if (_M_current == _M_end)
@ -173,12 +89,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles:
// 1) "\(", "\)", "\{" in basic. It's not escaping.
// 2) "(?:", "(?=", "(?!" in ECMAScript.
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_scan_normal()
{
auto __c = *_M_current++;
const char* __pos;
if (__c == '\\')
{
@ -244,11 +161,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_M_state = _S_state_in_brace;
_M_token = _S_token_interval_begin;
}
else if ((_M_spec_char.count(_M_ctype.narrow(__c, '\0'))
else if (((__pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0')))
!= nullptr
&& *__pos != '\0'
&& __c != ']'
&& __c != '}')
|| (_M_is_grep() && __c == '\n'))
_M_token = _M_token_map.at(__c);
{
auto __it = _M_token_tbl;
auto __narrowc = _M_ctype.narrow(__c, '\0');
for (; __it->first != '\0'; ++__it)
if (__it->first == __narrowc)
{
_M_token = __it->second;
return;
}
_GLIBCXX_DEBUG_ASSERT(false);
}
else
{
_M_token = _S_token_ord_char;
@ -259,9 +188,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles:
// 1) different semantics of "[]" and "[^]".
// 2) Escaping in bracket expr.
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_scan_in_bracket()
{
if (_M_current == _M_end)
@ -316,9 +245,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles:
// 1) "\}" in basic style.
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_scan_in_brace()
{
if (_M_current == _M_end)
@ -357,21 +286,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__throw_regex_error(regex_constants::error_badbrace);
}
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_eat_escape_ecma()
{
if (_M_current == _M_end)
__throw_regex_error(regex_constants::error_escape);
auto __c = *_M_current++;
auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
if (_M_escape_map.count(_M_ctype.narrow(__c, '\0'))
&& (__c != 'b' || _M_state == _S_state_in_bracket))
if (__pos != nullptr && (__c != 'b' || _M_state == _S_state_in_bracket))
{
_M_token = _S_token_ord_char;
_M_value.assign(1, _M_escape_map.at(__c));
_M_value.assign(1, *__pos);
}
else if (__c == 'b')
{
@ -431,17 +360,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Differences between styles:
// 1) Extended doesn't support backref, but basic does.
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_eat_escape_posix()
{
if (_M_current == _M_end)
__throw_regex_error(regex_constants::error_escape);
auto __c = *_M_current;
auto __pos = std::strchr(_M_spec_char, _M_ctype.narrow(__c, '\0'));
if (_M_spec_char.count(_M_ctype.narrow(__c, '\0')))
if (__pos != nullptr && *__pos != '\0')
{
_M_token = _S_token_ord_char;
_M_value.assign(1, __c);
@ -469,17 +399,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
++_M_current;
}
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_eat_escape_awk()
{
auto __c = *_M_current++;
auto __pos = _M_find_escape(_M_ctype.narrow(__c, '\0'));
if (_M_escape_map.count(_M_ctype.narrow(__c, '\0')))
if (__pos != nullptr)
{
_M_token = _S_token_ord_char;
_M_value.assign(1, _M_escape_map.at(__c));
_M_value.assign(1, *__pos);
}
// \ddd for oct representation
else if (_M_ctype.is(_CtypeT::digit, __c)
@ -505,9 +436,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Eats a character class or throws an exception.
// __ch could be ':', '.' or '=', _M_current is the char after ']' when
// returning.
template<typename _FwdIter>
template<typename _CharT>
void
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_eat_class(char __ch)
{
for (_M_value.clear(); _M_current != _M_end && *_M_current != __ch;)
@ -525,9 +456,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
}
#ifdef _GLIBCXX_DEBUG
template<typename _FwdIter>
template<typename _CharT>
std::ostream&
_Scanner<_FwdIter>::
_Scanner<_CharT>::
_M_print(std::ostream& ostr)
{
switch (_M_token)

View file

@ -44,15 +44,13 @@
#include <iterator>
#include <locale>
#include <memory>
#include <map>
#include <queue>
#include <set>
#include <sstream>
#include <stack>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include <cstring>
#include <bits/regex_constants.h>
#include <bits/regex_error.h>