diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 898f0319a20..cd365692b5c 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,26 @@ +2013-09-02 Tim Shen + + * include/bits/regex_automaton.h: Rearrange _NFA's layout. + * include/bits/regex_compiler.h: Add _AnyMatcher and _CharMatcher. + Rearrange _BracketMatcher's layout. + (_BracketMatcher<>::_M_add_char): Use set instead of vector for + _M_char_set. + (_BracketMatcher<>::_M_add_collating_element): Likewise. + (_BracketMatcher<>::_M_make_range): Likewise. + * include/bits/regex_compiler.tcc (_Compiler<>::_M_atom): Use + appropriate constructors of matchers above. + * testsuite/28_regex/algorithms/regex_match/ecma/char/anymatcher.cc: + New. + * testsuite/28_regex/algorithms/regex_match/ecma/char/backref.cc: New. + * testsuite/28_regex/algorithms/regex_match/ecma/char/empty_range.cc: + New. + * testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc: + New. + * testsuite/28_regex/algorithms/regex_match/ecma/char/hex.cc: New. + * testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/anymatcher.cc: + New. + * testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/hex.cc: New. 
+ 2013-08-30 François Dumont PR libstdc++/58148 diff --git a/libstdc++-v3/include/bits/regex_automaton.h b/libstdc++-v3/include/bits/regex_automaton.h index f9e9630636b..2c872aa9482 100644 --- a/libstdc++-v3/include/bits/regex_automaton.h +++ b/libstdc++-v3/include/bits/regex_automaton.h @@ -206,12 +206,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_dot(std::ostream& __ostr) const; #endif + std::vector _M_paren_stack; + _StateSet _M_accepting_states; _FlagT _M_flags; _StateIdT _M_start_state; - _StateSet _M_accepting_states; _SizeT _M_subexpr_count; bool _M_has_backref; - std::vector _M_paren_stack; }; /// Describes a sequence of one or more %_State, its current start diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index a1107bb7eeb..55ecdb92d41 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -125,12 +125,60 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION const _TraitsT& _M_traits; _ScannerT _M_scanner; - _StringT _M_value; _RegexT _M_state_store; + _StringT _M_value; _StackT _M_stack; _FlagT _M_flags; }; + template + struct _AnyMatcher + { + explicit + _AnyMatcher(const _TraitsT& __traits) + : _M_traits(__traits) + { } + + bool + operator()(_CharT __ch) const + { + return _M_traits.translate(__ch) != '\n' + && _M_traits.translate(__ch) != '\r' + && _M_traits.translate(__ch) != u'\u2028' + && _M_traits.translate(__ch) != u'\u2029'; + } + + const _TraitsT& _M_traits; + }; + + template + struct _CharMatcher + { + typedef regex_constants::syntax_option_type _FlagT; + + explicit + _CharMatcher(_CharT __ch, const _TraitsT& __traits, _FlagT __flags) + : _M_ch(_M_translate(__ch)), _M_traits(__traits), _M_flags(__flags) + { } + + bool + operator()(_CharT __ch) const + { return _M_ch == _M_translate(__ch); } + + _CharT + _M_translate(_CharT __ch) const + { + if (_M_flags & regex_constants::icase) + return _M_traits.translate_nocase(__ch); + else + return 
_M_traits.translate(__ch); + } + + const _TraitsT& _M_traits; + _FlagT _M_flags; + _CharT _M_ch; + }; + /// Matches a character range (bracket expression) template struct _BracketMatcher @@ -141,9 +189,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION explicit _BracketMatcher(bool __is_non_matching, - const _TraitsT& __t, + const _TraitsT& __traits, _FlagT __flags) - : _M_is_non_matching(__is_non_matching), _M_traits(__t), + : _M_is_non_matching(__is_non_matching), _M_traits(__traits), _M_flags(__flags), _M_class_set(0) { } @@ -152,7 +200,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_add_char(_CharT __c) - { _M_char_set.push_back(_M_translate(__c)); } + { _M_char_set.insert(_M_translate(__c)); } void _M_add_collating_element(const _StringT& __s) @@ -162,7 +210,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION if (__st.empty()) __throw_regex_error(regex_constants::error_collate); // TODO: digraph - _M_char_set.push_back(__st[0]); + _M_char_set.insert(_M_translate(__st[0])); } void @@ -186,21 +234,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION void _M_make_range(_CharT __l, _CharT __r) { - _M_range_set.push_back( - make_pair(_M_get_str(_M_translate(__l)), - _M_get_str(_M_translate(__r)))); + if (_M_flags & regex_constants::collate) + _M_range_set.insert( + make_pair(_M_get_str(_M_translate(__l)), + _M_get_str(_M_translate(__r)))); + else + _M_range_set.insert(make_pair(_M_get_str(__l), _M_get_str(__r))); } _CharT _M_translate(_CharT __c) const { - if (_M_flags & regex_constants::collate) - if (_M_is_icase()) - return _M_traits.translate_nocase(__c); - else - return _M_traits.translate(__c); + if (_M_is_icase()) + return _M_traits.translate_nocase(__c); else - return __c; + return _M_traits.translate(__c); } bool @@ -214,12 +262,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION return _M_traits.transform(__s.begin(), __s.end()); } - const _TraitsT& _M_traits; - _FlagT _M_flags; - bool _M_is_non_matching; - std::vector<_CharT> _M_char_set; - std::vector> _M_range_set; - _CharClassT _M_class_set; + 
std::set<_CharT> _M_char_set; + std::set> _M_range_set; + const _TraitsT& _M_traits; + _CharClassT _M_class_set; + _FlagT _M_flags; + bool _M_is_non_matching; }; //@} regex-detail diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index bed091a4486..e41b251c257 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -204,32 +204,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { if (_M_match_token(_ScannerT::_S_token_anychar)) { - const static auto& - __any_matcher = [](_CharT __ch) -> bool - { return true; }; - _M_stack.push(_StateSeqT(_M_state_store, _M_state_store._M_insert_matcher - (__any_matcher))); + (_AnyMatcher<_CharT, _TraitsT>(_M_traits)))); return true; } if (_M_try_char()) { - _CharT __c = _M_value[0]; - __detail::_Matcher<_CharT> f; - if (_M_flags & regex_constants::icase) - { - auto __traits = this->_M_traits; - __c = __traits.translate_nocase(__c); - f = [__traits, __c](_CharT __ch) -> bool - { return __traits.translate_nocase(__ch) == __c; }; - } - else - f = [__c](_CharT __ch) -> bool - { return __ch == __c; }; - _M_stack.push(_StateSeqT(_M_state_store, - _M_state_store._M_insert_matcher(f))); + _M_state_store._M_insert_matcher + (_CharMatcher<_CharT, _TraitsT>(_M_value[0], + _M_traits, + _M_flags)))); return true; } if (_M_match_token(_ScannerT::_S_token_backref)) @@ -374,26 +360,18 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool __ret = false; if (_M_traits.isctype(__ch, _M_class_set)) __ret = true; + else if (_M_char_set.count(_M_translate(__ch))) + __ret = true; else { - __ch = _M_translate(__ch); - - for (auto __c : _M_char_set) - if (__c == __ch) + _StringT __s = _M_get_str(_M_flags & regex_constants::collate + ? 
_M_translate(__ch) : __ch); + for (auto& __it : _M_range_set) + if (__it.first <= __s && __s <= __it.second) { __ret = true; break; } - if (!__ret) - { - _StringT __s = _M_get_str(__ch); - for (auto& __it : _M_range_set) - if (__it.first <= __s && __s <= __it.second) - { - __ret = true; - break; - } - } } if (_M_is_non_matching) return !__ret; diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/anymatcher.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/anymatcher.cc new file mode 100644 index 00000000000..6e6095b8f24 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/anymatcher.cc @@ -0,0 +1,52 @@ +// { dg-options "-std=gnu++11" } + +// +// 2013-09-02 Tim Shen +// +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . + +// 28.11.2 regex_match +// Tests ECMAScript "." against a std::string. 
+ +#include +#include + +using namespace std; + +void +test01() +{ + bool test __attribute__((unused)) = true; + +#define TEST(res, s) \ + {\ + regex re(res);\ + string st(s);\ + VERIFY(!regex_match(st, re));\ + } + TEST(".", "\0"); + TEST(".", "\n"); + TEST(".", "\r"); +} + +int +main() +{ + test01(); + return 0; +} diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/string_backref.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/backref.cc similarity index 97% rename from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/string_backref.cc rename to libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/backref.cc index a828fea93c6..321ce35a038 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/string_backref.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/backref.cc @@ -1,7 +1,7 @@ // { dg-options "-std=gnu++11" } // -// 2013-08-10 Tim Shen +// 2013-09-02 Tim Shen // // Copyright (C) 2013 Free Software Foundation, Inc. // diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/empty_range.cc similarity index 96% rename from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc rename to libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/empty_range.cc index 93bca45bf9d..3c48d3521a5 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/empty_range.cc @@ -1,7 +1,7 @@ // { dg-options "-std=gnu++11" } // -// 2013-08-26 Tim Shen +// 2013-09-02 Tim Shen // // Copyright (C) 2013 Free Software Foundation, Inc. 
// diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_emptygroup.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc similarity index 96% rename from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_emptygroup.cc rename to libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc index e112db55e4a..1dc8f63f789 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_emptygroup.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/emptygroup.cc @@ -1,7 +1,7 @@ // { dg-options "-std=gnu++11" } // -// 2013-08-22 Tim Shen +// 2013-09-02 Tim Shen // // Copyright (C) 2013 Free Software Foundation, Inc. // diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/hex.cc similarity index 92% rename from libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc rename to libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/hex.cc index a7ef0fb36cc..a73b742a5e4 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/hex.cc @@ -1,7 +1,7 @@ // { dg-options "-std=gnu++11" } // -// 2013-08-26 Tim Shen +// 2013-09-02 Tim Shen // // Copyright (C) 2013 Free Software Foundation, Inc. 
// @@ -34,7 +34,6 @@ test01() bool test __attribute__((unused)) = true; VERIFY(regex_match(":", regex("\\x3a"))); - VERIFY(regex_match(L"\u1234", wregex(L"\\u1234"))); try { regex("\\u400x"); diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/anymatcher.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/anymatcher.cc new file mode 100644 index 00000000000..c574908d6a9 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/anymatcher.cc @@ -0,0 +1,51 @@ +// { dg-options "-std=gnu++11" } + +// +// 2013-09-02 Tim Shen +// +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// <http://www.gnu.org/licenses/>. + +// 28.11.2 regex_match +// Tests ECMAScript "." against a std::wstring. 
+ +#include +#include + +using namespace std; + +void +test01() +{ + bool test __attribute__((unused)) = true; + +#define TESTL(res, s) \ + {\ + wregex re(res);\ + wstring st(s);\ + VERIFY(!regex_match(st, re));\ + } + TESTL(L".", L"\u2028"); + TESTL(L".", L"\u2029"); +} + +int +main() +{ + test01(); + return 0; +} diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/hex.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/hex.cc new file mode 100644 index 00000000000..f9561be70e2 --- /dev/null +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/wchar_t/hex.cc @@ -0,0 +1,44 @@ +// { dg-options "-std=gnu++11" } + +// +// 2013-09-02 Tim Shen +// +// Copyright (C) 2013 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this library; see the file COPYING3. If not see +// . + +// 28.11.2 regex_match +// Tests ECMAScript \x and \u. + +#include +#include + +using namespace std; + +void +test01() +{ + bool test __attribute__((unused)) = true; + + VERIFY(regex_match(L"\u1234", wregex(L"\\u1234"))); +} + +int +main() +{ + test01(); + return 0; +}