re PR libstdc++/67015 ("^[a-z0-9][a-z0-9-]*$", std::regex::extended is miscompiled)
PR libstdc++/67015 * include/bits/regex_compiler.h (_Compiler<>::_M_expression_term, _BracketMatcher<>::_M_add_collating_element): Change signature to make checking the end of bracket expression easier. * include/bits/regex_compiler.tcc (_Compiler<>::_M_expression_term): Treat '-' as a valid literal if it's at the end of bracket expression. * testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc: New testcases. From-SVN: r226336
This commit is contained in:
parent
3d61d87585
commit
f9ce3c1639
4 changed files with 99 additions and 13 deletions
|
@ -1,3 +1,14 @@
|
|||
2015-07-29 Tim Shen <timshen@google.com>
|
||||
|
||||
PR libstdc++/67015
|
||||
* include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
|
||||
_BracketMatcher<>::_M_add_collating_element): Change signature
|
||||
to make checking the end of bracket expression easier.
|
||||
* include/bits/regex_compiler.tcc (_Compiler<>::_M_expression_term):
|
||||
Treat '-' as a valid literal if it's at the end of bracket expression.
|
||||
* testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
|
||||
New testcases.
|
||||
|
||||
2015-07-24 Jonathan Wakely <jwakely@redhat.com>
|
||||
|
||||
* include/bits/atomic_futex.h [_GLIBCXX_HAVE_LINUX_FUTEX]
|
||||
|
|
|
@ -116,8 +116,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
void
|
||||
_M_insert_bracket_matcher(bool __neg);
|
||||
|
||||
// Returns true if successfully matched one term and should continue.
|
||||
// Returns false if the compiler should move on.
|
||||
template<bool __icase, bool __collate>
|
||||
void
|
||||
bool
|
||||
_M_expression_term(pair<bool, _CharT>& __last_char,
|
||||
_BracketMatcher<_TraitsT, __icase, __collate>&
|
||||
__matcher);
|
||||
|
@ -389,8 +391,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
_M_add_collating_element(const _StringT& __s)
|
||||
_StringT
|
||||
_M_add_collate_element(const _StringT& __s)
|
||||
{
|
||||
auto __st = _M_traits.lookup_collatename(__s.data(),
|
||||
__s.data() + __s.size());
|
||||
|
@ -400,6 +402,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
#ifdef _GLIBCXX_DEBUG
|
||||
_M_is_ready = false;
|
||||
#endif
|
||||
return __st;
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -424,8 +424,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
__last_char.first = true;
|
||||
__last_char.second = _M_value[0];
|
||||
}
|
||||
while (!_M_match_token(_ScannerT::_S_token_bracket_end))
|
||||
_M_expression_term(__last_char, __matcher);
|
||||
while (_M_expression_term(__last_char, __matcher));
|
||||
__matcher._M_ready();
|
||||
_M_stack.push(_StateSeqT(
|
||||
*_M_nfa,
|
||||
|
@ -434,21 +433,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
|
||||
template<typename _TraitsT>
|
||||
template<bool __icase, bool __collate>
|
||||
void
|
||||
bool
|
||||
_Compiler<_TraitsT>::
|
||||
_M_expression_term(pair<bool, _CharT>& __last_char,
|
||||
_BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
|
||||
{
|
||||
if (_M_match_token(_ScannerT::_S_token_bracket_end))
|
||||
return false;
|
||||
|
||||
if (_M_match_token(_ScannerT::_S_token_collsymbol))
|
||||
__matcher._M_add_collating_element(_M_value);
|
||||
{
|
||||
auto __symbol = __matcher._M_add_collate_element(_M_value);
|
||||
if (__symbol.size() == 1)
|
||||
{
|
||||
__last_char.first = true;
|
||||
__last_char.second = __symbol[0];
|
||||
}
|
||||
}
|
||||
else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
|
||||
__matcher._M_add_equivalence_class(_M_value);
|
||||
else if (_M_match_token(_ScannerT::_S_token_char_class_name))
|
||||
__matcher._M_add_character_class(_M_value, false);
|
||||
// POSIX doesn't permit '-' as a start-range char (say [a-z--0]),
|
||||
// except when the '-' is the first character in the bracket expression
|
||||
// ([--0]). ECMAScript treats all '-' after a range as a normal character.
|
||||
// Also see above, where _M_expression_term gets called.
|
||||
// POSIX doesn't allow '-' as a start-range char (say [a-z--0]),
|
||||
// except when the '-' is the first or last character in the bracket
|
||||
// expression ([--0]). ECMAScript treats all '-' after a range as a
|
||||
// normal character. Also see above, where _M_expression_term gets called.
|
||||
//
|
||||
// As a result, POSIX rejects [-----], but ECMAScript doesn't.
|
||||
// Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax.
|
||||
|
@ -459,10 +468,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
{
|
||||
if (!__last_char.first)
|
||||
{
|
||||
__matcher._M_add_char(_M_value[0]);
|
||||
if (_M_value[0] == '-'
|
||||
&& !(_M_flags & regex_constants::ECMAScript))
|
||||
__throw_regex_error(regex_constants::error_range);
|
||||
__matcher._M_add_char(_M_value[0]);
|
||||
{
|
||||
if (_M_match_token(_ScannerT::_S_token_bracket_end))
|
||||
return false;
|
||||
__throw_regex_error(regex_constants::error_range);
|
||||
}
|
||||
__last_char.first = true;
|
||||
__last_char.second = _M_value[0];
|
||||
}
|
||||
|
@ -496,6 +509,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
|
|||
_M_value[0]));
|
||||
else
|
||||
__throw_regex_error(regex_constants::error_brack);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<typename _TraitsT>
|
||||
|
|
|
@ -82,6 +82,22 @@ test02()
|
|||
VERIFY(e.code() == std::regex_constants::error_range);
|
||||
}
|
||||
std::regex re("[-----]", std::regex::ECMAScript);
|
||||
|
||||
VERIFY(!regex_match("b", regex("[-ac]", regex_constants::extended)));
|
||||
VERIFY(!regex_match("b", regex("[ac-]", regex_constants::extended)));
|
||||
VERIFY(regex_match("b", regex("[^-ac]", regex_constants::extended)));
|
||||
VERIFY(regex_match("b", regex("[^ac-]", regex_constants::extended)));
|
||||
VERIFY(regex_match("&", regex("[%--]", regex_constants::extended)));
|
||||
VERIFY(regex_match(".", regex("[--@]", regex_constants::extended)));
|
||||
try
|
||||
{
|
||||
regex("[a--@]", regex_constants::extended);
|
||||
VERIFY(false);
|
||||
}
|
||||
catch (const std::regex_error& e)
|
||||
{
|
||||
}
|
||||
VERIFY(regex_match("].", regex("[][.hyphen.]-0]*", regex_constants::extended)));
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -115,6 +131,44 @@ test04()
|
|||
VERIFY(regex_match_debug("w", re));
|
||||
}
|
||||
|
||||
// libstdc++/67015
|
||||
void
|
||||
test05()
|
||||
{
|
||||
bool test __attribute__((unused)) = true;
|
||||
|
||||
regex lanana_namespace("^[a-z0-9]+$", regex::extended);
|
||||
regex lsb_namespace("^_?([a-z0-9_.]+-, regex::extended)+[a-z0-9]+$");
|
||||
regex debian_dpkg_conffile_cruft("dpkg-(old|dist|new|tmp, regex::extended)$");
|
||||
regex debian_cron_namespace("^[a-z0-9][a-z0-9-]*$", regex::extended);
|
||||
VERIFY(regex_match("test", debian_cron_namespace));
|
||||
VERIFY(!regex_match("-a", debian_cron_namespace));
|
||||
VERIFY(regex_match("a-", debian_cron_namespace));
|
||||
regex debian_cron_namespace_ok("^[a-z0-9][-a-z0-9]*$", regex::extended);
|
||||
VERIFY(regex_match("test", debian_cron_namespace_ok));
|
||||
VERIFY(!regex_match("-a", debian_cron_namespace_ok));
|
||||
VERIFY(regex_match("a-", debian_cron_namespace_ok));
|
||||
}
|
||||
|
||||
// libstdc++/67015
|
||||
void
|
||||
test06()
|
||||
{
|
||||
bool test __attribute__((unused)) = true;
|
||||
|
||||
regex lanana_namespace("^[a-z0-9]+$");
|
||||
regex lsb_namespace("^_?([a-z0-9_.]+-)+[a-z0-9]+$");
|
||||
regex debian_dpkg_conffile_cruft("dpkg-(old|dist|new|tmp)$");
|
||||
regex debian_cron_namespace("^[a-z0-9][a-z0-9-]*$");
|
||||
VERIFY(regex_match("test", debian_cron_namespace));
|
||||
VERIFY(!regex_match("-a", debian_cron_namespace));
|
||||
VERIFY(regex_match("a-", debian_cron_namespace));
|
||||
regex debian_cron_namespace_ok("^[a-z0-9][-a-z0-9]*$");
|
||||
VERIFY(regex_match("test", debian_cron_namespace_ok));
|
||||
VERIFY(!regex_match("-a", debian_cron_namespace_ok));
|
||||
VERIFY(regex_match("a-", debian_cron_namespace_ok));
|
||||
}
|
||||
|
||||
int
|
||||
main()
|
||||
{
|
||||
|
@ -122,5 +176,8 @@ main()
|
|||
test02();
|
||||
test03();
|
||||
test04();
|
||||
test05();
|
||||
test06();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue