c++: libcpp: Support raw strings with newlines in directives [PR55971]

It's not currently possible to use a C++11 raw string containing a newline as
part of the definition of a macro, or in any other preprocessing directive,
such as:

 #define X R"(two
lines)"

 #error R"(this error has
two lines)"

Add support for that by relaxing the conditions under which
_cpp_get_fresh_line() refuses to get a new line. For the case of lexing a raw
string, it's OK to do so as long as there is another line within the current
buffer. The code in _cpp_get_fresh_line() was refactored into a new function
get_fresh_line_impl(), so that the new logic is applied only when processing a
raw string and not any other times.

libcpp/ChangeLog:

	PR preprocessor/55971
	* lex.cc (get_fresh_line_impl): New function refactoring the code
	from...
	(_cpp_get_fresh_line): ...here.
	(lex_raw_string): Use the new version of get_fresh_line_impl() to
	support raw strings containing new lines when processing a directive.

gcc/testsuite/ChangeLog:

	PR preprocessor/55971
	* c-c++-common/raw-string-directive-1.c: New test.
	* c-c++-common/raw-string-directive-2.c: New test.

gcc/c-family/ChangeLog:

	PR preprocessor/55971
	* c-ppoutput.cc (account_for_newlines): Update comment.
This commit is contained in:
Lewis Hyatt 2022-06-15 18:06:53 -04:00
parent b83f01d005
commit 3ad2167bba
4 changed files with 148 additions and 10 deletions

View file

@ -433,7 +433,15 @@ scan_translation_unit_directives_only (cpp_reader *pfile)
lang_hooks.preprocess_token (pfile, NULL, streamer.filter);
}
/* Adjust print.src_line for newlines embedded in output. */
/* Adjust print.src_line for newlines embedded in output. For example, if a raw
string literal contains newlines, then we need to increment our notion of the
current line to keep in sync and avoid outputting a line marker
unnecessarily. If a raw string literal containing newlines is the result of
macro expansion, then we have the opposite problem, where the token takes up
more lines in the output than it did in the input, and hence a line marker is
needed to restore the correct state for subsequent lines. In this case,
incrementing print.src_line still does the job, because it will cause us to
emit the line marker the next time a token is streamed. */
static void
account_for_newlines (const unsigned char *str, size_t len)
{

View file

@ -0,0 +1,74 @@
/* { dg-do compile } */
/* { dg-options "-std=gnu99" { target c } } */
/* { dg-options "-std=c++11" { target c++ } } */
/* Test that multi-line raw strings are lexed OK for all preprocessing
directives where one could appear. Test raw-string-directive-2.c
checks that #define is also processed properly. */
/* Note that in cases where we cause GCC to produce a multi-line error
message, we construct the string so that the second line looks enough
like an error message for DejaGNU to process it as such, so that we
can use dg-warning or dg-error directives to check for it. */
#warning R"delim(line1 /* { dg-warning "line1" } */
file:15:1: warning: line2)delim" /* { dg-warning "line2" } */
#error R"delim(line3 /* { dg-error "line3" } */
file:18:1: error: line4)delim" /* { dg-error "line4" } */
#define X1 R"(line 5
line 6
line 7
line 8
/*
//
line 9)" R"delim(
line10)delim"
#define X2(a) X1 #a R"(line 11
/*
line12
)"
#if R"(line 13 /* { dg-error "line13" } */
file:35:1: error: line14)" /* { dg-error "line14\\)\"\" is not valid" } */
#endif R"(line 15 /* { dg-warning "extra tokens at end of #endif" } */
\
line16)" ""
#ifdef XYZ R"(line17 /* { dg-warning "extra tokens at end of #ifdef" } */
\
\
line18)"
#endif
#if 1
#else R"(line23 /* { dg-warning "extra tokens at end of #else" } */
\
line24)"
#endif
#if 0
#elif R"(line 25 /* { dg-error "line25" } */
file:55:1: error: line26)" /* { dg-error "line26\\)\"\" is not valid" } */
#endif
#line 60 R"(file:60:1: warning: this file has a space
in it!)"
#warning "line27" /* { dg-warning "line27" } */
/* { dg-warning "this file has a space" "#line check" { target *-*-* } 60 } */
#line 63 "file"
#undef X1 R"(line28 /* { dg-warning "extra tokens at end of #undef" } */
line29
\
)"
#ident R"(line30
line31)" R"(line 32 /* { dg-warning "extra tokens at end of #ident" } */
line 33)"
#pragma GCC diagnostic ignored R"(-Woption /* { dg-warning "-Wpragmas" } */
-with-a-newline)"

View file

@ -0,0 +1,33 @@
/* { dg-do run } */
/* { dg-options "-std=gnu99" { target c } } */
/* { dg-options "-std=c++11" { target c++ } } */
/* S1: a raw string literal spanning three source lines inside a #define.  */
#define S1 R"(three
line
string)"
/* S2: a two-line raw string followed by an ordinary string literal within
   the same directive; the adjacent literals are expected to concatenate.  */
#define S2 R"(pasted
two line)" " string"
/* X: function-like macro whose replacement list ends with a raw string that
   begins with a newline.  */
#define X(a, b) a b R"(
one more)"
const char *s1 = S1;
const char *s2 = S2;
/* The second argument to X is itself a raw string containing a newline, so
   this also covers a multi-line raw string appearing inside a macro
   invocation's argument list.  */
const char *s3 = X(S1, R"(
with this line plus)");
/* Check that each string built from multi-line raw string literals in
   preprocessing directives has exactly the expected bytes (including the
   terminating NUL).  Abort on the first mismatch.  */
int main ()
{
  const char expect_s1[] = "three\nline\nstring";
  const char expect_s2[] = "pasted\ntwo line string";
  const char expect_s3[] = "three\nline\nstring\nwith this line plus\none more";

  if (__builtin_memcmp (s1, expect_s1, sizeof expect_s1) != 0)
    __builtin_abort ();

  if (__builtin_memcmp (s2, expect_s2, sizeof expect_s2) != 0)
    __builtin_abort ();

  if (__builtin_memcmp (s3, expect_s3, sizeof expect_s3) != 0)
    __builtin_abort ();
}

View file

@ -1076,6 +1076,9 @@ _cpp_clean_line (cpp_reader *pfile)
buffer->next_line = s + 1;
}
template <bool lexing_raw_string>
static bool get_fresh_line_impl (cpp_reader *pfile);
/* Return true if the trigraph indicated by NOTE should be warned
about in a comment. */
static bool
@ -2695,9 +2698,8 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
{
pos--;
pfile->buffer->cur = pos;
if (pfile->state.in_directive
|| (pfile->state.parsing_args
&& pfile->buffer->next_line >= pfile->buffer->rlimit))
if ((pfile->state.in_directive || pfile->state.parsing_args)
&& pfile->buffer->next_line >= pfile->buffer->rlimit)
{
cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
"unterminated raw string");
@ -2712,7 +2714,7 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
CPP_INCREMENT_LINE (pfile, 0);
pfile->buffer->need_line = true;
if (!_cpp_get_fresh_line (pfile))
if (!get_fresh_line_impl<true> (pfile))
{
/* We ran out of file and failed to get a line. */
location_t src_loc = token->src_loc;
@ -2724,8 +2726,15 @@ lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
_cpp_release_buff (pfile, accum.first);
cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
"unterminated raw string");
/* Now pop the buffer that _cpp_get_fresh_line did not. */
/* Now pop the buffer that get_fresh_line_impl() did not. Popping
is not safe if processing a directive, however this cannot
happen as we already checked above that a line would be
available, and get_fresh_line_impl() can't fail in this
case. */
gcc_assert (!pfile->state.in_directive);
_cpp_pop_buffer (pfile);
return;
}
@ -3659,11 +3668,14 @@ _cpp_lex_token (cpp_reader *pfile)
}
/* Returns true if a fresh line has been loaded. */
bool
_cpp_get_fresh_line (cpp_reader *pfile)
template <bool lexing_raw_string>
static bool
get_fresh_line_impl (cpp_reader *pfile)
{
/* We can't get a new line until we leave the current directive. */
if (pfile->state.in_directive)
/* We can't get a new line until we leave the current directive, unless we
are lexing a raw string, in which case it will be OK as long as we don't
pop the current buffer. */
if (!lexing_raw_string && pfile->state.in_directive)
return false;
for (;;)
@ -3679,6 +3691,10 @@ _cpp_get_fresh_line (cpp_reader *pfile)
return true;
}
/* We can't change buffers until we leave the current directive. */
if (lexing_raw_string && pfile->state.in_directive)
return false;
/* First, get out of parsing arguments state. */
if (pfile->state.parsing_args)
return false;
@ -3706,6 +3722,13 @@ _cpp_get_fresh_line (cpp_reader *pfile)
}
}
/* Returns true if a fresh line has been loaded.  This is the entry point
   for callers that are not lexing a raw string: it forwards to
   get_fresh_line_impl<false>, which refuses to fetch a new line while a
   directive is being processed.  */
bool
_cpp_get_fresh_line (cpp_reader *pfile)
{
return get_fresh_line_impl<false> (pfile);
}
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE) \
do \
{ \