cpphash.h (struct lexer_state): Remove line_extension member.

* cpphash.h (struct lexer_state): Remove line_extension member.
	* cpplib.c (dequote_string, do_linemarker): New functions.
	(linemarker_dir): New data object.
	(DIRECTIVE_TABLE): No longer need to interpret #line in
	preprocessed source.  Delete obsolete comment about return
	values of handlers.
	(end_directive, directive_diagnostics, _cpp_handle_directive):
	Don't muck with line_extension.
	(directive_diagnostics): No need to issue warnings for
	linemarkers here.
	(_cpp_handle_directive): Issue warnings for linemarkers here,
	when appropriate.  Dispatch linemarkers to do_linemarker, not
	do_line.
	(do_line): Code to handle linemarkers split out to do_linemarker.
	Convert escape sequences in filename argument, both places.

	* cppmacro.c (quote_string): Rename cpp_quote_string and
	export.  All callers changed.
	* cpplib.h (cpp_quote_string): Prototype.
	* cppmain.c (print_line): Call cpp_quote_string on to_file
	before printing it.

	* doc/cpp.texi: Document that escapes are now interpreted in
	#line and in linemarkers, and that non-printing characters are
	converted to octal escapes when linemarkers are generated.

From-SVN: r50779
This commit is contained in:
Zack Weinberg 2002-03-14 18:17:18 +00:00
parent 83a4940733
commit dcc229e5a1
6 changed files with 193 additions and 97 deletions

View file

@ -152,9 +152,6 @@ struct lexer_state
/* Nonzero when parsing arguments to a function-like macro. */
unsigned char parsing_args;
/* Nonzero when in a # NUMBER directive. */
unsigned char line_extension;
};
/* Special nodes - identifiers with predefined significance. */

View file

@ -103,6 +103,8 @@ static const cpp_token *parse_include PARAMS ((cpp_reader *));
static void push_conditional PARAMS ((cpp_reader *, int, int,
const cpp_hashnode *));
static unsigned int read_flag PARAMS ((cpp_reader *, unsigned int));
static U_CHAR *dequote_string PARAMS ((cpp_reader *, const U_CHAR *,
unsigned int));
static int strtoul_for_line PARAMS ((const U_CHAR *, unsigned int,
unsigned long *));
static void do_diagnostic PARAMS ((cpp_reader *, enum error_type, int));
@ -117,6 +119,7 @@ static void do_pragma_once PARAMS ((cpp_reader *));
static void do_pragma_poison PARAMS ((cpp_reader *));
static void do_pragma_system_header PARAMS ((cpp_reader *));
static void do_pragma_dependency PARAMS ((cpp_reader *));
static void do_linemarker PARAMS ((cpp_reader *));
static const cpp_token *get_token_no_padding PARAMS ((cpp_reader *));
static const cpp_token *get__Pragma_string PARAMS ((cpp_reader *));
static void destringize_and_run PARAMS ((cpp_reader *, const cpp_string *));
@ -145,7 +148,7 @@ D(if, T_IF, KANDR, COND | IF_COND) /* 18162 */ \
D(else, T_ELSE, KANDR, COND) /* 9863 */ \
D(ifndef, T_IFNDEF, KANDR, COND | IF_COND) /* 9675 */ \
D(undef, T_UNDEF, KANDR, IN_I) /* 4837 */ \
D(line, T_LINE, KANDR, IN_I) /* 2465 */ \
D(line, T_LINE, KANDR, 0) /* 2465 */ \
D(elif, T_ELIF, STDC89, COND) /* 610 */ \
D(error, T_ERROR, STDC89, 0) /* 475 */ \
D(pragma, T_PRAGMA, STDC89, IN_I) /* 195 */ \
@ -167,10 +170,6 @@ SCCS_ENTRY /* 0 SVR4? */
/* Use the table to generate a series of prototypes, an enum for the
directive names, and an array of directive handlers. */
/* The directive-processing functions are declared to return int
instead of void, because some old compilers have trouble with
pointers to functions returning void. */
/* Don't invoke CONCAT2 with any whitespace or K&R cc will fail. */
#define D(name, t, o, f) static void CONCAT2(do_,name) PARAMS ((cpp_reader *));
DIRECTIVE_TABLE
@ -195,6 +194,14 @@ DIRECTIVE_TABLE
#undef D
#undef DIRECTIVE_TABLE
/* Wrapper struct directive for linemarkers (the `# 44 "file"' form).
_cpp_handle_directive selects this entry when the token after the
# is a number and the language is not assembler, so that such lines
are dispatched to do_linemarker.
The origin is more or less true - the original K+R cpp
did use this notation in its preprocessed output. */
static const directive linemarker_dir =
{
do_linemarker, U"#", 1, KANDR, IN_I
};
#define SEEN_EOL() (pfile->cur_token[-1].type == CPP_EOF)
/* Skip any remaining tokens in a directive. */
@ -256,7 +263,6 @@ end_directive (pfile, skip_line)
pfile->state.save_comments = ! CPP_OPTION (pfile, discard_comments);
pfile->state.in_directive = 0;
pfile->state.angled_headers = 0;
pfile->state.line_extension = 0;
pfile->directive = 0;
}
@ -268,39 +274,30 @@ directive_diagnostics (pfile, dir, indented)
const directive *dir;
int indented;
{
if (pfile->state.line_extension)
{
if (CPP_PEDANTIC (pfile)
&& ! pfile->state.skipping)
cpp_pedwarn (pfile, "style of line directive is a GCC extension");
}
else
{
/* Issue -pedantic warnings for extensions. */
if (CPP_PEDANTIC (pfile)
&& ! pfile->state.skipping
&& dir->origin == EXTENSION)
cpp_pedwarn (pfile, "#%s is a GCC extension", dir->name);
/* Issue -pedantic warnings for extensions. */
if (CPP_PEDANTIC (pfile)
&& ! pfile->state.skipping
&& dir->origin == EXTENSION)
cpp_pedwarn (pfile, "#%s is a GCC extension", dir->name);
/* Traditionally, a directive is ignored unless its # is in
column 1. Therefore in code intended to work with K+R
compilers, directives added by C89 must have their #
indented, and directives present in traditional C must not.
This is true even of directives in skipped conditional
blocks. #elif cannot be used at all. */
if (CPP_WTRADITIONAL (pfile))
{
if (dir == &dtable[T_ELIF])
cpp_warning (pfile, "suggest not using #elif in traditional C");
else if (indented && dir->origin == KANDR)
cpp_warning (pfile,
"traditional C ignores #%s with the # indented",
dir->name);
else if (!indented && dir->origin != KANDR)
cpp_warning (pfile,
"suggest hiding #%s from traditional C with an indented #",
dir->name);
}
/* Traditionally, a directive is ignored unless its # is in
column 1. Therefore in code intended to work with K+R
compilers, directives added by C89 must have their #
indented, and directives present in traditional C must not.
This is true even of directives in skipped conditional
blocks. #elif cannot be used at all. */
if (CPP_WTRADITIONAL (pfile))
{
if (dir == &dtable[T_ELIF])
cpp_warning (pfile, "suggest not using #elif in traditional C");
else if (indented && dir->origin == KANDR)
cpp_warning (pfile,
"traditional C ignores #%s with the # indented",
dir->name);
else if (!indented && dir->origin != KANDR)
cpp_warning (pfile,
"suggest hiding #%s from traditional C with an indented #",
dir->name);
}
}
@ -339,8 +336,10 @@ _cpp_handle_directive (pfile, indented)
assembler code. */
else if (dname->type == CPP_NUMBER && CPP_OPTION (pfile, lang) != CLK_ASM)
{
dir = &dtable[T_LINE];
pfile->state.line_extension = 1;
dir = &linemarker_dir;
if (CPP_PEDANTIC (pfile) && ! CPP_OPTION (pfile, preprocessed)
&& ! pfile->state.skipping)
cpp_pedwarn (pfile, "style of line directive is a GCC extension");
}
if (dir)
@ -669,9 +668,10 @@ do_include_next (pfile)
do_include_common (pfile, IT_INCLUDE_NEXT);
}
/* Subroutine of do_line. Read possible flags after file name. LAST
is the last flag seen; 0 if this is the first flag. Return the flag
if it is valid, 0 at the end of the directive. Otherwise complain. */
/* Subroutine of do_linemarker. Read possible flags after file name.
LAST is the last flag seen; 0 if this is the first flag. Return the
flag if it is valid, 0 at the end of the directive. Otherwise
complain. */
static unsigned int
read_flag (pfile, last)
cpp_reader *pfile;
@ -695,9 +695,43 @@ read_flag (pfile, last)
return 0;
}
/* Another subroutine of do_line. Convert a number in STR, of length
LEN, to binary; store it in NUMP, and return 0 if the number was
well-formed, 1 if not. Temporary, hopefully. */
/* Helper shared by do_line and do_linemarker.  Copies the LEN bytes
   starting at STR into storage obtained from _cpp_unaligned_alloc,
   replacing every backslash escape sequence with the value returned
   by cpp_parse_escape, and appends a NUL terminator.  Returns the
   start of the copy.  Temporary, hopefully.  */
static U_CHAR *
dequote_string (pfile, str, len)
     cpp_reader *pfile;
     const U_CHAR *str;
     unsigned int len;
{
  /* Each loop iteration below consumes at least one source byte and
     stores exactly one, so LEN bytes plus the terminator suffice.  */
  U_CHAR *out = _cpp_unaligned_alloc (pfile, len + 1);
  U_CHAR *wp;
  const U_CHAR *end = str + len;
  unsigned int ch;
  /* The mask has to match the host's 'unsigned char', not the
     target's.  */
  unsigned HOST_WIDE_INT mask
    = (CHAR_BIT < HOST_BITS_PER_WIDE_INT
       ? ((unsigned HOST_WIDE_INT) 1 << CHAR_BIT) - 1
       : ~(unsigned HOST_WIDE_INT) 0);

  for (wp = out; str < end; wp++)
    {
      ch = *str++;
      if (ch == '\\')
	/* cpp_parse_escape advances STR past the escape it decodes.  */
	*wp = cpp_parse_escape (pfile, (const U_CHAR **) &str, end, mask);
      else
	*wp = ch;
    }

  *wp = '\0';
  return out;
}
/* Subroutine of do_line and do_linemarker. Convert a number in STR,
of length LEN, to binary; store it in NUMP, and return 0 if the
number was well-formed, 1 if not. Temporary, hopefully. */
static int
strtoul_for_line (str, len, nump)
const U_CHAR *str;
@ -719,8 +753,8 @@ strtoul_for_line (str, len, nump)
}
/* Interpret #line command.
Note that the filename string (if any) is treated as if it were an
include filename. That means no escape handling. */
Note that the filename string (if any) is a true string constant
(escapes are interpreted), unlike in #include. */
static void
do_line (pfile)
cpp_reader *pfile;
@ -728,16 +762,9 @@ do_line (pfile)
const cpp_token *token;
const char *new_file = pfile->map->to_file;
unsigned long new_lineno;
unsigned int cap, new_sysp = pfile->map->sysp;
enum lc_reason reason = LC_RENAME;
/* C99 raised the minimum limit on #line numbers. */
cap = CPP_OPTION (pfile, c99) ? 2147483647 : 32767;
/* Putting this in _cpp_handle_directive risks two calls to
_cpp_backup_tokens in some circumstances, which can segfault. */
if (pfile->state.line_extension)
_cpp_backup_tokens (pfile, 1);
unsigned int cap = CPP_OPTION (pfile, c99) ? 2147483647 : 32767;
/* #line commands expand macros. */
token = cpp_get_token (pfile);
@ -750,42 +777,85 @@ do_line (pfile)
return;
}
if (CPP_PEDANTIC (pfile) && ! pfile->state.line_extension
&& (new_lineno == 0 || new_lineno > cap))
if (CPP_PEDANTIC (pfile) && (new_lineno == 0 || new_lineno > cap))
cpp_pedwarn (pfile, "line number out of range");
token = cpp_get_token (pfile);
if (token->type == CPP_STRING)
{
new_file = (const char *) token->val.str.text;
new_file = (const char *) dequote_string (pfile, token->val.str.text,
token->val.str.len);
check_eol (pfile);
}
else if (token->type != CPP_EOF)
{
cpp_error (pfile, "\"%s\" is not a valid filename",
cpp_token_as_text (pfile, token));
return;
}
/* Only accept flags for the # 55 form. */
if (pfile->state.line_extension)
skip_rest_of_line (pfile);
_cpp_do_file_change (pfile, LC_RENAME, new_file, new_lineno,
pfile->map->sysp);
}
/* Interpret the # 44 "file" [flags] notation, which has slightly
different syntax and semantics from #line: Flags are allowed,
and we never complain about the line number being too big. */
static void
do_linemarker (pfile)
cpp_reader *pfile;
{
const cpp_token *token;
const char *new_file = pfile->map->to_file;
unsigned long new_lineno;
unsigned int new_sysp = pfile->map->sysp;
enum lc_reason reason = LC_RENAME;
int flag;
/* Back up so we can get the number again. Putting this in
_cpp_handle_directive risks two calls to _cpp_backup_tokens in
some circumstances, which can segfault. */
_cpp_backup_tokens (pfile, 1);
/* #line commands expand macros. */
token = cpp_get_token (pfile);
if (token->type != CPP_NUMBER
|| strtoul_for_line (token->val.str.text, token->val.str.len,
&new_lineno))
{
cpp_error (pfile, "\"%s\" after # is not a positive integer",
cpp_token_as_text (pfile, token));
return;
}
token = cpp_get_token (pfile);
if (token->type == CPP_STRING)
{
new_file = (const char *) dequote_string (pfile, token->val.str.text,
token->val.str.len);
new_sysp = 0;
flag = read_flag (pfile, 0);
if (flag == 1)
{
int flag;
new_sysp = 0;
flag = read_flag (pfile, 0);
if (flag == 1)
{
reason = LC_ENTER;
/* Fake an include for cpp_included (). */
_cpp_fake_include (pfile, new_file);
flag = read_flag (pfile, flag);
}
else if (flag == 2)
{
reason = LC_LEAVE;
flag = read_flag (pfile, flag);
}
if (flag == 3)
{
new_sysp = 1;
flag = read_flag (pfile, flag);
if (flag == 4)
new_sysp = 2;
}
reason = LC_ENTER;
/* Fake an include for cpp_included (). */
_cpp_fake_include (pfile, new_file);
flag = read_flag (pfile, flag);
}
else if (flag == 2)
{
reason = LC_LEAVE;
flag = read_flag (pfile, flag);
}
if (flag == 3)
{
new_sysp = 1;
flag = read_flag (pfile, flag);
if (flag == 4)
new_sysp = 2;
}
check_eol (pfile);
}
else if (token->type != CPP_EOF)

View file

@ -592,6 +592,9 @@ extern void cpp_forall_identifiers PARAMS ((cpp_reader *,
/* In cppmacro.c */
extern void cpp_scan_nooutput PARAMS ((cpp_reader *));
extern int cpp_sys_macro_p PARAMS ((cpp_reader *));
extern unsigned char *cpp_quote_string PARAMS ((unsigned char *,
const unsigned char *,
unsigned int));
/* In cppfiles.c */
extern int cpp_included PARAMS ((cpp_reader *, const char *));

View file

@ -64,9 +64,6 @@ static cpp_context *next_context PARAMS ((cpp_reader *));
static const cpp_token *padding_token
PARAMS ((cpp_reader *, const cpp_token *));
static void expand_arg PARAMS ((cpp_reader *, macro_arg *));
static unsigned char *quote_string PARAMS ((unsigned char *,
const unsigned char *,
unsigned int));
static const cpp_token *new_string_token PARAMS ((cpp_reader *, U_CHAR *,
unsigned int));
static const cpp_token *new_number_token PARAMS ((cpp_reader *, unsigned int));
@ -164,7 +161,7 @@ builtin_macro (pfile, node)
name = map->to_file;
len = strlen (name);
buf = _cpp_unaligned_alloc (pfile, len * 4 + 1);
len = quote_string (buf, (const unsigned char *) name, len) - buf;
len = cpp_quote_string (buf, (const unsigned char *) name, len) - buf;
result = new_string_token (pfile, buf, len);
}
@ -244,9 +241,10 @@ builtin_macro (pfile, node)
/* Copies SRC, of length LEN, to DEST, adding backslashes before all
backslashes and double quotes. Non-printable characters are
converted to octal. DEST must be of sufficient size. */
static U_CHAR *
quote_string (dest, src, len)
converted to octal. DEST must be of sufficient size. Returns
a pointer to the end of the string. */
U_CHAR *
cpp_quote_string (dest, src, len)
U_CHAR *dest;
const U_CHAR *src;
unsigned int len;
@ -331,7 +329,7 @@ stringify_arg (pfile, arg)
_cpp_buff *buff = _cpp_get_buff (pfile, len);
unsigned char *buf = BUFF_FRONT (buff);
len = cpp_spell_token (pfile, token, buf) - buf;
dest = quote_string (dest, buf, len);
dest = cpp_quote_string (dest, buf, len);
_cpp_release_buff (pfile, buff);
}
else

View file

@ -321,8 +321,17 @@ print_line (map, line, special_flags)
print.line = line;
if (! options->no_line_commands)
{
size_t to_file_len = strlen (map->to_file);
unsigned char *to_file_quoted = alloca (to_file_len * 4 + 1);
unsigned char *p;
/* cpp_quote_string does not nul-terminate, so we have to do it
ourselves. */
p = cpp_quote_string (to_file_quoted,
(unsigned char *)map->to_file, to_file_len);
*p = '\0';
fprintf (print.outf, "# %u \"%s\"%s",
SOURCE_LINE (map, print.line), map->to_file, special_flags);
SOURCE_LINE (map, print.line), to_file_quoted, special_flags);
if (map->sysp == 2)
fputs (" 3 4", print.outf);

View file

@ -3087,6 +3087,13 @@ input. Subsequent lines are counted from @var{linenum}.
effect. In addition, @var{filename} is a string constant. The
following line and all subsequent lines are reported to come from the
file it specifies, until something else happens to change that.
@var{filename} is interpreted according to the normal rules for a string
constant: backslash escapes are interpreted. This is different from
@samp{#include}.
Previous versions of GNU CPP did not interpret escapes in @samp{#line};
we have changed it because the standard requires they be interpreted,
and most other compilers do.
@item #line @var{anything else}
@var{anything else} is checked for macro calls, which are expanded.
@ -3304,7 +3311,8 @@ of the form
These are called @dfn{linemarkers}. They are inserted as needed into
the output (but never within a string or character constant). They mean
that the following line originated in file @var{filename} at line
@var{linenum}.
@var{linenum}. @var{filename} will never contain any non-printing
characters; they are replaced with octal escape sequences.
After the file name comes zero or more flags, which are @samp{1},
@samp{2}, @samp{3}, or @samp{4}. If there are multiple flags, spaces
@ -3868,6 +3876,17 @@ The @samp{#line} directive used to change GCC's notion of the
a double-quoted header file name. In 3.0 and later, it does not.
@xref{Line Control}, for further explanation.
@item Syntax of @samp{#line}
In GCC 2.95 and previous, the string constant argument to @samp{#line}
was treated the same way as the argument to @samp{#include}: backslash
escapes were not honored, and the string ended at the second @samp{"}.
This is not compliant with the C standard. In GCC 3.0, an attempt was
made to correct the behavior, so that the string was treated as a real
string constant, but it turned out to be buggy. In 3.1, the bugs have
been fixed. (We are not fixing the bugs in 3.0 because they affect
relatively few people and the fix is quite invasive.)
@end itemize
@node Invocation