cpplex.c (handle_newline, [...]): Update to do more stepping back.

* cpplex.c (handle_newline, skip_escaped_newlines,
	get_effective_char, skip_block_comment, skip_line_comment,
	parse_identifier_slow, parse_number, parse_string,
	_cpp_lex_direct): Update to do more stepping back.
	(trigraph_ok): Similarly.  Rename trigraph_p.
	(SAVE_STATE, RESTORE_STATE): Remove.
	(BUFF_SIZE_UPPER_BOUND): Tweak.  Add sanity check.

	* cpplib.c (destringize): Rename destringize_and_run, and
	call run_directive directly.
	(_cpp_do__Pragma): Simplify.

From-SVN: r46373
This commit is contained in:
Neil Booth 2001-10-20 09:00:53 +00:00 committed by Neil Booth
parent 3c1ef3c102
commit 870628131c
3 changed files with 149 additions and 174 deletions

View file

@ -1,3 +1,17 @@
2001-10-20 Neil Booth <neil@daikokuya.demon.co.uk>
* cpplex.c (handle_newline, skip_escaped_newlines,
get_effective_char, skip_block_comment, skip_line_comment,
parse_identifier_slow, parse_number, parse_string,
_cpp_lex_direct): Update to do more stepping back.
(trigraph_ok): Similarly. Rename trigraph_p.
(SAVE_STATE, RESTORE_STATE): Remove.
(BUFF_SIZE_UPPER_BOUND): Tweak. Add sanity check.
* cpplib.c (destringize): Rename destringize_and_run, and
call run_directive directly.
(_cpp_do__Pragma): Simplify.
2001-10-19 Kaveh R. Ghazi <ghazi@caip.rutgers.edu>
* pe.c (arm_pe_unique_section): Const-ify.

View file

@ -20,20 +20,6 @@ You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* This lexer works with a single pass of the file. Recently I
re-wrote it to minimize the places where we step backwards in the
input stream, to make future changes to support multi-byte
character sets fairly straight-forward.
There is now only one routine where we do step backwards:
skip_escaped_newlines. This routine could probably also be changed
so that it doesn't need to step back. One possibility is to use a
trick similar to that used in lex_period and lex_percent. Two
extra characters might be needed, but skip_escaped_newlines itself
would probably be the only place that needs to be aware of that,
and changes to the remaining routines would probably only be needed
if they process a backslash. */
#include "config.h"
#include "system.h"
#include "cpplib.h"
@ -81,8 +67,8 @@ static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
#define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
#define TOKEN_NAME(token) (token_spellings[(token)->type].name)
static cppchar_t handle_newline PARAMS ((cpp_reader *, cppchar_t));
static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *, cppchar_t));
static void handle_newline PARAMS ((cpp_reader *));
static cppchar_t skip_escaped_newlines PARAMS ((cpp_reader *));
static cppchar_t get_effective_char PARAMS ((cpp_reader *));
static int skip_block_comment PARAMS ((cpp_reader *));
@ -96,7 +82,7 @@ static void parse_number PARAMS ((cpp_reader *, cpp_string *, cppchar_t, int));
static int unescaped_terminator_p PARAMS ((cpp_reader *, const U_CHAR *));
static void parse_string PARAMS ((cpp_reader *, cpp_token *, cppchar_t));
static void unterminated PARAMS ((cpp_reader *, int));
static int trigraph_ok PARAMS ((cpp_reader *, cppchar_t));
static bool trigraph_p PARAMS ((cpp_reader *));
static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
static void lex_percent PARAMS ((cpp_reader *, cpp_token *));
static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
@ -124,58 +110,53 @@ cpp_ideq (token, string)
return !ustrcmp (NODE_NAME (token->val.node), (const U_CHAR *) string);
}
/* Call when meeting a newline. Returns the character after the newline
(or carriage-return newline combination), or EOF. */
static cppchar_t
handle_newline (pfile, newline_char)
/* Call when meeting a newline, assumed to be in buffer->cur[-1].
Returns with buffer->cur pointing to the character immediately
following the newline (combination). */
static void
handle_newline (pfile)
cpp_reader *pfile;
cppchar_t newline_char;
{
cpp_buffer *buffer;
cppchar_t next = EOF;
cpp_buffer *buffer = pfile->buffer;
/* Handle CR-LF and LF-CR. Most other implementations (e.g. java)
only accept CR-LF; maybe we should fall back to that behaviour?
NOTE: the EOF case in _cpp_lex_direct currently requires the
buffer->cur != buffer->rlimit test here for 0-length files. */
if (buffer->cur != buffer->rlimit
&& buffer->cur[-1] + buffer->cur[0] == '\r' + '\n')
buffer->cur++;
pfile->line++;
buffer = pfile->buffer;
buffer->col_adjust = 0;
buffer->line_base = buffer->cur;
/* Handle CR-LF and LF-CR combinations, get the next character. */
if (buffer->cur < buffer->rlimit)
{
next = *buffer->cur++;
if (next + newline_char == '\r' + '\n')
{
buffer->line_base = buffer->cur;
if (buffer->cur < buffer->rlimit)
next = *buffer->cur++;
else
next = EOF;
}
}
buffer->read_ahead = next;
return next;
buffer->col_adjust = 0;
pfile->line++;
}
/* Subroutine of skip_escaped_newlines; called when a trigraph is
encountered. It warns if necessary, and returns true if the
trigraph should be honoured. FROM_CHAR is the third character of a
trigraph, and presumed to be the previous character for position
reporting. */
static int
trigraph_ok (pfile, from_char)
/* Subroutine of skip_escaped_newlines; called when a 3-character
sequence beginning with "??" is encountered. buffer->cur points to
the second '?'.
Warn if necessary, and returns true if the sequence forms a
trigraph and the trigraph should be honoured. */
static bool
trigraph_p (pfile)
cpp_reader *pfile;
cppchar_t from_char;
{
int accept = CPP_OPTION (pfile, trigraphs);
cpp_buffer *buffer = pfile->buffer;
cppchar_t from_char = buffer->cur[1];
bool accept;
if (!_cpp_trigraph_map[from_char])
return false;
accept = CPP_OPTION (pfile, trigraphs);
/* Don't warn about trigraphs in comments. */
if (CPP_OPTION (pfile, warn_trigraphs) && !pfile->state.lexing_comment)
{
cpp_buffer *buffer = pfile->buffer;
if (accept)
cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 2,
cpp_warning_with_line (pfile, pfile->line, CPP_BUF_COL (buffer) - 1,
"trigraph ??%c converted to %c",
(int) from_char,
(int) _cpp_trigraph_map[from_char]);
@ -183,7 +164,7 @@ trigraph_ok (pfile, from_char)
{
buffer->last_Wtrigraphs = buffer->cur;
cpp_warning_with_line (pfile, pfile->line,
CPP_BUF_COL (buffer) - 2,
CPP_BUF_COL (buffer) - 1,
"trigraph ??%c ignored", (int) from_char);
}
}
@ -195,96 +176,79 @@ trigraph_ok (pfile, from_char)
#define ACCEPT_CHAR(t) \
do { result->type = t; buffer->read_ahead = EOF; } while (0)
/* When we move to multibyte character sets, add to these something
that saves and restores the state of the multibyte conversion
library. This probably involves saving and restoring a "cookie".
In the case of glibc it is an 8-byte structure, so is not a high
overhead operation. In any case, it's out of the fast path. */
#define SAVE_STATE() do { saved_cur = buffer->cur; } while (0)
#define RESTORE_STATE() do { buffer->cur = saved_cur; } while (0)
/* Skips any escaped newlines introduced by NEXT, which is either a
'?' or a '\\'. Returns the next character, which will also have
been placed in buffer->read_ahead. This routine performs
preprocessing stages 1 and 2 of the ISO C standard. */
/* Skips any escaped newlines introduced by '?' or a '\\', assumed to
lie in buffer->cur[-1]. Returns the next character, which will
then be in buffer->cur[-1]. This routine performs preprocessing
stages 1 and 2 of the ISO C standard. */
static cppchar_t
skip_escaped_newlines (pfile, next)
skip_escaped_newlines (pfile)
cpp_reader *pfile;
cppchar_t next;
{
cpp_buffer *buffer = pfile->buffer;
cppchar_t next = buffer->cur[-1];
/* Only do this if we apply stages 1 and 2. */
if (!buffer->from_stage3)
{
cppchar_t next1;
const unsigned char *saved_cur;
int space;
cppchar_t next1;
do
{
if (buffer->cur == buffer->rlimit)
break;
SAVE_STATE ();
if (next == '?')
{
next1 = *buffer->cur++;
if (next1 != '?' || buffer->cur == buffer->rlimit)
{
RESTORE_STATE ();
break;
}
if (buffer->cur[0] != '?' || buffer->cur + 1 == buffer->rlimit)
break;
next1 = *buffer->cur++;
if (!_cpp_trigraph_map[next1]
|| !trigraph_ok (pfile, next1))
{
RESTORE_STATE ();
break;
}
if (!trigraph_p (pfile))
break;
/* We have a full trigraph here. */
next = _cpp_trigraph_map[next1];
/* Translate the trigraph. */
next = _cpp_trigraph_map[buffer->cur[1]];
buffer->cur += 2;
if (next != '\\' || buffer->cur == buffer->rlimit)
break;
SAVE_STATE ();
}
/* We have a backslash, and room for at least one more character. */
space = 0;
/* We have a backslash, and room for at least one more
character. Skip horizontal whitespace. */
saved_cur = buffer->cur;
do
{
next1 = *buffer->cur++;
if (!is_nvspace (next1))
break;
space = 1;
}
while (buffer->cur < buffer->rlimit);
next1 = *buffer->cur++;
while (is_nvspace (next1) && buffer->cur < buffer->rlimit);
if (!is_vspace (next1))
{
RESTORE_STATE ();
buffer->cur = saved_cur;
break;
}
if (space && !pfile->state.lexing_comment)
if (saved_cur != buffer->cur - 1
&& !pfile->state.lexing_comment)
cpp_warning (pfile, "backslash and newline separated by space");
next = handle_newline (pfile, next1);
if (next == EOF)
cpp_pedwarn (pfile, "backslash-newline at end of file");
handle_newline (pfile);
if (buffer->cur == buffer->rlimit)
{
cpp_pedwarn (pfile, "backslash-newline at end of file");
next = EOF;
}
else
next = *buffer->cur++;
}
while (next == '\\' || next == '?');
}
buffer->read_ahead = next;
return next;
}
/* Obtain the next character, after trigraph conversion and skipping
an arbitrary string of escaped newlines. The common case of no
trigraphs or escaped newlines falls through quickly. */
an arbitrarily long string of escaped newlines. The common case of
no trigraphs or escaped newlines falls through quickly. On return,
buffer->cur points after the returned character. */
static cppchar_t
get_effective_char (pfile)
cpp_reader *pfile;
@ -301,7 +265,7 @@ get_effective_char (pfile)
UCNs, which, depending upon lexer state, we will handle in
the future. */
if (next == '?' || next == '\\')
next = skip_escaped_newlines (pfile, next);
next = skip_escaped_newlines (pfile);
}
buffer->read_ahead = next;
@ -323,11 +287,10 @@ skip_block_comment (pfile)
{
prevc = c, c = *buffer->cur++;
next_char:
/* FIXME: For speed, create a new character class of characters
of interest inside block comments. */
if (c == '?' || c == '\\')
c = skip_escaped_newlines (pfile, c);
c = skip_escaped_newlines (pfile);
/* People like decorating comments with '*', so check for '/'
instead for efficiency. */
@ -340,25 +303,14 @@ skip_block_comment (pfile)
comes immediately before the true comment delimeter.
Don't bother to get it right across escaped newlines. */
if (CPP_OPTION (pfile, warn_comments)
&& buffer->cur != buffer->rlimit)
{
prevc = c, c = *buffer->cur++;
if (c == '*' && buffer->cur != buffer->rlimit)
{
prevc = c, c = *buffer->cur++;
if (c != '/')
cpp_warning_with_line (pfile, pfile->line,
CPP_BUF_COL (buffer) - 2,
"\"/*\" within comment");
}
goto next_char;
}
&& buffer->cur + 1 < buffer->rlimit
&& buffer->cur[0] == '*' && buffer->cur[1] != '/')
cpp_warning_with_line (pfile,
pfile->line, CPP_BUF_COL (buffer),
"\"/*\" within comment");
}
else if (is_vspace (c))
{
prevc = c, c = handle_newline (pfile, c);
goto next_char;
}
handle_newline (pfile);
else if (c == '\t')
adjust_column (pfile);
}
@ -388,7 +340,7 @@ skip_line_comment (pfile)
c = *buffer->cur++;
if (c == '?' || c == '\\')
c = skip_escaped_newlines (pfile, c);
c = skip_escaped_newlines (pfile);
}
while (!is_vspace (c));
@ -568,7 +520,7 @@ parse_identifier_slow (pfile, cur)
/* Potential escaped newline? */
if (c != '?' && c != '\\')
break;
c = skip_escaped_newlines (pfile, c);
c = skip_escaped_newlines (pfile);
}
while (is_idchar (c));
@ -640,7 +592,7 @@ parse_number (pfile, number, c, leading_period)
/* Potential escaped newline? */
if (c != '?' && c != '\\')
break;
c = skip_escaped_newlines (pfile, c);
c = skip_escaped_newlines (pfile);
}
while (is_numchar (c) || c == '.' || VALID_SIGN (c, dest[-1]));
@ -697,8 +649,10 @@ unescaped_terminator_p (pfile, dest)
name. Handles embedded trigraphs and escaped newlines. The stored
string is guaranteed NUL-terminated, but it is not guaranteed that
this is the first NUL since embedded NULs are preserved.
Multi-line strings are allowed, but they are deprecated.
Multi-line strings are allowed, but they are deprecated. */
When this function returns, buffer->cur points to the next
character to be processed. */
static void
parse_string (pfile, token, terminator)
cpp_reader *pfile;
@ -715,13 +669,7 @@ parse_string (pfile, token, terminator)
for (;;)
{
if (buffer->cur == buffer->rlimit)
c = EOF;
else
c = *buffer->cur++;
have_char:
/* We need space for the terminating NUL. */
/* We need room for another char, possibly the terminating NUL. */
if ((size_t) (limit - dest) < 1)
{
size_t len_so_far = dest - BUFF_FRONT (pfile->u_buff);
@ -730,20 +678,21 @@ parse_string (pfile, token, terminator)
limit = BUFF_LIMIT (pfile->u_buff);
}
if (c == EOF)
if (buffer->cur == buffer->rlimit)
{
unterminated (pfile, terminator);
break;
}
/* Handle trigraphs, escaped newlines etc. */
c = *buffer->cur++;
if (c == '?' || c == '\\')
c = skip_escaped_newlines (pfile, c);
c = skip_escaped_newlines (pfile);
if (c == terminator && unescaped_terminator_p (pfile, dest))
if (c == terminator)
{
c = EOF;
break;
if (unescaped_terminator_p (pfile, dest))
break;
}
else if (is_vspace (c))
{
@ -751,7 +700,10 @@ parse_string (pfile, token, terminator)
character literals at end of line. This is a kludge
around not knowing where comments are. */
if (CPP_OPTION (pfile, lang) == CLK_ASM && terminator != '>')
break;
{
buffer->cur--;
break;
}
/* Character constants and header names may not extend over
multiple lines. In Standard C, neither may strings.
@ -760,6 +712,7 @@ parse_string (pfile, token, terminator)
if (terminator != '"' || pfile->state.angled_headers)
{
unterminated (pfile, terminator);
buffer->cur--;
break;
}
@ -775,9 +728,8 @@ parse_string (pfile, token, terminator)
pfile->mls_col = token->col;
}
c = handle_newline (pfile, c);
*dest++ = '\n';
goto have_char;
handle_newline (pfile);
c = '\n';
}
else if (c == '\0' && !warned_nulls)
{
@ -788,8 +740,7 @@ parse_string (pfile, token, terminator)
*dest++ = c;
}
/* Remember the next character. */
buffer->read_ahead = c;
buffer->read_ahead = EOF;
*dest = '\0';
token->val.str.text = BUFF_FRONT (pfile->u_buff);
@ -1066,7 +1017,7 @@ _cpp_lex_direct (pfile)
for command line and _Pragma buffers. */
if (!buffer->from_stage3)
cpp_pedwarn (pfile, "no newline at end of file");
handle_newline (pfile, '\n');
handle_newline (pfile);
}
/* Don't pop the last buffer. */
@ -1088,7 +1039,7 @@ _cpp_lex_direct (pfile)
goto skipped_white;
case '\n': case '\r':
handle_newline (pfile, c);
handle_newline (pfile);
buffer->saved_flags = BOL;
if (! pfile->state.in_directive)
{
@ -1112,12 +1063,14 @@ _cpp_lex_direct (pfile)
{
unsigned int line = pfile->line;
c = skip_escaped_newlines (pfile, c);
c = skip_escaped_newlines (pfile);
if (line != pfile->line)
/* We had at least one escaped newline of some sort, and the
next character is in buffer->read_ahead. Update the
token's line and column. */
{
buffer->read_ahead = c;
/* We had at least one escaped newline of some sort.
Update the token's line and column. */
goto update_tokens_line;
}
/* We are either the original '?' or '\\', or a trigraph. */
result->type = CPP_QUERY;
@ -2045,10 +1998,14 @@ cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
expansion. Also check the change in peak memory usage (NJAMD is a
good tool for this). */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (8000 + (MIN_SIZE) * 3 / 2)
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
(MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
#error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
struct dummy
{
char c;
@ -2199,7 +2156,16 @@ _cpp_unaligned_alloc (pfile, len)
return result;
}
/* Allocate permanent, unaligned storage of length LEN. */
/* Allocate permanent, unaligned storage of length LEN from a_buff.
That buffer is used for growing allocations when saving macro
replacement lists in a #define, and when parsing an answer to an
assertion in #assert, #unassert or #if (and therefore possibly
whilst expanding macros). It therefore must not be used by any
code that they might call: specifically the lexer and the guts of
the macro expander.
All existing other uses clearly fit this restriction: storing
registered pragmas during initialization. */
unsigned char *
_cpp_aligned_alloc (pfile, len)
cpp_reader *pfile;

View file

@ -120,8 +120,7 @@ static void do_pragma_system_header PARAMS ((cpp_reader *));
static void do_pragma_dependency PARAMS ((cpp_reader *));
static const cpp_token *get_token_no_padding PARAMS ((cpp_reader *));
static const cpp_token *get__Pragma_string PARAMS ((cpp_reader *));
static unsigned char *destringize PARAMS ((const cpp_string *,
unsigned int *));
static void destringize_and_run PARAMS ((cpp_reader *, const cpp_string *));
static int parse_answer PARAMS ((cpp_reader *, struct answer **, int));
static cpp_hashnode *parse_assertion PARAMS ((cpp_reader *, struct answer **,
int));
@ -1149,17 +1148,17 @@ get__Pragma_string (pfile)
return string;
}
/* Returns a malloced buffer containing a destringized cpp_string by
removing the first \ of \" and \\ sequences. */
static unsigned char *
destringize (in, len)
/* Destringize IN into a temporary buffer, by removing the first \ of
\" and \\ sequences, and process the result as a #pragma directive. */
static void
destringize_and_run (pfile, in)
cpp_reader *pfile;
const cpp_string *in;
unsigned int *len;
{
const unsigned char *src, *limit;
unsigned char *dest, *result;
char *dest, *result;
dest = result = (unsigned char *) xmalloc (in->len);
dest = result = alloca (in->len);
for (src = in->text, limit = src + in->len; src < limit;)
{
/* We know there is a character following the backslash. */
@ -1168,17 +1167,15 @@ destringize (in, len)
*dest++ = *src++;
}
*len = dest - result;
return result;
run_directive (pfile, T_PRAGMA, result, dest - result);
}
/* Handle the _Pragma operator. */
void
_cpp_do__Pragma (pfile)
cpp_reader *pfile;
{
const cpp_token *string = get__Pragma_string (pfile);
unsigned char *buffer;
unsigned int len;
if (!string)
cpp_error (pfile, "_Pragma takes a parenthesized string literal");
@ -1195,9 +1192,7 @@ _cpp_do__Pragma (pfile)
Getting these correct line markers is a little tricky. */
unsigned int orig_line = pfile->line;
buffer = destringize (&string->val.str, &len);
run_directive (pfile, T_PRAGMA, (char *) buffer, len);
free ((PTR) buffer);
destringize_and_run (pfile, &string->val.str);
pfile->line = orig_line;
pfile->buffer->saved_flags = BOL;
}