preproc: when parsing a # marker, use C-style string unquoting

To handle escape codes in filename strings after # markers correctly,
we need nasm_unquote() to be aware that it is using C escapes;
otherwise things like "foo`bar" will break.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
This commit is contained in:
H. Peter Anvin (Intel) 2020-07-13 14:10:16 -07:00
parent 4c3798b7e6
commit 1d151a8558
3 changed files with 87 additions and 74 deletions

View file

@ -721,30 +721,37 @@ static inline bool tok_isnt(const Token *x, char c)
* Unquote a token if it is a string, and set its type to
* TOK_INTERNAL_STRING.
*/
static const char *unquote_token(Token *t)
/*
* Common version for any kind of quoted string; see asm/quote.c for
* information about the arguments.
*/
static const char *unquote_token_anystr(Token *t, uint32_t badctl, char qstart)
{
size_t nlen, olen;
char *p;
if (t->type != TOK_STRING)
return tok_text(t);
olen = t->len;
p = (olen > INLINE_TEXT) ? t->text.p.ptr : t->text.a;
t->len = nlen = nasm_unquote_anystr(p, NULL, badctl, qstart);
t->type = TOK_INTERNAL_STRING;
if (t->len > INLINE_TEXT) {
char *p = t->text.p.ptr;
if (olen <= INLINE_TEXT || nlen > INLINE_TEXT)
return p;
t->len = nasm_unquote(p, NULL);
nasm_zero(t->text.a);
memcpy(t->text.a, p, nlen);
nasm_free(p);
return t->text.a;
}
if (t->len <= INLINE_TEXT) {
nasm_zero(t->text.a);
memcpy(t->text.a, p, t->len);
nasm_free(p);
return t->text.a;
} else {
return p;
}
} else {
t->len = nasm_unquote(t->text.a, NULL);
return t->text.a;
}
/*
 * Unquote any string, can produce any arbitrary binary output.
 *
 * Thin wrapper around unquote_token_anystr(): passes 0 as badctl and
 * STR_NASM as the lead quote marker, and returns that call's result
 * unchanged.
 */
static const char *unquote_token(Token *t)
{
return unquote_token_anystr(t, 0, STR_NASM);
}
/*
@ -753,28 +760,7 @@ static const char *unquote_token(Token *t)
*/
static const char *unquote_token_cstr(Token *t)
{
if (t->type != TOK_STRING)
return tok_text(t);
t->type = TOK_INTERNAL_STRING;
if (t->len > INLINE_TEXT) {
char *p = t->text.p.ptr;
t->len = nasm_unquote_cstr(p, NULL);
if (t->len <= INLINE_TEXT) {
nasm_zero(t->text.a);
memcpy(t->text.a, p, t->len);
nasm_free(p);
return t->text.a;
} else {
return p;
}
} else {
t->len = nasm_unquote_cstr(t->text.a, NULL);
return t->text.a;
}
return unquote_token_anystr(t, BADCTL, STR_NASM);
}
/*
@ -3389,14 +3375,19 @@ static int line_directive(Token *origline, Token *tline)
tline = skip_white(tline);
if (tline) {
if (tline->type == TOK_STRING) {
const char *fname;
/*
* If this is a quoted string, ignore anything after
* it; this allows for compatibility with gcc's
* additional flags options.
*/
src_set_fname(unquote_token(tline));
fname = unquote_token_anystr(tline, BADCTL,
dname[0] == '#' ? STR_C : STR_NASM);
src_set_fname(fname);
} else {
char *fname = detoken(tline, false);
char *fname;
fname = detoken(tline, false);
src_set_fname(fname);
nasm_free(fname);
}

View file

@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
* Copyright 1996-2019 The NASM Authors - All Rights Reserved
* Copyright 1996-2020 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@ -291,10 +291,17 @@ char *nasm_quote_cstr(const char *str, size_t *lenp)
* corresponding to bits set in badctl; in that case, the output
* string, but not *ep, is truncated before the first invalid
* character.
*
* badctl is a bitmask of control characters (0-31) which are forbidden
* from appearing in the final output.
*
* The qstart character can be either '`' (NASM style) or '\"' (C style),
* to indicate the lead marker of a quoted string. If it is '\"', then
* '`' is not a special character at all.
*/
static size_t nasm_unquote_common(char *str, char **ep,
const uint32_t badctl)
size_t nasm_unquote_anystr(char *str, char **ep, const uint32_t badctl,
const char qstart)
{
unsigned char bq;
const unsigned char *p;
@ -319,15 +326,7 @@ static size_t nasm_unquote_common(char *str, char **ep,
if (!bq)
return 0;
switch (bq) {
case '\'':
case '\"':
/* '...' or "..." string */
while ((c = *p++) && (c != bq))
EMIT(c);
break;
case '`':
if (bq == (unsigned char)qstart) {
/* `...` string */
state = st_start;
@ -335,18 +334,13 @@ static size_t nasm_unquote_common(char *str, char **ep,
c = *p++;
switch (state) {
case st_start:
switch (c) {
case '\\':
if (c == '\\') {
state = st_backslash;
break;
case '`':
case '\0':
} else if ((c == '\0') | (c == bq)) {
state = st_done;
break;
default:
} else {
EMIT(c);
break;
}
}
break;
case st_backslash:
@ -450,14 +444,19 @@ static size_t nasm_unquote_common(char *str, char **ep,
default:
panic();
}
}
break;
default:
}
} else if (bq == '\'' || bq == '\"') {
/*
* '...' or "..." string, NASM legacy style (no escapes of
* any kind, including collapsing double quote marks.)
* We obviously can't get here if qstart == '\"'.
*/
while ((c = *p++) && (c != bq))
EMIT(c);
} else {
/* Not a quoted string, just return the input... */
while ((c = *p++))
EMIT(c);
break;
}
/* Zero-terminate the output */
@ -472,24 +471,30 @@ static size_t nasm_unquote_common(char *str, char **ep,
}
#undef EMIT
/*
* Unquote any arbitrary string; may produce any bytes, including embedded
* control- and NUL characters.
*/
size_t nasm_unquote(char *str, char **ep)
{
return nasm_unquote_common(str, ep, 0);
return nasm_unquote_anystr(str, ep, 0, STR_NASM);
}
/*
* Unquote a string intended to be used as a C string; most control
* characters are rejected, including whitespace characters that
* would imply line endings and so on.
*/
size_t nasm_unquote_cstr(char *str, char **ep)
{
/*
* These are the only control characters permitted: BEL BS TAB ESC
*/
const uint32_t okctl = (1 << '\a') | (1 << '\b') | (1 << '\t') | (1 << 27);
return nasm_unquote_common(str, ep, ~okctl);
return nasm_unquote_anystr(str, ep, BADCTL, STR_NASM);
}
/*
* Find the end of a quoted string; returns the pointer to the terminating
* character (either the ending quote or the null character, if unterminated.)
* If the input is not a quoted string, return NULL.
* This applies to NASM style strings only.
*/
char *nasm_skip_string(const char *str)
{
@ -537,7 +542,9 @@ char *nasm_skip_string(const char *str)
* Note: for the purpose of finding the end of the string,
* all successor states to st_backslash are functionally
* equivalent to st_start, since either a backslash or
* a backquote will force a return to the st_start state.
* a backquote will force a return to the st_start state,
* and any possible multi-character state will terminate
* for any non-alphanumeric character.
*/
state = c ? st_start : st_done;
break;

View file

@ -38,9 +38,24 @@
char *nasm_quote(const char *str, size_t *len);
char *nasm_quote_cstr(const char *str, size_t *len);
size_t nasm_unquote_anystr(char *str, char **endptr,
uint32_t badctl, char qstart);
size_t nasm_unquote(char *str, char **endptr);
size_t nasm_unquote_cstr(char *str, char **endptr);
char *nasm_skip_string(const char *str);
/* Arguments used with nasm_unquote_anystr() */
/*
* These are the only control characters permitted when we produce a C string:
* BEL BS TAB ESC
*/
#define OKCTL ((1U << '\a') | (1U << '\b') | (1U << '\t') | (1U << 27))
#define BADCTL (~(uint32_t)OKCTL)
/* Initial quotation mark */
#define STR_C '\"'
#define STR_NASM '`'
#endif /* NASM_QUOTE_H */