Implement __utf16__() and __utf32__() for the DB family
Implement __utf16__() and __utf32__() for the DB family of pseudo-instructions. Not yet implemented for evaluation context.
This commit is contained in:
parent
dfaa278cd5
commit
518df30308
10 changed files with 327 additions and 112 deletions
|
@ -67,8 +67,8 @@ NASM = nasm.$(O) nasmlib.$(O) raa.$(O) saa.$(O) \
|
|||
output/outobj.$(O) output/outas86.$(O) output/outrdf2.$(O) \
|
||||
output/outdbg.$(O) output/outieee.$(O) output/outmacho.$(O) \
|
||||
preproc.$(O) quote.$(O) pptok.$(O) macros.$(O) \
|
||||
listing.$(O) eval.$(O) exprlib.$(O) stdscan.$(O) tokhash.$(O) \
|
||||
regvals.$(O) regflags.$(O)
|
||||
listing.$(O) eval.$(O) exprlib.$(O) stdscan.$(O) strfunc.$(O) \
|
||||
tokhash.$(O) regvals.$(O) regflags.$(O)
|
||||
|
||||
NDISASM = ndisasm.$(O) disasm.$(O) sync.$(O) nasmlib.$(O) \
|
||||
insnsd.$(O) insnsb.$(O) insnsn.$(O) regs.$(O) regdis.$(O)
|
||||
|
@ -234,7 +234,7 @@ alldeps: perlreq
|
|||
#-- Everything below is generated by mkdep.pl - do not edit --#
|
||||
assemble.$(O): assemble.c assemble.h compiler.h config.h insns.h insnsi.h \
|
||||
nasm.h nasmlib.h regs.h tables.h tokens.h version.h
|
||||
crc64.$(O): crc64.c compiler.h config.h
|
||||
crc64.$(O): crc64.c compiler.h config.h nasmlib.h
|
||||
disasm.$(O): disasm.c compiler.h config.h disasm.h insns.h insnsi.h nasm.h \
|
||||
nasmlib.h regdis.h regs.h sync.h tables.h tokens.h version.h
|
||||
eval.$(O): eval.c compiler.h config.h eval.h float.h insnsi.h labels.h \
|
||||
|
@ -309,6 +309,8 @@ regvals.$(O): regvals.c compiler.h config.h insnsi.h tables.h
|
|||
saa.$(O): saa.c compiler.h config.h nasmlib.h saa.h
|
||||
stdscan.$(O): stdscan.c compiler.h config.h insns.h insnsi.h nasm.h \
|
||||
nasmlib.h quote.h regs.h stdscan.h tokens.h version.h
|
||||
strfunc.$(O): strfunc.c compiler.h config.h insnsi.h nasm.h nasmlib.h regs.h \
|
||||
version.h
|
||||
sync.$(O): sync.c compiler.h config.h nasmlib.h sync.h
|
||||
tokhash.$(O): tokhash.c compiler.h config.h hashtbl.h insns.h insnsi.h \
|
||||
nasm.h nasmlib.h regs.h tokens.h version.h
|
||||
|
|
|
@ -180,7 +180,7 @@ everything: all doc rdf
|
|||
#-- Everything below is generated by mkdep.pl - do not edit --#
|
||||
assemble.$(O): assemble.c assemble.h compiler.h insns.h insnsi.h nasm.h \
|
||||
nasmlib.h regs.h tables.h tokens.h version.h
|
||||
crc64.$(O): crc64.c compiler.h
|
||||
crc64.$(O): crc64.c compiler.h nasmlib.h
|
||||
disasm.$(O): disasm.c compiler.h disasm.h insns.h insnsi.h nasm.h nasmlib.h \
|
||||
regdis.h regs.h sync.h tables.h tokens.h version.h
|
||||
eval.$(O): eval.c compiler.h eval.h float.h insnsi.h labels.h nasm.h \
|
||||
|
@ -253,6 +253,8 @@ regvals.$(O): regvals.c compiler.h insnsi.h tables.h
|
|||
saa.$(O): saa.c compiler.h nasmlib.h saa.h
|
||||
stdscan.$(O): stdscan.c compiler.h insns.h insnsi.h nasm.h nasmlib.h quote.h \
|
||||
regs.h stdscan.h tokens.h version.h
|
||||
strfunc.$(O): strfunc.c compiler.h insnsi.h nasm.h nasmlib.h regs.h \
|
||||
version.h
|
||||
sync.$(O): sync.c compiler.h nasmlib.h sync.h
|
||||
tokhash.$(O): tokhash.c compiler.h hashtbl.h insns.h insnsi.h nasm.h \
|
||||
nasmlib.h regs.h tokens.h version.h
|
||||
|
|
|
@ -120,7 +120,7 @@ $(OBJDIR)/version.inc: $(PROOT)/version $(PROOT)/version.pl $(OBJDIR)
|
|||
#-- Everything below is generated by mkdep.pl - do not edit --#
|
||||
assemble.o: assemble.c assemble.h compiler.h config.h insns.h insnsi.h \
|
||||
nasm.h nasmlib.h regs.h tables.h tokens.h version.h
|
||||
crc64.o: crc64.c compiler.h config.h
|
||||
crc64.o: crc64.c compiler.h config.h nasmlib.h
|
||||
disasm.o: disasm.c compiler.h config.h disasm.h insns.h insnsi.h nasm.h \
|
||||
nasmlib.h regdis.h regs.h sync.h tables.h tokens.h version.h
|
||||
eval.o: eval.c compiler.h config.h eval.h float.h insnsi.h labels.h nasm.h \
|
||||
|
@ -193,6 +193,8 @@ regvals.o: regvals.c compiler.h config.h insnsi.h tables.h
|
|||
saa.o: saa.c compiler.h config.h nasmlib.h saa.h
|
||||
stdscan.o: stdscan.c compiler.h config.h insns.h insnsi.h nasm.h nasmlib.h \
|
||||
quote.h regs.h stdscan.h tokens.h version.h
|
||||
strfunc.o: strfunc.c compiler.h config.h insnsi.h nasm.h nasmlib.h regs.h \
|
||||
version.h
|
||||
sync.o: sync.c compiler.h config.h nasmlib.h sync.h
|
||||
tokhash.o: tokhash.c compiler.h config.h hashtbl.h insns.h insnsi.h nasm.h \
|
||||
nasmlib.h regs.h tokens.h version.h
|
||||
|
|
|
@ -209,7 +209,7 @@ everything: all doc rdf
|
|||
#-- Everything below is generated by mkdep.pl - do not edit --#
|
||||
assemble.$(O): assemble.c assemble.h compiler.h insns.h insnsi.h nasm.h &
|
||||
nasmlib.h regs.h tables.h tokens.h version.h
|
||||
crc64.$(O): crc64.c compiler.h
|
||||
crc64.$(O): crc64.c compiler.h nasmlib.h
|
||||
disasm.$(O): disasm.c compiler.h disasm.h insns.h insnsi.h nasm.h nasmlib.h &
|
||||
regdis.h regs.h sync.h tables.h tokens.h version.h
|
||||
eval.$(O): eval.c compiler.h eval.h float.h insnsi.h labels.h nasm.h &
|
||||
|
@ -282,6 +282,8 @@ regvals.$(O): regvals.c compiler.h insnsi.h tables.h
|
|||
saa.$(O): saa.c compiler.h nasmlib.h saa.h
|
||||
stdscan.$(O): stdscan.c compiler.h insns.h insnsi.h nasm.h nasmlib.h quote.h &
|
||||
regs.h stdscan.h tokens.h version.h
|
||||
strfunc.$(O): strfunc.c compiler.h insnsi.h nasm.h nasmlib.h regs.h &
|
||||
version.h
|
||||
sync.$(O): sync.c compiler.h nasmlib.h sync.h
|
||||
tokhash.$(O): tokhash.c compiler.h hashtbl.h insns.h insnsi.h nasm.h &
|
||||
nasmlib.h regs.h tokens.h version.h
|
||||
|
|
|
@ -219,7 +219,7 @@ everything: all doc rdf
|
|||
#-- Everything below is generated by mkdep.pl - do not edit --#
|
||||
assemble.$(O): assemble.c assemble.h compiler.h insns.h insnsi.h nasm.h \
|
||||
nasmlib.h regs.h tables.h tokens.h version.h
|
||||
crc64.$(O): crc64.c compiler.h
|
||||
crc64.$(O): crc64.c compiler.h nasmlib.h
|
||||
disasm.$(O): disasm.c compiler.h disasm.h insns.h insnsi.h nasm.h nasmlib.h \
|
||||
regdis.h regs.h sync.h tables.h tokens.h version.h
|
||||
eval.$(O): eval.c compiler.h eval.h float.h insnsi.h labels.h nasm.h \
|
||||
|
@ -292,6 +292,8 @@ regvals.$(O): regvals.c compiler.h insnsi.h tables.h
|
|||
saa.$(O): saa.c compiler.h nasmlib.h saa.h
|
||||
stdscan.$(O): stdscan.c compiler.h insns.h insnsi.h nasm.h nasmlib.h quote.h \
|
||||
regs.h stdscan.h tokens.h version.h
|
||||
strfunc.$(O): strfunc.c compiler.h insnsi.h nasm.h nasmlib.h regs.h \
|
||||
version.h
|
||||
sync.$(O): sync.c compiler.h nasmlib.h sync.h
|
||||
tokhash.$(O): tokhash.c compiler.h hashtbl.h insns.h insnsi.h nasm.h \
|
||||
nasmlib.h regs.h tokens.h version.h
|
||||
|
|
44
assemble.c
44
assemble.c
|
@ -335,7 +335,8 @@ int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
|
|||
out(offset, segment, &e->offset,
|
||||
OUT_ADDRESS, wsize, e->segment, e->wrt);
|
||||
offset += wsize;
|
||||
} else if (e->type == EOT_DB_STRING) {
|
||||
} else if (e->type == EOT_DB_STRING ||
|
||||
e->type == EOT_DB_STRING_FREE) {
|
||||
int align;
|
||||
|
||||
out(offset, segment, e->stringval,
|
||||
|
@ -348,6 +349,8 @@ int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
|
|||
OUT_RAWDATA, align, NO_SEG, NO_SEG);
|
||||
}
|
||||
offset += e->stringlen + align;
|
||||
if (e->type == EOT_DB_STRING_FREE)
|
||||
nasm_free(e->stringval);
|
||||
}
|
||||
}
|
||||
if (t > 0 && t == instruction->times - 1) {
|
||||
|
@ -365,15 +368,8 @@ int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
|
|||
}
|
||||
|
||||
if (instruction->opcode == I_INCBIN) {
|
||||
static char fname[FILENAME_MAX];
|
||||
const char *fname = instruction->eops->stringval;
|
||||
FILE *fp;
|
||||
int32_t len;
|
||||
|
||||
len = FILENAME_MAX - 1;
|
||||
if (len > instruction->eops->stringlen)
|
||||
len = instruction->eops->stringlen;
|
||||
strncpy(fname, instruction->eops->stringval, len);
|
||||
fname[len] = '\0';
|
||||
|
||||
fp = fopen(fname, "rb");
|
||||
if (!fp) {
|
||||
|
@ -383,17 +379,18 @@ int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
|
|||
error(ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
|
||||
fname);
|
||||
} else {
|
||||
static char buf[2048];
|
||||
int32_t t = instruction->times;
|
||||
int32_t base = 0;
|
||||
static char buf[4096];
|
||||
size_t t = instruction->times;
|
||||
size_t base = 0;
|
||||
size_t len;
|
||||
|
||||
len = ftell(fp);
|
||||
if (instruction->eops->next) {
|
||||
base = instruction->eops->next->offset;
|
||||
len -= base;
|
||||
if (instruction->eops->next->next &&
|
||||
len > instruction->eops->next->next->offset)
|
||||
len = instruction->eops->next->next->offset;
|
||||
len > (size_t)instruction->eops->next->next->offset)
|
||||
len = (size_t)instruction->eops->next->next->offset;
|
||||
}
|
||||
/*
|
||||
* Dummy call to list->output to give the offset to the
|
||||
|
@ -402,7 +399,7 @@ int64_t assemble(int32_t segment, int64_t offset, int bits, uint32_t cp,
|
|||
list->output(offset, NULL, OUT_RAWDATA, 0);
|
||||
list->uplevel(LIST_INCBIN);
|
||||
while (t--) {
|
||||
int32_t l;
|
||||
size_t l;
|
||||
|
||||
fseek(fp, base, SEEK_SET);
|
||||
l = len;
|
||||
|
@ -660,7 +657,8 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
|
|||
osize = 0;
|
||||
if (e->type == EOT_DB_NUMBER)
|
||||
osize = 1;
|
||||
else if (e->type == EOT_DB_STRING)
|
||||
else if (e->type == EOT_DB_STRING ||
|
||||
e->type == EOT_DB_STRING_FREE)
|
||||
osize = e->stringlen;
|
||||
|
||||
align = (-osize) % wsize;
|
||||
|
@ -672,16 +670,10 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
|
|||
}
|
||||
|
||||
if (instruction->opcode == I_INCBIN) {
|
||||
char fname[FILENAME_MAX];
|
||||
const char *fname = instruction->eops->stringval;
|
||||
FILE *fp;
|
||||
int32_t len;
|
||||
size_t len;
|
||||
|
||||
len = FILENAME_MAX - 1;
|
||||
if (len > instruction->eops->stringlen)
|
||||
len = instruction->eops->stringlen;
|
||||
strncpy(fname, instruction->eops->stringval, len);
|
||||
fname[len] = '\0';
|
||||
|
||||
fp = fopen(fname, "rb");
|
||||
if (!fp)
|
||||
error(ERR_NONFATAL, "`incbin': unable to open file `%s'",
|
||||
|
@ -695,8 +687,8 @@ int64_t insn_size(int32_t segment, int64_t offset, int bits, uint32_t cp,
|
|||
if (instruction->eops->next) {
|
||||
len -= instruction->eops->next->offset;
|
||||
if (instruction->eops->next->next &&
|
||||
len > instruction->eops->next->next->offset) {
|
||||
len = instruction->eops->next->next->offset;
|
||||
len > (size_t)instruction->eops->next->next->offset) {
|
||||
len = (size_t)instruction->eops->next->next->offset;
|
||||
}
|
||||
}
|
||||
return instruction->times * len;
|
||||
|
|
26
nasm.h
26
nasm.h
|
@ -182,6 +182,7 @@ enum token_type { /* token types, other than chars */
|
|||
TOKEN_DBL_AND, TOKEN_DBL_OR, TOKEN_DBL_XOR, /* &&, || and ^^ */
|
||||
TOKEN_SEG, TOKEN_WRT, /* SEG and WRT */
|
||||
TOKEN_FLOATIZE, /* __floatX__ */
|
||||
TOKEN_STRFUNC, /* __utf16__, __utf32__ */
|
||||
};
|
||||
|
||||
enum floatize {
|
||||
|
@ -195,6 +196,14 @@ enum floatize {
|
|||
FLOAT_128H,
|
||||
};
|
||||
|
||||
/* Must match the list in string_transform(), in strfunc.c */
|
||||
enum strfunc {
|
||||
STRFUNC_UTF16,	/* first entry (0): selects utf8_to_16le() in string_transform() */
|
||||
STRFUNC_UTF32,	/* second entry (1): selects utf8_to_32le() in string_transform() */
|
||||
};
|
||||
|
||||
size_t string_transform(char *, size_t, char **, enum strfunc);
|
||||
|
||||
/*
|
||||
* The expression evaluator must be passed a scanner function; a
|
||||
* standard scanner is provided as part of nasmlib.c. The
|
||||
|
@ -605,11 +614,14 @@ enum prefixes { /* instruction prefixes */
|
|||
PREFIX_ENUM_LIMIT
|
||||
};
|
||||
|
||||
enum { /* extended operand types */
|
||||
EOT_NOTHING, EOT_DB_STRING, EOT_DB_NUMBER
|
||||
enum extop_type { /* extended operand types */
|
||||
EOT_NOTHING,
|
||||
EOT_DB_STRING, /* Byte string */
|
||||
EOT_DB_STRING_FREE, /* Byte string which should be nasm_free'd*/
|
||||
EOT_DB_NUMBER, /* Integer */
|
||||
};
|
||||
|
||||
enum { /* special EA flags */
|
||||
enum ea_flags { /* special EA flags */
|
||||
EAF_BYTEOFFS = 1, /* force offset part to byte size */
|
||||
EAF_WORDOFFS = 2, /* force offset part to [d]word size */
|
||||
EAF_TIMESTWO = 4, /* really do EAX*2 not EAX+EAX */
|
||||
|
@ -643,12 +655,12 @@ typedef struct operand { /* operand to an instruction */
|
|||
|
||||
typedef struct extop { /* extended operand */
|
||||
struct extop *next; /* linked list */
|
||||
int32_t type; /* defined above */
|
||||
char *stringval; /* if it's a string, then here it is */
|
||||
int stringlen; /* ... and here's how long it is */
|
||||
int32_t segment; /* if it's a number/address, then... */
|
||||
char *stringval; /* if it's a string, then here it is */
|
||||
size_t stringlen; /* ... and here's how long it is */
|
||||
int64_t offset; /* ... it's given here ... */
|
||||
int32_t segment; /* if it's a number/address, then... */
|
||||
int32_t wrt; /* ... and here */
|
||||
enum extop_type type; /* defined above */
|
||||
} extop;
|
||||
|
||||
/* Prefix positions: each type of prefix goes in a specific slot.
|
||||
|
|
174
parser.c
174
parser.c
|
@ -334,6 +334,7 @@ restart_parse:
|
|||
result->opcode == I_DY || result->opcode == I_INCBIN) {
|
||||
extop *eop, **tail = &result->eops, **fixptr;
|
||||
int oper_num = 0;
|
||||
int32_t sign;
|
||||
|
||||
result->eops_float = false;
|
||||
|
||||
|
@ -355,85 +356,114 @@ restart_parse:
|
|||
eop->next = NULL;
|
||||
eop->type = EOT_NOTHING;
|
||||
oper_num++;
|
||||
sign = +1;
|
||||
|
||||
/* is_comma_next() here is to distinguish this from
|
||||
a string used as part of an expression... */
|
||||
if (i == TOKEN_STR && is_comma_next()) {
|
||||
eop->type = EOT_DB_STRING;
|
||||
eop->stringval = tokval.t_charptr;
|
||||
eop->stringlen = tokval.t_inttwo;
|
||||
i = stdscan(NULL, &tokval); /* eat the comma */
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((i == TOKEN_FLOAT && is_comma_next())
|
||||
|| i == '-' || i == '+') {
|
||||
int32_t sign = +1;
|
||||
|
||||
if (i == '+' || i == '-') {
|
||||
char *save = stdscan_bufptr;
|
||||
int token = i;
|
||||
sign = (i == '-') ? -1 : 1;
|
||||
i = stdscan(NULL, &tokval);
|
||||
if (i != TOKEN_FLOAT || !is_comma_next()) {
|
||||
stdscan_bufptr = save;
|
||||
i = tokval.t_type = token;
|
||||
}
|
||||
}
|
||||
|
||||
if (i == TOKEN_FLOAT) {
|
||||
eop->type = EOT_DB_STRING;
|
||||
result->eops_float = true;
|
||||
switch (result->opcode) {
|
||||
case I_DB:
|
||||
eop->stringlen = 1;
|
||||
break;
|
||||
case I_DW:
|
||||
eop->stringlen = 2;
|
||||
break;
|
||||
case I_DD:
|
||||
eop->stringlen = 4;
|
||||
break;
|
||||
case I_DQ:
|
||||
eop->stringlen = 8;
|
||||
break;
|
||||
case I_DT:
|
||||
eop->stringlen = 10;
|
||||
break;
|
||||
case I_DO:
|
||||
eop->stringlen = 16;
|
||||
break;
|
||||
case I_DY:
|
||||
error(ERR_NONFATAL, "floating-point constant"
|
||||
" encountered in DY instruction");
|
||||
eop->stringlen = 0;
|
||||
break;
|
||||
default:
|
||||
error(ERR_NONFATAL, "floating-point constant"
|
||||
" encountered in unknown instruction");
|
||||
/*
|
||||
* fix suggested by Pedro Gimeno... original line
|
||||
* was:
|
||||
* eop->type = EOT_NOTHING;
|
||||
*/
|
||||
eop->stringlen = 0;
|
||||
break;
|
||||
}
|
||||
eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen);
|
||||
tail = &eop->next;
|
||||
*fixptr = eop;
|
||||
eop->stringval = (char *)eop + sizeof(extop);
|
||||
if (!eop->stringlen ||
|
||||
!float_const(tokval.t_charptr, sign,
|
||||
(uint8_t *)eop->stringval,
|
||||
eop->stringlen, error))
|
||||
eop->type = EOT_NOTHING;
|
||||
i = stdscan(NULL, &tokval); /* eat the comma */
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* anything else */
|
||||
{
|
||||
} else if (i == TOKEN_STRFUNC) {
|
||||
bool parens = false;
|
||||
const char *funcname = tokval.t_charptr;
|
||||
enum strfunc func = tokval.t_integer;
|
||||
i = stdscan(NULL, &tokval);
|
||||
if (i == '(') {
|
||||
parens = true;
|
||||
i = stdscan(NULL, &tokval);
|
||||
}
|
||||
if (i != TOKEN_STR) {
|
||||
error(ERR_NONFATAL,
|
||||
"%s must be followed by a string constant",
|
||||
funcname);
|
||||
eop->type = EOT_NOTHING;
|
||||
} else {
|
||||
eop->type = EOT_DB_STRING_FREE;
|
||||
eop->stringlen =
|
||||
string_transform(tokval.t_charptr, tokval.t_inttwo,
|
||||
&eop->stringval, func);
|
||||
if (eop->stringlen == (size_t)-1) {
|
||||
error(ERR_NONFATAL, "invalid string for transform");
|
||||
eop->type = EOT_NOTHING;
|
||||
}
|
||||
}
|
||||
if (parens && i && i != ')') {
|
||||
i = stdscan(NULL, &tokval);
|
||||
if (i != ')') {
|
||||
error(ERR_NONFATAL, "unterminated %s function",
|
||||
funcname);
|
||||
}
|
||||
}
|
||||
if (i && i != ',')
|
||||
i = stdscan(NULL, &tokval);
|
||||
} else if (i == '-' || i == '+') {
|
||||
char *save = stdscan_bufptr;
|
||||
int token = i;
|
||||
sign = (i == '-') ? -1 : 1;
|
||||
i = stdscan(NULL, &tokval);
|
||||
if (i != TOKEN_FLOAT) {
|
||||
stdscan_bufptr = save;
|
||||
i = tokval.t_type = token;
|
||||
goto is_expression;
|
||||
} else {
|
||||
goto is_float;
|
||||
}
|
||||
} else if (i == TOKEN_FLOAT) {
|
||||
is_float:
|
||||
eop->type = EOT_DB_STRING;
|
||||
result->eops_float = true;
|
||||
switch (result->opcode) {
|
||||
case I_DB:
|
||||
eop->stringlen = 1;
|
||||
break;
|
||||
case I_DW:
|
||||
eop->stringlen = 2;
|
||||
break;
|
||||
case I_DD:
|
||||
eop->stringlen = 4;
|
||||
break;
|
||||
case I_DQ:
|
||||
eop->stringlen = 8;
|
||||
break;
|
||||
case I_DT:
|
||||
eop->stringlen = 10;
|
||||
break;
|
||||
case I_DO:
|
||||
eop->stringlen = 16;
|
||||
break;
|
||||
case I_DY:
|
||||
error(ERR_NONFATAL, "floating-point constant"
|
||||
" encountered in DY instruction");
|
||||
eop->stringlen = 0;
|
||||
break;
|
||||
default:
|
||||
error(ERR_NONFATAL, "floating-point constant"
|
||||
" encountered in unknown instruction");
|
||||
/*
|
||||
* fix suggested by Pedro Gimeno... original line
|
||||
* was:
|
||||
* eop->type = EOT_NOTHING;
|
||||
*/
|
||||
eop->stringlen = 0;
|
||||
break;
|
||||
}
|
||||
eop = nasm_realloc(eop, sizeof(extop) + eop->stringlen);
|
||||
tail = &eop->next;
|
||||
*fixptr = eop;
|
||||
eop->stringval = (char *)eop + sizeof(extop);
|
||||
if (!eop->stringlen ||
|
||||
!float_const(tokval.t_charptr, sign,
|
||||
(uint8_t *)eop->stringval,
|
||||
eop->stringlen, error))
|
||||
eop->type = EOT_NOTHING;
|
||||
i = stdscan(NULL, &tokval); /* eat the comma */
|
||||
} else {
|
||||
/* anything else, assume it is an expression */
|
||||
expr *value;
|
||||
|
||||
is_expression:
|
||||
value = evaluate(stdscan, NULL, &tokval, NULL,
|
||||
critical, error, NULL);
|
||||
i = tokval.t_type;
|
||||
|
|
167
strfunc.c
Normal file
167
strfunc.c
Normal file
|
@ -0,0 +1,167 @@
|
|||
/*
|
||||
* strfunc.c
|
||||
*
|
||||
* String transformation functions
|
||||
*/
|
||||
|
||||
#include "nasmlib.h"
|
||||
#include "nasm.h"
|
||||
|
||||
/*
 * Convert a string in UTF-8 format to UTF-16LE.
 *
 *   str - input bytes (length-counted; no NUL terminator is required)
 *   len - number of input bytes
 *   op  - output buffer, or NULL to only compute the output size
 *
 * Returns the size of the converted string in bytes, or (size_t)-1 if
 * the input is not valid UTF-8: a continuation byte (0x80..0xBF) where a
 * lead byte is expected, a lead byte of 0xFE/0xFF, a missing or
 * truncated continuation, an overlong encoding, an encoded surrogate
 * (U+D800..U+DFFF), or a code point above U+10FFFF.
 *
 * When op is non-NULL and an error is returned, part of the output may
 * already have been written.
 */
static size_t utf8_to_16le(uint8_t *str, size_t len, char *op)
{
/* Store one 16-bit code unit, low byte first, and count it */
#define EMIT(x)                                         \
    do {                                                \
        uint32_t emit_v = (x);                          \
        if (op) {                                       \
            *op++ = (char)(emit_v & 0xff);              \
            *op++ = (char)((emit_v >> 8) & 0xff);       \
        }                                               \
        outlen++;                                       \
    } while (0)

    size_t outlen = 0;          /* Number of 16-bit units produced */
    int expect = 0;             /* Continuation bytes still required */
    uint8_t c;
    uint32_t v = 0, vmin = 0;   /* Code point so far; smallest legal value */

    while (len--) {
        c = *str++;

        if (expect) {
            if ((c & 0xc0) != 0x80)
                return (size_t)-1;      /* Not a continuation byte */

            v = (v << 6) | (c & 0x3f);
            if (!--expect) {
                if (v < vmin || v > 0x10ffff ||
                    (v >= 0xd800 && v <= 0xdfff)) {
                    /* Overlong, out of range, or a surrogate */
                    return (size_t)-1;
                } else if (v > 0xffff) {
                    /* Outside the BMP: encode as a surrogate pair */
                    v -= 0x10000;
                    EMIT(0xd800 | (v >> 10));
                    EMIT(0xdc00 | (v & 0x3ff));
                } else {
                    EMIT(v);
                }
            }
            continue;
        }

        if (c < 0x80) {
            EMIT(c);
        } else if (c < 0xc0 || c >= 0xfe) {
            /*
             * Invalid UTF-8: 0x80..0xBF are continuation bytes and can
             * never start a sequence; 0xFE and 0xFF are never valid.
             */
            return (size_t)-1;
        } else if (c < 0xe0) {
            v = c & 0x1f;
            expect = 1;
            vmin = 0x80;
        } else if (c < 0xf0) {
            v = c & 0x0f;
            expect = 2;
            vmin = 0x800;
        } else if (c < 0xf8) {
            v = c & 0x07;
            expect = 3;
            vmin = 0x10000;
        } else if (c < 0xfc) {
            /* 5-byte form: always rejected later by the U+10FFFF limit */
            v = c & 0x03;
            expect = 4;
            vmin = 0x200000;
        } else {
            /* 6-byte form: always rejected later by the U+10FFFF limit */
            v = c & 0x01;
            expect = 5;
            vmin = 0x4000000;
        }
    }

    /* A sequence still pending at end of input is an error */
    return expect ? (size_t)-1 : outlen << 1;

#undef EMIT
}
|
||||
|
||||
/*
 * Convert a string in UTF-8 format to UTF-32LE.
 *
 *   str - input bytes (length-counted; no NUL terminator is required)
 *   len - number of input bytes
 *   op  - output buffer, or NULL to only compute the output size
 *
 * Returns the size of the converted string in bytes, or (size_t)-1 if
 * the input is not valid UTF-8: a continuation byte (0x80..0xBF) where a
 * lead byte is expected, a lead byte of 0xFE/0xFF, a missing or
 * truncated continuation, an overlong encoding, or an encoded surrogate
 * (U+D800..U+DFFF).
 *
 * Unlike the UTF-16 conversion, no upper limit is applied to the decoded
 * value, so the 5- and 6-byte forms are passed through as-is.
 *
 * When op is non-NULL and an error is returned, part of the output may
 * already have been written.
 */
static size_t utf8_to_32le(uint8_t *str, size_t len, char *op)
{
/* Store one 32-bit code unit, low byte first, and count it */
#define EMIT(x)                                         \
    do {                                                \
        uint32_t emit_v = (x);                          \
        if (op) {                                       \
            *op++ = (char)(emit_v & 0xff);              \
            *op++ = (char)((emit_v >> 8) & 0xff);       \
            *op++ = (char)((emit_v >> 16) & 0xff);      \
            *op++ = (char)((emit_v >> 24) & 0xff);      \
        }                                               \
        outlen++;                                       \
    } while (0)

    size_t outlen = 0;          /* Number of 32-bit units produced */
    int expect = 0;             /* Continuation bytes still required */
    uint8_t c;
    uint32_t v = 0, vmin = 0;   /* Code point so far; smallest legal value */

    while (len--) {
        c = *str++;

        if (expect) {
            if ((c & 0xc0) != 0x80)
                return (size_t)-1;      /* Not a continuation byte */

            v = (v << 6) | (c & 0x3f);
            if (!--expect) {
                /* Reject overlong encodings and surrogates */
                if (v < vmin || (v >= 0xd800 && v <= 0xdfff))
                    return (size_t)-1;
                EMIT(v);
            }
            continue;
        }

        if (c < 0x80) {
            EMIT(c);
        } else if (c < 0xc0 || c >= 0xfe) {
            /*
             * Invalid UTF-8: 0x80..0xBF are continuation bytes and can
             * never start a sequence; 0xFE and 0xFF are never valid.
             */
            return (size_t)-1;
        } else if (c < 0xe0) {
            v = c & 0x1f;
            expect = 1;
            vmin = 0x80;
        } else if (c < 0xf0) {
            v = c & 0x0f;
            expect = 2;
            vmin = 0x800;
        } else if (c < 0xf8) {
            v = c & 0x07;
            expect = 3;
            vmin = 0x10000;
        } else if (c < 0xfc) {
            v = c & 0x03;
            expect = 4;
            vmin = 0x200000;
        } else {
            v = c & 0x01;
            expect = 5;
            vmin = 0x4000000;
        }
    }

    /* A sequence still pending at end of input is an error */
    return expect ? (size_t)-1 : outlen << 2;

#undef EMIT
}
|
||||
|
||||
typedef size_t (*transform_func)(uint8_t *, size_t, char *);
|
||||
|
||||
/*
|
||||
* Apply a specific string transform and return it in a nasm_malloc'd
|
||||
* buffer, returning the length. On error, returns (size_t)-1 and no
|
||||
* buffer is allocated.
|
||||
*/
|
||||
size_t string_transform(char *str, size_t len, char **out, enum strfunc func)
|
||||
{
|
||||
/* This should match enum strfunc in nasm.h */
|
||||
static const transform_func str_transforms[] = {
|
||||
utf8_to_16le,
|
||||
utf8_to_32le,
|
||||
};
|
||||
transform_func transform = str_transforms[func];
|
||||
size_t outlen;
|
||||
uint8_t *s = (uint8_t *)str;
|
||||
|
||||
outlen = transform(s, len, NULL);
|
||||
if (outlen == (size_t)-1)
|
||||
return -1;
|
||||
|
||||
return transform(s, len, *out = nasm_malloc(outlen));
|
||||
}
|
|
@ -53,6 +53,10 @@ __float80e__
|
|||
__float128l__
|
||||
__float128h__
|
||||
|
||||
% TOKEN_STRFUNC, 0, STRFUNC_{__*__}
|
||||
__utf16__
|
||||
__utf32__
|
||||
|
||||
% TOKEN_*, 0, 0
|
||||
seg
|
||||
wrt
|
||||
|
|
Loading…
Reference in a new issue