macros.c: compress by tokenizing macro directives

Compress macros.c by representing macro directives with a single byte.
We can do this because we only use the ASCII character range inside
the standard macro files.

Note: we could save significant additional space by not having a
pointer array, and instead relying on the fact that we sweep
sequentially through the output array.
This commit is contained in:
H. Peter Anvin 2008-06-19 16:00:04 -07:00
parent d2456590fc
commit 72edbb87ef
5 changed files with 94 additions and 13 deletions

View file

@ -114,7 +114,7 @@ version.sed: version version.pl
# This source file is generated from the standard macros file
# `standard.mac' by another Perl script. Again, it's part of the
# standard distribution.
macros.c: macros.pl standard.mac version.mac $(srcdir)/macros/*.mac
macros.c: macros.pl pptok.ph standard.mac version.mac $(srcdir)/macros/*.mac
$(PERL) $(srcdir)/macros.pl $(srcdir)/standard.mac version.mac \
$(srcdir)/macros/*.mac
@ -148,12 +148,14 @@ pptok.h: pptok.dat pptok.pl perllib/phash.ph
$(PERL) $(srcdir)/pptok.pl h $(srcdir)/pptok.dat pptok.h
pptok.c: pptok.dat pptok.pl perllib/phash.ph
$(PERL) $(srcdir)/pptok.pl c $(srcdir)/pptok.dat pptok.c
pptok.ph: pptok.dat pptok.pl perllib/phash.ph
$(PERL) $(srcdir)/pptok.pl ph $(srcdir)/pptok.dat pptok.ph
# This target generates all files that require perl.
# This allows easier generation of distribution (see dist target).
PERLREQ = macros.c insnsb.c insnsa.c insnsd.c insnsi.h insnsn.c \
regs.c regs.h regflags.c regdis.c regdis.h regvals.c \
tokhash.c tokens.h pptok.h pptok.c \
tokhash.c tokens.h pptok.h pptok.c pptok.ph \
version.h version.mac
perlreq: $(PERLREQ)

View file

@ -8,12 +8,16 @@
# distributed in the NASM archive.
require 'phash.ph';
require 'pptok.ph';
my $fname;
my $line = 0;
my $index = 0;
my $tasm_count = 0;
#
# Generate macros.c
#
open(OUT,">macros.c") or die "unable to open macros.c\n";
print OUT "/*\n";
@ -53,12 +57,30 @@ foreach $fname ( @ARGV ) {
$module_number{$module} = $nmodule++;
$module_index{$module} = $index;
} elsif (m/^\s*((\s*([^\"\';\s]+|\"[^\"]*\"|\'[^\']*\'))*)\s*(;.*)?$/) {
$_ = $1;
s/(\s)\s+/$1/g;
s/\\/\\\\/g;
s/"/\\"/g;
if (length > 0) {
printf OUT " /* %4d */ \"%s\",\n", $index++, $_;
my $s1, $s2, $pd, $ws;
$s1 = $1;
$s1 =~ s/(\s)\s+/$1/g;
$s1 =~ s/\\/\\\\/g;
$s1 =~ s/"/\\"/g;
$s2 = '';
print $s1, ":";
while ($s1 =~ /^(.*)(\%[a-zA-Z_][a-zA-Z0-9_]*)(\s*)(.*)$/) {
$s2 .= $1;
$pd = $2;
$ws = $3;
$s1 = $4;
print " ", $pd;
if (defined($pptok_hash{$pd}) &&
$pptok_hash{$pd} <= 127) {
$s2 .= sprintf("\\x%02x\"\"", $pptok_hash{$pd}+128);
} else {
$s2 .= $pd.$ws;
}
}
print "\n";
$s2 .= $s1;
if (length($s2) > 0) {
printf OUT " /* %4d */ \"%s\",\n", $index++, $s2;
}
} else {
die "$fname:$line: error unterminated quote";

View file

@ -1,6 +1,6 @@
#!/usr/bin/perl
#
# Produce pptok.c and pptok.h from pptok.dat
# Produce pptok.c, pptok.h and pptok.ph from pptok.dat
#
require 'phash.ph';
@ -54,14 +54,15 @@ $first_uncond = $pptok[0];
@pptok = (@cptok, @pptok);
open(OUT, "> $out") or die "$0: cannot open: $out\n";
print OUT "/* Automatically generated from $in by $0 */\n";
print OUT "/* Do not edit */\n";
print OUT "\n";
#
# Output pptok.h
#
if ($what eq 'h') {
print OUT "/* Automatically generated from $in by $0 */\n";
print OUT "/* Do not edit */\n";
print OUT "\n";
print OUT "enum preproc_token {\n";
$n = 0;
foreach $pt (@pptok) {
@ -108,6 +109,10 @@ if ($what eq 'h') {
# Output pptok.c
#
if ($what eq 'c') {
print OUT "/* Automatically generated from $in by $0 */\n";
print OUT "/* Do not edit */\n";
print OUT "\n";
my %tokens = ();
my @tokendata = ();
@ -157,6 +162,12 @@ if ($what eq 'c') {
}
print OUT "};\n";
printf OUT "const int pp_directives_len[%d] = {\n", scalar(@pptok);
foreach $d (@pptok) {
printf OUT " %d,\n", defined($d) ? length($d)+1 : 0;
}
print OUT "};\n";
print OUT "enum preproc_token pp_token_hash(const char *token)\n";
print OUT "{\n";
@ -202,3 +213,25 @@ if ($what eq 'c') {
print OUT " return ix;\n";
print OUT "}\n";
}
#
# Output pptok.ph
#
if ($what eq 'ph') {
    # The generated file is Perl source, so its banner uses '#' comments.
    print OUT "# Automatically generated from $in by $0\n";
    print OUT "# Do not edit\n";
    print OUT "\n";

    # Emit %pptok_hash: each defined entry of @pptok, prefixed with '%',
    # maps to its position in @pptok.  Undefined slots produce no line
    # but still occupy an index, so numbering follows the array layout.
    print OUT "%pptok_hash = (\n";
    for ($n = 0; $n < scalar(@pptok); $n++) {
        my $name = $pptok[$n];
        next unless defined($name);
        printf OUT " '%%%s' => %d,\n", $name, $n;
    }
    print OUT ");\n";

    # End the generated file with a true value, as a file loaded via
    # `require` must return true.
    print OUT "1;\n";
}

View file

@ -641,7 +641,30 @@ static char *read_line(void)
int bufsize, continued_count;
if (stdmacpos) {
char *ret = nasm_strdup(*stdmacpos++);
unsigned char c;
char *ret, *q;
const char *smac = *stdmacpos++, *p;
size_t len = 0;
p = smac;
while ((c = *p++)) {
if (c >= 0x80)
len += pp_directives_len[c-0x80]+1;
else
len++;
}
ret = nasm_malloc(len+1);
p = smac; q = ret;
while ((c = *p++)) {
if (c >= 0x80) {
memcpy(q, pp_directives[c-0x80], pp_directives_len[c-0x80]);
q += pp_directives_len[c-0x80];
*q++ = ' ';
} else {
*q++ = c;
}
}
*q = '\0';
if (!*stdmacpos) {
/* This was the last of the standard macro chain... */
stdmacpos = NULL;

View file

@ -12,6 +12,7 @@
#include "pptok.h"
extern const char * const pp_directives[];
extern const int pp_directives_len[];
enum preproc_token pp_token_hash(const char *token);
void pp_include_path(char *);