phash: simplify the code generators

Simplify the code generators by merging the two hash constant arrays
into one. The hash is effectively the same, although the order of the
constants differ (possibly in a way which makes the indexing easier.)
The main difference is the amount of code is necessary to generate
each of the output C files.

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
This commit is contained in:
H. Peter Anvin (Intel) 2020-07-10 19:26:52 -07:00
parent 10c9aa37a4
commit b14dbb95a1
6 changed files with 76 additions and 95 deletions

View file

@ -1,7 +1,7 @@
#!/usr/bin/perl
## --------------------------------------------------------------------------
##
## Copyright 1996-2019 The NASM Authors - All Rights Reserved
## Copyright 1996-2020 The NASM Authors - All Rights Reserved
## See the file AUTHORS included with the NASM distribution for
## the specific copyright holders.
##
@ -168,6 +168,36 @@ if ($what eq 'c') {
print OUT "/* Do not edit */\n";
print OUT "\n";
print OUT "#include \"compiler.h\"\n";
print OUT "#include \"nctype.h\"\n";
print OUT "#include \"nasmlib.h\"\n";
print OUT "#include \"hashtbl.h\"\n";
print OUT "#include \"preproc.h\"\n";
print OUT "\n";
# Note that this is global.
printf OUT "const char * const pp_directives[%d] = {\n", scalar(@pptok);
foreach $d (@pptok) {
if (defined($d) && $d !~ /[A-Z]/) {
print OUT " \"%$d\",\n";
} else {
print OUT " NULL,\n";
}
}
print OUT "};\n";
printf OUT "const uint8_t pp_directives_len[%d] = {\n", scalar(@pptok);
foreach $d (@pptok) {
printf OUT " %d,\n", defined($d) ? length($d)+1 : 0;
}
print OUT "};\n";
# Put a large value in unused hash slots. This makes it extremely
# unlikely that any combination that involves unused slot will
# pass the range test. This speeds up rejection of unrecognized
# tokens, i.e. identifiers.
print OUT "\n#define INVALID_HASH_ENTRY (65535/3)\n";
my %tokens = ();
my @tokendata = ();
@ -196,54 +226,19 @@ if ($what eq 'c') {
($n, $sv, $g) = @hashinfo;
die if ($n & ($n-1));
$n <<= 1;
print OUT "#include \"compiler.h\"\n";
print OUT "#include \"nctype.h\"\n";
print OUT "#include \"nasmlib.h\"\n";
print OUT "#include \"hashtbl.h\"\n";
print OUT "#include \"preproc.h\"\n";
print OUT "\n";
# Note that this is global.
printf OUT "const char * const pp_directives[%d] = {\n", scalar(@pptok);
foreach $d (@pptok) {
if (defined($d) && $d !~ /[A-Z]/) {
print OUT " \"%$d\",\n";
} else {
print OUT " NULL,\n";
}
}
print OUT "};\n";
printf OUT "const uint8_t pp_directives_len[%d] = {\n", scalar(@pptok);
foreach $d (@pptok) {
printf OUT " %d,\n", defined($d) ? length($d)+1 : 0;
}
print OUT "};\n";
# Put a large value in unused slots. This makes it extremely unlikely
# that any combination that involves unused slot will pass the range test.
# This speeds up rejection of unrecognized tokens, i.e. identifiers.
print OUT "\n#define INVALID_HASH_ENTRY (65535/3)\n";
print OUT "\n\n/* Primary preprocessor token hash */\n\n";
print OUT "enum preproc_token pp_token_hash(const char *token)\n";
print OUT "{\n";
print OUT " static const int16_t hash1[$n] = {\n";
print OUT " static const int16_t hashdata[$n] = {\n";
for ($i = 0; $i < $n; $i++) {
my $h = ${$g}[$i*2+0];
my $h = ${$g}[$i];
print OUT " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
}
print OUT " };\n";
print OUT " static const int16_t hash2[$n] = {\n";
for ($i = 0; $i < $n; $i++) {
my $h = ${$g}[$i*2+1];
print OUT " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
}
print OUT " };\n";
print OUT " uint32_t k1, k2;\n";
print OUT " uint64_t crc;\n";
# For correct overflow behavior, "ix" should be unsigned of the same
@ -253,10 +248,10 @@ if ($what eq 'c') {
printf OUT " crc = crc64i(UINT64_C(0x%08x%08x), token);\n",
$$sv[0], $$sv[1];
print OUT " k1 = (uint32_t)crc;\n";
print OUT " k2 = (uint32_t)(crc >> 32);\n";
printf OUT " k1 = ((uint32_t)crc & 0x%x) + 0;\n", $n-2;
printf OUT " k2 = ((uint32_t)(crc >> 32) & 0x%x) + 1;\n", $n-2;
print OUT "\n";
printf OUT " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
print OUT " ix = hashdata[k1] + hashdata[k2];\n";
printf OUT " if (ix >= %d)\n", scalar(@pptok);
print OUT " return PP_INVALID;\n";
print OUT "\n";
@ -288,25 +283,18 @@ if ($what eq 'c') {
($n, $sv, $g) = @hashinfo;
die if ($n & ($n-1));
$n <<= 1;
print OUT "\n\n/* TASM compatibility preprocessor token hash */\n";
print OUT "enum preproc_token pp_tasm_token_hash(const char *token)\n";
print OUT "{\n";
print OUT " static const int16_t hash1[$n] = {\n";
print OUT " static const int16_t hashdata[$n] = {\n";
for ($i = 0; $i < $n; $i++) {
my $h = ${$g}[$i*2+0];
my $h = ${$g}[$i];
print OUT " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
}
print OUT " };\n";
print OUT " static const int16_t hash2[$n] = {\n";
for ($i = 0; $i < $n; $i++) {
my $h = ${$g}[$i*2+1];
print OUT " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
}
print OUT " };\n";
print OUT " uint32_t k1, k2;\n";
print OUT " uint64_t crc;\n";
# For correct overflow behavior, "ix" should be unsigned of the same
@ -316,10 +304,12 @@ if ($what eq 'c') {
printf OUT " crc = crc64i(UINT64_C(0x%08x%08x), token);\n",
$$sv[0], $$sv[1];
print OUT " k1 = (uint32_t)crc;\n";
print OUT " k2 = (uint32_t)(crc >> 32);\n";
printf OUT " k1 = ((uint32_t)crc & 0x%x) + 0;\n", $n-2;
printf OUT " k2 = ((uint32_t)(crc >> 32) & 0x%x) + 1;\n", $n-2;
print OUT "\n";
printf OUT " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
printf OUT " ix = hashdata[k1] + hashdata[k2];\n", $n-1, $n-1;
# Comparing to pptok here is correct, because this hash produces
# an enum preproc_token value directly.
printf OUT " if (ix >= %d)\n", scalar(@pptok);
print OUT " return PP_INVALID;\n";
print OUT "\n";
@ -337,9 +327,8 @@ if ($what eq 'c') {
if ($what eq 'ph') {
print OUT "# Automatically generated from $in by $0\n";
print OUT "# Do not edit\n";
print OUT "\n";
print OUT "%pptok_hash = (\n";
print OUT "\n\%pptok_hash = (\n";
$n = 0;
foreach $tok (@pptok) {
if (defined($tok)) {
@ -348,5 +337,12 @@ if ($what eq 'ph') {
$n++;
}
print OUT ");\n";
print OUT "1;\n";
print OUT "\n\@pptok_list = (\n";
foreach $tok (@pptok) {
print OUT " ", (defined($tok) ? "'\%$tok'" : 'undef'), ",\n";
}
print OUT ");\n";
print OUT "\n1;\n";
}

View file

@ -1,7 +1,7 @@
#!/usr/bin/perl
## --------------------------------------------------------------------------
##
## Copyright 1996-2018 The NASM Authors - All Rights Reserved
## Copyright 1996-2020 The NASM Authors - All Rights Reserved
## See the file AUTHORS included with the NASM distribution for
## the specific copyright holders.
##
@ -201,6 +201,7 @@ if ($output eq 'h') {
($n, $sv, $g) = @hashinfo;
die if ($n & ($n-1));
$n <<= 1;
print "/*\n";
print " * This file is generated from insns.dat, regs.dat and token.dat\n";
@ -236,16 +237,9 @@ if ($output eq 'h') {
# This speeds up rejection of unrecognized tokens, i.e. identifiers.
print "#define INVALID_HASH_ENTRY (65535/3)\n";
print " static const int16_t hash1[$n] = {\n";
printf " static const int16_t hashdata[%d] = {\n", $n;
for ($i = 0; $i < $n; $i++) {
my $h = ${$g}[$i*2+0];
print " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
}
print " };\n";
print " static const int16_t hash2[$n] = {\n";
for ($i = 0; $i < $n; $i++) {
my $h = ${$g}[$i*2+1];
my $h = ${$g}[$i];
print " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
}
print " };\n";
@ -275,10 +269,11 @@ if ($output eq 'h') {
print " crc = crc64_byte(crc, c);\n";
print " };\n";
print "\n";
print " k1 = (uint32_t)crc;\n";
print " k2 = (uint32_t)(crc >> 32);\n";
printf " k1 = ((uint32_t)crc & 0x%x) + 0;\n", $n-2;
printf " k2 = ((uint32_t)(crc >> 32) & 0x%x) + 1;\n", $n-2;
print "\n";
printf " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
printf " ix = hashdata[k1] + hashdata[k2];\n",
$n-2, $n-2;
printf " if (ix >= %d)\n", scalar(@tokendata);
print " goto notfound;\n";
print "\n";

View file

@ -260,6 +260,7 @@ if (!@hashinfo) {
verify_hash_table(\%pkg_number, \@hashinfo);
my ($n, $sv, $g) = @hashinfo;
die if ($n & ($n-1));
$n <<= 1;
printf OUT "const int use_package_count = %d;\n\n", $npkg;
@ -278,16 +279,9 @@ print OUT " };\n";
# This speeds up rejection of unrecognized tokens, i.e. identifiers.
print OUT "#define INVALID_HASH_ENTRY (65535/3)\n";
print OUT " static const int16_t hash1[$n] = {\n";
print OUT " static const int16_t hashdata[$n] = {\n";
for ($i = 0; $i < $n; $i++) {
my $h = ${$g}[$i*2+0];
print OUT " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
}
print OUT " };\n";
print OUT " static const int16_t hash2[$n] = {\n";
for ($i = 0; $i < $n; $i++) {
my $h = ${$g}[$i*2+1];
my $h = ${$g}[$i];
print OUT " ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
}
print OUT " };\n";
@ -301,10 +295,10 @@ print OUT "\n";
printf OUT " crc = crc64i(UINT64_C(0x%08x%08x), name);\n",
$$sv[0], $$sv[1];
print OUT " k1 = (uint32_t)crc;\n";
print OUT " k2 = (uint32_t)(crc >> 32);\n";
printf OUT " k1 = ((uint32_t)crc & 0x%x) + 0;\n", $n-2;
printf OUT " k2 = ((uint32_t)(crc >> 32) & 0x%x) + 1;\n", $n-2;
print OUT "\n";
printf OUT " ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
printf OUT " ix = hashdata[k1] + hashdata[k2];\n";
printf OUT " if (ix >= %d)\n", scalar(@pkg_list);
print OUT " return NULL;\n";
print OUT "\n";

View file

@ -42,9 +42,9 @@ int perfhash_find(const struct perfect_hash *hash, const char *str)
crc = crc64i(hash->crcinit, str);
k1 = (uint32_t)crc & hash->hashmask;
k2 = (uint32_t)(crc >> 32) & hash->hashmask;
k2 = ((uint32_t)(crc >> 32) & hash->hashmask) + 1;
ix = hash->hashvals[k1] + hash->hashvals[k2 + hash->hashmask + 1];
ix = hash->hashvals[k1] + hash->hashvals[k2];
if (ix >= hash->tbllen ||
!hash->strings[ix] ||

View file

@ -338,13 +338,8 @@ if ($output eq 'h') {
printf F "static const int16_t %s_hashvals[%d] = ", $name, $n*2;
$c = '{';
for (my $i = 0; $i < $n; $i++) {
my $h = ${$g}[$i*2+0];
print F "$c\n ", defined($h) ? $h : 'INVALID_HASH_ENTRY';
$c = ',';
}
for (my $i = 0; $i < $n; $i++) {
my $h = ${$g}[$i*2+1];
for (my $i = 0; $i < $n*2; $i++) {
my $h = ${$g}[$i];
print F "$c\n ", defined($h) ? $h : 'INVALID_HASH_ENTRY';
$c = ',';
}
@ -352,7 +347,7 @@ if ($output eq 'h') {
print F "const struct perfect_hash ${name}_hash = {\n";
printf F " UINT64_C(0x%08x%08x),\n", $$sv[0], $$sv[1]; # crcinit
printf F " UINT32_C(0x%x),\n", $n-1; # hashmask
printf F " UINT32_C(0x%x),\n", ($n-1) << 1; # hashmask
printf F " UINT32_C(%u),\n", $tbllen; # tbllen
printf F " %d,\n", $tbloffs; # tbloffs
printf F " (%s),\n", $errval; # errval

View file

@ -15,10 +15,11 @@ require 'crc64.ph';
sub prehash($$$) {
my($key, $n, $sv) = @_;
my @c = crc64($sv, $key);
my $nmask = ($n << 1) - 2;
# Create a bipartite graph...
$k1 = (($c[1] & ($n-1)) << 1) + 0; # low word
$k2 = (($c[0] & ($n-1)) << 1) + 1; # high word
$k1 = ($c[1] & $nmask) + 0; # low word
$k2 = ($c[0] & $nmask) + 1; # high word
return ($k1, $k2);
}