unicode-decomp.pl: Move from chartables.pl...
2002-03-04 Eric Blake <ebb9@email.byu.edu> * scripts/unicode-decomp.pl: Move from chartables.pl, and remove the code for generating include/java-chartables.h. * scripts/unicode-blocks.pl: Move from scripts/blocks.pl, and merge with Classpath. * scripts/unicode-muncher.pl: Copy from Classpath. * scripts/MakeCharTables.java: New file. * gnu/gcj/convert/Blocks-3.txt: New file. * gnu/gcj/convert/UnicodeData-3.0.0.txt: New file. * gnu/gcj/convert/UnicodeCharacterDatabase-3.0.0.html: New file. * gnu/java/lang/CharData.java: Copy from Classpath. * Makefile.am (ordinary_java_source_files): Add gnu/java/lang/CharData.java. * configure.in: Remove --enable-fast-character option. * java/lang/Character.java: Merge algorithms and Javadoc with Classpath. * java/lang/natCharacter.cc: Implement Unicode lookup table more efficiently. * include/java-chardecomp.h: Regenerate. * include/java-chartables.h: Regenerate. From-SVN: r50368
This commit is contained in:
parent
b87e4a4c6f
commit
1fa782725c
17 changed files with 16513 additions and 84861 deletions
146
libjava/scripts/unicode-decomp.pl
Executable file
146
libjava/scripts/unicode-decomp.pl
Executable file
|
@ -0,0 +1,146 @@
|
|||
#!/usr/bin/perl -w
|
||||
# unicode-decomp.pl - script to generate database for java.text.Collator
|
||||
# Copyright (C) 1998, 1999, 2002 Free Software Foundation, Inc.
|
||||
#
|
||||
# This file is part of libjava.
|
||||
#
|
||||
# This software is copyrighted work licensed under the terms of the
|
||||
# Libjava License. Please consult the file "LIBJAVA_LICENSE" for
|
||||
# details.
|
||||
|
||||
# Code for reading UnicodeData.txt and generating the code for
|
||||
# include/java-chardecomp.h. For now, the relevant Unicode definition files
|
||||
# are found in libjava/gnu/gcj/convert/.
|
||||
#
|
||||
# Usage: ./unicode-decomp.pl [-n] <UnicodeData.txt> <decomp.h>
|
||||
# where <UnicodeData.txt> is obtained from www.unicode.org (named
|
||||
# UnicodeData-3.0.0.txt for Unicode version 3.0.0), and <decomp.h>
|
||||
# is the final location of include/java-chardecomp.h.
|
||||
# As of JDK 1.4, use Unicode version 3.0.0 for best results.
|
||||
#
|
||||
# If this exits with nonzero status, then you must investigate the
|
||||
# cause of the problem.
|
||||
# Diagnostics and other information to stderr.
|
||||
# With -n, the files are not created, but all processing still occurs.
|
||||
|
||||
# These map characters to their decompositions.
|
||||
# Catch undeclared variables and symbolic references from here on.
use strict;

# Code point => decomposition, stored as a string of packed big-endian
# 16-bit values.  The parsing loop files a character under
# %full_decomposition when its decomposition field carries a <tag>, and
# under %canonical_decomposition otherwise.
my %canonical_decomposition = ();
my %full_decomposition = ();

# Handle `-n' and open the input file.
my $usage = "Usage: $0 [-n] <UnicodeData.txt> <java-chardecomp.h>\n";
if (@ARGV && $ARGV[0] eq '-n')
{
    shift @ARGV;
    # With `-n' and nothing else, assigning $ARGV[1] below would leave
    # @ARGV as (undef, '/dev/null') and slip past the argument-count
    # check, so reject that case here.
    die $usage unless @ARGV;
    # Discard the generated header; all processing still takes place.
    $ARGV[1] = '/dev/null';
}
die $usage unless @ARGV == 2;

# Three-argument open: the file name is never parsed for mode characters.
open (UNICODE, '<', $ARGV[0])
    or die "Can't open Unicode attribute file: $!\n";
|
||||
|
||||
# Process the Unicode file.  Each record is a line of `;'-separated
# fields: field 0 is the code point in hex and field 5 is the
# decomposition mapping (space-separated hex code points, optionally
# preceded by a <tag>).
$| = 1;
my $count = 0;
print STDERR "Parsing attributes file";
while (my $line = <UNICODE>)
{
    # One progress dot per 1000 input lines.
    print STDERR "." unless $count++ % 1000;
    chomp $line;
    $line =~ s/\r//g;

    my ($ch, undef, undef, undef, undef, $decomp) = split (/;/, $line);

    # Blank or truncated lines have no decomposition field at all; skip
    # them quietly instead of tripping `uninitialized value' warnings.
    next unless defined $ch && defined $decomp && $decomp ne '';
    $ch = hex ($ch);

    my $is_full = 0;
    my @decomp = ();
    foreach my $token (split (' ', $decomp))
    {
        # A <tag> token marks the mapping as a "full" (non-canonical)
        # decomposition; the tag itself is not stored.
        if ($token =~ /^\<.*\>$/)
        {
            $is_full = 1;
            next;
        }
        push (@decomp, hex ($token));
    }

    # Pack as big-endian 16-bit values.
    # NOTE(review): code points above 0xFFFF do not fit in the "n"
    # format -- confirm the input never contains any.
    my $s = pack ("n*", @decomp);
    if ($is_full)
    {
        $full_decomposition{$ch} = $s;
    }
    else
    {
        $canonical_decomposition{$ch} = $s;
    }
}
close (UNICODE);
|
||||
|
||||
# Now generate decomposition tables.
# Three-argument open: the file name is never parsed for mode characters.
open (DECOMP, '>', $ARGV[1]) or die "Can't open output file: $!\n";
print STDERR "\nGenerating tables\n";

# Fixed preamble of the generated header.  $0 is interpolated so the
# header names the script that produced it.
print DECOMP <<EOF;
// java-chardecomp.h - Decomposition character tables -*- c++ -*-

#ifndef __JAVA_CHARDECOMP_H__
#define __JAVA_CHARDECOMP_H__


// These tables are automatically generated by the $0
// script. DO NOT EDIT the tables. Instead, fix the script
// and run it again.

// This file should only be included by natCollator.cc

struct decomp_entry
{
jchar key;
const char *value;
};

EOF

# The sub is defined further down the file; the `&' form with explicit
# parentheses passes an empty argument list and sidesteps any prototype
# check at this early call site.
&write_decompositions ();

print DECOMP "#endif /* __JAVA_CHARDECOMP_H__ */\n";

# Buffered write errors only surface at close, so a failure here must
# produce a nonzero exit status rather than a silently truncated header.
close (DECOMP) or die "Can't close output file: $!\n";
print STDERR "Done\n";
exit;
|
||||
|
||||
|
||||
# Write a single decomposition table to the DECOMP filehandle.
#
#   $name     - prefix of the generated C array, which is named
#               "${name}_decomposition".
#   $is_canon - accepted for call compatibility; not used.
#   %table    - code point => decomposition, the latter a string of
#               packed big-endian 16-bit values (pack "n*").
#
# Only keys in the range 0 .. 0xffff are written, in ascending order.
# No prototype is declared: the sub is defined after its call sites, so
# one would never be enforced.
sub write_single_decomposition
{
    my ($name, $is_canon, %table) = @_;

    print DECOMP "static const decomp_entry ${name}_decomposition[] =\n{\n";

    my @entries = ();
    for my $key (0 .. 0xffff)
    {
        next if ! defined $table{$key};

        # We represent the expansion as a series of bytes, two per
        # character, high byte first.  This is ugly, but relatively
        # space-efficient.  Most expansions are short, but there are a
        # few that are very long (e.g. \uFDFA).  This means that if we
        # chose a fixed-space representation we would waste a lot of
        # space.
        # NOTE(review): no explicit terminator is written, so the data
        # ends only with the single NUL that C appends to a string
        # literal -- confirm the consumer does not expect a second one.
        my $bytes = join ('',
                          map { sprintf ("\\x%02x\\x%02x", $_ >> 8, $_ & 0xff) }
                          unpack ("n*", $table{$key}));
        push (@entries, sprintf (" { 0x%04x, \"%s\" }", $key, $bytes));
    }

    # Entries are separated, not terminated, by ",\n".
    print DECOMP join (",\n", @entries);
    print DECOMP "\n};\n\n";
}
|
||||
|
||||
# Write both decomposition tables to the DECOMP filehandle: the
# canonical table first, then the full table.  Takes no arguments.
sub write_decompositions
{
    write_single_decomposition ('canonical', 1, %canonical_decomposition);
    write_single_decomposition ('full', 0, %full_decomposition);
}
|
Loading…
Add table
Add a link
Reference in a new issue