Character.java: Re-merged with Classpath.

2007-03-05 Mark Wielaard <mark@klomp.org> * java/lang/Character.java: Re-merged with Classpath. * java/lang/natString.cc (nativeCompareTo): Renamed from compareTo. * java/lang/StringBuilder.java: Re-merged with Classpath. * java/lang/String.java: Re-merged with Classpath. (nativeCompareTo): Renamed from compareTo. * java/lang/StringBuffer.java: Re-merged with Classpath. * jni.cc (_Jv_JNI_GetAnyMethodID): Split calls to append. From-SVN: r122560
2007-03-05 17:27:44 +00:00 · 2007-03-05 17:27:44 +00:00 · 666ff4f65d
commit 666ff4f65d
parent b48a45922d
22 changed files with 609 additions and 246 deletions
--- a/libjava/java/lang/Character.java
+++ b/libjava/java/lang/Character.java
@ -1,5 +1,6 @@
 /* java.lang.Character -- Wrapper class for char, and Unicode subsets
-   Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.
+   Copyright (C) 1998, 1999, 2001, 2002, 2005, 2006, 2007
+   Free Software Foundation, Inc.

 This file is part of GNU Classpath.

@ -54,7 +55,7 @@ import java.util.Locale;
 /**
 * Wrapper class for the primitive char data type.  In addition, this class
 * allows one to retrieve property information and perform transformations
- * on the 57,707 defined characters in the Unicode Standard, Version 3.0.0.
+ * on the defined characters in the Unicode Standard, Version 4.0.0.
 * java.lang.Character is designed to be very dynamic, and as such, it
 * retrieves information on the Unicode character set from a separate
 * database, gnu.java.lang.CharData, which can be easily upgraded.
@ -62,7 +63,7 @@ import java.util.Locale;
 * <p>For predicates, boundaries are used to describe
 * the set of characters for which the method will return true.
 * This syntax uses fairly normal regular expression notation.
- * See 5.13 of the Unicode Standard, Version 3.0, for the
+ * See 5.13 of the Unicode Standard, Version 4.0, for the
 * boundary specification.
 *
 * <p>See <a href="http://www.unicode.org">http://www.unicode.org</a>
@ -72,10 +73,11 @@ import java.util.Locale;
 * @author Paul N. Fisher
 * @author Jochen Hoenicke
 * @author Eric Blake (ebb9@email.byu.edu)
+ * @author Andrew John Hughes (gnu_andrew@member.fsf.org)
 * @since 1.0
- * @status updated to 1.4
+ * @status partly updated to 1.5; some things still missing
 */
-public final class Character implements Serializable, Comparable
+public final class Character implements Serializable, Comparable<Character>
 {
  /**
   * A subset of Unicode blocks.
@ -160,10 +162,8 @@ public final class Character implements Serializable, Comparable
    /** The canonical name of the block according to the Unicode standard. */
    private final String canonicalName;

-    /** Constants for the <code>forName()</code> method */
-    private static final int CANONICAL_NAME = 0;
-    private static final int NO_SPACES_NAME = 1;
-    private static final int CONSTANT_NAME = 2;
+    /** Enumeration for the <code>forName()</code> method */
+    private enum NameType { CANONICAL, NO_SPACES, CONSTANT; };

    /**
     * Constructor for strictly defined blocks.
@ -173,7 +173,7 @@ public final class Character implements Serializable, Comparable
     * @param name the block name
     */
    private UnicodeBlock(int start, int end, String name,
-             String canonicalName)
+			 String canonicalName)
    {
      super(name);
      this.start = start;
@ -207,8 +207,8 @@ public final class Character implements Serializable, Comparable
    public static UnicodeBlock of(int codePoint)
    {
      if (codePoint > MAX_CODE_POINT)
-    throw new IllegalArgumentException("The supplied integer value is " +
-                       "too large to be a codepoint.");
+	throw new IllegalArgumentException("The supplied integer value is " +
+					   "too large to be a codepoint.");
      // Simple binary search for the correct block.
      int low = 0;
      int hi = sets.length - 1;
@ -262,59 +262,51 @@ public final class Character implements Serializable, Comparable
     */
    public static final UnicodeBlock forName(String blockName)
    {
-      int type;
+      NameType type;
      if (blockName.indexOf(' ') != -1)
-        type = CANONICAL_NAME;
+        type = NameType.CANONICAL;
      else if (blockName.indexOf('_') != -1)
-        type = CONSTANT_NAME;
+        type = NameType.CONSTANT;
      else
-        type = NO_SPACES_NAME;
+        type = NameType.NO_SPACES;
      Collator usCollator = Collator.getInstance(Locale.US);
      usCollator.setStrength(Collator.PRIMARY);
      /* Special case for deprecated blocks not in sets */
      switch (type)
      {
-        case CANONICAL_NAME:
+        case CANONICAL:
          if (usCollator.compare(blockName, "Surrogates Area") == 0)
            return SURROGATES_AREA;
          break;
-        case NO_SPACES_NAME:
+        case NO_SPACES:
          if (usCollator.compare(blockName, "SurrogatesArea") == 0)
            return SURROGATES_AREA;
          break;
-        case CONSTANT_NAME:
+        case CONSTANT:
          if (usCollator.compare(blockName, "SURROGATES_AREA") == 0) 
            return SURROGATES_AREA;
          break;
      }
      /* Other cases */
-      int setLength = sets.length;
      switch (type)
      {
-        case CANONICAL_NAME:
-          for (int i = 0; i < setLength; i++)
-            {
-              UnicodeBlock block = sets[i];
-              if (usCollator.compare(blockName, block.canonicalName) == 0)
-                return block;
-            }
+        case CANONICAL:
+          for (UnicodeBlock block : sets)
+            if (usCollator.compare(blockName, block.canonicalName) == 0)
+              return block;
          break;
-        case NO_SPACES_NAME:
-          for (int i = 0; i < setLength; i++)
-            {
-              UnicodeBlock block = sets[i];
-              String nsName = block.canonicalName.replaceAll(" ","");
-              if (usCollator.compare(blockName, nsName) == 0)
-                return block;
-            }        
-          break;
-        case CONSTANT_NAME:
-          for (int i = 0; i < setLength; i++)
-            {
-              UnicodeBlock block = sets[i];
-              if (usCollator.compare(blockName, block.toString()) == 0)
-                return block;
-            }
+        case NO_SPACES:
+          for (UnicodeBlock block : sets)
+	    {
+	      String nsName = block.canonicalName.replaceAll(" ","");
+	      if (usCollator.compare(blockName, nsName) == 0)
+		return block;
+	    }
+	  break;
+        case CONSTANT:
+          for (UnicodeBlock block : sets)
+            if (usCollator.compare(blockName, block.toString()) == 0)
+              return block;
          break;
      }
      throw new IllegalArgumentException("No Unicode block found for " +
@ -1517,10 +1509,11 @@ public final class Character implements Serializable, Comparable
     * this.  These are also returned from calls to <code>of(int)</code>
     * and <code>of(char)</code>.
     */
+    @Deprecated
    public static final UnicodeBlock SURROGATES_AREA
      = new UnicodeBlock(0xD800, 0xDFFF,
                         "SURROGATES_AREA",
-             "Surrogates Area");
+			 "Surrogates Area");

    /**
     * The defined subsets.
@ -1698,12 +1691,79 @@ public final class Character implements Serializable, Comparable
   */
  public static final char MAX_VALUE = '\uFFFF';

+  /**
+   * The minimum Unicode 4.0 code point.  This value is <code>0</code>.
+   * @since 1.5
+   */
+  public static final int MIN_CODE_POINT = 0;
+
+  /**
+   * The maximum Unicode 4.0 code point, which is greater than the range
+   * of the char data type.
+   * This value is <code>0x10FFFF</code>.
+   * @since 1.5
+   */
+  public static final int MAX_CODE_POINT = 0x10FFFF;
+
+  /**
+   * The minimum Unicode high surrogate code unit, or
+   * <em>leading-surrogate</em>, in the UTF-16 character encoding.
+   * This value is <code>'\uD800'</code>.
+   * @since 1.5
+   */
+  public static final char MIN_HIGH_SURROGATE = '\uD800';
+
+  /**
+   * The maximum Unicode high surrogate code unit, or
+   * <em>leading-surrogate</em>, in the UTF-16 character encoding.
+   * This value is <code>'\uDBFF'</code>.
+   * @since 1.5
+   */
+  public static final char MAX_HIGH_SURROGATE = '\uDBFF';
+
+  /**
+   * The minimum Unicode low surrogate code unit, or
+   * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
+   * This value is <code>'\uDC00'</code>.
+   * @since 1.5
+   */
+  public static final char MIN_LOW_SURROGATE = '\uDC00';
+
+  /**
+   * The maximum Unicode low surrogate code unit, or
+   * <em>trailing-surrogate</em>, in the UTF-16 character encoding.
+   * This value is <code>'\uDFFF'</code>.
+   * @since 1.5
+   */
+  public static final char MAX_LOW_SURROGATE = '\uDFFF';  
+
+  /**
+   * The minimum Unicode surrogate code unit in the UTF-16 character encoding.
+   * This value is <code>'\uD800'</code>.
+   * @since 1.5
+   */
+  public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
+
+  /**
+   * The maximum Unicode surrogate code unit in the UTF-16 character encoding.
+   * This value is <code>'\uDFFF'</code>.
+   * @since 1.5
+   */
+  public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
+
+  /**
+   * The lowest possible supplementary Unicode code point (the first code
+   * point outside the basic multilingual plane (BMP)).
+   * This value is <code>0x10000</code>.
+   * @since 1.5
+   */
+  public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
+
  /**
   * Class object representing the primitive char data type.
   *
   * @since 1.1
   */
-  public static final Class TYPE = VMClassLoader.getPrimitiveClass('C');
+  public static final Class<Character> TYPE = (Class<Character>) VMClassLoader.getPrimitiveClass('C');

  /**
   * The number of bits needed to represent a <code>char</code>.
@ -2088,71 +2148,6 @@ public final class Character implements Serializable, Comparable
   */
  private static final int MIRROR_MASK = 0x40;

-  /**
-   * Min value for supplementary code point.
-   *
-   * @since 1.5
-   */
-  public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
-
-  /**
-   * Min value for code point.
-   *
-   * @since 1.5
-   */
-  public static final int MIN_CODE_POINT = 0; 
- 
- 
-  /**
-   * Max value for code point.
-   *
-   * @since 1.5
-   */
-  public static final int MAX_CODE_POINT = 0x010ffff;
-
-
-  /**
-   * Minimum high surrogate code in UTF-16 encoding.
-   *
-   * @since 1.5
-   */
-  public static final char MIN_HIGH_SURROGATE = '\ud800';
-
-  /**
-   * Maximum high surrogate code in UTF-16 encoding.
-   *
-   * @since 1.5
-   */
-  public static final char MAX_HIGH_SURROGATE = '\udbff';
- 
-  /**
-   * Minimum low surrogate code in UTF-16 encoding.
-   *
-   * @since 1.5
-   */
-  public static final char MIN_LOW_SURROGATE = '\udc00';
-
-  /**
-   * Maximum low surrogate code in UTF-16 encoding.
-   *
-   * @since 1.5
-   */
-  public static final char MAX_LOW_SURROGATE = '\udfff';
-
-  /**
-   * Minimum surrogate code in UTF-16 encoding.
-   *
-   * @since 1.5
-   */
-  public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
-
-  /**
-   * Maximum low surrogate code in UTF-16 encoding.
-   *
-   * @since 1.5
-   */
-  public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
-
  /**
   * Grabs an attribute offset from the Unicode attribute database. The lower
   * 5 bits are the character type, the next 2 bits are flags, and the top
@ -2504,6 +2499,209 @@ public final class Character implements Serializable, Comparable
               | (1 << MODIFIER_LETTER)
               | (1 << OTHER_LETTER))) != 0;
  }
+  
+  /**
+   * Returns the index into the given CharSequence that is offset
+   * <code>codePointOffset</code> code points from <code>index</code>.
+   * A high surrogate immediately followed by a low surrogate is stepped
+   * over as a single code point; every other char, including an unpaired
+   * surrogate, counts as one code point.
+   * @param seq the CharSequence
+   * @param index the start position in the CharSequence
+   * @param codePointOffset the number of code points offset from the start
+   * position
+   * @return the index into the CharSequence that is codePointOffset code
+   * points offset from index
+   *
+   * @throws NullPointerException if seq is null
+   * @throws IndexOutOfBoundsException if index is negative or greater than the
+   * length of the sequence.
+   * @throws IndexOutOfBoundsException if codePointOffset is positive and the
+   * subsequence from index to the end of seq has fewer than codePointOffset
+   * code points
+   * @throws IndexOutOfBoundsException if codePointOffset is negative and the
+   * subsequence from the start of seq to index has fewer than
+   * (-codePointOffset) code points
+   * @since 1.5
+   */
+  public static int offsetByCodePoints(CharSequence seq,
+                                       int index,
+                                       int codePointOffset)
+  {
+    int len = seq.length();
+    if (index < 0 || index > len)
+      throw new IndexOutOfBoundsException();
+
+    int offset = index;
+    if (codePointOffset >= 0)
+      {
+        for (int numToGo = codePointOffset; numToGo > 0; numToGo--)
+          {
+            // Check the bound explicitly instead of relying on charAt
+            // to throw for us.
+            if (offset >= len)
+              throw new IndexOutOfBoundsException();
+            if (Character.isHighSurrogate(seq.charAt(offset++))
+                && offset < len
+                && Character.isLowSurrogate(seq.charAt(offset)))
+              offset++;
+          }
+      }
+    else
+      {
+        // Count up from the negative value: negating Integer.MIN_VALUE
+        // overflows back to itself and would turn the walk into a no-op.
+        for (int numToGo = codePointOffset; numToGo < 0; numToGo++)
+          {
+            if (offset <= 0)
+              throw new IndexOutOfBoundsException();
+            if (Character.isLowSurrogate(seq.charAt(--offset))
+                && offset > 0
+                && Character.isHighSurrogate(seq.charAt(offset - 1)))
+              offset--;
+          }
+      }
+    return offset;
+  }
+  
+  /**
+   * Returns the index into the given char subarray that is offset
+   * <code>codePointOffset</code> code points from <code>index</code>.
+   * Only chars inside the subarray <code>[start, start + count)</code> are
+   * examined: a surrogate is paired with its neighbour only when both lie
+   * inside the subarray.
+   * @param a the char array
+   * @param start the start index of the subarray
+   * @param count the length of the subarray
+   * @param index the index to be offset
+   * @param codePointOffset the number of code points offset from
+   * <code>index</code>
+   * @return the index into the char array
+   *
+   * @throws NullPointerException if a is null
+   * @throws IndexOutOfBoundsException if start or count is negative or if
+   * start + count is greater than the length of the array
+   * @throws IndexOutOfBoundsException if index is less than start or larger
+   * than start + count
+   * @throws IndexOutOfBoundsException if codePointOffset is positive and the
+   * subarray from index to start + count - 1 has fewer than codePointOffset
+   * code points.
+   * @throws IndexOutOfBoundsException if codePointOffset is negative and the
+   * subarray from start to index - 1 has fewer than (-codePointOffset) code
+   * points
+   * @since 1.5
+   */
+  public static int offsetByCodePoints(char[] a,
+                                       int start,
+                                       int count,
+                                       int index,
+                                       int codePointOffset)
+  {
+    int len = a.length;
+    // Compare count against len - start so that start + count cannot
+    // overflow before it is validated.
+    if (start < 0 || count < 0 || count > len - start)
+      throw new IndexOutOfBoundsException();
+    int end = start + count;
+    if (index < start || index > end)
+      throw new IndexOutOfBoundsException();
+
+    int offset = index;
+    if (codePointOffset >= 0)
+      {
+        for (int numToGo = codePointOffset; numToGo > 0; numToGo--)
+          {
+            // The walk is limited by the subarray, not by the whole array.
+            if (offset >= end)
+              throw new IndexOutOfBoundsException();
+            if (Character.isHighSurrogate(a[offset++])
+                && offset < end
+                && Character.isLowSurrogate(a[offset]))
+              offset++;
+          }
+      }
+    else
+      {
+        // Count up from the negative value: negating Integer.MIN_VALUE
+        // overflows back to itself and would turn the walk into a no-op.
+        for (int numToGo = codePointOffset; numToGo < 0; numToGo++)
+          {
+            if (offset <= start)
+              throw new IndexOutOfBoundsException();
+            // A low surrogate sitting at start is a code point on its own,
+            // even if a[start - 1] happens to hold a high surrogate.
+            if (Character.isLowSurrogate(a[--offset])
+                && offset > start
+                && Character.isHighSurrogate(a[offset - 1]))
+              offset--;
+          }
+      }
+    return offset;
+  }
+
+  /**
+   * Counts the Unicode code points in a range of the given CharSequence.
+   * The range starts at position beginIndex and stops just before
+   * position endIndex.  A high surrogate that is immediately followed,
+   * inside the range, by a low surrogate forms a supplementary character
+   * and is counted once; every other char is counted individually.
+   * @param seq the CharSequence to inspect
+   * @param beginIndex the beginning of the range
+   * @param endIndex the end of the range
+   * @return the number of Unicode code points in the given range of the
+   * sequence
+   * @throws NullPointerException if seq is null
+   * @throws IndexOutOfBoundsException if beginIndex is negative, endIndex is
+   * larger than the length of seq, or if beginIndex is greater than endIndex.
+   * @since 1.5
+   */
+  public static int codePointCount(CharSequence seq, int beginIndex,
+                                   int endIndex)
+  {
+    final int length = seq.length();
+    if (beginIndex < 0 || endIndex > length || beginIndex > endIndex)
+      throw new IndexOutOfBoundsException();
+
+    int total = 0;
+    int pos = beginIndex;
+    while (pos < endIndex)
+      {
+        char current = seq.charAt(pos++);
+        // Swallow the trailing half of a surrogate pair so that the pair
+        // contributes a single code point.
+        if (isHighSurrogate(current)
+            && pos < endIndex
+            && isLowSurrogate(seq.charAt(pos)))
+          pos++;
+        total++;
+      }
+    return total;
+  }
+
+  /**
+   * Returns the number of Unicode code points in the specified range of the
+   * given char array.  The first char in the range is at position
+   * offset and the length of the range is count.  Paired surrogates
+   * (supplementary characters are represented by a pair of chars -
+   * one from the high surrogates and one from the low surrogates)
+   * count as just one code point.
+   * @param a the char array to inspect
+   * @param offset the beginning of the range
+   * @param count the length of the range
+   * @return the number of Unicode code points in the given range of the
+   * array
+   * @throws NullPointerException if a is null
+   * @throws IndexOutOfBoundsException if offset or count is negative or if
+   * offset + count is larger than the length of a.
+   * @since 1.5
+   */
+  public static int codePointCount(char[] a, int offset,
+                                   int count)
+  {
+    int len = a.length;
+    // Compare count against len - offset: offset + count can overflow and
+    // wrap negative, which would slip past an "end > len" test.
+    if (offset < 0 || count < 0 || count > len - offset)
+      throw new IndexOutOfBoundsException();
+
+    int end = offset + count;
+    int counter = 0;
+    for (int i = offset; i < end; i++)
+      {
+        counter++;
+        // If there is a pairing, count it only once.
+        if (isHighSurrogate(a[i]) && (i + 1) < end
+            && isLowSurrogate(a[i + 1]))
+          i++;
+      }
+    return counter;
+  }

  /**
   * Determines if a character is a Unicode letter or a Unicode digit. This
@ -3496,23 +3694,6 @@ public final class Character implements Serializable, Comparable
    return value - anotherCharacter.value;
  }

-  /**
-   * Compares an object to this Character.  Assuming the object is a
-   * Character object, this method performs the same comparison as
-   * compareTo(Character).
-   *
-   * @param o object to compare
-   * @return the comparison value
-   * @throws ClassCastException if o is not a Character object
-   * @throws NullPointerException if o is null
-   * @see #compareTo(Character)
-   * @since 1.2
-   */
-  public int compareTo(Object o)
-  {
-    return compareTo((Character) o);
-  }
-
  /**
   * Returns an <code>Character</code> object wrapping the value.
   * In contrast to the <code>Character</code> constructor, this method
@ -3520,7 +3701,7 @@ public final class Character implements Serializable, Comparable
   *
   * @param val the value to wrap
   * @return the <code>Character</code>
-   * 
+   *
   * @since 1.5
   */
  public static Character valueOf(char val)
@ -3529,9 +3710,9 @@ public final class Character implements Serializable, Comparable
      return new Character(val);
    synchronized (charCache)
      {
-    if (charCache[val - MIN_VALUE] == null)
-      charCache[val - MIN_VALUE] = new Character(val);
-    return charCache[val - MIN_VALUE];
+	if (charCache[val - MIN_VALUE] == null)
+	  charCache[val - MIN_VALUE] = new Character(val);
+	return charCache[val - MIN_VALUE];
      }
  }

@ -3559,6 +3740,9 @@ public final class Character implements Serializable, Comparable
   */
  public static char[] toChars(int codePoint)
  {
+    if (!isValidCodePoint(codePoint))
+      throw new IllegalArgumentException("Illegal Unicode code point : "
+                                         + codePoint);
    char[] result = new char[charCount(codePoint)];
    int ignore = toChars(codePoint, result, 0);
    return result;
@ -3776,7 +3960,7 @@ public final class Character implements Serializable, Comparable
   */
  public static int codePointAt(char[] chars, int index, int limit)
  {
-    if (index < 0 || index >= limit || limit < 0 || limit >= chars.length)
+    if (index < 0 || index >= limit || limit < 0 || limit > chars.length)
      throw new IndexOutOfBoundsException();
    char high = chars[index];
    if (! isHighSurrogate(high) || ++index >= limit)