Imported GNU Classpath 0.92

2006-08-14  Mark Wielaard  <mark@klomp.org>

       Imported GNU Classpath 0.92
       * HACKING: Add more importing hints. Update automake version
       requirement.

       * configure.ac (gconf-peer): New enable AC argument.
       Add --disable-gconf-peer and --enable-default-preferences-peer
       to classpath configure when gconf is disabled.
       * scripts/makemake.tcl: Set gnu/java/util/prefs/gconf and
       gnu/java/awt/dnd/peer/gtk to bc. Classify
       gnu/java/security/Configuration.java as generated source file.

       * gnu/java/lang/management/VMGarbageCollectorMXBeanImpl.java,
       gnu/java/lang/management/VMMemoryPoolMXBeanImpl.java,
       gnu/java/lang/management/VMClassLoadingMXBeanImpl.java,
       gnu/java/lang/management/VMRuntimeMXBeanImpl.java,
       gnu/java/lang/management/VMMemoryManagerMXBeanImpl.java,
       gnu/java/lang/management/VMThreadMXBeanImpl.java,
       gnu/java/lang/management/VMMemoryMXBeanImpl.java,
       gnu/java/lang/management/VMCompilationMXBeanImpl.java: New VM stub
       classes.
       * java/lang/management/VMManagementFactory.java: Likewise.
       * java/net/VMURLConnection.java: Likewise.
       * gnu/java/nio/VMChannel.java: Likewise.

       * java/lang/Thread.java (getState): Add stub implementation.
       * java/lang/Class.java (isEnum): Likewise.
       * java/lang/Class.h (isEnum): Likewise.

       * gnu/awt/xlib/XToolkit.java (getClasspathTextLayoutPeer): Removed.

       * javax/naming/spi/NamingManager.java: New override for StackWalker
       functionality.

       * configure, sources.am, Makefile.in, gcj/Makefile.in,
       include/Makefile.in, testsuite/Makefile.in: Regenerated.

From-SVN: r116139
This commit is contained in:
Mark Wielaard 2006-08-14 23:12:35 +00:00
parent abab460491
commit ac1ed908de
1294 changed files with 99479 additions and 35933 deletions

View file

@ -0,0 +1,112 @@
/* gnu/regexp/BacktrackStack.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * A growable last-in, first-out stack of {@link Backtrack} records that
 * the matcher can return to when it has to backtrack.
 *
 * @author Ito Kazumitsu
 */
final class BacktrackStack {

  /** Number of slots allocated when the stack is created. */
  private static final int INITIAL_CAPACITY = 32;

  /** Number of slots added each time the stack runs out of room. */
  private static final int CAPACITY_INCREMENT = 16;

  /** One saved backtracking point. */
  static class Backtrack {
    /** REToken to which to go back. */
    REToken token;

    /** CharIndexed on which matches are being searched for. */
    CharIndexed input;

    /** Private copy of the REMatch to be used by the REToken token. */
    REMatch match;

    /** Some parameter used by the token's backtrack method. */
    Object param;

    /**
     * Records a backtracking point.
     *
     * @param token the REToken to go back to
     * @param input the input being searched
     * @param match the match state; it is cloned, not stored directly
     * @param param extra data for the token's backtrack method
     */
    Backtrack(REToken token, CharIndexed input, REMatch match, Object param) {
      // The caller's REMatch may change before backtracking is needed,
      // so only a clone of it is kept.
      REMatch snapshot = (REMatch) match.clone();
      this.token = token;
      this.input = input;
      this.match = snapshot;
      this.param = param;
    }
  }

  /** Backing array; slots at index {@code size} and above are null. */
  Backtrack[] stack = new Backtrack[INITIAL_CAPACITY];

  /** Number of entries currently on the stack. */
  private int size;

  /** Length of the backing array. */
  private int capacity = INITIAL_CAPACITY;

  /** Creates an empty stack with the initial capacity. */
  BacktrackStack() {
  }

  /** Returns true if no entries are on the stack. */
  boolean empty() {
    return size == 0;
  }

  /** Returns the top entry without removing it. */
  Backtrack peek() {
    int top = size - 1;
    return stack[top];
  }

  /** Removes and returns the top entry, releasing its slot. */
  Backtrack pop() {
    size--;
    Backtrack top = stack[size];
    stack[size] = null;
    return top;
  }

  /** Removes every entry, releasing all occupied slots. */
  void clear() {
    int slot = size;
    while (slot > 0) {
      slot--;
      stack[slot] = null;
    }
    size = 0;
  }

  /**
   * Pushes an entry, growing the backing array by CAPACITY_INCREMENT
   * slots when it is full.
   *
   * @param bt the entry to push
   */
  void push(Backtrack bt) {
    if (size >= capacity) {
      capacity += CAPACITY_INCREMENT;
      Backtrack[] grown = new Backtrack[capacity];
      System.arraycopy(stack, 0, grown, 0, size);
      stack = grown;
    }
    stack[size++] = bt;
  }
}

View file

@ -0,0 +1,116 @@
/* gnu/regexp/CharIndexed.java
Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Internal abstraction that lets the matcher read different kinds of
 * source text in the same way. Built-in concrete classes provide support
 * for String, StringBuffer, InputStream and char[] types.
 * <p>
 * A CharIndexed has a cursor within a block of text. The cursor is
 * advanced with move(), and charAt() returns the character at the cursor
 * position plus a given offset.
 *
 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
 */
public interface CharIndexed {

  /**
   * Value returned by charAt() to signal that the requested index is out
   * of range. 0xFFFF was somewhat arbitrarily chosen.
   */
  char OUT_OF_BOUNDS = '\uFFFF';

  /**
   * Returns the character at the given offset from the current cursor
   * position; offset zero is the cursor itself.
   * <p>
   * The offset may be negative. This happens when using the '^' operator
   * in multiline matching mode or the '\b' or '\&lt;' word boundary
   * operators. The lower bound is currently fixed at -2 (for '^' with a
   * two-character newline).
   *
   * @param index the offset position in the character field to examine
   * @return the character at the specified index, or the OUT_OF_BOUNDS
   *   character defined by this interface
   */
  char charAt(int index);

  /**
   * Shifts the input buffer by a given number of positions.
   *
   * @param index the number of positions to shift by
   * @return true if the new cursor position is valid
   */
  boolean move(int index);

  /**
   * Returns true if the most recent move() operation placed the cursor
   * position at a valid position in the input.
   */
  boolean isValid();

  /**
   * Returns another CharIndexed containing length characters to the left
   * of the given index. The given length is an expected maximum and the
   * returned CharIndexed may not necessarily contain so many characters.
   *
   * @param index the offset from the cursor to look behind from
   * @param length the expected maximum number of characters
   */
  CharIndexed lookBehind(int index, int length);

  /**
   * Returns the effective length of this CharIndexed.
   */
  int length();

  /**
   * Sets the REMatch last found on this input.
   */
  void setLastMatch(REMatch match);

  /**
   * Returns the REMatch last found on this input.
   */
  REMatch getLastMatch();

  /**
   * Returns the anchor.
   */
  int getAnchor();

  /**
   * Sets the anchor.
   */
  void setAnchor(int anchor);
}

View file

@ -0,0 +1,46 @@
/* gnu/regexp/CharIndexedCharArray.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
import java.nio.CharBuffer;
/**
 * CharIndexed over a char array. The array is wrapped in a CharBuffer and
 * all behaviour is inherited from CharIndexedCharSequence.
 */
class CharIndexedCharArray extends CharIndexedCharSequence {

  /**
   * @param str the characters to index
   * @param index the initial anchor passed to the superclass
   */
  CharIndexedCharArray(char[] str, int index) {
    super(CharBuffer.wrap(str), index);
  }
}

View file

@ -0,0 +1,82 @@
/* gnu/regexp/CharIndexedCharSequence.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
import java.io.Serializable;
/**
 * CharIndexed backed by a CharSequence. The cursor is kept as an absolute
 * position ("anchor") into the sequence, and the sequence length is read
 * once when the object is created.
 */
class CharIndexedCharSequence implements CharIndexed, Serializable {
  // Field names and modifiers are left untouched: the class is
  // Serializable and declares no explicit serialVersionUID.
  private CharSequence s;
  private int anchor;
  private int len;
  private REMatch lastMatch;

  /**
   * @param s the text to index
   * @param index the initial anchor (cursor position) within the text
   */
  CharIndexedCharSequence(CharSequence s, int index) {
    this.s = s;
    this.len = s.length();
    this.anchor = index;
  }

  /**
   * Returns the character at anchor + index, or OUT_OF_BOUNDS when that
   * position lies outside the text.
   */
  public char charAt(int index) {
    int pos = anchor + index;
    if (pos < 0 || pos >= len) {
      return OUT_OF_BOUNDS;
    }
    return s.charAt(pos);
  }

  /** Returns true while the anchor is before the end of the text. */
  public boolean isValid() {
    return anchor < len;
  }

  /**
   * Advances the anchor by index and reports whether it is still before
   * the end of the text.
   */
  public boolean move(int index) {
    anchor += index;
    return anchor < len;
  }

  /**
   * Returns a new CharIndexedCharSequence over the same text whose anchor
   * lies up to length characters before anchor + index; the distance is
   * capped at anchor + index.
   */
  public CharIndexed lookBehind(int index, int length) {
    int target = anchor + index;
    int distance = Math.min(length, target);
    return new CharIndexedCharSequence(s, target - distance);
  }

  /** Returns the number of characters from the anchor to the end. */
  public int length() {
    return len - anchor;
  }

  /**
   * Stores a clone of the given match, with the clone's anchor field set
   * to this object's current anchor.
   */
  public void setLastMatch(REMatch match) {
    REMatch copy = (REMatch) match.clone();
    copy.anchor = anchor;
    lastMatch = copy;
  }

  /** Returns the match stored by setLastMatch(), or null if none. */
  public REMatch getLastMatch() {
    return lastMatch;
  }

  /** Returns the current anchor. */
  public int getAnchor() {
    return anchor;
  }

  /** Replaces the current anchor. */
  public void setAnchor(int anchor) {
    this.anchor = anchor;
  }
}

View file

@ -0,0 +1,181 @@
/* gnu/regexp/CharIndexedInputStream.java
Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
// TODO: move(x) shouldn't rely on calling next() x times
/**
 * CharIndexed that pulls its characters from an InputStream. Every byte
 * read is widened directly to a char; no character decoding is done.
 * The stream's mark()/reset() support is used to look ahead of the cursor.
 * <p>
 * Only charAt(), move() and isValid() are supported. The remaining
 * CharIndexed operations throw UnsupportedOperationException.
 */
class CharIndexedInputStream implements CharIndexed {
  private static final int BUFFER_INCREMENT = 1024;
  private static final int UNKNOWN = Integer.MAX_VALUE; // value for end

  private BufferedInputStream br;

  // Index, relative to charAt(0), of the next byte the stream will
  // deliver. Starts at -1 so that we don't try to reset() right away.
  private int index = -1;

  // Read limit handed to mark(); grown on demand by charAt().
  private int bufsize = BUFFER_INCREMENT;

  // Relative index at which the end of the stream was seen, or UNKNOWN.
  private int end = UNKNOWN;

  // The character at the cursor, i.e. charAt(0).
  private char cached = OUT_OF_BOUNDS;

  // Big enough for a \r\n pair
  // lookBehind[0] = most recent
  // lookBehind[1] = second most recent
  private char[] lookBehind = new char[] { OUT_OF_BOUNDS, OUT_OF_BOUNDS };

  /**
   * Wraps the stream (unless it already is a BufferedInputStream), reads
   * the first character and then advances the cursor by index positions.
   *
   * @param str the stream to read from
   * @param index the number of positions to skip initially
   */
  CharIndexedInputStream(InputStream str, int index) {
    if (str instanceof BufferedInputStream) {
      br = (BufferedInputStream) str;
    } else {
      br = new BufferedInputStream(str, BUFFER_INCREMENT);
    }
    next();
    if (index > 0) {
      move(index);
    }
  }

  /**
   * Records the character the cursor is about to leave as the most recent
   * look-behind character, so that charAt(-1) and charAt(-2) reflect the
   * input already passed.
   */
  private void rememberCurrent() {
    lookBehind[1] = lookBehind[0];
    lookBehind[0] = cached;
  }

  /**
   * Skips exactly n bytes unless the end of the stream is reached first.
   * skip() is allowed to skip fewer bytes than requested, so it is
   * repeated; a single read() is used to tell "no progress" from EOF.
   */
  private void skipFully(long n) throws IOException {
    while (n > 0) {
      long skipped = br.skip(n);
      if (skipped > 0) {
        n -= skipped;
      } else if (br.read() == -1) {
        return;
      } else {
        n--;
      }
    }
  }

  /**
   * Advances the cursor by one character.
   *
   * @return true if the cursor now rests on a character, false at the
   *   end of the stream or after an I/O error
   */
  private boolean next() {
    if (end == 1) {
      // charAt(1) is known to be past the end. If the cursor still rests
      // on the last character (the end was found by a charAt() look-ahead)
      // step off it, so that isValid() stops reporting a valid position.
      if (cached != OUT_OF_BOUNDS) {
        rememberCurrent();
        cached = OUT_OF_BOUNDS;
      }
      return false;
    }
    end--; // closer to end
    try {
      if (index != -1) {
        br.reset();
      }
      int i = br.read();
      br.mark(bufsize);
      rememberCurrent();
      if (i == -1) {
        end = 1;
        cached = OUT_OF_BOUNDS;
        return false;
      }
      cached = (char) i;
      index = 1;
    } catch (IOException e) {
      e.printStackTrace();
      cached = OUT_OF_BOUNDS;
      return false;
    }
    return true;
  }

  /**
   * Returns the character at the given offset from the cursor. Offsets
   * -1 and -2 are served from the look-behind buffer and smaller offsets
   * yield OUT_OF_BOUNDS. Positive offsets are read from the stream
   * relative to the mark set by next().
   */
  public char charAt(int index) {
    if (index == 0) {
      return cached;
    } else if (index >= end) {
      return OUT_OF_BOUNDS;
    } else if (index == -1) {
      return lookBehind[0];
    } else if (index == -2) {
      return lookBehind[1];
    } else if (index < -2) {
      return OUT_OF_BOUNDS;
    } else if (index >= bufsize) {
      // Allocate more space in the buffer.
      try {
        while (bufsize <= index) {
          bufsize += BUFFER_INCREMENT;
        }
        br.reset();
        br.mark(bufsize);
        skipFully(index - 1);
      } catch (IOException ignored) {
        // Best effort, as before: fall through to the read below.
      }
    } else if (this.index != index) {
      // The stream is not already positioned at the requested offset.
      try {
        br.reset();
        skipFully(index - 1);
      } catch (IOException ignored) {
        // Best effort, as before: fall through to the read below.
      }
    }
    char ch = OUT_OF_BOUNDS;
    try {
      int i = br.read();
      this.index = index + 1; // this.index is index of next pos relative to charAt(0)
      if (i == -1) {
        // set flag that next should fail next time?
        end = index;
        return ch;
      }
      ch = (char) i;
    } catch (IOException ignored) {
      // A failed read is reported to the caller as OUT_OF_BOUNDS.
    }
    return ch;
  }

  /**
   * Moves the cursor forward by calling next() up to index times,
   * stopping early when next() fails.
   *
   * @return false if a call to next() failed, true otherwise
   */
  public boolean move(int index) {
    // move read position [index] clicks from 'charAt(0)'
    boolean retval = true;
    while (retval && (index-- > 0)) {
      retval = next();
    }
    return retval;
  }

  /** Returns true while the cursor rests on a character. */
  public boolean isValid() {
    return (cached != OUT_OF_BOUNDS);
  }

  /** Not supported for input streams. */
  public CharIndexed lookBehind(int index, int length) {
    throw new UnsupportedOperationException(
        "difficult to look behind for an input stream");
  }

  /** Not supported for input streams. */
  public int length() {
    throw new UnsupportedOperationException(
        "difficult to tell the length for an input stream");
  }

  /** Not supported for input streams. */
  public void setLastMatch(REMatch match) {
    throw new UnsupportedOperationException(
        "difficult to support setLastMatch for an input stream");
  }

  /** Not supported for input streams. */
  public REMatch getLastMatch() {
    throw new UnsupportedOperationException(
        "difficult to support getLastMatch for an input stream");
  }

  /** Not supported for input streams. */
  public int getAnchor() {
    throw new UnsupportedOperationException(
        "difficult to support getAnchor for an input stream");
  }

  /** Not supported for input streams. */
  public void setAnchor(int anchor) {
    throw new UnsupportedOperationException(
        "difficult to support setAnchor for an input stream");
  }
}

View file

@ -0,0 +1,44 @@
/* gnu/regexp/CharIndexedString.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * CharIndexed over a String. All behaviour is inherited from
 * CharIndexedCharSequence.
 */
class CharIndexedString extends CharIndexedCharSequence {

  /**
   * @param str the text to index
   * @param index the initial anchor passed to the superclass
   */
  CharIndexedString(String str, int index) {
    super(str, index);
  }
}

View file

@ -0,0 +1,45 @@
/* gnu/regexp/CharIndexedStringBuffer.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * CharIndexed over a StringBuffer. All behaviour is inherited from
 * CharIndexedCharSequence.
 */
class CharIndexedStringBuffer extends CharIndexedCharSequence {

  /**
   * @param str the text to index
   * @param index the initial anchor passed to the superclass
   */
  CharIndexedStringBuffer(StringBuffer str, int index) {
    super(str, index);
  }
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,182 @@
/* gnu/regexp/REException.java
Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
import java.text.MessageFormat;
/**
* This is the regular expression exception class. An exception of this type
* defines the three attributes:
* <OL>
* <LI> A descriptive message of the error.
* <LI> An integral type code equivalent to one of the statically
* defined symbols listed below.
* <LI> The approximate position in the input string where the error
* occurred.
* </OL>
*
* @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
*/
public class REException extends Exception {
private int type;
private int pos;
// Error conditions from GNU regcomp(3) manual
/**
* Error flag.
* Invalid use of repetition operators such as using
* `*' as the first character.
*/
public static final int REG_BADRPT = 1;
/**
* Error flag.
* Invalid use of back reference operator.
*/
public static final int REG_BADBR = 2;
/**
* Error flag.
* Un-matched brace interval operators.
*/
public static final int REG_EBRACE = 3;
/**
* Error flag.
* Un-matched bracket list operators.
*/
public static final int REG_EBRACK = 4;
/**
* Error flag.
* Invalid use of the range operator, eg. the ending
* point of the range occurs prior to the starting
* point.
*/
public static final int REG_ERANGE = 5;
/**
* Error flag.
* Unknown character class name. <B>Not implemented</B>.
*/
public static final int REG_ECTYPE = 6;
/**
* Error flag.
* Un-matched parenthesis group operators.
*/
public static final int REG_EPAREN = 7;
/**
* Error flag.
* Invalid back reference to a subexpression.
*/
public static final int REG_ESUBREG = 8;
/**
* Error flag.
* Non specific error. <B>Not implemented</B>.
*/
public static final int REG_EEND = 9;
/**
* Error flag.
* Invalid escape sequence. <B>Not implemented</B>.
*/
public static final int REG_ESCAPE = 10;
/**
* Error flag.
* Invalid use of pattern operators such as group or list.
*/
public static final int REG_BADPAT = 11;
/**
* Error flag.
* Compiled regular expression requires a pattern
* buffer larger than 64Kb. <B>Not implemented</B>.
*/
public static final int REG_ESIZE = 12;
/**
* Error flag.
* The regex routines ran out of memory. <B>Not implemented</B>.
*/
public static final int REG_ESPACE = 13;
REException(String msg, int type, int position) {
super(msg);
this.type = type;
this.pos = position;
}
/**
* Returns the type of the exception, one of the constants listed above.
*/
public int getType() {
return type;
}
/**
* Returns the position, relative to the string or character array being
* compiled, where the error occurred. This position is generally the point
* where the error was detected, not necessarily the starting index of
* a bad subexpression.
*/
public int getPosition() {
return pos;
}
/**
* Reports the descriptive message associated with this exception
* as well as its index position in the string or character array
* being compiled.
*/
public String getMessage() {
Object[] args = {new Integer(pos)};
StringBuffer sb = new StringBuffer();
String prefix = RE.getLocalizedMessage("error.prefix");
sb.append(MessageFormat.format(prefix, args));
sb.append('\n');
sb.append(super.getMessage());
return sb.toString();
}
}

View file

@ -0,0 +1,140 @@
/* gnu/regexp/REFilterInputStream.java
Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
import java.io.FilterInputStream;
import java.io.InputStream;
/**
 * An input stream filter that replaces every occurrence of a given RE in
 * the underlying InputStream with replacement text. The replacement is
 * interpolated into the stream at the point where a match is found.
 *
 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
 * @deprecated This class cannot properly handle all character
 * encodings. For proper handling, use the REFilterReader
 * class instead.
 */
public class REFilterInputStream extends FilterInputStream {

  /** The expression searched for. */
  private RE expr;

  /** The replacement pattern substituted for each match. */
  private String replace;

  /** Replacement text of the latest match; null before the first match. */
  private String buffer;

  /** Index into buffer of the next character to hand out. */
  private int bufpos;

  /** Number of input characters consumed so far. */
  private int offset;

  /** Cursor-style view of the input that the expression is matched on. */
  private CharIndexedInputStream stream;

  /**
   * Creates an REFilterInputStream. When reading from this stream,
   * occurrences of patterns matching the supplied regular expression
   * will be replaced with the supplied replacement text (the
   * metacharacters $0 through $9 may be used to refer to the full
   * match or subexpression matches).
   *
   * @param stream The InputStream to be filtered.
   * @param expr The regular expression to search for.
   * @param replace The text pattern to replace matches with.
   */
  public REFilterInputStream(InputStream stream, RE expr, String replace) {
    super(stream);
    this.stream = new CharIndexedInputStream(stream, 0);
    this.expr = expr;
    this.replace = replace;
  }

  /**
   * Reads the next byte from the stream per the general contract of
   * InputStream.read(). Returns -1 on error or end of stream.
   */
  public int read() {
    // Hand out pending replacement text before touching the input again.
    boolean replacementPending = (buffer != null) && (bufpos < buffer.length());
    if (replacementPending) {
      return buffer.charAt(bufpos++);
    }

    // Nothing more to read once the input has no valid position left.
    if (!stream.isValid()) {
      return -1;
    }

    REMatch mymatch = new REMatch(expr.getNumSubs(), offset, 0);
    boolean matched = expr.match(stream, mymatch);
    if (matched) {
      mymatch.end[0] = mymatch.index;
      mymatch.finish(stream);
      // Step over the matched text and account for it in the offset.
      stream.move(mymatch.toString().length());
      offset += mymatch.toString().length();
      buffer = mymatch.substituteInto(replace);
      bufpos = 1;
      // An empty substitution has nothing to hand out, so the current
      // input character is emitted below instead; returning here with an
      // empty buffer would be prone to infinite loops.
      // NOTE(review): that character is emitted without being matched
      // against expr, so adjacent matches with an empty replacement look
      // like they are not all replaced - confirm against RE.match().
      if (buffer.length() > 0) {
        return buffer.charAt(0);
      }
    }

    char ch = stream.charAt(0);
    if (ch != CharIndexed.OUT_OF_BOUNDS) {
      stream.move(1);
      offset++;
      return ch;
    }
    return -1;
  }

  /**
   * Returns false. REFilterInputStream does not support mark() and
   * reset() methods.
   */
  public boolean markSupported() {
    return false;
  }

  /**
   * Reads up to len bytes into b starting at off, one read() at a time.
   *
   * @return the number of bytes stored, or -1 if the end of the stream
   *   was reached before any byte was stored
   */
  public int read(byte[] b, int off, int len) {
    int stored = 0;
    for (int remaining = len; remaining > 0; remaining--) {
      int next = read();
      if (next == -1) {
        return (stored == 0) ? -1 : stored;
      }
      b[off + stored] = (byte) next;
      stored++;
    }
    return stored;
  }

  /** Reads from the stream into the provided array. */
  public int read(byte[] b) {
    return read(b, 0, b.length);
  }
}

View file

@ -0,0 +1,324 @@
/* gnu/regexp/REMatch.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
import java.io.Serializable;
/**
* An instance of this class represents a match
* completed by a gnu.regexp matching function. It can be used
* to obtain relevant information about the location of a match
* or submatch.
*
* @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
*/
public final class REMatch implements Serializable, Cloneable {
  /** Text of the whole match; assigned by finish(). */
  private String matchedText;

  /** Input the match was made against; assigned by finish(). */
  private CharIndexed matchedCharIndexed;

  // These variables are package scope for fast access within the engine
  int eflags; // execution flags this match was made using

  // Offset in source text where match was tried.  This is zero-based;
  // the actual position in the source text is given by (offset + anchor).
  int offset;

  // Anchor position refers to the index into the source input
  // at which the matching operation began.
  // This is also useful for the ANCHORINDEX option.
  int anchor;

  // Package scope; used by RE.
  int index; // used while matching to mark current match position in input

  // start1[i] is set when the i-th subexp starts.  And start1[i] is copied
  // to start[i] when the i-th subexp ends.  So start[i] keeps the previously
  // assigned value while the i-th subexp is being processed. This makes
  // backreference to the i-th subexp within the i-th subexp possible.
  int[] start; // start positions (relative to offset) for each (sub)exp.
  int[] start1; // start positions (relative to offset) for each (sub)exp.
  int[] end; // end positions for the same
  // start[i] == -1 or end[i] == -1 means that the start/end position is void.
  // start[i] == p or end[i] == p where p < 0 and p != -1 means that
  // the actual start/end position is (p+1). Start/end positions may
  // become negative when the subexpression is in a RETokenLookBehind.

  boolean empty; // empty string matched. This flag is used only within
                 // RETokenRepeated.

  BacktrackStack backtrackStack;

  /**
   * Returns a copy of this match whose start, start1 and end arrays
   * are independent copies; all other fields are copied as-is.
   */
  public Object clone() {
    try {
      REMatch copy = (REMatch) super.clone();
      copy.start = (int[]) start.clone();
      copy.start1 = (int[]) start1.clone();
      copy.end = (int[]) end.clone();
      return copy;
    } catch (CloneNotSupportedException e) {
      throw new Error(); // doesn't happen
    }
  }

  /**
   * Takes over the position state of another match.  The start,
   * start1 and end arrays are shared with <code>other</code> (the
   * references are copied, not the arrays), as is the backtrack stack.
   */
  void assignFrom(REMatch other) {
    start = other.start;
    start1 = other.start1;
    end = other.end;
    index = other.index;
    backtrackStack = other.backtrackStack;
  }

  /**
   * Creates a cleared match with room for the whole expression plus
   * <code>subs</code> subexpressions.
   *
   * @param subs number of subexpressions.
   * @param anchor index into the source input at which matching began;
   *        also used as the initial offset.
   * @param eflags execution flags this match is made with.
   */
  REMatch(int subs, int anchor, int eflags) {
    start = new int[subs + 1];
    start1 = new int[subs + 1];
    end = new int[subs + 1];
    this.anchor = anchor;
    this.eflags = eflags;
    clear(anchor);
  }

  /**
   * Completes this match: records the first end[0] characters of
   * <code>text</code> as the matched text, remembers <code>text</code>
   * for later subexpression lookups, voids half-open subexpressions
   * and drops the backtrack stack.
   */
  void finish(CharIndexed text) {
    start[0] = 0;
    StringBuffer sb = new StringBuffer();
    int i;
    for (i = 0; i < end[0]; i++)
      sb.append(text.charAt(i));
    matchedText = sb.toString();
    matchedCharIndexed = text;
    for (i = 0; i < start.length; i++) {
      // If any subexpressions didn't terminate, they don't count
      // TODO check if this code ever gets hit
      if ((start[i] == -1) ^ (end[i] == -1)) {
        start[i] = -1;
        end[i] = -1;
      }
    }
    backtrackStack = null;
  }

  /** Clears the current match and moves the offset to the new index. */
  void clear(int index) {
    offset = index;
    this.index = 0;
    for (int i = 0; i < start.length; i++) {
      start[i] = start1[i] = end[i] = -1;
    }
    backtrackStack = null;
  }

  /**
   * Returns the string matching the pattern.  This makes it convenient
   * to write code like the following:
   * <P>
   * <code>
   * REMatch myMatch = myExpression.getMatch(myString);<br>
   * if (myMatch != null) System.out.println("Regexp found: "+myMatch);
   * </code>
   */
  public String toString() {
    return matchedText;
  }

  /**
   * Returns the index within the input text where the match in its entirety
   * began.
   */
  public int getStartIndex() {
    return offset + start[0];
  }

  /**
   * Returns the index within the input string where the match in
   * its entirety ends.  The return value is the next position after
   * the end of the string; therefore, a match created by the
   * following call:
   *
   * <P>
   * <code>REMatch myMatch = myExpression.getMatch(myString);</code>
   * <P>
   * can be viewed (given that myMatch is not null) by creating
   * <P>
   * <code>String theMatch = myString.substring(myMatch.getStartIndex(),
   * myMatch.getEndIndex());</code>
   * <P>
   * But you can save yourself that work, since the <code>toString()</code>
   * method (above) does exactly that for you.
   */
  public int getEndIndex() {
    return offset + end[0];
  }

  /**
   * Returns the string matching the given subexpression.  The subexpressions
   * are indexed starting with one, not zero.  That is, the subexpression
   * identified by the first set of parentheses in a regular expression
   * could be retrieved from an REMatch by calling match.toString(1).
   *
   * @param sub Index of the subexpression.
   * @return the text of the subexpression, or <code>null</code> if its
   *         start position is void.
   * @throws IndexOutOfBoundsException if <code>sub</code> is negative or
   *         not smaller than the number of stored (sub)expressions.
   */
  public String toString(int sub) {
    if ((sub >= start.length) || sub < 0)
      throw new IndexOutOfBoundsException("No group " + sub);
    if (start[sub] == -1) return null;
    if (start[sub] >= 0 && end[sub] <= matchedText.length())
      return (matchedText.substring(start[sub], end[sub]));
    else {
      // This case occurs with RETokenLookAhead or RETokenLookBehind.
      StringBuffer sb = new StringBuffer();
      int s = start[sub];
      int e = end[sub];
      // Negative values other than -1 encode the position (p+1).
      if (s < 0) s += 1;
      if (e < 0) e += 1;
      for (int i = start[0] + s; i < start[0] + e; i++)
        sb.append(matchedCharIndexed.charAt(i));
      return sb.toString();
    }
  }

  /**
   * Returns the index within the input string used to generate this match
   * where subexpression number <i>sub</i> begins, or <code>-1</code> if
   * the subexpression does not exist.  The initial position is zero.
   *
   * @param sub Subexpression index
   * @deprecated Use getStartIndex(int) instead.
   */
  public int getSubStartIndex(int sub) {
    return getStartIndex(sub);
  }

  /**
   * Returns the index within the input string used to generate this match
   * where subexpression number <i>sub</i> begins, or <code>-1</code> if
   * the subexpression does not exist.  The initial position is zero.
   *
   * @param sub Subexpression index
   * @since gnu.regexp 1.1.0
   */
  public int getStartIndex(int sub) {
    if (sub >= start.length) return -1;
    return toInputIndex(start[sub]);
  }

  /**
   * Returns the index within the input string used to generate this match
   * where subexpression number <i>sub</i> ends, or <code>-1</code> if
   * the subexpression does not exist.  The initial position is zero.
   *
   * @param sub Subexpression index
   * @deprecated Use getEndIndex(int) instead
   */
  public int getSubEndIndex(int sub) {
    return getEndIndex(sub);
  }

  /**
   * Returns the index within the input string used to generate this match
   * where subexpression number <i>sub</i> ends, or <code>-1</code> if
   * the subexpression does not exist.  The initial position is zero.
   *
   * @param sub Subexpression index
   */
  public int getEndIndex(int sub) {
    if (sub >= start.length) return -1;
    return toInputIndex(end[sub]);
  }

  /**
   * Converts a stored start/end value into an index in the input:
   * -1 (void) stays -1, non-negative values are shifted by the offset,
   * and other negative values p stand for position (p+1).
   */
  private int toInputIndex(int position) {
    if (position == -1) return -1;
    return (position >= 0) ? offset + position : offset + position + 1;
  }

  /**
   * Substitute the results of this match to create a new string.
   * This is patterned after PERL, so the tokens to watch out for are
   * <code>$0</code> through <code>$9</code>.  <code>$0</code> matches
   * the full substring matched; <code>$<i>n</i></code> matches
   * subexpression number <i>n</i>.
   * <code>$10, $11, ...</code> may match the 10th, 11th, ... subexpressions
   * if such subexpressions exist.
   * <P>
   * A token naming a subexpression that does not exist, or one that has
   * no text (toString(n) returns <code>null</code>), contributes nothing
   * to the result.
   *
   * @param input A string consisting of literals and <code>$<i>n</i></code> tokens.
   * @return the input with every <code>$<i>n</i></code> token replaced.
   */
  public String substituteInto(String input) {
    // a la Perl, $0 is whole thing, $1 - $9 are subexpressions
    StringBuffer output = new StringBuffer();
    int pos;
    for (pos = 0; pos < input.length() - 1; pos++) {
      if ((input.charAt(pos) == '$') && (Character.isDigit(input.charAt(pos + 1)))) {
        int val = Character.digit(input.charAt(++pos), 10);
        // Extend the group number with further digits only while the
        // longer number still names an existing subexpression.
        int pos1 = pos + 1;
        while (pos1 < input.length() &&
               Character.isDigit(input.charAt(pos1))) {
          int val1 = val * 10 + Character.digit(input.charAt(pos1), 10);
          if (val1 >= start.length) break;
          pos1++;
          val = val1;
        }
        pos = pos1 - 1;
        if (val < start.length) {
          // toString(val) is null for a void group; appending it
          // directly would insert the literal text "null".
          String group = toString(val);
          if (group != null) {
            output.append(group);
          }
        }
      } else output.append(input.charAt(pos));
    }
    // The loop stops one short so that pos+1 is always readable; copy
    // the final character if it was not consumed as part of a token.
    if (pos < input.length()) output.append(input.charAt(pos));
    return output.toString();
  }

  /*  The following are used for debugging purpose
  static String d(REMatch m) {
    if (m == null) return "null";
    else return "[" + m.index + "]";
  }

  String substringUptoIndex(CharIndexed input) {
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < index; i++) {
      sb.append(input.charAt(i));
    }
    return sb.toString();
  }
  */
}

View file

@ -0,0 +1,135 @@
/* gnu/regexp/REMatchEnumeration.java
Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
import java.io.Serializable;
import java.util.Enumeration;
import java.util.NoSuchElementException;
/**
* An REMatchEnumeration enumerates regular expression matches over a
* given input text. You obtain a reference to an enumeration using
* the <code>getMatchEnumeration()</code> methods on an instance of
* RE.
*
* <P>
*
* REMatchEnumeration does lazy computation; that is, it will not
* search for a match until it needs to. If you'd rather just get all
* the matches at once in a big array, use the
* <code>getAllMatches()</code> methods on RE. However, using an
* enumeration can help speed performance when the entire text does
* not need to be searched immediately.
*
* <P>
*
* The enumerated type is especially useful when searching on a Reader
* or InputStream, because the InputStream read position cannot be
* guaranteed after calling <code>getMatch()</code> (see the
* description of that method for an explanation of why). Enumeration
* also saves a lot of overhead required when calling
* <code>getMatch()</code> multiple times.
*
* @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
*/
public class REMatchEnumeration implements Enumeration, Serializable {
  // Tri-state values for the "more" field.
  private static final int YES = 1;
  private static final int MAYBE = 0;
  private static final int NO = -1;

  /** Whether another match is known (YES), ruled out (NO) or not yet searched for (MAYBE). */
  private int more;

  /** The match found by the latest search; handed out by nextMatch(). */
  private REMatch match;

  private RE expr;
  private CharIndexed input;
  private int eflags;

  /** Index passed to the next search. */
  private int index;

  // Package scope constructor is used by RE.getMatchEnumeration()
  REMatchEnumeration(RE expr, CharIndexed input, int index, int eflags) {
    this.expr = expr;
    this.input = input;
    this.index = index;
    this.eflags = eflags;
    this.more = MAYBE;
  }

  /** Returns true if there are more matches in the input text. */
  public boolean hasMoreElements() {
    return hasMoreMatches(null);
  }

  /** Returns true if there are more matches in the input text. */
  public boolean hasMoreMatches() {
    return hasMoreMatches(null);
  }

  /**
   * Returns true if there are more matches in the input text.
   * The given buffer is passed on to the search, which saves the text
   * leading up to the match (or to the end of the input) in it.
   * A search is only performed when the answer is not already known.
   */
  public boolean hasMoreMatches(StringBuffer buffer) {
    if (more != MAYBE) {
      return more == YES;
    }

    match = expr.getMatchImpl(input, index, eflags, buffer);
    if (match == null) {
      more = NO;
      return false;
    }

    // Step past the match; a match ending at 0 still advances by one
    // character so that the next search starts further along.
    int matchEnd = match.end[0];
    if (matchEnd > 0) {
      input.move(matchEnd);
      index = matchEnd + match.offset;
    } else {
      input.move(1);
      index = index + 1;
    }
    more = YES;
    return true;
  }

  /** Returns the next match in the input text. */
  public Object nextElement() throws NoSuchElementException {
    return nextMatch();
  }

  /**
   * Returns the next match in the input text.  This method is provided
   * for convenience to avoid having to explicitly cast the return value
   * to class REMatch.
   *
   * @throws NoSuchElementException if there is no further match.
   */
  public REMatch nextMatch() throws NoSuchElementException {
    if (!hasMoreElements()) {
      throw new NoSuchElementException();
    }
    // Another search is only worthwhile while the input is still valid.
    more = input.isValid() ? MAYBE : NO;
    return match;
  }
}

View file

@ -0,0 +1,563 @@
/* gnu/regexp/RESyntax.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
import java.io.Serializable;
import java.util.BitSet;
/**
* An RESyntax specifies the way a regular expression will be compiled.
* This class provides a number of predefined useful constants for
* emulating popular regular expression syntaxes. Additionally the
* user may construct his or her own syntax, using any combination of the
* syntax bit constants. The syntax is an optional argument to any of the
* matching methods on class RE.
*
* @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
*/
public final class RESyntax implements Serializable {
  /** Line separator taken from the "line.separator" system property. */
  static final String DEFAULT_LINE_SEPARATOR = System.getProperty("line.separator");

  /** Message of the error thrown when a finalized syntax is modified. */
  private static final String SYNTAX_IS_FINAL = RE.getLocalizedMessage("syntax.final");

  /** The syntax bits, indexed by the RE_xxx constants below. */
  private BitSet bits;

  // true for the constant defined syntaxes
  private boolean isFinal = false;

  private String lineSeparator = DEFAULT_LINE_SEPARATOR;

  // Values for constants are bit indexes

  /**
   * Syntax bit.  A backslash acts as an escape character inside lists.
   */
  public static final int RE_BACKSLASH_ESCAPE_IN_LISTS = 0;

  /**
   * Syntax bit.  \? is used instead of ? and \+ instead of +.
   */
  public static final int RE_BK_PLUS_QM = 1;

  /**
   * Syntax bit.  POSIX character classes ([:...:]) are allowed in lists.
   */
  public static final int RE_CHAR_CLASSES = 2;

  /**
   * Syntax bit.  ^ and $ are special everywhere.
   * <B>Not implemented.</B>
   */
  public static final int RE_CONTEXT_INDEP_ANCHORS = 3;

  /**
   * Syntax bit.  Repetition operators are only special in valid positions.
   * <B>Not implemented.</B>
   */
  public static final int RE_CONTEXT_INDEP_OPS = 4;

  /**
   * Syntax bit.  Repetition and alternation operators are invalid
   * at the start and end of a pattern and in other places.
   * <B>Not implemented</B>.
   */
  public static final int RE_CONTEXT_INVALID_OPS = 5;

  /**
   * Syntax bit.  The match-any-character operator (.) matches a newline.
   */
  public static final int RE_DOT_NEWLINE = 6;

  /**
   * Syntax bit.  The match-any-character operator (.) does not match a null.
   */
  public static final int RE_DOT_NOT_NULL = 7;

  /**
   * Syntax bit.  Intervals ({x}, {x,}, {x,y}) are allowed.
   */
  public static final int RE_INTERVALS = 8;

  /**
   * Syntax bit.  No alternation (|), match one-or-more (+), or
   * match zero-or-one (?) operators.
   */
  public static final int RE_LIMITED_OPS = 9;

  /**
   * Syntax bit.  A newline is an alternation operator.
   */
  public static final int RE_NEWLINE_ALT = 10; // impl.

  /**
   * Syntax bit.  Intervals are written { } instead of \{ \}.
   */
  public static final int RE_NO_BK_BRACES = 11;

  /**
   * Syntax bit.  Grouping is written ( ) instead of \( \).
   */
  public static final int RE_NO_BK_PARENS = 12;

  /**
   * Syntax bit.  Backreferences are not allowed.
   */
  public static final int RE_NO_BK_REFS = 13;

  /**
   * Syntax bit.  Alternation is written | instead of \|.
   */
  public static final int RE_NO_BK_VBAR = 14;

  /**
   * Syntax bit.  <B>Not implemented</B>.
   */
  public static final int RE_NO_EMPTY_RANGES = 15;

  /**
   * Syntax bit.  An unmatched right parenthesis (')' or '\)', depending
   * on RE_NO_BK_PARENS) will throw an exception when compiling.
   */
  public static final int RE_UNMATCHED_RIGHT_PAREN_ORD = 16;

  /**
   * Syntax bit.  <B>Not implemented.</B>
   */
  public static final int RE_HAT_LISTS_NOT_NEWLINE = 17;

  /**
   * Syntax bit.  Stingy matching is allowed (+?, *?, ??, {x,y}?).
   */
  public static final int RE_STINGY_OPS = 18;

  /**
   * Syntax bit.  Character class escapes (\d, \D, \s, \S, \w, \W) are allowed.
   */
  public static final int RE_CHAR_CLASS_ESCAPES = 19;

  /**
   * Syntax bit.  (?:xxx) grouping is allowed (the subexpression is not saved).
   */
  public static final int RE_PURE_GROUPING = 20;

  /**
   * Syntax bit.  (?=xxx) and (?!xxx) are allowed; they apply the
   * subexpression to the text following the current position without
   * consuming that text.
   */
  public static final int RE_LOOKAHEAD = 21;

  /**
   * Syntax bit.  Beginning- and end-of-string anchors (\A, \Z) are allowed.
   */
  public static final int RE_STRING_ANCHORS = 22;

  /**
   * Syntax bit.  Embedded comments, (?#comment), are allowed as in Perl5.
   */
  public static final int RE_COMMENTS = 23;

  /**
   * Syntax bit.  Character class escapes are allowed within lists, as in Perl5.
   */
  public static final int RE_CHAR_CLASS_ESC_IN_LISTS = 24;

  /**
   * Syntax bit.  Possessive matching is allowed (++, *+, ?+, {x,y}+).
   */
  public static final int RE_POSSESSIVE_OPS = 25;

  /**
   * Syntax bit.  Embedded flags, (?is-x), are allowed as in Perl5.
   */
  public static final int RE_EMBEDDED_FLAGS = 26;

  /**
   * Syntax bit.  Octal chars (\0377) are allowed, as in Perl5.
   */
  public static final int RE_OCTAL_CHAR = 27;

  /**
   * Syntax bit.  Hex chars (\x1b) are allowed, as in Perl5.
   */
  public static final int RE_HEX_CHAR = 28;

  /**
   * Syntax bit.  Unicode chars (\u1234) are allowed, as in Java 1.4.
   */
  public static final int RE_UNICODE_CHAR = 29;

  /**
   * Syntax bit.  Named properties (\p{P}, \P{p}) are allowed, as in Perl5.
   */
  public static final int RE_NAMED_PROPERTY = 30;

  /**
   * Syntax bit.  Nested character classes ([a-z&&[^p-r]]) are allowed,
   * as in Java 1.4.
   */
  public static final int RE_NESTED_CHARCLASS = 31;

  /** Initial size requested for the bit set of a new syntax. */
  private static final int BIT_TOTAL = 32;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the awk utility.
   */
  public static final RESyntax RE_SYNTAX_AWK;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the ed utility.
   */
  public static final RESyntax RE_SYNTAX_ED;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the egrep utility.
   */
  public static final RESyntax RE_SYNTAX_EGREP;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the GNU Emacs editor.
   */
  public static final RESyntax RE_SYNTAX_EMACS;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the grep utility.
   */
  public static final RESyntax RE_SYNTAX_GREP;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the POSIX awk specification.
   */
  public static final RESyntax RE_SYNTAX_POSIX_AWK;

  /**
   * Predefined syntax.
   * Emulates POSIX basic regular expression support.
   */
  public static final RESyntax RE_SYNTAX_POSIX_BASIC;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the POSIX egrep specification.
   */
  public static final RESyntax RE_SYNTAX_POSIX_EGREP;

  /**
   * Predefined syntax.
   * Emulates POSIX extended regular expression support.
   */
  public static final RESyntax RE_SYNTAX_POSIX_EXTENDED;

  /**
   * Predefined syntax.
   * Emulates POSIX basic minimal regular expressions.
   */
  public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_BASIC;

  /**
   * Predefined syntax.
   * Emulates POSIX extended minimal regular expressions.
   */
  public static final RESyntax RE_SYNTAX_POSIX_MINIMAL_EXTENDED;

  /**
   * Predefined syntax.
   * Emulates regular expression support in the sed utility.
   */
  public static final RESyntax RE_SYNTAX_SED;

  /**
   * Predefined syntax.
   * Emulates regular expression support in Larry Wall's perl, version 4.
   */
  public static final RESyntax RE_SYNTAX_PERL4;

  /**
   * Predefined syntax.
   * Emulates regular expression support in Larry Wall's perl, version 4,
   * using single line mode (/s modifier).
   */
  public static final RESyntax RE_SYNTAX_PERL4_S; // single line mode (/s)

  /**
   * Predefined syntax.
   * Emulates regular expression support in Larry Wall's perl, version 5.
   */
  public static final RESyntax RE_SYNTAX_PERL5;

  /**
   * Predefined syntax.
   * Emulates regular expression support in Larry Wall's perl, version 5,
   * using single line mode (/s modifier).
   */
  public static final RESyntax RE_SYNTAX_PERL5_S;

  /**
   * Predefined syntax.
   * Emulates regular expression support in Java 1.4's java.util.regex
   * package.
   */
  public static final RESyntax RE_SYNTAX_JAVA_1_4;

  static {
    // Define syntaxes

    RE_SYNTAX_EMACS = predefined(null, new int[0]);

    RESyntax posixCommon = predefined(null, new int[] {
      RE_CHAR_CLASSES,
      RE_DOT_NEWLINE,
      RE_DOT_NOT_NULL,
      RE_INTERVALS,
      RE_NO_EMPTY_RANGES
    });

    RE_SYNTAX_POSIX_BASIC = predefined(posixCommon, new int[] {
      RE_BK_PLUS_QM
    });

    RE_SYNTAX_POSIX_EXTENDED = predefined(posixCommon, new int[] {
      RE_CONTEXT_INDEP_ANCHORS,
      RE_CONTEXT_INDEP_OPS,
      RE_NO_BK_BRACES,
      RE_NO_BK_PARENS,
      RE_NO_BK_VBAR,
      RE_UNMATCHED_RIGHT_PAREN_ORD
    });

    RE_SYNTAX_AWK = predefined(null, new int[] {
      RE_BACKSLASH_ESCAPE_IN_LISTS,
      RE_DOT_NOT_NULL,
      RE_NO_BK_PARENS,
      RE_NO_BK_REFS,
      RE_NO_BK_VBAR,
      RE_NO_EMPTY_RANGES,
      RE_UNMATCHED_RIGHT_PAREN_ORD
    });

    RE_SYNTAX_POSIX_AWK = predefined(RE_SYNTAX_POSIX_EXTENDED, new int[] {
      RE_BACKSLASH_ESCAPE_IN_LISTS
    });

    RE_SYNTAX_GREP = predefined(null, new int[] {
      RE_BK_PLUS_QM,
      RE_CHAR_CLASSES,
      RE_HAT_LISTS_NOT_NEWLINE,
      RE_INTERVALS,
      RE_NEWLINE_ALT
    });

    RE_SYNTAX_EGREP = predefined(null, new int[] {
      RE_CHAR_CLASSES,
      RE_CONTEXT_INDEP_ANCHORS,
      RE_CONTEXT_INDEP_OPS,
      RE_HAT_LISTS_NOT_NEWLINE,
      RE_NEWLINE_ALT,
      RE_NO_BK_PARENS,
      RE_NO_BK_VBAR
    });

    RE_SYNTAX_POSIX_EGREP = predefined(RE_SYNTAX_EGREP, new int[] {
      RE_INTERVALS,
      RE_NO_BK_BRACES
    });

    /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff.  */

    RE_SYNTAX_ED = predefined(RE_SYNTAX_POSIX_BASIC, new int[0]);

    RE_SYNTAX_SED = predefined(RE_SYNTAX_POSIX_BASIC, new int[0]);

    RE_SYNTAX_POSIX_MINIMAL_BASIC = predefined(posixCommon, new int[] {
      RE_LIMITED_OPS
    });

    /* Differs from RE_SYNTAX_POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
       replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added.  */

    RE_SYNTAX_POSIX_MINIMAL_EXTENDED = predefined(posixCommon, new int[] {
      RE_CONTEXT_INDEP_ANCHORS,
      RE_CONTEXT_INVALID_OPS,
      RE_NO_BK_BRACES,
      RE_NO_BK_PARENS,
      RE_NO_BK_REFS,
      RE_NO_BK_VBAR,
      RE_UNMATCHED_RIGHT_PAREN_ORD
    });

    /* There is no official Perl spec, but here's a "best guess" */

    RE_SYNTAX_PERL4 = predefined(null, new int[] {
      RE_BACKSLASH_ESCAPE_IN_LISTS,
      RE_CONTEXT_INDEP_ANCHORS,
      RE_CONTEXT_INDEP_OPS,          // except for '{', apparently
      RE_INTERVALS,
      RE_NO_BK_BRACES,
      RE_NO_BK_PARENS,
      RE_NO_BK_VBAR,
      RE_NO_EMPTY_RANGES,
      RE_CHAR_CLASS_ESCAPES          // \d,\D,\w,\W,\s,\S
    });

    RE_SYNTAX_PERL4_S = predefined(RE_SYNTAX_PERL4, new int[] {
      RE_DOT_NEWLINE
    });

    RE_SYNTAX_PERL5 = predefined(RE_SYNTAX_PERL4, new int[] {
      RE_PURE_GROUPING,              // (?:)
      RE_STINGY_OPS,                 // *?,??,+?,{}?
      RE_LOOKAHEAD,                  // (?=)(?!)
      RE_STRING_ANCHORS,             // \A,\Z
      RE_CHAR_CLASS_ESC_IN_LISTS,    // \d,\D,\w,\W,\s,\S within []
      RE_COMMENTS,                   // (?#)
      RE_EMBEDDED_FLAGS,             // (?imsx-imsx)
      RE_OCTAL_CHAR,                 // \0377
      RE_HEX_CHAR,                   // \x1b
      RE_NAMED_PROPERTY              // \p{prop}, \P{prop}
    });

    RE_SYNTAX_PERL5_S = predefined(RE_SYNTAX_PERL5, new int[] {
      RE_DOT_NEWLINE
    });

    RE_SYNTAX_JAVA_1_4 = predefined(RE_SYNTAX_PERL5, new int[] {
      // XXX
      RE_POSSESSIVE_OPS,             // *+,?+,++,{}+
      RE_UNICODE_CHAR,               // \u1234
      RE_NESTED_CHARCLASS            // [a-z&&[^p-r]]
    });
  }

  /**
   * Builds one of the predefined syntaxes: starts from a copy of
   * <code>base</code> (or from an empty syntax when <code>base</code>
   * is null), sets the listed bits in order and makes the result final.
   */
  private static RESyntax predefined(RESyntax base, int[] flags) {
    RESyntax syntax = (base == null) ? new RESyntax() : new RESyntax(base);
    for (int i = 0; i < flags.length; i++) {
      syntax.set(flags[i]);
    }
    return syntax.makeFinal();
  }

  /** Throws IllegalAccessError if this syntax has been made final. */
  private void requireModifiable() {
    if (isFinal) {
      throw new IllegalAccessError(SYNTAX_IS_FINAL);
    }
  }

  /**
   * Construct a new syntax object with all bits turned off.
   * This is equivalent to RE_SYNTAX_EMACS.
   */
  public RESyntax() {
    bits = new BitSet(BIT_TOTAL);
  }

  /**
   * Called internally when constructing predefined syntaxes
   * so their interpretation cannot vary.  Conceivably useful
   * for your syntaxes as well.  Causes IllegalAccessError to
   * be thrown if any attempt to modify the syntax is made.
   *
   * @return this object for convenient chaining
   */
  public RESyntax makeFinal() {
    this.isFinal = true;
    return this;
  }

  /**
   * Construct a new syntax object with all bits set the same
   * as the other syntax.  Only the bits are copied: the new object
   * is modifiable and uses the default line separator.
   */
  public RESyntax(RESyntax other) {
    bits = (BitSet) other.bits.clone();
  }

  /**
   * Check if a given bit is set in this syntax.
   */
  public boolean get(int index) {
    return bits.get(index);
  }

  /**
   * Set a given bit in this syntax.
   *
   * @param index the constant (RESyntax.RE_xxx) bit to set.
   * @return a reference to this object for easy chaining.
   */
  public RESyntax set(int index) {
    requireModifiable();
    bits.set(index);
    return this;
  }

  /**
   * Clear a given bit in this syntax.
   *
   * @param index the constant (RESyntax.RE_xxx) bit to clear.
   * @return a reference to this object for easy chaining.
   */
  public RESyntax clear(int index) {
    requireModifiable();
    bits.clear(index);
    return this;
  }

  /**
   * Changes the line separator string for regular expressions
   * created using this RESyntax.  The default separator is the
   * value returned by the system property "line.separator", which
   * should be correct when reading platform-specific files from a
   * filesystem.  However, many programs may collect input from
   * sources where the line separator is differently specified (for
   * example, in the applet environment, the text box widget
   * interprets line breaks as single-character newlines,
   * regardless of the host platform).
   *
   * Note that setting the line separator to a character or
   * characters that have specific meaning within the current syntax
   * can cause unexpected chronosynclastic infundibula.
   *
   * @return this object for convenient chaining
   */
  public RESyntax setLineSeparator(String aSeparator) {
    requireModifiable();
    lineSeparator = aSeparator;
    return this;
  }

  /**
   * Returns the currently active line separator string.  The default
   * is the platform-dependent system property "line.separator".
   */
  public String getLineSeparator() {
    return lineSeparator;
  }
}

View file

@ -0,0 +1,189 @@
/* gnu/regexp/REToken.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
import java.io.Serializable;
/**
 * Common base class of the tokens that make up a compiled regular
 * expression.  Tokens are linked through <code>next</code>; when a token
 * has no <code>next</code>, {@link #getNext()} falls back to
 * <code>uncle</code>.
 */
abstract class REToken implements Serializable, Cloneable {

  /** The token chained directly after this one, or null if there is none. */
  protected REToken next = null;

  /** Successor reported by {@link #getNext()} when <code>next</code> is null. */
  protected REToken uncle = null;

  /** Subexpression index handed to the constructor. */
  protected int subIndex;

  /**
   * Flag that subclasses pass to {@link #toLowerCase(char, boolean)} and
   * {@link #toUpperCase(char, boolean)} when comparing case-insensitively.
   */
  protected boolean unicodeAware = true;

  /**
   * Returns a shallow copy of this token.
   *
   * @return the copy produced by <code>Object.clone()</code>.
   */
  public Object clone() {
    try {
      return super.clone();
    } catch (CloneNotSupportedException e) {
      // Cannot happen because this class implements Cloneable.  Should it
      // ever happen anyway, keep the original exception as the cause
      // instead of discarding it.
      throw new Error(e);
    }
  }

  /**
   * Creates a token belonging to the given subexpression.
   *
   * @param subIndex the subexpression index to store in this token.
   */
  protected REToken(int subIndex) {
    this.subIndex = subIndex;
  }

  /** Returns the minimum match length; 0 unless overridden. */
  int getMinimumLength() {
    return 0;
  }

  /** Returns the maximum match length; Integer.MAX_VALUE unless overridden. */
  int getMaximumLength() {
    return Integer.MAX_VALUE;
  }

  /** Sets the fallback successor used when this token has no next token. */
  void setUncle(REToken anUncle) {
    uncle = anUncle;
  }

  /**
   * Matches this token with {@link #matchThis} and then the remaining
   * tokens with {@link #next(CharIndexed, REMatch)}.  On success the
   * resulting state is copied into <code>mymatch</code>.
   *
   * @param input Input character sequence.
   * @param mymatch Match state to start from; updated on success.
   * @return true if the match succeeded, false if it failed.
   */
  boolean match(CharIndexed input, REMatch mymatch) {
    REMatch m = matchThis(input, mymatch);
    if (m == null) return false;
    if (next(input, m)) {
      mymatch.assignFrom(m);
      return true;
    }
    return false;
  }

  /**
   * Matches the input against this REToken only.  Chained tokens are
   * not checked.
   * This method is used to define the default match method.
   * Simple subclasses of REToken, for example, one that matches only
   * a single character, should implement this method.
   * Then the default match method will work.  But complicated
   * subclasses of REToken, which need a special match method,
   * do not have to implement this method.
   *
   * @param input Input character sequence.
   * @param mymatch Position at which a match should be found.
   * @return the REMatch describing the match, or null if this token
   *   does not match.
   * @throws UnsupportedOperationException always, in this default
   *   implementation.
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch) {
    throw new UnsupportedOperationException(
      "This REToken does not have a matchThis method");
  }

  /**
   * Matches the token returned by {@link #getNext()}, if any.
   *
   * @return true if the rest of the tokens match or there is no further
   *   token, false if they fail.
   */
  protected boolean next(CharIndexed input, REMatch mymatch) {
    REToken nextToken = getNext();
    if (nextToken == null) return true;
    return nextToken.match(input, mymatch);
  }

  /**
   * Returns the next REToken chained to this REToken: <code>next</code>
   * if it is set, otherwise <code>uncle</code> (which may be null).
   */
  REToken getNext() {
    return (next != null ? next : uncle);
  }

  /**
   * Finds a match at the position specified by the given REMatch.
   * If necessary, adds a BacktrackStack.Backtrack object to backtrackStack
   * of the REMatch found this time so that another possible match
   * may be found when backtrack is called.
   * By default, nothing is added to the backtrackStack.
   *
   * @param input Input character sequence.
   * @param mymatch Position at which a match should be found.
   * @return REMatch object if a match was found, null otherwise.
   */
  REMatch findMatch(CharIndexed input, REMatch mymatch) {
    boolean b = match(input, mymatch);
    if (b) return mymatch;
    return null;
  }

  /**
   * Tells whether {@link #findFixedLengthMatches} is supported.
   *
   * @return false in this default implementation.
   */
  boolean returnsFixedLengthMatches() {
    return false;
  }

  /**
   * Counts repeated matches of this token starting at
   * <code>mymatch.index</code>.  The overriding implementations return a
   * value between 0 and <code>max</code>.
   *
   * @param input Input character sequence.
   * @param mymatch Position at which counting starts.
   * @param max Upper limit of the returned count.
   * @return the number of matches found.
   * @throws UnsupportedOperationException always, in this default
   *   implementation.
   */
  int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
    throw new UnsupportedOperationException(
      "This token does not support findFixedLengthMatches");
  }

  /**
   * Backtrack to another possibility.
   * Ordinary REToken cannot do anything if this method is called.
   *
   * @throws IllegalStateException always, in this default implementation.
   */
  REMatch backtrack(CharIndexed input, REMatch mymatch, Object param) {
    throw new IllegalStateException("This token cannot be backtracked to");
  }

  /**
   * Makes the given token the successor of this one.
   *
   * @param token the token to chain after this one.
   * @return true, meaning the token was accepted as a separate token.
   */
  boolean chain(REToken token) {
    next = token;
    return true; // Token was accepted
  }

  /** Appends a textual form of this token alone to the buffer. */
  abstract void dump(StringBuffer os);

  /**
   * Appends this token and then every token reachable through
   * <code>next</code> to the buffer.  The <code>uncle</code> link is not
   * followed.
   */
  void dumpAll(StringBuffer os) {
    dump(os);
    if (next != null) next.dumpAll(os);
  }

  /** Returns the text that {@link #dump(StringBuffer)} produces. */
  public String toString() {
    StringBuffer os = new StringBuffer();
    dump(os);
    return os.toString();
  }

  /**
   * Converts the character argument to lowercase.
   *
   * @param ch the character to be converted.
   * @param unicodeAware If true, use java.lang.Character#toLowerCase;
   *   otherwise, only US-ASCII characters can be converted.
   * @return the lowercase equivalent of the character, if any;
   *   otherwise, the character itself.
   */
  public static char toLowerCase(char ch, boolean unicodeAware) {
    if (unicodeAware) return Character.toLowerCase(ch);
    if (ch >= 'A' && ch <= 'Z') return (char)(ch + 'a' - 'A');
    return ch;
  }

  /**
   * Converts the character argument to uppercase.
   *
   * @param ch the character to be converted.
   * @param unicodeAware If true, use java.lang.Character#toUpperCase;
   *   otherwise, only US-ASCII characters can be converted.
   * @return the uppercase equivalent of the character, if any;
   *   otherwise, the character itself.
   */
  public static char toUpperCase(char ch, boolean unicodeAware) {
    if (unicodeAware) return Character.toUpperCase(ch);
    if (ch >= 'a' && ch <= 'z') return (char)(ch + 'A' - 'a');
    return ch;
  }
}

View file

@ -0,0 +1,99 @@
/* gnu/regexp/RETokenAny.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Token for the '.' metacharacter.  It consumes exactly one input
 * character, subject to the newline and null restrictions below.
 */
final class RETokenAny extends REToken {
  /** True if '.' can match a newline (RE_DOT_NEWLINE) */
  private boolean newline;

  /** True if '.' can't match a null (RE_DOT_NOT_NULL) */
  private boolean matchNull;

  /**
   * @param subIndex subexpression index passed on to REToken.
   * @param newline whether '\n' is accepted.
   * @param matchNull whether the character 0 is rejected.
   */
  RETokenAny(int subIndex, boolean newline, boolean matchNull) {
    super(subIndex);
    this.newline = newline;
    this.matchNull = matchNull;
  }

  /** Always one character. */
  int getMinimumLength() {
    return 1;
  }

  /** Always one character. */
  int getMaximumLength() {
    return 1;
  }

  /**
   * Consumes the character at <code>mymatch.index</code> if it is
   * acceptable.
   *
   * @return <code>mymatch</code> with its index advanced by one, or null
   *   if the character is rejected.
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch) {
    if (! matchOneChar(input.charAt(mymatch.index))) {
      return null;
    }
    ++mymatch.index;
    return mymatch;
  }

  /**
   * Decides whether a single character is accepted by '.'.
   *
   * @return false for CharIndexed.OUT_OF_BOUNDS, for '\n' when newlines
   *   are not allowed, and for the character 0 when nulls are excluded;
   *   true otherwise.
   */
  boolean matchOneChar(char ch) {
    if (ch == CharIndexed.OUT_OF_BOUNDS) return false;
    if (ch == '\n' && !newline) return false;
    if (ch == 0 && matchNull) return false;
    return true;
  }

  boolean returnsFixedLengthMatches() {
    return true;
  }

  /**
   * Counts how many consecutive characters, starting at
   * <code>mymatch.index</code>, are accepted; stops at <code>max</code>.
   * The match state itself is not modified.
   */
  int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
    int pos = mymatch.index;
    int count = 0;
    while (count < max && matchOneChar(input.charAt(pos++))) {
      count++;
    }
    return count;
  }

  void dump(StringBuffer os) {
    os.append('.');
  }
}

View file

@ -0,0 +1,86 @@
/* gnu/regexp/RETokenBackRef.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Token for a back reference: matches the same text that group
 * <code>num</code> matched earlier in the current match.
 */
final class RETokenBackRef extends REToken {
  /** Number of the referenced group. */
  private int num;

  /** Whether characters are compared ignoring case. */
  private boolean insens;

  RETokenBackRef(int subIndex, int num, boolean insens) {
    super(subIndex);
    this.num = num;
    this.insens = insens;
  }

  // should implement getMinimumLength() -- any ideas?

  /**
   * Compares the input at <code>mymatch.index</code> with the text
   * recorded for group <code>num</code>.
   *
   * @return <code>mymatch</code> advanced past the repeated text, or null
   *   if the group is unknown, unset, or the text differs.
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch) {
    if (num >= mymatch.start.length || num >= mymatch.end.length) {
      return null;
    }
    int from = mymatch.start[num];
    int to = mymatch.end[num];
    if (from == -1 || to == -1) {
      return null; // this shouldn't happen, but...
    }
    // Other negative values are moved up by one.  RETokenLookBehind
    // stores negative positions one lower, so this restores them.
    if (from < 0) from += 1;
    if (to < 0) to += 1;
    for (int i = from; i < to; i++) {
      char actual = input.charAt(mymatch.index + i - from);
      char expected = input.charAt(i);
      if (! sameChar(actual, expected)) {
        return null;
      }
    }
    mymatch.index += to - from;
    return mymatch;
  }

  /**
   * Tells whether <code>c1</code> equals <code>c2</code>, or, when this
   * token is case-insensitive, the lower- or upper-case form of
   * <code>c2</code>.
   */
  private boolean sameChar(char c1, char c2) {
    if (c1 == c2) return true;
    if (! insens) return false;
    return c1 == toLowerCase(c2, unicodeAware)
      || c1 == toUpperCase(c2, unicodeAware);
  }

  void dump(StringBuffer os) {
    os.append('\\').append(num);
  }
}

View file

@ -0,0 +1,128 @@
/* gnu/regexp/RETokenChar.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Token for literal text.  It starts out as a single character and grows
 * into a string when compatible RETokenChar tokens are chained to it.
 */
final class RETokenChar extends REToken {
  /** The literal characters to match, in order. */
  private char[] ch;

  /** Whether characters are compared ignoring case. */
  private boolean insens;

  RETokenChar(int subIndex, char c, boolean ins) {
    super(subIndex);
    insens = ins;
    ch = new char[] { c };
  }

  /** The length of the literal text. */
  int getMinimumLength() {
    return ch.length;
  }

  /** The length of the literal text. */
  int getMaximumLength() {
    return ch.length;
  }

  /**
   * Matches the literal text at <code>mymatch.index</code>.
   *
   * @return <code>mymatch</code> advanced past the text, or null if the
   *   input differs.
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch) {
    int length = ch.length;
    if (! matchOneString(input, mymatch.index)) {
      return null;
    }
    mymatch.index += length;
    return mymatch;
  }

  /**
   * Tells whether the literal text occurs in the input at the given
   * index.
   */
  boolean matchOneString(CharIndexed input, int index) {
    for (int k = 0; k < ch.length; k++) {
      if (! charEquals(input.charAt(index + k), ch[k])) {
        return false;
      }
    }
    return true;
  }

  /**
   * Compares an input character <code>c1</code> with a pattern character
   * <code>c2</code>; in case-insensitive mode the lower- and upper-case
   * forms of <code>c1</code> are accepted as well.
   */
  private boolean charEquals(char c1, char c2) {
    if (c1 == c2) return true;
    return insens
      && (toLowerCase(c1, unicodeAware) == c2
          || toUpperCase(c1, unicodeAware) == c2);
  }

  boolean returnsFixedLengthMatches() {
    return true;
  }

  /**
   * Counts back-to-back occurrences of the literal text starting at
   * <code>mymatch.index</code>, up to <code>max</code>.  The match state
   * itself is not modified.
   */
  int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
    int pos = mymatch.index;
    int count = 0;
    while (count < max && matchOneString(input, pos)) {
      pos += ch.length;
      count++;
    }
    return count;
  }

  /**
   * Overrides REToken.chain() to optimize for strings: a RETokenChar
   * with the same case sensitivity is absorbed into this token instead
   * of being linked after it.
   *
   * @return false if the token was absorbed, otherwise the result of
   *   REToken.chain().
   */
  boolean chain(REToken next) {
    if (! (next instanceof RETokenChar)) {
      return super.chain(next);
    }
    RETokenChar other = (RETokenChar) next;
    if (other.insens != insens) {
      return super.chain(next);
    }
    // assume for now that next can only be one character
    char[] merged = new char[ch.length + other.ch.length];
    System.arraycopy(ch, 0, merged, 0, ch.length);
    System.arraycopy(other.ch, 0, merged, ch.length, other.ch.length);
    ch = merged;
    return false;
  }

  void dump(StringBuffer os) {
    os.append(ch);
  }
}

View file

@ -0,0 +1,109 @@
/* gnu/regexp/RETokenEnd.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Token for the '$' anchor.  It never consumes input; it only checks
 * that the current position is an acceptable end position.
 */
final class RETokenEnd extends REToken {
  /**
   * Line separator that is accepted as an end position in addition to
   * the end of the input, or null if there is none.
   */
  private String newline;

  /**
   * If true, the fixed set of line terminators tested in matchThis is
   * used and <code>newline</code> is not consulted.
   */
  private boolean check_java_line_terminators;

  RETokenEnd(int subIndex, String newline) {
    this(subIndex, newline, false);
  }

  RETokenEnd(int subIndex, String newline, boolean b) {
    super(subIndex);
    this.newline = newline;
    this.check_java_line_terminators = b;
  }

  /** This token is zero-width. */
  int getMaximumLength() {
    return 0;
  }

  /**
   * Checks whether <code>mymatch.index</code> is an end position.
   *
   * @return <code>mymatch</code> unchanged if it is, null otherwise.
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch) {
    char ch = input.charAt(mymatch.index);
    if (ch == CharIndexed.OUT_OF_BOUNDS) {
      // End of input counts unless the REG_NOTEOL execution flag is set.
      if ((mymatch.eflags & RE.REG_NOTEOL) > 0) {
        return null;
      }
      return mymatch;
    }
    if (check_java_line_terminators) {
      switch (ch) {
        case '\n':
          // A line feed directly after a carriage return is rejected.
          return (input.charAt(mymatch.index - 1) == '\r') ? null : mymatch;
        case '\r':
        case '\u0085': // A next-line character
        case '\u2028': // A line-separator character
        case '\u2029': // A paragraph-separator character
          return mymatch;
        default:
          return null;
      }
    }
    if (newline == null) {
      return null;
    }
    // The input must continue with the complete newline string.
    int i = 0; // position in newline
    do {
      if (ch != newline.charAt(i)) {
        return null;
      }
      ++i;
      ch = input.charAt(mymatch.index + i);
    } while (i < newline.length());
    return mymatch;
  }

  boolean returnsFixedLengthMatches() {
    return true;
  }

  /**
   * Tries the match on copies of this token and of the match state.
   *
   * @return <code>max</code> if the copy matches, 0 otherwise.
   */
  int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
    REMatch probe = (REMatch) mymatch.clone();
    REToken copy = (REToken) this.clone();
    copy.chain(null);
    return copy.match(input, probe) ? max : 0;
  }

  void dump(StringBuffer os) {
    os.append('$');
  }
}

View file

@ -0,0 +1,72 @@
/* gnu/regexp/RETokenEndOfPreviousMatch.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Token for the "\G" anchor: matches at the position where the previous
 * match ended.  It never consumes input.
 */
class RETokenEndOfPreviousMatch extends RETokenStart {

  RETokenEndOfPreviousMatch(int subIndex) {
    super(subIndex, null);
  }

  /** This token is zero-width. */
  int getMaximumLength() {
    return 0;
  }

  /**
   * Checks whether the current position is the end of the last match
   * reported by the input.
   *
   * @param input Input character sequence; supplies the last match and
   *   the anchor.
   * @param mymatch Position to check.
   * @return the result of RETokenStart.matchThis if there is no last
   *   match; otherwise <code>mymatch</code> if
   *   <code>input.getAnchor() + mymatch.index</code> equals
   *   <code>lastMatch.anchor + lastMatch.index</code>, and null if not.
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch) {
    REMatch lastMatch = input.getLastMatch();
    if (lastMatch == null) return super.matchThis(input, mymatch);
    if (input.getAnchor()+mymatch.index ==
        lastMatch.anchor+lastMatch.index) {
      return mymatch;
    }
    else {
      return null;
    }
  }

  /**
   * Reports that findFixedLengthMatches is supported.  This is the
   * override of REToken.returnsFixedLengthMatches(); it used to be
   * misspelled and therefore overrode nothing.
   */
  boolean returnsFixedLengthMatches() { return true; }

  /**
   * Former, misspelled name of {@link #returnsFixedLengthMatches()}.
   * Kept only so that code written against the old name still compiles.
   */
  boolean returnsFixedLengthmatches() { return returnsFixedLengthMatches(); }

  /**
   * @return <code>max</code> if this token matches at the current
   *   position, 0 otherwise.
   */
  int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
    if (matchThis(input, mymatch) != null) return max;
    else return 0;
  }

  void dump(StringBuffer os) {
    os.append("\\G");
  }
}

View file

@ -0,0 +1,66 @@
/* gnu/regexp/RETokenEndSub.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Zero-width token that records the boundaries of subexpression
 * <code>subIndex</code> in the match state when it is reached.
 */
final class RETokenEndSub extends REToken {

  RETokenEndSub(int subIndex) {
    super(subIndex);
  }

  /** This token is zero-width. */
  int getMaximumLength() {
    return 0;
  }

  /**
   * Records the group boundaries; always succeeds.
   *
   * @return <code>mymatch</code>.
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch) {
    recordGroup(mymatch);
    return mymatch;
  }

  /**
   * Records the group boundaries and then continues with
   * REToken.findMatch.
   */
  REMatch findMatch(CharIndexed input, REMatch mymatch) {
    recordGroup(mymatch);
    return super.findMatch(input, mymatch);
  }

  /**
   * Copies <code>start1[subIndex]</code> to <code>start[subIndex]</code>
   * and stores the current index in <code>end[subIndex]</code>.
   */
  private void recordGroup(REMatch m) {
    m.start[subIndex] = m.start1[subIndex];
    m.end[subIndex] = m.index;
  }

  void dump(StringBuffer os) {
    // handled by RE
    // But add something for debugging.
    os.append("(?#RETokenEndSub subIndex=").append(subIndex).append(")");
  }
}

View file

@ -0,0 +1,78 @@
/* gnu/regexp/RETokenIndependent.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Token for an independent group, dumped as "(?>...)".  Once the wrapped
 * expression has matched, its recorded backtracking alternatives are
 * discarded.
 *
 * @author Ito Kazumitsu
 */
final class RETokenIndependent extends REToken
{
  /** The wrapped expression. */
  REToken re;

  RETokenIndependent(REToken re) throws REException {
    super(0);
    this.re = re;
  }

  /** Same as the wrapped expression. */
  int getMinimumLength() {
    return re.getMinimumLength();
  }

  /** Same as the wrapped expression. */
  int getMaximumLength() {
    return re.getMaximumLength();
  }

  /**
   * Matches the wrapped expression.
   *
   * @return <code>mymatch</code> on success, with its backtrack stack
   *   (if any) cleared; null on failure.
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch)
  {
    if (! re.match(input, mymatch)) {
      return null;
    }
    // Once we have found a match, we do not see other possible matches.
    if (mymatch.backtrackStack != null) {
      mymatch.backtrackStack.clear();
    }
    return mymatch;
  }

  void dump(StringBuffer os) {
    os.append("(?>");
    re.dumpAll(os);
    os.append(')');
  }
}

View file

@ -0,0 +1,80 @@
/* gnu/regexp/RETokenLookAhead.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Token for a lookahead assertion, positive "(?=...)" or negative
 * "(?!...)".  The current index is never advanced.
 *
 * @since gnu.regexp 1.1.3
 * @author Shashank Bapat
 */
final class RETokenLookAhead extends REToken
{
  /** The expression that is tried at the current position. */
  REToken re;

  /** True for a negative assertion. */
  boolean negative;

  RETokenLookAhead(REToken re, boolean negative) throws REException {
    super(0);
    this.re = re;
    this.negative = negative;
  }

  /** This token is zero-width. */
  int getMaximumLength() {
    return 0;
  }

  /**
   * Tries the inner expression on a copy of the match state.
   *
   * @return for a negative assertion, <code>mymatch</code> if the inner
   *   expression fails and null if it matches; for a positive assertion,
   *   the copy with its index reset to <code>mymatch.index</code> if the
   *   inner expression matches and null if it fails.
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch)
  {
    REMatch attempt = (REMatch) mymatch.clone();
    boolean matched = re.match(input, attempt);
    if (negative) {
      return matched ? null : mymatch;
    }
    if (! matched) {
      return null;
    }
    attempt.index = mymatch.index;
    return attempt;
  }

  void dump(StringBuffer os) {
    os.append("(?").append(negative ? '!' : '=');
    re.dumpAll(os);
    os.append(')');
  }
}

View file

@ -0,0 +1,118 @@
/* gnu/regexp/RETokenLookBehind.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Token for a lookbehind assertion, positive "(?&lt;=...)" or negative
 * "(?&lt;!...)".  The current index is never advanced.
 *
 * @author Ito Kazumitsu
 */
final class RETokenLookBehind extends REToken
{
  /** The expression that has to match in front of the current position. */
  REToken re;

  /** True for a negative assertion. */
  boolean negative;

  RETokenLookBehind(REToken re, boolean negative) throws REException {
    super(0);
    this.re = re;
    this.negative = negative;
  }

  /** This token is zero-width. */
  int getMaximumLength() {
    return 0;
  }

  /**
   * Tries the inner expression on the view returned by
   * <code>input.lookBehind</code>, using a copy of the match state and a
   * copy of the expression chained to a token that only succeeds at the
   * original position.
   *
   * @param input Input character sequence.
   * @param mymatch Current match state; it is not modified.
   * @return for a negative assertion, <code>mymatch</code> if the inner
   *   expression fails and null if it matches; for a positive assertion,
   *   the copied state (group positions converted back, index and offset
   *   restored) if the inner expression matches and null if it fails.
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch)
  {
    int max = re.getMaximumLength();
    CharIndexed behind = input.lookBehind(mymatch.index, max);
    REMatch trymatch = (REMatch)mymatch.clone();
    // Difference between positions in "behind" and positions in "input".
    int diff = behind.length() - input.length();
    // The original position, expressed as an index into "behind".
    int curIndex = trymatch.index + diff;
    trymatch.index = 0;
    trymatch.offset = 0;
    RETokenMatchHereOnly stopper = new RETokenMatchHereOnly(curIndex);
    REToken re1 = (REToken) re.clone();
    re1.chain(stopper);
    if (re1.match(behind, trymatch)) {
      if (negative) return null;
      for (int i = 0; i < trymatch.start.length; i++) {
        if (trymatch.start[i] != -1 && trymatch.end[i] != -1) {
          // Convert back to positions relative to "input".  Negative
          // results are stored one lower, apparently so that -1 keeps
          // meaning "not set"; RETokenBackRef.matchThis adds the one
          // back.
          trymatch.start[i] -= diff;
          if (trymatch.start[i] < 0) trymatch.start[i] -= 1;
          trymatch.end[i] -= diff;
          if (trymatch.end[i] < 0) trymatch.end[i] -= 1;
        }
      }
      trymatch.index = mymatch.index;
      trymatch.offset = mymatch.offset;
      return trymatch;
    }
    else {
      if (negative) return mymatch;
      return null;
    }
  }

  void dump(StringBuffer os) {
    os.append("(?<");
    os.append(negative ? '!' : '=');
    re.dumpAll(os);
    os.append(')');
  }

  /**
   * Zero-width token that succeeds only when the match has arrived
   * exactly at the index given to the constructor.
   */
  private static class RETokenMatchHereOnly extends REToken {

    int getMaximumLength() { return 0; }

    /** The only index at which this token matches. */
    private int index;

    RETokenMatchHereOnly(int index) {
      super(0);
      this.index = index;
    }

    REMatch matchThis(CharIndexed input, REMatch mymatch) {
      return (index == mymatch.index ? mymatch : null);
    }

    /** Contributes nothing to the dumped expression. */
    void dump(StringBuffer os) {}
  }
}

View file

@ -0,0 +1,315 @@
/* gnu/regexp/RETokenNamedProperty.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
final class RETokenNamedProperty extends REToken {
// Property name as passed to the constructor.
String name;
// If true, matchOneChar also tests the upper- and lower-case forms of
// the character.
boolean insens;
// If true, the result of the test is inverted.
boolean negate;
// Object that decides membership; obtained from getHandler(name) in the
// constructor.
Handler handler;
// Grouped properties: each array lists the java.lang.Character general
// category constants that belong to one group.
// Letter categories.
static final byte[] LETTER = new byte[]
{ Character.LOWERCASE_LETTER,
Character.UPPERCASE_LETTER,
Character.TITLECASE_LETTER,
Character.MODIFIER_LETTER,
Character.OTHER_LETTER };
// Mark categories.
static final byte[] MARK = new byte[]
{ Character.NON_SPACING_MARK,
Character.COMBINING_SPACING_MARK,
Character.ENCLOSING_MARK };
// Separator categories.
static final byte[] SEPARATOR = new byte[]
{ Character.SPACE_SEPARATOR,
Character.LINE_SEPARATOR,
Character.PARAGRAPH_SEPARATOR };
// Symbol categories.
static final byte[] SYMBOL = new byte[]
{ Character.MATH_SYMBOL,
Character.CURRENCY_SYMBOL,
Character.MODIFIER_SYMBOL,
Character.OTHER_SYMBOL };
// Number categories.
static final byte[] NUMBER = new byte[]
{ Character.DECIMAL_DIGIT_NUMBER,
Character.LETTER_NUMBER,
Character.OTHER_NUMBER };
// Punctuation categories.
static final byte[] PUNCTUATION = new byte[]
{ Character.DASH_PUNCTUATION,
Character.START_PUNCTUATION,
Character.END_PUNCTUATION,
Character.CONNECTOR_PUNCTUATION,
Character.OTHER_PUNCTUATION,
Character.INITIAL_QUOTE_PUNCTUATION,
Character.FINAL_QUOTE_PUNCTUATION};
// Remaining categories: control, format, private use, surrogate and
// unassigned.
static final byte[] OTHER = new byte[]
{ Character.CONTROL,
Character.FORMAT,
Character.PRIVATE_USE,
Character.SURROGATE,
Character.UNASSIGNED };
RETokenNamedProperty(int subIndex, String name, boolean insens, boolean negate) throws REException {
super(subIndex);
this.name = name;
this.insens = insens;
this.negate = negate;
handler = getHandler(name);
}
int getMinimumLength() {
return 1;
}
int getMaximumLength() {
return 1;
}
REMatch matchThis(CharIndexed input, REMatch mymatch) {
char ch = input.charAt(mymatch.index);
boolean retval = matchOneChar(ch);
if (retval) {
++mymatch.index;
return mymatch;
}
return null;
}
private boolean matchOneChar(char ch) {
if (ch == CharIndexed.OUT_OF_BOUNDS)
return false;
boolean retval = handler.includes(ch);
if (insens) {
retval = retval ||
handler.includes(toUpperCase(ch, unicodeAware)) ||
handler.includes(toLowerCase(ch, unicodeAware));
}
if (negate) retval = !retval;
return retval;
}
boolean returnsFixedLengthMatches() { return true; }
int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
int index = mymatch.index;
int numRepeats = 0;
while (true) {
if (numRepeats >= max) break;
char ch = input.charAt(index++);
if (! matchOneChar(ch)) break;
numRepeats++;
}
return numRepeats;
}
void dump(StringBuffer os) {
os.append("\\")
.append(negate ? "P" : "p")
.append("{" + name + "}");
}
private abstract static class Handler {
public abstract boolean includes(char c);
}
private Handler getHandler(String name) throws REException {
if (name.equals("Lower") ||
name.equals("Upper") ||
// name.equals("ASCII") ||
name.equals("Alpha") ||
name.equals("Digit") ||
name.equals("Alnum") ||
name.equals("Punct") ||
name.equals("Graph") ||
name.equals("Print") ||
name.equals("Blank") ||
name.equals("Cntrl") ||
name.equals("XDigit") ||
name.equals("Space") ) {
return new POSIXHandler(name);
}
if (name.startsWith("In")) {
try {
name = name.substring(2);
Character.UnicodeBlock block = Character.UnicodeBlock.forName(name);
return new UnicodeBlockHandler(block);
}
catch (IllegalArgumentException e) {
throw new REException("Invalid Unicode block name: " + name, REException.REG_ESCAPE, 0);
}
}
if (name.startsWith("Is")) {
name = name.substring(2);
}
// "grouped properties"
if (name.equals("L"))
return new UnicodeCategoriesHandler(LETTER);
if (name.equals("M"))
return new UnicodeCategoriesHandler(MARK);
if (name.equals("Z"))
return new UnicodeCategoriesHandler(SEPARATOR);
if (name.equals("S"))
return new UnicodeCategoriesHandler(SYMBOL);
if (name.equals("N"))
return new UnicodeCategoriesHandler(NUMBER);
if (name.equals("P"))
return new UnicodeCategoriesHandler(PUNCTUATION);
if (name.equals("C"))
return new UnicodeCategoriesHandler(OTHER);
if (name.equals("Mc"))
return new UnicodeCategoryHandler(Character.COMBINING_SPACING_MARK);
if (name.equals("Pc"))
return new UnicodeCategoryHandler(Character.CONNECTOR_PUNCTUATION);
if (name.equals("Cc"))
return new UnicodeCategoryHandler(Character.CONTROL);
if (name.equals("Sc"))
return new UnicodeCategoryHandler(Character.CURRENCY_SYMBOL);
if (name.equals("Pd"))
return new UnicodeCategoryHandler(Character.DASH_PUNCTUATION);
if (name.equals("Nd"))
return new UnicodeCategoryHandler(Character.DECIMAL_DIGIT_NUMBER);
if (name.equals("Me"))
return new UnicodeCategoryHandler(Character.ENCLOSING_MARK);
if (name.equals("Pe"))
return new UnicodeCategoryHandler(Character.END_PUNCTUATION);
if (name.equals("Pf"))
return new UnicodeCategoryHandler(Character.FINAL_QUOTE_PUNCTUATION);
if (name.equals("Cf"))
return new UnicodeCategoryHandler(Character.FORMAT);
if (name.equals("Pi"))
return new UnicodeCategoryHandler(Character.INITIAL_QUOTE_PUNCTUATION);
if (name.equals("Nl"))
return new UnicodeCategoryHandler(Character.LETTER_NUMBER);
if (name.equals("Zl"))
return new UnicodeCategoryHandler(Character.LINE_SEPARATOR);
if (name.equals("Ll"))
return new UnicodeCategoryHandler(Character.LOWERCASE_LETTER);
if (name.equals("Sm"))
return new UnicodeCategoryHandler(Character.MATH_SYMBOL);
if (name.equals("Lm"))
return new UnicodeCategoryHandler(Character.MODIFIER_LETTER);
if (name.equals("Sk"))
return new UnicodeCategoryHandler(Character.MODIFIER_SYMBOL);
if (name.equals("Mn"))
return new UnicodeCategoryHandler(Character.NON_SPACING_MARK);
if (name.equals("Lo"))
return new UnicodeCategoryHandler(Character.OTHER_LETTER);
if (name.equals("No"))
return new UnicodeCategoryHandler(Character.OTHER_NUMBER);
if (name.equals("Po"))
return new UnicodeCategoryHandler(Character.OTHER_PUNCTUATION);
if (name.equals("So"))
return new UnicodeCategoryHandler(Character.OTHER_SYMBOL);
if (name.equals("Zp"))
return new UnicodeCategoryHandler(Character.PARAGRAPH_SEPARATOR);
if (name.equals("Co"))
return new UnicodeCategoryHandler(Character.PRIVATE_USE);
if (name.equals("Zs"))
return new UnicodeCategoryHandler(Character.SPACE_SEPARATOR);
if (name.equals("Ps"))
return new UnicodeCategoryHandler(Character.START_PUNCTUATION);
if (name.equals("Cs"))
return new UnicodeCategoryHandler(Character.SURROGATE);
if (name.equals("Lt"))
return new UnicodeCategoryHandler(Character.TITLECASE_LETTER);
if (name.equals("Cn"))
return new UnicodeCategoryHandler(Character.UNASSIGNED);
if (name.equals("Lu"))
return new UnicodeCategoryHandler(Character.UPPERCASE_LETTER);
throw new REException("unsupported name " + name, REException.REG_ESCAPE, 0);
}
private static class POSIXHandler extends Handler {
private RETokenPOSIX retoken;
public POSIXHandler(String name) {
int posixId = RETokenPOSIX.intValue(name.toLowerCase());
if (posixId != -1)
retoken = new RETokenPOSIX(0,posixId,false,false);
else
throw new RuntimeException("Unknown posix ID: " + name);
}
public boolean includes(char c) {
return retoken.matchOneChar(c);
}
}
private static class UnicodeCategoryHandler extends Handler {
public UnicodeCategoryHandler(byte category) {
this.category = (int)category;
}
private int category;
public boolean includes(char c) {
return Character.getType(c) == category;
}
}
private static class UnicodeCategoriesHandler extends Handler {
public UnicodeCategoriesHandler(byte[] categories) {
this.categories = categories;
}
private byte[] categories;
public boolean includes(char c) {
int category = Character.getType(c);
for (int i = 0; i < categories.length; i++)
if (category == categories[i])
return true;
return false;
}
}
private static class UnicodeBlockHandler extends Handler {
public UnicodeBlockHandler(Character.UnicodeBlock block) {
this.block = block;
}
private Character.UnicodeBlock block;
public boolean includes(char c) {
Character.UnicodeBlock cblock = Character.UnicodeBlock.of(c);
return (cblock != null && cblock.equals(block));
}
}
}

View file

@ -0,0 +1,280 @@
/* gnu/java/util/regex/RETokenOneOf.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
import java.util.Vector;
import java.util.Stack;
/**
 * Token that matches when one of several alternative tokens matches, or,
 * when negated, when none of them does.  It serves both for alternations
 * of sub-expressions and for character classes.
 */
final class RETokenOneOf extends REToken {

  /** The alternatives; every element is an REToken. */
  private Vector options;

  /** True for a negated set: the token matches only if no option matches. */
  private boolean negative;

  // True if this RETokenOneOf is supposed to match only one character,
  // which is typically the case of a character class expression.
  private boolean matchesOneChar;

  private Vector addition;
  // This Vector addition is used to store nested character classes.
  // For example, if the original expression is
  //    [2-7a-c[f-k][m-z]&&[^p-v][st]]
  // the basic part /2-7a-c/ is stored in the Vector options, and
  // the additional part /[f-k][m-z]&&[^p-v][st]/ is stored in the
  // Vector addition in the following order (Reverse Polish Notation):
  //           -- The matching result of the basic part is assumed here.
  //    [f-k]  -- REToken
  //    "|"    -- or
  //    [m-z]  -- REToken
  //    "|"    -- or
  //    false
  //    [^p-v] -- REToken
  //    "|"    -- or
  //    [st]   -- REToken
  //    "|"    -- or
  //    "&"    -- and
  //
  // As it is clear from the explanation above, the Vector addition is
  // effective only when this REToken originates from a character class
  // expression.

  // This constructor is used for convenience when we know the set beforehand,
  // e.g. \d --> new RETokenOneOf("0123456789",false, ..)
  //      \D --> new RETokenOneOf("0123456789",true, ..)
  RETokenOneOf(int subIndex, String optionsStr, boolean negative, boolean insens) {
    super(subIndex);
    options = new Vector();
    this.negative = negative;
    for (int i = 0; i < optionsStr.length(); i++)
      options.addElement(new RETokenChar(subIndex, optionsStr.charAt(i), insens));
    matchesOneChar = true;
  }

  /**
   * Builds the token from a ready-made list of REToken options.  Only a
   * negated set is treated as a one-character match here.
   */
  RETokenOneOf(int subIndex, Vector options, boolean negative) {
    super(subIndex);
    this.options = options;
    this.negative = negative;
    matchesOneChar = negative;
  }

  /**
   * Builds the token from options plus an RPN list of nested character
   * classes (see the comment on the addition field).  The token is a
   * one-character match when it is negated or when addition is non-null.
   */
  RETokenOneOf(int subIndex, Vector options, Vector addition, boolean negative) {
    super(subIndex);
    this.options = options;
    this.addition = addition;
    this.negative = negative;
    matchesOneChar = (negative || addition != null);
  }

  /** Returns 1 for a one-character token, else the smallest minimum of all options. */
  int getMinimumLength() {
    if (matchesOneChar) return 1;
    int min = Integer.MAX_VALUE;
    int x;
    for (int i = 0; i < options.size(); i++) {
      if ((x = ((REToken) options.elementAt(i)).getMinimumLength()) < min)
        min = x;
    }
    return min;
  }

  /** Returns 1 for a one-character token, else the largest maximum of all options. */
  int getMaximumLength() {
    if (matchesOneChar) return 1;
    int max = 0;
    int x;
    for (int i = 0; i < options.size(); i++) {
      if ((x = ((REToken) options.elementAt(i)).getMaximumLength()) > max)
        max = x;
    }
    return max;
  }

  boolean match(CharIndexed input, REMatch mymatch) {
    if (matchesOneChar) return matchOneChar(input, mymatch);
    else return matchOneRE(input, mymatch);
  }

  /**
   * Matches a single character.  Without nested classes the basic part
   * decides directly (and continues with the rest of the chain).  With
   * nested classes the basic part is only probed on a clone, its result
   * is pushed on a stack, and the RPN program in addition is evaluated;
   * the final stack value decides whether the character is consumed.
   */
  boolean matchOneChar(CharIndexed input, REMatch mymatch) {
    REMatch tryMatch;
    boolean tryOnly;
    if (addition == null) {
      tryMatch = mymatch;
      tryOnly = false;
    }
    else {
      tryMatch = (REMatch) mymatch.clone();
      tryOnly = true;
    }
    boolean b = negative ?
      matchN(input, tryMatch, tryOnly) :
      matchP(input, tryMatch, tryOnly);
    if (addition == null) return b;

    Stack stack = new Stack();
    // Boolean.valueOf reuses the two canonical instances instead of
    // allocating a new wrapper for every step of the evaluation.
    stack.push(Boolean.valueOf(b));
    for (int i = 0; i < addition.size(); i++) {
      Object obj = addition.elementAt(i);
      if (obj instanceof REToken) {
        // Probe on a clone so that the caller's match is left untouched.
        b = ((REToken) obj).match(input, (REMatch) mymatch.clone());
        stack.push(Boolean.valueOf(b));
      }
      else if (obj instanceof Boolean) {
        stack.push(obj);
      }
      else if (obj.equals("|")) {
        b = ((Boolean) stack.pop()).booleanValue();
        b = ((Boolean) stack.pop()).booleanValue() || b;
        stack.push(Boolean.valueOf(b));
      }
      else if (obj.equals("&")) {
        b = ((Boolean) stack.pop()).booleanValue();
        b = ((Boolean) stack.pop()).booleanValue() && b;
        stack.push(Boolean.valueOf(b));
      }
      else {
        throw new RuntimeException("Invalid object found");
      }
    }
    b = ((Boolean) stack.pop()).booleanValue();
    if (b) {
      ++mymatch.index;
      return next(input, mymatch);
    }
    return false;
  }

  /**
   * Negated match: fails at end of input or as soon as any option
   * matches.  Otherwise it reports success right away when tryOnly is
   * set, or consumes one character and continues with the next token.
   */
  private boolean matchN(CharIndexed input, REMatch mymatch, boolean tryOnly) {
    if (input.charAt(mymatch.index) == CharIndexed.OUT_OF_BOUNDS)
      return false;

    for (int i = 0; i < options.size(); i++) {
      REToken tk = (REToken) options.elementAt(i);
      REMatch tryMatch = (REMatch) mymatch.clone();
      if (tk.match(input, tryMatch)) { // an option matched, so the negated set fails
        return false;
      }
    } // try next option

    if (tryOnly) return true;
    ++mymatch.index;
    return next(input, mymatch);
  }

  /**
   * Positive match: tries each option on a clone.  With tryOnly the first
   * matching option is enough; otherwise the rest of the chain must also
   * match, and the successful clone is copied back into mymatch.
   */
  private boolean matchP(CharIndexed input, REMatch mymatch, boolean tryOnly) {
    for (int i = 0; i < options.size(); i++) {
      REToken tk = (REToken) options.elementAt(i);
      REMatch tryMatch = (REMatch) mymatch.clone();
      if (tk.match(input, tryMatch)) { // match was successful
        if (tryOnly) return true;
        if (next(input, tryMatch)) {
          mymatch.assignFrom(tryMatch);
          return true;
        }
      }
    }
    return false;
  }

  private boolean matchOneRE(CharIndexed input, REMatch mymatch) {
    REMatch newMatch = findMatch(input, mymatch);
    if (newMatch != null) {
      mymatch.assignFrom(newMatch);
      return true;
    }
    return false;
  }

  REMatch findMatch(CharIndexed input, REMatch mymatch) {
    if (matchesOneChar) return super.findMatch(input, mymatch);
    return findMatch(input, mymatch, 0);
  }

  /** Resumes the alternation at the option index stored in param (an Integer). */
  REMatch backtrack(CharIndexed input, REMatch mymatch, Object param) {
    return findMatch(input, mymatch, ((Integer) param).intValue());
  }

  /**
   * Tries the options from optionIndex onwards.  Each option is cloned
   * and chained to this token's successor, then matched against a clone
   * of mymatch.  While further options remain, a backtrack entry for the
   * following option (holding the original mymatch) is pushed first and
   * removed again if the current option fails.
   *
   * @return the successful match, or null if no remaining option matches
   */
  private REMatch findMatch(CharIndexed input, REMatch mymatch, int optionIndex) {
    for (int i = optionIndex; i < options.size(); i++) {
      REToken tk = (REToken) options.elementAt(i);
      tk = (REToken) tk.clone();
      tk.chain(getNext());
      REMatch tryMatch = (REMatch) mymatch.clone();
      if (tryMatch.backtrackStack == null) {
        tryMatch.backtrackStack = new BacktrackStack();
      }
      boolean stackPushed = false;
      if (i + 1 < options.size()) {
        tryMatch.backtrackStack.push(new BacktrackStack.Backtrack(
          this, input, mymatch, new Integer(i + 1)));
        stackPushed = true;
      }
      boolean b = tk.match(input, tryMatch);
      if (b) {
        return tryMatch;
      }
      if (stackPushed) tryMatch.backtrackStack.pop();
    }
    return null;
  }

  boolean returnsFixedLengthMatches() { return matchesOneChar; }

  /**
   * For a one-character token, counts how many times in a row (at most
   * max) an unchained clone of this token matches from the position of
   * mymatch; otherwise defers to the superclass.
   */
  int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
    if (!matchesOneChar)
      return super.findFixedLengthMatches(input, mymatch, max);
    int numRepeats = 0;
    REMatch m = (REMatch) mymatch.clone();
    REToken tk = (REToken) this.clone();
    tk.chain(null);
    while (true) {
      if (numRepeats >= max) break;
      m = tk.findMatch(input, m);
      if (m == null) break;
      numRepeats++;
    }
    return numRepeats;
  }

  void dump(StringBuffer os) {
    os.append(negative ? "[^" : "(?:");
    for (int i = 0; i < options.size(); i++) {
      if (!negative && (i > 0)) os.append('|');
      ((REToken) options.elementAt(i)).dumpAll(os);
    }
    os.append(negative ? ']' : ')');
  }
}

View file

@ -0,0 +1,167 @@
/* gnu/java/util/regex/RETokenPOSIX.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Token that matches exactly one character against a POSIX character
 * class such as [:alpha:] or [:digit:].
 */
final class RETokenPOSIX extends REToken {

  /** One of the class constants below; also an index into s_nameTable. */
  int type;
  /** When true, LOWER and UPPER accept any letter. */
  boolean insens;
  /** When true, the class membership result is inverted. */
  boolean negated;

  static final int  ALNUM = 0;
  static final int  ALPHA = 1;
  static final int  BLANK = 2;
  static final int  CNTRL = 3;
  static final int  DIGIT = 4;
  static final int  GRAPH = 5;
  static final int  LOWER = 6;
  static final int  PRINT = 7;
  static final int  PUNCT = 8;
  static final int  SPACE = 9;
  static final int  UPPER = 10;
  static final int XDIGIT = 11;

  // Array indices correspond to constants defined above.
  static final String[] s_nameTable =  {
    "alnum", "alpha", "blank", "cntrl", "digit", "graph", "lower",
    "print", "punct", "space", "upper", "xdigit"
  };

  // Characters counted as punctuation.
  // This feels sloppy, especially for non-U.S. locales.
  private static final String PUNCT_CHARS = "`~!@#$%^&*()-_=+[]{}\\|;:'\"/?,.<>";

  // Letters accepted by XDIGIT in addition to digits.
  private static final String HEX_LETTERS = "abcdefABCDEF";

  // The RE constructor uses this to look up the constant for a string
  static int intValue(String key) {
    int slot = 0;
    while (slot < s_nameTable.length) {
      if (s_nameTable[slot].equals(key)) {
        return slot;
      }
      slot++;
    }
    return -1;
  }

  RETokenPOSIX(int subIndex, int type, boolean insens, boolean negated) {
    super(subIndex);
    this.type = type;
    this.insens = insens;
    this.negated = negated;
  }

  int getMinimumLength() {
    return 1;
  }

  int getMaximumLength() {
    return 1;
  }

  /**
   * Tests the character at the current index; on success advances the
   * index by one and returns the same match object, otherwise null.
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch) {
    if (!matchOneChar(input.charAt(mymatch.index))) {
      return null;
    }
    ++mymatch.index;
    return mymatch;
  }

  /**
   * Reports whether ch satisfies this (possibly negated) class.
   * OUT_OF_BOUNDS never matches, even when negated.
   */
  boolean matchOneChar(char ch) {
    if (ch == CharIndexed.OUT_OF_BOUNDS) {
      return false;
    }
    // For booleans, "negated != x" is x when not negated and !x otherwise.
    return negated != isInClass(ch);
  }

  /** Raw class membership, before negation.  Unknown types match nothing. */
  private boolean isInClass(char ch) {
    switch (type) {
    case ALNUM:
      // Note that there is some debate over whether '_' should be included
      return Character.isLetterOrDigit(ch) || ch == '_';
    case ALPHA:
      return Character.isLetter(ch);
    case BLANK:
      return ch == ' ' || ch == '\t';
    case CNTRL:
      return Character.isISOControl(ch);
    case DIGIT:
      return Character.isDigit(ch);
    case GRAPH:
      return !Character.isWhitespace(ch) && !Character.isISOControl(ch);
    case LOWER:
      return (insens && Character.isLetter(ch)) || Character.isLowerCase(ch);
    case PRINT:
      return (!Character.isWhitespace(ch) && !Character.isISOControl(ch))
        || ch == ' ';
    case PUNCT:
      return PUNCT_CHARS.indexOf(ch) != -1;
    case SPACE:
      return Character.isWhitespace(ch);
    case UPPER:
      return (insens && Character.isLetter(ch)) || Character.isUpperCase(ch);
    case XDIGIT:
      return Character.isDigit(ch) || HEX_LETTERS.indexOf(ch) != -1;
    default:
      return false;
    }
  }

  boolean returnsFixedLengthMatches() {
    return true;
  }

  /**
   * Counts how many consecutive characters, starting at the index of
   * mymatch, belong to this class, stopping at max.
   */
  int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
    int pos = mymatch.index;
    int count = 0;
    while (count < max && matchOneChar(input.charAt(pos++))) {
      count++;
    }
    return count;
  }

  void dump(StringBuffer os) {
    if (negated) {
      os.append('^');
    }
    String className = s_nameTable[type];
    os.append("[:").append(className).append(":]");
  }
}

View file

@ -0,0 +1,100 @@
/* gnu/java/util/regex/RETokenRange.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Token that matches exactly one character lying in an inclusive
 * character range, optionally ignoring case.
 */
final class RETokenRange extends REToken {

  /** Lower bound of the range (inclusive). */
  private char lo;
  /** Upper bound of the range (inclusive). */
  private char hi;
  /** When true, the lower- and upper-case forms of the character are tried too. */
  private boolean insens;

  RETokenRange(int subIndex, char lo, char hi, boolean ins) {
    super(subIndex);
    this.lo = lo;
    this.hi = hi;
    this.insens = ins;
  }

  int getMinimumLength() {
    return 1;
  }

  int getMaximumLength() {
    return 1;
  }

  /**
   * Tests the character at the current index; on success advances the
   * index by one and returns the same match object, otherwise null.
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch) {
    if (!matchOneChar(input.charAt(mymatch.index))) {
      return null;
    }
    ++mymatch.index;
    return mymatch;
  }

  /**
   * Reports whether c is inside the range.  In case-insensitive mode the
   * lower-case form and then the upper-case form are tried as well.
   * OUT_OF_BOUNDS never matches.
   */
  boolean matchOneChar(char c) {
    if (c == CharIndexed.OUT_OF_BOUNDS) {
      return false;
    }
    if (within(c)) {
      return true;
    }
    if (!insens) {
      return false;
    }
    return within(toLowerCase(c, unicodeAware))
      || within(toUpperCase(c, unicodeAware));
  }

  /** Plain inclusive bounds check. */
  private boolean within(char candidate) {
    return lo <= candidate && candidate <= hi;
  }

  boolean returnsFixedLengthMatches() {
    return true;
  }

  /**
   * Counts how many consecutive characters, starting at the index of
   * mymatch, fall inside the range, stopping at max.
   */
  int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
    int pos = mymatch.index;
    int count = 0;
    while (count < max && matchOneChar(input.charAt(pos++))) {
      count++;
    }
    return count;
  }

  void dump(StringBuffer os) {
    os.append(lo);
    os.append('-');
    os.append(hi);
  }
}

View file

@ -0,0 +1,427 @@
/* gnu/java/util/regex/RETokenRepeated.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
// import java.util.Vector;
// import java.util.Stack;
final class RETokenRepeated extends REToken {
private REToken token;
private int min,max;
private boolean stingy;
private boolean possessive;
private int tokenFixedLength;
RETokenRepeated(int subIndex, REToken token, int min, int max) {
super(subIndex);
this.token = token;
this.min = min;
this.max = max;
if (token.returnsFixedLengthMatches()) {
tokenFixedLength = token.getMaximumLength();
}
else {
tokenFixedLength = -1;
}
}
/** Sets the minimal matching mode to true. */
void makeStingy() {
stingy = true;
}
/** Queries if this token has minimal matching enabled. */
boolean isStingy() {
return stingy;
}
/** Sets possessive matching mode to true. */
void makePossessive() {
possessive = true;
}
/** Queries if this token has possessive matching enabled. */
boolean isPossessive() {
return possessive;
}
/**
* The minimum length of a repeated token is the minimum length
* of the token multiplied by the minimum number of times it must
* match.
*/
int getMinimumLength() {
return (min * token.getMinimumLength());
}
int getMaximumLength() {
if (max == Integer.MAX_VALUE) return Integer.MAX_VALUE;
int tmax = token.getMaximumLength();
if (tmax == Integer.MAX_VALUE) return tmax;
return (max * tmax);
}
// The comment "MUST make a clone" below means that some tests
// failed without doing clone(),
private static class DoablesFinder {
private REToken tk;
private CharIndexed input;
private REMatch rematch;
private boolean findFirst;
private DoablesFinder(REToken tk, CharIndexed input, REMatch mymatch) {
this.tk = tk;
this.input = input;
this.rematch = (REMatch) mymatch.clone(); // MUST make a clone
this.rematch.backtrackStack = new BacktrackStack();
findFirst = true;
}
private REMatch find() {
int origin = rematch.index;
REMatch rem;
if (findFirst) {
rem = tk.findMatch(input, rematch);
findFirst = false;
}
else {
while (true) {
if (rematch.backtrackStack.empty()) {
rem = null;
break;
}
BacktrackStack.Backtrack bt = rematch.backtrackStack.pop();
rem = bt.token.backtrack(bt.input, bt.match, bt.param);
if (rem != null) break;
}
}
if (rem == null) return null;
if (rem.index == origin) rem.empty = true;
rematch = rem;
return (REMatch) rem.clone(); // MUST make a clone.
}
boolean noMore() {
return rematch.backtrackStack.empty();
}
}
REMatch findMatch(CharIndexed input, REMatch mymatch) {
if (tokenFixedLength >= 0) return findMatchFixedLength(input, mymatch);
BacktrackStack stack = new BacktrackStack();
stack.push(new StackedInfo(input, 0, mymatch, null, null));
return findMatch(stack);
}
REMatch backtrack(CharIndexed input, REMatch mymatch, Object param) {
if (tokenFixedLength >= 0) return backtrackFixedLength(input, mymatch, param);
return findMatch((BacktrackStack)param);
}
private static class StackedInfo extends BacktrackStack.Backtrack {
int numRepeats;
int[] visited;
DoablesFinder finder;
StackedInfo(CharIndexed input, int numRepeats, REMatch match,
int[] visited, DoablesFinder finder) {
super(null, input, match, null);
this.numRepeats = numRepeats;
this.visited = visited;
this.finder = finder;
}
}
private REMatch findMatch(BacktrackStack stack) {
// Avoid using recursive calls.
MAIN_LOOP:
while (true) {
if (stack.empty()) return null;
StackedInfo si = (StackedInfo)(stack.peek());
CharIndexed input = si.input;
int numRepeats = si.numRepeats;
REMatch mymatch = si.match;
int[] visited = si.visited;
DoablesFinder finder = si.finder;
if (mymatch.backtrackStack == null)
mymatch.backtrackStack = new BacktrackStack();
if (numRepeats >= max) {
stack.pop();
REMatch m1 = matchRest(input, mymatch);
if (m1 != null) {
if (! stack.empty()) {
m1.backtrackStack.push(new BacktrackStack.Backtrack(
this, input, mymatch, stack));
}
return m1;
}
if (stingy) {
continue MAIN_LOOP;
}
return null;
}
if (finder == null) {
finder = new DoablesFinder(token, input, mymatch);
si.finder = finder;
}
if (numRepeats < min) {
while (true) {
REMatch doable = finder.find();
if (doable == null) {
if (stack.empty()) return null;
stack.pop();
continue MAIN_LOOP;
}
if (finder.noMore()) stack.pop();
int newNumRepeats = (doable.empty ? min : numRepeats + 1);
stack.push(new StackedInfo(
input, newNumRepeats, doable, visited, null));
continue MAIN_LOOP;
}
}
if (visited == null) visited = initVisited();
if (stingy) {
REMatch nextMatch = finder.find();
if (nextMatch != null && !nextMatch.empty) {
stack.push(new StackedInfo(
input, numRepeats + 1, nextMatch, visited, null));
}
else {
stack.pop();
}
REMatch m1 = matchRest(input, mymatch);
if (m1 != null) {
if (!stack.empty()) {
m1.backtrackStack.push(new BacktrackStack.Backtrack(
this, input, mymatch, stack));
}
return m1;
}
else {
continue MAIN_LOOP;
}
}
visited = addVisited(mymatch.index, visited);
DO_THIS:
do {
boolean emptyMatchFound = false;
DO_ONE_DOABLE:
while (true) {
REMatch doable = finder.find();
if (doable == null) {
break DO_THIS;
}
if (doable.empty) emptyMatchFound = true;
if (!emptyMatchFound) {
int n = doable.index;
if (! visitedContains(n, visited)) {
visited = addVisited(n, visited);
}
else {
continue DO_ONE_DOABLE;
}
stack.push(new StackedInfo(
input, numRepeats + 1, doable, visited, null));
REMatch m1 = findMatch(stack);
if (possessive) {
return m1;
}
if (m1 != null) {
m1.backtrackStack.push(new BacktrackStack.Backtrack(
this, input, mymatch, stack));
return m1;
}
}
else {
REMatch m1 = matchRest(input, doable);
if (possessive) {
return m1;
}
if (m1 != null) {
if (! stack.empty()) {
m1.backtrackStack.push(new BacktrackStack.Backtrack(
this, input, mymatch, stack));
}
return m1;
}
}
} // DO_ONE_DOABLE
} while (false); // DO_THIS only once;
if (!stack.empty()) {
stack.pop();
}
if (possessive) {
stack.clear();
}
REMatch m1 = matchRest(input, mymatch);
if (m1 != null) {
if (! stack.empty()) {
m1.backtrackStack.push(new BacktrackStack.Backtrack(
this, input, mymatch, stack));
}
return m1;
}
} // MAIN_LOOP
}
boolean match(CharIndexed input, REMatch mymatch) {
REMatch m1 = findMatch(input, mymatch);
if (m1 != null) {
mymatch.assignFrom(m1);
return true;
}
return false;
}
// Array visited is an array of character positions we have already
// visited. visited[0] is used to store the effective length of the
// array.
private static int[] initVisited() {
int[] visited = new int[32];
visited[0] = 0;
return visited;
}
private static boolean visitedContains(int n, int[] visited) {
// Experience tells that for a small array like this,
// simple linear search is faster than binary search.
for (int i = 1; i < visited[0]; i++) {
if (n == visited[i]) return true;
}
return false;
}
private static int[] addVisited(int n, int[] visited) {
if (visitedContains(n, visited)) return visited;
if (visited[0] >= visited.length - 1) {
int[] newvisited = new int[visited.length + 32];
System.arraycopy(visited, 0, newvisited, 0, visited.length);
visited = newvisited;
}
visited[0]++;
visited[visited[0]] = n;
return visited;
}
private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
if (next(input, newMatch)) {
return newMatch;
}
return null;
}
/**
 * Entry point for matching when the repeated token has a fixed length.
 *
 * Asks the token how many repeats it can find (up to {@code max}),
 * derives how many repeat counts between {@code min} and that number are
 * available, picks the first index to try, and delegates to the
 * four-argument overload.
 *
 * @param input the text being matched
 * @param mymatch the current match state; a backtrack stack is created
 *        on it if it has none
 * @return the match found by the four-argument overload, or null when
 *         fewer than {@code min} repeats are available
 */
private REMatch findMatchFixedLength(CharIndexed input, REMatch mymatch) {
  if (mymatch.backtrackStack == null) {
    mymatch.backtrackStack = new BacktrackStack();
  }
  int repeats =
      token.findFixedLengthMatches(input, (REMatch) mymatch.clone(), max);
  // An Integer.MAX_VALUE result is replaced by the minimum repeat count.
  if (repeats == Integer.MAX_VALUE) {
    repeats = min;
  }
  final int candidates = repeats - min + 1;
  if (candidates <= 0) {
    return null;
  }
  // Stingy matching starts from the minimum repeat count, otherwise
  // from the largest one found.
  final int startRepeats = stingy ? min : repeats;
  final int startIndex = mymatch.index + (tokenFixedLength * startRepeats);
  return findMatchFixedLength(input, mymatch, startIndex, candidates);
}
/**
 * Resumes a fixed-length search from saved state.
 *
 * @param input the text being matched
 * @param mymatch the match state to continue from
 * @param param an {@code int[]} whose element 0 is the index to try next
 *        and whose element 1 is the number of attempts left
 * @return the result of the four-argument {@code findMatchFixedLength}
 */
private REMatch backtrackFixedLength(CharIndexed input, REMatch mymatch,
    Object param) {
  final int[] saved = (int[]) param;
  return findMatchFixedLength(input, mymatch, saved[0], saved[1]);
}
/**
 * Tries successive candidate end positions for the repeated fixed-length
 * token until the rest of the pattern matches or the attempts run out.
 *
 * Each round places a clone of {@code mymatch} at the current position
 * and calls {@code matchRest}. The position then moves by one token
 * length: forward when stingy, backward otherwise. In possessive mode the
 * first outcome is returned as is, with no further attempts. When a match
 * is found and attempts remain, a backtrack entry holding the next
 * position and the remaining count is pushed onto the match's stack.
 *
 * @param input the text being matched
 * @param mymatch the match state the attempts are cloned from
 * @param index the first position to try
 * @param count the number of positions that may be tried
 * @return the successful match, or null if no attempt succeeded
 */
private REMatch findMatchFixedLength(CharIndexed input, REMatch mymatch,
    int index, int count) {
  final REMatch probe = (REMatch) mymatch.clone();
  int position = index;
  int remaining = count;
  for (;;) {
    probe.index = position;
    final REMatch result = matchRest(input, probe);
    remaining--;
    // Advance to the position the next attempt (or backtrack) will use.
    position += stingy ? tokenFixedLength : -tokenFixedLength;
    if (possessive) {
      return result;
    }
    if (result != null) {
      if (remaining > 0) {
        result.backtrackStack.push(new BacktrackStack.Backtrack(
            this, input, mymatch, new int[] {position, remaining}));
      }
      return result;
    }
    if (remaining <= 0) {
      return null;
    }
  }
}
/**
 * Appends a textual form of this repeated token to {@code os}: the inner
 * token wrapped in {@code (?:...)}, followed by a quantifier derived from
 * {@code min} and {@code max}, and a trailing {@code ?} when stingy.
 *
 * @param os the buffer to append to
 */
void dump(StringBuffer os) {
  os.append("(?:");
  token.dumpAll(os);
  os.append(')');

  final boolean unbounded = (max == Integer.MAX_VALUE);
  if (unbounded && (min <= 1)) {
    // "*" for zero-or-more, "+" for one-or-more.
    os.append((min == 0) ? '*' : '+');
  } else if ((min == 0) && (max == 1)) {
    os.append('?');
  } else {
    // General {min}, {min,} or {min,max} form.
    os.append('{').append(min);
    if (max > min) {
      os.append(',');
      if (!unbounded) {
        os.append(max);
      }
    }
    os.append('}');
  }

  if (stingy) {
    os.append('?');
  }
}
}

View file

@ -0,0 +1,121 @@
/* gnu/regexp/RETokenStart.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Zero-width token for the start-of-line anchor; it is dumped as
 * {@code ^}.
 */
class RETokenStart extends REToken {
  /** Line separator after which this token also matches; may be null. */
  private final String newline;
  /** When true, the built-in set of line terminators is checked too. */
  private final boolean check_java_line_terminators;

  /**
   * Creates a start anchor that does not check the built-in line
   * terminators.
   *
   * @param subIndex passed through to the REToken constructor
   * @param newline line separator to match after, or null
   */
  RETokenStart(int subIndex, String newline) {
    this(subIndex, newline, false);
  }

  /**
   * Creates a start anchor.
   *
   * @param subIndex passed through to the REToken constructor
   * @param newline line separator to match after, or null
   * @param b whether to also match after the line terminators checked in
   *        {@code matchThis}
   */
  RETokenStart(int subIndex, String newline, boolean b) {
    super(subIndex);
    this.newline = newline;
    this.check_java_line_terminators = b;
  }

  /** This token consumes no input, so its maximum length is 0. */
  int getMaximumLength() {
    return 0;
  }

  /**
   * Tests whether the current position is a line or input start.
   *
   * @param input the text being matched
   * @param mymatch the current match state
   * @return {@code mymatch} if the anchor matches here, otherwise null
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch) {
    // charAt(index-n) may be unknown on a Reader/InputStream. FIXME
    // Match after a newline if in multiline mode
    if (check_java_line_terminators) {
      char ch = input.charAt(mymatch.index - 1);
      if (ch != CharIndexed.OUT_OF_BOUNDS) {
        if (ch == '\n') {
          return mymatch;
        }
        if (ch == '\r') {
          // Between "\r" and "\n" is the middle of one terminator,
          // not the start of a line.
          char ch1 = input.charAt(mymatch.index);
          if (ch1 != '\n') {
            return mymatch;
          }
          return null;
        }
        if (ch == '\u0085') {
          return mymatch; // A next-line character
        }
        if (ch == '\u2028') {
          return mymatch; // A line-separator character
        }
        if (ch == '\u2029') {
          return mymatch; // A paragraph-separator character
        }
      }
    }

    if (newline != null) {
      int len = newline.length();
      if (mymatch.offset >= len) {
        // Compare the len characters before the current index with
        // the configured newline string.
        boolean found = true;
        char z;
        int i = 0; // position in REToken.newline
        char ch = input.charAt(mymatch.index - len);
        do {
          z = newline.charAt(i);
          if (ch != z) {
            found = false;
            break;
          }
          ++i;
          ch = input.charAt(mymatch.index - len + i);
        } while (i < len);
        if (found) {
          return mymatch;
        }
      }
    }

    // Don't match at all if REG_NOTBOL is set.
    if ((mymatch.eflags & RE.REG_NOTBOL) > 0) {
      return null;
    }

    if ((mymatch.eflags & RE.REG_ANCHORINDEX) > 0) {
      return (mymatch.anchor == mymatch.offset) ? mymatch : null;
    } else {
      return ((mymatch.index == 0) && (mymatch.offset == 0)) ? mymatch : null;
    }
  }

  /**
   * Reports that this token always matches a fixed number of characters.
   * The name is spelled as in the sibling zero-width token
   * RETokenWordBoundary so that both tokens expose the same method.
   */
  boolean returnsFixedLengthMatches() {
    return true;
  }

  /**
   * Misspelled original name of {@link #returnsFixedLengthMatches()},
   * retained so that any existing caller of the old name keeps compiling.
   */
  boolean returnsFixedLengthmatches() {
    return returnsFixedLengthMatches();
  }

  /**
   * Returns {@code max} when the anchor matches at the current position
   * and 0 when it does not.
   *
   * @param input the text being matched
   * @param mymatch the current match state
   * @param max the largest repeat count the caller is interested in
   * @return {@code max} on a match, otherwise 0
   */
  int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
    if (matchThis(input, mymatch) != null) {
      return max;
    } else {
      return 0;
    }
  }

  /** Appends {@code ^} to {@code os}. */
  void dump(StringBuffer os) {
    os.append('^');
  }
}

View file

@ -0,0 +1,116 @@
/* gnu/regexp/RETokenWordBoundary.java
Copyright (C) 2006 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * Zero-width token that tests for a word boundary at the current
 * position. A word character here is a letter, a digit or an underscore.
 * Depending on {@code where} the token accepts the beginning of a word,
 * the end of a word, or either; {@code negated} inverts the outcome.
 */
final class RETokenWordBoundary extends REToken {
  private final boolean negated;
  private final int where;

  /** Flag bit: accept the position where a word begins. */
  static final int BEGIN = 1;
  /** Flag bit: accept the position where a word ends. */
  static final int END = 2;

  /**
   * @param subIndex passed through to the REToken constructor
   * @param where {@code BEGIN}, {@code END} or both OR-ed together
   * @param negated whether the result of the test is inverted
   */
  RETokenWordBoundary(int subIndex, int where, boolean negated) {
    super(subIndex);
    this.where = where;
    this.negated = negated;
  }

  /** This token consumes no input, so its maximum length is 0. */
  int getMaximumLength() {
    return 0;
  }

  /** Word characters are letters, digits and the underscore. */
  private static boolean isWordChar(char c) {
    return Character.isLetterOrDigit(c) || (c == '_');
  }

  /**
   * Checks the characters on both sides of the current index.
   *
   * In the string "one two three", these positions are boundaries:
   * |o|n|e| |t|w|o| |t|h|r|e|e|
   * ^ ^ ^ ^ ^ ^
   *
   * @param input the text being matched
   * @param mymatch the current match state
   * @return {@code mymatch} if the (possibly negated) test succeeds,
   *         otherwise null
   */
  REMatch matchThis(CharIndexed input, REMatch mymatch) {
    boolean prevIsWord = false; // is previous character a letter or digit?
    boolean curIsWord = false; // is current character a letter or digit?

    // TODO: Also check REG_ANCHORINDEX vs. anchor
    final boolean anchorFlagSet =
        (mymatch.eflags & RE.REG_ANCHORINDEX) == RE.REG_ANCHORINDEX;
    if (!anchorFlagSet
        || (mymatch.offset + mymatch.index > mymatch.anchor)) {
      final char prev = input.charAt(mymatch.index - 1);
      if (prev != CharIndexed.OUT_OF_BOUNDS) {
        prevIsWord = isWordChar(prev);
      }
    }

    final char cur = input.charAt(mymatch.index);
    if (cur != CharIndexed.OUT_OF_BOUNDS) {
      curIsWord = isWordChar(cur);
    }

    // prevIsWord && !curIsWord: we're at the end of a word (\>)
    // curIsWord && !prevIsWord: we're at the beginning of a word (\<)
    boolean hit = false;
    if ((where & BEGIN) == BEGIN) {
      hit = curIsWord && !prevIsWord;
    }
    if ((where & END) == END) {
      hit ^= (prevIsWord && !curIsWord);
    }

    // Negation flips the outcome.
    return (hit != negated) ? mymatch : null;
  }

  /** Always true for this token. */
  boolean returnsFixedLengthMatches() {
    return true;
  }

  /**
   * Returns {@code max} when the boundary test succeeds at the current
   * position and 0 when it does not.
   */
  int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
    return (matchThis(input, mymatch) != null) ? max : 0;
  }

  /**
   * Appends the escape for this token: {@code \b} or {@code \B} when both
   * sides are accepted, {@code \<} for begin-only, {@code \>} otherwise.
   */
  void dump(StringBuffer os) {
    final String text;
    if (where == (BEGIN | END)) {
      text = negated ? "\\B" : "\\b";
    } else if (where == BEGIN) {
      text = "\\<";
    } else {
      text = "\\>";
    }
    os.append(text);
  }
}

View file

@ -0,0 +1,109 @@
/* gnu/regexp/UncheckedRE.java
Copyright (C) 2001, 2004 Free Software Foundation, Inc.
This file is part of GNU Classpath.
GNU Classpath is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU Classpath is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Classpath; see the file COPYING. If not, write to the
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA.
Linking this library statically or dynamically with other modules is
making a combined work based on this library. Thus, the terms and
conditions of the GNU General Public License cover the whole
combination.
As a special exception, the copyright holders of this library give you
permission to link this library with independent modules to produce an
executable, regardless of the license terms of these independent
modules, and to copy and distribute the resulting executable under
terms of your choice, provided that you also meet, for each linked
independent module, the terms and conditions of the license of that
module. An independent module is a module which is not derived from
or based on this library. If you modify this library, you may extend
this exception to your version of the library, but you are not
obligated to do so. If you do not wish to do so, delete this
exception statement from your version. */
package gnu.java.util.regex;
/**
 * UncheckedRE is a subclass of RE that allows programmers an easier means
 * of programmatically precompiling regular expressions. It is constructed
 * and used in exactly the same manner as an instance of the RE class; the
 * only difference is that its constructors do not throw REException.
 * Instead, if a syntax error is encountered during construction, a
 * RuntimeException will be thrown. The RuntimeException carries the
 * message of the underlying REException and has that exception as its
 * cause.
 * <P>
 * Note that this makes UncheckedRE dangerous if constructed with
 * dynamic data. Do not use UncheckedRE unless you are completely sure
 * that all input being passed to it contains valid, well-formed
 * regular expressions for the syntax specified.
 *
 * @author <A HREF="mailto:wes@cacas.org">Wes Biggs</A>
 * @see gnu.java.util.regex.RE
 * @since gnu.regexp 1.1.4
 */
public final class UncheckedRE extends RE {
  /**
   * Constructs a regular expression pattern buffer without any compilation
   * flags set, and using the default syntax (RESyntax.RE_SYNTAX_PERL5).
   *
   * @param pattern A regular expression pattern, in the form of a String,
   *   StringBuffer or char[]. Other input types will be converted to
   *   strings using the toString() method.
   * @exception RuntimeException The input pattern could not be parsed.
   * @exception NullPointerException The pattern was null.
   */
  public UncheckedRE(Object pattern) {
    this(pattern, 0, RESyntax.RE_SYNTAX_PERL5);
  }

  /**
   * Constructs a regular expression pattern buffer using the specified
   * compilation flags and the default syntax (RESyntax.RE_SYNTAX_PERL5).
   *
   * @param pattern A regular expression pattern, in the form of a String,
   *   StringBuffer, or char[]. Other input types will be converted to
   *   strings using the toString() method.
   * @param cflags The logical OR of any combination of the compilation
   *   flags in the RE class.
   * @exception RuntimeException The input pattern could not be parsed.
   * @exception NullPointerException The pattern was null.
   */
  public UncheckedRE(Object pattern, int cflags) {
    this(pattern, cflags, RESyntax.RE_SYNTAX_PERL5);
  }

  /**
   * Constructs a regular expression pattern buffer using the specified
   * compilation flags and regular expression syntax.
   *
   * @param pattern A regular expression pattern, in the form of a String,
   *   StringBuffer, or char[]. Other input types will be converted to
   *   strings using the toString() method.
   * @param cflags The logical OR of any combination of the compilation
   *   flags in the RE class.
   * @param syntax The type of regular expression syntax to use.
   * @exception RuntimeException The input pattern could not be parsed;
   *   the originating REException is available as the cause.
   * @exception NullPointerException The pattern was null.
   */
  public UncheckedRE(Object pattern, int cflags, RESyntax syntax) {
    try {
      initialize(pattern, cflags, syntax, 0, 0);
    } catch (REException e) {
      // Keep the same message as before, but chain the original
      // exception so its stack trace and details are not lost.
      throw new RuntimeException(e.getMessage(), e);
    }
  }
}