Imported GNU Classpath 0.90
Imported GNU Classpath 0.90 * scripts/makemake.tcl: LocaleData.java moved to gnu/java/locale. * sources.am: Regenerated. * gcj/javaprims.h: Regenerated. * Makefile.in: Regenerated. * gcj/Makefile.in: Regenerated. * include/Makefile.in: Regenerated. * testsuite/Makefile.in: Regenerated. * gnu/java/lang/VMInstrumentationImpl.java: New override. * gnu/java/net/local/LocalSocketImpl.java: Likewise. * gnu/classpath/jdwp/VMMethod.java: Likewise. * gnu/classpath/jdwp/VMVirtualMachine.java: Update to latest interface. * java/lang/Thread.java: Add UncaughtExceptionHandler. * java/lang/reflect/Method.java: Implements GenericDeclaration and isSynthetic(), * java/lang/reflect/Field.java: Likewise. * java/lang/reflect/Constructor.java * java/lang/Class.java: Implements Type, GenericDeclaration, getSimpleName() and getEnclosing*() methods. * java/lang/Class.h: Add new public methods. * java/lang/Math.java: Add signum(), ulp() and log10(). * java/lang/natMath.cc (log10): New function. * java/security/VMSecureRandom.java: New override. * java/util/logging/Logger.java: Updated to latest classpath version. * java/util/logging/LogManager.java: New override. From-SVN: r113887
This commit is contained in:
parent
eaec4980e1
commit
4f9533c772
1640 changed files with 126485 additions and 104808 deletions
112
libjava/classpath/gnu/regexp/BacktrackStack.java
Normal file
112
libjava/classpath/gnu/regexp/BacktrackStack.java
Normal file
|
@ -0,0 +1,112 @@
|
|||
/* gnu/regexp/BacktrackStack.java
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
02110-1301 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version. */
|
||||
|
||||
|
||||
package gnu.regexp;
|
||||
|
||||
/**
|
||||
* An instance of this class represents a stack
|
||||
* used for backtracking.
|
||||
*
|
||||
* @author Ito Kazumitsu</A>
|
||||
*/
|
||||
final class BacktrackStack {
|
||||
|
||||
/** A set of data to be used for backtracking. */
|
||||
static class Backtrack {
|
||||
/** REToken to which to go back */
|
||||
REToken token;
|
||||
/** CharIndexed on which matches are being searched for. */
|
||||
CharIndexed input;
|
||||
/** REMatch to be used by the REToken token. */
|
||||
REMatch match;
|
||||
/** Some parameter used by the token's backtrack method. */
|
||||
Object param;
|
||||
Backtrack(REToken token, CharIndexed input, REMatch match, Object param) {
|
||||
this.token = token;
|
||||
this.input = input;
|
||||
// REMatch may change before backtracking is needed. So we
|
||||
// keep a clone of it.
|
||||
this.match = (REMatch) match.clone();
|
||||
this.param = param;
|
||||
}
|
||||
}
|
||||
|
||||
Backtrack[] stack;
|
||||
private int size;
|
||||
private int capacity;
|
||||
private static final int INITIAL_CAPACITY = 32;
|
||||
private static final int CAPACITY_INCREMENT = 16;
|
||||
|
||||
BacktrackStack() {
|
||||
stack = new Backtrack[INITIAL_CAPACITY];
|
||||
size = 0;
|
||||
capacity = INITIAL_CAPACITY;
|
||||
}
|
||||
|
||||
boolean empty() {
|
||||
return size == 0;
|
||||
}
|
||||
|
||||
Backtrack peek() {
|
||||
return stack[size - 1];
|
||||
}
|
||||
|
||||
Backtrack pop() {
|
||||
Backtrack bt = stack[--size];
|
||||
stack[size] = null;
|
||||
return bt;
|
||||
}
|
||||
|
||||
void clear() {
|
||||
for (int i = 0; i < size; i++) {
|
||||
stack[i] = null;
|
||||
}
|
||||
size = 0;
|
||||
}
|
||||
|
||||
void push(Backtrack bt) {
|
||||
if (size >= capacity) {
|
||||
capacity += CAPACITY_INCREMENT;
|
||||
Backtrack[] newStack = new Backtrack[capacity];
|
||||
System.arraycopy(stack, 0, newStack, 0, size);
|
||||
stack = newStack;
|
||||
}
|
||||
stack[size++] = bt;
|
||||
}
|
||||
|
||||
}
|
|
@ -93,4 +93,24 @@ public interface CharIndexed {
|
|||
* Returns the effective length of this CharIndexed
|
||||
*/
|
||||
int length();
|
||||
|
||||
/**
|
||||
* Sets the REMatch last found on this input.
|
||||
*/
|
||||
void setLastMatch(REMatch match);
|
||||
|
||||
/**
|
||||
* Returns the REMatch last found on this input.
|
||||
*/
|
||||
REMatch getLastMatch();
|
||||
|
||||
/**
|
||||
* Returns the anchor.
|
||||
*/
|
||||
int getAnchor();
|
||||
|
||||
/**
|
||||
* Sets the anchor.
|
||||
*/
|
||||
void setAnchor(int anchor);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/CharIndexedCharArray.java
|
||||
Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -36,36 +36,11 @@ obligated to do so. If you do not wish to do so, delete this
|
|||
exception statement from your version. */
|
||||
|
||||
package gnu.regexp;
|
||||
import java.io.Serializable;
|
||||
import java.nio.CharBuffer;
|
||||
|
||||
class CharIndexedCharArray implements CharIndexed, Serializable {
|
||||
private char[] s;
|
||||
private int anchor;
|
||||
class CharIndexedCharArray extends CharIndexedCharSequence {
|
||||
|
||||
CharIndexedCharArray(char[] str, int index) {
|
||||
s = str;
|
||||
anchor = index;
|
||||
}
|
||||
|
||||
public char charAt(int index) {
|
||||
int pos = anchor + index;
|
||||
return ((pos < s.length) && (pos >= 0)) ? s[pos] : OUT_OF_BOUNDS;
|
||||
}
|
||||
|
||||
public boolean isValid() {
|
||||
return (anchor < s.length);
|
||||
}
|
||||
|
||||
public boolean move(int index) {
|
||||
return ((anchor += index) < s.length);
|
||||
}
|
||||
|
||||
public CharIndexed lookBehind(int index, int length) {
|
||||
if (length > (anchor + index)) length = anchor + index;
|
||||
return new CharIndexedCharArray(s, anchor + index - length);
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return s.length - anchor;
|
||||
super(CharBuffer.wrap(str), index);
|
||||
}
|
||||
}
|
||||
|
|
82
libjava/classpath/gnu/regexp/CharIndexedCharSequence.java
Normal file
82
libjava/classpath/gnu/regexp/CharIndexedCharSequence.java
Normal file
|
@ -0,0 +1,82 @@
|
|||
/* gnu/regexp/CharIndexedCharSequence.java
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
02110-1301 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version. */
|
||||
|
||||
package gnu.regexp;
|
||||
import java.io.Serializable;
|
||||
|
||||
class CharIndexedCharSequence implements CharIndexed, Serializable {
|
||||
private CharSequence s;
|
||||
private int anchor;
|
||||
private int len;
|
||||
|
||||
CharIndexedCharSequence(CharSequence s, int index) {
|
||||
this.s = s;
|
||||
len = s.length();
|
||||
anchor = index;
|
||||
}
|
||||
|
||||
public char charAt(int index) {
|
||||
int pos = anchor + index;
|
||||
return ((pos < len) && (pos >= 0)) ? s.charAt(pos) : OUT_OF_BOUNDS;
|
||||
}
|
||||
|
||||
public boolean isValid() {
|
||||
return (anchor < len);
|
||||
}
|
||||
|
||||
public boolean move(int index) {
|
||||
return ((anchor += index) < len);
|
||||
}
|
||||
|
||||
public CharIndexed lookBehind(int index, int length) {
|
||||
if (length > (anchor + index)) length = anchor + index;
|
||||
return new CharIndexedCharSequence(s, anchor + index - length);
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return len - anchor;
|
||||
}
|
||||
|
||||
private REMatch lastMatch;
|
||||
public void setLastMatch(REMatch match) {
|
||||
lastMatch = (REMatch)match.clone();
|
||||
lastMatch.anchor = anchor;
|
||||
}
|
||||
public REMatch getLastMatch() { return lastMatch; }
|
||||
public int getAnchor() { return anchor; }
|
||||
public void setAnchor(int anchor) { this.anchor = anchor; }
|
||||
}
|
|
@ -155,5 +155,27 @@ class CharIndexedInputStream implements CharIndexed {
|
|||
throw new UnsupportedOperationException(
|
||||
"difficult to tell the length for an input stream");
|
||||
}
|
||||
|
||||
public void setLastMatch(REMatch match) {
|
||||
throw new UnsupportedOperationException(
|
||||
"difficult to support setLastMatch for an input stream");
|
||||
}
|
||||
|
||||
public REMatch getLastMatch() {
|
||||
throw new UnsupportedOperationException(
|
||||
"difficult to support getLastMatch for an input stream");
|
||||
}
|
||||
|
||||
public int getAnchor() {
|
||||
throw new UnsupportedOperationException(
|
||||
"difficult to support getAnchor for an input stream");
|
||||
}
|
||||
|
||||
public void setAnchor(int anchor) {
|
||||
throw new UnsupportedOperationException(
|
||||
"difficult to support setAnchor for an input stream");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/CharIndexedString.java
|
||||
Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -36,38 +36,9 @@ obligated to do so. If you do not wish to do so, delete this
|
|||
exception statement from your version. */
|
||||
|
||||
package gnu.regexp;
|
||||
import java.io.Serializable;
|
||||
|
||||
class CharIndexedString implements CharIndexed, Serializable {
|
||||
private String s;
|
||||
private int anchor;
|
||||
private int len;
|
||||
|
||||
class CharIndexedString extends CharIndexedCharSequence {
|
||||
CharIndexedString(String str, int index) {
|
||||
s = str;
|
||||
len = s.length();
|
||||
anchor = index;
|
||||
}
|
||||
|
||||
public char charAt(int index) {
|
||||
int pos = anchor + index;
|
||||
return ((pos < len) && (pos >= 0)) ? s.charAt(pos) : OUT_OF_BOUNDS;
|
||||
}
|
||||
|
||||
public boolean isValid() {
|
||||
return (anchor < len);
|
||||
}
|
||||
|
||||
public boolean move(int index) {
|
||||
return ((anchor += index) < len);
|
||||
}
|
||||
|
||||
public CharIndexed lookBehind(int index, int length) {
|
||||
if (length > (anchor + index)) length = anchor + index;
|
||||
return new CharIndexedString(s, anchor + index - length);
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return len - anchor;
|
||||
super(str, index);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/CharIndexedStringBuffer.java
|
||||
Copyright (C) 1998-2001, 2004, 2006 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -36,36 +36,10 @@ obligated to do so. If you do not wish to do so, delete this
|
|||
exception statement from your version. */
|
||||
|
||||
package gnu.regexp;
|
||||
import java.io.Serializable;
|
||||
|
||||
class CharIndexedStringBuffer implements CharIndexed, Serializable {
|
||||
private StringBuffer s;
|
||||
private int anchor;
|
||||
class CharIndexedStringBuffer extends CharIndexedCharSequence {
|
||||
|
||||
CharIndexedStringBuffer(StringBuffer str, int index) {
|
||||
s = str;
|
||||
anchor = index;
|
||||
super(str, index);
|
||||
}
|
||||
|
||||
public char charAt(int index) {
|
||||
int pos = anchor + index;
|
||||
return ((pos < s.length()) && (pos >= 0)) ? s.charAt(pos) : OUT_OF_BOUNDS;
|
||||
}
|
||||
|
||||
public boolean isValid() {
|
||||
return (anchor < s.length());
|
||||
}
|
||||
|
||||
public boolean move(int index) {
|
||||
return ((anchor += index) < s.length());
|
||||
}
|
||||
|
||||
public CharIndexed lookBehind(int index, int length) {
|
||||
if (length > (anchor + index)) length = anchor + index;
|
||||
return new CharIndexedStringBuffer(s, anchor + index - length);
|
||||
}
|
||||
|
||||
public int length() {
|
||||
return s.length() - anchor;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -41,6 +41,7 @@ import java.io.Serializable;
|
|||
import java.util.Locale;
|
||||
import java.util.PropertyResourceBundle;
|
||||
import java.util.ResourceBundle;
|
||||
import java.util.Stack;
|
||||
import java.util.Vector;
|
||||
|
||||
/**
|
||||
|
@ -78,13 +79,18 @@ import java.util.Vector;
|
|||
* <P>
|
||||
*
|
||||
* These methods all have similar argument lists. The input can be a
|
||||
* String, a character array, a StringBuffer, or an
|
||||
* CharIndexed, String, a character array, a StringBuffer, or an
|
||||
* InputStream of some sort. Note that when using an
|
||||
* InputStream, the stream read position cannot be guaranteed after
|
||||
* attempting a match (this is not a bug, but a consequence of the way
|
||||
* regular expressions work). Using an REMatchEnumeration can
|
||||
* eliminate most positioning problems.
|
||||
*
|
||||
* Although the input object can be of various types, it is recommended
|
||||
* that it should be a CharIndexed because {@link CharIndexed#getLastMatch()}
|
||||
* can show the last match found on this input, which helps the expression
|
||||
* \G work as the end of the previous match.
|
||||
*
|
||||
* <P>
|
||||
*
|
||||
* The optional index argument specifies the offset from the beginning
|
||||
|
@ -235,6 +241,17 @@ public class RE extends REToken {
|
|||
*/
|
||||
public static final int REG_REPLACE_USE_BACKSLASHESCAPE = 0x0200;
|
||||
|
||||
/**
|
||||
* Compilation flag. Allow whitespace and comments in pattern.
|
||||
* This is equivalent to the "/x" operator in Perl.
|
||||
*/
|
||||
public static final int REG_X_COMMENTS = 0x0400;
|
||||
|
||||
/**
|
||||
* Compilation flag. If set, REG_ICASE is effective only for US-ASCII.
|
||||
*/
|
||||
public static final int REG_ICASE_USASCII = 0x0800;
|
||||
|
||||
/** Returns a string representing the version of the gnu.regexp package. */
|
||||
public static final String version() {
|
||||
return VERSION;
|
||||
|
@ -334,6 +351,7 @@ public class RE extends REToken {
|
|||
// Precalculate these so we don't pay for the math every time we
|
||||
// need to access them.
|
||||
boolean insens = ((cflags & REG_ICASE) > 0);
|
||||
boolean insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
|
||||
|
||||
// Parse pattern into tokens. Does anyone know if it's more efficient
|
||||
// to use char[] than a String.charAt()? I'm assuming so.
|
||||
|
@ -372,6 +390,31 @@ public class RE extends REToken {
|
|||
if (quot)
|
||||
unit.bk = false;
|
||||
|
||||
if (((cflags & REG_X_COMMENTS) > 0) && (!unit.bk) && (!quot)) {
|
||||
if (Character.isWhitespace(unit.ch)) {
|
||||
continue;
|
||||
}
|
||||
if (unit.ch == '#') {
|
||||
for (int i = index; i < pLength; i++) {
|
||||
if (pattern[i] == '\n') {
|
||||
index = i + 1;
|
||||
continue;
|
||||
}
|
||||
else if (pattern[i] == '\r') {
|
||||
if (i + 1 < pLength && pattern[i + 1] == '\n') {
|
||||
index = i + 2;
|
||||
}
|
||||
else {
|
||||
index = i + 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
index = pLength;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// ALTERNATION OPERATOR
|
||||
// \| or | (if RE_NO_BK_VBAR) or newline (if RE_NEWLINE_ALT)
|
||||
// not available if RE_LIMITED_OPS is set
|
||||
|
@ -420,6 +463,7 @@ public class RE extends REToken {
|
|||
else {
|
||||
addToken(currentToken);
|
||||
currentToken = new RETokenChar(subIndex,unit.ch,insens);
|
||||
if (insensUSASCII) currentToken.unicodeAware = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -495,8 +539,8 @@ public class RE extends REToken {
|
|||
case 'd':
|
||||
case 'm':
|
||||
case 's':
|
||||
// case 'u': not supported
|
||||
// case 'x': not supported
|
||||
case 'u':
|
||||
case 'x':
|
||||
case '-':
|
||||
if (!syntax.get(RESyntax.RE_EMBEDDED_FLAGS)) break;
|
||||
// Set or reset syntax flags.
|
||||
|
@ -535,8 +579,20 @@ public class RE extends REToken {
|
|||
newCflags |= REG_DOT_NEWLINE;
|
||||
flagIndex++;
|
||||
break;
|
||||
// case 'u': not supported
|
||||
// case 'x': not supported
|
||||
case 'u':
|
||||
if (negate)
|
||||
newCflags |= REG_ICASE_USASCII;
|
||||
else
|
||||
newCflags &= ~REG_ICASE_USASCII;
|
||||
flagIndex++;
|
||||
break;
|
||||
case 'x':
|
||||
if (negate)
|
||||
newCflags &= ~REG_X_COMMENTS;
|
||||
else
|
||||
newCflags |= REG_X_COMMENTS;
|
||||
flagIndex++;
|
||||
break;
|
||||
case '-':
|
||||
negate = true;
|
||||
flagIndex++;
|
||||
|
@ -553,6 +609,7 @@ public class RE extends REToken {
|
|||
syntax = newSyntax;
|
||||
cflags = newCflags;
|
||||
insens = ((cflags & REG_ICASE) > 0);
|
||||
insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
|
||||
// This can be treated as though it were a comment.
|
||||
comment = true;
|
||||
index = flagIndex - 1;
|
||||
|
@ -565,6 +622,7 @@ public class RE extends REToken {
|
|||
syntax = newSyntax;
|
||||
cflags = newCflags;
|
||||
insens = ((cflags & REG_ICASE) > 0);
|
||||
insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
|
||||
index = flagIndex -1;
|
||||
// Fall through to the next case.
|
||||
}
|
||||
|
@ -673,6 +731,7 @@ public class RE extends REToken {
|
|||
syntax = savedSyntax;
|
||||
cflags = savedCflags;
|
||||
insens = ((cflags & REG_ICASE) > 0);
|
||||
insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
|
||||
flagsSaved = false;
|
||||
}
|
||||
} // not a comment
|
||||
|
@ -785,6 +844,7 @@ public class RE extends REToken {
|
|||
index = index - 2 + ce.len;
|
||||
addToken(currentToken);
|
||||
currentToken = new RETokenChar(subIndex,ce.ch,insens);
|
||||
if (insensUSASCII) currentToken.unicodeAware = false;
|
||||
}
|
||||
|
||||
// BACKREFERENCE OPERATOR
|
||||
|
@ -812,6 +872,7 @@ public class RE extends REToken {
|
|||
int num = parseInt(pattern, numBegin, numEnd-numBegin, 10);
|
||||
|
||||
currentToken = new RETokenBackRef(subIndex,num,insens);
|
||||
if (insensUSASCII) currentToken.unicodeAware = false;
|
||||
index = numEnd;
|
||||
}
|
||||
|
||||
|
@ -860,6 +921,7 @@ public class RE extends REToken {
|
|||
else if (unit.bk && (unit.ch == 'd') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
|
||||
addToken(currentToken);
|
||||
currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,false);
|
||||
if (insensUSASCII) currentToken.unicodeAware = false;
|
||||
}
|
||||
|
||||
// NON-DIGIT OPERATOR
|
||||
|
@ -868,6 +930,7 @@ public class RE extends REToken {
|
|||
else if (unit.bk && (unit.ch == 'D') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
|
||||
addToken(currentToken);
|
||||
currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.DIGIT,insens,true);
|
||||
if (insensUSASCII) currentToken.unicodeAware = false;
|
||||
}
|
||||
|
||||
// NEWLINE ESCAPE
|
||||
|
@ -892,6 +955,7 @@ public class RE extends REToken {
|
|||
else if (unit.bk && (unit.ch == 's') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
|
||||
addToken(currentToken);
|
||||
currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,false);
|
||||
if (insensUSASCII) currentToken.unicodeAware = false;
|
||||
}
|
||||
|
||||
// NON-WHITESPACE OPERATOR
|
||||
|
@ -900,6 +964,7 @@ public class RE extends REToken {
|
|||
else if (unit.bk && (unit.ch == 'S') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
|
||||
addToken(currentToken);
|
||||
currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.SPACE,insens,true);
|
||||
if (insensUSASCII) currentToken.unicodeAware = false;
|
||||
}
|
||||
|
||||
// TAB ESCAPE
|
||||
|
@ -916,6 +981,7 @@ public class RE extends REToken {
|
|||
else if (unit.bk && (unit.ch == 'w') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
|
||||
addToken(currentToken);
|
||||
currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,false);
|
||||
if (insensUSASCII) currentToken.unicodeAware = false;
|
||||
}
|
||||
|
||||
// NON-ALPHANUMERIC OPERATOR
|
||||
|
@ -924,12 +990,19 @@ public class RE extends REToken {
|
|||
else if (unit.bk && (unit.ch == 'W') && syntax.get(RESyntax.RE_CHAR_CLASS_ESCAPES)) {
|
||||
addToken(currentToken);
|
||||
currentToken = new RETokenPOSIX(subIndex,RETokenPOSIX.ALNUM,insens,true);
|
||||
if (insensUSASCII) currentToken.unicodeAware = false;
|
||||
}
|
||||
|
||||
// END OF STRING OPERATOR
|
||||
// \Z
|
||||
// \Z, \z
|
||||
|
||||
else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
|
||||
// FIXME: \Z and \z are different in that if the input string
|
||||
// ends with a line terminator, \Z matches the position before
|
||||
// the final terminator. This special behavior of \Z is yet
|
||||
// to be implemented.
|
||||
|
||||
else if (unit.bk && (unit.ch == 'Z' || unit.ch == 'z') &&
|
||||
syntax.get(RESyntax.RE_STRING_ANCHORS)) {
|
||||
addToken(currentToken);
|
||||
currentToken = new RETokenEnd(subIndex,null);
|
||||
}
|
||||
|
@ -945,6 +1018,7 @@ public class RE extends REToken {
|
|||
index = index - 2 + ce.len;
|
||||
addToken(currentToken);
|
||||
currentToken = new RETokenChar(subIndex,ce.ch,insens);
|
||||
if (insensUSASCII) currentToken.unicodeAware = false;
|
||||
}
|
||||
|
||||
// NAMED PROPERTY
|
||||
|
@ -958,6 +1032,16 @@ public class RE extends REToken {
|
|||
index = index - 2 + np.len;
|
||||
addToken(currentToken);
|
||||
currentToken = getRETokenNamedProperty(subIndex,np,insens,index);
|
||||
if (insensUSASCII) currentToken.unicodeAware = false;
|
||||
}
|
||||
|
||||
// END OF PREVIOUS MATCH
|
||||
// \G
|
||||
|
||||
else if (unit.bk && (unit.ch == 'G') &&
|
||||
syntax.get(RESyntax.RE_STRING_ANCHORS)) {
|
||||
addToken(currentToken);
|
||||
currentToken = new RETokenEndOfPreviousMatch(subIndex);
|
||||
}
|
||||
|
||||
// NON-SPECIAL CHARACTER (or escape to make literal)
|
||||
|
@ -966,6 +1050,7 @@ public class RE extends REToken {
|
|||
else { // not a special character
|
||||
addToken(currentToken);
|
||||
currentToken = new RETokenChar(subIndex,unit.ch,insens);
|
||||
if (insensUSASCII) currentToken.unicodeAware = false;
|
||||
}
|
||||
} // end while
|
||||
|
||||
|
@ -1006,6 +1091,7 @@ public class RE extends REToken {
|
|||
throws REException {
|
||||
|
||||
boolean insens = ((cflags & REG_ICASE) > 0);
|
||||
boolean insensUSASCII = ((cflags & REG_ICASE_USASCII) > 0);
|
||||
Vector options = new Vector();
|
||||
Vector addition = new Vector();
|
||||
boolean additionAndAppeared = false;
|
||||
|
@ -1035,7 +1121,9 @@ public class RE extends REToken {
|
|||
if ((ch == '-') && (lastCharIsSet)) {
|
||||
if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
|
||||
if ((ch = pattern[index]) == ']') {
|
||||
options.addElement(new RETokenChar(subIndex,lastChar,insens));
|
||||
RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
|
||||
if (insensUSASCII) t.unicodeAware = false;
|
||||
options.addElement(t);
|
||||
lastChar = '-';
|
||||
} else {
|
||||
if ((ch == '\\') && syntax.get(RESyntax.RE_BACKSLASH_ESCAPE_IN_LISTS)) {
|
||||
|
@ -1045,7 +1133,9 @@ public class RE extends REToken {
|
|||
ch = ce.ch;
|
||||
index = index + ce.len - 1;
|
||||
}
|
||||
options.addElement(new RETokenRange(subIndex,lastChar,ch,insens));
|
||||
RETokenRange t = new RETokenRange(subIndex,lastChar,ch,insens);
|
||||
if (insensUSASCII) t.unicodeAware = false;
|
||||
options.addElement(t);
|
||||
lastChar = 0; lastCharIsSet = false;
|
||||
index++;
|
||||
}
|
||||
|
@ -1088,12 +1178,20 @@ public class RE extends REToken {
|
|||
asciiEsc = ce.ch; asciiEscIsSet = true;
|
||||
index = index - 1 + ce.len - 1;
|
||||
}
|
||||
if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens));
|
||||
if (lastCharIsSet) {
|
||||
RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
|
||||
if (insensUSASCII) t.unicodeAware = false;
|
||||
options.addElement(t);
|
||||
}
|
||||
|
||||
if (posixID != -1) {
|
||||
options.addElement(new RETokenPOSIX(subIndex,posixID,insens,negate));
|
||||
RETokenPOSIX t = new RETokenPOSIX(subIndex,posixID,insens,negate);
|
||||
if (insensUSASCII) t.unicodeAware = false;
|
||||
options.addElement(t);
|
||||
} else if (np != null) {
|
||||
options.addElement(getRETokenNamedProperty(subIndex,np,insens,index));
|
||||
RETokenNamedProperty t = getRETokenNamedProperty(subIndex,np,insens,index);
|
||||
if (insensUSASCII) t.unicodeAware = false;
|
||||
options.addElement(t);
|
||||
} else if (asciiEscIsSet) {
|
||||
lastChar = asciiEsc; lastCharIsSet = true;
|
||||
} else {
|
||||
|
@ -1104,8 +1202,11 @@ public class RE extends REToken {
|
|||
StringBuffer posixSet = new StringBuffer();
|
||||
index = getPosixSet(pattern,index+1,posixSet);
|
||||
int posixId = RETokenPOSIX.intValue(posixSet.toString());
|
||||
if (posixId != -1)
|
||||
options.addElement(new RETokenPOSIX(subIndex,posixId,insens,false));
|
||||
if (posixId != -1) {
|
||||
RETokenPOSIX t = new RETokenPOSIX(subIndex,posixId,insens,false);
|
||||
if (insensUSASCII) t.unicodeAware = false;
|
||||
options.addElement(t);
|
||||
}
|
||||
} else if ((ch == '[') && (syntax.get(RESyntax.RE_NESTED_CHARCLASS))) {
|
||||
ParseCharClassResult result = parseCharClass(
|
||||
subIndex, pattern, index, pLength, cflags, syntax, 0);
|
||||
|
@ -1158,14 +1259,22 @@ public class RE extends REToken {
|
|||
result.index: result.index - 1);
|
||||
}
|
||||
} else {
|
||||
if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens));
|
||||
if (lastCharIsSet) {
|
||||
RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
|
||||
if (insensUSASCII) t.unicodeAware = false;
|
||||
options.addElement(t);
|
||||
}
|
||||
lastChar = ch; lastCharIsSet = true;
|
||||
}
|
||||
if (index == pLength) throw new REException(getLocalizedMessage("class.no.end"),REException.REG_EBRACK,index);
|
||||
} // while in list
|
||||
// Out of list, index is one past ']'
|
||||
|
||||
if (lastCharIsSet) options.addElement(new RETokenChar(subIndex,lastChar,insens));
|
||||
if (lastCharIsSet) {
|
||||
RETokenChar t = new RETokenChar(subIndex,lastChar,insens);
|
||||
if (insensUSASCII) t.unicodeAware = false;
|
||||
options.addElement(t);
|
||||
}
|
||||
|
||||
ParseCharClassResult result = new ParseCharClassResult();
|
||||
// Create a new RETokenOneOf
|
||||
|
@ -1396,11 +1505,10 @@ public class RE extends REToken {
|
|||
return (input.charAt(0) == CharIndexed.OUT_OF_BOUNDS);
|
||||
REMatch m = new REMatch(numSubs, index, eflags);
|
||||
if (firstToken.match(input, m)) {
|
||||
while (m != null) {
|
||||
if (m != null) {
|
||||
if (input.charAt(m.index) == CharIndexed.OUT_OF_BOUNDS) {
|
||||
return true;
|
||||
}
|
||||
m = m.next;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
@ -1508,17 +1616,27 @@ public class RE extends REToken {
|
|||
}
|
||||
|
||||
/* Implements abstract method REToken.match() */
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
if (firstToken == null) {
|
||||
return next(input, mymatch);
|
||||
}
|
||||
|
||||
// Note the start of this subexpression
|
||||
mymatch.start[subIndex] = mymatch.index;
|
||||
mymatch.start1[subIndex] = mymatch.index;
|
||||
|
||||
return firstToken.match(input, mymatch);
|
||||
}
|
||||
|
||||
|
||||
REMatch findMatch(CharIndexed input, REMatch mymatch) {
|
||||
if (mymatch.backtrackStack == null)
|
||||
mymatch.backtrackStack = new BacktrackStack();
|
||||
boolean b = match(input, mymatch);
|
||||
if (b) {
|
||||
return mymatch;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the first match found in the input. If no match is found,
|
||||
* null is returned.
|
||||
|
@ -1602,6 +1720,7 @@ public class RE extends REToken {
|
|||
*/
|
||||
best.end[0] = best.index;
|
||||
best.finish(input);
|
||||
input.setLastMatch(best);
|
||||
return best;
|
||||
}
|
||||
}
|
||||
|
@ -1942,19 +2061,34 @@ public class RE extends REToken {
|
|||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
os.append('(');
|
||||
os.append("(?#startRE subIndex=" + subIndex + ")");
|
||||
if (subIndex == 0)
|
||||
os.append("?:");
|
||||
if (firstToken != null)
|
||||
firstToken.dumpAll(os);
|
||||
os.append(')');
|
||||
if (subIndex == 0)
|
||||
os.append(")");
|
||||
os.append("(?#endRE subIndex=" + subIndex + ")");
|
||||
}
|
||||
|
||||
// Cast input appropriately or throw exception
|
||||
private static CharIndexed makeCharIndexed(Object input, int index) {
|
||||
// We could let a String fall through to final input, but since
|
||||
// it's the most likely input type, we check it first.
|
||||
if (input instanceof String)
|
||||
// This method was originally a private method, but has been made
|
||||
// public because java.util.regex.Matcher uses this.
|
||||
public static CharIndexed makeCharIndexed(Object input, int index) {
|
||||
// The case where input is already a CharIndexed is supposed
|
||||
// be the most likely because this is the case with
|
||||
// java.util.regex.Matcher.
|
||||
// We could let a String or a CharSequence fall through
|
||||
// to final input, but since it'a very likely input type,
|
||||
// we check it first.
|
||||
if (input instanceof CharIndexed) {
|
||||
CharIndexed ci = (CharIndexed) input;
|
||||
ci.setAnchor(index);
|
||||
return ci;
|
||||
}
|
||||
else if (input instanceof CharSequence)
|
||||
return new CharIndexedCharSequence((CharSequence) input,index);
|
||||
else if (input instanceof String)
|
||||
return new CharIndexedString((String) input,index);
|
||||
else if (input instanceof char[])
|
||||
return new CharIndexedCharArray((char[]) input,index);
|
||||
|
@ -1962,8 +2096,6 @@ public class RE extends REToken {
|
|||
return new CharIndexedStringBuffer((StringBuffer) input,index);
|
||||
else if (input instanceof InputStream)
|
||||
return new CharIndexedInputStream((InputStream) input,index);
|
||||
else if (input instanceof CharIndexed)
|
||||
return (CharIndexed) input; // do we lose index info?
|
||||
else
|
||||
return new CharIndexedString(input.toString(), index);
|
||||
}
|
||||
|
|
|
@ -49,6 +49,7 @@ import java.io.Serializable;
|
|||
*/
|
||||
public final class REMatch implements Serializable, Cloneable {
|
||||
private String matchedText;
|
||||
private CharIndexed matchedCharIndexed;
|
||||
|
||||
// These variables are package scope for fast access within the engine
|
||||
int eflags; // execution flags this match was made using
|
||||
|
@ -64,20 +65,28 @@ public final class REMatch implements Serializable, Cloneable {
|
|||
|
||||
// Package scope; used by RE.
|
||||
int index; // used while matching to mark current match position in input
|
||||
// start1[i] is set when the i-th subexp starts. And start1[i] is copied
|
||||
// to start[i] when the i-th subexp ends. So start[i] keeps the previously
|
||||
// assigned value while the i-th subexp is being processed. This makes
|
||||
// backreference to the i-th subexp within the i-th subexp possible.
|
||||
int[] start; // start positions (relative to offset) for each (sub)exp.
|
||||
int[] start1; // start positions (relative to offset) for each (sub)exp.
|
||||
int[] end; // end positions for the same
|
||||
REMatch next; // other possibility (to avoid having to use arrays)
|
||||
// start[i] == -1 or end[i] == -1 means that the start/end position is void.
|
||||
// start[i] == p or end[i] == p where p < 0 and p != -1 means that
|
||||
// the actual start/end position is (p+1). Start/end positions may
|
||||
// become negative when the subexpression is in a RETokenLookBehind.
|
||||
boolean empty; // empty string matched. This flag is used only within
|
||||
// RETokenRepeated.
|
||||
int matchFlags; // flags passed to match methods
|
||||
static final int MF_FIND_ALL = 0x01;
|
||||
|
||||
BacktrackStack backtrackStack;
|
||||
|
||||
public Object clone() {
|
||||
try {
|
||||
REMatch copy = (REMatch) super.clone();
|
||||
copy.next = null;
|
||||
|
||||
copy.start = (int[]) start.clone();
|
||||
copy.start1 = (int[]) start1.clone();
|
||||
copy.end = (int[]) end.clone();
|
||||
|
||||
return copy;
|
||||
|
@ -88,14 +97,15 @@ public final class REMatch implements Serializable, Cloneable {
|
|||
|
||||
void assignFrom(REMatch other) {
|
||||
start = other.start;
|
||||
start1 = other.start1;
|
||||
end = other.end;
|
||||
index = other.index;
|
||||
// need to deep clone?
|
||||
next = other.next;
|
||||
backtrackStack = other.backtrackStack;
|
||||
}
|
||||
|
||||
REMatch(int subs, int anchor, int eflags) {
|
||||
start = new int[subs+1];
|
||||
start1 = new int[subs+1];
|
||||
end = new int[subs+1];
|
||||
this.anchor = anchor;
|
||||
this.eflags = eflags;
|
||||
|
@ -109,6 +119,7 @@ public final class REMatch implements Serializable, Cloneable {
|
|||
for (i = 0; i < end[0]; i++)
|
||||
sb.append(text.charAt(i));
|
||||
matchedText = sb.toString();
|
||||
matchedCharIndexed = text;
|
||||
for (i = 0; i < start.length; i++) {
|
||||
// If any subexpressions didn't terminate, they don't count
|
||||
// TODO check if this code ever gets hit
|
||||
|
@ -117,7 +128,7 @@ public final class REMatch implements Serializable, Cloneable {
|
|||
end[i] = -1;
|
||||
}
|
||||
}
|
||||
next = null; // cut off alternates
|
||||
backtrackStack = null;
|
||||
}
|
||||
|
||||
/** Clears the current match and moves the offset to the new index. */
|
||||
|
@ -125,9 +136,9 @@ public final class REMatch implements Serializable, Cloneable {
|
|||
offset = index;
|
||||
this.index = 0;
|
||||
for (int i = 0; i < start.length; i++) {
|
||||
start[i] = end[i] = -1;
|
||||
start[i] = start1[i] = end[i] = -1;
|
||||
}
|
||||
next = null; // cut off alternates
|
||||
backtrackStack = null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -184,7 +195,19 @@ public final class REMatch implements Serializable, Cloneable {
|
|||
if ((sub >= start.length) || sub < 0)
|
||||
throw new IndexOutOfBoundsException("No group " + sub);
|
||||
if (start[sub] == -1) return null;
|
||||
return (matchedText.substring(start[sub],end[sub]));
|
||||
if (start[sub] >= 0 && end[sub] <= matchedText.length())
|
||||
return (matchedText.substring(start[sub],end[sub]));
|
||||
else {
|
||||
// This case occurs with RETokenLookAhead or RETokenLookBehind.
|
||||
StringBuffer sb = new StringBuffer();
|
||||
int s = start[sub];
|
||||
int e = end[sub];
|
||||
if (s < 0) s += 1;
|
||||
if (e < 0) e += 1;
|
||||
for (int i = start[0] + s; i < start[0] + e; i++)
|
||||
sb.append(matchedCharIndexed.charAt(i));
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -198,7 +221,8 @@ public final class REMatch implements Serializable, Cloneable {
|
|||
public int getSubStartIndex(int sub) {
|
||||
if (sub >= start.length) return -1;
|
||||
int x = start[sub];
|
||||
return (x == -1) ? x : offset + x;
|
||||
return (x == -1) ? x :
|
||||
(x >= 0) ? offset + x : offset + x + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -212,7 +236,8 @@ public final class REMatch implements Serializable, Cloneable {
|
|||
public int getStartIndex(int sub) {
|
||||
if (sub >= start.length) return -1;
|
||||
int x = start[sub];
|
||||
return (x == -1) ? x : offset + x;
|
||||
return (x == -1) ? x :
|
||||
(x >= 0) ? offset + x : offset + x + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -226,7 +251,8 @@ public final class REMatch implements Serializable, Cloneable {
|
|||
public int getSubEndIndex(int sub) {
|
||||
if (sub >= start.length) return -1;
|
||||
int x = end[sub];
|
||||
return (x == -1) ? x : offset + x;
|
||||
return (x == -1) ? x :
|
||||
(x >= 0) ? offset + x : offset + x + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -239,7 +265,8 @@ public final class REMatch implements Serializable, Cloneable {
|
|||
public int getEndIndex(int sub) {
|
||||
if (sub >= start.length) return -1;
|
||||
int x = end[sub];
|
||||
return (x == -1) ? x : offset + x;
|
||||
return (x == -1) ? x :
|
||||
(x >= 0) ? offset + x : offset + x + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -279,41 +306,19 @@ public final class REMatch implements Serializable, Cloneable {
|
|||
return output.toString();
|
||||
}
|
||||
|
||||
static class REMatchList {
|
||||
REMatch head;
|
||||
REMatch tail;
|
||||
REMatchList() {
|
||||
head = tail = null;
|
||||
}
|
||||
/* Not used now. But we may need this some day?
|
||||
void addHead(REMatch newone) {
|
||||
if (head == null) {
|
||||
head = newone;
|
||||
tail = newone;
|
||||
while (tail.next != null) {
|
||||
tail = tail.next;
|
||||
}
|
||||
}
|
||||
else {
|
||||
REMatch tmp = newone;
|
||||
while (tmp.next != null) tmp = tmp.next;
|
||||
tmp.next = head;
|
||||
head = newone;
|
||||
}
|
||||
}
|
||||
*/
|
||||
void addTail(REMatch newone) {
|
||||
if (head == null) {
|
||||
head = newone;
|
||||
tail = newone;
|
||||
}
|
||||
else {
|
||||
tail.next = newone;
|
||||
}
|
||||
while (tail.next != null) {
|
||||
tail = tail.next;
|
||||
}
|
||||
}
|
||||
/* The following are used for debugging purpose
|
||||
static String d(REMatch m) {
|
||||
if (m == null) return "null";
|
||||
else return "[" + m.index + "]";
|
||||
}
|
||||
|
||||
String substringUptoIndex(CharIndexed input) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
for (int i = 0; i < index; i++) {
|
||||
sb.append(input.charAt(i));
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
*/
|
||||
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/REToken.java
|
||||
Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -43,6 +43,7 @@ abstract class REToken implements Serializable, Cloneable {
|
|||
protected REToken next = null;
|
||||
protected REToken uncle = null;
|
||||
protected int subIndex;
|
||||
protected boolean unicodeAware = true;
|
||||
|
||||
public Object clone() {
|
||||
try {
|
||||
|
@ -70,30 +71,119 @@ abstract class REToken implements Serializable, Cloneable {
|
|||
}
|
||||
|
||||
/** Returns true if the match succeeded, false if it failed. */
|
||||
abstract boolean match(CharIndexed input, REMatch mymatch);
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
REMatch m = matchThis(input, mymatch);
|
||||
if (m == null) return false;
|
||||
if (next(input, m)) {
|
||||
mymatch.assignFrom(m);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Returns true if the match succeeded, false if it failed.
|
||||
* The matching is done against this REToken only. Chained
|
||||
* tokens are not checked.
|
||||
* This method is used to define the default match method.
|
||||
* Simple subclasses of REToken, for example, such that
|
||||
* matches only one character, should implement this method.
|
||||
* Then the default match method will work. But complicated
|
||||
* subclasses of REToken, which needs a special match method,
|
||||
* do not have to implement this method.
|
||||
*/
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch) {
|
||||
throw new UnsupportedOperationException(
|
||||
"This REToken does not have a matchThis method");
|
||||
}
|
||||
|
||||
/** Returns true if the rest of the tokens match, false if they fail. */
|
||||
protected boolean next(CharIndexed input, REMatch mymatch) {
|
||||
if (next == null) {
|
||||
if (uncle == null) {
|
||||
return true;
|
||||
} else {
|
||||
return uncle.match(input, mymatch);
|
||||
}
|
||||
} else {
|
||||
return next.match(input, mymatch);
|
||||
}
|
||||
REToken nextToken = getNext();
|
||||
if (nextToken == null) return true;
|
||||
return nextToken.match(input, mymatch);
|
||||
}
|
||||
|
||||
|
||||
/** Returns the next REToken chained to this REToken. */
|
||||
REToken getNext() {
|
||||
return (next != null ? next : uncle);
|
||||
}
|
||||
|
||||
/** Finds a match at the position specified by the given REMatch.
|
||||
* If necessary, adds a BacktrackStack.Backtrack object to backtrackStack
|
||||
* of the REmatch found this time so that another possible match
|
||||
* may be found when backtrack is called.
|
||||
* By default, nothing is added to the backtrackStack.
|
||||
* @param CharIndexed input Input character sequence.
|
||||
* @param mymatch Position at which a match should be found
|
||||
* @return REMatch object if a match was found, null otherwise.
|
||||
*/
|
||||
REMatch findMatch(CharIndexed input, REMatch mymatch) {
|
||||
boolean b = match(input, mymatch);
|
||||
if (b) return mymatch;
|
||||
return null;
|
||||
}
|
||||
|
||||
boolean returnsFixedLengthMatches() {
|
||||
return false;
|
||||
}
|
||||
|
||||
int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
|
||||
throw new UnsupportedOperationException(
|
||||
"This token does not support findFixedLengthMatches");
|
||||
}
|
||||
|
||||
/**
|
||||
* Backtrack to another possibility.
|
||||
* Ordinary REToken cannot do anything if this method is called.
|
||||
*/
|
||||
REMatch backtrack(CharIndexed input, REMatch mymatch, Object param) {
|
||||
throw new IllegalStateException("This token cannot be backtracked to");
|
||||
}
|
||||
|
||||
boolean chain(REToken token) {
|
||||
next = token;
|
||||
return true; // Token was accepted
|
||||
}
|
||||
|
||||
abstract void dump(StringBuffer os);
|
||||
abstract void dump(StringBuffer os);
|
||||
|
||||
void dumpAll(StringBuffer os) {
|
||||
dump(os);
|
||||
if (next != null) next.dumpAll(os);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuffer os = new StringBuffer();
|
||||
dump(os);
|
||||
return os.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the character argument to lowercase.
|
||||
* @param ch the character to be converted.
|
||||
* @param unicodeAware If true, use java.lang.Character#toLowerCase;
|
||||
* otherwise, only US-ASCII charactes can be converted.
|
||||
* @return the lowercase equivalent of the character, if any;
|
||||
* otherwise, the character itself.
|
||||
*/
|
||||
public static char toLowerCase(char ch, boolean unicodeAware) {
|
||||
if (unicodeAware) return Character.toLowerCase(ch);
|
||||
if (ch >= 'A' && ch <= 'Z') return (char)(ch + 'a' - 'A');
|
||||
return ch;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the character argument to uppercase.
|
||||
* @param ch the character to be converted.
|
||||
* @param unicodeAware If true, use java.lang.Character#toUpperCase;
|
||||
* otherwise, only US-ASCII charactes can be converted.
|
||||
* @return the uppercase equivalent of the character, if any;
|
||||
* otherwise, the character itself.
|
||||
*/
|
||||
public static char toUpperCase(char ch, boolean unicodeAware) {
|
||||
if (unicodeAware) return Character.toUpperCase(ch);
|
||||
if (ch >= 'a' && ch <= 'z') return (char)(ch + 'A' - 'a');
|
||||
return ch;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/RETokenAny.java
|
||||
Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -59,15 +59,37 @@ final class RETokenAny extends REToken {
|
|||
return 1;
|
||||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
char ch = input.charAt(mymatch.index);
|
||||
if ((ch == CharIndexed.OUT_OF_BOUNDS)
|
||||
|| (!newline && (ch == '\n'))
|
||||
|| (matchNull && (ch == 0))) {
|
||||
return false;
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch) {
|
||||
char ch = input.charAt(mymatch.index);
|
||||
boolean retval = matchOneChar(ch);
|
||||
if (retval) {
|
||||
++mymatch.index;
|
||||
return mymatch;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
++mymatch.index;
|
||||
return next(input, mymatch);
|
||||
|
||||
boolean matchOneChar(char ch) {
|
||||
if ((ch == CharIndexed.OUT_OF_BOUNDS)
|
||||
|| (!newline && (ch == '\n'))
|
||||
|| (matchNull && (ch == 0))) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
boolean returnsFixedLengthMatches() { return true; }
|
||||
|
||||
int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
|
||||
int index = mymatch.index;
|
||||
int numRepeats = 0;
|
||||
while (true) {
|
||||
if (numRepeats >= max) break;
|
||||
char ch = input.charAt(index++);
|
||||
if (! matchOneChar(ch)) break;
|
||||
numRepeats++;
|
||||
}
|
||||
return numRepeats;
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/RETokenBackRef.java
|
||||
Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -50,30 +50,32 @@ final class RETokenBackRef extends REToken {
|
|||
|
||||
// should implement getMinimumLength() -- any ideas?
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
if (num >= mymatch.start.length) return false;
|
||||
if (num >= mymatch.end.length) return false;
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch) {
|
||||
if (num >= mymatch.start.length) return null;
|
||||
if (num >= mymatch.end.length) return null;
|
||||
int b,e;
|
||||
b = mymatch.start[num];
|
||||
e = mymatch.end[num];
|
||||
if ((b==-1)||(e==-1)) return false; // this shouldn't happen, but...
|
||||
if ((b==-1)||(e==-1)) return null; // this shouldn't happen, but...
|
||||
if (b < 0) b += 1;
|
||||
if (e < 0) e += 1;
|
||||
for (int i=b; i<e; i++) {
|
||||
char c1 = input.charAt(mymatch.index+i-b);
|
||||
char c2 = input.charAt(i);
|
||||
if (c1 != c2) {
|
||||
if (insens) {
|
||||
if (c1 != Character.toLowerCase(c2) &&
|
||||
c1 != Character.toUpperCase(c2)) {
|
||||
return false;
|
||||
if (c1 != toLowerCase(c2, unicodeAware) &&
|
||||
c1 != toUpperCase(c2, unicodeAware)) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
mymatch.index += e-b;
|
||||
return next(input, mymatch);
|
||||
return mymatch;
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/RETokenChar.java
|
||||
Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -44,8 +44,9 @@ final class RETokenChar extends REToken {
|
|||
|
||||
RETokenChar(int subIndex, char c, boolean ins) {
|
||||
super(subIndex);
|
||||
insens = ins;
|
||||
ch = new char [1];
|
||||
ch[0] = (insens = ins) ? Character.toLowerCase(c) : c;
|
||||
ch[0] = c;
|
||||
}
|
||||
|
||||
int getMinimumLength() {
|
||||
|
@ -56,18 +57,50 @@ final class RETokenChar extends REToken {
|
|||
return ch.length;
|
||||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch) {
|
||||
int z = ch.length;
|
||||
if (matchOneString(input, mymatch.index)) {
|
||||
mymatch.index += z;
|
||||
return mymatch;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
boolean matchOneString(CharIndexed input, int index) {
|
||||
int z = ch.length;
|
||||
char c;
|
||||
for (int i=0; i<z; i++) {
|
||||
c = input.charAt(mymatch.index+i);
|
||||
if (( (insens) ? Character.toLowerCase(c) : c ) != ch[i]) {
|
||||
c = input.charAt(index+i);
|
||||
if (! charEquals(c, ch[i])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
mymatch.index += z;
|
||||
return true;
|
||||
}
|
||||
|
||||
return next(input, mymatch);
|
||||
private boolean charEquals(char c1, char c2) {
|
||||
if (c1 == c2) return true;
|
||||
if (! insens) return false;
|
||||
if (toLowerCase(c1, unicodeAware) == c2) return true;
|
||||
if (toUpperCase(c1, unicodeAware) == c2) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
boolean returnsFixedLengthMatches() { return true; }
|
||||
|
||||
int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
|
||||
int index = mymatch.index;
|
||||
int numRepeats = 0;
|
||||
int z = ch.length;
|
||||
while (true) {
|
||||
if (numRepeats >= max) break;
|
||||
if (matchOneString(input, index)) {
|
||||
index += z;
|
||||
numRepeats++;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
return numRepeats;
|
||||
}
|
||||
|
||||
// Overrides REToken.chain() to optimize for strings
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/RETokenEnd.java
|
||||
Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -53,24 +53,34 @@ final class RETokenEnd extends REToken {
|
|||
return 0;
|
||||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch) {
|
||||
char ch = input.charAt(mymatch.index);
|
||||
if (ch == CharIndexed.OUT_OF_BOUNDS)
|
||||
return ((mymatch.eflags & RE.REG_NOTEOL)>0) ?
|
||||
false : next(input, mymatch);
|
||||
null : mymatch;
|
||||
if (newline != null) {
|
||||
char z;
|
||||
int i = 0; // position in newline
|
||||
do {
|
||||
z = newline.charAt(i);
|
||||
if (ch != z) return false;
|
||||
if (ch != z) return null;
|
||||
++i;
|
||||
ch = input.charAt(mymatch.index + i);
|
||||
} while (i < newline.length());
|
||||
|
||||
return next(input, mymatch);
|
||||
return mymatch;
|
||||
}
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
|
||||
boolean returnsFixedLengthMatches() { return true; }
|
||||
|
||||
int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
|
||||
REMatch m = (REMatch) mymatch.clone();
|
||||
REToken tk = (REToken) this.clone();
|
||||
tk.chain(null);
|
||||
if (tk.match(input, m)) return max;
|
||||
else return 0;
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
|
|
72
libjava/classpath/gnu/regexp/RETokenEndOfPreviousMatch.java
Normal file
72
libjava/classpath/gnu/regexp/RETokenEndOfPreviousMatch.java
Normal file
|
@ -0,0 +1,72 @@
|
|||
/* gnu/regexp/RETokenEndOfPreviousMatch.java
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
GNU Classpath is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GNU Classpath is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GNU Classpath; see the file COPYING. If not, write to the
|
||||
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
02110-1301 USA.
|
||||
|
||||
Linking this library statically or dynamically with other modules is
|
||||
making a combined work based on this library. Thus, the terms and
|
||||
conditions of the GNU General Public License cover the whole
|
||||
combination.
|
||||
|
||||
As a special exception, the copyright holders of this library give you
|
||||
permission to link this library with independent modules to produce an
|
||||
executable, regardless of the license terms of these independent
|
||||
modules, and to copy and distribute the resulting executable under
|
||||
terms of your choice, provided that you also meet, for each linked
|
||||
independent module, the terms and conditions of the license of that
|
||||
module. An independent module is a module which is not derived from
|
||||
or based on this library. If you modify this library, you may extend
|
||||
this exception to your version of the library, but you are not
|
||||
obligated to do so. If you do not wish to do so, delete this
|
||||
exception statement from your version. */
|
||||
|
||||
package gnu.regexp;
|
||||
|
||||
class RETokenEndOfPreviousMatch extends RETokenStart {
|
||||
|
||||
RETokenEndOfPreviousMatch(int subIndex) {
|
||||
super(subIndex, null);
|
||||
}
|
||||
|
||||
int getMaximumLength() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch) {
|
||||
REMatch lastMatch = input.getLastMatch();
|
||||
if (lastMatch == null) return super.matchThis(input, mymatch);
|
||||
if (input.getAnchor()+mymatch.index ==
|
||||
lastMatch.anchor+lastMatch.index) {
|
||||
return mymatch;
|
||||
}
|
||||
else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
boolean returnsFixedLengthmatches() { return true; }
|
||||
|
||||
int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
|
||||
if (matchThis(input, mymatch) != null) return max;
|
||||
else return 0;
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
os.append("\\G");
|
||||
}
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/RETokenEndSub.java
|
||||
Copyright (C) 2001, 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -46,12 +46,21 @@ final class RETokenEndSub extends REToken {
|
|||
return 0;
|
||||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch) {
|
||||
mymatch.start[subIndex] = mymatch.start1[subIndex];
|
||||
mymatch.end[subIndex] = mymatch.index;
|
||||
return next(input, mymatch);
|
||||
return mymatch;
|
||||
}
|
||||
|
||||
|
||||
REMatch findMatch(CharIndexed input, REMatch mymatch) {
|
||||
mymatch.start[subIndex] = mymatch.start1[subIndex];
|
||||
mymatch.end[subIndex] = mymatch.index;
|
||||
return super.findMatch(input, mymatch);
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
// handled by RE
|
||||
// But add something for debugging.
|
||||
os.append("(?#RETokenEndSub subIndex=" + subIndex + ")");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -57,14 +57,16 @@ final class RETokenIndependent extends REToken
|
|||
return re.getMaximumLength();
|
||||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch)
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch)
|
||||
{
|
||||
if (re.match(input, mymatch)) {
|
||||
boolean b = re.match(input, mymatch);
|
||||
if (b) {
|
||||
// Once we have found a match, we do not see other possible matches.
|
||||
mymatch.next = null;
|
||||
return next(input, mymatch);
|
||||
if (mymatch.backtrackStack != null) mymatch.backtrackStack.clear();
|
||||
return mymatch;
|
||||
|
||||
}
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/RETokenLookAhead.java
|
||||
Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -56,28 +56,17 @@ final class RETokenLookAhead extends REToken
|
|||
return 0;
|
||||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch)
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch)
|
||||
{
|
||||
REMatch trymatch = (REMatch)mymatch.clone();
|
||||
REMatch trymatch1 = (REMatch)mymatch.clone();
|
||||
REMatch newMatch = null;
|
||||
if (re.match(input, trymatch)) {
|
||||
if (negative) return false;
|
||||
if (next(input, trymatch1))
|
||||
newMatch = trymatch1;
|
||||
if (negative) return null;
|
||||
trymatch.index = mymatch.index;
|
||||
return trymatch;
|
||||
}
|
||||
|
||||
if (newMatch != null) {
|
||||
if (negative) return false;
|
||||
//else
|
||||
mymatch.assignFrom(newMatch);
|
||||
return true;
|
||||
}
|
||||
else { // no match
|
||||
if (negative)
|
||||
return next(input, mymatch);
|
||||
//else
|
||||
return false;
|
||||
else {
|
||||
if (negative) return mymatch;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -55,35 +55,37 @@ final class RETokenLookBehind extends REToken
|
|||
return 0;
|
||||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch)
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch)
|
||||
{
|
||||
int max = re.getMaximumLength();
|
||||
CharIndexed behind = input.lookBehind(mymatch.index, max);
|
||||
REMatch trymatch = (REMatch)mymatch.clone();
|
||||
REMatch trymatch1 = (REMatch)mymatch.clone();
|
||||
REMatch newMatch = null;
|
||||
int curIndex = trymatch.index + behind.length() - input.length();
|
||||
int diff = behind.length() - input.length();
|
||||
int curIndex = trymatch.index + diff;
|
||||
trymatch.index = 0;
|
||||
trymatch.offset = 0;
|
||||
RETokenMatchHereOnly stopper = new RETokenMatchHereOnly(curIndex);
|
||||
REToken re1 = (REToken) re.clone();
|
||||
re1.chain(stopper);
|
||||
if (re1.match(behind, trymatch)) {
|
||||
if (negative) return false;
|
||||
if (next(input, trymatch1))
|
||||
newMatch = trymatch1;
|
||||
if (negative) return null;
|
||||
for (int i = 0; i < trymatch.start.length; i++) {
|
||||
if (trymatch.start[i] != -1 && trymatch.end[i] != -1) {
|
||||
trymatch.start[i] -= diff;
|
||||
if (trymatch.start[i] < 0) trymatch.start[i] -= 1;
|
||||
trymatch.end[i] -= diff;
|
||||
if (trymatch.end[i] < 0) trymatch.end[i] -= 1;
|
||||
}
|
||||
}
|
||||
trymatch.index = mymatch.index;
|
||||
trymatch.offset = mymatch.offset;
|
||||
return trymatch;
|
||||
}
|
||||
|
||||
if (newMatch != null) {
|
||||
if (negative) return false;
|
||||
//else
|
||||
mymatch.assignFrom(newMatch);
|
||||
return true;
|
||||
}
|
||||
else { // no match
|
||||
if (negative)
|
||||
return next(input, mymatch);
|
||||
//else
|
||||
return false;
|
||||
else {
|
||||
if (negative) return mymatch;
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -105,8 +107,8 @@ final class RETokenLookBehind extends REToken
|
|||
this.index = index;
|
||||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
return index == mymatch.index;
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch) {
|
||||
return (index == mymatch.index ? mymatch : null);
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {}
|
||||
|
|
|
@ -105,24 +105,43 @@ final class RETokenNamedProperty extends REToken {
|
|||
return 1;
|
||||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
char ch = input.charAt(mymatch.index);
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch) {
|
||||
char ch = input.charAt(mymatch.index);
|
||||
boolean retval = matchOneChar(ch);
|
||||
if (retval) {
|
||||
++mymatch.index;
|
||||
return mymatch;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private boolean matchOneChar(char ch) {
|
||||
if (ch == CharIndexed.OUT_OF_BOUNDS)
|
||||
return false;
|
||||
|
||||
boolean retval = handler.includes(ch);
|
||||
if (insens) {
|
||||
retval = retval ||
|
||||
handler.includes(Character.toUpperCase(ch)) ||
|
||||
handler.includes(Character.toLowerCase(ch));
|
||||
handler.includes(toUpperCase(ch, unicodeAware)) ||
|
||||
handler.includes(toLowerCase(ch, unicodeAware));
|
||||
}
|
||||
|
||||
if (negate) retval = !retval;
|
||||
if (retval) {
|
||||
++mymatch.index;
|
||||
return next(input, mymatch);
|
||||
return retval;
|
||||
}
|
||||
|
||||
boolean returnsFixedLengthMatches() { return true; }
|
||||
|
||||
int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
|
||||
int index = mymatch.index;
|
||||
int numRepeats = 0;
|
||||
while (true) {
|
||||
if (numRepeats >= max) break;
|
||||
char ch = input.charAt(index++);
|
||||
if (! matchOneChar(ch)) break;
|
||||
numRepeats++;
|
||||
}
|
||||
else return false;
|
||||
return numRepeats;
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
|
@ -246,9 +265,6 @@ final class RETokenNamedProperty extends REToken {
|
|||
|
||||
private static class POSIXHandler extends Handler {
|
||||
private RETokenPOSIX retoken;
|
||||
private REMatch mymatch = new REMatch(0,0,0);
|
||||
private char[] chars = new char[1];
|
||||
private CharIndexedCharArray ca = new CharIndexedCharArray(chars, 0);
|
||||
public POSIXHandler(String name) {
|
||||
int posixId = RETokenPOSIX.intValue(name.toLowerCase());
|
||||
if (posixId != -1)
|
||||
|
@ -257,9 +273,7 @@ final class RETokenNamedProperty extends REToken {
|
|||
throw new RuntimeException("Unknown posix ID: " + name);
|
||||
}
|
||||
public boolean includes(char c) {
|
||||
chars[0] = c;
|
||||
mymatch.index = 0;
|
||||
return retoken.match(ca, mymatch);
|
||||
return retoken.matchOneChar(c);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -42,6 +42,9 @@ import java.util.Stack;
|
|||
final class RETokenOneOf extends REToken {
|
||||
private Vector options;
|
||||
private boolean negative;
|
||||
// True if this RETokenOneOf is supposed to match only one character,
|
||||
// which is typically the case of a character class expression.
|
||||
private boolean matchesOneChar;
|
||||
|
||||
private Vector addition;
|
||||
// This Vector addition is used to store nested character classes.
|
||||
|
@ -76,12 +79,14 @@ final class RETokenOneOf extends REToken {
|
|||
this.negative = negative;
|
||||
for (int i = 0; i < optionsStr.length(); i++)
|
||||
options.addElement(new RETokenChar(subIndex,optionsStr.charAt(i),insens));
|
||||
matchesOneChar = true;
|
||||
}
|
||||
|
||||
RETokenOneOf(int subIndex, Vector options, boolean negative) {
|
||||
super(subIndex);
|
||||
this.options = options;
|
||||
this.negative = negative;
|
||||
matchesOneChar = negative;
|
||||
}
|
||||
|
||||
RETokenOneOf(int subIndex, Vector options, Vector addition, boolean negative) {
|
||||
|
@ -89,12 +94,11 @@ final class RETokenOneOf extends REToken {
|
|||
this.options = options;
|
||||
this.addition = addition;
|
||||
this.negative = negative;
|
||||
matchesOneChar = (negative || addition != null);
|
||||
}
|
||||
|
||||
int getMinimumLength() {
|
||||
// (negative || addition != null) occurs when this token originates from
|
||||
// character class expression.
|
||||
if (negative || addition != null) return 1;
|
||||
if (matchesOneChar) return 1;
|
||||
int min = Integer.MAX_VALUE;
|
||||
int x;
|
||||
for (int i=0; i < options.size(); i++) {
|
||||
|
@ -105,9 +109,7 @@ final class RETokenOneOf extends REToken {
|
|||
}
|
||||
|
||||
int getMaximumLength() {
|
||||
// (negative || addition != null) occurs when this token originates from
|
||||
// character class expression.
|
||||
if (negative || addition != null) return 1;
|
||||
if (matchesOneChar) return 1;
|
||||
int max = 0;
|
||||
int x;
|
||||
for (int i=0; i < options.size(); i++) {
|
||||
|
@ -118,6 +120,11 @@ final class RETokenOneOf extends REToken {
|
|||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
if (matchesOneChar) return matchOneChar(input, mymatch);
|
||||
else return matchOneRE(input, mymatch);
|
||||
}
|
||||
|
||||
boolean matchOneChar(CharIndexed input, REMatch mymatch) {
|
||||
REMatch tryMatch;
|
||||
boolean tryOnly;
|
||||
if (addition == null) {
|
||||
|
@ -187,41 +194,81 @@ final class RETokenOneOf extends REToken {
|
|||
}
|
||||
|
||||
private boolean matchP(CharIndexed input, REMatch mymatch, boolean tryOnly) {
|
||||
boolean stopMatchingIfSatisfied =
|
||||
(mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0;
|
||||
REMatch.REMatchList newMatch = new REMatch.REMatchList();
|
||||
REToken tk;
|
||||
for (int i=0; i < options.size(); i++) {
|
||||
// In order that the backtracking can work,
|
||||
// each option must be chained to the next token.
|
||||
// But the chain method has some side effect, so
|
||||
// we use clones.
|
||||
tk = (REToken)((REToken) options.elementAt(i)).clone();
|
||||
if (! tryOnly) {
|
||||
tk.chain(this.next);
|
||||
tk.setUncle(this.uncle);
|
||||
tk.subIndex = this.subIndex;
|
||||
}
|
||||
tk = (REToken) options.elementAt(i);
|
||||
REMatch tryMatch = (REMatch) mymatch.clone();
|
||||
if (tk.match(input, tryMatch)) { // match was successful
|
||||
if (tryOnly) return true;
|
||||
newMatch.addTail(tryMatch);
|
||||
if (stopMatchingIfSatisfied) break;
|
||||
} // is a match
|
||||
} // try next option
|
||||
if (tryOnly) return false;
|
||||
|
||||
if (newMatch.head != null) {
|
||||
// set contents of mymatch equal to newMatch
|
||||
|
||||
// try each one that matched
|
||||
mymatch.assignFrom(newMatch.head);
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
if (next(input, tryMatch)) {
|
||||
mymatch.assignFrom(tryMatch);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean matchOneRE(CharIndexed input, REMatch mymatch) {
|
||||
REMatch newMatch = findMatch(input, mymatch);
|
||||
if (newMatch != null) {
|
||||
mymatch.assignFrom(newMatch);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
REMatch findMatch(CharIndexed input, REMatch mymatch) {
|
||||
if (matchesOneChar) return super.findMatch(input, mymatch);
|
||||
return findMatch(input, mymatch, 0);
|
||||
}
|
||||
|
||||
REMatch backtrack(CharIndexed input, REMatch mymatch, Object param) {
|
||||
return findMatch(input, mymatch, ((Integer)param).intValue());
|
||||
}
|
||||
|
||||
private REMatch findMatch(CharIndexed input, REMatch mymatch, int optionIndex) {
|
||||
for (int i = optionIndex; i < options.size(); i++) {
|
||||
REToken tk = (REToken) options.elementAt(i);
|
||||
tk = (REToken) tk.clone();
|
||||
tk.chain(getNext());
|
||||
REMatch tryMatch = (REMatch) mymatch.clone();
|
||||
if (tryMatch.backtrackStack == null) {
|
||||
tryMatch.backtrackStack = new BacktrackStack();
|
||||
}
|
||||
boolean stackPushed = false;
|
||||
if (i + 1 < options.size()) {
|
||||
tryMatch.backtrackStack.push(new BacktrackStack.Backtrack(
|
||||
this, input, mymatch, new Integer(i + 1)));
|
||||
stackPushed = true;
|
||||
}
|
||||
boolean b = tk.match(input, tryMatch);
|
||||
if (b) {
|
||||
return tryMatch;
|
||||
}
|
||||
if (stackPushed) tryMatch.backtrackStack.pop();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
boolean returnsFixedLengthMatches() { return matchesOneChar; }
|
||||
|
||||
int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
|
||||
if (!matchesOneChar)
|
||||
return super.findFixedLengthMatches(input, mymatch, max);
|
||||
int numRepeats = 0;
|
||||
REMatch m = (REMatch) mymatch.clone();
|
||||
REToken tk = (REToken) this.clone();
|
||||
tk.chain(null);
|
||||
while (true) {
|
||||
if (numRepeats >= max) break;
|
||||
m = tk.findMatch(input, m);
|
||||
if (m == null) break;
|
||||
numRepeats++;
|
||||
}
|
||||
return numRepeats;
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
os.append(negative ? "[^" : "(?:");
|
||||
for (int i = 0; i < options.size(); i++) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/RETokenPOSIX.java
|
||||
Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -85,8 +85,17 @@ final class RETokenPOSIX extends REToken {
|
|||
return 1;
|
||||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
char ch = input.charAt(mymatch.index);
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch) {
|
||||
char ch = input.charAt(mymatch.index);
|
||||
boolean retval = matchOneChar(ch);
|
||||
if (retval) {
|
||||
++mymatch.index;
|
||||
return mymatch;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
boolean matchOneChar(char ch) {
|
||||
if (ch == CharIndexed.OUT_OF_BOUNDS)
|
||||
return false;
|
||||
|
||||
|
@ -134,11 +143,21 @@ final class RETokenPOSIX extends REToken {
|
|||
}
|
||||
|
||||
if (negated) retval = !retval;
|
||||
if (retval) {
|
||||
++mymatch.index;
|
||||
return next(input, mymatch);
|
||||
}
|
||||
else return false;
|
||||
return retval;
|
||||
}
|
||||
|
||||
boolean returnsFixedLengthMatches() { return true; }
|
||||
|
||||
int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
|
||||
int index = mymatch.index;
|
||||
int numRepeats = 0;
|
||||
while (true) {
|
||||
if (numRepeats >= max) break;
|
||||
char ch = input.charAt(index++);
|
||||
if (! matchOneChar(ch)) break;
|
||||
numRepeats++;
|
||||
}
|
||||
return numRepeats;
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/RETokenRange.java
|
||||
Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -56,23 +56,41 @@ final class RETokenRange extends REToken {
|
|||
return 1;
|
||||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch) {
|
||||
char c = input.charAt(mymatch.index);
|
||||
if (matchOneChar(c)) {
|
||||
++mymatch.index;
|
||||
return mymatch;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
boolean matchOneChar(char c) {
|
||||
if (c == CharIndexed.OUT_OF_BOUNDS) return false;
|
||||
boolean matches = (c >= lo) && (c <= hi);
|
||||
if (! matches && insens) {
|
||||
char c1 = Character.toLowerCase(c);
|
||||
char c1 = toLowerCase(c, unicodeAware);
|
||||
matches = (c1 >= lo) && (c1 <= hi);
|
||||
if (!matches) {
|
||||
c1 = Character.toUpperCase(c);
|
||||
c1 = toUpperCase(c, unicodeAware);
|
||||
matches = (c1 >= lo) && (c1 <= hi);
|
||||
}
|
||||
}
|
||||
if (matches) {
|
||||
++mymatch.index;
|
||||
return next(input, mymatch);
|
||||
return matches;
|
||||
}
|
||||
|
||||
boolean returnsFixedLengthMatches() { return true; }
|
||||
|
||||
int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
|
||||
int index = mymatch.index;
|
||||
int numRepeats = 0;
|
||||
while (true) {
|
||||
if (numRepeats >= max) break;
|
||||
char ch = input.charAt(index++);
|
||||
if (! matchOneChar(ch)) break;
|
||||
numRepeats++;
|
||||
}
|
||||
return false;
|
||||
return numRepeats;
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
|
|
|
@ -38,20 +38,27 @@ exception statement from your version. */
|
|||
|
||||
package gnu.regexp;
|
||||
|
||||
import java.util.Vector;
|
||||
import java.util.Arrays;
|
||||
// import java.util.Vector;
|
||||
// import java.util.Stack;
|
||||
|
||||
final class RETokenRepeated extends REToken {
|
||||
private REToken token;
|
||||
private int min,max;
|
||||
private boolean stingy;
|
||||
private boolean possessive;
|
||||
private int tokenFixedLength;
|
||||
|
||||
RETokenRepeated(int subIndex, REToken token, int min, int max) {
|
||||
super(subIndex);
|
||||
this.token = token;
|
||||
this.min = min;
|
||||
this.max = max;
|
||||
if (token.returnsFixedLengthMatches()) {
|
||||
tokenFixedLength = token.getMaximumLength();
|
||||
}
|
||||
else {
|
||||
tokenFixedLength = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/** Sets the minimal matching mode to true. */
|
||||
|
@ -90,68 +97,234 @@ final class RETokenRepeated extends REToken {
|
|||
return (max * tmax);
|
||||
}
|
||||
|
||||
private static REMatch findDoables(REToken tk,
|
||||
CharIndexed input, REMatch mymatch) {
|
||||
// The comment "MUST make a clone" below means that some tests
|
||||
// failed without doing clone(),
|
||||
|
||||
REMatch.REMatchList doables = new REMatch.REMatchList();
|
||||
private static class DoablesFinder {
|
||||
private REToken tk;
|
||||
private CharIndexed input;
|
||||
private REMatch rematch;
|
||||
private boolean findFirst;
|
||||
|
||||
// try next repeat at all possible positions
|
||||
for (REMatch current = mymatch;
|
||||
current != null; current = current.next) {
|
||||
REMatch recurrent = (REMatch) current.clone();
|
||||
int origin = recurrent.index;
|
||||
tk = (REToken) tk.clone();
|
||||
tk.next = tk.uncle = null;
|
||||
recurrent.matchFlags |= REMatch.MF_FIND_ALL;
|
||||
if (tk.match(input, recurrent)) {
|
||||
for (REMatch m = recurrent; m != null; m = m.next) {
|
||||
m.matchFlags &= ~REMatch.MF_FIND_ALL;
|
||||
}
|
||||
if (recurrent.index == origin) recurrent.empty = true;
|
||||
// add all items in current to doables array
|
||||
doables.addTail(recurrent);
|
||||
}
|
||||
}
|
||||
return doables.head;
|
||||
}
|
||||
|
||||
// We do need to save every possible point, but the number of clone()
|
||||
// invocations here is really a killer for performance on non-stingy
|
||||
// repeat operators. I'm open to suggestions...
|
||||
|
||||
// Hypothetical question: can you have a RE that matches 1 times,
|
||||
// 3 times, 5 times, but not 2 times or 4 times? Does having
|
||||
// the subexpression back-reference operator allow that?
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
|
||||
boolean stopMatchingIfSatisfied =
|
||||
(mymatch.matchFlags & REMatch.MF_FIND_ALL) == 0;
|
||||
|
||||
REMatch newMatch = matchMinimum(input, mymatch);
|
||||
if (newMatch == null) return false;
|
||||
|
||||
// Array of positions we have already visited
|
||||
int[] visited = initVisited();
|
||||
for (REMatch m = newMatch; m != null; m = m.next) {
|
||||
visited = addVisited(m.index, visited);
|
||||
private DoablesFinder(REToken tk, CharIndexed input, REMatch mymatch) {
|
||||
this.tk = tk;
|
||||
this.input = input;
|
||||
this.rematch = (REMatch) mymatch.clone(); // MUST make a clone
|
||||
this.rematch.backtrackStack = new BacktrackStack();
|
||||
findFirst = true;
|
||||
}
|
||||
|
||||
int max1 = decreaseMax(max, min);
|
||||
private REMatch find() {
|
||||
int origin = rematch.index;
|
||||
REMatch rem;
|
||||
if (findFirst) {
|
||||
rem = tk.findMatch(input, rematch);
|
||||
findFirst = false;
|
||||
}
|
||||
else {
|
||||
while (true) {
|
||||
if (rematch.backtrackStack.empty()) {
|
||||
rem = null;
|
||||
break;
|
||||
}
|
||||
BacktrackStack.Backtrack bt = rematch.backtrackStack.pop();
|
||||
rem = bt.token.backtrack(bt.input, bt.match, bt.param);
|
||||
if (rem != null) break;
|
||||
}
|
||||
}
|
||||
if (rem == null) return null;
|
||||
if (rem.index == origin) rem.empty = true;
|
||||
rematch = rem;
|
||||
return (REMatch) rem.clone(); // MUST make a clone.
|
||||
}
|
||||
|
||||
newMatch = _match(input, newMatch, max1,
|
||||
stopMatchingIfSatisfied, visited);
|
||||
if (newMatch != null) {
|
||||
mymatch.assignFrom(newMatch);
|
||||
boolean noMore() {
|
||||
return rematch.backtrackStack.empty();
|
||||
}
|
||||
}
|
||||
|
||||
REMatch findMatch(CharIndexed input, REMatch mymatch) {
|
||||
if (tokenFixedLength >= 0) return findMatchFixedLength(input, mymatch);
|
||||
BacktrackStack stack = new BacktrackStack();
|
||||
stack.push(new StackedInfo(input, 0, mymatch, null, null));
|
||||
return findMatch(stack);
|
||||
}
|
||||
|
||||
REMatch backtrack(CharIndexed input, REMatch mymatch, Object param) {
|
||||
if (tokenFixedLength >= 0) return backtrackFixedLength(input, mymatch, param);
|
||||
return findMatch((BacktrackStack)param);
|
||||
}
|
||||
|
||||
private static class StackedInfo extends BacktrackStack.Backtrack {
|
||||
int numRepeats;
|
||||
int[] visited;
|
||||
DoablesFinder finder;
|
||||
StackedInfo(CharIndexed input, int numRepeats, REMatch match,
|
||||
int[] visited, DoablesFinder finder) {
|
||||
super(null, input, match, null);
|
||||
this.numRepeats = numRepeats;
|
||||
this.visited = visited;
|
||||
this.finder = finder;
|
||||
}
|
||||
}
|
||||
|
||||
private REMatch findMatch(BacktrackStack stack) {
|
||||
// Avoid using recursive calls.
|
||||
MAIN_LOOP:
|
||||
while (true) {
|
||||
|
||||
if (stack.empty()) return null;
|
||||
StackedInfo si = (StackedInfo)(stack.peek());
|
||||
CharIndexed input = si.input;
|
||||
int numRepeats = si.numRepeats;
|
||||
REMatch mymatch = si.match;
|
||||
int[] visited = si.visited;
|
||||
DoablesFinder finder = si.finder;
|
||||
|
||||
if (mymatch.backtrackStack == null)
|
||||
mymatch.backtrackStack = new BacktrackStack();
|
||||
|
||||
if (numRepeats >= max) {
|
||||
stack.pop();
|
||||
REMatch m1 = matchRest(input, mymatch);
|
||||
if (m1 != null) {
|
||||
if (! stack.empty()) {
|
||||
m1.backtrackStack.push(new BacktrackStack.Backtrack(
|
||||
this, input, mymatch, stack));
|
||||
}
|
||||
return m1;
|
||||
}
|
||||
if (stingy) {
|
||||
continue MAIN_LOOP;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
if (finder == null) {
|
||||
finder = new DoablesFinder(token, input, mymatch);
|
||||
si.finder = finder;
|
||||
}
|
||||
|
||||
if (numRepeats < min) {
|
||||
while (true) {
|
||||
REMatch doable = finder.find();
|
||||
if (doable == null) {
|
||||
if (stack.empty()) return null;
|
||||
stack.pop();
|
||||
continue MAIN_LOOP;
|
||||
}
|
||||
if (finder.noMore()) stack.pop();
|
||||
int newNumRepeats = (doable.empty ? min : numRepeats + 1);
|
||||
stack.push(new StackedInfo(
|
||||
input, newNumRepeats, doable, visited, null));
|
||||
continue MAIN_LOOP;
|
||||
}
|
||||
}
|
||||
|
||||
if (visited == null) visited = initVisited();
|
||||
|
||||
if (stingy) {
|
||||
REMatch nextMatch = finder.find();
|
||||
if (nextMatch != null && !nextMatch.empty) {
|
||||
stack.push(new StackedInfo(
|
||||
input, numRepeats + 1, nextMatch, visited, null));
|
||||
}
|
||||
else {
|
||||
stack.pop();
|
||||
}
|
||||
REMatch m1 = matchRest(input, mymatch);
|
||||
if (m1 != null) {
|
||||
if (!stack.empty()) {
|
||||
m1.backtrackStack.push(new BacktrackStack.Backtrack(
|
||||
this, input, mymatch, stack));
|
||||
}
|
||||
return m1;
|
||||
}
|
||||
else {
|
||||
continue MAIN_LOOP;
|
||||
}
|
||||
}
|
||||
|
||||
visited = addVisited(mymatch.index, visited);
|
||||
|
||||
DO_THIS:
|
||||
do {
|
||||
|
||||
boolean emptyMatchFound = false;
|
||||
|
||||
DO_ONE_DOABLE:
|
||||
while (true) {
|
||||
|
||||
REMatch doable = finder.find();
|
||||
if (doable == null) {
|
||||
break DO_THIS;
|
||||
}
|
||||
if (doable.empty) emptyMatchFound = true;
|
||||
|
||||
if (!emptyMatchFound) {
|
||||
int n = doable.index;
|
||||
if (! visitedContains(n, visited)) {
|
||||
visited = addVisited(n, visited);
|
||||
}
|
||||
else {
|
||||
continue DO_ONE_DOABLE;
|
||||
}
|
||||
stack.push(new StackedInfo(
|
||||
input, numRepeats + 1, doable, visited, null));
|
||||
REMatch m1 = findMatch(stack);
|
||||
if (possessive) {
|
||||
return m1;
|
||||
}
|
||||
if (m1 != null) {
|
||||
m1.backtrackStack.push(new BacktrackStack.Backtrack(
|
||||
this, input, mymatch, stack));
|
||||
return m1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
REMatch m1 = matchRest(input, doable);
|
||||
if (possessive) {
|
||||
return m1;
|
||||
}
|
||||
if (m1 != null) {
|
||||
if (! stack.empty()) {
|
||||
m1.backtrackStack.push(new BacktrackStack.Backtrack(
|
||||
this, input, mymatch, stack));
|
||||
}
|
||||
return m1;
|
||||
}
|
||||
}
|
||||
|
||||
} // DO_ONE_DOABLE
|
||||
|
||||
} while (false); // DO_THIS only once;
|
||||
|
||||
if (!stack.empty()) {
|
||||
stack.pop();
|
||||
}
|
||||
if (possessive) {
|
||||
stack.clear();
|
||||
}
|
||||
REMatch m1 = matchRest(input, mymatch);
|
||||
if (m1 != null) {
|
||||
if (! stack.empty()) {
|
||||
m1.backtrackStack.push(new BacktrackStack.Backtrack(
|
||||
this, input, mymatch, stack));
|
||||
}
|
||||
return m1;
|
||||
}
|
||||
|
||||
} // MAIN_LOOP
|
||||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
REMatch m1 = findMatch(input, mymatch);
|
||||
if (m1 != null) {
|
||||
mymatch.assignFrom(m1);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private static int decreaseMax(int m, int n) {
|
||||
if (m == Integer.MAX_VALUE) return m;
|
||||
return m - n;
|
||||
}
|
||||
}
|
||||
|
||||
// Array visited is an array of character positions we have already
|
||||
// visited. visited[0] is used to store the effective length of the
|
||||
|
@ -183,135 +356,56 @@ final class RETokenRepeated extends REToken {
|
|||
return visited;
|
||||
}
|
||||
|
||||
private REMatch _match(CharIndexed input, REMatch mymatch,
|
||||
int max1, boolean stopMatchingIfSatisfied,
|
||||
int[] visited) {
|
||||
|
||||
if (max1 == 0) {
|
||||
return matchRest(input, mymatch);
|
||||
}
|
||||
max1 = decreaseMax(max1, 1);
|
||||
|
||||
REMatch.REMatchList allResults = new REMatch.REMatchList();
|
||||
|
||||
// Depth-first search
|
||||
|
||||
for (REMatch cur = mymatch; cur != null; cur = cur.next) {
|
||||
|
||||
REMatch cur1 = (REMatch) cur.clone();
|
||||
|
||||
if (stingy) {
|
||||
REMatch results = matchRest(input, cur1);
|
||||
if (results != null) {
|
||||
if (stopMatchingIfSatisfied) {
|
||||
return results;
|
||||
}
|
||||
allResults.addTail(results);
|
||||
}
|
||||
}
|
||||
|
||||
DO_THIS:
|
||||
do {
|
||||
|
||||
boolean emptyMatchFound = false;
|
||||
REMatch doables = findDoables(token, input, cur1);
|
||||
if (doables == null) break DO_THIS;
|
||||
if (doables.empty) emptyMatchFound = true;
|
||||
|
||||
if (!emptyMatchFound) {
|
||||
REMatch.REMatchList list = new REMatch.REMatchList();
|
||||
for (REMatch m = doables; m != null; m = m.next) {
|
||||
REMatch m1 = (REMatch) m.clone();
|
||||
int n = m1.index;
|
||||
if (! visitedContains(n, visited)) {
|
||||
visited = addVisited(n, visited);
|
||||
list.addTail(m1);
|
||||
}
|
||||
}
|
||||
if (list.head == null) break DO_THIS;
|
||||
doables = list.head;
|
||||
}
|
||||
|
||||
for (REMatch m = doables; m != null; m = m.next) {
|
||||
if (! emptyMatchFound) {
|
||||
REMatch m1 = _match(input, m, max1,
|
||||
stopMatchingIfSatisfied, visited);
|
||||
if (possessive) return m1;
|
||||
if (m1 != null) {
|
||||
if (stopMatchingIfSatisfied) {
|
||||
return m1;
|
||||
}
|
||||
allResults.addTail(m1);
|
||||
}
|
||||
}
|
||||
else {
|
||||
REMatch m1 = matchRest(input, m);
|
||||
if (m1 != null) {
|
||||
if (stopMatchingIfSatisfied) {
|
||||
return m1;
|
||||
}
|
||||
allResults.addTail(m1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} while (false); // DO_THIS only once;
|
||||
|
||||
// This point itself is a candidate.
|
||||
if (!stingy) {
|
||||
REMatch m2 = matchRest(input, cur1);
|
||||
if (m2 != null) {
|
||||
if (stopMatchingIfSatisfied) {
|
||||
return m2;
|
||||
}
|
||||
allResults.addTail(m2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return allResults.head;
|
||||
}
|
||||
|
||||
private REMatch matchMinimum(CharIndexed input, final REMatch mymatch) {
|
||||
// Possible positions for the next repeat to match at
|
||||
REMatch newMatch = mymatch;
|
||||
|
||||
// number of times we've matched so far
|
||||
int numRepeats = 0;
|
||||
|
||||
while (numRepeats < min) {
|
||||
REMatch doables = findDoables(token, input, newMatch);
|
||||
|
||||
// if none of the possibilities worked out,
|
||||
// it means that minimum number of repeats could not be found.
|
||||
if (doables == null) return null;
|
||||
|
||||
// reassign where the next repeat can match
|
||||
newMatch = doables;
|
||||
|
||||
// increment how many repeats we've successfully found
|
||||
++numRepeats;
|
||||
|
||||
if (newMatch.empty) break;
|
||||
}
|
||||
return newMatch;
|
||||
}
|
||||
|
||||
private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
|
||||
REMatch current, single;
|
||||
REMatch.REMatchList doneIndex = new REMatch.REMatchList();
|
||||
// Test all possible matches for this number of repeats
|
||||
for (current = newMatch; current != null; current = current.next) {
|
||||
// clone() separates a single match from the chain
|
||||
single = (REMatch) current.clone();
|
||||
if (next(input, single)) {
|
||||
// chain results to doneIndex
|
||||
doneIndex.addTail(single);
|
||||
}
|
||||
if (next(input, newMatch)) {
|
||||
return newMatch;
|
||||
}
|
||||
return doneIndex.head;
|
||||
return null;
|
||||
}
|
||||
|
||||
private REMatch findMatchFixedLength(CharIndexed input, REMatch mymatch) {
|
||||
if (mymatch.backtrackStack == null)
|
||||
mymatch.backtrackStack = new BacktrackStack();
|
||||
int numRepeats = token.findFixedLengthMatches(input, (REMatch)mymatch.clone(), max);
|
||||
if (numRepeats == Integer.MAX_VALUE) numRepeats = min;
|
||||
int count = numRepeats - min + 1;
|
||||
if (count <= 0) return null;
|
||||
int index = 0;
|
||||
if (!stingy) index = mymatch.index + (tokenFixedLength * numRepeats);
|
||||
else index = mymatch.index + (tokenFixedLength * min);
|
||||
return findMatchFixedLength(input, mymatch, index, count);
|
||||
}
|
||||
|
||||
private REMatch backtrackFixedLength(CharIndexed input, REMatch mymatch,
|
||||
Object param) {
|
||||
int[] params = (int[])param;
|
||||
int index = params[0];
|
||||
int count = params[1];
|
||||
return findMatchFixedLength(input, mymatch, index, count);
|
||||
}
|
||||
|
||||
private REMatch findMatchFixedLength(CharIndexed input, REMatch mymatch,
|
||||
int index, int count) {
|
||||
REMatch tryMatch = (REMatch) mymatch.clone();
|
||||
while (true) {
|
||||
tryMatch.index = index;
|
||||
REMatch m = matchRest(input, tryMatch);
|
||||
count--;
|
||||
if (stingy) index += tokenFixedLength;
|
||||
else index -= tokenFixedLength;
|
||||
if (possessive) return m;
|
||||
if (m != null) {
|
||||
if (count > 0) {
|
||||
m.backtrackStack.push(new BacktrackStack.Backtrack(
|
||||
this, input, mymatch,
|
||||
new int[] {index, count}));
|
||||
}
|
||||
return m;
|
||||
}
|
||||
if (count <= 0) return null;
|
||||
}
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
os.append("(?:");
|
||||
token.dumpAll(os);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/RETokenStart.java
|
||||
Copyright (C) 1998-2001, 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -49,7 +49,7 @@ class RETokenStart extends REToken {
|
|||
return 0;
|
||||
}
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch) {
|
||||
// charAt(index-n) may be unknown on a Reader/InputStream. FIXME
|
||||
// Match after a newline if in multiline mode
|
||||
|
||||
|
@ -70,19 +70,26 @@ class RETokenStart extends REToken {
|
|||
ch = input.charAt(mymatch.index - len + i);
|
||||
} while (i < len);
|
||||
|
||||
if (found) return next(input, mymatch);
|
||||
if (found) return mymatch;
|
||||
}
|
||||
}
|
||||
|
||||
// Don't match at all if REG_NOTBOL is set.
|
||||
if ((mymatch.eflags & RE.REG_NOTBOL) > 0) return false;
|
||||
if ((mymatch.eflags & RE.REG_NOTBOL) > 0) return null;
|
||||
|
||||
if ((mymatch.eflags & RE.REG_ANCHORINDEX) > 0)
|
||||
return (mymatch.anchor == mymatch.offset) ?
|
||||
next(input, mymatch) : false;
|
||||
mymatch : null;
|
||||
else
|
||||
return ((mymatch.index == 0) && (mymatch.offset == 0)) ?
|
||||
next(input, mymatch) : false;
|
||||
mymatch : null;
|
||||
}
|
||||
|
||||
boolean returnsFixedLengthmatches() { return true; }
|
||||
|
||||
int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
|
||||
if (matchThis(input, mymatch) != null) return max;
|
||||
else return 0;
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/* gnu/regexp/RETokenWordBoundary.java
|
||||
Copyright (C) 2001, 2004 Free Software Foundation, Inc.
|
||||
Copyright (C) 2006 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Classpath.
|
||||
|
||||
|
@ -58,7 +58,7 @@ final class RETokenWordBoundary extends REToken {
|
|||
}
|
||||
|
||||
|
||||
boolean match(CharIndexed input, REMatch mymatch) {
|
||||
REMatch matchThis(CharIndexed input, REMatch mymatch) {
|
||||
// Word boundary means input[index-1] was a word character
|
||||
// and input[index] is not, or input[index] is a word character
|
||||
// and input[index-1] was not
|
||||
|
@ -94,7 +94,14 @@ final class RETokenWordBoundary extends REToken {
|
|||
|
||||
if (negated) doNext = !doNext;
|
||||
|
||||
return (doNext ? next(input, mymatch) : false);
|
||||
return (doNext ? mymatch : null);
|
||||
}
|
||||
|
||||
boolean returnsFixedLengthMatches() { return true; }
|
||||
|
||||
int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
|
||||
if(matchThis(input, mymatch) != null) return max;
|
||||
else return 0;
|
||||
}
|
||||
|
||||
void dump(StringBuffer os) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue