
Imported GNU Classpath 0.90 * scripts/makemake.tcl: LocaleData.java moved to gnu/java/locale. * sources.am: Regenerated. * gcj/javaprims.h: Regenerated. * Makefile.in: Regenerated. * gcj/Makefile.in: Regenerated. * include/Makefile.in: Regenerated. * testsuite/Makefile.in: Regenerated. * gnu/java/lang/VMInstrumentationImpl.java: New override. * gnu/java/net/local/LocalSocketImpl.java: Likewise. * gnu/classpath/jdwp/VMMethod.java: Likewise. * gnu/classpath/jdwp/VMVirtualMachine.java: Update to latest interface. * java/lang/Thread.java: Add UncaughtExceptionHandler. * java/lang/reflect/Method.java: Implements GenericDeclaration and isSynthetic(), * java/lang/reflect/Field.java: Likewise. * java/lang/reflect/Constructor.java * java/lang/Class.java: Implements Type, GenericDeclaration, getSimpleName() and getEnclosing*() methods. * java/lang/Class.h: Add new public methods. * java/lang/Math.java: Add signum(), ulp() and log10(). * java/lang/natMath.cc (log10): New function. * java/security/VMSecureRandom.java: New override. * java/util/logging/Logger.java: Updated to latest classpath version. * java/util/logging/LogManager.java: New override. From-SVN: r113887
5273 lines
150 KiB
Java
5273 lines
150 KiB
Java
/* XMLParser.java --
|
|
Copyright (C) 2005 Free Software Foundation, Inc.
|
|
|
|
This file is part of GNU Classpath.
|
|
|
|
GNU Classpath is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2, or (at your option)
|
|
any later version.
|
|
|
|
GNU Classpath is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GNU Classpath; see the file COPYING. If not, write to the
|
|
Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
02110-1301 USA.
|
|
|
|
Linking this library statically or dynamically with other modules is
|
|
making a combined work based on this library. Thus, the terms and
|
|
conditions of the GNU General Public License cover the whole
|
|
combination.
|
|
|
|
As a special exception, the copyright holders of this library give you
|
|
permission to link this library with independent modules to produce an
|
|
executable, regardless of the license terms of these independent
|
|
modules, and to copy and distribute the resulting executable under
|
|
terms of your choice, provided that you also meet, for each linked
|
|
independent module, the terms and conditions of the license of that
|
|
module. An independent module is a module which is not derived from
|
|
or based on this library. If you modify this library, you may extend
|
|
this exception to your version of the library, but you are not
|
|
obligated to do so. If you do not wish to do so, delete this
|
|
exception statement from your version.
|
|
|
|
Partly derived from code which carried the following notice:
|
|
|
|
Copyright (c) 1997, 1998 by Microstar Software Ltd.
|
|
|
|
AElfred is free for both commercial and non-commercial use and
|
|
redistribution, provided that Microstar's copyright and disclaimer are
|
|
retained intact. You are free to modify AElfred for your own use and
|
|
to redistribute AElfred with your modifications, provided that the
|
|
modifications are clearly documented.
|
|
|
|
This program is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
merchantability or fitness for a particular purpose. Please use it AT
|
|
YOUR OWN RISK.
|
|
*/
|
|
|
|
package gnu.xml.stream;
|
|
|
|
import java.io.BufferedInputStream;
|
|
import java.io.EOFException;
|
|
import java.io.File;
|
|
import java.io.InputStream;
|
|
import java.io.InputStreamReader;
|
|
import java.io.IOException;
|
|
import java.io.Reader;
|
|
import java.io.StringReader;
|
|
import java.io.UnsupportedEncodingException;
|
|
import java.net.MalformedURLException;
|
|
import java.net.URL;
|
|
import java.util.ArrayList;
|
|
import java.util.Collections;
|
|
import java.util.HashSet;
|
|
import java.util.Iterator;
|
|
import java.util.LinkedHashMap;
|
|
import java.util.LinkedList;
|
|
import java.util.Map;
|
|
import java.util.NoSuchElementException;
|
|
import java.util.StringTokenizer;
|
|
|
|
import javax.xml.XMLConstants;
|
|
import javax.xml.namespace.NamespaceContext;
|
|
import javax.xml.namespace.QName;
|
|
import javax.xml.stream.Location;
|
|
import javax.xml.stream.XMLInputFactory;
|
|
import javax.xml.stream.XMLReporter;
|
|
import javax.xml.stream.XMLResolver;
|
|
import javax.xml.stream.XMLStreamConstants;
|
|
import javax.xml.stream.XMLStreamException;
|
|
import javax.xml.stream.XMLStreamReader;
|
|
|
|
import gnu.java.net.CRLFInputStream;
|
|
|
|
/**
|
|
* An XML parser.
|
|
* This parser supports the following additional StAX properties:
|
|
* <table>
|
|
* <tr><td>gnu.xml.stream.stringInterning</td>
|
|
* <td>Boolean</td>
|
|
* <td>Indicates whether markup strings will be interned</td></tr>
|
|
* <tr><td>gnu.xml.stream.xmlBase</td>
|
|
* <td>Boolean</td>
|
|
* <td>Indicates whether XML Base processing will be performed</td></tr>
|
|
* <tr><td>gnu.xml.stream.baseURI</td>
|
|
* <td>String</td>
|
|
* <td>Returns the base URI of the current event</td></tr>
|
|
* </table>
|
|
*
|
|
* @see http://www.w3.org/TR/REC-xml/
|
|
* @see http://www.w3.org/TR/xml11/
|
|
* @see http://www.w3.org/TR/REC-xml-names
|
|
* @see http://www.w3.org/TR/xml-names11
|
|
* @see http://www.w3.org/TR/xmlbase/
|
|
*
|
|
* @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
|
|
*/
|
|
public class XMLParser
|
|
implements XMLStreamReader, NamespaceContext
|
|
{
|
|
|
|
// -- parser state machine states --
|
|
private static final int INIT = 0; // start state
|
|
private static final int PROLOG = 1; // in prolog
|
|
private static final int CONTENT = 2; // in content
|
|
private static final int EMPTY_ELEMENT = 3; // empty element state
|
|
private static final int MISC = 4; // in Misc (after root element)
|
|
|
|
// -- parameters for parsing literals --
|
|
private final static int LIT_ENTITY_REF = 2;
|
|
private final static int LIT_NORMALIZE = 4;
|
|
private final static int LIT_ATTRIBUTE = 8;
|
|
private final static int LIT_DISABLE_PE = 16;
|
|
private final static int LIT_DISABLE_CREF = 32;
|
|
private final static int LIT_DISABLE_EREF = 64;
|
|
private final static int LIT_PUBID = 256;
|
|
|
|
// -- types of attribute values --
|
|
final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
|
|
final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
|
|
final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
|
|
final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
|
|
final static int ATTRIBUTE_DEFAULT_FIXED = 34;
|
|
|
|
// -- additional event types --
|
|
final static int START_ENTITY = 50;
|
|
final static int END_ENTITY = 51;
|
|
|
|
/**
|
|
* The current input.
|
|
*/
|
|
private Input input;
|
|
|
|
/**
|
|
* Stack of inputs representing XML general entities.
|
|
* The input representing the XML input stream or reader is always the
|
|
* first element in this stack.
|
|
*/
|
|
private LinkedList inputStack = new LinkedList();
|
|
|
|
/**
|
|
* Stack of start-entity events to be reported.
|
|
*/
|
|
private LinkedList startEntityStack = new LinkedList();
|
|
|
|
/**
|
|
* Stack of end-entity events to be reported.
|
|
*/
|
|
private LinkedList endEntityStack = new LinkedList();
|
|
|
|
/**
|
|
* Current parser state within the main state machine.
|
|
*/
|
|
private int state = INIT;
|
|
|
|
/**
|
|
* The (type of the) current event.
|
|
*/
|
|
private int event;
|
|
|
|
/**
|
|
* Whether we are looking ahead. Used by hasNext.
|
|
*/
|
|
private boolean lookahead;
|
|
|
|
/**
|
|
* The element name stack. The first element in this stack will be the
|
|
* root element.
|
|
*/
|
|
private LinkedList stack = new LinkedList();
|
|
|
|
/**
|
|
* Stack of namespace contexts. These are maps specifying prefix-to-URI
|
|
* mappings. The first element in this stack is the most recent namespace
|
|
* context (i.e. the other way around from the element name stack).
|
|
*/
|
|
private LinkedList namespaces = new LinkedList();
|
|
|
|
/**
|
|
* The base-URI stack. This holds the base URI context for each element.
|
|
* The first element in this stack is the most recent context (i.e. the
|
|
* other way around from the element name stack).
|
|
*/
|
|
private LinkedList bases = new LinkedList();
|
|
|
|
/**
|
|
* The list of attributes for the current element, in the order defined in
|
|
* the XML stream.
|
|
*/
|
|
private ArrayList attrs = new ArrayList();
|
|
|
|
/**
|
|
* Buffer for text and character data.
|
|
*/
|
|
private StringBuffer buf = new StringBuffer();
|
|
|
|
/**
|
|
* Buffer for NMTOKEN strings (markup).
|
|
*/
|
|
private StringBuffer nmtokenBuf = new StringBuffer();
|
|
|
|
/**
|
|
* Buffer for string literals. (e.g. attribute values)
|
|
*/
|
|
private StringBuffer literalBuf = new StringBuffer();
|
|
|
|
/**
|
|
* Temporary Unicode character buffer used during character data reads.
|
|
*/
|
|
private int[] tmpBuf = new int[1024];
|
|
|
|
/**
|
|
* The element content model for the current element.
|
|
*/
|
|
private ContentModel currentContentModel;
|
|
|
|
/**
|
|
* The validation stack. This holds lists of the elements seen for each
|
|
* element, in order to determine whether the names and order of these
|
|
* elements match the content model for the element. The last entry in
|
|
* this stack represents the current element.
|
|
*/
|
|
private LinkedList validationStack;
|
|
|
|
/**
|
|
* These sets contain the IDs and the IDREFs seen in the document, to
|
|
* ensure that IDs are unique and that each IDREF refers to an ID in the
|
|
* document.
|
|
*/
|
|
private HashSet ids, idrefs;
|
|
|
|
/**
|
|
* The target and data associated with the current processing instruction
|
|
* event.
|
|
*/
|
|
private String piTarget, piData;
|
|
|
|
/**
|
|
* The XML version declared in the XML declaration.
|
|
*/
|
|
private String xmlVersion;
|
|
|
|
/**
|
|
* The encoding declared in the XML declaration.
|
|
*/
|
|
private String xmlEncoding;
|
|
|
|
/**
|
|
* The standalone value declared in the XML declaration.
|
|
*/
|
|
private Boolean xmlStandalone;
|
|
|
|
/**
|
|
* The document type definition.
|
|
*/
|
|
Doctype doctype;
|
|
|
|
/**
|
|
* State variables for determining parameter-entity expansion.
|
|
*/
|
|
private boolean expandPE, peIsError;
|
|
|
|
/**
|
|
* Whether this is a validating parser.
|
|
*/
|
|
private final boolean validating;
|
|
|
|
/**
|
|
* Whether strings representing markup will be interned.
|
|
*/
|
|
private final boolean stringInterning;
|
|
|
|
/**
|
|
* If true, CDATA sections will be merged with adjacent text nodes into a
|
|
* single event.
|
|
*/
|
|
private final boolean coalescing;
|
|
|
|
/**
|
|
* Whether to replace general entity references with their replacement
|
|
* text automatically during parsing.
|
|
* Otherwise entity-reference events will be issued.
|
|
*/
|
|
private final boolean replaceERefs;
|
|
|
|
/**
|
|
* Whether to support external entities.
|
|
*/
|
|
private final boolean externalEntities;
|
|
|
|
/**
|
|
* Whether to support DTDs.
|
|
*/
|
|
private final boolean supportDTD;
|
|
|
|
/**
|
|
* Whether to support XML namespaces. If true, namespace information will
|
|
* be available. Otherwise namespaces will simply be reported as ordinary
|
|
* attributes.
|
|
*/
|
|
private final boolean namespaceAware;
|
|
|
|
/**
|
|
* Whether to support XML Base. If true, URIs specified in xml:base
|
|
* attributes will be honoured when resolving external entities.
|
|
*/
|
|
private final boolean baseAware;
|
|
|
|
/**
|
|
* Whether to report extended event types (START_ENTITY and END_ENTITY)
|
|
* in addition to the standard event types. Used by the SAX parser.
|
|
*/
|
|
private final boolean extendedEventTypes;
|
|
|
|
/**
|
|
* The reporter to receive parsing warnings.
|
|
*/
|
|
final XMLReporter reporter;
|
|
|
|
/**
|
|
* Callback interface for resolving external entities.
|
|
*/
|
|
final XMLResolver resolver;
|
|
|
|
// -- Constants for testing the next kind of markup event --
|
|
private static final String TEST_START_ELEMENT = "<";
|
|
private static final String TEST_END_ELEMENT = "</";
|
|
private static final String TEST_COMMENT = "<!--";
|
|
private static final String TEST_PI = "<?";
|
|
private static final String TEST_CDATA = "<![CDATA[";
|
|
private static final String TEST_XML_DECL = "<?xml";
|
|
private static final String TEST_DOCTYPE_DECL = "<!DOCTYPE";
|
|
private static final String TEST_ELEMENT_DECL = "<!ELEMENT";
|
|
private static final String TEST_ATTLIST_DECL = "<!ATTLIST";
|
|
private static final String TEST_ENTITY_DECL = "<!ENTITY";
|
|
private static final String TEST_NOTATION_DECL = "<!NOTATION";
|
|
private static final String TEST_KET = ">";
|
|
private static final String TEST_END_COMMENT = "--";
|
|
private static final String TEST_END_PI = "?>";
|
|
private static final String TEST_END_CDATA = "]]>";
|
|
|
|
/**
 * Maps the name of each general entity predefined by the XML
 * specification to its replacement text.
 */
private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap();
static
{
  // name / replacement-text pairs, inserted in this order
  final String[][] predefined =
  {
    { "amp", "&" },
    { "lt", "<" },
    { "gt", ">" },
    { "apos", "'" },
    { "quot", "\"" }
  };
  for (int k = 0; k < predefined.length; k++)
  {
    PREDEFINED_ENTITIES.put(predefined[k][0], predefined[k][1]);
  }
}
|
|
|
|
/**
 * Constructs an XML parser reading from a byte stream.
 * Use this constructor whenever possible, since it lets the encoding of
 * the XML data be determined correctly from the stream.
 * @param in the input stream
 * @param systemId the URL the input stream was retrieved from
 * (needed if external entities are to be resolved)
 * @param validating whether the parser validates
 * @param namespaceAware whether XML Namespaces are supported
 * @param coalescing whether CDATA sections are merged into adjacent
 * text nodes
 * @param replaceERefs whether entity references are automatically
 * replaced by their replacement text (if not, they are reported as
 * entity-reference events)
 * @param externalEntities whether external entities are loaded
 * @param supportDTD whether support for the XML DTD is enabled
 * @param baseAware whether XML Base is used when resolving external
 * entities
 * @param stringInterning whether strings are interned during parsing
 * @param extendedEventTypes whether START_ENTITY and END_ENTITY events
 * are reported in addition to the standard event types
 * @param reporter the reporter that receives warnings during processing
 * @param resolver the callback interface used to resolve external
 * entities
 */
public XMLParser(InputStream in, String systemId,
                 boolean validating,
                 boolean namespaceAware,
                 boolean coalescing,
                 boolean replaceERefs,
                 boolean externalEntities,
                 boolean supportDTD,
                 boolean baseAware,
                 boolean stringInterning,
                 boolean extendedEventTypes,
                 XMLReporter reporter,
                 XMLResolver resolver)
{
  // Callbacks.
  this.resolver = resolver;
  this.reporter = reporter;

  // Feature switches.
  this.extendedEventTypes = extendedEventTypes;
  this.stringInterning = stringInterning;
  this.baseAware = baseAware;
  this.supportDTD = supportDTD;
  this.externalEntities = externalEntities;
  this.replaceERefs = replaceERefs;
  this.coalescing = coalescing;
  this.namespaceAware = namespaceAware;
  this.validating = validating;

  // Validation bookkeeping is only allocated when it will be used.
  if (validating)
  {
    idrefs = new HashSet();
    ids = new HashSet();
    validationStack = new LinkedList();
  }

  // The document input is pushed last, after all fields are set.
  Input documentInput =
    new Input(in, null, null, systemId, null, null, false, true);
  pushInput(documentInput);
}
|
|
|
|
/**
 * Constructs an XML parser reading from a character stream.
 * This constructor exists only for compatibility with the JAXP APIs,
 * which allow XML to be parsed from a character stream. The encoding
 * implied by the character stream may conflict with the one given in
 * the XML declaration, so avoid this constructor where possible.
 * @param reader the character stream
 * @param systemId the URL the character stream was retrieved from
 * (needed if external entities are to be resolved)
 * @param validating whether the parser validates
 * @param namespaceAware whether XML Namespaces are supported
 * @param coalescing whether CDATA sections are merged into adjacent
 * text nodes
 * @param replaceERefs whether entity references are automatically
 * replaced by their replacement text (if not, they are reported as
 * entity-reference events)
 * @param externalEntities whether external entities are loaded
 * @param supportDTD whether support for the XML DTD is enabled
 * @param baseAware whether XML Base is used when resolving external
 * entities
 * @param stringInterning whether strings are interned during parsing
 * @param extendedEventTypes whether START_ENTITY and END_ENTITY events
 * are reported in addition to the standard event types
 * @param reporter the reporter that receives warnings during processing
 * @param resolver the callback interface used to resolve external
 * entities
 */
public XMLParser(Reader reader, String systemId,
                 boolean validating,
                 boolean namespaceAware,
                 boolean coalescing,
                 boolean replaceERefs,
                 boolean externalEntities,
                 boolean supportDTD,
                 boolean baseAware,
                 boolean stringInterning,
                 boolean extendedEventTypes,
                 XMLReporter reporter,
                 XMLResolver resolver)
{
  // Callbacks.
  this.resolver = resolver;
  this.reporter = reporter;

  // Feature switches.
  this.extendedEventTypes = extendedEventTypes;
  this.stringInterning = stringInterning;
  this.baseAware = baseAware;
  this.supportDTD = supportDTD;
  this.externalEntities = externalEntities;
  this.replaceERefs = replaceERefs;
  this.coalescing = coalescing;
  this.namespaceAware = namespaceAware;
  this.validating = validating;

  // Validation bookkeeping is only allocated when it will be used.
  if (validating)
  {
    idrefs = new HashSet();
    ids = new HashSet();
    validationStack = new LinkedList();
  }

  // The document input is pushed last, after all fields are set.
  Input documentInput =
    new Input(null, reader, null, systemId, null, null, false, true);
  pushInput(documentInput);
}
|
|
|
|
// -- NamespaceContext --
|
|
|
|
public String getNamespaceURI(String prefix)
|
|
{
|
|
if (XMLConstants.XML_NS_PREFIX.equals(prefix))
|
|
return XMLConstants.XML_NS_URI;
|
|
if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix))
|
|
return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
|
|
for (Iterator i = namespaces.iterator(); i.hasNext(); )
|
|
{
|
|
LinkedHashMap ctx = (LinkedHashMap) i.next();
|
|
String namespaceURI = (String) ctx.get(prefix);
|
|
if (namespaceURI != null)
|
|
return namespaceURI;
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public String getPrefix(String namespaceURI)
|
|
{
|
|
if (XMLConstants.XML_NS_URI.equals(namespaceURI))
|
|
return XMLConstants.XML_NS_PREFIX;
|
|
if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
|
|
return XMLConstants.XMLNS_ATTRIBUTE;
|
|
for (Iterator i = namespaces.iterator(); i.hasNext(); )
|
|
{
|
|
LinkedHashMap ctx = (LinkedHashMap) i.next();
|
|
if (ctx.containsValue(namespaceURI))
|
|
{
|
|
for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
|
|
{
|
|
Map.Entry entry = (Map.Entry) i.next();
|
|
String uri = (String) entry.getValue();
|
|
if (uri.equals(namespaceURI))
|
|
return (String) entry.getKey();
|
|
}
|
|
}
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public Iterator getPrefixes(String namespaceURI)
|
|
{
|
|
if (XMLConstants.XML_NS_URI.equals(namespaceURI))
|
|
return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator();
|
|
if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
|
|
return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator();
|
|
LinkedList acc = new LinkedList();
|
|
for (Iterator i = namespaces.iterator(); i.hasNext(); )
|
|
{
|
|
LinkedHashMap ctx = (LinkedHashMap) i.next();
|
|
if (ctx.containsValue(namespaceURI))
|
|
{
|
|
for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
|
|
{
|
|
Map.Entry entry = (Map.Entry) i.next();
|
|
String uri = (String) entry.getValue();
|
|
if (uri.equals(namespaceURI))
|
|
acc.add(entry.getKey());
|
|
}
|
|
}
|
|
}
|
|
return acc.iterator();
|
|
}
|
|
|
|
// -- XMLStreamReader --
|
|
|
|
public void close()
|
|
throws XMLStreamException
|
|
{
|
|
stack = null;
|
|
namespaces = null;
|
|
bases = null;
|
|
buf = null;
|
|
attrs = null;
|
|
doctype = null;
|
|
|
|
inputStack = null;
|
|
validationStack = null;
|
|
ids = null;
|
|
idrefs = null;
|
|
}
|
|
|
|
public NamespaceContext getNamespaceContext()
|
|
{
|
|
return this;
|
|
}
|
|
|
|
public int getAttributeCount()
|
|
{
|
|
return attrs.size();
|
|
}
|
|
|
|
public String getAttributeLocalName(int index)
|
|
{
|
|
Attribute a = (Attribute) attrs.get(index);
|
|
return a.localName;
|
|
}
|
|
|
|
public String getAttributeNamespace(int index)
|
|
{
|
|
String prefix = getAttributePrefix(index);
|
|
return getNamespaceURI(prefix);
|
|
}
|
|
|
|
public String getAttributePrefix(int index)
|
|
{
|
|
Attribute a = (Attribute) attrs.get(index);
|
|
return a.prefix;
|
|
}
|
|
|
|
public QName getAttributeName(int index)
|
|
{
|
|
Attribute a = (Attribute) attrs.get(index);
|
|
String namespaceURI = getNamespaceURI(a.prefix);
|
|
return new QName(namespaceURI, a.localName, a.prefix);
|
|
}
|
|
|
|
public String getAttributeType(int index)
|
|
{
|
|
Attribute a = (Attribute) attrs.get(index);
|
|
return a.type;
|
|
}
|
|
|
|
private String getAttributeType(String elementName, String attName)
|
|
{
|
|
if (doctype != null)
|
|
{
|
|
AttributeDecl att = doctype.getAttributeDecl(elementName, attName);
|
|
if (att != null)
|
|
return att.type;
|
|
}
|
|
return "CDATA";
|
|
}
|
|
|
|
public String getAttributeValue(int index)
|
|
{
|
|
Attribute a = (Attribute) attrs.get(index);
|
|
return a.value;
|
|
}
|
|
|
|
public String getAttributeValue(String namespaceURI, String localName)
|
|
{
|
|
for (Iterator i = attrs.iterator(); i.hasNext(); )
|
|
{
|
|
Attribute a = (Attribute) i.next();
|
|
if (a.localName.equals(localName))
|
|
{
|
|
String uri = getNamespaceURI(a.prefix);
|
|
if ((uri == null && namespaceURI == null) ||
|
|
(uri != null && uri.equals(namespaceURI)))
|
|
return a.value;
|
|
}
|
|
}
|
|
return null;
|
|
}
|
|
|
|
boolean isAttributeDeclared(int index)
|
|
{
|
|
if (doctype == null)
|
|
return false;
|
|
Attribute a = (Attribute) attrs.get(index);
|
|
String qn = ("".equals(a.prefix)) ? a.localName :
|
|
a.prefix + ":" + a.localName;
|
|
String elementName = buf.toString();
|
|
return doctype.isAttributeDeclared(elementName, qn);
|
|
}
|
|
|
|
public String getCharacterEncodingScheme()
|
|
{
|
|
return xmlEncoding;
|
|
}
|
|
|
|
public String getElementText()
|
|
throws XMLStreamException
|
|
{
|
|
if (event != XMLStreamConstants.START_ELEMENT)
|
|
throw new XMLStreamException("current event must be START_ELEMENT");
|
|
StringBuffer elementText = new StringBuffer();
|
|
int depth = stack.size();
|
|
while (event != XMLStreamConstants.END_ELEMENT || stack.size() > depth)
|
|
{
|
|
switch (next())
|
|
{
|
|
case XMLStreamConstants.CHARACTERS:
|
|
case XMLStreamConstants.SPACE:
|
|
elementText.append(buf.toString());
|
|
}
|
|
}
|
|
return elementText.toString();
|
|
}
|
|
|
|
public String getEncoding()
|
|
{
|
|
return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding;
|
|
}
|
|
|
|
public int getEventType()
|
|
{
|
|
return event;
|
|
}
|
|
|
|
public String getLocalName()
|
|
{
|
|
switch (event)
|
|
{
|
|
case XMLStreamConstants.START_ELEMENT:
|
|
case XMLStreamConstants.END_ELEMENT:
|
|
String qName = buf.toString();
|
|
int ci = qName.indexOf(':');
|
|
return (ci == -1) ? qName : qName.substring(ci + 1);
|
|
default:
|
|
return null;
|
|
}
|
|
}
|
|
|
|
public Location getLocation()
|
|
{
|
|
return input;
|
|
}
|
|
|
|
public QName getName()
|
|
{
|
|
switch (event)
|
|
{
|
|
case XMLStreamConstants.START_ELEMENT:
|
|
case XMLStreamConstants.END_ELEMENT:
|
|
String qName = buf.toString();
|
|
int ci = qName.indexOf(':');
|
|
String localName = (ci == -1) ? qName : qName.substring(ci + 1);
|
|
String prefix = (ci == -1) ?
|
|
(namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
|
|
qName.substring(0, ci);
|
|
String namespaceURI = getNamespaceURI(prefix);
|
|
return new QName(namespaceURI, localName, prefix);
|
|
default:
|
|
return null;
|
|
}
|
|
}
|
|
|
|
public int getNamespaceCount()
|
|
{
|
|
if (!namespaceAware || namespaces.isEmpty())
|
|
return 0;
|
|
switch (event)
|
|
{
|
|
case XMLStreamConstants.START_ELEMENT:
|
|
case XMLStreamConstants.END_ELEMENT:
|
|
LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
|
|
return ctx.size();
|
|
default:
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
public String getNamespacePrefix(int index)
|
|
{
|
|
LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
|
|
int count = 0;
|
|
for (Iterator i = ctx.keySet().iterator(); i.hasNext(); )
|
|
{
|
|
String prefix = (String) i.next();
|
|
if (count++ == index)
|
|
return prefix;
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public String getNamespaceURI()
|
|
{
|
|
switch (event)
|
|
{
|
|
case XMLStreamConstants.START_ELEMENT:
|
|
case XMLStreamConstants.END_ELEMENT:
|
|
String qName = buf.toString();
|
|
int ci = qName.indexOf(':');
|
|
if (ci == -1)
|
|
return null;
|
|
String prefix = qName.substring(0, ci);
|
|
return getNamespaceURI(prefix);
|
|
default:
|
|
return null;
|
|
}
|
|
}
|
|
|
|
public String getNamespaceURI(int index)
|
|
{
|
|
LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
|
|
int count = 0;
|
|
for (Iterator i = ctx.values().iterator(); i.hasNext(); )
|
|
{
|
|
String uri = (String) i.next();
|
|
if (count++ == index)
|
|
return uri;
|
|
}
|
|
return null;
|
|
}
|
|
|
|
public String getPIData()
|
|
{
|
|
return piData;
|
|
}
|
|
|
|
public String getPITarget()
|
|
{
|
|
return piTarget;
|
|
}
|
|
|
|
public String getPrefix()
|
|
{
|
|
switch (event)
|
|
{
|
|
case XMLStreamConstants.START_ELEMENT:
|
|
case XMLStreamConstants.END_ELEMENT:
|
|
String qName = buf.toString();
|
|
int ci = qName.indexOf(':');
|
|
return (ci == -1) ?
|
|
(namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
|
|
qName.substring(0, ci);
|
|
default:
|
|
return null;
|
|
}
|
|
}
|
|
|
|
public Object getProperty(String name)
|
|
throws IllegalArgumentException
|
|
{
|
|
if (name == null)
|
|
throw new IllegalArgumentException("name is null");
|
|
if (XMLInputFactory.ALLOCATOR.equals(name))
|
|
return null;
|
|
if (XMLInputFactory.IS_COALESCING.equals(name))
|
|
return coalescing ? Boolean.TRUE : Boolean.FALSE;
|
|
if (XMLInputFactory.IS_NAMESPACE_AWARE.equals(name))
|
|
return namespaceAware ? Boolean.TRUE : Boolean.FALSE;
|
|
if (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES.equals(name))
|
|
return replaceERefs ? Boolean.TRUE : Boolean.FALSE;
|
|
if (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES.equals(name))
|
|
return externalEntities ? Boolean.TRUE : Boolean.FALSE;
|
|
if (XMLInputFactory.IS_VALIDATING.equals(name))
|
|
return Boolean.FALSE;
|
|
if (XMLInputFactory.REPORTER.equals(name))
|
|
return reporter;
|
|
if (XMLInputFactory.RESOLVER.equals(name))
|
|
return resolver;
|
|
if (XMLInputFactory.SUPPORT_DTD.equals(name))
|
|
return supportDTD ? Boolean.TRUE : Boolean.FALSE;
|
|
if ("gnu.xml.stream.stringInterning".equals(name))
|
|
return stringInterning ? Boolean.TRUE : Boolean.FALSE;
|
|
if ("gnu.xml.stream.xmlBase".equals(name))
|
|
return baseAware ? Boolean.TRUE : Boolean.FALSE;
|
|
if ("gnu.xml.stream.baseURI".equals(name))
|
|
return getXMLBase();
|
|
return null;
|
|
}
|
|
|
|
public String getText()
|
|
{
|
|
return buf.toString();
|
|
}
|
|
|
|
public char[] getTextCharacters()
|
|
{
|
|
return buf.toString().toCharArray();
|
|
}
|
|
|
|
public int getTextCharacters(int sourceStart, char[] target,
|
|
int targetStart, int length)
|
|
throws XMLStreamException
|
|
{
|
|
length = Math.min(sourceStart + buf.length(), length);
|
|
int sourceEnd = sourceStart + length;
|
|
buf.getChars(sourceStart, sourceEnd, target, targetStart);
|
|
return length;
|
|
}
|
|
|
|
public int getTextLength()
|
|
{
|
|
return buf.length();
|
|
}
|
|
|
|
public int getTextStart()
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
public String getVersion()
|
|
{
|
|
return (xmlVersion == null) ? "1.0" : xmlVersion;
|
|
}
|
|
|
|
public boolean hasName()
|
|
{
|
|
switch (event)
|
|
{
|
|
case XMLStreamConstants.START_ELEMENT:
|
|
case XMLStreamConstants.END_ELEMENT:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
public boolean hasText()
|
|
{
|
|
switch (event)
|
|
{
|
|
case XMLStreamConstants.CHARACTERS:
|
|
case XMLStreamConstants.SPACE:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
public boolean isAttributeSpecified(int index)
|
|
{
|
|
Attribute a = (Attribute) attrs.get(index);
|
|
return a.specified;
|
|
}
|
|
|
|
public boolean isCharacters()
|
|
{
|
|
return (event == XMLStreamConstants.CHARACTERS);
|
|
}
|
|
|
|
public boolean isEndElement()
|
|
{
|
|
return (event == XMLStreamConstants.END_ELEMENT);
|
|
}
|
|
|
|
public boolean isStandalone()
|
|
{
|
|
return Boolean.TRUE.equals(xmlStandalone);
|
|
}
|
|
|
|
public boolean isStartElement()
|
|
{
|
|
return (event == XMLStreamConstants.START_ELEMENT);
|
|
}
|
|
|
|
public boolean isWhiteSpace()
|
|
{
|
|
return (event == XMLStreamConstants.SPACE);
|
|
}
|
|
|
|
public int nextTag()
|
|
throws XMLStreamException
|
|
{
|
|
do
|
|
{
|
|
switch (next())
|
|
{
|
|
case XMLStreamConstants.START_ELEMENT:
|
|
case XMLStreamConstants.END_ELEMENT:
|
|
case XMLStreamConstants.CHARACTERS:
|
|
case XMLStreamConstants.SPACE:
|
|
case XMLStreamConstants.COMMENT:
|
|
case XMLStreamConstants.PROCESSING_INSTRUCTION:
|
|
break;
|
|
default:
|
|
throw new XMLStreamException("Unexpected event type: " + event);
|
|
}
|
|
}
|
|
while (event != XMLStreamConstants.START_ELEMENT &&
|
|
event != XMLStreamConstants.END_ELEMENT);
|
|
return event;
|
|
}
|
|
|
|
public void require(int type, String namespaceURI, String localName)
|
|
throws XMLStreamException
|
|
{
|
|
if (event != type)
|
|
throw new XMLStreamException("Current event type is " + event);
|
|
if (event == XMLStreamConstants.START_ELEMENT ||
|
|
event == XMLStreamConstants.END_ELEMENT)
|
|
{
|
|
String ln = getLocalName();
|
|
if (!ln.equals(localName))
|
|
throw new XMLStreamException("Current local-name is " + ln);
|
|
String uri = getNamespaceURI();
|
|
if ((uri == null && namespaceURI != null) ||
|
|
(uri != null && !uri.equals(namespaceURI)))
|
|
throw new XMLStreamException("Current namespace URI is " + uri);
|
|
}
|
|
}
|
|
|
|
public boolean standaloneSet()
|
|
{
|
|
return (xmlStandalone != null);
|
|
}
|
|
|
|
public boolean hasNext()
|
|
throws XMLStreamException
|
|
{
|
|
if (event == XMLStreamConstants.END_DOCUMENT)
|
|
return false;
|
|
if (!lookahead)
|
|
{
|
|
next();
|
|
lookahead = true;
|
|
}
|
|
return event != -1;
|
|
}
|
|
|
|
public int next()
|
|
throws XMLStreamException
|
|
{
|
|
if (lookahead)
|
|
{
|
|
lookahead = false;
|
|
return event;
|
|
}
|
|
if (event == XMLStreamConstants.END_ELEMENT)
|
|
{
|
|
// Pop namespace context
|
|
if (namespaceAware && !namespaces.isEmpty())
|
|
namespaces.removeFirst();
|
|
// Pop base context
|
|
if (baseAware && !bases.isEmpty())
|
|
bases.removeFirst();
|
|
}
|
|
if (!startEntityStack.isEmpty())
|
|
{
|
|
String entityName = (String) startEntityStack.removeFirst();
|
|
buf.setLength(0);
|
|
buf.append(entityName);
|
|
event = START_ENTITY;
|
|
return extendedEventTypes ? event : next();
|
|
}
|
|
else if (!endEntityStack.isEmpty())
|
|
{
|
|
String entityName = (String) endEntityStack.removeFirst();
|
|
buf.setLength(0);
|
|
buf.append(entityName);
|
|
event = END_ENTITY;
|
|
return extendedEventTypes ? event : next();
|
|
}
|
|
try
|
|
{
|
|
if (!input.initialized)
|
|
input.init();
|
|
switch (state)
|
|
{
|
|
case CONTENT:
|
|
if (tryRead(TEST_END_ELEMENT))
|
|
{
|
|
readEndElement();
|
|
if (stack.isEmpty())
|
|
state = MISC;
|
|
event = XMLStreamConstants.END_ELEMENT;
|
|
}
|
|
else if (tryRead(TEST_COMMENT))
|
|
{
|
|
readComment(false);
|
|
event = XMLStreamConstants.COMMENT;
|
|
}
|
|
else if (tryRead(TEST_PI))
|
|
{
|
|
readPI(false);
|
|
event = XMLStreamConstants.PROCESSING_INSTRUCTION;
|
|
}
|
|
else if (tryRead(TEST_CDATA))
|
|
{
|
|
readCDSect();
|
|
event = XMLStreamConstants.CDATA;
|
|
}
|
|
else if (tryRead(TEST_START_ELEMENT))
|
|
{
|
|
state = readStartElement();
|
|
event = XMLStreamConstants.START_ELEMENT;
|
|
}
|
|
else
|
|
{
|
|
// Check for character reference or predefined entity
|
|
mark(8);
|
|
int c = readCh();
|
|
if (c == 0x26) // '&'
|
|
{
|
|
c = readCh();
|
|
if (c == 0x23) // '#'
|
|
{
|
|
reset();
|
|
event = readCharData(null);
|
|
}
|
|
else
|
|
{
|
|
// entity reference
|
|
reset();
|
|
readCh(); // &
|
|
readReference();
|
|
String ref = buf.toString();
|
|
String text = (String) PREDEFINED_ENTITIES.get(ref);
|
|
if (text != null)
|
|
{
|
|
event = readCharData(text);
|
|
}
|
|
else if (replaceERefs && !isUnparsedEntity(ref))
|
|
{
|
|
// this will report a start-entity event
|
|
boolean external = false;
|
|
if (doctype != null)
|
|
{
|
|
Object entity = doctype.getEntity(ref);
|
|
if (entity instanceof ExternalIds)
|
|
external = true;
|
|
}
|
|
expandEntity(ref, false, external);
|
|
event = next();
|
|
}
|
|
else
|
|
{
|
|
event = XMLStreamConstants.ENTITY_REFERENCE;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
reset();
|
|
event = readCharData(null);
|
|
if (validating && doctype != null)
|
|
validatePCData(buf.toString());
|
|
}
|
|
}
|
|
break;
|
|
case EMPTY_ELEMENT:
|
|
String elementName = (String) stack.removeLast();
|
|
buf.setLength(0);
|
|
buf.append(elementName);
|
|
state = stack.isEmpty() ? MISC : CONTENT;
|
|
event = XMLStreamConstants.END_ELEMENT;
|
|
if (validating && doctype != null)
|
|
endElementValidationHook();
|
|
break;
|
|
case INIT: // XMLDecl?
|
|
if (tryRead(TEST_XML_DECL))
|
|
readXMLDecl();
|
|
input.finalizeEncoding();
|
|
event = XMLStreamConstants.START_DOCUMENT;
|
|
state = PROLOG;
|
|
break;
|
|
case PROLOG: // Misc* (doctypedecl Misc*)?
|
|
skipWhitespace();
|
|
if (doctype == null && tryRead(TEST_DOCTYPE_DECL))
|
|
{
|
|
readDoctypeDecl();
|
|
event = XMLStreamConstants.DTD;
|
|
}
|
|
else if (tryRead(TEST_COMMENT))
|
|
{
|
|
readComment(false);
|
|
event = XMLStreamConstants.COMMENT;
|
|
}
|
|
else if (tryRead(TEST_PI))
|
|
{
|
|
readPI(false);
|
|
event = XMLStreamConstants.PROCESSING_INSTRUCTION;
|
|
}
|
|
else if (tryRead(TEST_START_ELEMENT))
|
|
{
|
|
state = readStartElement();
|
|
event = XMLStreamConstants.START_ELEMENT;
|
|
}
|
|
else
|
|
{
|
|
int c = readCh();
|
|
error("no root element: U+" + Integer.toHexString(c));
|
|
}
|
|
break;
|
|
case MISC: // Comment | PI | S
|
|
skipWhitespace();
|
|
if (tryRead(TEST_COMMENT))
|
|
{
|
|
readComment(false);
|
|
event = XMLStreamConstants.COMMENT;
|
|
}
|
|
else if (tryRead(TEST_PI))
|
|
{
|
|
readPI(false);
|
|
event = XMLStreamConstants.PROCESSING_INSTRUCTION;
|
|
}
|
|
else
|
|
{
|
|
if (event == XMLStreamConstants.END_DOCUMENT)
|
|
throw new NoSuchElementException();
|
|
int c = readCh();
|
|
if (c != -1)
|
|
error("Only comments and PIs may appear after " +
|
|
"the root element");
|
|
event = XMLStreamConstants.END_DOCUMENT;
|
|
}
|
|
break;
|
|
default:
|
|
event = -1;
|
|
}
|
|
return event;
|
|
}
|
|
catch (IOException e)
|
|
{
|
|
XMLStreamException e2 = new XMLStreamException();
|
|
e2.initCause(e);
|
|
throw e2;
|
|
}
|
|
}
|
|
|
|
// package private
|
|
|
|
/**
|
|
* Returns the current element name.
|
|
*/
|
|
String getCurrentElement()
|
|
{
|
|
return (String) stack.getLast();
|
|
}
|
|
|
|
// private
|
|
|
|
private void mark(int limit)
|
|
throws IOException
|
|
{
|
|
input.mark(limit);
|
|
}
|
|
|
|
private void reset()
|
|
throws IOException
|
|
{
|
|
input.reset();
|
|
}
|
|
|
|
private int read()
|
|
throws IOException
|
|
{
|
|
return input.read();
|
|
}
|
|
|
|
private int read(int[] b, int off, int len)
|
|
throws IOException
|
|
{
|
|
return input.read(b, off, len);
|
|
}
|
|
|
|
/**
|
|
* Parsed character read.
|
|
*/
|
|
private int readCh()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
int c = read();
|
|
if (expandPE && c == 0x25) // '%'
|
|
{
|
|
if (peIsError)
|
|
error("PE reference within decl in internal subset.");
|
|
expandPEReference();
|
|
return readCh();
|
|
}
|
|
return c;
|
|
}
|
|
|
|
/**
|
|
* Reads the next character, ensuring it is the character specified.
|
|
* @param delim the character to match
|
|
* @exception XMLStreamException if the next character is not the
|
|
* specified one
|
|
*/
|
|
private void require(char delim)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
mark(1);
|
|
int c = readCh();
|
|
if (delim != c)
|
|
{
|
|
reset();
|
|
error("required character (got U+" + Integer.toHexString(c) + ")",
|
|
new Character(delim));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Reads the next few characters, ensuring they match the string specified.
|
|
* @param delim the string to match
|
|
* @exception XMLStreamException if the next characters do not match the
|
|
* specified string
|
|
*/
|
|
private void require(String delim)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
char[] chars = delim.toCharArray();
|
|
int len = chars.length;
|
|
mark(len);
|
|
int off = 0;
|
|
do
|
|
{
|
|
int l2 = read(tmpBuf, off, len - off);
|
|
if (l2 == -1)
|
|
{
|
|
reset();
|
|
error("EOF before required string", delim);
|
|
}
|
|
off += l2;
|
|
}
|
|
while (off < len);
|
|
for (int i = 0; i < chars.length; i++)
|
|
{
|
|
if (chars[i] != tmpBuf[i])
|
|
{
|
|
reset();
|
|
error("required string", delim);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Try to read a single character. On failure, reset the stream.
|
|
* @param delim the character to test
|
|
* @return true if the character matched delim, false otherwise.
|
|
*/
|
|
private boolean tryRead(char delim)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
mark(1);
|
|
int c = readCh();
|
|
if (delim != c)
|
|
{
|
|
reset();
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Tries to read the specified characters.
|
|
* If successful, the stream is positioned after the last character,
|
|
* otherwise it is reset.
|
|
* @param test the string to test
|
|
* @return true if the characters matched the test string, false otherwise.
|
|
*/
|
|
private boolean tryRead(String test)
|
|
throws IOException
|
|
{
|
|
char[] chars = test.toCharArray();
|
|
int len = chars.length;
|
|
mark(len);
|
|
int count = 0;
|
|
int l2 = read(tmpBuf, 0, len);
|
|
if (l2 == -1)
|
|
{
|
|
reset();
|
|
return false;
|
|
}
|
|
count += l2;
|
|
while (count < len)
|
|
{
|
|
// force read
|
|
int c = read();
|
|
if (c == -1)
|
|
{
|
|
reset();
|
|
return false;
|
|
}
|
|
tmpBuf[count++] = (char) c;
|
|
}
|
|
for (int i = 0; i < len; i++)
|
|
{
|
|
if (chars[i] != tmpBuf[i])
|
|
{
|
|
reset();
|
|
return false;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Reads characters until the specified test string is encountered.
|
|
* @param delim the string delimiting the end of the characters
|
|
*/
|
|
private void readUntil(String delim)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
int startLine = input.line;
|
|
try
|
|
{
|
|
while (!tryRead(delim))
|
|
{
|
|
int c = readCh();
|
|
if (c == -1)
|
|
throw new EOFException();
|
|
else if (input.xml11)
|
|
{
|
|
if (!isXML11Char(c) || isXML11RestrictedChar(c))
|
|
error("illegal XML 1.1 character",
|
|
"U+" + Integer.toHexString(c));
|
|
}
|
|
else if (!isChar(c))
|
|
error("illegal XML character",
|
|
"U+" + Integer.toHexString(c));
|
|
buf.append(Character.toChars(c));
|
|
}
|
|
}
|
|
catch (EOFException e)
|
|
{
|
|
error("end of input while looking for delimiter "+
|
|
"(started on line " + startLine + ')', delim);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Reads any whitespace characters.
|
|
* @return true if whitespace characters were read, false otherwise
|
|
*/
|
|
private boolean tryWhitespace()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
boolean white;
|
|
boolean ret = false;
|
|
do
|
|
{
|
|
mark(1);
|
|
int c = readCh();
|
|
while (c == -1 && inputStack.size() > 1)
|
|
{
|
|
popInput();
|
|
c = readCh();
|
|
}
|
|
white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
|
|
if (white)
|
|
ret = true;
|
|
}
|
|
while (white);
|
|
reset();
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Skip over any whitespace characters.
|
|
*/
|
|
private void skipWhitespace()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
boolean white;
|
|
do
|
|
{
|
|
mark(1);
|
|
int c = readCh();
|
|
while (c == -1 && inputStack.size() > 1)
|
|
{
|
|
popInput();
|
|
c = readCh();
|
|
}
|
|
white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
|
|
}
|
|
while (white);
|
|
reset();
|
|
}
|
|
|
|
/**
|
|
* Try to read as many whitespace characters as are available.
|
|
* @exception XMLStreamException if no whitespace characters were seen
|
|
*/
|
|
private void requireWhitespace()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
if (!tryWhitespace())
|
|
error("whitespace required");
|
|
}
|
|
|
|
/**
|
|
* Returns the current base URI for resolving external entities.
|
|
*/
|
|
String getXMLBase()
|
|
{
|
|
if (baseAware)
|
|
{
|
|
for (Iterator i = bases.iterator(); i.hasNext(); )
|
|
{
|
|
String base = (String) i.next();
|
|
if (base != null)
|
|
return base;
|
|
}
|
|
}
|
|
return input.systemId;
|
|
}
|
|
|
|
/**
|
|
* Push the specified text input source.
|
|
*/
|
|
private void pushInput(String name, String text, boolean report,
|
|
boolean normalize)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
// Check for recursion
|
|
if (name != null && !"".equals(name))
|
|
{
|
|
for (Iterator i = inputStack.iterator(); i.hasNext(); )
|
|
{
|
|
Input ctx = (Input) i.next();
|
|
if (name.equals(ctx.name))
|
|
error("entities may not be self-recursive", name);
|
|
}
|
|
}
|
|
else
|
|
report = false;
|
|
pushInput(new Input(null, new StringReader(text), input.publicId,
|
|
input.systemId, name, input.inputEncoding, report,
|
|
normalize));
|
|
}
|
|
|
|
/**
|
|
* Push the specified external input source.
|
|
*/
|
|
private void pushInput(String name, ExternalIds ids, boolean report,
|
|
boolean normalize)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
if (!externalEntities)
|
|
return;
|
|
String url = absolutize(input.systemId, ids.systemId);
|
|
// Check for recursion
|
|
for (Iterator i = inputStack.iterator(); i.hasNext(); )
|
|
{
|
|
Input ctx = (Input) i.next();
|
|
if (url.equals(ctx.systemId))
|
|
error("entities may not be self-recursive", url);
|
|
if (name != null && !"".equals(name) && name.equals(ctx.name))
|
|
error("entities may not be self-recursive", name);
|
|
}
|
|
if (name == null || "".equals(name))
|
|
report = false;
|
|
InputStream in = null;
|
|
if (resolver != null)
|
|
{
|
|
Object obj = resolver.resolveEntity(ids.publicId, url, getXMLBase(),
|
|
null);
|
|
if (obj instanceof InputStream)
|
|
in = (InputStream) obj;
|
|
}
|
|
if (in == null)
|
|
in = resolve(url);
|
|
if (in == null)
|
|
error("unable to resolve external entity",
|
|
(ids.systemId != null) ? ids.systemId : ids.publicId);
|
|
pushInput(new Input(in, null, ids.publicId, url, name, null, report,
|
|
normalize));
|
|
input.init();
|
|
if (tryRead(TEST_XML_DECL))
|
|
readTextDecl();
|
|
input.finalizeEncoding();
|
|
}
|
|
|
|
/**
|
|
* Push the specified input source (general entity) onto the input stack.
|
|
*/
|
|
private void pushInput(Input input)
|
|
{
|
|
if (input.report)
|
|
startEntityStack.addFirst(input.name);
|
|
inputStack.addLast(input);
|
|
if (this.input != null)
|
|
input.xml11 = this.input.xml11;
|
|
this.input = input;
|
|
}
|
|
|
|
/**
|
|
* "Absolutize" a URL. This resolves a relative URL into an absolute one.
|
|
* @param base the current base URL
|
|
* @param href the (absolute or relative) URL to resolve
|
|
*/
|
|
public static String absolutize(String base, String href)
|
|
throws MalformedURLException
|
|
{
|
|
if (href == null)
|
|
return null;
|
|
int ci = href.indexOf(':');
|
|
if (ci > 1 && isURLScheme(href.substring(0, ci)))
|
|
{
|
|
// href is absolute already
|
|
return href;
|
|
}
|
|
if (base == null)
|
|
base = "";
|
|
else
|
|
{
|
|
int i = base.lastIndexOf('/');
|
|
if (i != -1)
|
|
base = base.substring(0, i + 1);
|
|
else
|
|
base = "";
|
|
}
|
|
if ("".equals(base))
|
|
{
|
|
// assume file URL relative to current directory
|
|
base = System.getProperty("user.dir");
|
|
if (base.charAt(0) == '/')
|
|
base = base.substring(1);
|
|
base = "file:///" + base.replace(File.separatorChar, '/');
|
|
if (!base.endsWith("/"))
|
|
base += "/";
|
|
}
|
|
return new URL(new URL(base), href).toString();
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified characters match the scheme portion of
|
|
* a URL.
|
|
* @see RFC 1738 section 2.1
|
|
*/
|
|
private static boolean isURLScheme(String text)
|
|
{
|
|
int len = text.length();
|
|
for (int i = 0; i < len; i++)
|
|
{
|
|
char c = text.charAt(i);
|
|
if (c == '+' || c == '.' || c == '-')
|
|
continue;
|
|
if (c < 65 || (c > 90 && c < 97) || c > 122)
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/**
|
|
* Returns an input stream for the given URL.
|
|
*/
|
|
static InputStream resolve(String url)
|
|
throws IOException
|
|
{
|
|
try
|
|
{
|
|
return new URL(url).openStream();
|
|
}
|
|
catch (MalformedURLException e)
|
|
{
|
|
return null;
|
|
}
|
|
catch (IOException e)
|
|
{
|
|
IOException e2 = new IOException("error resolving " + url);
|
|
e2.initCause(e);
|
|
throw e2;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Pops the current input source (general entity) off the stack.
|
|
*/
|
|
private void popInput()
|
|
{
|
|
Input old = (Input) inputStack.removeLast();
|
|
if (old.report)
|
|
endEntityStack.addFirst(old.name);
|
|
input = (Input) inputStack.getLast();
|
|
}
|
|
|
|
/**
|
|
* Parse an entity text declaration.
|
|
*/
|
|
private void readTextDecl()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
|
|
requireWhitespace();
|
|
if (tryRead("version"))
|
|
{
|
|
readEq();
|
|
String v = readLiteral(flags, false);
|
|
if ("1.0".equals(v))
|
|
input.xml11 = false;
|
|
else if ("1.1".equals(v))
|
|
{
|
|
Input i1 = (Input) inputStack.getFirst();
|
|
if (!i1.xml11)
|
|
error("external entity specifies later version number");
|
|
input.xml11 = true;
|
|
}
|
|
else
|
|
throw new XMLStreamException("illegal XML version: " + v);
|
|
requireWhitespace();
|
|
}
|
|
require("encoding");
|
|
readEq();
|
|
String enc = readLiteral(flags, false);
|
|
skipWhitespace();
|
|
require("?>");
|
|
input.setInputEncoding(enc);
|
|
}
|
|
|
|
/**
|
|
* Parse the XML declaration.
|
|
*/
|
|
private void readXMLDecl()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
|
|
|
|
requireWhitespace();
|
|
require("version");
|
|
readEq();
|
|
xmlVersion = readLiteral(flags, false);
|
|
if ("1.0".equals(xmlVersion))
|
|
input.xml11 = false;
|
|
else if ("1.1".equals(xmlVersion))
|
|
input.xml11 = true;
|
|
else
|
|
throw new XMLStreamException("illegal XML version: " + xmlVersion);
|
|
|
|
boolean white = tryWhitespace();
|
|
|
|
if (tryRead("encoding"))
|
|
{
|
|
if (!white)
|
|
error("whitespace required before 'encoding='");
|
|
readEq();
|
|
xmlEncoding = readLiteral(flags, false);
|
|
white = tryWhitespace();
|
|
}
|
|
|
|
if (tryRead("standalone"))
|
|
{
|
|
if (!white)
|
|
error("whitespace required before 'standalone='");
|
|
readEq();
|
|
String standalone = readLiteral(flags, false);
|
|
if ("yes".equals(standalone))
|
|
xmlStandalone = Boolean.TRUE;
|
|
else if ("no".equals(standalone))
|
|
xmlStandalone = Boolean.FALSE;
|
|
else
|
|
error("standalone flag must be 'yes' or 'no'", standalone);
|
|
}
|
|
|
|
skipWhitespace();
|
|
require("?>");
|
|
if (xmlEncoding != null)
|
|
input.setInputEncoding(xmlEncoding);
|
|
}
|
|
|
|
/**
|
|
* Parse the DOCTYPE declaration.
|
|
*/
|
|
private void readDoctypeDecl()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
if (!supportDTD)
|
|
error("parser was configured not to support DTDs");
|
|
requireWhitespace();
|
|
String rootName = readNmtoken(true);
|
|
skipWhitespace();
|
|
ExternalIds ids = readExternalIds(false, true);
|
|
doctype =
|
|
this.new Doctype(rootName, ids.publicId, ids.systemId);
|
|
|
|
// Parse internal subset first
|
|
skipWhitespace();
|
|
if (tryRead('['))
|
|
{
|
|
while (true)
|
|
{
|
|
expandPE = true;
|
|
skipWhitespace();
|
|
expandPE = false;
|
|
if (tryRead(']'))
|
|
break;
|
|
else
|
|
readMarkupdecl(false);
|
|
}
|
|
}
|
|
skipWhitespace();
|
|
require('>');
|
|
|
|
// Parse external subset
|
|
if (ids.systemId != null && externalEntities)
|
|
{
|
|
pushInput("", ">", false, false);
|
|
pushInput("[dtd]", ids, true, true);
|
|
// loop until we get back to ">"
|
|
while (true)
|
|
{
|
|
expandPE = true;
|
|
skipWhitespace();
|
|
expandPE = false;
|
|
mark(1);
|
|
int c = readCh();
|
|
if (c == 0x3e) // '>'
|
|
break;
|
|
else if (c == -1)
|
|
popInput();
|
|
else
|
|
{
|
|
reset();
|
|
expandPE = true;
|
|
readMarkupdecl(true);
|
|
expandPE = true;
|
|
}
|
|
}
|
|
if (inputStack.size() != 2)
|
|
error("external subset has unmatched '>'");
|
|
popInput();
|
|
}
|
|
checkDoctype();
|
|
if (validating)
|
|
validateDoctype();
|
|
|
|
// Make rootName available for reading
|
|
buf.setLength(0);
|
|
buf.append(rootName);
|
|
}
|
|
|
|
/**
|
|
* Checks the well-formedness of the DTD.
|
|
*/
|
|
private void checkDoctype()
|
|
throws XMLStreamException
|
|
{
|
|
// TODO check entity recursion
|
|
}
|
|
|
|
/**
|
|
* Parse the markupdecl production.
|
|
*/
|
|
private void readMarkupdecl(boolean inExternalSubset)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
boolean saved = expandPE;
|
|
mark(1);
|
|
require('<');
|
|
reset();
|
|
expandPE = false;
|
|
if (tryRead(TEST_ELEMENT_DECL))
|
|
{
|
|
expandPE = saved;
|
|
readElementDecl();
|
|
}
|
|
else if (tryRead(TEST_ATTLIST_DECL))
|
|
{
|
|
expandPE = saved;
|
|
readAttlistDecl();
|
|
}
|
|
else if (tryRead(TEST_ENTITY_DECL))
|
|
{
|
|
expandPE = saved;
|
|
readEntityDecl(inExternalSubset);
|
|
}
|
|
else if (tryRead(TEST_NOTATION_DECL))
|
|
{
|
|
expandPE = saved;
|
|
readNotationDecl(inExternalSubset);
|
|
}
|
|
else if (tryRead(TEST_PI))
|
|
{
|
|
readPI(true);
|
|
expandPE = saved;
|
|
}
|
|
else if (tryRead(TEST_COMMENT))
|
|
{
|
|
readComment(true);
|
|
expandPE = saved;
|
|
}
|
|
else if (tryRead("<!["))
|
|
{
|
|
// conditional section
|
|
expandPE = saved;
|
|
if (inputStack.size() < 2)
|
|
error("conditional sections illegal in internal subset");
|
|
skipWhitespace();
|
|
if (tryRead("INCLUDE"))
|
|
{
|
|
skipWhitespace();
|
|
require('[');
|
|
skipWhitespace();
|
|
while (!tryRead("]]>"))
|
|
{
|
|
readMarkupdecl(inExternalSubset);
|
|
skipWhitespace();
|
|
}
|
|
}
|
|
else if (tryRead("IGNORE"))
|
|
{
|
|
skipWhitespace();
|
|
require('[');
|
|
expandPE = false;
|
|
for (int nesting = 1; nesting > 0; )
|
|
{
|
|
int c = readCh();
|
|
switch (c)
|
|
{
|
|
case 0x3c: // '<'
|
|
if (tryRead("!["))
|
|
nesting++;
|
|
break;
|
|
case 0x5d: // ']'
|
|
if (tryRead("]>"))
|
|
nesting--;
|
|
break;
|
|
case -1:
|
|
throw new EOFException();
|
|
}
|
|
}
|
|
expandPE = saved;
|
|
}
|
|
else
|
|
error("conditional section must begin with INCLUDE or IGNORE");
|
|
}
|
|
else
|
|
error("expected markup declaration");
|
|
}
|
|
|
|
/**
|
|
* Parse the elementdecl production.
|
|
*/
|
|
private void readElementDecl()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
requireWhitespace();
|
|
boolean saved = expandPE;
|
|
expandPE = (inputStack.size() > 1);
|
|
String name = readNmtoken(true);
|
|
expandPE = saved;
|
|
requireWhitespace();
|
|
readContentspec(name);
|
|
skipWhitespace();
|
|
require('>');
|
|
}
|
|
|
|
/**
|
|
* Parse the contentspec production.
|
|
*/
|
|
private void readContentspec(String elementName)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
if (tryRead("EMPTY"))
|
|
doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel());
|
|
else if (tryRead("ANY"))
|
|
doctype.addElementDecl(elementName, "ANY", new AnyContentModel());
|
|
else
|
|
{
|
|
ContentModel model;
|
|
StringBuffer acc = new StringBuffer();
|
|
require('(');
|
|
acc.append('(');
|
|
skipWhitespace();
|
|
if (tryRead("#PCDATA"))
|
|
{
|
|
// mixed content
|
|
acc.append("#PCDATA");
|
|
MixedContentModel mm = new MixedContentModel();
|
|
model = mm;
|
|
skipWhitespace();
|
|
if (tryRead(')'))
|
|
{
|
|
acc.append(")");
|
|
if (tryRead('*'))
|
|
{
|
|
mm.min = 0;
|
|
mm.max = -1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
while (!tryRead(")"))
|
|
{
|
|
require('|');
|
|
acc.append('|');
|
|
skipWhitespace();
|
|
String name = readNmtoken(true);
|
|
acc.append(name);
|
|
mm.addName(name);
|
|
skipWhitespace();
|
|
}
|
|
require('*');
|
|
acc.append(")*");
|
|
mm.min = 0;
|
|
mm.max = -1;
|
|
}
|
|
}
|
|
else
|
|
model = readElements(acc);
|
|
doctype.addElementDecl(elementName, acc.toString(), model);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parses an element content model.
|
|
*/
|
|
private ElementContentModel readElements(StringBuffer acc)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
int separator;
|
|
ElementContentModel model = new ElementContentModel();
|
|
|
|
// Parse first content particle
|
|
skipWhitespace();
|
|
model.addContentParticle(readContentParticle(acc));
|
|
// End or separator
|
|
skipWhitespace();
|
|
int c = readCh();
|
|
switch (c)
|
|
{
|
|
case 0x29: // ')'
|
|
acc.append(')');
|
|
mark(1);
|
|
c = readCh();
|
|
switch (c)
|
|
{
|
|
case 0x3f: // '?'
|
|
acc.append('?');
|
|
model.min = 0;
|
|
model.max = 1;
|
|
break;
|
|
case 0x2a: // '*'
|
|
acc.append('*');
|
|
model.min = 0;
|
|
model.max = -1;
|
|
break;
|
|
case 0x2b: // '+'
|
|
acc.append('+');
|
|
model.min = 1;
|
|
model.max = -1;
|
|
break;
|
|
default:
|
|
reset();
|
|
}
|
|
return model; // done
|
|
case 0x7c: // '|'
|
|
model.or = true;
|
|
// fall through
|
|
case 0x2c: // ','
|
|
separator = c;
|
|
acc.append(Character.toChars(c));
|
|
break;
|
|
default:
|
|
error("bad separator in content model",
|
|
"U+" + Integer.toHexString(c));
|
|
return model;
|
|
}
|
|
// Parse subsequent content particles
|
|
while (true)
|
|
{
|
|
skipWhitespace();
|
|
model.addContentParticle(readContentParticle(acc));
|
|
skipWhitespace();
|
|
c = readCh();
|
|
if (c == 0x29) // ')'
|
|
{
|
|
acc.append(')');
|
|
break;
|
|
}
|
|
else if (c != separator)
|
|
{
|
|
error("bad separator in content model",
|
|
"U+" + Integer.toHexString(c));
|
|
return model;
|
|
}
|
|
else
|
|
acc.append(c);
|
|
}
|
|
// Check for occurrence indicator
|
|
mark(1);
|
|
c = readCh();
|
|
switch (c)
|
|
{
|
|
case 0x3f: // '?'
|
|
acc.append('?');
|
|
model.min = 0;
|
|
model.max = 1;
|
|
break;
|
|
case 0x2a: // '*'
|
|
acc.append('*');
|
|
model.min = 0;
|
|
model.max = -1;
|
|
break;
|
|
case 0x2b: // '+'
|
|
acc.append('+');
|
|
model.min = 1;
|
|
model.max = -1;
|
|
break;
|
|
default:
|
|
reset();
|
|
}
|
|
return model;
|
|
}
|
|
|
|
/**
|
|
* Parse a cp production.
|
|
*/
|
|
private ContentParticle readContentParticle(StringBuffer acc)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
ContentParticle cp = new ContentParticle();
|
|
if (tryRead('('))
|
|
{
|
|
acc.append('(');
|
|
cp.content = readElements(acc);
|
|
}
|
|
else
|
|
{
|
|
String name = readNmtoken(true);
|
|
acc.append(name);
|
|
cp.content = name;
|
|
mark(1);
|
|
int c = readCh();
|
|
switch (c)
|
|
{
|
|
case 0x3f: // '?'
|
|
acc.append('?');
|
|
cp.min = 0;
|
|
cp.max = 1;
|
|
break;
|
|
case 0x2a: // '*'
|
|
acc.append('*');
|
|
cp.min = 0;
|
|
cp.max = -1;
|
|
break;
|
|
case 0x2b: // '+'
|
|
acc.append('+');
|
|
cp.min = 1;
|
|
cp.max = -1;
|
|
break;
|
|
default:
|
|
reset();
|
|
}
|
|
}
|
|
return cp;
|
|
}
|
|
|
|
/**
|
|
* Parse an attribute-list definition.
|
|
*/
|
|
private void readAttlistDecl()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
requireWhitespace();
|
|
boolean saved = expandPE;
|
|
expandPE = (inputStack.size() > 1);
|
|
String elementName = readNmtoken(true);
|
|
expandPE = saved;
|
|
boolean white = tryWhitespace();
|
|
while (!tryRead('>'))
|
|
{
|
|
if (!white)
|
|
error("whitespace required before attribute definition");
|
|
readAttDef(elementName);
|
|
white = tryWhitespace();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parse a single attribute definition.
|
|
*/
|
|
private void readAttDef(String elementName)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
String name = readNmtoken(true);
|
|
requireWhitespace();
|
|
StringBuffer acc = new StringBuffer();
|
|
HashSet values = new HashSet();
|
|
String type = readAttType(acc, values);
|
|
if (validating)
|
|
{
|
|
if ("ID".equals(type))
|
|
{
|
|
// VC: One ID per Element Type
|
|
for (Iterator i = doctype.attlistIterator(elementName);
|
|
i.hasNext(); )
|
|
{
|
|
Map.Entry entry = (Map.Entry) i.next();
|
|
AttributeDecl decl = (AttributeDecl) entry.getValue();
|
|
if ("ID".equals(decl.type))
|
|
error("element types must not have more than one ID " +
|
|
"attribute");
|
|
}
|
|
}
|
|
else if ("NOTATION".equals(type))
|
|
{
|
|
// VC: One Notation Per Element Type
|
|
for (Iterator i = doctype.attlistIterator(elementName);
|
|
i.hasNext(); )
|
|
{
|
|
Map.Entry entry = (Map.Entry) i.next();
|
|
AttributeDecl decl = (AttributeDecl) entry.getValue();
|
|
if ("NOTATION".equals(decl.type))
|
|
error("element types must not have more than one NOTATION " +
|
|
"attribute");
|
|
}
|
|
// VC: No Notation on Empty Element
|
|
ContentModel model = doctype.getElementModel(elementName);
|
|
if (model != null && model.type == ContentModel.EMPTY)
|
|
error("attributes of type NOTATION must not be declared on an " +
|
|
"element declared EMPTY");
|
|
}
|
|
}
|
|
String enumer = null;
|
|
if ("ENUMERATION".equals(type) || "NOTATION".equals(type))
|
|
enumer = acc.toString();
|
|
else
|
|
values = null;
|
|
requireWhitespace();
|
|
readDefault(elementName, name, type, enumer, values);
|
|
}
|
|
|
|
/**
|
|
* Parse an attribute type.
|
|
*/
|
|
private String readAttType(StringBuffer acc, HashSet values)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
if (tryRead('('))
|
|
{
|
|
readEnumeration(false, acc, values);
|
|
return "ENUMERATION";
|
|
}
|
|
else
|
|
{
|
|
String typeString = readNmtoken(true);
|
|
if ("NOTATION".equals(typeString))
|
|
{
|
|
readNotationType(acc, values);
|
|
return typeString;
|
|
}
|
|
else if ("CDATA".equals(typeString) ||
|
|
"ID".equals(typeString) ||
|
|
"IDREF".equals(typeString) ||
|
|
"IDREFS".equals(typeString) ||
|
|
"ENTITY".equals(typeString) ||
|
|
"ENTITIES".equals(typeString) ||
|
|
"NMTOKEN".equals(typeString) ||
|
|
"NMTOKENS".equals(typeString))
|
|
return typeString;
|
|
else
|
|
{
|
|
error("illegal attribute type", typeString);
|
|
return null;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parse an enumeration.
|
|
*/
|
|
private void readEnumeration(boolean isNames, StringBuffer acc,
|
|
HashSet values)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
acc.append('(');
|
|
// first token
|
|
skipWhitespace();
|
|
String token = readNmtoken(isNames);
|
|
acc.append(token);
|
|
values.add(token);
|
|
// subsequent tokens
|
|
skipWhitespace();
|
|
while (!tryRead(')'))
|
|
{
|
|
require('|');
|
|
acc.append('|');
|
|
skipWhitespace();
|
|
token = readNmtoken(isNames);
|
|
// VC: No Duplicate Tokens
|
|
if (validating && values.contains(token))
|
|
error("duplicate token", token);
|
|
acc.append(token);
|
|
values.add(token);
|
|
skipWhitespace();
|
|
}
|
|
acc.append(')');
|
|
}
|
|
|
|
/**
|
|
* Parse a notation type for an attribute.
|
|
*/
|
|
private void readNotationType(StringBuffer acc, HashSet values)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
requireWhitespace();
|
|
require('(');
|
|
readEnumeration(true, acc, values);
|
|
}
|
|
|
|
/**
|
|
* Parse the default value for an attribute.
|
|
*/
|
|
private void readDefault(String elementName, String name,
|
|
String type, String enumeration, HashSet values)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
|
|
int flags = LIT_ATTRIBUTE;
|
|
String value = null, defaultType = null;
|
|
boolean saved = expandPE;
|
|
|
|
if (!"CDATA".equals(type))
|
|
flags |= LIT_NORMALIZE;
|
|
|
|
expandPE = false;
|
|
if (tryRead('#'))
|
|
{
|
|
if (tryRead("FIXED"))
|
|
{
|
|
defaultType = "#FIXED";
|
|
valueType = ATTRIBUTE_DEFAULT_FIXED;
|
|
requireWhitespace();
|
|
value = readLiteral(flags, false);
|
|
}
|
|
else if (tryRead("REQUIRED"))
|
|
{
|
|
defaultType = "#REQUIRED";
|
|
valueType = ATTRIBUTE_DEFAULT_REQUIRED;
|
|
}
|
|
else if (tryRead("IMPLIED"))
|
|
{
|
|
defaultType = "#IMPLIED";
|
|
valueType = ATTRIBUTE_DEFAULT_IMPLIED;
|
|
}
|
|
else
|
|
error("illegal keyword for attribute default value");
|
|
}
|
|
else
|
|
value = readLiteral(flags, false);
|
|
expandPE = saved;
|
|
if (validating)
|
|
{
|
|
if ("ID".equals(type))
|
|
{
|
|
// VC: Attribute Default Value Syntactically Correct
|
|
if (value != null && !isNmtoken(value, true))
|
|
error("default value must match Name production", value);
|
|
// VC: ID Attribute Default
|
|
if (valueType != ATTRIBUTE_DEFAULT_REQUIRED &&
|
|
valueType != ATTRIBUTE_DEFAULT_IMPLIED)
|
|
error("ID attributes must have a declared default of " +
|
|
"#IMPLIED or #REQUIRED");
|
|
}
|
|
else if (value != null)
|
|
{
|
|
// VC: Attribute Default Value Syntactically Correct
|
|
if ("IDREF".equals(type) || "ENTITY".equals(type))
|
|
{
|
|
if (!isNmtoken(value, true))
|
|
error("default value must match Name production", value);
|
|
}
|
|
else if ("IDREFS".equals(type) || "ENTITIES".equals(type))
|
|
{
|
|
StringTokenizer st = new StringTokenizer(value);
|
|
while (st.hasMoreTokens())
|
|
{
|
|
String token = st.nextToken();
|
|
if (!isNmtoken(token, true))
|
|
error("default value must match Name production", token);
|
|
}
|
|
}
|
|
else if ("NMTOKEN".equals(type) || "ENUMERATION".equals(type))
|
|
{
|
|
if (!isNmtoken(value, false))
|
|
error("default value must match Nmtoken production", value);
|
|
}
|
|
else if ("NMTOKENS".equals(type))
|
|
{
|
|
StringTokenizer st = new StringTokenizer(value);
|
|
while (st.hasMoreTokens())
|
|
{
|
|
String token = st.nextToken();
|
|
if (!isNmtoken(token, false))
|
|
error("default value must match Nmtoken production",
|
|
token);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Register attribute def
|
|
AttributeDecl attribute =
|
|
new AttributeDecl(type, value, valueType, enumeration, values,
|
|
inputStack.size() != 1);
|
|
doctype.addAttributeDecl(elementName, name, attribute);
|
|
}
|
|
|
|
/**
|
|
* Parse the EntityDecl production.
|
|
*/
|
|
private void readEntityDecl(boolean inExternalSubset)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
int flags = 0;
|
|
// Check if parameter entity
|
|
boolean peFlag = false;
|
|
expandPE = false;
|
|
requireWhitespace();
|
|
if (tryRead('%'))
|
|
{
|
|
peFlag = true;
|
|
requireWhitespace();
|
|
}
|
|
expandPE = true;
|
|
// Read entity name
|
|
String name = readNmtoken(true);
|
|
if (name.indexOf(':') != -1)
|
|
error("illegal character ':' in entity name", name);
|
|
if (peFlag)
|
|
name = "%" + name;
|
|
requireWhitespace();
|
|
mark(1);
|
|
int c = readCh();
|
|
reset();
|
|
if (c == 0x22 || c == 0x27) // " | '
|
|
{
|
|
// Internal entity replacement text
|
|
String value = readLiteral(flags | LIT_DISABLE_EREF, true);
|
|
int ai = value.indexOf('&');
|
|
while (ai != -1)
|
|
{
|
|
int sci = value.indexOf(';', ai);
|
|
if (sci == -1)
|
|
error("malformed reference in entity value", value);
|
|
String ref = value.substring(ai + 1, sci);
|
|
int[] cp = UnicodeReader.toCodePointArray(ref);
|
|
if (cp.length == 0)
|
|
error("malformed reference in entity value", value);
|
|
if (cp[0] == 0x23) // #
|
|
{
|
|
if (cp.length == 1)
|
|
error("malformed reference in entity value", value);
|
|
if (cp[1] == 0x78) // 'x'
|
|
{
|
|
if (cp.length == 2)
|
|
error("malformed reference in entity value", value);
|
|
for (int i = 2; i < cp.length; i++)
|
|
{
|
|
int x = cp[i];
|
|
if (x < 0x30 ||
|
|
(x > 0x39 && x < 0x41) ||
|
|
(x > 0x46 && x < 0x61) ||
|
|
x > 0x66)
|
|
error("malformed character reference in entity value",
|
|
value);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (int i = 1; i < cp.length; i++)
|
|
{
|
|
int x = cp[i];
|
|
if (x < 0x30 || x > 0x39)
|
|
error("malformed character reference in entity value",
|
|
value);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (!isNameStartCharacter(cp[0], input.xml11))
|
|
error("malformed reference in entity value", value);
|
|
for (int i = 1; i < cp.length; i++)
|
|
{
|
|
if (!isNameCharacter(cp[i], input.xml11))
|
|
error("malformed reference in entity value", value);
|
|
}
|
|
}
|
|
ai = value.indexOf('&', sci);
|
|
}
|
|
doctype.addEntityDecl(name, value, inExternalSubset);
|
|
}
|
|
else
|
|
{
|
|
ExternalIds ids = readExternalIds(false, false);
|
|
// Check for NDATA
|
|
boolean white = tryWhitespace();
|
|
if (!peFlag && tryRead("NDATA"))
|
|
{
|
|
if (!white)
|
|
error("whitespace required before NDATA");
|
|
requireWhitespace();
|
|
ids.notationName = readNmtoken(true);
|
|
}
|
|
doctype.addEntityDecl(name, ids, inExternalSubset);
|
|
}
|
|
// finish
|
|
skipWhitespace();
|
|
require('>');
|
|
}
|
|
|
|
/**
|
|
* Parse the NotationDecl production.
|
|
*/
|
|
private void readNotationDecl(boolean inExternalSubset)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
requireWhitespace();
|
|
String notationName = readNmtoken(true);
|
|
if (notationName.indexOf(':') != -1)
|
|
error("illegal character ':' in notation name", notationName);
|
|
if (validating)
|
|
{
|
|
// VC: Unique Notation Name
|
|
ExternalIds notation = doctype.getNotation(notationName);
|
|
if (notation != null)
|
|
error("duplicate notation name", notationName);
|
|
}
|
|
requireWhitespace();
|
|
ExternalIds ids = readExternalIds(true, false);
|
|
ids.notationName = notationName;
|
|
doctype.addNotationDecl(notationName, ids, inExternalSubset);
|
|
skipWhitespace();
|
|
require('>');
|
|
}
|
|
|
|
/**
|
|
* Returns a tuple {publicId, systemId}.
|
|
*/
|
|
private ExternalIds readExternalIds(boolean inNotation, boolean isSubset)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
int c;
|
|
int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
|
|
ExternalIds ids = new ExternalIds();
|
|
|
|
if (tryRead("PUBLIC"))
|
|
{
|
|
requireWhitespace();
|
|
ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags, false);
|
|
if (inNotation)
|
|
{
|
|
skipWhitespace();
|
|
mark(1);
|
|
c = readCh();
|
|
reset();
|
|
if (c == 0x22 || c == 0x27) // " | '
|
|
{
|
|
String href = readLiteral(flags, false);
|
|
ids.systemId = absolutize(input.systemId, href);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
requireWhitespace();
|
|
String href = readLiteral(flags, false);
|
|
ids.systemId = absolutize(input.systemId, href);
|
|
}
|
|
// Check valid URI characters
|
|
for (int i = 0; i < ids.publicId.length(); i++)
|
|
{
|
|
char d = ids.publicId.charAt(i);
|
|
if (d >= 'a' && d <= 'z')
|
|
continue;
|
|
if (d >= 'A' && d <= 'Z')
|
|
continue;
|
|
if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1)
|
|
continue;
|
|
error("illegal PUBLIC id character",
|
|
"U+" + Integer.toHexString(d));
|
|
}
|
|
}
|
|
else if (tryRead("SYSTEM"))
|
|
{
|
|
requireWhitespace();
|
|
String href = readLiteral(flags, false);
|
|
ids.systemId = absolutize(input.systemId, href);
|
|
}
|
|
else if (!isSubset)
|
|
{
|
|
error("missing SYSTEM or PUBLIC keyword");
|
|
}
|
|
if (ids.systemId != null && !inNotation)
|
|
{
|
|
if (ids.systemId.indexOf('#') != -1)
|
|
error("SYSTEM id has a URI fragment", ids.systemId);
|
|
}
|
|
return ids;
|
|
}
|
|
|
|
/**
|
|
* Parse the start of an element.
|
|
* @return the state of the parser afterwards (EMPTY_ELEMENT or CONTENT)
|
|
*/
|
|
private int readStartElement()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
// Read element name
|
|
String elementName = readNmtoken(true);
|
|
attrs.clear();
|
|
// Push namespace context
|
|
if (namespaceAware)
|
|
{
|
|
if (elementName.charAt(0) == ':' ||
|
|
elementName.charAt(elementName.length() - 1) == ':')
|
|
error("not a QName", elementName);
|
|
namespaces.addFirst(new LinkedHashMap());
|
|
}
|
|
// Read element content
|
|
boolean white = tryWhitespace();
|
|
mark(1);
|
|
int c = readCh();
|
|
while (c != 0x2f && c != 0x3e) // '/' | '>'
|
|
{
|
|
// Read attribute
|
|
reset();
|
|
if (!white)
|
|
error("need whitespace between attributes");
|
|
readAttribute(elementName);
|
|
white = tryWhitespace();
|
|
mark(1);
|
|
c = readCh();
|
|
}
|
|
// supply defaulted attributes
|
|
if (doctype != null)
|
|
{
|
|
for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); )
|
|
{
|
|
Map.Entry entry = (Map.Entry) i.next();
|
|
String attName = (String) entry.getKey();
|
|
AttributeDecl decl = (AttributeDecl) entry.getValue();
|
|
if (validating)
|
|
{
|
|
switch (decl.valueType)
|
|
{
|
|
case ATTRIBUTE_DEFAULT_REQUIRED:
|
|
// VC: Required Attribute
|
|
if (decl.value == null && !attributeSpecified(attName))
|
|
error("value for " + attName + " attribute is required");
|
|
break;
|
|
case ATTRIBUTE_DEFAULT_FIXED:
|
|
// VC: Fixed Attribute Default
|
|
for (Iterator j = attrs.iterator(); j.hasNext(); )
|
|
{
|
|
Attribute a = (Attribute) j.next();
|
|
if (attName.equals(a.name) &&
|
|
!decl.value.equals(a.value))
|
|
error("value for " + attName + " attribute must be " +
|
|
decl.value);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
if (namespaceAware && attName.equals("xmlns"))
|
|
{
|
|
LinkedHashMap ctx =
|
|
(LinkedHashMap) namespaces.getFirst();
|
|
if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
|
|
continue; // namespace was specified
|
|
}
|
|
else if (namespaceAware && attName.startsWith("xmlns:"))
|
|
{
|
|
LinkedHashMap ctx =
|
|
(LinkedHashMap) namespaces.getFirst();
|
|
if (ctx.containsKey(attName.substring(6)))
|
|
continue; // namespace was specified
|
|
}
|
|
else if (attributeSpecified(attName))
|
|
continue;
|
|
if (decl.value == null)
|
|
continue;
|
|
// VC: Standalone Document Declaration
|
|
if (validating && decl.external && xmlStandalone == Boolean.TRUE)
|
|
error("standalone must be 'no' if attributes inherit values " +
|
|
"from externally declared markup declarations");
|
|
Attribute attr =
|
|
new Attribute(attName, decl.type, false, decl.value);
|
|
if (namespaceAware)
|
|
{
|
|
if (!addNamespace(attr))
|
|
attrs.add(attr);
|
|
}
|
|
else
|
|
attrs.add(attr);
|
|
}
|
|
}
|
|
if (baseAware)
|
|
{
|
|
String uri = getAttributeValue(XMLConstants.XML_NS_URI, "base");
|
|
String base = getXMLBase();
|
|
bases.addFirst(absolutize(base, uri));
|
|
}
|
|
if (namespaceAware)
|
|
{
|
|
// check prefix bindings
|
|
int ci = elementName.indexOf(':');
|
|
if (ci != -1)
|
|
{
|
|
String prefix = elementName.substring(0, ci);
|
|
String uri = getNamespaceURI(prefix);
|
|
if (uri == null)
|
|
error("unbound element prefix", prefix);
|
|
else if (input.xml11 && "".equals(uri))
|
|
error("XML 1.1 unbound element prefix", prefix);
|
|
}
|
|
for (Iterator i = attrs.iterator(); i.hasNext(); )
|
|
{
|
|
Attribute attr = (Attribute) i.next();
|
|
if (attr.prefix != null &&
|
|
!XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix))
|
|
{
|
|
String uri = getNamespaceURI(attr.prefix);
|
|
if (uri == null)
|
|
error("unbound attribute prefix", attr.prefix);
|
|
else if (input.xml11 && "".equals(uri))
|
|
error("XML 1.1 unbound attribute prefix", attr.prefix);
|
|
}
|
|
}
|
|
}
|
|
if (validating && doctype != null)
|
|
{
|
|
validateStartElement(elementName);
|
|
currentContentModel = doctype.getElementModel(elementName);
|
|
if (currentContentModel == null)
|
|
error("no element declaration", elementName);
|
|
validationStack.add(new LinkedList());
|
|
}
|
|
// make element name available for read
|
|
buf.setLength(0);
|
|
buf.append(elementName);
|
|
// push element onto stack
|
|
stack.addLast(elementName);
|
|
switch (c)
|
|
{
|
|
case 0x3e: // '>'
|
|
return CONTENT;
|
|
case 0x2f: // '/'
|
|
require('>');
|
|
return EMPTY_ELEMENT;
|
|
}
|
|
return -1; // to satisfy compiler
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified attribute name was specified for the
|
|
* current element.
|
|
*/
|
|
private boolean attributeSpecified(String attName)
|
|
{
|
|
for (Iterator j = attrs.iterator(); j.hasNext(); )
|
|
{
|
|
Attribute a = (Attribute) j.next();
|
|
if (attName.equals(a.name))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Parse an attribute.
|
|
*/
|
|
private void readAttribute(String elementName)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
// Read attribute name
|
|
String attributeName = readNmtoken(true);
|
|
String type = getAttributeType(elementName, attributeName);
|
|
readEq();
|
|
// Read literal
|
|
final int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF;
|
|
String value = (type == null || "CDATA".equals(type)) ?
|
|
readLiteral(flags, false) : readLiteral(flags | LIT_NORMALIZE, false);
|
|
// add attribute event
|
|
Attribute attr = this.new Attribute(attributeName, type, true, value);
|
|
if (namespaceAware)
|
|
{
|
|
if (attributeName.charAt(0) == ':' ||
|
|
attributeName.charAt(attributeName.length() - 1) == ':')
|
|
error("not a QName", attributeName);
|
|
else if (attributeName.equals("xmlns"))
|
|
{
|
|
LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
|
|
if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
|
|
error("duplicate default namespace");
|
|
}
|
|
else if (attributeName.startsWith("xmlns:"))
|
|
{
|
|
LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
|
|
if (ctx.containsKey(attributeName.substring(6)))
|
|
error("duplicate namespace", attributeName.substring(6));
|
|
}
|
|
else if (attrs.contains(attr))
|
|
error("duplicate attribute", attributeName);
|
|
}
|
|
else if (attrs.contains(attr))
|
|
error("duplicate attribute", attributeName);
|
|
if (validating && doctype != null)
|
|
{
|
|
// VC: Attribute Value Type
|
|
AttributeDecl decl =
|
|
doctype.getAttributeDecl(elementName, attributeName);
|
|
if (decl == null)
|
|
error("attribute must be declared", attributeName);
|
|
if ("ENUMERATION".equals(decl.type))
|
|
{
|
|
// VC: Enumeration
|
|
if (!decl.values.contains(value))
|
|
error("value does not match enumeration " + decl.enumeration,
|
|
value);
|
|
}
|
|
else if ("ID".equals(decl.type))
|
|
{
|
|
// VC: ID
|
|
if (!isNmtoken(value, true))
|
|
error("ID values must match the Name production");
|
|
if (ids.contains(value))
|
|
error("Duplicate ID", value);
|
|
ids.add(value);
|
|
}
|
|
else if ("IDREF".equals(decl.type) || "IDREFS".equals(decl.type))
|
|
{
|
|
StringTokenizer st = new StringTokenizer(value);
|
|
while (st.hasMoreTokens())
|
|
{
|
|
String token = st.nextToken();
|
|
// VC: IDREF
|
|
if (!isNmtoken(token, true))
|
|
error("IDREF values must match the Name production");
|
|
idrefs.add(token);
|
|
}
|
|
}
|
|
else if ("NMTOKEN".equals(decl.type) || "NMTOKENS".equals(decl.type))
|
|
{
|
|
StringTokenizer st = new StringTokenizer(value);
|
|
while (st.hasMoreTokens())
|
|
{
|
|
String token = st.nextToken();
|
|
// VC: Name Token
|
|
if (!isNmtoken(token, false))
|
|
error("NMTOKEN values must match the Nmtoken production");
|
|
}
|
|
}
|
|
else if ("ENTITY".equals(decl.type))
|
|
{
|
|
// VC: Entity Name
|
|
if (!isNmtoken(value, true))
|
|
error("ENTITY values must match the Name production");
|
|
Object entity = doctype.getEntity(value);
|
|
if (entity == null || !(entity instanceof ExternalIds) ||
|
|
((ExternalIds) entity).notationName == null)
|
|
error("ENTITY values must match the name of an unparsed " +
|
|
"entity declared in the DTD");
|
|
}
|
|
else if ("NOTATION".equals(decl.type))
|
|
{
|
|
if (!decl.values.contains(value))
|
|
error("NOTATION values must match a declared notation name",
|
|
value);
|
|
// VC: Notation Attributes
|
|
ExternalIds notation = doctype.getNotation(value);
|
|
if (notation == null)
|
|
error("NOTATION values must match the name of a notation " +
|
|
"declared in the DTD", value);
|
|
}
|
|
}
|
|
if (namespaceAware)
|
|
{
|
|
if (!addNamespace(attr))
|
|
attrs.add(attr);
|
|
}
|
|
else
|
|
attrs.add(attr);
|
|
}
|
|
|
|
/**
|
|
* Determines whether the specified attribute is a namespace declaration,
|
|
* and adds it to the current namespace context if so. Returns false if
|
|
* the attribute is an ordinary attribute.
|
|
*/
|
|
private boolean addNamespace(Attribute attr)
|
|
throws XMLStreamException
|
|
{
|
|
if ("xmlns".equals(attr.name))
|
|
{
|
|
LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
|
|
if (ctx.get(XMLConstants.DEFAULT_NS_PREFIX) != null)
|
|
error("Duplicate default namespace declaration");
|
|
if (XMLConstants.XML_NS_URI.equals(attr.value))
|
|
error("can't bind XML namespace");
|
|
ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value);
|
|
return true;
|
|
}
|
|
else if ("xmlns".equals(attr.prefix))
|
|
{
|
|
LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
|
|
if (ctx.get(attr.localName) != null)
|
|
error("Duplicate namespace declaration for prefix",
|
|
attr.localName);
|
|
if (XMLConstants.XML_NS_PREFIX.equals(attr.localName))
|
|
{
|
|
if (!XMLConstants.XML_NS_URI.equals(attr.value))
|
|
error("can't redeclare xml prefix");
|
|
else
|
|
return false; // treat as attribute
|
|
}
|
|
if (XMLConstants.XML_NS_URI.equals(attr.value))
|
|
error("can't bind non-xml prefix to XML namespace");
|
|
if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.localName))
|
|
error("can't redeclare xmlns prefix");
|
|
if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value))
|
|
error("can't bind non-xmlns prefix to XML Namespace namespace");
|
|
if ("".equals(attr.value) && !input.xml11)
|
|
error("illegal use of 1.1-style prefix unbinding in 1.0 document");
|
|
ctx.put(attr.localName, attr.value);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Parse a closing tag.
|
|
*/
|
|
private void readEndElement()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
// pop element off stack
|
|
String expected = (String) stack.removeLast();
|
|
require(expected);
|
|
skipWhitespace();
|
|
require('>');
|
|
// Make element name available
|
|
buf.setLength(0);
|
|
buf.append(expected);
|
|
if (validating && doctype != null)
|
|
endElementValidationHook();
|
|
}
|
|
|
|
/**
|
|
* Validate the end of an element.
|
|
* Called on an end-element or empty element if validating.
|
|
*/
|
|
private void endElementValidationHook()
|
|
throws XMLStreamException
|
|
{
|
|
validateEndElement();
|
|
validationStack.removeLast();
|
|
if (stack.isEmpty())
|
|
currentContentModel = null;
|
|
else
|
|
{
|
|
String parent = (String) stack.getLast();
|
|
currentContentModel = doctype.getElementModel(parent);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parse a comment.
|
|
*/
|
|
private void readComment(boolean inDTD)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
boolean saved = expandPE;
|
|
expandPE = false;
|
|
buf.setLength(0);
|
|
readUntil(TEST_END_COMMENT);
|
|
require('>');
|
|
expandPE = saved;
|
|
if (inDTD)
|
|
doctype.addComment(buf.toString());
|
|
}
|
|
|
|
/**
|
|
* Parse a processing instruction.
|
|
*/
|
|
private void readPI(boolean inDTD)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
boolean saved = expandPE;
|
|
expandPE = false;
|
|
piTarget = readNmtoken(true);
|
|
if (piTarget.indexOf(':') != -1)
|
|
error("illegal character in PI target", new Character(':'));
|
|
if ("xml".equalsIgnoreCase(piTarget))
|
|
error("illegal PI target", piTarget);
|
|
if (tryRead(TEST_END_PI))
|
|
piData = null;
|
|
else
|
|
{
|
|
if (!tryWhitespace())
|
|
error("whitespace required between PI target and data");
|
|
buf.setLength(0);
|
|
readUntil(TEST_END_PI);
|
|
piData = buf.toString();
|
|
}
|
|
expandPE = saved;
|
|
if (inDTD)
|
|
doctype.addPI(piTarget, piData);
|
|
}
|
|
|
|
/**
|
|
* Parse an entity reference.
|
|
*/
|
|
private void readReference()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
buf.setLength(0);
|
|
String entityName = readNmtoken(true);
|
|
require(';');
|
|
buf.setLength(0);
|
|
buf.append(entityName);
|
|
}
|
|
|
|
/**
|
|
* Read an CDATA section.
|
|
*/
|
|
private void readCDSect()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
buf.setLength(0);
|
|
readUntil(TEST_END_CDATA);
|
|
}
|
|
|
|
/**
|
|
* Read character data.
|
|
* @return the type of text read (CHARACTERS or SPACE)
|
|
*/
|
|
private int readCharData(String prefix)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
boolean white = true;
|
|
buf.setLength(0);
|
|
if (prefix != null)
|
|
buf.append(prefix);
|
|
boolean done = false;
|
|
boolean entities = false;
|
|
while (!done)
|
|
{
|
|
// Block read
|
|
mark(tmpBuf.length);
|
|
int len = read(tmpBuf, 0, tmpBuf.length);
|
|
if (len == -1)
|
|
{
|
|
if (inputStack.size() > 1)
|
|
{
|
|
popInput();
|
|
// report end-entity
|
|
done = true;
|
|
}
|
|
else
|
|
throw new EOFException();
|
|
}
|
|
for (int i = 0; i < len && !done; i++)
|
|
{
|
|
int c = tmpBuf[i];
|
|
switch (c)
|
|
{
|
|
case 0x20:
|
|
case 0x09:
|
|
case 0x0a:
|
|
case 0x0d:
|
|
buf.append(Character.toChars(c));
|
|
break; // whitespace
|
|
case 0x26: // '&'
|
|
reset();
|
|
read(tmpBuf, 0, i);
|
|
// character reference?
|
|
mark(3);
|
|
c = readCh(); // &
|
|
c = readCh();
|
|
if (c == 0x23) // '#'
|
|
{
|
|
mark(1);
|
|
c = readCh();
|
|
boolean hex = (c == 0x78); // 'x'
|
|
if (!hex)
|
|
reset();
|
|
char[] ch = readCharacterRef(hex ? 16 : 10);
|
|
buf.append(ch, 0, ch.length);
|
|
for (int j = 0; j < ch.length; j++)
|
|
{
|
|
switch (ch[j])
|
|
{
|
|
case 0x20:
|
|
case 0x09:
|
|
case 0x0a:
|
|
case 0x0d:
|
|
break; // whitespace
|
|
default:
|
|
white = false;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// entity reference
|
|
reset();
|
|
c = readCh(); // &
|
|
String entityName = readNmtoken(true);
|
|
require(';');
|
|
String text =
|
|
(String) PREDEFINED_ENTITIES.get(entityName);
|
|
if (text != null)
|
|
buf.append(text);
|
|
else
|
|
{
|
|
pushInput("", "&" + entityName + ";", false, false);
|
|
done = true;
|
|
break;
|
|
}
|
|
}
|
|
// continue processing
|
|
i = -1;
|
|
mark(tmpBuf.length);
|
|
len = read(tmpBuf, 0, tmpBuf.length);
|
|
if (len == -1)
|
|
{
|
|
if (inputStack.size() > 1)
|
|
{
|
|
popInput();
|
|
done = true;
|
|
}
|
|
else
|
|
throw new EOFException();
|
|
}
|
|
entities = true;
|
|
break; // end of text sequence
|
|
case 0x3e: // '>'
|
|
int l = buf.length();
|
|
if (l > 1 &&
|
|
buf.charAt(l - 1) == ']' &&
|
|
buf.charAt(l - 2) == ']')
|
|
error("Character data may not contain unescaped ']]>'");
|
|
buf.append(Character.toChars(c));
|
|
break;
|
|
case 0x3c: // '<'
|
|
reset();
|
|
// read i characters
|
|
int count = 0, remaining = i;
|
|
do
|
|
{
|
|
int r = read(tmpBuf, 0, remaining);
|
|
count += r;
|
|
remaining -= r;
|
|
}
|
|
while (count < i);
|
|
i = len;
|
|
if (coalescing && tryRead(TEST_CDATA))
|
|
readUntil(TEST_END_CDATA); // read CDATA section into buf
|
|
else
|
|
done = true; // end of text sequence
|
|
break;
|
|
default:
|
|
if (input.xml11)
|
|
{
|
|
if (!isXML11Char(c) || isXML11RestrictedChar(c))
|
|
error("illegal XML 1.1 character",
|
|
"U+" + Integer.toHexString(c));
|
|
}
|
|
else if (!isChar(c))
|
|
error("illegal XML character",
|
|
"U+" + Integer.toHexString(c));
|
|
white = false;
|
|
buf.append(Character.toChars(c));
|
|
}
|
|
}
|
|
// if text buffer >= 2MB, return it as a chunk
|
|
// to avoid excessive memory use
|
|
if (buf.length() >= 2097152)
|
|
done = true;
|
|
}
|
|
if (entities)
|
|
normalizeCRLF(buf);
|
|
return white ? XMLStreamConstants.SPACE : XMLStreamConstants.CHARACTERS;
|
|
}
|
|
|
|
/**
|
|
* Expands the specified entity.
|
|
*/
|
|
private void expandEntity(String name, boolean inAttr, boolean normalize)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
if (doctype != null)
|
|
{
|
|
Object value = doctype.getEntity(name);
|
|
if (value != null)
|
|
{
|
|
if (xmlStandalone == Boolean.TRUE)
|
|
{
|
|
// VC: Standalone Document Declaration
|
|
if (doctype.isEntityExternal(name))
|
|
error("reference to external entity in standalone document");
|
|
else if (value instanceof ExternalIds)
|
|
{
|
|
ExternalIds ids = (ExternalIds) value;
|
|
if (ids.notationName != null &&
|
|
doctype.isNotationExternal(ids.notationName))
|
|
error("reference to external notation in " +
|
|
"standalone document");
|
|
}
|
|
}
|
|
if (value instanceof String)
|
|
{
|
|
String text = (String) value;
|
|
if (inAttr && text.indexOf('<') != -1)
|
|
error("< in attribute value");
|
|
pushInput(name, text, !inAttr, normalize);
|
|
}
|
|
else if (inAttr)
|
|
error("reference to external entity in attribute value", name);
|
|
else
|
|
pushInput(name, (ExternalIds) value, !inAttr, normalize);
|
|
return;
|
|
}
|
|
}
|
|
error("reference to undeclared entity", name);
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified entity is unparsed.
|
|
*/
|
|
private boolean isUnparsedEntity(String name)
|
|
{
|
|
if (doctype != null)
|
|
{
|
|
Object value = doctype.getEntity(name);
|
|
if (value != null && value instanceof ExternalIds)
|
|
return ((ExternalIds) value).notationName != null;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Read an equals sign.
|
|
*/
|
|
private void readEq()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
skipWhitespace();
|
|
require('=');
|
|
skipWhitespace();
|
|
}
|
|
|
|
/**
|
|
* Character read for reading literals.
|
|
* @param recognizePEs whether to recognize parameter-entity references
|
|
*/
|
|
private int literalReadCh(boolean recognizePEs)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
int c = recognizePEs ? readCh() : read();
|
|
while (c == -1)
|
|
{
|
|
if (inputStack.size() > 1)
|
|
{
|
|
inputStack.removeLast();
|
|
input = (Input) inputStack.getLast();
|
|
// Don't issue end-entity
|
|
c = recognizePEs ? readCh() : read();
|
|
}
|
|
else
|
|
throw new EOFException();
|
|
}
|
|
return c;
|
|
}
|
|
|
|
/**
|
|
* Read a string literal.
|
|
*/
|
|
private String readLiteral(int flags, boolean recognizePEs)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
boolean saved = expandPE;
|
|
int delim = readCh();
|
|
if (delim != 0x27 && delim != 0x22)
|
|
error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim));
|
|
literalBuf.setLength(0);
|
|
if ((flags & LIT_DISABLE_PE) != 0)
|
|
expandPE = false;
|
|
boolean entities = false;
|
|
int inputStackSize = inputStack.size();
|
|
do
|
|
{
|
|
int c = literalReadCh(recognizePEs);
|
|
if (c == delim && inputStackSize == inputStack.size())
|
|
break;
|
|
switch (c)
|
|
{
|
|
case 0x0a:
|
|
case 0x0d:
|
|
if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
|
|
c = 0x20; // normalize to space
|
|
break;
|
|
case 0x09:
|
|
if ((flags & LIT_ATTRIBUTE) != 0)
|
|
c = 0x20; // normalize to space
|
|
break;
|
|
case 0x26: // '&'
|
|
mark(2);
|
|
c = readCh();
|
|
if (c == 0x23) // '#'
|
|
{
|
|
if ((flags & LIT_DISABLE_CREF) != 0)
|
|
{
|
|
reset();
|
|
c = 0x26; // '&'
|
|
}
|
|
else
|
|
{
|
|
mark(1);
|
|
c = readCh();
|
|
boolean hex = (c == 0x78); // 'x'
|
|
if (!hex)
|
|
reset();
|
|
char[] ref = readCharacterRef(hex ? 16 : 10);
|
|
for (int i = 0; i < ref.length; i++)
|
|
literalBuf.append(ref[i]);
|
|
entities = true;
|
|
continue;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if ((flags & LIT_DISABLE_EREF) != 0)
|
|
{
|
|
reset();
|
|
c = 0x26; // '&'
|
|
}
|
|
else
|
|
{
|
|
reset();
|
|
String entityName = readNmtoken(true);
|
|
require(';');
|
|
String text =
|
|
(String) PREDEFINED_ENTITIES.get(entityName);
|
|
if (text != null)
|
|
literalBuf.append(text);
|
|
else
|
|
expandEntity(entityName,
|
|
(flags & LIT_ATTRIBUTE) != 0,
|
|
true);
|
|
entities = true;
|
|
continue;
|
|
}
|
|
}
|
|
break;
|
|
case 0x3c: // '<'
|
|
if ((flags & LIT_ATTRIBUTE) != 0)
|
|
error("attribute values may not contain '<'");
|
|
break;
|
|
case -1:
|
|
if (inputStack.size() > 1)
|
|
{
|
|
popInput();
|
|
continue;
|
|
}
|
|
throw new EOFException();
|
|
default:
|
|
if ((c < 0x0020 || c > 0xfffd) ||
|
|
(c >= 0xd800 && c < 0xdc00) ||
|
|
(input.xml11 && (c >= 0x007f) &&
|
|
(c <= 0x009f) && (c != 0x0085)))
|
|
error("illegal character", "U+" + Integer.toHexString(c));
|
|
}
|
|
literalBuf.append(Character.toChars(c));
|
|
}
|
|
while (true);
|
|
expandPE = saved;
|
|
if (entities)
|
|
normalizeCRLF(literalBuf);
|
|
if ((flags & LIT_NORMALIZE) > 0)
|
|
literalBuf = normalize(literalBuf);
|
|
return literalBuf.toString();
|
|
}
|
|
|
|
/**
|
|
* Performs attribute-value normalization of the text buffer.
|
|
* This discards leading and trailing whitespace, and replaces sequences
|
|
* of whitespace with a single space.
|
|
*/
|
|
private StringBuffer normalize(StringBuffer buf)
|
|
{
|
|
StringBuffer acc = new StringBuffer();
|
|
int len = buf.length();
|
|
int avState = 0;
|
|
for (int i = 0; i < len; i++)
|
|
{
|
|
char c = buf.charAt(i);
|
|
if (c == ' ')
|
|
avState = (avState == 0) ? 0 : 1;
|
|
else
|
|
{
|
|
if (avState == 1)
|
|
acc.append(' ');
|
|
acc.append(c);
|
|
avState = 2;
|
|
}
|
|
}
|
|
return acc;
|
|
}
|
|
|
|
/**
|
|
* Replace any CR/LF pairs in the buffer with LF.
|
|
* This may be necessary if combinations of CR or LF were declared as
|
|
* (character) entity references in the input.
|
|
*/
|
|
private void normalizeCRLF(StringBuffer buf)
|
|
{
|
|
int len = buf.length() - 1;
|
|
for (int i = 0; i < len; i++)
|
|
{
|
|
char c = buf.charAt(i);
|
|
if (c == '\r' && buf.charAt(i + 1) == '\n')
|
|
{
|
|
buf.deleteCharAt(i--);
|
|
len--;
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parse and expand a parameter entity reference.
|
|
*/
|
|
private void expandPEReference()
|
|
throws IOException, XMLStreamException
|
|
{
|
|
String name = readNmtoken(true, new StringBuffer());
|
|
require(';');
|
|
mark(1); // ensure we don't reset to before the semicolon
|
|
if (doctype != null)
|
|
{
|
|
String entityName = "%" + name;
|
|
Object entity = doctype.getEntity(entityName);
|
|
if (entity != null)
|
|
{
|
|
if (xmlStandalone == Boolean.TRUE)
|
|
{
|
|
if (doctype.isEntityExternal(entityName))
|
|
error("reference to external parameter entity in " +
|
|
"standalone document");
|
|
}
|
|
if (entity instanceof String)
|
|
{
|
|
pushInput(name, (String) entity, false, input.normalize);
|
|
//pushInput(name, " " + (String) entity + " ");
|
|
}
|
|
else
|
|
{
|
|
//pushInput("", " ");
|
|
pushInput(name, (ExternalIds) entity, false, input.normalize);
|
|
//pushInput("", " ");
|
|
}
|
|
}
|
|
else
|
|
error("reference to undeclared parameter entity", name);
|
|
}
|
|
else
|
|
error("reference to parameter entity without doctype", name);
|
|
}
|
|
|
|
/**
|
|
* Parse the digits in a character reference.
|
|
* @param base the base of the digits (10 or 16)
|
|
*/
|
|
private char[] readCharacterRef(int base)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
StringBuffer b = new StringBuffer();
|
|
for (int c = readCh(); c != 0x3b && c != -1; c = readCh())
|
|
b.append(Character.toChars(c));
|
|
try
|
|
{
|
|
int ord = Integer.parseInt(b.toString(), base);
|
|
if (input.xml11)
|
|
{
|
|
if (!isXML11Char(ord))
|
|
error("illegal XML 1.1 character reference " +
|
|
"U+" + Integer.toHexString(ord));
|
|
}
|
|
else
|
|
{
|
|
if ((ord < 0x20 && !(ord == 0x0a || ord == 0x09 || ord == 0x0d))
|
|
|| (ord >= 0xd800 && ord <= 0xdfff)
|
|
|| ord == 0xfffe || ord == 0xffff
|
|
|| ord > 0x0010ffff)
|
|
error("illegal XML character reference " +
|
|
"U+" + Integer.toHexString(ord));
|
|
}
|
|
return Character.toChars(ord);
|
|
}
|
|
catch (NumberFormatException e)
|
|
{
|
|
error("illegal characters in character reference", b.toString());
|
|
return null;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Parses an NMTOKEN or Name production.
|
|
* @param isName if a Name, otherwise an NMTOKEN
|
|
*/
|
|
private String readNmtoken(boolean isName)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
return readNmtoken(isName, nmtokenBuf);
|
|
}
|
|
|
|
/**
|
|
* Parses an NMTOKEN or Name production using the specified buffer.
|
|
* @param isName if a Name, otherwise an NMTOKEN
|
|
* @param buf the character buffer to use
|
|
*/
|
|
private String readNmtoken(boolean isName, StringBuffer buf)
|
|
throws IOException, XMLStreamException
|
|
{
|
|
buf.setLength(0);
|
|
int c = readCh();
|
|
if (isName)
|
|
{
|
|
if (!isNameStartCharacter(c, input.xml11))
|
|
error("not a name start character",
|
|
"U+" + Integer.toHexString(c));
|
|
}
|
|
else
|
|
{
|
|
if (!isNameCharacter(c, input.xml11))
|
|
error("not a name character",
|
|
"U+" + Integer.toHexString(c));
|
|
}
|
|
buf.append(Character.toChars(c));
|
|
do
|
|
{
|
|
mark(1);
|
|
c = readCh();
|
|
switch (c)
|
|
{
|
|
case 0x25: // '%'
|
|
case 0x3c: // '<'
|
|
case 0x3e: // '>'
|
|
case 0x26: // '&'
|
|
case 0x2c: // ','
|
|
case 0x7c: // '|'
|
|
case 0x2a: // '*'
|
|
case 0x2b: // '+'
|
|
case 0x3f: // '?'
|
|
case 0x29: // ')'
|
|
case 0x3d: // '='
|
|
case 0x27: // '\''
|
|
case 0x22: // '"'
|
|
case 0x5b: // '['
|
|
case 0x20: // ' '
|
|
case 0x09: // '\t'
|
|
case 0x0a: // '\n'
|
|
case 0x0d: // '\r'
|
|
case 0x3b: // ';'
|
|
case 0x2f: // '/'
|
|
case -1:
|
|
reset();
|
|
return intern(buf.toString());
|
|
default:
|
|
if (!isNameCharacter(c, input.xml11))
|
|
error("not a name character",
|
|
"U+" + Integer.toHexString(c));
|
|
else
|
|
buf.append(Character.toChars(c));
|
|
}
|
|
}
|
|
while (true);
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified Unicode character is an XML 1.1 Char.
|
|
*/
|
|
public static boolean isXML11Char(int c)
|
|
{
|
|
return ((c >= 0x0001 && c <= 0xD7FF) ||
|
|
(c >= 0xE000 && c < 0xFFFE) ||
|
|
(c >= 0x10000 && c <= 0x10FFFF));
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified Unicode character is an XML 1.1
|
|
* RestrictedChar.
|
|
*/
|
|
public static boolean isXML11RestrictedChar(int c)
|
|
{
|
|
return ((c >= 0x0001 && c <= 0x0008) ||
|
|
(c >= 0x000B && c <= 0x000C) ||
|
|
(c >= 0x000E && c <= 0x001F) ||
|
|
(c >= 0x007F && c <= 0x0084) ||
|
|
(c >= 0x0086 && c <= 0x009F));
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified text matches the Name or Nmtoken
|
|
* production.
|
|
*/
|
|
private boolean isNmtoken(String text, boolean isName)
|
|
{
|
|
try
|
|
{
|
|
int[] cp = UnicodeReader.toCodePointArray(text);
|
|
if (cp.length == 0)
|
|
return false;
|
|
if (isName)
|
|
{
|
|
if (!isNameStartCharacter(cp[0], input.xml11))
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
if (!isNameCharacter(cp[0], input.xml11))
|
|
return false;
|
|
}
|
|
for (int i = 1; i < cp.length; i++)
|
|
{
|
|
if (!isNameCharacter(cp[i], input.xml11))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
catch (IOException e)
|
|
{
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified Unicode character is a Name start
|
|
* character.
|
|
*/
|
|
public static boolean isNameStartCharacter(int c, boolean xml11)
|
|
{
|
|
if (xml11)
|
|
return ((c >= 0x0041 && c <= 0x005a) ||
|
|
(c >= 0x0061 && c <= 0x007a) ||
|
|
c == 0x3a |
|
|
c == 0x5f |
|
|
(c >= 0xC0 && c <= 0xD6) ||
|
|
(c >= 0xD8 && c <= 0xF6) ||
|
|
(c >= 0xF8 && c <= 0x2FF) ||
|
|
(c >= 0x370 && c <= 0x37D) ||
|
|
(c >= 0x37F && c <= 0x1FFF) ||
|
|
(c >= 0x200C && c <= 0x200D) ||
|
|
(c >= 0x2070 && c <= 0x218F) ||
|
|
(c >= 0x2C00 && c <= 0x2FEF) ||
|
|
(c >= 0x3001 && c <= 0xD7FF) ||
|
|
(c >= 0xF900 && c <= 0xFDCF) ||
|
|
(c >= 0xFDF0 && c <= 0xFFFD) ||
|
|
(c >= 0x10000 && c <= 0xEFFFF));
|
|
else
|
|
return (c == 0x5f || c == 0x3a || isLetter(c));
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified Unicode character is a Name non-initial
|
|
* character.
|
|
*/
|
|
public static boolean isNameCharacter(int c, boolean xml11)
|
|
{
|
|
if (xml11)
|
|
return ((c >= 0x0041 && c <= 0x005a) ||
|
|
(c >= 0x0061 && c <= 0x007a) ||
|
|
(c >= 0x0030 && c <= 0x0039) ||
|
|
c == 0x3a |
|
|
c == 0x5f |
|
|
c == 0x2d |
|
|
c == 0x2e |
|
|
c == 0xB7 |
|
|
(c >= 0xC0 && c <= 0xD6) ||
|
|
(c >= 0xD8 && c <= 0xF6) ||
|
|
(c >= 0xF8 && c <= 0x2FF) ||
|
|
(c >= 0x300 && c <= 0x37D) ||
|
|
(c >= 0x37F && c <= 0x1FFF) ||
|
|
(c >= 0x200C && c <= 0x200D) ||
|
|
(c >= 0x203F && c <= 0x2040) ||
|
|
(c >= 0x2070 && c <= 0x218F) ||
|
|
(c >= 0x2C00 && c <= 0x2FEF) ||
|
|
(c >= 0x3001 && c <= 0xD7FF) ||
|
|
(c >= 0xF900 && c <= 0xFDCF) ||
|
|
(c >= 0xFDF0 && c <= 0xFFFD) ||
|
|
(c >= 0x10000 && c <= 0xEFFFF));
|
|
else
|
|
return (c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a ||
|
|
isLetter(c) || isDigit(c) ||
|
|
isCombiningChar(c) || isExtender(c));
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified Unicode character matches the Letter
|
|
* production.
|
|
*/
|
|
public static boolean isLetter(int c)
|
|
{
|
|
if ((c >= 0x0041 && c <= 0x005A) ||
|
|
(c >= 0x0061 && c <= 0x007A) ||
|
|
(c >= 0x00C0 && c <= 0x00D6) ||
|
|
(c >= 0x00D8 && c <= 0x00F6) ||
|
|
(c >= 0x00F8 && c <= 0x00FF) ||
|
|
(c >= 0x0100 && c <= 0x0131) ||
|
|
(c >= 0x0134 && c <= 0x013E) ||
|
|
(c >= 0x0141 && c <= 0x0148) ||
|
|
(c >= 0x014A && c <= 0x017E) ||
|
|
(c >= 0x0180 && c <= 0x01C3) ||
|
|
(c >= 0x01CD && c <= 0x01F0) ||
|
|
(c >= 0x01F4 && c <= 0x01F5) ||
|
|
(c >= 0x01FA && c <= 0x0217) ||
|
|
(c >= 0x0250 && c <= 0x02A8) ||
|
|
(c >= 0x02BB && c <= 0x02C1) ||
|
|
c == 0x0386 ||
|
|
(c >= 0x0388 && c <= 0x038A) ||
|
|
c == 0x038C ||
|
|
(c >= 0x038E && c <= 0x03A1) ||
|
|
(c >= 0x03A3 && c <= 0x03CE) ||
|
|
(c >= 0x03D0 && c <= 0x03D6) ||
|
|
c == 0x03DA ||
|
|
c == 0x03DC ||
|
|
c == 0x03DE ||
|
|
c == 0x03E0 ||
|
|
(c >= 0x03E2 && c <= 0x03F3) ||
|
|
(c >= 0x0401 && c <= 0x040C) ||
|
|
(c >= 0x040E && c <= 0x044F) ||
|
|
(c >= 0x0451 && c <= 0x045C) ||
|
|
(c >= 0x045E && c <= 0x0481) ||
|
|
(c >= 0x0490 && c <= 0x04C4) ||
|
|
(c >= 0x04C7 && c <= 0x04C8) ||
|
|
(c >= 0x04CB && c <= 0x04CC) ||
|
|
(c >= 0x04D0 && c <= 0x04EB) ||
|
|
(c >= 0x04EE && c <= 0x04F5) ||
|
|
(c >= 0x04F8 && c <= 0x04F9) ||
|
|
(c >= 0x0531 && c <= 0x0556) ||
|
|
c == 0x0559 ||
|
|
(c >= 0x0561 && c <= 0x0586) ||
|
|
(c >= 0x05D0 && c <= 0x05EA) ||
|
|
(c >= 0x05F0 && c <= 0x05F2) ||
|
|
(c >= 0x0621 && c <= 0x063A) ||
|
|
(c >= 0x0641 && c <= 0x064A) ||
|
|
(c >= 0x0671 && c <= 0x06B7) ||
|
|
(c >= 0x06BA && c <= 0x06BE) ||
|
|
(c >= 0x06C0 && c <= 0x06CE) ||
|
|
(c >= 0x06D0 && c <= 0x06D3) ||
|
|
c == 0x06D5 ||
|
|
(c >= 0x06E5 && c <= 0x06E6) ||
|
|
(c >= 0x0905 && c <= 0x0939) ||
|
|
c == 0x093D ||
|
|
(c >= 0x0958 && c <= 0x0961) ||
|
|
(c >= 0x0985 && c <= 0x098C) ||
|
|
(c >= 0x098F && c <= 0x0990) ||
|
|
(c >= 0x0993 && c <= 0x09A8) ||
|
|
(c >= 0x09AA && c <= 0x09B0) ||
|
|
c == 0x09B2 ||
|
|
(c >= 0x09B6 && c <= 0x09B9) ||
|
|
(c >= 0x09DC && c <= 0x09DD) ||
|
|
(c >= 0x09DF && c <= 0x09E1) ||
|
|
(c >= 0x09F0 && c <= 0x09F1) ||
|
|
(c >= 0x0A05 && c <= 0x0A0A) ||
|
|
(c >= 0x0A0F && c <= 0x0A10) ||
|
|
(c >= 0x0A13 && c <= 0x0A28) ||
|
|
(c >= 0x0A2A && c <= 0x0A30) ||
|
|
(c >= 0x0A32 && c <= 0x0A33) ||
|
|
(c >= 0x0A35 && c <= 0x0A36) ||
|
|
(c >= 0x0A38 && c <= 0x0A39) ||
|
|
(c >= 0x0A59 && c <= 0x0A5C) ||
|
|
c == 0x0A5E ||
|
|
(c >= 0x0A72 && c <= 0x0A74) ||
|
|
(c >= 0x0A85 && c <= 0x0A8B) ||
|
|
c == 0x0A8D ||
|
|
(c >= 0x0A8F && c <= 0x0A91) ||
|
|
(c >= 0x0A93 && c <= 0x0AA8) ||
|
|
(c >= 0x0AAA && c <= 0x0AB0) ||
|
|
(c >= 0x0AB2 && c <= 0x0AB3) ||
|
|
(c >= 0x0AB5 && c <= 0x0AB9) ||
|
|
c == 0x0ABD ||
|
|
c == 0x0AE0 ||
|
|
(c >= 0x0B05 && c <= 0x0B0C) ||
|
|
(c >= 0x0B0F && c <= 0x0B10) ||
|
|
(c >= 0x0B13 && c <= 0x0B28) ||
|
|
(c >= 0x0B2A && c <= 0x0B30) ||
|
|
(c >= 0x0B32 && c <= 0x0B33) ||
|
|
(c >= 0x0B36 && c <= 0x0B39) ||
|
|
c == 0x0B3D ||
|
|
(c >= 0x0B5C && c <= 0x0B5D) ||
|
|
(c >= 0x0B5F && c <= 0x0B61) ||
|
|
(c >= 0x0B85 && c <= 0x0B8A) ||
|
|
(c >= 0x0B8E && c <= 0x0B90) ||
|
|
(c >= 0x0B92 && c <= 0x0B95) ||
|
|
(c >= 0x0B99 && c <= 0x0B9A) ||
|
|
c == 0x0B9C ||
|
|
(c >= 0x0B9E && c <= 0x0B9F) ||
|
|
(c >= 0x0BA3 && c <= 0x0BA4) ||
|
|
(c >= 0x0BA8 && c <= 0x0BAA) ||
|
|
(c >= 0x0BAE && c <= 0x0BB5) ||
|
|
(c >= 0x0BB7 && c <= 0x0BB9) ||
|
|
(c >= 0x0C05 && c <= 0x0C0C) ||
|
|
(c >= 0x0C0E && c <= 0x0C10) ||
|
|
(c >= 0x0C12 && c <= 0x0C28) ||
|
|
(c >= 0x0C2A && c <= 0x0C33) ||
|
|
(c >= 0x0C35 && c <= 0x0C39) ||
|
|
(c >= 0x0C60 && c <= 0x0C61) ||
|
|
(c >= 0x0C85 && c <= 0x0C8C) ||
|
|
(c >= 0x0C8E && c <= 0x0C90) ||
|
|
(c >= 0x0C92 && c <= 0x0CA8) ||
|
|
(c >= 0x0CAA && c <= 0x0CB3) ||
|
|
(c >= 0x0CB5 && c <= 0x0CB9) ||
|
|
c == 0x0CDE ||
|
|
(c >= 0x0CE0 && c <= 0x0CE1) ||
|
|
(c >= 0x0D05 && c <= 0x0D0C) ||
|
|
(c >= 0x0D0E && c <= 0x0D10) ||
|
|
(c >= 0x0D12 && c <= 0x0D28) ||
|
|
(c >= 0x0D2A && c <= 0x0D39) ||
|
|
(c >= 0x0D60 && c <= 0x0D61) ||
|
|
(c >= 0x0E01 && c <= 0x0E2E) ||
|
|
c == 0x0E30 ||
|
|
(c >= 0x0E32 && c <= 0x0E33) ||
|
|
(c >= 0x0E40 && c <= 0x0E45) ||
|
|
(c >= 0x0E81 && c <= 0x0E82) ||
|
|
c == 0x0E84 ||
|
|
(c >= 0x0E87 && c <= 0x0E88) ||
|
|
c == 0x0E8A ||
|
|
c == 0x0E8D ||
|
|
(c >= 0x0E94 && c <= 0x0E97) ||
|
|
(c >= 0x0E99 && c <= 0x0E9F) ||
|
|
(c >= 0x0EA1 && c <= 0x0EA3) ||
|
|
c == 0x0EA5 ||
|
|
c == 0x0EA7 ||
|
|
(c >= 0x0EAA && c <= 0x0EAB) ||
|
|
(c >= 0x0EAD && c <= 0x0EAE) ||
|
|
c == 0x0EB0 ||
|
|
(c >= 0x0EB2 && c <= 0x0EB3) ||
|
|
c == 0x0EBD ||
|
|
(c >= 0x0EC0 && c <= 0x0EC4) ||
|
|
(c >= 0x0F40 && c <= 0x0F47) ||
|
|
(c >= 0x0F49 && c <= 0x0F69) ||
|
|
(c >= 0x10A0 && c <= 0x10C5) ||
|
|
(c >= 0x10D0 && c <= 0x10F6) ||
|
|
c == 0x1100 ||
|
|
(c >= 0x1102 && c <= 0x1103) ||
|
|
(c >= 0x1105 && c <= 0x1107) ||
|
|
c == 0x1109 ||
|
|
(c >= 0x110B && c <= 0x110C) ||
|
|
(c >= 0x110E && c <= 0x1112) ||
|
|
c == 0x113C ||
|
|
c == 0x113E ||
|
|
c == 0x1140 ||
|
|
c == 0x114C ||
|
|
c == 0x114E ||
|
|
c == 0x1150 ||
|
|
(c >= 0x1154 && c <= 0x1155) ||
|
|
c == 0x1159 ||
|
|
(c >= 0x115F && c <= 0x1161) ||
|
|
c == 0x1163 ||
|
|
c == 0x1165 ||
|
|
c == 0x1167 ||
|
|
c == 0x1169 ||
|
|
(c >= 0x116D && c <= 0x116E) ||
|
|
(c >= 0x1172 && c <= 0x1173) ||
|
|
c == 0x1175 ||
|
|
c == 0x119E ||
|
|
c == 0x11A8 ||
|
|
c == 0x11AB ||
|
|
(c >= 0x11AE && c <= 0x11AF) ||
|
|
(c >= 0x11B7 && c <= 0x11B8) ||
|
|
c == 0x11BA ||
|
|
(c >= 0x11BC && c <= 0x11C2) ||
|
|
c == 0x11EB ||
|
|
c == 0x11F0 ||
|
|
c == 0x11F9 ||
|
|
(c >= 0x1E00 && c <= 0x1E9B) ||
|
|
(c >= 0x1EA0 && c <= 0x1EF9) ||
|
|
(c >= 0x1F00 && c <= 0x1F15) ||
|
|
(c >= 0x1F18 && c <= 0x1F1D) ||
|
|
(c >= 0x1F20 && c <= 0x1F45) ||
|
|
(c >= 0x1F48 && c <= 0x1F4D) ||
|
|
(c >= 0x1F50 && c <= 0x1F57) ||
|
|
c == 0x1F59 ||
|
|
c == 0x1F5B ||
|
|
c == 0x1F5D ||
|
|
(c >= 0x1F5F && c <= 0x1F7D) ||
|
|
(c >= 0x1F80 && c <= 0x1FB4) ||
|
|
(c >= 0x1FB6 && c <= 0x1FBC) ||
|
|
c == 0x1FBE ||
|
|
(c >= 0x1FC2 && c <= 0x1FC4) ||
|
|
(c >= 0x1FC6 && c <= 0x1FCC) ||
|
|
(c >= 0x1FD0 && c <= 0x1FD3) ||
|
|
(c >= 0x1FD6 && c <= 0x1FDB) ||
|
|
(c >= 0x1FE0 && c <= 0x1FEC) ||
|
|
(c >= 0x1FF2 && c <= 0x1FF4) ||
|
|
(c >= 0x1FF6 && c <= 0x1FFC) ||
|
|
c == 0x2126 ||
|
|
(c >= 0x212A && c <= 0x212B) ||
|
|
c == 0x212E ||
|
|
(c >= 0x2180 && c <= 0x2182) ||
|
|
(c >= 0x3041 && c <= 0x3094) ||
|
|
(c >= 0x30A1 && c <= 0x30FA) ||
|
|
(c >= 0x3105 && c <= 0x312C) ||
|
|
(c >= 0xAC00 && c <= 0xD7A3))
|
|
return true; // BaseChar
|
|
if ((c >= 0x4e00 && c <= 0x9fa5) ||
|
|
c == 0x3007 ||
|
|
(c >= 0x3021 && c <= 0x3029))
|
|
return true; // Ideographic
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified Unicode character matches the Digit
|
|
* production.
|
|
*/
|
|
public static boolean isDigit(int c)
|
|
{
|
|
return ((c >= 0x0030 && c <= 0x0039) ||
|
|
(c >= 0x0660 && c <= 0x0669) ||
|
|
(c >= 0x06F0 && c <= 0x06F9) ||
|
|
(c >= 0x0966 && c <= 0x096F) ||
|
|
(c >= 0x09E6 && c <= 0x09EF) ||
|
|
(c >= 0x0A66 && c <= 0x0A6F) ||
|
|
(c >= 0x0AE6 && c <= 0x0AEF) ||
|
|
(c >= 0x0B66 && c <= 0x0B6F) ||
|
|
(c >= 0x0BE7 && c <= 0x0BEF) ||
|
|
(c >= 0x0C66 && c <= 0x0C6F) ||
|
|
(c >= 0x0CE6 && c <= 0x0CEF) ||
|
|
(c >= 0x0D66 && c <= 0x0D6F) ||
|
|
(c >= 0x0E50 && c <= 0x0E59) ||
|
|
(c >= 0x0ED0 && c <= 0x0ED9) ||
|
|
(c >= 0x0F20 && c <= 0x0F29));
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified Unicode character matches the
|
|
* CombiningChar production.
|
|
*/
|
|
public static boolean isCombiningChar(int c)
|
|
{
|
|
return ((c >= 0x0300 && c <= 0x0345) ||
|
|
(c >= 0x0360 && c <= 0x0361) ||
|
|
(c >= 0x0483 && c <= 0x0486) ||
|
|
(c >= 0x0591 && c <= 0x05A1) ||
|
|
(c >= 0x05A3 && c <= 0x05B9) ||
|
|
(c >= 0x05BB && c <= 0x05BD) ||
|
|
c == 0x05BF ||
|
|
(c >= 0x05C1 && c <= 0x05C2) ||
|
|
c == 0x05C4 ||
|
|
(c >= 0x064B && c <= 0x0652) ||
|
|
c == 0x0670 ||
|
|
(c >= 0x06D6 && c <= 0x06DC) ||
|
|
(c >= 0x06DD && c <= 0x06DF) ||
|
|
(c >= 0x06E0 && c <= 0x06E4) ||
|
|
(c >= 0x06E7 && c <= 0x06E8) ||
|
|
(c >= 0x06EA && c <= 0x06ED) ||
|
|
(c >= 0x0901 && c <= 0x0903) ||
|
|
c == 0x093C ||
|
|
(c >= 0x093E && c <= 0x094C) ||
|
|
c == 0x094D ||
|
|
(c >= 0x0951 && c <= 0x0954) ||
|
|
(c >= 0x0962 && c <= 0x0963) ||
|
|
(c >= 0x0981 && c <= 0x0983) ||
|
|
c == 0x09BC ||
|
|
c == 0x09BE ||
|
|
c == 0x09BF ||
|
|
(c >= 0x09C0 && c <= 0x09C4) ||
|
|
(c >= 0x09C7 && c <= 0x09C8) ||
|
|
(c >= 0x09CB && c <= 0x09CD) ||
|
|
c == 0x09D7 ||
|
|
(c >= 0x09E2 && c <= 0x09E3) ||
|
|
c == 0x0A02 ||
|
|
c == 0x0A3C ||
|
|
c == 0x0A3E ||
|
|
c == 0x0A3F ||
|
|
(c >= 0x0A40 && c <= 0x0A42) ||
|
|
(c >= 0x0A47 && c <= 0x0A48) ||
|
|
(c >= 0x0A4B && c <= 0x0A4D) ||
|
|
(c >= 0x0A70 && c <= 0x0A71) ||
|
|
(c >= 0x0A81 && c <= 0x0A83) ||
|
|
c == 0x0ABC ||
|
|
(c >= 0x0ABE && c <= 0x0AC5) ||
|
|
(c >= 0x0AC7 && c <= 0x0AC9) ||
|
|
(c >= 0x0ACB && c <= 0x0ACD) ||
|
|
(c >= 0x0B01 && c <= 0x0B03) ||
|
|
c == 0x0B3C ||
|
|
(c >= 0x0B3E && c <= 0x0B43) ||
|
|
(c >= 0x0B47 && c <= 0x0B48) ||
|
|
(c >= 0x0B4B && c <= 0x0B4D) ||
|
|
(c >= 0x0B56 && c <= 0x0B57) ||
|
|
(c >= 0x0B82 && c <= 0x0B83) ||
|
|
(c >= 0x0BBE && c <= 0x0BC2) ||
|
|
(c >= 0x0BC6 && c <= 0x0BC8) ||
|
|
(c >= 0x0BCA && c <= 0x0BCD) ||
|
|
c == 0x0BD7 ||
|
|
(c >= 0x0C01 && c <= 0x0C03) ||
|
|
(c >= 0x0C3E && c <= 0x0C44) ||
|
|
(c >= 0x0C46 && c <= 0x0C48) ||
|
|
(c >= 0x0C4A && c <= 0x0C4D) ||
|
|
(c >= 0x0C55 && c <= 0x0C56) ||
|
|
(c >= 0x0C82 && c <= 0x0C83) ||
|
|
(c >= 0x0CBE && c <= 0x0CC4) ||
|
|
(c >= 0x0CC6 && c <= 0x0CC8) ||
|
|
(c >= 0x0CCA && c <= 0x0CCD) ||
|
|
(c >= 0x0CD5 && c <= 0x0CD6) ||
|
|
(c >= 0x0D02 && c <= 0x0D03) ||
|
|
(c >= 0x0D3E && c <= 0x0D43) ||
|
|
(c >= 0x0D46 && c <= 0x0D48) ||
|
|
(c >= 0x0D4A && c <= 0x0D4D) ||
|
|
c == 0x0D57 ||
|
|
c == 0x0E31 ||
|
|
(c >= 0x0E34 && c <= 0x0E3A) ||
|
|
(c >= 0x0E47 && c <= 0x0E4E) ||
|
|
c == 0x0EB1 ||
|
|
(c >= 0x0EB4 && c <= 0x0EB9) ||
|
|
(c >= 0x0EBB && c <= 0x0EBC) ||
|
|
(c >= 0x0EC8 && c <= 0x0ECD) ||
|
|
(c >= 0x0F18 && c <= 0x0F19) ||
|
|
c == 0x0F35 ||
|
|
c == 0x0F37 ||
|
|
c == 0x0F39 ||
|
|
c == 0x0F3E ||
|
|
c == 0x0F3F ||
|
|
(c >= 0x0F71 && c <= 0x0F84) ||
|
|
(c >= 0x0F86 && c <= 0x0F8B) ||
|
|
(c >= 0x0F90 && c <= 0x0F95) ||
|
|
c == 0x0F97 ||
|
|
(c >= 0x0F99 && c <= 0x0FAD) ||
|
|
(c >= 0x0FB1 && c <= 0x0FB7) ||
|
|
c == 0x0FB9 ||
|
|
(c >= 0x20D0 && c <= 0x20DC) ||
|
|
c == 0x20E1 ||
|
|
(c >= 0x302A && c <= 0x302F) ||
|
|
c == 0x3099 ||
|
|
c == 0x309A);
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified Unicode character matches the Extender
|
|
* production.
|
|
*/
|
|
public static boolean isExtender(int c)
|
|
{
|
|
return (c == 0x00B7 ||
|
|
c == 0x02D0 ||
|
|
c == 0x02D1 ||
|
|
c == 0x0387 ||
|
|
c == 0x0640 ||
|
|
c == 0x0E46 ||
|
|
c == 0x0EC6 ||
|
|
c == 0x3005 ||
|
|
(c >= 0x3031 && c <= 0x3035) ||
|
|
(c >= 0x309D && c <= 0x309E) ||
|
|
(c >= 0x30FC && c <= 0x30FE));
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified Unicode character matches the Char
|
|
* production.
|
|
*/
|
|
public static boolean isChar(int c)
|
|
{
|
|
return (c >= 0x20 && c < 0xd800) ||
|
|
(c >= 0xe00 && c < 0xfffe) ||
|
|
(c >= 0x10000 && c < 0x110000) ||
|
|
c == 0xa || c == 0x9 || c == 0xd;
|
|
}
|
|
|
|
/**
|
|
* Interns the specified text or not, depending on the value of
|
|
* stringInterning.
|
|
*/
|
|
private String intern(String text)
|
|
{
|
|
return stringInterning ? text.intern() : text;
|
|
}
|
|
|
|
/**
|
|
* Report a parsing error.
|
|
*/
|
|
private void error(String message)
|
|
throws XMLStreamException
|
|
{
|
|
error(message, null);
|
|
}
|
|
|
|
/**
|
|
* Report a parsing error.
|
|
*/
|
|
private void error(String message, Object info)
|
|
throws XMLStreamException
|
|
{
|
|
if (info != null)
|
|
{
|
|
if (info instanceof String)
|
|
message += ": \"" + ((String) info) + "\"";
|
|
else if (info instanceof Character)
|
|
message += ": '" + ((Character) info) + "'";
|
|
}
|
|
throw new XMLStreamException(message);
|
|
}
|
|
|
|
/**
|
|
* Perform validation of a start-element event.
|
|
*/
|
|
private void validateStartElement(String elementName)
|
|
throws XMLStreamException
|
|
{
|
|
if (currentContentModel == null)
|
|
{
|
|
// root element
|
|
// VC: Root Element Type
|
|
if (!elementName.equals(doctype.rootName))
|
|
error("root element name must match name in DTD");
|
|
return;
|
|
}
|
|
// VC: Element Valid
|
|
switch (currentContentModel.type)
|
|
{
|
|
case ContentModel.EMPTY:
|
|
error("child element found in empty element", elementName);
|
|
break;
|
|
case ContentModel.ELEMENT:
|
|
LinkedList ctx = (LinkedList) validationStack.getLast();
|
|
ctx.add(elementName);
|
|
break;
|
|
case ContentModel.MIXED:
|
|
MixedContentModel mm = (MixedContentModel) currentContentModel;
|
|
if (!mm.containsName(elementName))
|
|
error("illegal element for content model", elementName);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Perform validation of an end-element event.
|
|
*/
|
|
private void validateEndElement()
|
|
throws XMLStreamException
|
|
{
|
|
if (currentContentModel == null)
|
|
{
|
|
// root element
|
|
// VC: IDREF
|
|
if (!idrefs.containsAll(ids))
|
|
error("IDREF values must match the value of some ID attribute");
|
|
return;
|
|
}
|
|
// VC: Element Valid
|
|
switch (currentContentModel.type)
|
|
{
|
|
case ContentModel.ELEMENT:
|
|
LinkedList ctx = (LinkedList) validationStack.getLast();
|
|
ElementContentModel ecm = (ElementContentModel) currentContentModel;
|
|
validateElementContent(ecm, ctx);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Perform validation of character data.
|
|
*/
|
|
private void validatePCData(String text)
|
|
throws XMLStreamException
|
|
{
|
|
// VC: Element Valid
|
|
switch (currentContentModel.type)
|
|
{
|
|
case ContentModel.EMPTY:
|
|
error("character data found in empty element", text);
|
|
break;
|
|
case ContentModel.ELEMENT:
|
|
boolean white = true;
|
|
int len = text.length();
|
|
for (int i = 0; i < len; i++)
|
|
{
|
|
char c = text.charAt(i);
|
|
if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
|
|
{
|
|
white = false;
|
|
break;
|
|
}
|
|
}
|
|
if (!white)
|
|
error("character data found in element with element content", text);
|
|
else if (xmlStandalone == Boolean.TRUE && currentContentModel.external)
|
|
// VC: Standalone Document Declaration
|
|
error("whitespace in element content of externally declared " +
|
|
"element in standalone document");
|
|
break;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Validates the specified validation context (list of child elements)
|
|
* against the element content model for the current element.
|
|
*/
|
|
private void validateElementContent(ElementContentModel model,
|
|
LinkedList children)
|
|
throws XMLStreamException
|
|
{
|
|
// Use regular expression
|
|
StringBuffer buf = new StringBuffer();
|
|
for (Iterator i = children.iterator(); i.hasNext(); )
|
|
{
|
|
buf.append((String) i.next());
|
|
buf.append(' ');
|
|
}
|
|
String c = buf.toString();
|
|
String regex = createRegularExpression(model);
|
|
if (!c.matches(regex))
|
|
error("element content "+model.text+" does not match expression "+regex, c);
|
|
}
|
|
|
|
/**
|
|
* Creates the regular expression used to validate an element content
|
|
* model.
|
|
*/
|
|
private String createRegularExpression(ElementContentModel model)
|
|
{
|
|
if (model.regex == null)
|
|
{
|
|
StringBuffer buf = new StringBuffer();
|
|
buf.append('(');
|
|
for (Iterator i = model.contentParticles.iterator(); i.hasNext(); )
|
|
{
|
|
ContentParticle cp = (ContentParticle) i.next();
|
|
if (cp.content instanceof String)
|
|
{
|
|
buf.append('(');
|
|
buf.append((String) cp.content);
|
|
buf.append(' ');
|
|
buf.append(')');
|
|
if (cp.max == -1)
|
|
{
|
|
if (cp.min == 0)
|
|
buf.append('*');
|
|
else
|
|
buf.append('+');
|
|
}
|
|
else if (cp.min == 0)
|
|
buf.append('?');
|
|
}
|
|
else
|
|
{
|
|
ElementContentModel ecm = (ElementContentModel) cp.content;
|
|
buf.append(createRegularExpression(ecm));
|
|
}
|
|
if (model.or && i.hasNext())
|
|
buf.append('|');
|
|
}
|
|
buf.append(')');
|
|
if (model.max == -1)
|
|
{
|
|
if (model.min == 0)
|
|
buf.append('*');
|
|
else
|
|
buf.append('+');
|
|
}
|
|
else if (model.min == 0)
|
|
buf.append('?');
|
|
model.regex = buf.toString();
|
|
}
|
|
return model.regex;
|
|
}
|
|
|
|
/**
|
|
* Performs validation of a document type declaration event.
|
|
*/
|
|
void validateDoctype()
|
|
throws XMLStreamException
|
|
{
|
|
for (Iterator i = doctype.entityIterator(); i.hasNext(); )
|
|
{
|
|
Map.Entry entry = (Map.Entry) i.next();
|
|
Object entity = entry.getValue();
|
|
if (entity instanceof ExternalIds)
|
|
{
|
|
ExternalIds ids = (ExternalIds) entity;
|
|
if (ids.notationName != null)
|
|
{
|
|
// VC: Notation Declared
|
|
ExternalIds notation = doctype.getNotation(ids.notationName);
|
|
if (notation == null)
|
|
error("Notation name must match the declared name of a " +
|
|
"notation", ids.notationName);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Simple test harness for reading an XML file.
|
|
* args[0] is the filename of the XML file
|
|
* If args[1] is "-x", enable XInclude processing
|
|
*/
|
|
public static void main(String[] args)
|
|
throws Exception
|
|
{
|
|
boolean xIncludeAware = false;
|
|
if (args.length > 1 && "-x".equals(args[1]))
|
|
xIncludeAware = true;
|
|
XMLParser p = new XMLParser(new java.io.FileInputStream(args[0]),
|
|
absolutize(null, args[0]),
|
|
true, // validating
|
|
true, // namespaceAware
|
|
true, // coalescing,
|
|
true, // replaceERefs
|
|
true, // externalEntities
|
|
true, // supportDTD
|
|
true, // baseAware
|
|
true, // stringInterning
|
|
true, // extendedEventTypes
|
|
null,
|
|
null);
|
|
XMLStreamReader reader = p;
|
|
if (xIncludeAware)
|
|
reader = new XIncludeFilter(p, args[0], true, true, true);
|
|
try
|
|
{
|
|
int event;
|
|
//do
|
|
while (reader.hasNext())
|
|
{
|
|
event = reader.next();
|
|
Location loc = reader.getLocation();
|
|
System.out.print(loc.getLineNumber()+":"+loc.getColumnNumber()+" ");
|
|
switch (event)
|
|
{
|
|
case XMLStreamConstants.START_DOCUMENT:
|
|
System.out.println("START_DOCUMENT version="+reader.getVersion()+
|
|
" encoding="+reader.getEncoding());
|
|
break;
|
|
case XMLStreamConstants.END_DOCUMENT:
|
|
System.out.println("END_DOCUMENT");
|
|
break;
|
|
case XMLStreamConstants.START_ELEMENT:
|
|
System.out.println("START_ELEMENT "+reader.getName());
|
|
int l = reader.getNamespaceCount();
|
|
for (int i = 0; i < l; i++)
|
|
System.out.println("\tnamespace "+reader.getNamespacePrefix(i)+
|
|
"='"+reader.getNamespaceURI(i)+"'");
|
|
l = reader.getAttributeCount();
|
|
for (int i = 0; i < l; i++)
|
|
System.out.println("\tattribute "+reader.getAttributeName(i)+
|
|
"='"+reader.getAttributeValue(i)+"'");
|
|
break;
|
|
case XMLStreamConstants.END_ELEMENT:
|
|
System.out.println("END_ELEMENT "+reader.getName());
|
|
break;
|
|
case XMLStreamConstants.CHARACTERS:
|
|
System.out.println("CHARACTERS '"+encodeText(reader.getText())+"'");
|
|
break;
|
|
case XMLStreamConstants.CDATA:
|
|
System.out.println("CDATA '"+encodeText(reader.getText())+"'");
|
|
break;
|
|
case XMLStreamConstants.SPACE:
|
|
System.out.println("SPACE '"+encodeText(reader.getText())+"'");
|
|
break;
|
|
case XMLStreamConstants.DTD:
|
|
System.out.println("DTD "+reader.getText());
|
|
break;
|
|
case XMLStreamConstants.ENTITY_REFERENCE:
|
|
System.out.println("ENTITY_REFERENCE "+reader.getText());
|
|
break;
|
|
case XMLStreamConstants.COMMENT:
|
|
System.out.println("COMMENT '"+encodeText(reader.getText())+"'");
|
|
break;
|
|
case XMLStreamConstants.PROCESSING_INSTRUCTION:
|
|
System.out.println("PROCESSING_INSTRUCTION "+reader.getPITarget()+
|
|
" "+reader.getPIData());
|
|
break;
|
|
case START_ENTITY:
|
|
System.out.println("START_ENTITY "+reader.getText());
|
|
break;
|
|
case END_ENTITY:
|
|
System.out.println("END_ENTITY "+reader.getText());
|
|
break;
|
|
default:
|
|
System.out.println("Unknown event: "+event);
|
|
}
|
|
}
|
|
}
|
|
catch (XMLStreamException e)
|
|
{
|
|
Location l = reader.getLocation();
|
|
System.out.println("At line "+l.getLineNumber()+
|
|
", column "+l.getColumnNumber()+
|
|
" of "+l.getSystemId());
|
|
throw e;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Escapes control characters in the specified text. For debugging.
|
|
*/
|
|
private static String encodeText(String text)
|
|
{
|
|
StringBuffer b = new StringBuffer();
|
|
int len = text.length();
|
|
for (int i = 0; i < len; i++)
|
|
{
|
|
char c = text.charAt(i);
|
|
switch (c)
|
|
{
|
|
case '\t':
|
|
b.append("\\t");
|
|
break;
|
|
case '\n':
|
|
b.append("\\n");
|
|
break;
|
|
case '\r':
|
|
b.append("\\r");
|
|
break;
|
|
default:
|
|
b.append(c);
|
|
}
|
|
}
|
|
return b.toString();
|
|
}
|
|
|
|
/**
|
|
* An attribute instance.
|
|
*/
|
|
class Attribute
|
|
{
|
|
|
|
/**
|
|
* Attribute name.
|
|
*/
|
|
final String name;
|
|
|
|
/**
|
|
* Attribute type as declared in the DTD, or CDATA otherwise.
|
|
*/
|
|
final String type;
|
|
|
|
/**
|
|
* Whether the attribute was specified or defaulted.
|
|
*/
|
|
final boolean specified;
|
|
|
|
/**
|
|
* The attribute value.
|
|
*/
|
|
final String value;
|
|
|
|
/**
|
|
* The namespace prefix.
|
|
*/
|
|
final String prefix;
|
|
|
|
/**
|
|
* The namespace local-name.
|
|
*/
|
|
final String localName;
|
|
|
|
Attribute(String name, String type, boolean specified, String value)
|
|
{
|
|
this.name = name;
|
|
this.type = type;
|
|
this.specified = specified;
|
|
this.value = value;
|
|
int ci = name.indexOf(':');
|
|
if (ci == -1)
|
|
{
|
|
prefix = null;
|
|
localName = intern(name);
|
|
}
|
|
else
|
|
{
|
|
prefix = intern(name.substring(0, ci));
|
|
localName = intern(name.substring(ci + 1));
|
|
}
|
|
}
|
|
|
|
public boolean equals(Object other)
|
|
{
|
|
if (other instanceof Attribute)
|
|
{
|
|
Attribute a = (Attribute) other;
|
|
if (namespaceAware)
|
|
{
|
|
if (!a.localName.equals(localName))
|
|
return false;
|
|
String auri = getNamespaceURI(a.prefix);
|
|
String uri = getNamespaceURI(prefix);
|
|
if (uri == null && (auri == null ||
|
|
(input.xml11 && "".equals(auri))))
|
|
return true;
|
|
if (uri != null)
|
|
{
|
|
if ("".equals(uri) && input.xml11 && "".equals(auri))
|
|
return true;
|
|
return uri.equals(auri);
|
|
}
|
|
return false;
|
|
}
|
|
else
|
|
return a.name.equals(name);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
}
|
|
|
|
/**
|
|
* Representation of a DTD.
|
|
*/
|
|
class Doctype
|
|
{
|
|
|
|
/**
|
|
* Name of the root element.
|
|
*/
|
|
final String rootName;
|
|
|
|
/**
|
|
* Public ID, if any, of external subset.
|
|
*/
|
|
final String publicId;
|
|
|
|
/**
|
|
* System ID (URL), if any, of external subset.
|
|
*/
|
|
final String systemId;
|
|
|
|
/**
|
|
* Map of element names to content models.
|
|
*/
|
|
private final LinkedHashMap elements = new LinkedHashMap();
|
|
|
|
/**
|
|
* Map of element names to maps of attribute declarations.
|
|
*/
|
|
private final LinkedHashMap attlists = new LinkedHashMap();
|
|
|
|
/**
|
|
* Map of entity names to entities (String or ExternalIds).
|
|
*/
|
|
private final LinkedHashMap entities = new LinkedHashMap();
|
|
|
|
/**
|
|
* Map of notation names to ExternalIds.
|
|
*/
|
|
private final LinkedHashMap notations = new LinkedHashMap();
|
|
|
|
/**
|
|
* Map of anonymous keys to comments.
|
|
*/
|
|
private final LinkedHashMap comments = new LinkedHashMap();
|
|
|
|
/**
|
|
* Map of anonymous keys to processing instructions (String[2]
|
|
* containing {target, data}).
|
|
*/
|
|
private final LinkedHashMap pis = new LinkedHashMap();
|
|
|
|
/**
|
|
* List of keys to all markup entries in the DTD.
|
|
*/
|
|
private final LinkedList entries = new LinkedList();
|
|
|
|
/**
|
|
* Set of the entities defined in the external subset.
|
|
*/
|
|
private final HashSet externalEntities = new HashSet();
|
|
|
|
/**
|
|
* Set of the notations defined in the external subset.
|
|
*/
|
|
private final HashSet externalNotations = new HashSet();
|
|
|
|
/**
|
|
* Counter for making anonymous keys.
|
|
*/
|
|
private int anon = 1;
|
|
|
|
/**
|
|
* Constructor.
|
|
*/
|
|
Doctype(String rootName, String publicId, String systemId)
|
|
{
|
|
this.rootName = rootName;
|
|
this.publicId = publicId;
|
|
this.systemId = systemId;
|
|
}
|
|
|
|
/**
|
|
* Adds an element declaration.
|
|
* @param name the element name
|
|
* @param text the content model text
|
|
* @param model the parsed content model
|
|
*/
|
|
void addElementDecl(String name, String text, ContentModel model)
|
|
{
|
|
if (elements.containsKey(name))
|
|
return;
|
|
model.text = text;
|
|
model.external = (inputStack.size() != 1);
|
|
elements.put(name, model);
|
|
entries.add("E" + name);
|
|
}
|
|
|
|
/**
|
|
* Adds an attribute declaration.
|
|
* @param ename the element name
|
|
* @param aname the attribute name
|
|
* @param decl the attribute declaration details
|
|
*/
|
|
void addAttributeDecl(String ename, String aname, AttributeDecl decl)
|
|
{
|
|
LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
|
|
if (attlist == null)
|
|
{
|
|
attlist = new LinkedHashMap();
|
|
attlists.put(ename, attlist);
|
|
}
|
|
else if (attlist.containsKey(aname))
|
|
return;
|
|
attlist.put(aname, decl);
|
|
String key = "A" + ename;
|
|
if (!entries.contains(key))
|
|
entries.add(key);
|
|
}
|
|
|
|
/**
|
|
* Adds an entity declaration.
|
|
* @param name the entity name
|
|
* @param text the entity replacement text
|
|
* @param inExternalSubset if we are in the exernal subset
|
|
*/
|
|
void addEntityDecl(String name, String text, boolean inExternalSubset)
|
|
{
|
|
if (entities.containsKey(name))
|
|
return;
|
|
entities.put(name, text);
|
|
entries.add("e" + name);
|
|
if (inExternalSubset)
|
|
externalEntities.add(name);
|
|
}
|
|
|
|
/**
|
|
* Adds an entity declaration.
|
|
* @param name the entity name
|
|
* @param ids the external IDs
|
|
* @param inExternalSubset if we are in the exernal subset
|
|
*/
|
|
void addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset)
|
|
{
|
|
if (entities.containsKey(name))
|
|
return;
|
|
entities.put(name, ids);
|
|
entries.add("e" + name);
|
|
if (inExternalSubset)
|
|
externalEntities.add(name);
|
|
}
|
|
|
|
/**
|
|
* Adds a notation declaration.
|
|
* @param name the notation name
|
|
* @param ids the external IDs
|
|
* @param inExternalSubset if we are in the exernal subset
|
|
*/
|
|
void addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset)
|
|
{
|
|
if (notations.containsKey(name))
|
|
return;
|
|
notations.put(name, ids);
|
|
entries.add("n" + name);
|
|
if (inExternalSubset)
|
|
externalNotations.add(name);
|
|
}
|
|
|
|
/**
|
|
* Adds a comment.
|
|
*/
|
|
void addComment(String text)
|
|
{
|
|
String key = Integer.toString(anon++);
|
|
comments.put(key, text);
|
|
entries.add("c" + key);
|
|
}
|
|
|
|
/**
|
|
* Adds a processing instruction.
|
|
*/
|
|
void addPI(String target, String data)
|
|
{
|
|
String key = Integer.toString(anon++);
|
|
pis.put(key, new String[] {target, data});
|
|
entries.add("p" + key);
|
|
}
|
|
|
|
/**
|
|
* Returns the content model for the specified element.
|
|
* @param name the element name
|
|
*/
|
|
ContentModel getElementModel(String name)
|
|
{
|
|
return (ContentModel) elements.get(name);
|
|
}
|
|
|
|
/**
|
|
* Returns the attribute definition for the given attribute
|
|
* @param ename the element name
|
|
* @param aname the attribute name
|
|
*/
|
|
AttributeDecl getAttributeDecl(String ename, String aname)
|
|
{
|
|
LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
|
|
return (attlist == null) ? null : (AttributeDecl) attlist.get(aname);
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified attribute was declared in the DTD.
|
|
* @param ename the element name
|
|
* @param aname the attribute name
|
|
*/
|
|
boolean isAttributeDeclared(String ename, String aname)
|
|
{
|
|
LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
|
|
return (attlist == null) ? false : attlist.containsKey(aname);
|
|
}
|
|
|
|
/**
|
|
* Returns an iterator over the entries in the attribute list for the
|
|
* given element.
|
|
* @param ename the element name
|
|
*/
|
|
Iterator attlistIterator(String ename)
|
|
{
|
|
LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
|
|
return (attlist == null) ? Collections.EMPTY_LIST.iterator() :
|
|
attlist.entrySet().iterator();
|
|
}
|
|
|
|
/**
|
|
* Returns the entity (String or ExternalIds) for the given entity name.
|
|
*/
|
|
Object getEntity(String name)
|
|
{
|
|
return entities.get(name);
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified entity was declared in the external
|
|
* subset.
|
|
*/
|
|
boolean isEntityExternal(String name)
|
|
{
|
|
return externalEntities.contains(name);
|
|
}
|
|
|
|
/**
|
|
* Returns an iterator over the entity map entries.
|
|
*/
|
|
Iterator entityIterator()
|
|
{
|
|
return entities.entrySet().iterator();
|
|
}
|
|
|
|
/**
|
|
* Returns the notation IDs for the given notation name.
|
|
*/
|
|
ExternalIds getNotation(String name)
|
|
{
|
|
return (ExternalIds) notations.get(name);
|
|
}
|
|
|
|
/**
|
|
* Indicates whether the specified notation was declared in the external
|
|
* subset.
|
|
*/
|
|
boolean isNotationExternal(String name)
|
|
{
|
|
return externalNotations.contains(name);
|
|
}
|
|
|
|
/**
|
|
* Returns the comment associated with the specified (anonymous) key.
|
|
*/
|
|
String getComment(String key)
|
|
{
|
|
return (String) comments.get(key);
|
|
}
|
|
|
|
/**
|
|
* Returns the processing instruction associated with the specified
|
|
* (anonymous) key.
|
|
*/
|
|
String[] getPI(String key)
|
|
{
|
|
return (String[]) pis.get(key);
|
|
}
|
|
|
|
/**
|
|
* Returns an iterator over the keys of the markup entries in this DTD,
|
|
* in the order declared.
|
|
*/
|
|
Iterator entryIterator()
|
|
{
|
|
return entries.iterator();
|
|
}
|
|
|
|
}
|
|
|
|
/**
 * Holder for an ExternalID together with an optional NDataDecl. All
 * three fields start out as null.
 */
class ExternalIds {

  /** The public ID. */
  String publicId;

  /** The system ID. */
  String systemId;

  /** The notation name declared with the NDATA keyword. */
  String notationName;

}
|
|
|
|
/**
 * Base class for content models. A model has a type (one of the
 * constants below) and min/max values that both start at 1.
 */
abstract class ContentModel {

  /** Type constant for the EMPTY content model. */
  static final int EMPTY = 0;

  /** Type constant for the ANY content model. */
  static final int ANY = 1;

  /** Type constant for an element content model. */
  static final int ELEMENT = 2;

  /** Type constant for a mixed content model. */
  static final int MIXED = 3;

  /** Minimum value; 1 unless changed. */
  int min = 1;

  /** Maximum value; 1 unless changed. */
  int max = 1;

  /** The type of this model, one of the constants above. */
  final int type;

  /** The content model text; not set by the constructor. */
  String text;

  /** External flag; not set by the constructor. */
  boolean external;

  /**
   * Creates a content model of the given type.
   *
   * @param type one of the type constants
   */
  ContentModel(int type) {
    this.type = type;
  }

}
|
|
|
|
/**
|
|
* The EMPTY content model.
|
|
*/
|
|
class EmptyContentModel
|
|
extends ContentModel
|
|
{
|
|
|
|
EmptyContentModel()
|
|
{
|
|
super(ContentModel.EMPTY);
|
|
min = 0;
|
|
max = 0;
|
|
}
|
|
|
|
}
|
|
|
|
/**
|
|
* The ANY content model.
|
|
*/
|
|
class AnyContentModel
|
|
extends ContentModel
|
|
{
|
|
|
|
AnyContentModel()
|
|
{
|
|
super(ContentModel.ANY);
|
|
min = 0;
|
|
max = -1;
|
|
}
|
|
|
|
}
|
|
|
|
/**
|
|
* An element content model.
|
|
*/
|
|
class ElementContentModel
|
|
extends ContentModel
|
|
{
|
|
|
|
LinkedList contentParticles;
|
|
boolean or;
|
|
String regex; // regular expression cache
|
|
|
|
ElementContentModel()
|
|
{
|
|
super(ContentModel.ELEMENT);
|
|
contentParticles = new LinkedList();
|
|
}
|
|
|
|
void addContentParticle(ContentParticle cp)
|
|
{
|
|
contentParticles.add(cp);
|
|
}
|
|
|
|
}
|
|
|
|
/**
 * A single particle of an element content model: some content together
 * with its occurrence bounds, both of which start out as 1.
 */
class ContentParticle
{

  /** Minimum occurrence count. */
  int min;

  /** Maximum occurrence count. */
  int max;

  /** Name (String) or ElementContentModel. */
  Object content;

  /**
   * Creates a particle with no content and both bounds set to 1.
   */
  ContentParticle()
  {
    min = 1;
    max = 1;
  }

}
|
|
|
|
/**
|
|
* A mixed content model.
|
|
*/
|
|
class MixedContentModel
|
|
extends ContentModel
|
|
{
|
|
|
|
private HashSet names;
|
|
|
|
MixedContentModel()
|
|
{
|
|
super(ContentModel.MIXED);
|
|
names = new HashSet();
|
|
}
|
|
|
|
void addName(String name)
|
|
{
|
|
names.add(name);
|
|
}
|
|
|
|
boolean containsName(String name)
|
|
{
|
|
return names.contains(name);
|
|
}
|
|
|
|
}
|
|
|
|
/**
|
|
* An attribute definition.
|
|
*/
|
|
class AttributeDecl
|
|
{
|
|
|
|
/**
|
|
* The attribute type (CDATA, ID, etc).
|
|
*/
|
|
final String type;
|
|
|
|
/**
|
|
* The default value.
|
|
*/
|
|
final String value;
|
|
|
|
/**
|
|
* The value type (#FIXED, #IMPLIED, etc).
|
|
*/
|
|
final int valueType;
|
|
|
|
/**
|
|
* The enumeration text.
|
|
*/
|
|
final String enumeration;
|
|
|
|
/**
|
|
* The enumeration tokens.
|
|
*/
|
|
final HashSet values;
|
|
|
|
/**
|
|
* Whether this attribute declaration occurred in the external subset.
|
|
*/
|
|
final boolean external;
|
|
|
|
AttributeDecl(String type, String value,
|
|
int valueType, String enumeration,
|
|
HashSet values, boolean external)
|
|
{
|
|
this.type = type;
|
|
this.value = value;
|
|
this.valueType = valueType;
|
|
this.enumeration = enumeration;
|
|
this.values = values;
|
|
this.external = external;
|
|
}
|
|
|
|
}
|
|
|
|
/**
|
|
* An XML input source.
|
|
*/
|
|
static class Input
|
|
implements Location
|
|
{
|
|
|
|
int line = 1, markLine;
|
|
int column, markColumn;
|
|
int offset, markOffset;
|
|
final String publicId, systemId, name;
|
|
final boolean report; // report start- and end-entity
|
|
final boolean normalize; // normalize CR, etc to LF
|
|
|
|
InputStream in;
|
|
Reader reader;
|
|
UnicodeReader unicodeReader;
|
|
boolean initialized;
|
|
boolean encodingDetected;
|
|
String inputEncoding;
|
|
boolean xml11;
|
|
|
|
Input(InputStream in, Reader reader, String publicId, String systemId,
|
|
String name, String inputEncoding, boolean report,
|
|
boolean normalize)
|
|
{
|
|
if (inputEncoding == null)
|
|
inputEncoding = "UTF-8";
|
|
this.inputEncoding = inputEncoding;
|
|
this.publicId = publicId;
|
|
this.systemId = systemId;
|
|
this.name = name;
|
|
this.report = report;
|
|
this.normalize = normalize;
|
|
if (in != null)
|
|
{
|
|
if (reader != null)
|
|
throw new IllegalStateException("both byte and char streams "+
|
|
"specified");
|
|
if (normalize)
|
|
in = new CRLFInputStream(in);
|
|
in = new BufferedInputStream(in);
|
|
this.in = in;
|
|
}
|
|
else
|
|
{
|
|
this.reader = normalize ? new CRLFReader(reader) : reader;
|
|
unicodeReader = new UnicodeReader(this.reader);
|
|
}
|
|
initialized = false;
|
|
}
|
|
|
|
// -- Location --
|
|
|
|
public int getCharacterOffset()
|
|
{
|
|
return offset;
|
|
}
|
|
|
|
public int getColumnNumber()
|
|
{
|
|
return column;
|
|
}
|
|
|
|
public int getLineNumber()
|
|
{
|
|
return line;
|
|
}
|
|
|
|
public String getPublicId()
|
|
{
|
|
return publicId;
|
|
}
|
|
|
|
public String getSystemId()
|
|
{
|
|
return systemId;
|
|
}
|
|
|
|
void init()
|
|
throws IOException
|
|
{
|
|
if (initialized)
|
|
return;
|
|
if (in != null)
|
|
detectEncoding();
|
|
initialized = true;
|
|
}
|
|
|
|
void mark(int len)
|
|
throws IOException
|
|
{
|
|
//System.out.println(" mark:"+len);
|
|
markOffset = offset;
|
|
markLine = line;
|
|
markColumn = column;
|
|
if (unicodeReader != null)
|
|
unicodeReader.mark(len);
|
|
else
|
|
in.mark(len);
|
|
}
|
|
|
|
/**
|
|
* Character read.
|
|
*/
|
|
int read()
|
|
throws IOException
|
|
{
|
|
offset++;
|
|
int ret = (unicodeReader != null) ? unicodeReader.read() : in.read();
|
|
if (normalize &&
|
|
(ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028))))
|
|
{
|
|
// Normalize CR etc to LF
|
|
ret = 0x0a;
|
|
}
|
|
// Locator handling
|
|
if (ret == 0x0a)
|
|
{
|
|
line++;
|
|
column = 0;
|
|
}
|
|
else
|
|
column++;
|
|
return ret;
|
|
}
|
|
|
|
/**
|
|
* Block read.
|
|
*/
|
|
int read(int[] b, int off, int len)
|
|
throws IOException
|
|
{
|
|
int ret;
|
|
if (unicodeReader != null)
|
|
ret = unicodeReader.read(b, off, len);
|
|
else
|
|
{
|
|
byte[] b2 = new byte[len];
|
|
ret = in.read(b2, 0, len);
|
|
if (ret != -1)
|
|
{
|
|
String s = new String(b2, 0, ret, inputEncoding);
|
|
int[] c = UnicodeReader.toCodePointArray(s);
|
|
ret = c.length;
|
|
System.arraycopy(c, 0, b, off, ret);
|
|
}
|
|
}
|
|
if (ret != -1)
|
|
{
|
|
// Locator handling
|
|
for (int i = 0; i < ret; i++)
|
|
{
|
|
int c = b[off + i];
|
|
if (normalize &&
|
|
(c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028))))
|
|
{
|
|
// Normalize CR etc to LF
|
|
c = 0x0a;
|
|
b[off + i] = c;
|
|
}
|
|
if (c == 0x0a)
|
|
{
|
|
line++;
|
|
column = 0;
|
|
}
|
|
else
|
|
column++;
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
void reset()
|
|
throws IOException
|
|
{
|
|
if (unicodeReader != null)
|
|
unicodeReader.reset();
|
|
else
|
|
in.reset();
|
|
offset = markOffset;
|
|
line = markLine;
|
|
column = markColumn;
|
|
}
|
|
|
|
// Detection of input encoding
|
|
|
|
private static final int[] SIGNATURE_UCS_4_1234 =
|
|
new int[] { 0x00, 0x00, 0x00, 0x3c };
|
|
private static final int[] SIGNATURE_UCS_4_4321 =
|
|
new int[] { 0x3c, 0x00, 0x00, 0x00 };
|
|
private static final int[] SIGNATURE_UCS_4_2143 =
|
|
new int[] { 0x00, 0x00, 0x3c, 0x00 };
|
|
private static final int[] SIGNATURE_UCS_4_3412 =
|
|
new int[] { 0x00, 0x3c, 0x00, 0x00 };
|
|
private static final int[] SIGNATURE_UCS_2_12 =
|
|
new int[] { 0xfe, 0xff };
|
|
private static final int[] SIGNATURE_UCS_2_21 =
|
|
new int[] { 0xff, 0xfe };
|
|
private static final int[] SIGNATURE_UCS_2_12_NOBOM =
|
|
new int[] { 0x00, 0x3c, 0x00, 0x3f };
|
|
private static final int[] SIGNATURE_UCS_2_21_NOBOM =
|
|
new int[] { 0x3c, 0x00, 0x3f, 0x00 };
|
|
private static final int[] SIGNATURE_UTF_8 =
|
|
new int[] { 0x3c, 0x3f, 0x78, 0x6d };
|
|
private static final int[] SIGNATURE_UTF_8_BOM =
|
|
new int[] { 0xef, 0xbb, 0xbf };
|
|
|
|
/**
|
|
* Detect the input encoding.
|
|
*/
|
|
private void detectEncoding()
|
|
throws IOException
|
|
{
|
|
int[] signature = new int[4];
|
|
in.mark(4);
|
|
for (int i = 0; i < 4; i++)
|
|
signature[i] = in.read();
|
|
in.reset();
|
|
|
|
// 4-byte encodings
|
|
if (equals(SIGNATURE_UCS_4_1234, signature))
|
|
{
|
|
in.read();
|
|
in.read();
|
|
in.read();
|
|
in.read();
|
|
setInputEncoding("UTF-32BE");
|
|
encodingDetected = true;
|
|
}
|
|
else if (equals(SIGNATURE_UCS_4_4321, signature))
|
|
{
|
|
in.read();
|
|
in.read();
|
|
in.read();
|
|
in.read();
|
|
setInputEncoding("UTF-32LE");
|
|
encodingDetected = true;
|
|
}
|
|
else if (equals(SIGNATURE_UCS_4_2143, signature) ||
|
|
equals(SIGNATURE_UCS_4_3412, signature))
|
|
throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");
|
|
|
|
// 2-byte encodings
|
|
else if (equals(SIGNATURE_UCS_2_12, signature))
|
|
{
|
|
in.read();
|
|
in.read();
|
|
setInputEncoding("UTF-16BE");
|
|
encodingDetected = true;
|
|
}
|
|
else if (equals(SIGNATURE_UCS_2_21, signature))
|
|
{
|
|
in.read();
|
|
in.read();
|
|
setInputEncoding("UTF-16LE");
|
|
encodingDetected = true;
|
|
}
|
|
else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
|
|
{
|
|
//setInputEncoding("UTF-16BE");
|
|
throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
|
|
}
|
|
else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
|
|
{
|
|
//setInputEncoding("UTF-16LE");
|
|
throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
|
|
}
|
|
// ASCII-derived encodings
|
|
else if (equals(SIGNATURE_UTF_8, signature))
|
|
{
|
|
// UTF-8 input encoding implied, TextDecl
|
|
}
|
|
else if (equals(SIGNATURE_UTF_8_BOM, signature))
|
|
{
|
|
in.read();
|
|
in.read();
|
|
in.read();
|
|
setInputEncoding("UTF-8");
|
|
encodingDetected = true;
|
|
}
|
|
}
|
|
|
|
private static boolean equals(int[] b1, int[] b2)
|
|
{
|
|
for (int i = 0; i < b1.length; i++)
|
|
{
|
|
if (b1[i] != b2[i])
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
void setInputEncoding(String encoding)
|
|
throws IOException
|
|
{
|
|
if (encoding.equals(inputEncoding))
|
|
return;
|
|
if ("UTF-16".equalsIgnoreCase(encoding) &&
|
|
inputEncoding.startsWith("UTF-16"))
|
|
return;
|
|
if (encodingDetected)
|
|
throw new UnsupportedEncodingException("document is not in its " +
|
|
"declared encoding " +
|
|
inputEncoding +
|
|
": " + encoding);
|
|
inputEncoding = encoding;
|
|
finalizeEncoding();
|
|
}
|
|
|
|
void finalizeEncoding()
|
|
throws IOException
|
|
{
|
|
if (reader != null)
|
|
return;
|
|
reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
|
|
unicodeReader = new UnicodeReader(reader);
|
|
mark(1);
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|