com.caucho.xml2.XmlParser Maven / Gradle / Ivy
* Copyright (c) 1998-2018 Caucho Technology -- all rights reserved
* This file is part of Resin(R) Open Source
* Each copy or derived work must preserve the copyright notice and this
* notice unmodified.
* Resin Open Source is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
* Resin Open Source is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* of NON-INFRINGEMENT. See the GNU General Public License for more
* details.
* You should have received a copy of the GNU General Public License
* along with Resin Open Source; if not, write to the
* Free Software Foundation, Inc.
* 59 Temple Place, Suite 330
* Boston, MA 02111-1307 USA
* @author Scott Ferguson
package com.caucho.xml2;
import com.caucho.util.CharBuffer;
import com.caucho.vfs.*;
import com.caucho.xml2.readers.MacroReader;
import com.caucho.xml2.readers.Utf16Reader;
import com.caucho.xml2.readers.Utf8Reader;
import com.caucho.xml2.readers.XmlReader;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import javax.xml.namespace.QName;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.logging.Level;
* A configurable XML parser. Loose versions of XML and HTML are supported
* by changing the Policy object.
* Normally, applications will use Xml, LooseXml, Html, or LooseHtml.
public class XmlParser extends AbstractParser {
// Xerces uses the following
public static final String XMLNS = "";
public static final String XML = "";
static final QName DOC_NAME = new QName("#document");
static final QName TEXT_NAME = new QName("#text");
static final QName WHITESPACE_NAME = new QName("#whitespace");
private static final boolean []XML_NAME_CHAR;
QAttributes _attributes;
QAttributes _nullAttributes;
CharBuffer _text;
CharBuffer _eltName;
CharBuffer _cb;
CharBuffer _buf = new CharBuffer();
String _textFilename;
int _textLine;
TempCharBuffer _tempInputBuffer;
char []_inputBuffer;
int _inputOffset;
int _inputLength;
char []_textBuffer = new char[1024];
int _textLength;
int _textCapacity = _textBuffer.length;
boolean _isIgnorableWhitespace;
char []_valueBuffer = _textBuffer;
CharBuffer _name = new CharBuffer();
CharBuffer _nameBuffer = new CharBuffer();
MacroReader _macro = new MacroReader();
int _macroIndex = 0;
int _macroLength = 0;
char []_macroBuffer;
int []_elementLines = new int[64];
int _elementTop;
ArrayList _attrNames = new ArrayList();
ArrayList _attrValues = new ArrayList();
ReadStream _is;
XmlReader _reader;
String _extPublicId;
String _extSystemId;
NamespaceContextImpl _namespace = new NamespaceContextImpl();
SaxIntern _intern = new SaxIntern(_namespace);;
QName _activeNode;
QName _topNamespaceNode;
boolean _isTagStart;
boolean _stopOnIncludeEnd;
boolean _hasTopElement;
boolean _hasDoctype;
Locator _locator = new LocatorImpl(this);
public XmlParser()
* Creates a new parser with a given dtd.
* @param dtd the parser's dtd.
XmlParser(QDocumentType dtd)
* Initialize the parser.
void init()
_attributes = new QAttributes();
_nullAttributes = new QAttributes();
_eltName = new CharBuffer();
_text = new CharBuffer();
_textLength = 0;
_isIgnorableWhitespace = true;
_elementTop = 0;
_elementLines[0] = 1;
_line = 1;
_dtd = null;
_isTagStart = false;
_stopOnIncludeEnd = false;
_extPublicId = null;
_extSystemId = null;
_filename = null;
_publicId = null;
_systemId = null;
_hasTopElement = false;
_hasDoctype = false;
_macroIndex = 0;
_macroLength = 0;
_reader = null;
// _owner = null;
* Parse the document from a read stream.
* @param is read stream to parse from.
* @return The parsed document.
Document parseInt(ReadStream is)
throws IOException, SAXException
_tempInputBuffer = TempCharBuffer.allocate();
_inputBuffer = _tempInputBuffer.getBuffer();
_inputLength = _inputOffset = 0;
_is = is;
if (_filename == null && _systemId != null)
_filename = _systemId;
else if (_filename == null)
_filename = _is.getUserPath();
if (_systemId == null) {
_systemId = _is.getPath().getURL();
if ("null:".equals(_systemId) || "string:".equals(_systemId))
_systemId = "stream";
if (_filename == null)
_filename = _systemId;
if (_filename == null)
_filename = "stream";
if (_dtd != null)
if (_builder != null) {
if (! "string:".equals(_systemId) && ! "stream".equals(_systemId))
if (_contentHandler == null)
_contentHandler = new org.xml.sax.helpers.DefaultHandler();
if (_owner == null)
_owner = new QDocument();
if (_defaultEncoding != null)
_owner.setAttribute("encoding", _defaultEncoding);
_activeNode = DOC_NAME;
if (dbg.canWrite()) {
printDebugNode(dbg, doc, 0);
if (! _hasTopElement)
throw error(L.l("XML file has no top-element. All well-formed XML files have a single top-level element."));
QDocument owner = _owner;
_owner = null;
return owner;
* The main dispatch loop. Reads characters from the input buffer,
* accumulating text and dispatching on markup characters such as
* '&' and '<'.
private void parseNode()
throws IOException, SAXException
char []valueBuffer = _valueBuffer;
int valueLength = valueBuffer.length;
int valueOffset = 0;
boolean isWhitespace = true;
char []inputBuffer = _inputBuffer;
int inputLength = _inputLength;
int inputOffset = _inputOffset;
while (true) {
int ch;
if (inputOffset < inputLength)
ch = inputBuffer[inputOffset++];
else if (fillBuffer()) {
inputBuffer = _inputBuffer;
inputOffset = _inputOffset;
inputLength = _inputLength;
ch = inputBuffer[inputOffset++];
else {
if (valueOffset > 0)
addText(valueBuffer, 0, valueOffset, isWhitespace);
_inputOffset = inputOffset;
_inputLength = inputLength;
switch (ch) {
case '\n':
valueBuffer[valueOffset++] = (char) ch;
case ' ': case '\t': case '\r':
valueBuffer[valueOffset++] = (char) ch;
case 0xffff:
// marker for end of text for serialization (?)
if (valueOffset > 0)
addText(valueBuffer, 0, valueOffset, isWhitespace);
_inputOffset = inputOffset;
_inputLength = inputLength;
case '&':
if (valueOffset > 0)
addText(valueBuffer, 0, valueOffset, isWhitespace);
_inputOffset = inputOffset;
_inputLength = inputLength;
inputOffset = _inputOffset;
inputLength = _inputOffset;
case '<':
if (valueOffset > 0)
addText(valueBuffer, 0, valueOffset, isWhitespace);
_inputOffset = inputOffset;
_inputLength = inputLength;
ch = read();
if (ch == '/') {
SaxIntern.Entry entry = parseName(0, false);
ch = read();
if (ch != '>') {
throw error(L.l("'{0}>' expected '>' at {1}. Closing tags must close immediately after the tag name.",
entry.getName(), badChar(ch)));
// element: ...
else if (XmlChar.isNameStart(ch)) {
ch = read();
if ((ch = read()) == '[') {
ch = read();
else if (ch == '-') {
ch = read();
else if (XmlChar.isNameStart(ch)) {
SaxIntern.Entry entry = parseName(0, false);
String declName = entry.getName();
if (declName.equals("DOCTYPE")) {
if (_contentHandler instanceof DOMBuilder)
((DOMBuilder) _contentHandler).dtd(_dtd);
throw error(L.l("expected '
else if (ch == '?') {
else {
throw error(L.l("expected tag name after '<' at {0}. Open tag names must immediately follow the open brace like ''", badChar(ch)));
inputOffset = _inputOffset;
inputLength = _inputLength;
isWhitespace = false;
valueBuffer[valueOffset++] = (char) ch;
if (valueOffset == valueLength) {
addText(valueBuffer, 0, valueOffset, isWhitespace);
valueOffset = 0;
* Parses the <!DOCTYPE> declaration.
private void parseDoctype()
throws IOException, SAXException
if (_activeNode != DOC_NAME)
throw error(L.l(" declaration."));
int ch = skipWhitespace(read());
ch = _reader.parseName(_nameBuffer, ch);
String name = _nameBuffer.toString();
ch = skipWhitespace(ch);
if (_dtd == null)
_dtd = new QDocumentType(name);
if (XmlChar.isNameStart(ch)) {
ch = parseExternalID(ch);
ch = skipWhitespace(ch);
_dtd._publicId = _extPublicId;
_dtd._systemId = _extSystemId;
if (_dtd._systemId != null && ! _dtd._systemId.equals("")) {
InputStream is = null;
XmlReader oldReader = _reader;
boolean hasInclude = false;
try {
pushInclude(_extPublicId, _extSystemId);
hasInclude = true;
} catch (Exception e) {
if (log.isLoggable(Level.FINEST))
log.log(Level.FINER, e.toString(), e);
if (hasInclude) {
_stopOnIncludeEnd = true;
try {
DtdParser dtdParser = new DtdParser(this, _dtd);
ch = dtdParser.parseDoctypeDecl(_dtd);
} catch (XmlParseException e) {
if (_extSystemId != null &&
_extSystemId.startsWith("http")) {
log.log(Level.FINE, e.toString(), e);
throw e;
_stopOnIncludeEnd = false;
while (_reader != null && _reader != oldReader)
if (_reader != null)
ch = skipWhitespace(read());
if (ch == '[') {
DtdParser dtdParser = new DtdParser(this, _dtd);
ch = dtdParser.parseDoctypeDecl(_dtd);
ch = skipWhitespace(ch);
if (ch != '>')
throw error(L.l("expected '>' in ' && ch != '/') {
ch = parseAttributes(ch, true);
QName qName = entry.getQName();
if (_isValidating && _dtd != null) {
QElementDef elementDef = _dtd.getElement(qName.getLocalPart());
if (elementDef != null)
_hasTopElement = true;
if (ch == '/') {
// empty tag:
if ((ch = read()) == '>') {
// short tag:
else {
throw error(L.l("unexpected character {0} after '/', expected '/>'",
badChar(ch), entry.getName()));
else if (ch != '>') {
throw error(L.l("unexpected character {0} while parsing '{1}' attributes. Expected an attribute name or '>' or '/>'. XML element syntax is:\n ",
badChar(ch), entry.getName()));
* Parses the attributes in an element.
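* @param ch the next character to process.
* @param isElement true when parsing the attributes of an element tag,
* false when parsing the pseudo-attributes of the XML declaration.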
* @return the next character to read.
private int parseAttributes(int ch, boolean isElement)
throws IOException, SAXException
while (ch != -1) {
boolean hasWhitespace = false;
while (ch <= 0x20
&& (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) {
hasWhitespace = true;
ch = read();
if (! XmlChar.isNameStart(ch)) {
if (! hasWhitespace)
throw error(L.l("attributes must be separated by whitespace"));
hasWhitespace = false;
SaxIntern.Entry entry = parseName(0, true);
ch = read();
while (ch <= 0x20
&& (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) {
ch = read();
String value = null;
if (ch != '=') {
throw error(L.l("attribute '{0}' expects value at {1}. XML requires attributes to have explicit values.",
entry.getName(), badChar(ch)));
ch = read();
while (ch <= 0x20
&& (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n')) {
ch = read();
value = parseValue(ch);
ch = read();
if (entry.isXmlns()) {
String prefix;
if (entry.getPrefix() != null)
prefix = entry.getLocalName();
prefix = "";
String uri = value;
if (_isXmlnsPrefix) {
_contentHandler.startPrefixMapping(prefix, uri);
// needed for xml/032e
if (isElement && _isXmlnsAttribute) {
_attributes.add(entry.getQName(), uri);
else {
int len = _attrNames.size();
for (int i = 0; i < len; i++) {
SaxIntern.Entry attrEntry = _attrNames.get(i);
String value = _attrValues.get(i);
QName name = attrEntry.getQName();
_attributes.add(name, value);
return ch;
* Parses an entity reference:
* er ::= &#d+;
*    ::= &name;
private int parseEntityReference()
throws IOException, SAXException
int ch;
ch = read();
// character reference
if (ch == '#') {
addText((char) parseCharacterReference());
return read();
// entity reference
else if (XmlChar.isNameStart(ch)) {
ch = _reader.parseName(_buf, ch);
if (ch != ';' && _strictXml)
throw error(L.l("'&{0};' expected ';' at {0}. Entity references have a '&name;' syntax.", _buf, badChar(ch)));
else if (ch != ';') {
return ch;
ch = read();
return ch;
} else if (_strictXml) {
throw error(L.l("expected name at {0}", badChar(ch)));
} else {
return ch;
private int parseCharacterReference()
throws IOException, SAXException
int ch = read();
int radix = 10;
if (ch == 'x') {
radix = 16;
ch = read();
int value = 0;
for (; ch != ';'; ch = read()) {
if (ch >= '0' && ch <= '9')
value = radix * value + ch - '0';
else if (radix == 16 && ch >= 'a' && ch <= 'f')
value = radix * value + ch - 'a' + 10;
else if (radix == 16 && ch >= 'A' && ch <= 'F')
value = radix * value + ch - 'A' + 10;
throw error(L.l("malformed entity ref at {0}", badChar(ch)));
if (value > 0xffff)
throw error(L.l("malformed entity ref at {0}", "" + value));
// xml/0072
if (_strictCharacters && ! isChar(value))
throw error(L.l("illegal character ref at {0}", badChar(value)));
return value;
* Looks up a named entity reference, filling the text.
private void addEntityReference(String name)
throws IOException, SAXException
boolean expand = ! _entitiesAsText || _hasDoctype;
// XXX: not quite the right logic. There should be a soft expandEntities
if (! expand) {
addText("&" + name + ";");
int ch = _entities.getEntity(name);
if (ch >= 0 && ch <= 0xffff) {
addText((char) ch);
QEntity entity = _dtd == null ? null : _dtd.getEntity(name);
if (! _expandEntities) {
addText("&" + name + ";");
if (entity == null && (_dtd == null || _dtd.getName() == null ||
! _dtd.isExternal())) {
throw error(L.l("'&{0};' is an unknown entity. XML predefines only '<', '&', '>', ''' and '"'. All other entities must be defined in an <!ENTITY> definition in the DTD.", name));
else if (entity != null) {
if (entity._isSpecial && entity._value != null)
else if (entity.getSystemId() != null) {
if (pushSystemEntity(entity)) {
/* XXX:??
else if (strictXml) {
throw error(L.l("can't open external entity at '&{0};'", name));
else if (_contentHandler instanceof DOMBuilder) {
((DOMBuilder) _contentHandler).entityReference(name);
addText("&" + name + ";");
else if (expand && entity._value != null)
addText("&" + name + ";");
else {
if (_contentHandler instanceof DOMBuilder) {
((DOMBuilder) _contentHandler).entityReference(name);
else // XXX: error?
addText("&" + name + ";");
private boolean pushSystemEntity(QEntity entity)
throws IOException, SAXException
String publicId = entity.getPublicId();
String systemId = entity.getSystemId();
String value = null;
InputSource source = null;
ReadStream is = null;
if (_entityResolver != null)
source = _entityResolver.resolveEntity(publicId, systemId);
if (source != null && source.getByteStream() != null)
is = Vfs.openRead(source.getByteStream());
else if (source != null && source.getCharacterStream() != null)
is = Vfs.openRead(source.getCharacterStream());
else if (source != null && source.getSystemId() != null &&
_searchPath.lookup(source.getSystemId()).isFile()) {
is = _searchPath.lookup(source.getSystemId()).openRead();
else if (systemId != null && ! systemId.equals("")) {
String path = systemId;
if (path.startsWith("file:"))
path = path.substring(5);
if (_searchPath.lookup(path).isFile()) {
is = _searchPath.lookup(path).openRead();
if (is == null)
return false;
_filename = systemId;
_systemId = systemId;
Path oldSearchPath = _searchPath;
Path path = is.getPath();
if (path != null) {
if (_searchPath != null) {
_searchPath = path.getParent();
_is = is;
_line = 1;
XmlReader oldReader = _reader;
_reader = null;
return true;
private boolean isAttributeChar(int ch)
switch (ch) {
case ' ': case '\t': case '\n': case '\r':
return false;
case '<': case '>': case '\'':case '"': case '=':
return false;
return true;
private int parsePI()
throws IOException, SAXException
int ch;
ch = read();
if (! XmlChar.isNameStart(ch))
throw error(L.l("expected name after '' at {0}. Processing instructions expect a name like ", badChar(ch)));
ch = _reader.parseName(_text, ch);
String piName = _text.toString();
if (! piName.equals("xml"))
return parsePITail(piName, ch);
else {
throw error(L.l(" occurs after content. The prolog must be at the document start."));
private int parsePITail(String piName, int ch)
throws IOException, SAXException
ch = skipWhitespace(ch);
while (ch != -1) {
if (ch == '?') {
if ((ch = read()) == '>')
} else {
_text.append((char) ch);
ch = read();
_contentHandler.processingInstruction(piName, _text.toString());
return read();
* Parses a comment. The "<!--" has already been read.
private void parseComment()
throws IOException, SAXException
int ch = read();
if (ch != '-')
throw error(L.l("expected comment at {0}", badChar(ch)));
ch = read();
if (! _skipComments)
while (ch != -1) {
if (ch == '-') {
ch = read();
while (ch == '-') {
if ((ch = read()) == '>')
break comment;
else if (_strictComments)
throw error(L.l("XML forbids '--' in comments"));
else if (ch == '-') {
if (! _skipComments)
else {
if (! _skipComments)
} else if (! XmlChar.isChar(ch)) {
throw error(L.l("bad character {0}", hex(ch)));
} else {
_buf.append((char) ch);
ch = read();
if (_skipComments) {
else if (_contentHandler instanceof XMLWriter && ! _skipComments) {
((XMLWriter) _contentHandler).comment(_buf.toString());
_isIgnorableWhitespace = true;
else if (_lexicalHandler != null) {
_lexicalHandler.comment(_buf.getBuffer(), 0, _buf.getLength());
_isIgnorableWhitespace = true;
* Parses the contents of a cdata section.
* cdata ::= <![CDATA[ ... ]]>
private void parseCdata()
throws IOException, SAXException
int ch;
if ((ch = read()) != 'C' ||
(ch = read()) != 'D' ||
(ch = read()) != 'A' ||
(ch = read()) != 'T' ||
(ch = read()) != 'A' ||
(ch = read()) != '[') {
throw error(L.l("expected '')
break cdata;
else if (ch == ']')
else {
} else if (_strictCharacters && ! isChar(ch)) {
throw error(L.l("expected character in cdata at {0}", badChar(ch)));
} else {
addText((char) ch);
ch = read();
if (_lexicalHandler != null) {
* Expands the macro value of a PE reference.
private void addPEReference(CharBuffer value, String name)
throws IOException, SAXException
QEntity entity = _dtd.getParameterEntity(name);
if (entity == null && ! _dtd.isExternal())
throw error(L.l("'%{0};' is an unknown parameter entity. Parameter entities must be defined in an declaration before use.", name));
else if (entity != null && entity._value != null) {
else if (entity != null && entity.getSystemId() != null) {
pushInclude(entity.getPublicId(), entity.getSystemId());
else {
private static String toAttrDefault(CharBuffer text)
for (int i = 0; i < text.length(); i++) {
int ch = text.charAt(i);
if (ch == '"') {
text.delete(i, i + 1);
text.insert(i, """);
} else if (ch == '\'') {
text.delete(i, i + 1);
text.insert(i, "'");
return text.toString();
* externalID ::= PUBLIC publicId systemId
* ::= SYSTEM systemId
private int parseExternalID(int ch)
throws IOException, SAXException
ch = _reader.parseName(_text, ch);
String key = _text.toString();
ch = skipWhitespace(ch);
_extSystemId = null;
_extPublicId = null;
if (key.equals("PUBLIC")) {
_extPublicId = parseValue(ch);
ch = skipWhitespace(read());
if (_extPublicId.indexOf('&') > 0)
throw error(L.l("Illegal character '&' in PUBLIC identifier '{0}'",
_extSystemId = parseValue(ch);
ch = skipWhitespace(read());
else if (key.equals("SYSTEM")) {
_extSystemId = parseValue(ch);
ch = read();
throw error(L.l("expected PUBLIC or SYSTEM at '{0}'", key));
return ch;
* Parses an attribute value.
* value ::= '[^']*'
* ::= "[^"]*"
* ::= [^ />]*
* @param ch the next character from the input stream.
* @return the parsed attribute value.
private String parseValue(int ch)
throws IOException, SAXException
int end = ch;
char []valueBuffer = _valueBuffer;
int valueLength = 0;
if (end != '\'' && end != '"') {
valueBuffer[valueLength++] = (char) end;
for (ch = read();
ch >= 0 && XmlChar.isNameChar(ch);
ch = read()) {
valueBuffer[valueLength++] = (char) ch;
String value = new String(valueBuffer, 0, valueLength);
throw error(L.l("XML attribute value must be quoted at '{0}'. XML attribute syntax is either attr=\"value\" or attr='value'.",
ch = read();
while (ch >= 0 && ch != end) {
if (ch == '&') {
if ((ch = read()) == '#') {
valueBuffer[valueLength++] = (char) parseCharacterReference();
else if (XmlChar.isNameStart(ch)) {
ch = _reader.parseName(_buf, ch);
String name = _buf.toString();
if (ch != ';')
throw error(L.l("expected '{0}' at {1}", ";", badChar(ch)));
else {
int lookup = _entities.getEntity(name);
if (lookup >= 0 && lookup <= 0xffff) {
ch = read();
valueBuffer[valueLength++] = (char) lookup;
QEntity entity = _dtd == null ? null : _dtd.getEntity(name);
if (entity != null && entity._value != null)
throw error(L.l("expected local reference at '&{0};'", name));
else {
if (ch == '\r') {
ch = read();
if (ch != '\n') {
valueBuffer[valueLength++] = '\n';
valueBuffer[valueLength++] = (char) ch;
ch = read();
return new String(valueBuffer, 0, valueLength);
private boolean isWhitespace(int ch)
return ch <= 0x20 && (ch == 0x20 || ch == 0x9 || ch == 0xa || ch == 0xd);
private boolean isChar(int ch)
return (ch >= 0x20 && ch <= 0xd7ff ||
ch == 0x9 ||
ch == 0xa ||
ch == 0xd ||
ch >= 0xe000 && ch <= 0xfffd);
* Returns the hex representation of a byte.
private static String hex(int value)
CharBuffer cb = CharBuffer.allocate();
for (int b = 3; b >= 0; b--) {
int v = (value >> (4 * b)) & 0xf;
if (v < 10)
cb.append((char) (v + '0'));
cb.append((char) (v - 10 + 'a'));
return cb.close();
* Returns the current filename.
public String getFilename()
return _filename;
* Returns the current line.
public int getLine()
return _line;
* Returns the current column.
int getColumn()
return -1;
* Returns the opening line of the current node.
int getNodeLine()
if (_elementTop > 0)
return _elementLines[_elementTop - 1];
return 1;
* Returns the current public id being read.
public String getPublicId()
if (_reader != null)
return _reader.getPublicId();
return _publicId;
* Returns the current system id being read.
public String getSystemId()
if (_reader != null)
return _reader.getSystemId();
else if (_systemId != null)
return _systemId;
return _filename;
public void setLine(int line)
_line = line;
public int getLineNumber() { return getLine(); }
public int getColumnNumber() { return getColumn(); }
* Adds a string to the current text buffer.
private void addText(String s)
throws IOException, SAXException
int len = s.length();
for (int i = 0; i < len; i++)
* Adds a character to the current text buffer.
private void addText(char ch)
throws IOException, SAXException
if (_textLength > 0 && _textBuffer[_textLength - 1] == '\r') {
_textBuffer[_textLength - 1] = '\n';
if (ch == '\n')
if (_isIgnorableWhitespace && ! XmlChar.isWhitespace(ch))
_isIgnorableWhitespace = false;
_textBuffer[_textLength++] = ch;
* Flushes the text buffer to the SAX callback.
private void addText(char []buffer, int offset, int length,
boolean isWhitespace)
throws IOException, SAXException
if (length <= 0)
if (_namespace.getDepth() == 1) {
if (! isWhitespace) {
throw error(L.l("expected top element at '{0}'",
new String(buffer, offset, length)));
else {
_contentHandler.ignorableWhitespace(buffer, offset, length);
_contentHandler.characters(buffer, offset, length);
* Parses a name.
private SaxIntern.Entry parseName(int offset, boolean isAttribute)
throws IOException
char []inputBuf = _inputBuffer;
int inputLength = _inputLength;
int inputOffset = _inputOffset;
char []valueBuf = _valueBuffer;
int valueLength = offset;
int colon = 0;
while (true) {
if (inputOffset < inputLength) {
char ch = inputBuf[inputOffset++];
if (XML_NAME_CHAR[ch]) {
valueBuf[valueLength++] = ch;
else if (ch == ':') {
if (colon <= 0)
colon = valueLength;
valueBuf[valueLength++] = ch;
else {
_inputOffset = inputOffset - 1;
return _intern.add(valueBuf, offset, valueLength - offset,
colon, isAttribute);
else if (fillBuffer()) {
inputLength = _inputLength;
inputOffset = 0;
else {
return _intern.add(valueBuf, offset, valueLength - offset,
colon, isAttribute);
final int skipWhitespace(int ch)
throws IOException, SAXException
while (ch <= 0x20 && (ch == 0x20 || ch == 0x9 || ch == 0xa || ch == 0xd)) {
ch = read();
return ch;
public void setReader(XmlReader reader)
_reader = reader;
* Adds text to the macro, escaping attribute values.
void setMacroAttr(String text)
throws IOException, SAXException
if (_reader != _macro) {
_macro.init(this, _reader);
_reader = _macro;
int j = _macroIndex;
for (int i = 0; i < text.length(); i++) {
int ch = text.charAt(i);
if (ch == '\'')
else if (ch == '"')
_macro.add((char) ch);
void pushInclude(String systemId)
throws IOException, SAXException
pushInclude(null, systemId);
* Pushes the named file as a lexical include.
* @param systemId the name of the file to include.
void pushInclude(String publicId, String systemId)
throws IOException, SAXException
InputStream stream = openStream(systemId, publicId);
if (stream == null)
throw new FileNotFoundException(systemId);
_is = Vfs.openRead(stream);
Path oldSearchPath = _searchPath;
Path path = _is.getPath();
if (path != null) {
if (_searchPath != null) {
_searchPath = path.getParent();
_filename = systemId;
XmlReader nextReader;
if (_reader instanceof Utf8Reader)
nextReader = new Utf8Reader(this, _is);
else {
nextReader = new XmlReader(this, _is);
_reader = nextReader;
XmlReader oldReader = _reader;
_reader = null;
_line = 1;
int ch = read();
XmlReader reader = _reader;
if (reader instanceof MacroReader)
reader = reader.getNext();
private void popInclude()
throws IOException, SAXException
XmlReader oldReader = _reader;
_reader = _reader.getNext();
_filename = _reader.getFilename();
_line = _reader.getLine();
_is = _reader.getReadStream();
if (_reader.getSearchPath() != null)
_searchPath = _reader.getSearchPath();
void setMacro(String text)
throws IOException, SAXException
if (_reader == _macro) {
else if (_macro.getNext() == null) {
_macro.init(this, _reader);
_reader = _macro;
else {
_macro = new MacroReader();
_macro.init(this, _reader);
_reader = _macro;
protected final int read()
throws IOException, SAXException
int inputOffset = _inputOffset;
if (inputOffset < _inputLength) {
char ch = _inputBuffer[inputOffset];
_inputOffset = inputOffset + 1;
return ch;
else if (fillBuffer()) {
return _inputBuffer[_inputOffset++];
return -1;
public final void unread(int ch)
if (ch < 0 || _inputOffset <= 0)
protected boolean fillBuffer()
throws IOException
int len =, 0, _inputBuffer.length);
if (len >= 0) {
_inputLength = len;
_inputOffset = 0;
return true;
else {
_inputLength = 0;
_inputOffset = 0;
return false;
private void parseXMLDeclaration(XmlReader oldReader)
throws IOException, SAXException
int startOffset = _is.getOffset();
boolean isEBCDIC = false;
int ch =;
XmlReader reader = null;
// utf-16 starts with \xfe \xff
if (ch == 0xfe) {
ch =;
if (ch == 0xff) {
_owner.setAttribute("encoding", "UTF-16");
reader = new Utf16Reader(this, _is);
ch =;
// utf-16 rev starts with \xff \xfe
else if (ch == 0xff) {
ch =;
if (ch == 0xfe) {
_owner.setAttribute("encoding", "UTF-16");
reader = new Utf16Reader(this, _is);
((Utf16Reader) reader).setReverse(true);
ch =;
// utf-16 can also start with \x00 <
else if (ch == 0x00) {
ch =;
_owner.setAttribute("encoding", "UTF-16");
reader = new Utf16Reader(this, _is);
// utf-8 BOM is \xef \xbb \xbf
else if (ch == 0xef) {
ch =;
if (ch == 0xbb) {
ch =;
if (ch == 0xbf) {
ch =;
_owner.setAttribute("encoding", "UTF-8");
reader = new Utf8Reader(this, _is);
else if (ch == 0x4c) {
// ebcdic
// xml/00l1
// _is.setEncoding("cp037");
isEBCDIC = true;
reader = new XmlReader(this, _is);
ch =;
else {
int ch2 =;
if (ch2 == 0x00) {
_owner.setAttribute("encoding", "UTF-16LE");
reader = new Utf16Reader(this, _is);
((Utf16Reader) reader).setReverse(true);
else if (ch2 > 0)
if (reader != null && reader != oldReader) {
else if (_is.getSource() instanceof ReaderWriterStream) {
reader = new XmlReader(this, _is);
else {
reader = new Utf8Reader(this, _is);
if (ch == '\n')
if (_systemId == null)
_reader = reader;
/* XXX: this might be too strict. */
if (! strictXml) {
for (; XmlChar.isWhitespace(ch); ch = {
if (ch != '<') {
if (parseXMLDecl(_reader) && isEBCDIC) {
// EBCDIC requires a re-read
ch = read();
if (ch != '<')
throw new IllegalStateException();
private boolean parseXMLDecl(XmlReader reader)
throws IOException, SAXException
int ch = readByte();
if (ch != '?') {
unreadByte((char) ch);
return false;
ch = read();
if (! XmlChar.isNameStart(ch))
throw error(L.l("expected name after '' at {0}. Processing instructions expect a name like ", badChar(ch)));
ch = _reader.parseName(_text, ch);
String piName = _text.toString();
if (! piName.equals("xml")) {
ch = parsePITail(piName, ch);
return false;
ch = parseAttributes(ch, false);
if (ch != '?')
throw error(L.l("expected '?' at {0}. Processing instructions end with '?>' like ", badChar(ch)));
if ((ch = read()) != '>')
throw error(L.l("expected '>' at {0}. Processing instructions end with '?>' like ", ">", badChar(ch)));
for (int i = 0; i < _attributes.getLength(); i++) {
QName name = _attributes.getName(i);
String value = _attributes.getValue(i);
if (_owner != null)
_owner.setAttribute(name.getLocalPart(), value);
if (name.getLocalPart().equals("encoding")) { // xml/00hb // && ! _inDtd) {
String encoding = value;
if (! _isStaticEncoding &&
! encoding.equalsIgnoreCase("UTF-8") &&
! encoding.equalsIgnoreCase("UTF-16") &&
! (_is.getSource() instanceof ReaderWriterStream)) {
XmlReader oldReader = _reader;
_reader = new XmlReader(this, _is);
// _reader.setNext(oldReader);
return true;
protected int readByte()
throws IOException
protected void unreadByte(int ch)
* Returns an error including the current line.
* @param text the error message text.
XmlParseException error(String text)
if (_errorHandler != null) {
SAXParseException e = new SAXParseException(text, _locator);
try {
} catch (SAXException e1) {
return new XmlParseException(_filename + ":" + _line + ": " + text);
public void free()
int parseName(CharBuffer cb, int ch)
throws IOException, SAXException
return _reader.parseName(cb, ch);
* Returns a user-readable string for an error character.
static String badChar(int ch)
if (ch < 0 || ch == 0xffff)
return L.l("end of file");
else if (ch == '\n' || ch == '\r')
return L.l("end of line");
else if (ch >= 0x20 && ch <= 0x7f)
return "'" + (char) ch + "'";
return "'" + (char) ch + "' (\\u" + hex(ch) + ")";
private void printDebugNode(WriteStream s, Node node, int depth)
throws IOException
if (node == null)
for (int i = 0; i < depth; i++)
s.print(' ');
if (node.getFirstChild() != null) {
s.println("<" + node.getNodeName() + ">");
for (Node child = node.getFirstChild();
child != null;
child = child.getNextSibling()) {
printDebugNode(s, child, depth + 2);
for (int i = 0; i < depth; i++)
s.print(' ');
s.println("" + node.getNodeName() + ">");
s.println("<" + node.getNodeName() + "/>");
public void close()
TempCharBuffer tempInputBuffer = _tempInputBuffer;
_tempInputBuffer = null;
_inputBuffer = null;
if (tempInputBuffer != null);
public static class LocatorImpl implements ExtendedLocator {
XmlParser _parser;
LocatorImpl(XmlParser parser)
_parser = parser;
public String getSystemId()
if (_parser._reader != null && _parser._reader.getSystemId() != null)
return _parser._reader.getSystemId();
else if (_parser.getSystemId() != null)
return _parser.getSystemId();
else if (_parser._reader != null && _parser._reader.getFilename() != null)
return _parser._reader.getFilename();
else if (_parser.getFilename() != null)
return _parser.getFilename();
return null;
public String getFilename()
if (_parser._reader != null && _parser._reader.getFilename() != null)
return _parser._reader.getFilename();
else if (_parser.getFilename() != null)
return _parser.getFilename();
else if (_parser._reader != null && _parser._reader.getSystemId() != null)
return _parser._reader.getSystemId();
else if (_parser.getSystemId() != null)
return _parser.getSystemId();
return null;
public String getPublicId()
if (_parser._reader != null)
return _parser._reader.getPublicId();
return _parser.getPublicId();
public int getLineNumber()
if (_parser._reader != null)
return _parser._reader.getLine();
return _parser.getLineNumber();
public int getColumnNumber()
return _parser.getColumnNumber();
// Precomputes, for every 16-bit char value, whether it is a name character
// other than ':'.  parseName(int, boolean) handles ':' separately so that it
// can record where the prefix ends.
static {
  XML_NAME_CHAR = new boolean[65536];

  for (int i = 0; i < 65536; i++) {
    XML_NAME_CHAR[i] = XmlChar.isNameChar(i) && i != ':';
  }
}