org.codehaus.plexus.util.xml.pull.MXParser Maven / Gradle / Ivy
Show all versions of plexus-utils Show documentation
/* -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- //------100-columns-wide------>|*/
/*
* Copyright (c) 2003 Extreme! Lab, Indiana University. All rights reserved.
*
* This software is open source. See the bottom of this file for the licence.
*
* $Id$
*/
package org.codehaus.plexus.util.xml.pull;
import java.io.EOFException;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import org.codehaus.plexus.util.ReaderFactory;
//import java.util.Hashtable;
//TODO best handling of interning issues
// have isAllNewStringInterned ???
//TODO handling surrogate pairs: http://www.unicode.org/unicode/faq/utf_bom.html#6
//TODO review code for use of bufAbsoluteStart when keeping pos between next()/fillBuf()
/**
* Absolutely minimal implementation of XMLPULL V1 API. Encoding handling done with XmlReader
*
* @see org.codehaus.plexus.util.xml.XmlReader
* @author Aleksander Slominski
*/
public class MXParser
implements XmlPullParser
{
// NOTE: no interning of those strings --> by Java leng spec they MUST be already interned
protected final static String XML_URI = "http://www.w3.org/XML/1998/namespace";
protected final static String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
protected final static String FEATURE_XML_ROUNDTRIP =
// "http://xmlpull.org/v1/doc/features.html#xml-roundtrip";
"http://xmlpull.org/v1/doc/features.html#xml-roundtrip";
protected final static String FEATURE_NAMES_INTERNED = "http://xmlpull.org/v1/doc/features.html#names-interned";
protected final static String PROPERTY_XMLDECL_VERSION =
"http://xmlpull.org/v1/doc/properties.html#xmldecl-version";
protected final static String PROPERTY_XMLDECL_STANDALONE =
"http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone";
protected final static String PROPERTY_XMLDECL_CONTENT =
"http://xmlpull.org/v1/doc/properties.html#xmldecl-content";
protected final static String PROPERTY_LOCATION = "http://xmlpull.org/v1/doc/properties.html#location";
/**
* Implementation notice: the is instance variable that controls if newString() is interning.
*
* NOTE: newStringIntern always returns interned strings and newString MAY return interned String
* depending on this variable.
*
* NOTE: by default in this minimal implementation it is false!
*/
protected boolean allStringsInterned;
protected void resetStringCache()
{
// System.out.println("resetStringCache() minimum called");
}
protected String newString( char[] cbuf, int off, int len )
{
return new String( cbuf, off, len );
}
protected String newStringIntern( char[] cbuf, int off, int len )
{
return ( new String( cbuf, off, len ) ).intern();
}
private static final boolean TRACE_SIZING = false;
// NOTE: features are not resetable and typically defaults to false ...
protected boolean processNamespaces;
protected boolean roundtripSupported;
// global parser state
protected String location;
protected int lineNumber;
protected int columnNumber;
protected boolean seenRoot;
protected boolean reachedEnd;
protected int eventType;
protected boolean emptyElementTag;
// element stack
protected int depth;
protected char[] elRawName[];
protected int elRawNameEnd[];
protected int elRawNameLine[];
protected String elName[];
protected String elPrefix[];
protected String elUri[];
// protected String elValue[];
protected int elNamespaceCount[];
/**
* Make sure that we have enough space to keep element stack if passed size. It will always create one additional
* slot then current depth
*/
protected void ensureElementsCapacity()
{
final int elStackSize = elName != null ? elName.length : 0;
if ( ( depth + 1 ) >= elStackSize )
{
// we add at least one extra slot ...
final int newSize = ( depth >= 7 ? 2 * depth : 8 ) + 2; // = lucky 7 + 1 //25
if ( TRACE_SIZING )
{
System.err.println( "TRACE_SIZING elStackSize " + elStackSize + " ==> " + newSize );
}
final boolean needsCopying = elStackSize > 0;
String[] arr = null;
// resue arr local variable slot
arr = new String[newSize];
if ( needsCopying )
System.arraycopy( elName, 0, arr, 0, elStackSize );
elName = arr;
arr = new String[newSize];
if ( needsCopying )
System.arraycopy( elPrefix, 0, arr, 0, elStackSize );
elPrefix = arr;
arr = new String[newSize];
if ( needsCopying )
System.arraycopy( elUri, 0, arr, 0, elStackSize );
elUri = arr;
int[] iarr = new int[newSize];
if ( needsCopying )
{
System.arraycopy( elNamespaceCount, 0, iarr, 0, elStackSize );
}
else
{
// special initialization
iarr[0] = 0;
}
elNamespaceCount = iarr;
// TODO: avoid using element raw name ...
iarr = new int[newSize];
if ( needsCopying )
{
System.arraycopy( elRawNameEnd, 0, iarr, 0, elStackSize );
}
elRawNameEnd = iarr;
iarr = new int[newSize];
if ( needsCopying )
{
System.arraycopy( elRawNameLine, 0, iarr, 0, elStackSize );
}
elRawNameLine = iarr;
final char[][] carr = new char[newSize][];
if ( needsCopying )
{
System.arraycopy( elRawName, 0, carr, 0, elStackSize );
}
elRawName = carr;
// arr = new String[newSize];
// if(needsCopying) System.arraycopy(elLocalName, 0, arr, 0, elStackSize);
// elLocalName = arr;
// arr = new String[newSize];
// if(needsCopying) System.arraycopy(elDefaultNs, 0, arr, 0, elStackSize);
// elDefaultNs = arr;
// int[] iarr = new int[newSize];
// if(needsCopying) System.arraycopy(elNsStackPos, 0, iarr, 0, elStackSize);
// for (int i = elStackSize; i < iarr.length; i++)
// {
// iarr[i] = (i > 0) ? -1 : 0;
// }
// elNsStackPos = iarr;
// assert depth < elName.length;
}
}
// attribute stack
protected int attributeCount;
protected String attributeName[];
protected int attributeNameHash[];
// protected int attributeNameStart[];
// protected int attributeNameEnd[];
protected String attributePrefix[];
protected String attributeUri[];
protected String attributeValue[];
// protected int attributeValueStart[];
// protected int attributeValueEnd[];
/**
* Make sure that in attributes temporary array is enough space.
*/
protected void ensureAttributesCapacity( int size )
{
final int attrPosSize = attributeName != null ? attributeName.length : 0;
if ( size >= attrPosSize )
{
final int newSize = size > 7 ? 2 * size : 8; // = lucky 7 + 1 //25
if ( TRACE_SIZING )
{
System.err.println( "TRACE_SIZING attrPosSize " + attrPosSize + " ==> " + newSize );
}
final boolean needsCopying = attrPosSize > 0;
String[] arr = null;
arr = new String[newSize];
if ( needsCopying )
System.arraycopy( attributeName, 0, arr, 0, attrPosSize );
attributeName = arr;
arr = new String[newSize];
if ( needsCopying )
System.arraycopy( attributePrefix, 0, arr, 0, attrPosSize );
attributePrefix = arr;
arr = new String[newSize];
if ( needsCopying )
System.arraycopy( attributeUri, 0, arr, 0, attrPosSize );
attributeUri = arr;
arr = new String[newSize];
if ( needsCopying )
System.arraycopy( attributeValue, 0, arr, 0, attrPosSize );
attributeValue = arr;
if ( !allStringsInterned )
{
final int[] iarr = new int[newSize];
if ( needsCopying )
System.arraycopy( attributeNameHash, 0, iarr, 0, attrPosSize );
attributeNameHash = iarr;
}
arr = null;
// //assert attrUri.length > size
}
}
// namespace stack
protected int namespaceEnd;
protected String namespacePrefix[];
protected int namespacePrefixHash[];
protected String namespaceUri[];
protected void ensureNamespacesCapacity( int size )
{
final int namespaceSize = namespacePrefix != null ? namespacePrefix.length : 0;
if ( size >= namespaceSize )
{
final int newSize = size > 7 ? 2 * size : 8; // = lucky 7 + 1 //25
if ( TRACE_SIZING )
{
System.err.println( "TRACE_SIZING namespaceSize " + namespaceSize + " ==> " + newSize );
}
final String[] newNamespacePrefix = new String[newSize];
final String[] newNamespaceUri = new String[newSize];
if ( namespacePrefix != null )
{
System.arraycopy( namespacePrefix, 0, newNamespacePrefix, 0, namespaceEnd );
System.arraycopy( namespaceUri, 0, newNamespaceUri, 0, namespaceEnd );
}
namespacePrefix = newNamespacePrefix;
namespaceUri = newNamespaceUri;
if ( !allStringsInterned )
{
final int[] newNamespacePrefixHash = new int[newSize];
if ( namespacePrefixHash != null )
{
System.arraycopy( namespacePrefixHash, 0, newNamespacePrefixHash, 0, namespaceEnd );
}
namespacePrefixHash = newNamespacePrefixHash;
}
// prefixesSize = newSize;
// //assert nsPrefixes.length > size && nsPrefixes.length == newSize
}
}
/**
* simplistic implementation of hash function that has constant time to compute - so it also means
* diminishing hash quality for long strings but for XML parsing it should be good enough ...
*/
protected static final int fastHash( char ch[], int off, int len )
{
if ( len == 0 )
return 0;
// assert len >0
int hash = ch[off]; // hash at beginning
// try {
hash = ( hash << 7 ) + ch[off + len - 1]; // hash at the end
// } catch(ArrayIndexOutOfBoundsException aie) {
// aie.printStackTrace(); //should never happen ...
// throw new RuntimeException("this is violation of pre-condition");
// }
if ( len > 16 )
hash = ( hash << 7 ) + ch[off + ( len / 4 )]; // 1/4 from beginning
if ( len > 8 )
hash = ( hash << 7 ) + ch[off + ( len / 2 )]; // 1/2 of string size ...
// notice that hash is at most done 3 times <<7 so shifted by 21 bits 8 bit value
// so max result == 29 bits so it is quite just below 31 bits for long (2^32) ...
// assert hash >= 0;
return hash;
}
// entity replacement stack
protected int entityEnd;
protected String entityName[];
protected char[] entityNameBuf[];
protected String entityReplacement[];
protected char[] entityReplacementBuf[];
protected int entityNameHash[];
private final EntityReplacementMap replacementMapTemplate;
protected void ensureEntityCapacity()
{
final int entitySize = entityReplacementBuf != null ? entityReplacementBuf.length : 0;
if ( entityEnd >= entitySize )
{
final int newSize = entityEnd > 7 ? 2 * entityEnd : 8; // = lucky 7 + 1 //25
if ( TRACE_SIZING )
{
System.err.println( "TRACE_SIZING entitySize " + entitySize + " ==> " + newSize );
}
final String[] newEntityName = new String[newSize];
final char[] newEntityNameBuf[] = new char[newSize][];
final String[] newEntityReplacement = new String[newSize];
final char[] newEntityReplacementBuf[] = new char[newSize][];
if ( entityName != null )
{
System.arraycopy( entityName, 0, newEntityName, 0, entityEnd );
System.arraycopy( entityNameBuf, 0, newEntityNameBuf, 0, entityEnd );
System.arraycopy( entityReplacement, 0, newEntityReplacement, 0, entityEnd );
System.arraycopy( entityReplacementBuf, 0, newEntityReplacementBuf, 0, entityEnd );
}
entityName = newEntityName;
entityNameBuf = newEntityNameBuf;
entityReplacement = newEntityReplacement;
entityReplacementBuf = newEntityReplacementBuf;
if ( !allStringsInterned )
{
final int[] newEntityNameHash = new int[newSize];
if ( entityNameHash != null )
{
System.arraycopy( entityNameHash, 0, newEntityNameHash, 0, entityEnd );
}
entityNameHash = newEntityNameHash;
}
}
}
// input buffer management
protected static final int READ_CHUNK_SIZE = 8 * 1024; // max data chars in one read() call
protected Reader reader;
protected String inputEncoding;
protected int bufLoadFactor = 95; // 99%
// protected int bufHardLimit; // only matters when expanding
protected char buf[] = new char[Runtime.getRuntime().freeMemory() > 1000000L ? READ_CHUNK_SIZE : 256];
protected int bufSoftLimit = ( bufLoadFactor * buf.length ) / 100; // desirable size of buffer
protected boolean preventBufferCompaction;
protected int bufAbsoluteStart; // this is buf
protected int bufStart;
protected int bufEnd;
protected int pos;
protected int posStart;
protected int posEnd;
protected char pc[] = new char[Runtime.getRuntime().freeMemory() > 1000000L ? READ_CHUNK_SIZE : 64];
protected int pcStart;
protected int pcEnd;
// parsing state
// protected boolean needsMore;
// protected boolean seenMarkup;
protected boolean usePC;
protected boolean seenStartTag;
protected boolean seenEndTag;
protected boolean pastEndTag;
protected boolean seenAmpersand;
protected boolean seenMarkup;
protected boolean seenDocdecl;
// transient variable set during each call to next/Token()
protected boolean tokenize;
protected String text;
protected String entityRefName;
protected String xmlDeclVersion;
protected Boolean xmlDeclStandalone;
protected String xmlDeclContent;
protected void reset()
{
// System.out.println("reset() called");
location = null;
lineNumber = 1;
columnNumber = 0;
seenRoot = false;
reachedEnd = false;
eventType = START_DOCUMENT;
emptyElementTag = false;
depth = 0;
attributeCount = 0;
namespaceEnd = 0;
entityEnd = 0;
setupFromTemplate();
reader = null;
inputEncoding = null;
preventBufferCompaction = false;
bufAbsoluteStart = 0;
bufEnd = bufStart = 0;
pos = posStart = posEnd = 0;
pcEnd = pcStart = 0;
usePC = false;
seenStartTag = false;
seenEndTag = false;
pastEndTag = false;
seenAmpersand = false;
seenMarkup = false;
seenDocdecl = false;
xmlDeclVersion = null;
xmlDeclStandalone = null;
xmlDeclContent = null;
resetStringCache();
}
public MXParser()
{
replacementMapTemplate = null;
}
public MXParser( EntityReplacementMap entityReplacementMap )
{
this.replacementMapTemplate = entityReplacementMap;
}
public void setupFromTemplate()
{
if ( replacementMapTemplate != null )
{
int length = replacementMapTemplate.entityEnd;
// This is a bit cheeky, since the EntityReplacementMap contains exact-sized arrays,
// and elements are always added to the array, we can use the array from the template.
// Kids; dont do this at home.
entityName = replacementMapTemplate.entityName;
entityNameBuf = replacementMapTemplate.entityNameBuf;
entityReplacement = replacementMapTemplate.entityReplacement;
entityReplacementBuf = replacementMapTemplate.entityReplacementBuf;
entityNameHash = replacementMapTemplate.entityNameHash;
entityEnd = length;
}
}
/**
* Method setFeature
*
* @param name a String
* @param state a boolean
* @throws XmlPullParserException
*/
public void setFeature( String name, boolean state )
throws XmlPullParserException
{
if ( name == null )
throw new IllegalArgumentException( "feature name should not be null" );
if ( FEATURE_PROCESS_NAMESPACES.equals( name ) )
{
if ( eventType != START_DOCUMENT )
throw new XmlPullParserException( "namespace processing feature can only be changed before parsing",
this, null );
processNamespaces = state;
// } else if(FEATURE_REPORT_NAMESPACE_ATTRIBUTES.equals(name)) {
// if(type != START_DOCUMENT) throw new XmlPullParserException(
// "namespace reporting feature can only be changed before parsing", this, null);
// reportNsAttribs = state;
}
else if ( FEATURE_NAMES_INTERNED.equals( name ) )
{
if ( state != false )
{
throw new XmlPullParserException( "interning names in this implementation is not supported" );
}
}
else if ( FEATURE_PROCESS_DOCDECL.equals( name ) )
{
if ( state != false )
{
throw new XmlPullParserException( "processing DOCDECL is not supported" );
}
// } else if(REPORT_DOCDECL.equals(name)) {
// paramNotifyDoctype = state;
}
else if ( FEATURE_XML_ROUNDTRIP.equals( name ) )
{
// if(state == false) {
// throw new XmlPullParserException(
// "roundtrip feature can not be switched off");
// }
roundtripSupported = state;
}
else
{
throw new XmlPullParserException( "unsupported feature " + name );
}
}
/**
* Unknown properties are always returned as false
*/
public boolean getFeature( String name )
{
if ( name == null )
throw new IllegalArgumentException( "feature name should not be null" );
if ( FEATURE_PROCESS_NAMESPACES.equals( name ) )
{
return processNamespaces;
// } else if(FEATURE_REPORT_NAMESPACE_ATTRIBUTES.equals(name)) {
// return reportNsAttribs;
}
else if ( FEATURE_NAMES_INTERNED.equals( name ) )
{
return false;
}
else if ( FEATURE_PROCESS_DOCDECL.equals( name ) )
{
return false;
// } else if(REPORT_DOCDECL.equals(name)) {
// return paramNotifyDoctype;
}
else if ( FEATURE_XML_ROUNDTRIP.equals( name ) )
{
// return true;
return roundtripSupported;
}
return false;
}
public void setProperty( String name, Object value )
throws XmlPullParserException
{
if ( PROPERTY_LOCATION.equals( name ) )
{
location = (String) value;
}
else
{
throw new XmlPullParserException( "unsupported property: '" + name + "'" );
}
}
public Object getProperty( String name )
{
if ( name == null )
throw new IllegalArgumentException( "property name should not be null" );
if ( PROPERTY_XMLDECL_VERSION.equals( name ) )
{
return xmlDeclVersion;
}
else if ( PROPERTY_XMLDECL_STANDALONE.equals( name ) )
{
return xmlDeclStandalone;
}
else if ( PROPERTY_XMLDECL_CONTENT.equals( name ) )
{
return xmlDeclContent;
}
else if ( PROPERTY_LOCATION.equals( name ) )
{
return location;
}
return null;
}
public void setInput( Reader in )
throws XmlPullParserException
{
reset();
reader = in;
}
public void setInput( java.io.InputStream inputStream, String inputEncoding )
throws XmlPullParserException
{
if ( inputStream == null )
{
throw new IllegalArgumentException( "input stream can not be null" );
}
Reader reader;
try
{
if ( inputEncoding != null )
{
reader = ReaderFactory.newReader( inputStream, inputEncoding );
}
else
{
reader = ReaderFactory.newXmlReader( inputStream );
}
}
catch ( UnsupportedEncodingException une )
{
throw new XmlPullParserException( "could not create reader for encoding " + inputEncoding + " : " + une,
this, une );
}
catch ( IOException e )
{
throw new XmlPullParserException( "could not create reader : " + e, this, e );
}
setInput( reader );
// must be here as reset() was called in setInput() and has set this.inputEncoding to null ...
this.inputEncoding = inputEncoding;
}
public String getInputEncoding()
{
return inputEncoding;
}
public void defineEntityReplacementText( String entityName, String replacementText )
throws XmlPullParserException
{
// throw new XmlPullParserException("not allowed");
if ( !replacementText.startsWith( "" ) && this.entityName != null && replacementText.length() > 1 )
{
String tmp = replacementText.substring( 1, replacementText.length() - 1 );
for ( int i = 0; i < this.entityName.length; i++ )
{
if ( this.entityName[i] != null && this.entityName[i].equals( tmp ) )
{
replacementText = this.entityReplacement[i];
}
}
}
// protected char[] entityReplacement[];
ensureEntityCapacity();
// this is to make sure that if interning works we will take advantage of it ...
char[] entityNameCharData = entityName.toCharArray();
this.entityName[entityEnd] = newString( entityNameCharData, 0, entityName.length() );
entityNameBuf[entityEnd] = entityNameCharData;
entityReplacement[entityEnd] = replacementText;
entityReplacementBuf[entityEnd] = replacementText.toCharArray();
if ( !allStringsInterned )
{
entityNameHash[entityEnd] = fastHash( entityNameBuf[entityEnd], 0, entityNameBuf[entityEnd].length );
}
++entityEnd;
// TODO disallow < or & in entity replacement text (or ]]>???)
// TOOD keepEntityNormalizedForAttributeValue cached as well ...
}
public int getNamespaceCount( int depth )
throws XmlPullParserException
{
if ( processNamespaces == false || depth == 0 )
{
return 0;
}
// int maxDepth = eventType == END_TAG ? this.depth + 1 : this.depth;
// if(depth < 0 || depth > maxDepth) throw new IllegalArgumentException(
if ( depth < 0 || depth > this.depth )
throw new IllegalArgumentException( "namespace count may be for depth 0.." + this.depth + " not " + depth );
return elNamespaceCount[depth];
}
public String getNamespacePrefix( int pos )
throws XmlPullParserException
{
// int end = eventType == END_TAG ? elNamespaceCount[ depth + 1 ] : namespaceEnd;
// if(pos < end) {
if ( pos < namespaceEnd )
{
return namespacePrefix[pos];
}
else
{
throw new XmlPullParserException( "position " + pos + " exceeded number of available namespaces "
+ namespaceEnd );
}
}
public String getNamespaceUri( int pos )
throws XmlPullParserException
{
// int end = eventType == END_TAG ? elNamespaceCount[ depth + 1 ] : namespaceEnd;
// if(pos < end) {
if ( pos < namespaceEnd )
{
return namespaceUri[pos];
}
else
{
throw new XmlPullParserException( "position " + pos + " exceeded number of available namespaces "
+ namespaceEnd );
}
}
public String getNamespace( String prefix )
// throws XmlPullParserException
{
// int count = namespaceCount[ depth ];
if ( prefix != null )
{
for ( int i = namespaceEnd - 1; i >= 0; i-- )
{
if ( prefix.equals( namespacePrefix[i] ) )
{
return namespaceUri[i];
}
}
if ( "xml".equals( prefix ) )
{
return XML_URI;
}
else if ( "xmlns".equals( prefix ) )
{
return XMLNS_URI;
}
}
else
{
for ( int i = namespaceEnd - 1; i >= 0; i-- )
{
if ( namespacePrefix[i] == null )
{ // "") { //null ) { //TODO check FIXME Alek
return namespaceUri[i];
}
}
}
return null;
}
public int getDepth()
{
return depth;
}
private static int findFragment( int bufMinPos, char[] b, int start, int end )
{
// System.err.println("bufStart="+bufStart+" b="+printable(new String(b, start, end - start))+" start="+start+"
// end="+end);
if ( start < bufMinPos )
{
start = bufMinPos;
if ( start > end )
start = end;
return start;
}
if ( end - start > 65 )
{
start = end - 10; // try to find good location
}
int i = start + 1;
while ( --i > bufMinPos )
{
if ( ( end - i ) > 65 )
break;
final char c = b[i];
if ( c == '<' && ( start - i ) > 10 )
break;
}
return i;
}
/**
* Return string describing current position of parsers as text 'STATE [seen %s...] @line:column'.
*/
public String getPositionDescription()
{
String fragment = null;
if ( posStart <= pos )
{
final int start = findFragment( 0, buf, posStart, pos );
// System.err.println("start="+start);
if ( start < pos )
{
fragment = new String( buf, start, pos - start );
}
if ( bufAbsoluteStart > 0 || start > 0 )
fragment = "..." + fragment;
}
// return " at line "+tokenizerPosRow
// +" and column "+(tokenizerPosCol-1)
// +(fragment != null ? " seen "+printable(fragment)+"..." : "");
return " " + TYPES[eventType] + ( fragment != null ? " seen " + printable( fragment ) + "..." : "" ) + " "
+ ( location != null ? location : "" ) + "@" + getLineNumber() + ":" + getColumnNumber();
}
public int getLineNumber()
{
return lineNumber;
}
public int getColumnNumber()
{
return columnNumber;
}
public boolean isWhitespace()
throws XmlPullParserException
{
if ( eventType == TEXT || eventType == CDSECT )
{
if ( usePC )
{
for ( int i = pcStart; i < pcEnd; i++ )
{
if ( !isS( pc[i] ) )
return false;
}
return true;
}
else
{
for ( int i = posStart; i < posEnd; i++ )
{
if ( !isS( buf[i] ) )
return false;
}
return true;
}
}
else if ( eventType == IGNORABLE_WHITESPACE )
{
return true;
}
throw new XmlPullParserException( "no content available to check for whitespaces" );
}
public String getText()
{
if ( eventType == START_DOCUMENT || eventType == END_DOCUMENT )
{
// throw new XmlPullParserException("no content available to read");
// if(roundtripSupported) {
// text = new String(buf, posStart, posEnd - posStart);
// } else {
return null;
// }
}
else if ( eventType == ENTITY_REF )
{
return text;
}
if ( text == null )
{
if ( !usePC || eventType == START_TAG || eventType == END_TAG )
{
text = new String( buf, posStart, posEnd - posStart );
}
else
{
text = new String( pc, pcStart, pcEnd - pcStart );
}
}
return text;
}
public char[] getTextCharacters( int[] holderForStartAndLength )
{
if ( eventType == TEXT )
{
if ( usePC )
{
holderForStartAndLength[0] = pcStart;
holderForStartAndLength[1] = pcEnd - pcStart;
return pc;
}
else
{
holderForStartAndLength[0] = posStart;
holderForStartAndLength[1] = posEnd - posStart;
return buf;
}
}
else if ( eventType == START_TAG || eventType == END_TAG || eventType == CDSECT || eventType == COMMENT
|| eventType == ENTITY_REF || eventType == PROCESSING_INSTRUCTION || eventType == IGNORABLE_WHITESPACE
|| eventType == DOCDECL )
{
holderForStartAndLength[0] = posStart;
holderForStartAndLength[1] = posEnd - posStart;
return buf;
}
else if ( eventType == START_DOCUMENT || eventType == END_DOCUMENT )
{
// throw new XmlPullParserException("no content available to read");
holderForStartAndLength[0] = holderForStartAndLength[1] = -1;
return null;
}
else
{
throw new IllegalArgumentException( "unknown text eventType: " + eventType );
}
// String s = getText();
// char[] cb = null;
// if(s!= null) {
// cb = s.toCharArray();
// holderForStartAndLength[0] = 0;
// holderForStartAndLength[1] = s.length();
// } else {
// }
// return cb;
}
public String getNamespace()
{
if ( eventType == START_TAG )
{
// return processNamespaces ? elUri[ depth - 1 ] : NO_NAMESPACE;
return processNamespaces ? elUri[depth] : NO_NAMESPACE;
}
else if ( eventType == END_TAG )
{
return processNamespaces ? elUri[depth] : NO_NAMESPACE;
}
return null;
// String prefix = elPrefix[ maxDepth ];
// if(prefix != null) {
// for( int i = namespaceEnd -1; i >= 0; i--) {
// if( prefix.equals( namespacePrefix[ i ] ) ) {
// return namespaceUri[ i ];
// }
// }
// } else {
// for( int i = namespaceEnd -1; i >= 0; i--) {
// if( namespacePrefix[ i ] == null ) {
// return namespaceUri[ i ];
// }
// }
//
// }
// return "";
}
public String getName()
{
if ( eventType == START_TAG )
{
// return elName[ depth - 1 ] ;
return elName[depth];
}
else if ( eventType == END_TAG )
{
return elName[depth];
}
else if ( eventType == ENTITY_REF )
{
if ( entityRefName == null )
{
entityRefName = newString( buf, posStart, posEnd - posStart );
}
return entityRefName;
}
else
{
return null;
}
}
public String getPrefix()
{
if ( eventType == START_TAG )
{
// return elPrefix[ depth - 1 ] ;
return elPrefix[depth];
}
else if ( eventType == END_TAG )
{
return elPrefix[depth];
}
return null;
// if(eventType != START_TAG && eventType != END_TAG) return null;
// int maxDepth = eventType == END_TAG ? depth : depth - 1;
// return elPrefix[ maxDepth ];
}
public boolean isEmptyElementTag()
throws XmlPullParserException
{
if ( eventType != START_TAG )
throw new XmlPullParserException( "parser must be on START_TAG to check for empty element", this, null );
return emptyElementTag;
}
public int getAttributeCount()
{
if ( eventType != START_TAG )
return -1;
return attributeCount;
}
public String getAttributeNamespace( int index )
{
if ( eventType != START_TAG )
throw new IndexOutOfBoundsException( "only START_TAG can have attributes" );
if ( processNamespaces == false )
return NO_NAMESPACE;
if ( index < 0 || index >= attributeCount )
throw new IndexOutOfBoundsException( "attribute position must be 0.." + ( attributeCount - 1 ) + " and not "
+ index );
return attributeUri[index];
}
public String getAttributeName( int index )
{
if ( eventType != START_TAG )
throw new IndexOutOfBoundsException( "only START_TAG can have attributes" );
if ( index < 0 || index >= attributeCount )
throw new IndexOutOfBoundsException( "attribute position must be 0.." + ( attributeCount - 1 ) + " and not "
+ index );
return attributeName[index];
}
public String getAttributePrefix( int index )
{
if ( eventType != START_TAG )
throw new IndexOutOfBoundsException( "only START_TAG can have attributes" );
if ( processNamespaces == false )
return null;
if ( index < 0 || index >= attributeCount )
throw new IndexOutOfBoundsException( "attribute position must be 0.." + ( attributeCount - 1 ) + " and not "
+ index );
return attributePrefix[index];
}
public String getAttributeType( int index )
{
if ( eventType != START_TAG )
throw new IndexOutOfBoundsException( "only START_TAG can have attributes" );
if ( index < 0 || index >= attributeCount )
throw new IndexOutOfBoundsException( "attribute position must be 0.." + ( attributeCount - 1 ) + " and not "
+ index );
return "CDATA";
}
public boolean isAttributeDefault( int index )
{
if ( eventType != START_TAG )
throw new IndexOutOfBoundsException( "only START_TAG can have attributes" );
if ( index < 0 || index >= attributeCount )
throw new IndexOutOfBoundsException( "attribute position must be 0.." + ( attributeCount - 1 ) + " and not "
+ index );
return false;
}
public String getAttributeValue( int index )
{
if ( eventType != START_TAG )
throw new IndexOutOfBoundsException( "only START_TAG can have attributes" );
if ( index < 0 || index >= attributeCount )
throw new IndexOutOfBoundsException( "attribute position must be 0.." + ( attributeCount - 1 ) + " and not "
+ index );
return attributeValue[index];
}
public String getAttributeValue( String namespace, String name )
{
if ( eventType != START_TAG )
throw new IndexOutOfBoundsException( "only START_TAG can have attributes" + getPositionDescription() );
if ( name == null )
{
throw new IllegalArgumentException( "attribute name can not be null" );
}
// TODO make check if namespace is interned!!! etc. for names!!!
if ( processNamespaces )
{
if ( namespace == null )
{
namespace = "";
}
for ( int i = 0; i < attributeCount; ++i )
{
if ( ( namespace == attributeUri[i] || namespace.equals( attributeUri[i] ) )
// (namespace != null && namespace.equals(attributeUri[ i ]))
// taking advantage of String.intern()
&& name.equals( attributeName[i] ) )
{
return attributeValue[i];
}
}
}
else
{
if ( namespace != null && namespace.length() == 0 )
{
namespace = null;
}
if ( namespace != null )
throw new IllegalArgumentException( "when namespaces processing is disabled attribute namespace must be null" );
for ( int i = 0; i < attributeCount; ++i )
{
if ( name.equals( attributeName[i] ) )
{
return attributeValue[i];
}
}
}
return null;
}
public int getEventType()
throws XmlPullParserException
{
return eventType;
}
public void require( int type, String namespace, String name )
throws XmlPullParserException, IOException
{
if ( processNamespaces == false && namespace != null )
{
throw new XmlPullParserException( "processing namespaces must be enabled on parser (or factory)"
+ " to have possible namespaces declared on elements" + ( " (position:" + getPositionDescription() )
+ ")" );
}
if ( type != getEventType() || ( namespace != null && !namespace.equals( getNamespace() ) )
|| ( name != null && !name.equals( getName() ) ) )
{
throw new XmlPullParserException( "expected event " + TYPES[type]
+ ( name != null ? " with name '" + name + "'" : "" )
+ ( namespace != null && name != null ? " and" : "" )
+ ( namespace != null ? " with namespace '" + namespace + "'" : "" ) + " but got"
+ ( type != getEventType() ? " " + TYPES[getEventType()] : "" )
+ ( name != null && getName() != null && !name.equals( getName() ) ? " name '" + getName() + "'" : "" )
+ ( namespace != null && name != null && getName() != null && !name.equals( getName() )
&& getNamespace() != null && !namespace.equals( getNamespace() ) ? " and" : "" )
+ ( namespace != null && getNamespace() != null && !namespace.equals( getNamespace() )
? " namespace '" + getNamespace() + "'"
: "" )
+ ( " (position:" + getPositionDescription() ) + ")" );
}
}
/**
* Skip sub tree that is currently parser positioned on.
* NOTE: parser must be on START_TAG and when function returns parser will be positioned on corresponding END_TAG
*/
public void skipSubTree()
throws XmlPullParserException, IOException
{
require( START_TAG, null, null );
int level = 1;
while ( level > 0 )
{
int eventType = next();
if ( eventType == END_TAG )
{
--level;
}
else if ( eventType == START_TAG )
{
++level;
}
}
}
// public String readText() throws XmlPullParserException, IOException
// {
// if (getEventType() != TEXT) return "";
// String result = getText();
// next();
// return result;
// }
public String nextText()
throws XmlPullParserException, IOException
{
// String result = null;
// boolean onStartTag = false;
// if(eventType == START_TAG) {
// onStartTag = true;
// next();
// }
// if(eventType == TEXT) {
// result = getText();
// next();
// } else if(onStartTag && eventType == END_TAG) {
// result = "";
// } else {
// throw new XmlPullParserException(
// "parser must be on START_TAG or TEXT to read text", this, null);
// }
// if(eventType != END_TAG) {
// throw new XmlPullParserException(
// "event TEXT it must be immediately followed by END_TAG", this, null);
// }
// return result;
if ( getEventType() != START_TAG )
{
throw new XmlPullParserException( "parser must be on START_TAG to read next text", this, null );
}
int eventType = next();
if ( eventType == TEXT )
{
final String result = getText();
eventType = next();
if ( eventType != END_TAG )
{
throw new XmlPullParserException( "TEXT must be immediately followed by END_TAG and not "
+ TYPES[getEventType()], this, null );
}
return result;
}
else if ( eventType == END_TAG )
{
return "";
}
else
{
throw new XmlPullParserException( "parser must be on START_TAG or TEXT to read text", this, null );
}
}
public int nextTag()
throws XmlPullParserException, IOException
{
next();
if ( eventType == TEXT && isWhitespace() )
{ // skip whitespace
next();
}
if ( eventType != START_TAG && eventType != END_TAG )
{
throw new XmlPullParserException( "expected START_TAG or END_TAG not " + TYPES[getEventType()], this,
null );
}
return eventType;
}
public int next()
throws XmlPullParserException, IOException
{
tokenize = false;
return nextImpl();
}
public int nextToken()
throws XmlPullParserException, IOException
{
tokenize = true;
return nextImpl();
}
protected int nextImpl()
throws XmlPullParserException, IOException
{
text = null;
pcEnd = pcStart = 0;
usePC = false;
bufStart = posEnd;
if ( pastEndTag )
{
pastEndTag = false;
--depth;
namespaceEnd = elNamespaceCount[depth]; // less namespaces available
}
if ( emptyElementTag )
{
emptyElementTag = false;
pastEndTag = true;
return eventType = END_TAG;
}
// [1] document ::= prolog element Misc*
if ( depth > 0 )
{
if ( seenStartTag )
{
seenStartTag = false;
return eventType = parseStartTag();
}
if ( seenEndTag )
{
seenEndTag = false;
return eventType = parseEndTag();
}
// ASSUMPTION: we are _on_ first character of content or markup!!!!
// [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
char ch;
if ( seenMarkup )
{ // we have read ahead ...
seenMarkup = false;
ch = '<';
}
else if ( seenAmpersand )
{
seenAmpersand = false;
ch = '&';
}
else
{
ch = more();
}
posStart = pos - 1; // VERY IMPORTANT: this is correct start of event!!!
// when true there is some potential event TEXT to return - keep gathering
boolean hadCharData = false;
// when true TEXT data is not continuous (like ) and requires PC merging
boolean needsMerging = false;
MAIN_LOOP: while ( true )
{
// work on MARKUP
if ( ch == '<' )
{
if ( hadCharData )
{
// posEnd = pos - 1;
if ( tokenize )
{
seenMarkup = true;
return eventType = TEXT;
}
}
ch = more();
if ( ch == '/' )
{
if ( !tokenize && hadCharData )
{
seenEndTag = true;
// posEnd = pos - 2;
return eventType = TEXT;
}
return eventType = parseEndTag();
}
else if ( ch == '!' )
{
ch = more();
if ( ch == '-' )
{
// note: if(tokenize == false) posStart/End is NOT changed!!!!
parseComment();
if ( tokenize )
return eventType = COMMENT;
if ( !usePC && hadCharData )
{
needsMerging = true;
}
else
{
posStart = pos; // completely ignore comment
}
}
else if ( ch == '[' )
{
// posEnd = pos - 3;
// must remember previous posStart/End as it merges with content of CDATA
// int oldStart = posStart + bufAbsoluteStart;
// int oldEnd = posEnd + bufAbsoluteStart;
parseCDSect( hadCharData );
if ( tokenize )
return eventType = CDSECT;
final int cdStart = posStart;
final int cdEnd = posEnd;
final int cdLen = cdEnd - cdStart;
if ( cdLen > 0 )
{ // was there anything inside CDATA section?
hadCharData = true;
if ( !usePC )
{
needsMerging = true;
}
}
// posStart = oldStart;
// posEnd = oldEnd;
// if(cdLen > 0) { // was there anything inside CDATA section?
// if(hadCharData) {
// // do merging if there was anything in CDSect!!!!
// // if(!usePC) {
// // // posEnd is correct already!!!
// // if(posEnd > posStart) {
// // joinPC();
// // } else {
// // usePC = true;
// // pcStart = pcEnd = 0;
// // }
// // }
// // if(pcEnd + cdLen >= pc.length) ensurePC(pcEnd + cdLen);
// // // copy [cdStart..cdEnd) into PC
// // System.arraycopy(buf, cdStart, pc, pcEnd, cdLen);
// // pcEnd += cdLen;
// if(!usePC) {
// needsMerging = true;
// posStart = cdStart;
// posEnd = cdEnd;
// }
// } else {
// if(!usePC) {
// needsMerging = true;
// posStart = cdStart;
// posEnd = cdEnd;
// hadCharData = true;
// }
// }
// //hadCharData = true;
// } else {
// if( !usePC && hadCharData ) {
// needsMerging = true;
// }
// }
}
else
{
throw new XmlPullParserException( "unexpected character in markup " + printable( ch ), this,
null );
}
}
else if ( ch == '?' )
{
parsePI();
if ( tokenize )
return eventType = PROCESSING_INSTRUCTION;
if ( !usePC && hadCharData )
{
needsMerging = true;
}
else
{
posStart = pos; // completely ignore PI
}
}
else if ( isNameStartChar( ch ) )
{
if ( !tokenize && hadCharData )
{
seenStartTag = true;
// posEnd = pos - 2;
return eventType = TEXT;
}
return eventType = parseStartTag();
}
else
{
throw new XmlPullParserException( "unexpected character in markup " + printable( ch ), this,
null );
}
// do content compaction if it makes sense!!!!
}
else if ( ch == '&' )
{
// work on ENTITY
// posEnd = pos - 1;
if ( tokenize && hadCharData )
{
seenAmpersand = true;
return eventType = TEXT;
}
final int oldStart = posStart + bufAbsoluteStart;
final int oldEnd = posEnd + bufAbsoluteStart;
final char[] resolvedEntity = parseEntityRef();
if ( tokenize )
return eventType = ENTITY_REF;
// check if replacement text can be resolved !!!
if ( resolvedEntity == null )
{
if ( entityRefName == null )
{
entityRefName = newString( buf, posStart, posEnd - posStart );
}
throw new XmlPullParserException( "could not resolve entity named '"
+ printable( entityRefName ) + "'", this, null );
}
// int entStart = posStart;
// int entEnd = posEnd;
posStart = oldStart - bufAbsoluteStart;
posEnd = oldEnd - bufAbsoluteStart;
if ( !usePC )
{
if ( hadCharData )
{
joinPC(); // posEnd is already set correctly!!!
needsMerging = false;
}
else
{
usePC = true;
pcStart = pcEnd = 0;
}
}
// assert usePC == true;
// write into PC replacement text - do merge for replacement text!!!!
for ( char aResolvedEntity : resolvedEntity )
{
if ( pcEnd >= pc.length )
{
ensurePC( pcEnd );
}
pc[pcEnd++] = aResolvedEntity;
}
hadCharData = true;
// assert needsMerging == false;
}
else
{
if ( needsMerging )
{
// assert usePC == false;
joinPC(); // posEnd is already set correctly!!!
// posStart = pos - 1;
needsMerging = false;
}
// no MARKUP not ENTITIES so work on character data ...
// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
hadCharData = true;
boolean normalizedCR = false;
final boolean normalizeInput = tokenize == false || roundtripSupported == false;
// use loop locality here!!!!
boolean seenBracket = false;
boolean seenBracketBracket = false;
do
{
// check that ]]> does not show in
if ( ch == ']' )
{
if ( seenBracket )
{
seenBracketBracket = true;
}
else
{
seenBracket = true;
}
}
else if ( seenBracketBracket && ch == '>' )
{
throw new XmlPullParserException( "characters ]]> are not allowed in content", this, null );
}
else
{
if ( seenBracket )
{
seenBracketBracket = seenBracket = false;
}
// assert seenTwoBrackets == seenBracket == false;
}
if ( normalizeInput )
{
// deal with normalization issues ...
if ( ch == '\r' )
{
normalizedCR = true;
posEnd = pos - 1;
// posEnd is already set
if ( !usePC )
{
if ( posEnd > posStart )
{
joinPC();
}
else
{
usePC = true;
pcStart = pcEnd = 0;
}
}
// assert usePC == true;
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
else if ( ch == '\n' )
{
// if(!usePC) { joinPC(); } else { if(pcEnd >= pc.length) ensurePC(); }
if ( !normalizedCR && usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
normalizedCR = false;
}
else
{
if ( usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = ch;
}
normalizedCR = false;
}
}
ch = more();
}
while ( ch != '<' && ch != '&' );
posEnd = pos - 1;
continue MAIN_LOOP; // skip ch = more() from below - we are already ahead ...
}
ch = more();
} // endless while(true)
}
else
{
if ( seenRoot )
{
return parseEpilog();
}
else
{
return parseProlog();
}
}
}
protected int parseProlog()
throws XmlPullParserException, IOException
{
// [2] prolog: ::= XMLDecl? Misc* (doctypedecl Misc*)? and look for [39] element
char ch;
if ( seenMarkup )
{
ch = buf[pos - 1];
}
else
{
ch = more();
}
if ( eventType == START_DOCUMENT )
{
// bootstrap parsing with getting first character input!
// deal with BOM
// detect BOM and crop it (Unicode int Order Mark)
if ( ch == '\uFFFE' )
{
throw new XmlPullParserException( "first character in input was UNICODE noncharacter (0xFFFE)"
+ "- input requires int swapping", this, null );
}
if ( ch == '\uFEFF' )
{
// skipping UNICODE int Order Mark (so called BOM)
ch = more();
}
}
seenMarkup = false;
boolean gotS = false;
posStart = pos - 1;
final boolean normalizeIgnorableWS = tokenize == true && roundtripSupported == false;
boolean normalizedCR = false;
while ( true )
{
// deal with Misc
// [27] Misc ::= Comment | PI | S
// deal with docdecl --> mark it!
// else parseStartTag seen <[^/]
if ( ch == '<' )
{
if ( gotS && tokenize )
{
posEnd = pos - 1;
seenMarkup = true;
return eventType = IGNORABLE_WHITESPACE;
}
ch = more();
if ( ch == '?' )
{
// check if it is 'xml'
// deal with XMLDecl
boolean isXMLDecl = parsePI();
if ( tokenize )
{
if ( isXMLDecl )
{
return eventType = START_DOCUMENT;
}
return eventType = PROCESSING_INSTRUCTION;
}
}
else if ( ch == '!' )
{
ch = more();
if ( ch == 'D' )
{
if ( seenDocdecl )
{
throw new XmlPullParserException( "only one docdecl allowed in XML document", this, null );
}
seenDocdecl = true;
parseDocdecl();
if ( tokenize )
return eventType = DOCDECL;
}
else if ( ch == '-' )
{
parseComment();
if ( tokenize )
return eventType = COMMENT;
}
else
{
throw new XmlPullParserException( "unexpected markup posStart )
{
joinPC();
}
else
{
usePC = true;
pcStart = pcEnd = 0;
}
}
// assert usePC == true;
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
else if ( ch == '\n' )
{
if ( !normalizedCR && usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
normalizedCR = false;
}
else
{
if ( usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = ch;
}
normalizedCR = false;
}
}
}
else
{
throw new XmlPullParserException( "only whitespace content allowed before start tag and not "
+ printable( ch ), this, null );
}
ch = more();
}
}
protected int parseEpilog()
throws XmlPullParserException, IOException
{
if ( eventType == END_DOCUMENT )
{
throw new XmlPullParserException( "already reached end of XML input", this, null );
}
if ( reachedEnd )
{
return eventType = END_DOCUMENT;
}
boolean gotS = false;
final boolean normalizeIgnorableWS = tokenize == true && roundtripSupported == false;
boolean normalizedCR = false;
try
{
// epilog: Misc*
char ch;
if ( seenMarkup )
{
ch = buf[pos - 1];
}
else
{
ch = more();
}
seenMarkup = false;
posStart = pos - 1;
if ( !reachedEnd )
{
while ( true )
{
// deal with Misc
// [27] Misc ::= Comment | PI | S
if ( ch == '<' )
{
if ( gotS && tokenize )
{
posEnd = pos - 1;
seenMarkup = true;
return eventType = IGNORABLE_WHITESPACE;
}
ch = more();
if ( reachedEnd )
{
break;
}
if ( ch == '?' )
{
// check if it is 'xml'
// deal with XMLDecl
parsePI();
if ( tokenize )
return eventType = PROCESSING_INSTRUCTION;
}
else if ( ch == '!' )
{
ch = more();
if ( reachedEnd )
{
break;
}
if ( ch == 'D' )
{
parseDocdecl(); // FIXME
if ( tokenize )
return eventType = DOCDECL;
}
else if ( ch == '-' )
{
parseComment();
if ( tokenize )
return eventType = COMMENT;
}
else
{
throw new XmlPullParserException( "unexpected markup posStart )
{
joinPC();
}
else
{
usePC = true;
pcStart = pcEnd = 0;
}
}
// assert usePC == true;
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
else if ( ch == '\n' )
{
if ( !normalizedCR && usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
normalizedCR = false;
}
else
{
if ( usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = ch;
}
normalizedCR = false;
}
}
}
else
{
throw new XmlPullParserException( "in epilog non whitespace content is not allowed but got "
+ printable( ch ), this, null );
}
ch = more();
if ( reachedEnd )
{
break;
}
}
}
// throw Exception("unexpected content in epilog
// catch EOFException return END_DOCUMENT
// try {
}
catch ( EOFException ex )
{
reachedEnd = true;
}
if ( reachedEnd )
{
if ( tokenize && gotS )
{
posEnd = pos; // well - this is LAST available character pos
return eventType = IGNORABLE_WHITESPACE;
}
return eventType = END_DOCUMENT;
}
else
{
throw new XmlPullParserException( "internal error in parseEpilog" );
}
}
public int parseEndTag()
throws XmlPullParserException, IOException
{
// ASSUMPTION ch is past ""
// [42] ETag ::= '' Name S? '>'
char ch = more();
if ( !isNameStartChar( ch ) )
{
throw new XmlPullParserException( "expected name start and not " + printable( ch ), this, null );
}
posStart = pos - 3;
final int nameStart = pos - 1 + bufAbsoluteStart;
do
{
ch = more();
}
while ( isNameChar( ch ) );
// now we go one level down -- do checks
// --depth; //FIXME
// check that end tag name is the same as start tag
// String name = new String(buf, nameStart - bufAbsoluteStart,
// (pos - 1) - (nameStart - bufAbsoluteStart));
// int last = pos - 1;
int off = nameStart - bufAbsoluteStart;
// final int len = last - off;
final int len = ( pos - 1 ) - off;
final char[] cbuf = elRawName[depth];
if ( elRawNameEnd[depth] != len )
{
// construct strings for exception
final String startname = new String( cbuf, 0, elRawNameEnd[depth] );
final String endname = new String( buf, off, len );
throw new XmlPullParserException( "end tag name " + endname + "> must match start tag name <" + startname
+ ">" + " from line " + elRawNameLine[depth], this, null );
}
for ( int i = 0; i < len; i++ )
{
if ( buf[off++] != cbuf[i] )
{
// construct strings for exception
final String startname = new String( cbuf, 0, len );
final String endname = new String( buf, off - i - 1, len );
throw new XmlPullParserException( "end tag name " + endname + "> must be the same as start tag <"
+ startname + ">" + " from line " + elRawNameLine[depth], this, null );
}
}
while ( isS( ch ) )
{
ch = more();
} // skip additional white spaces
if ( ch != '>' )
{
throw new XmlPullParserException( "expected > to finsh end tag not " + printable( ch ) + " from line "
+ elRawNameLine[depth], this, null );
}
// namespaceEnd = elNamespaceCount[ depth ]; //FIXME
posEnd = pos;
pastEndTag = true;
return eventType = END_TAG;
}
public int parseStartTag()
throws XmlPullParserException, IOException
{
// ASSUMPTION ch is past '
// [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
++depth; // FIXME
posStart = pos - 2;
emptyElementTag = false;
attributeCount = 0;
// retrieve name
final int nameStart = pos - 1 + bufAbsoluteStart;
int colonPos = -1;
char ch = buf[pos - 1];
if ( ch == ':' && processNamespaces )
throw new XmlPullParserException( "when namespaces processing enabled colon can not be at element name start",
this, null );
while ( true )
{
ch = more();
if ( !isNameChar( ch ) )
break;
if ( ch == ':' && processNamespaces )
{
if ( colonPos != -1 )
throw new XmlPullParserException( "only one colon is allowed in name of element when namespaces are enabled",
this, null );
colonPos = pos - 1 + bufAbsoluteStart;
}
}
// retrieve name
ensureElementsCapacity();
// TODO check for efficient interning and then use elRawNameInterned!!!!
int elLen = ( pos - 1 ) - ( nameStart - bufAbsoluteStart );
if ( elRawName[depth] == null || elRawName[depth].length < elLen )
{
elRawName[depth] = new char[2 * elLen];
}
System.arraycopy( buf, nameStart - bufAbsoluteStart, elRawName[depth], 0, elLen );
elRawNameEnd[depth] = elLen;
elRawNameLine[depth] = lineNumber;
String name = null;
// work on prefixes and namespace URI
String prefix = null;
if ( processNamespaces )
{
if ( colonPos != -1 )
{
prefix = elPrefix[depth] = newString( buf, nameStart - bufAbsoluteStart, colonPos - nameStart );
name = elName[depth] = newString( buf, colonPos + 1 - bufAbsoluteStart,
// (pos -1) - (colonPos + 1));
pos - 2 - ( colonPos - bufAbsoluteStart ) );
}
else
{
prefix = elPrefix[depth] = null;
name = elName[depth] = newString( buf, nameStart - bufAbsoluteStart, elLen );
}
}
else
{
name = elName[depth] = newString( buf, nameStart - bufAbsoluteStart, elLen );
}
while ( true )
{
while ( isS( ch ) )
{
ch = more();
} // skip additional white spaces
if ( ch == '>' )
{
break;
}
else if ( ch == '/' )
{
if ( emptyElementTag )
throw new XmlPullParserException( "repeated / in tag declaration", this, null );
emptyElementTag = true;
ch = more();
if ( ch != '>' )
throw new XmlPullParserException( "expected > to end empty tag not " + printable( ch ), this,
null );
break;
}
else if ( isNameStartChar( ch ) )
{
ch = parseAttribute();
ch = more();
continue;
}
else
{
throw new XmlPullParserException( "start tag unexpected character " + printable( ch ), this, null );
}
// ch = more(); // skip space
}
// now when namespaces were declared we can resolve them
if ( processNamespaces )
{
String uri = getNamespace( prefix );
if ( uri == null )
{
if ( prefix == null )
{ // no prefix and no uri => use default namespace
uri = NO_NAMESPACE;
}
else
{
throw new XmlPullParserException( "could not determine namespace bound to element prefix " + prefix,
this, null );
}
}
elUri[depth] = uri;
// String uri = getNamespace(prefix);
// if(uri == null && prefix == null) { // no prefix and no uri => use default namespace
// uri = "";
// }
// resolve attribute namespaces
for ( int i = 0; i < attributeCount; i++ )
{
final String attrPrefix = attributePrefix[i];
if ( attrPrefix != null )
{
final String attrUri = getNamespace( attrPrefix );
if ( attrUri == null )
{
throw new XmlPullParserException( "could not determine namespace bound to attribute prefix "
+ attrPrefix, this, null );
}
attributeUri[i] = attrUri;
}
else
{
attributeUri[i] = NO_NAMESPACE;
}
}
// TODO
// [ WFC: Unique Att Spec ]
// check namespaced attribute uniqueness constraint!!!
for ( int i = 1; i < attributeCount; i++ )
{
for ( int j = 0; j < i; j++ )
{
if ( attributeUri[j] == attributeUri[i]
&& ( allStringsInterned && attributeName[j].equals( attributeName[i] )
|| ( !allStringsInterned && attributeNameHash[j] == attributeNameHash[i]
&& attributeName[j].equals( attributeName[i] ) ) )
)
{
// prepare data for nice error message?
String attr1 = attributeName[j];
if ( attributeUri[j] != null )
attr1 = attributeUri[j] + ":" + attr1;
String attr2 = attributeName[i];
if ( attributeUri[i] != null )
attr2 = attributeUri[i] + ":" + attr2;
throw new XmlPullParserException( "duplicated attributes " + attr1 + " and " + attr2, this,
null );
}
}
}
}
else
{ // ! processNamespaces
// [ WFC: Unique Att Spec ]
// check raw attribute uniqueness constraint!!!
for ( int i = 1; i < attributeCount; i++ )
{
for ( int j = 0; j < i; j++ )
{
if ( ( allStringsInterned && attributeName[j].equals( attributeName[i] )
|| ( !allStringsInterned && attributeNameHash[j] == attributeNameHash[i]
&& attributeName[j].equals( attributeName[i] ) ) )
)
{
// prepare data for nice error message?
final String attr1 = attributeName[j];
final String attr2 = attributeName[i];
throw new XmlPullParserException( "duplicated attributes " + attr1 + " and " + attr2, this,
null );
}
}
}
}
elNamespaceCount[depth] = namespaceEnd;
posEnd = pos;
return eventType = START_TAG;
}
protected char parseAttribute()
throws XmlPullParserException, IOException
{
// parse attribute
// [41] Attribute ::= Name Eq AttValue
// [WFC: No External Entity References]
// [WFC: No < in Attribute Values]
final int prevPosStart = posStart + bufAbsoluteStart;
final int nameStart = pos - 1 + bufAbsoluteStart;
int colonPos = -1;
char ch = buf[pos - 1];
if ( ch == ':' && processNamespaces )
throw new XmlPullParserException( "when namespaces processing enabled colon can not be at attribute name start",
this, null );
boolean startsWithXmlns = processNamespaces && ch == 'x';
int xmlnsPos = 0;
ch = more();
while ( isNameChar( ch ) )
{
if ( processNamespaces )
{
if ( startsWithXmlns && xmlnsPos < 5 )
{
++xmlnsPos;
if ( xmlnsPos == 1 )
{
if ( ch != 'm' )
startsWithXmlns = false;
}
else if ( xmlnsPos == 2 )
{
if ( ch != 'l' )
startsWithXmlns = false;
}
else if ( xmlnsPos == 3 )
{
if ( ch != 'n' )
startsWithXmlns = false;
}
else if ( xmlnsPos == 4 )
{
if ( ch != 's' )
startsWithXmlns = false;
}
else if ( xmlnsPos == 5 )
{
if ( ch != ':' )
throw new XmlPullParserException( "after xmlns in attribute name must be colon"
+ "when namespaces are enabled", this, null );
// colonPos = pos - 1 + bufAbsoluteStart;
}
}
if ( ch == ':' )
{
if ( colonPos != -1 )
throw new XmlPullParserException( "only one colon is allowed in attribute name"
+ " when namespaces are enabled", this, null );
colonPos = pos - 1 + bufAbsoluteStart;
}
}
ch = more();
}
ensureAttributesCapacity( attributeCount );
// --- start processing attributes
String name = null;
String prefix = null;
// work on prefixes and namespace URI
if ( processNamespaces )
{
if ( xmlnsPos < 4 )
startsWithXmlns = false;
if ( startsWithXmlns )
{
if ( colonPos != -1 )
{
// prefix = attributePrefix[ attributeCount ] = null;
final int nameLen = pos - 2 - ( colonPos - bufAbsoluteStart );
if ( nameLen == 0 )
{
throw new XmlPullParserException( "namespace prefix is required after xmlns: "
+ " when namespaces are enabled", this, null );
}
name = // attributeName[ attributeCount ] =
newString( buf, colonPos - bufAbsoluteStart + 1, nameLen );
// pos - 1 - (colonPos + 1 - bufAbsoluteStart)
}
}
else
{
if ( colonPos != -1 )
{
int prefixLen = colonPos - nameStart;
prefix =
attributePrefix[attributeCount] = newString( buf, nameStart - bufAbsoluteStart, prefixLen );
// colonPos - (nameStart - bufAbsoluteStart));
int nameLen = pos - 2 - ( colonPos - bufAbsoluteStart );
name = attributeName[attributeCount] = newString( buf, colonPos - bufAbsoluteStart + 1, nameLen );
// pos - 1 - (colonPos + 1 - bufAbsoluteStart));
// name.substring(0, colonPos-nameStart);
}
else
{
prefix = attributePrefix[attributeCount] = null;
name = attributeName[attributeCount] =
newString( buf, nameStart - bufAbsoluteStart, pos - 1 - ( nameStart - bufAbsoluteStart ) );
}
if ( !allStringsInterned )
{
attributeNameHash[attributeCount] = name.hashCode();
}
}
}
else
{
// retrieve name
name = attributeName[attributeCount] =
newString( buf, nameStart - bufAbsoluteStart, pos - 1 - ( nameStart - bufAbsoluteStart ) );
//// assert name != null;
if ( !allStringsInterned )
{
attributeNameHash[attributeCount] = name.hashCode();
}
}
// [25] Eq ::= S? '=' S?
while ( isS( ch ) )
{
ch = more();
} // skip additional spaces
if ( ch != '=' )
throw new XmlPullParserException( "expected = after attribute name", this, null );
ch = more();
while ( isS( ch ) )
{
ch = more();
} // skip additional spaces
// [10] AttValue ::= '"' ([^<&"] | Reference)* '"'
// | "'" ([^<&'] | Reference)* "'"
final char delimit = ch;
if ( delimit != '"' && delimit != '\'' )
throw new XmlPullParserException( "attribute value must start with quotation or apostrophe not "
+ printable( delimit ), this, null );
// parse until delimit or < and resolve Reference
// [67] Reference ::= EntityRef | CharRef
// int valueStart = pos + bufAbsoluteStart;
boolean normalizedCR = false;
usePC = false;
pcStart = pcEnd;
posStart = pos;
while ( true )
{
ch = more();
if ( ch == delimit )
{
break;
}
if ( ch == '<' )
{
throw new XmlPullParserException( "markup not allowed inside attribute value - illegal < ", this,
null );
}
if ( ch == '&' )
{
// extractEntityRef
posEnd = pos - 1;
if ( !usePC )
{
final boolean hadCharData = posEnd > posStart;
if ( hadCharData )
{
// posEnd is already set correctly!!!
joinPC();
}
else
{
usePC = true;
pcStart = pcEnd = 0;
}
}
// assert usePC == true;
final char[] resolvedEntity = parseEntityRef();
// check if replacement text can be resolved !!!
if ( resolvedEntity == null )
{
if ( entityRefName == null )
{
entityRefName = newString( buf, posStart, posEnd - posStart );
}
throw new XmlPullParserException( "could not resolve entity named '" + printable( entityRefName )
+ "'", this, null );
}
// write into PC replacement text - do merge for replacement text!!!!
for ( char aResolvedEntity : resolvedEntity )
{
if ( pcEnd >= pc.length )
{
ensurePC( pcEnd );
}
pc[pcEnd++] = aResolvedEntity;
}
}
else if ( ch == '\t' || ch == '\n' || ch == '\r' )
{
// do attribute value normalization
// as described in http://www.w3.org/TR/REC-xml#AVNormalize
// TODO add test for it form spec ...
// handle EOL normalization ...
if ( !usePC )
{
posEnd = pos - 1;
if ( posEnd > posStart )
{
joinPC();
}
else
{
usePC = true;
pcEnd = pcStart = 0;
}
}
// assert usePC == true;
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
if ( ch != '\n' || !normalizedCR )
{
pc[pcEnd++] = ' '; // '\n';
}
}
else
{
if ( usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = ch;
}
}
normalizedCR = ch == '\r';
}
if ( processNamespaces && startsWithXmlns )
{
String ns = null;
if ( !usePC )
{
ns = newStringIntern( buf, posStart, pos - 1 - posStart );
}
else
{
ns = newStringIntern( pc, pcStart, pcEnd - pcStart );
}
ensureNamespacesCapacity( namespaceEnd );
int prefixHash = -1;
if ( colonPos != -1 )
{
if ( ns.length() == 0 )
{
throw new XmlPullParserException( "non-default namespace can not be declared to be empty string",
this, null );
}
// declare new namespace
namespacePrefix[namespaceEnd] = name;
if ( !allStringsInterned )
{
prefixHash = namespacePrefixHash[namespaceEnd] = name.hashCode();
}
}
else
{
// declare new default namespace...
namespacePrefix[namespaceEnd] = null; // ""; //null; //TODO check FIXME Alek
if ( !allStringsInterned )
{
prefixHash = namespacePrefixHash[namespaceEnd] = -1;
}
}
namespaceUri[namespaceEnd] = ns;
// detect duplicate namespace declarations!!!
final int startNs = elNamespaceCount[depth - 1];
for ( int i = namespaceEnd - 1; i >= startNs; --i )
{
if ( ( ( allStringsInterned || name == null ) && namespacePrefix[i] == name ) || ( !allStringsInterned
&& name != null && namespacePrefixHash[i] == prefixHash && name.equals( namespacePrefix[i] ) ) )
{
final String s = name == null ? "default" : "'" + name + "'";
throw new XmlPullParserException( "duplicated namespace declaration for " + s + " prefix", this,
null );
}
}
++namespaceEnd;
}
else
{
if ( !usePC )
{
attributeValue[attributeCount] = new String( buf, posStart, pos - 1 - posStart );
}
else
{
attributeValue[attributeCount] = new String( pc, pcStart, pcEnd - pcStart );
}
++attributeCount;
}
posStart = prevPosStart - bufAbsoluteStart;
return ch;
}
protected char[] charRefOneCharBuf = new char[1];
protected char[] parseEntityRef()
throws XmlPullParserException, IOException
{
// entity reference http://www.w3.org/TR/2000/REC-xml-20001006#NT-Reference
// [67] Reference ::= EntityRef | CharRef
// ASSUMPTION just after &
entityRefName = null;
posStart = pos;
char ch = more();
if ( ch == '#' )
{
// parse character reference
char charRef = 0;
ch = more();
StringBuilder sb = new StringBuilder();
boolean isHex = ( ch == 'x' );
if ( isHex )
{
// encoded in hex
while ( true )
{
ch = more();
if ( ch >= '0' && ch <= '9' )
{
charRef = (char) ( charRef * 16 + ( ch - '0' ) );
sb.append( ch );
}
else if ( ch >= 'a' && ch <= 'f' )
{
charRef = (char) ( charRef * 16 + ( ch - ( 'a' - 10 ) ) );
sb.append( ch );
}
else if ( ch >= 'A' && ch <= 'F' )
{
charRef = (char) ( charRef * 16 + ( ch - ( 'A' - 10 ) ) );
sb.append( ch );
}
else if ( ch == ';' )
{
break;
}
else
{
throw new XmlPullParserException( "character reference (with hex value) may not contain "
+ printable( ch ), this, null );
}
}
}
else
{
// encoded in decimal
while ( true )
{
if ( ch >= '0' && ch <= '9' )
{
charRef = (char) ( charRef * 10 + ( ch - '0' ) );
sb.append( ch );
}
else if ( ch == ';' )
{
break;
}
else
{
throw new XmlPullParserException( "character reference (with decimal value) may not contain "
+ printable( ch ), this, null );
}
ch = more();
}
}
posEnd = pos - 1;
try
{
charRefOneCharBuf = toChars( Integer.parseInt( sb.toString(), isHex ? 16 : 10 ) );
}
catch ( IllegalArgumentException e )
{
throw new XmlPullParserException( "character reference (with " + ( isHex ? "hex" : "decimal" )
+ " value " + sb.toString() + ") is invalid", this, null );
}
if ( tokenize )
{
text = newString( charRefOneCharBuf, 0, charRefOneCharBuf.length );
}
return charRefOneCharBuf;
}
else
{
// [68] EntityRef ::= '&' Name ';'
// scan anem until ;
if ( !isNameStartChar( ch ) )
{
throw new XmlPullParserException( "entity reference names can not start with character '"
+ printable( ch ) + "'", this, null );
}
while ( true )
{
ch = more();
if ( ch == ';' )
{
break;
}
if ( !isNameChar( ch ) )
{
throw new XmlPullParserException( "entity reference name can not contain character "
+ printable( ch ) + "'", this, null );
}
}
posEnd = pos - 1;
// determine what name maps to
final int len = posEnd - posStart;
if ( len == 2 && buf[posStart] == 'l' && buf[posStart + 1] == 't' )
{
if ( tokenize )
{
text = "<";
}
charRefOneCharBuf[0] = '<';
return charRefOneCharBuf;
// if(paramPC || isParserTokenizing) {
// if(pcEnd >= pc.length) ensurePC();
// pc[pcEnd++] = '<';
// }
}
else if ( len == 3 && buf[posStart] == 'a' && buf[posStart + 1] == 'm' && buf[posStart + 2] == 'p' )
{
if ( tokenize )
{
text = "&";
}
charRefOneCharBuf[0] = '&';
return charRefOneCharBuf;
}
else if ( len == 2 && buf[posStart] == 'g' && buf[posStart + 1] == 't' )
{
if ( tokenize )
{
text = ">";
}
charRefOneCharBuf[0] = '>';
return charRefOneCharBuf;
}
else if ( len == 4 && buf[posStart] == 'a' && buf[posStart + 1] == 'p' && buf[posStart + 2] == 'o'
&& buf[posStart + 3] == 's' )
{
if ( tokenize )
{
text = "'";
}
charRefOneCharBuf[0] = '\'';
return charRefOneCharBuf;
}
else if ( len == 4 && buf[posStart] == 'q' && buf[posStart + 1] == 'u' && buf[posStart + 2] == 'o'
&& buf[posStart + 3] == 't' )
{
if ( tokenize )
{
text = "\"";
}
charRefOneCharBuf[0] = '"';
return charRefOneCharBuf;
}
else
{
final char[] result = lookuEntityReplacement( len );
if ( result != null )
{
return result;
}
}
if ( tokenize )
text = null;
return null;
}
}
protected char[] lookuEntityReplacement( int entityNameLen )
throws XmlPullParserException, IOException
{
if ( !allStringsInterned )
{
final int hash = fastHash( buf, posStart, posEnd - posStart );
LOOP: for ( int i = entityEnd - 1; i >= 0; --i )
{
if ( hash == entityNameHash[i] && entityNameLen == entityNameBuf[i].length )
{
final char[] entityBuf = entityNameBuf[i];
for ( int j = 0; j < entityNameLen; j++ )
{
if ( buf[posStart + j] != entityBuf[j] )
continue LOOP;
}
if ( tokenize )
text = entityReplacement[i];
return entityReplacementBuf[i];
}
}
}
else
{
entityRefName = newString( buf, posStart, posEnd - posStart );
for ( int i = entityEnd - 1; i >= 0; --i )
{
// take advantage that interning for newString is enforced
if ( entityRefName == entityName[i] )
{
if ( tokenize )
text = entityReplacement[i];
return entityReplacementBuf[i];
}
}
}
return null;
}
protected void parseComment()
throws XmlPullParserException, IOException
{
// implements XML 1.0 Section 2.5 Comments
// ASSUMPTION: seen
ch = more();
if ( seenDashDash && ch != '>' )
{
throw new XmlPullParserException( "in comment after two dashes (--) next character must be >"
+ " not " + printable( ch ), this, null );
}
if ( ch == '-' )
{
if ( !seenDash )
{
seenDash = true;
}
else
{
seenDashDash = true;
seenDash = false;
}
}
else if ( ch == '>' )
{
if ( seenDashDash )
{
break; // found end sequence!!!!
}
else
{
seenDashDash = false;
}
seenDash = false;
}
else
{
seenDash = false;
}
if ( normalizeIgnorableWS )
{
if ( ch == '\r' )
{
normalizedCR = true;
// posEnd = pos -1;
// joinPC();
// posEnd is alreadys set
if ( !usePC )
{
posEnd = pos - 1;
if ( posEnd > posStart )
{
joinPC();
}
else
{
usePC = true;
pcStart = pcEnd = 0;
}
}
// assert usePC == true;
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
else if ( ch == '\n' )
{
if ( !normalizedCR && usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
normalizedCR = false;
}
else
{
if ( usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = ch;
}
normalizedCR = false;
}
}
}
}
catch ( EOFException ex )
{
// detect EOF and create meaningful error ...
throw new XmlPullParserException( "comment started on line " + curLine + " and column " + curColumn
+ " was not closed", this, ex );
}
if ( tokenize )
{
posEnd = pos - 3;
if ( usePC )
{
pcEnd -= 2;
}
}
}
protected boolean parsePI()
throws XmlPullParserException, IOException
{
// implements XML 1.0 Section 2.6 Processing Instructions
// [16] PI ::= '' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
// ASSUMPTION: seen
if ( tokenize )
posStart = pos;
final int curLine = lineNumber;
final int curColumn = columnNumber;
int piTargetStart = pos;
int piTargetEnd = -1;
final boolean normalizeIgnorableWS = tokenize == true && roundtripSupported == false;
boolean normalizedCR = false;
try
{
boolean seenPITarget = false;
boolean seenQ = false;
char ch = more();
if ( isS( ch ) )
{
throw new XmlPullParserException( "processing instruction PITarget must be exactly after and not white space character",
this, null );
}
while ( true )
{
// scan until it hits ?>
// ch = more();
if ( ch == '?' )
{
if ( !seenPITarget )
{
throw new XmlPullParserException( "processing instruction PITarget name not found", this,
null );
}
seenQ = true;
}
else if ( ch == '>' )
{
if ( seenQ )
{
break; // found end sequence!!!!
}
if ( !seenPITarget )
{
throw new XmlPullParserException( "processing instruction PITarget name not found", this,
null );
}
}
else
{
if ( piTargetEnd == -1 && isS( ch ) )
{
piTargetEnd = pos - 1;
// [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
if ( ( piTargetEnd - piTargetStart ) == 3 )
{
if ( ( buf[piTargetStart] == 'x' || buf[piTargetStart] == 'X' )
&& ( buf[piTargetStart + 1] == 'm' || buf[piTargetStart + 1] == 'M' )
&& ( buf[piTargetStart + 2] == 'l' || buf[piTargetStart + 2] == 'L' ) )
{
if ( piTargetStart > 3 )
{ // posStart )
{
joinPC();
}
else
{
usePC = true;
pcStart = pcEnd = 0;
}
}
// assert usePC == true;
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
else if ( ch == '\n' )
{
if ( !normalizedCR && usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
normalizedCR = false;
}
else
{
if ( usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = ch;
}
normalizedCR = false;
}
}
seenPITarget = true;
ch = more();
}
}
catch ( EOFException ex )
{
// detect EOF and create meaningful error ...
throw new XmlPullParserException( "processing instruction started on line " + curLine + " and column "
+ curColumn + " was not closed", this, ex );
}
if ( piTargetEnd == -1 )
{
piTargetEnd = pos - 2 + bufAbsoluteStart;
// throw new XmlPullParserException(
// "processing instruction must have PITarget name", this, null);
}
if ( tokenize )
{
posEnd = pos - 2;
if ( normalizeIgnorableWS )
{
--pcEnd;
}
}
return true;
}
// protected final static char[] VERSION = {'v','e','r','s','i','o','n'};
// protected final static char[] NCODING = {'n','c','o','d','i','n','g'};
// protected final static char[] TANDALONE = {'t','a','n','d','a','l','o','n','e'};
// protected final static char[] YES = {'y','e','s'};
// protected final static char[] NO = {'n','o'};
protected final static char[] VERSION = "version".toCharArray();
protected final static char[] NCODING = "ncoding".toCharArray();
protected final static char[] TANDALONE = "tandalone".toCharArray();
protected final static char[] YES = "yes".toCharArray();
protected final static char[] NO = "no".toCharArray();
protected void parseXmlDecl( char ch )
throws XmlPullParserException, IOException
{
// [23] XMLDecl ::= ''
// first make sure that relative positions will stay OK
preventBufferCompaction = true;
bufStart = 0; // necessary to keep pos unchanged during expansion!
// --- parse VersionInfo
// [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
// parse is positioned just on first S past 'z' ) && ( ch < 'A' || ch > 'Z' ) && ( ch < '0' || ch > '9' ) && ch != '_'
&& ch != '.' && ch != ':' && ch != '-' )
{
throw new XmlPullParserException( " 'z' ) && ( ch < 'A' || ch > 'Z' ) )
{
throw new XmlPullParserException( " 'z' ) && ( ch < 'A' || ch > 'Z' ) && ( ch < '0' || ch > '9' ) && ch != '.'
&& ch != '_' && ch != '-' )
{
throw new XmlPullParserException( " as last part of ' )
{
throw new XmlPullParserException( "expected ?> as last part of '
int bracketLevel = 0;
final boolean normalizeIgnorableWS = tokenize == true && roundtripSupported == false;
boolean normalizedCR = false;
while ( true )
{
ch = more();
if ( ch == '[' )
++bracketLevel;
if ( ch == ']' )
--bracketLevel;
if ( ch == '>' && bracketLevel == 0 )
break;
if ( normalizeIgnorableWS )
{
if ( ch == '\r' )
{
normalizedCR = true;
// posEnd = pos -1;
// joinPC();
// posEnd is alreadys set
if ( !usePC )
{
posEnd = pos - 1;
if ( posEnd > posStart )
{
joinPC();
}
else
{
usePC = true;
pcStart = pcEnd = 0;
}
}
// assert usePC == true;
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
else if ( ch == '\n' )
{
if ( !normalizedCR && usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
normalizedCR = false;
}
else
{
if ( usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = ch;
}
normalizedCR = false;
}
}
}
posEnd = pos - 1;
}
protected void parseCDSect( boolean hadCharData )
throws XmlPullParserException, IOException
{
// implements XML 1.0 Section 2.7 CDATA Sections
// [18] CDSect ::= CDStart CData CDEnd
// [19] CDStart ::= '' Char*))
// [21] CDEnd ::= ']]>'
// ASSUMPTION: seen posStart )
{
joinPC();
}
else
{
usePC = true;
pcStart = pcEnd = 0;
}
}
}
}
boolean seenBracket = false;
boolean seenBracketBracket = false;
boolean normalizedCR = false;
while ( true )
{
// scan until it hits "]]>"
ch = more();
if ( ch == ']' )
{
if ( !seenBracket )
{
seenBracket = true;
}
else
{
seenBracketBracket = true;
// seenBracket = false;
}
}
else if ( ch == '>' )
{
if ( seenBracket && seenBracketBracket )
{
break; // found end sequence!!!!
}
else
{
seenBracketBracket = false;
}
seenBracket = false;
}
else
{
if ( seenBracket )
{
seenBracket = false;
}
}
if ( normalizeInput )
{
// deal with normalization issues ...
if ( ch == '\r' )
{
normalizedCR = true;
posStart = cdStart - bufAbsoluteStart;
posEnd = pos - 1; // posEnd is alreadys set
if ( !usePC )
{
if ( posEnd > posStart )
{
joinPC();
}
else
{
usePC = true;
pcStart = pcEnd = 0;
}
}
// assert usePC == true;
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
else if ( ch == '\n' )
{
if ( !normalizedCR && usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = '\n';
}
normalizedCR = false;
}
else
{
if ( usePC )
{
if ( pcEnd >= pc.length )
ensurePC( pcEnd );
pc[pcEnd++] = ch;
}
normalizedCR = false;
}
}
}
}
catch ( EOFException ex )
{
// detect EOF and create meaningful error ...
throw new XmlPullParserException( "CDATA section started on line " + curLine + " and column " + curColumn
+ " was not closed", this, ex );
}
if ( normalizeInput )
{
if ( usePC )
{
pcEnd = pcEnd - 2;
}
}
posStart = cdStart - bufAbsoluteStart;
posEnd = pos - 3;
}
protected void fillBuf()
throws IOException, XmlPullParserException
{
if ( reader == null )
throw new XmlPullParserException( "reader must be set before parsing is started" );
// see if we are in compaction area
if ( bufEnd > bufSoftLimit )
{
// expand buffer it makes sense!!!!
boolean compact = bufStart > bufSoftLimit;
boolean expand = false;
if ( preventBufferCompaction )
{
compact = false;
expand = true;
}
else if ( !compact )
{
// freeSpace
if ( bufStart < buf.length / 2 )
{
// less then half buffer available for compacting --> expand instead!!!
expand = true;
}
else
{
// at least half of buffer can be reclaimed --> worthwhile effort!!!
compact = true;
}
}
// if buffer almost full then compact it
if ( compact )
{
// TODO: look on trashing
// //assert bufStart > 0
System.arraycopy( buf, bufStart, buf, 0, bufEnd - bufStart );
if ( TRACE_SIZING )
System.out.println( "TRACE_SIZING fillBuf() compacting " + bufStart + " bufEnd=" + bufEnd + " pos="
+ pos + " posStart=" + posStart + " posEnd=" + posEnd + " buf first 100 chars:"
+ new String( buf, bufStart, bufEnd - bufStart < 100 ? bufEnd - bufStart : 100 ) );
}
else if ( expand )
{
final int newSize = 2 * buf.length;
final char newBuf[] = new char[newSize];
if ( TRACE_SIZING )
System.out.println( "TRACE_SIZING fillBuf() " + buf.length + " => " + newSize );
System.arraycopy( buf, bufStart, newBuf, 0, bufEnd - bufStart );
buf = newBuf;
if ( bufLoadFactor > 0 )
{
bufSoftLimit = ( bufLoadFactor * buf.length ) / 100;
}
}
else
{
throw new XmlPullParserException( "internal error in fillBuffer()" );
}
bufEnd -= bufStart;
pos -= bufStart;
posStart -= bufStart;
posEnd -= bufStart;
bufAbsoluteStart += bufStart;
bufStart = 0;
if ( TRACE_SIZING )
System.out.println( "TRACE_SIZING fillBuf() after bufEnd=" + bufEnd + " pos=" + pos + " posStart="
+ posStart + " posEnd=" + posEnd + " buf first 100 chars:"
+ new String( buf, 0, bufEnd < 100 ? bufEnd : 100 ) );
}
// at least one character must be read or error
final int len = buf.length - bufEnd > READ_CHUNK_SIZE ? READ_CHUNK_SIZE : buf.length - bufEnd;
final int ret = reader.read( buf, bufEnd, len );
if ( ret > 0 )
{
bufEnd += ret;
if ( TRACE_SIZING )
System.out.println( "TRACE_SIZING fillBuf() after filling in buffer" + " buf first 100 chars:"
+ new String( buf, 0, bufEnd < 100 ? bufEnd : 100 ) );
return;
}
if ( ret == -1 )
{
if ( bufAbsoluteStart == 0 && pos == 0 )
{
throw new EOFException( "input contained no data" );
}
else
{
if ( seenRoot && depth == 0 )
{ // inside parsing epilog!!!
reachedEnd = true;
return;
}
else
{
StringBuilder expectedTagStack = new StringBuilder();
if ( depth > 0 )
{
if ( elRawName == null || elRawName[depth] == null )
{
String tagName = new String( buf, posStart + 1, pos - posStart - 1 );
expectedTagStack.append( " - expected the opening tag <" ).append( tagName ).append( "...>" );
}
else
{
// final char[] cbuf = elRawName[depth];
// final String startname = new String(cbuf, 0, elRawNameEnd[depth]);
expectedTagStack.append( " - expected end tag" );
if ( depth > 1 )
{
expectedTagStack.append( "s" ); // more than one end tag
}
expectedTagStack.append( " " );
for ( int i = depth; i > 0; i-- )
{
if ( elRawName == null || elRawName[i] == null )
{
String tagName = new String( buf, posStart + 1, pos - posStart - 1 );
expectedTagStack.append( " - expected the opening tag <" ).append( tagName ).append( "...>" );
}
else
{
String tagName = new String( elRawName[i], 0, elRawNameEnd[i] );
expectedTagStack.append( "" ).append( tagName ).append( '>' );
}
}
expectedTagStack.append( " to close" );
for ( int i = depth; i > 0; i-- )
{
if ( i != depth )
{
expectedTagStack.append( " and" ); // more than one end tag
}
if ( elRawName == null || elRawName[i] == null )
{
String tagName = new String( buf, posStart + 1, pos - posStart - 1 );
expectedTagStack.append( " start tag <" ).append( tagName ).append( ">" );
expectedTagStack.append( " from line " ).append( elRawNameLine[i] );
}
else
{
String tagName = new String( elRawName[i], 0, elRawNameEnd[i] );
expectedTagStack.append( " start tag <" ).append( tagName ).append( ">" );
expectedTagStack.append( " from line " ).append( elRawNameLine[i] );
}
}
expectedTagStack.append( ", parser stopped on" );
}
}
throw new EOFException( "no more data available" + expectedTagStack.toString()
+ getPositionDescription() );
}
}
}
else
{
throw new IOException( "error reading input, returned " + ret );
}
}
protected char more()
throws IOException, XmlPullParserException
{
if ( pos >= bufEnd )
{
fillBuf();
// this return value should be ignored as it is used in epilog parsing ...
if ( reachedEnd )
return (char) -1;
}
final char ch = buf[pos++];
// line/columnNumber
if ( ch == '\n' )
{
++lineNumber;
columnNumber = 1;
}
else
{
++columnNumber;
}
// System.out.print(ch);
return ch;
}
// /**
// * This function returns position of parser in XML input stream
// * (how many characters were processed.
// * NOTE: this logical position and not byte offset as encodings
// * such as UTF8 may use more than one byte to encode one character.
// */
// public int getCurrentInputPosition() {
// return pos + bufAbsoluteStart;
// }
protected void ensurePC( int end )
{
// assert end >= pc.length;
final int newSize = end > READ_CHUNK_SIZE ? 2 * end : 2 * READ_CHUNK_SIZE;
final char[] newPC = new char[newSize];
if ( TRACE_SIZING )
System.out.println( "TRACE_SIZING ensurePC() " + pc.length + " ==> " + newSize + " end=" + end );
System.arraycopy( pc, 0, newPC, 0, pcEnd );
pc = newPC;
// assert end < pc.length;
}
protected void joinPC()
{
// assert usePC == false;
// assert posEnd > posStart;
final int len = posEnd - posStart;
final int newEnd = pcEnd + len + 1;
if ( newEnd >= pc.length )
ensurePC( newEnd ); // add 1 for extra space for one char
// assert newEnd < pc.length;
System.arraycopy( buf, posStart, pc, pcEnd, len );
pcEnd += len;
usePC = true;
}
protected char requireInput( char ch, char[] input )
throws XmlPullParserException, IOException
{
for ( char anInput : input )
{
if ( ch != anInput )
{
throw new XmlPullParserException( "expected " + printable( anInput ) + " in " + new String( input )
+ " and not " + printable( ch ), this, null );
}
ch = more();
}
return ch;
}
protected char requireNextS()
throws XmlPullParserException, IOException
{
final char ch = more();
if ( !isS( ch ) )
{
throw new XmlPullParserException( "white space is required and not " + printable( ch ), this, null );
}
return skipS( ch );
}
protected char skipS( char ch )
throws XmlPullParserException, IOException
{
while ( isS( ch ) )
{
ch = more();
} // skip additional spaces
return ch;
}
// nameStart / name lookup tables based on XML 1.1 http://www.w3.org/TR/2001/WD-xml11-20011213/
protected static final int LOOKUP_MAX = 0x400;
protected static final char LOOKUP_MAX_CHAR = (char) LOOKUP_MAX;
// protected static int lookupNameStartChar[] = new int[ LOOKUP_MAX_CHAR / 32 ];
// protected static int lookupNameChar[] = new int[ LOOKUP_MAX_CHAR / 32 ];
protected static boolean lookupNameStartChar[] = new boolean[LOOKUP_MAX];
protected static boolean lookupNameChar[] = new boolean[LOOKUP_MAX];
private static final void setName( char ch )
// { lookupNameChar[ (int)ch / 32 ] |= (1 << (ch % 32)); }
{
lookupNameChar[ch] = true;
}
private static final void setNameStart( char ch )
// { lookupNameStartChar[ (int)ch / 32 ] |= (1 << (ch % 32)); setName(ch); }
{
lookupNameStartChar[ch] = true;
setName( ch );
}
static
{
setNameStart( ':' );
for ( char ch = 'A'; ch <= 'Z'; ++ch )
setNameStart( ch );
setNameStart( '_' );
for ( char ch = 'a'; ch <= 'z'; ++ch )
setNameStart( ch );
for ( char ch = '\u00c0'; ch <= '\u02FF'; ++ch )
setNameStart( ch );
for ( char ch = '\u0370'; ch <= '\u037d'; ++ch )
setNameStart( ch );
for ( char ch = '\u037f'; ch < '\u0400'; ++ch )
setNameStart( ch );
setName( '-' );
setName( '.' );
for ( char ch = '0'; ch <= '9'; ++ch )
setName( ch );
setName( '\u00b7' );
for ( char ch = '\u0300'; ch <= '\u036f'; ++ch )
setName( ch );
}
// private final static boolean isNameStartChar(char ch) {
protected boolean isNameStartChar( char ch )
{
return ( ch < LOOKUP_MAX_CHAR && lookupNameStartChar[ch] ) || ( ch >= LOOKUP_MAX_CHAR && ch <= '\u2027' )
|| ( ch >= '\u202A' && ch <= '\u218F' ) || ( ch >= '\u2800' && ch <= '\uFFEF' );
// if(ch < LOOKUP_MAX_CHAR) return lookupNameStartChar[ ch ];
// else return ch <= '\u2027'
// || (ch >= '\u202A' && ch <= '\u218F')
// || (ch >= '\u2800' && ch <= '\uFFEF')
// ;
// return false;
// return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == ':'
// || (ch >= '0' && ch <= '9');
// if(ch < LOOKUP_MAX_CHAR) return (lookupNameStartChar[ (int)ch / 32 ] & (1 << (ch % 32))) != 0;
// if(ch <= '\u2027') return true;
// //[#x202A-#x218F]
// if(ch < '\u202A') return false;
// if(ch <= '\u218F') return true;
// // added parts [#x2800-#xD7FF] | [#xE000-#xFDCF] | [#xFDE0-#xFFEF] | [#x10000-#x10FFFF]
// if(ch < '\u2800') return false;
// if(ch <= '\uFFEF') return true;
// return false;
// else return (supportXml11 && ( (ch < '\u2027') || (ch > '\u2029' && ch < '\u2200') ...
}
// private final static boolean isNameChar(char ch) {
protected boolean isNameChar( char ch )
{
// return isNameStartChar(ch);
// if(ch < LOOKUP_MAX_CHAR) return (lookupNameChar[ (int)ch / 32 ] & (1 << (ch % 32))) != 0;
return ( ch < LOOKUP_MAX_CHAR && lookupNameChar[ch] ) || ( ch >= LOOKUP_MAX_CHAR && ch <= '\u2027' )
|| ( ch >= '\u202A' && ch <= '\u218F' ) || ( ch >= '\u2800' && ch <= '\uFFEF' );
// return false;
// return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == ':'
// || (ch >= '0' && ch <= '9');
// if(ch < LOOKUP_MAX_CHAR) return (lookupNameStartChar[ (int)ch / 32 ] & (1 << (ch % 32))) != 0;
// else return
// else if(ch <= '\u2027') return true;
// //[#x202A-#x218F]
// else if(ch < '\u202A') return false;
// else if(ch <= '\u218F') return true;
// // added parts [#x2800-#xD7FF] | [#xE000-#xFDCF] | [#xFDE0-#xFFEF] | [#x10000-#x10FFFF]
// else if(ch < '\u2800') return false;
// else if(ch <= '\uFFEF') return true;
// else return false;
}
protected boolean isS( char ch )
{
return ( ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t' );
// || (supportXml11 && (ch == '\u0085' || ch == '\u2028');
}
// protected boolean isChar(char ch) { return (ch < '\uD800' || ch > '\uDFFF')
// ch != '\u0000' ch < '\uFFFE'
// protected char printable(char ch) { return ch; }
protected String printable( char ch )
{
if ( ch == '\n' )
{
return "\\n";
}
else if ( ch == '\r' )
{
return "\\r";
}
else if ( ch == '\t' )
{
return "\\t";
}
else if ( ch == '\'' )
{
return "\\'";
}
if ( ch > 127 || ch < 32 )
{
return "\\u" + Integer.toHexString( (int) ch );
}
return "" + ch;
}
protected String printable( String s )
{
if ( s == null )
return null;
final int sLen = s.length();
StringBuilder buf = new StringBuilder( sLen + 10 );
for ( int i = 0; i < sLen; ++i )
{
buf.append( printable( s.charAt( i ) ) );
}
s = buf.toString();
return s;
}
//
// Imported code from ASF Harmony project rev 770909
// http://svn.apache.org/repos/asf/harmony/enhanced/classlib/trunk/modules/luni/src/main/java/java/lang/Character.java
//
private static int toCodePoint( char high, char low )
{
// See RFC 2781, Section 2.2
// http://www.faqs.org/rfcs/rfc2781.html
int h = ( high & 0x3FF ) << 10;
int l = low & 0x3FF;
return ( h | l ) + 0x10000;
}
private static final char MIN_HIGH_SURROGATE = '\uD800';
private static final char MAX_HIGH_SURROGATE = '\uDBFF';
private static boolean isHighSurrogate( char ch )
{
return ( MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch );
}
private static final int MAX_CODE_POINT = 0x10FFFF;
private static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;
/**
* Check if the provided parameter is a valid Char, according to: {@link https://www.w3.org/TR/REC-xml/#NT-Char}
*
* @param codePoint the numeric value to check
* @return true if it is a valid numeric character reference. False otherwise.
*/
private static boolean isValidCodePoint( int codePoint )
{
// Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
return codePoint == 0x9 || codePoint == 0xA || codePoint == 0xD || ( 0x20 <= codePoint && codePoint <= 0xD7FF )
|| ( 0xE000 <= codePoint && codePoint <= 0xFFFD ) || ( 0x10000 <= codePoint && codePoint <= 0x10FFFF );
}
private static boolean isSupplementaryCodePoint( int codePoint )
{
return ( MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint );
}
/**
* TODO add javadoc
*
* @param codePoint
* @return
*/
public static char[] toChars( int codePoint )
{
if ( !isValidCodePoint( codePoint ) )
{
throw new IllegalArgumentException();
}
if ( isSupplementaryCodePoint( codePoint ) )
{
int cpPrime = codePoint - 0x10000;
int high = 0xD800 | ( ( cpPrime >> 10 ) & 0x3FF );
int low = 0xDC00 | ( cpPrime & 0x3FF );
return new char[] { (char) high, (char) low };
}
return new char[] { (char) codePoint };
}
}
/*
* Indiana University Extreme! Lab Software License, Version 1.2 Copyright (C) 2003 The Trustees of Indiana University.
* All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met: 1) All redistributions of source code must retain the above copyright
* notice, the list of authors in the original source code, this list of conditions and the disclaimer listed in this
* license; 2) All redistributions in binary form must reproduce the above copyright notice, this list of conditions and
* the disclaimer listed in this license in the documentation and/or other materials provided with the distribution; 3)
* Any documentation included with all redistributions must include the following acknowledgement: "This product
* includes software developed by the Indiana University Extreme! Lab. For further information please visit
* http://www.extreme.indiana.edu/" Alternatively, this acknowledgment may appear in the software itself, and wherever
* such third-party acknowledgments normally appear. 4) The name "Indiana University" or "Indiana University Extreme!
* Lab" shall not be used to endorse or promote products derived from this software without prior written permission
* from Indiana University. For written permission, please contact http://www.extreme.indiana.edu/. 5) Products derived
* from this software may not use "Indiana University" name nor may "Indiana University" appear in their name, without
* prior written permission of the Indiana University. Indiana University provides no reassurances that the source code
* provided does not infringe the patent or any other intellectual property rights of any other entity. Indiana
* University disclaims any liability to any recipient for claims brought by any other entity based on infringement of
* intellectual property rights or otherwise. LICENSEE UNDERSTANDS THAT SOFTWARE IS PROVIDED "AS IS" FOR WHICH NO
* WARRANTIES AS TO CAPABILITIES OR ACCURACY ARE MADE. INDIANA UNIVERSITY GIVES NO WARRANTIES AND MAKES NO
* REPRESENTATION THAT SOFTWARE IS FREE OF INFRINGEMENT OF THIRD PARTY PATENT, COPYRIGHT, OR OTHER PROPRIETARY RIGHTS.
* INDIANA UNIVERSITY MAKES NO WARRANTIES THAT SOFTWARE IS FREE FROM "BUGS", "VIRUSES", "TROJAN HORSES", "TRAP
* DOORS", "WORMS", OR OTHER HARMFUL CODE. LICENSEE ASSUMES THE ENTIRE RISK AS TO THE PERFORMANCE OF SOFTWARE AND/OR
* ASSOCIATED MATERIALS, AND TO THE PERFORMANCE AND VALIDITY OF INFORMATION GENERATED USING SOFTWARE.
*/