All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.codehaus.plexus.util.xml.pull.MXParser Maven / Gradle / Ivy

There is a newer version: 5.17.0
Show newest version
/* -*-             c-basic-offset: 4; indent-tabs-mode: nil; -*-  //------100-columns-wide------>|*/
/*
 * Copyright (c) 2003 Extreme! Lab, Indiana University. All rights reserved.
 *
 * This software is open source. See the bottom of this file for the licence.
 *
 * $Id$
 */

package org.codehaus.plexus.util.xml.pull;

import java.io.EOFException;
import java.io.IOException;
import java.io.Reader;
import java.io.UnsupportedEncodingException;

import org.codehaus.plexus.util.ReaderFactory;

//import java.util.Hashtable;

//TODO best handling of interning issues
//   have isAllNewStringInterned ???

//TODO handling surrogate pairs: http://www.unicode.org/unicode/faq/utf_bom.html#6

//TODO review code for use of bufAbsoluteStart when keeping pos between next()/fillBuf()

/**
 * Absolutely minimal implementation of XMLPULL V1 API. Encoding handling done with XmlReader
 *
 * @see org.codehaus.plexus.util.xml.XmlReader
 * @author Aleksander Slominski
 */

public class MXParser
    implements XmlPullParser
{
    //NOTE: no interning of those strings --> by Java leng spec they MUST be already interned
    protected final static String XML_URI = "http://www.w3.org/XML/1998/namespace";
    protected final static String XMLNS_URI = "http://www.w3.org/2000/xmlns/";
    protected final static String FEATURE_XML_ROUNDTRIP=
        //"http://xmlpull.org/v1/doc/features.html#xml-roundtrip";
        "http://xmlpull.org/v1/doc/features.html#xml-roundtrip";
    protected final static String FEATURE_NAMES_INTERNED =
        "http://xmlpull.org/v1/doc/features.html#names-interned";
    protected final static String PROPERTY_XMLDECL_VERSION =
        "http://xmlpull.org/v1/doc/properties.html#xmldecl-version";
    protected final static String PROPERTY_XMLDECL_STANDALONE =
        "http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone";
    protected final static String PROPERTY_XMLDECL_CONTENT =
        "http://xmlpull.org/v1/doc/properties.html#xmldecl-content";
    protected final static String PROPERTY_LOCATION =
        "http://xmlpull.org/v1/doc/properties.html#location";


    /**
     * Implementation notice:
     * the is instance variable that controls if newString() is interning.
     * 

NOTE: newStringIntern always returns interned strings * and newString MAY return interned String depending on this variable. *

NOTE: by default in this minimal implementation it is false! */ protected boolean allStringsInterned; protected void resetStringCache() { //System.out.println("resetStringCache() minimum called"); } protected String newString(char[] cbuf, int off, int len) { return new String(cbuf, off, len); } protected String newStringIntern(char[] cbuf, int off, int len) { return (new String(cbuf, off, len)).intern(); } private static final boolean TRACE_SIZING = false; // NOTE: features are not resetable and typically defaults to false ... protected boolean processNamespaces; protected boolean roundtripSupported; // global parser state protected String location; protected int lineNumber; protected int columnNumber; protected boolean seenRoot; protected boolean reachedEnd; protected int eventType; protected boolean emptyElementTag; // element stack protected int depth; protected char[] elRawName[]; protected int elRawNameEnd[]; protected int elRawNameLine[]; protected String elName[]; protected String elPrefix[]; protected String elUri[]; //protected String elValue[]; protected int elNamespaceCount[]; /** * Make sure that we have enough space to keep element stack if passed size. * It will always create one additional slot then current depth */ protected void ensureElementsCapacity() { final int elStackSize = elName != null ? elName.length : 0; if( (depth + 1) >= elStackSize) { // we add at least one extra slot ... final int newSize = (depth >= 7 ? 2 * depth : 8) + 2; // = lucky 7 + 1 //25 if(TRACE_SIZING) { System.err.println("TRACE_SIZING elStackSize "+elStackSize+" ==> "+newSize); } final boolean needsCopying = elStackSize > 0; String[] arr = null; // resue arr local variable slot arr = new String[newSize]; if(needsCopying) System.arraycopy(elName, 0, arr, 0, elStackSize); elName = arr; arr = new String[newSize]; if(needsCopying) System.arraycopy(elPrefix, 0, arr, 0, elStackSize); elPrefix = arr; arr = new String[newSize]; if(needsCopying) System.arraycopy(elUri, 0, arr, 0, elStackSize); elUri = arr; int[] iarr = new int[newSize]; if(needsCopying) { System.arraycopy(elNamespaceCount, 0, iarr, 0, elStackSize); } else { // special initialization iarr[0] = 0; } elNamespaceCount = iarr; //TODO: avoid using element raw name ... iarr = new int[newSize]; if(needsCopying) { System.arraycopy(elRawNameEnd, 0, iarr, 0, elStackSize); } elRawNameEnd = iarr; iarr = new int[newSize]; if(needsCopying) { System.arraycopy(elRawNameLine, 0, iarr, 0, elStackSize); } elRawNameLine = iarr; final char[][] carr = new char[newSize][]; if(needsCopying) { System.arraycopy(elRawName, 0, carr, 0, elStackSize); } elRawName = carr; // arr = new String[newSize]; // if(needsCopying) System.arraycopy(elLocalName, 0, arr, 0, elStackSize); // elLocalName = arr; // arr = new String[newSize]; // if(needsCopying) System.arraycopy(elDefaultNs, 0, arr, 0, elStackSize); // elDefaultNs = arr; // int[] iarr = new int[newSize]; // if(needsCopying) System.arraycopy(elNsStackPos, 0, iarr, 0, elStackSize); // for (int i = elStackSize; i < iarr.length; i++) // { // iarr[i] = (i > 0) ? -1 : 0; // } // elNsStackPos = iarr; //assert depth < elName.length; } } // attribute stack protected int attributeCount; protected String attributeName[]; protected int attributeNameHash[]; //protected int attributeNameStart[]; //protected int attributeNameEnd[]; protected String attributePrefix[]; protected String attributeUri[]; protected String attributeValue[]; //protected int attributeValueStart[]; //protected int attributeValueEnd[]; /** * Make sure that in attributes temporary array is enough space. */ protected void ensureAttributesCapacity(int size) { final int attrPosSize = attributeName != null ? attributeName.length : 0; if(size >= attrPosSize) { final int newSize = size > 7 ? 2 * size : 8; // = lucky 7 + 1 //25 if(TRACE_SIZING) { System.err.println("TRACE_SIZING attrPosSize "+attrPosSize+" ==> "+newSize); } final boolean needsCopying = attrPosSize > 0; String[] arr = null; arr = new String[newSize]; if(needsCopying) System.arraycopy(attributeName, 0, arr, 0, attrPosSize); attributeName = arr; arr = new String[newSize]; if(needsCopying) System.arraycopy(attributePrefix, 0, arr, 0, attrPosSize); attributePrefix = arr; arr = new String[newSize]; if(needsCopying) System.arraycopy(attributeUri, 0, arr, 0, attrPosSize); attributeUri = arr; arr = new String[newSize]; if(needsCopying) System.arraycopy(attributeValue, 0, arr, 0, attrPosSize); attributeValue = arr; if( ! allStringsInterned ) { final int[] iarr = new int[newSize]; if(needsCopying) System.arraycopy(attributeNameHash, 0, iarr, 0, attrPosSize); attributeNameHash = iarr; } arr = null; // //assert attrUri.length > size } } // namespace stack protected int namespaceEnd; protected String namespacePrefix[]; protected int namespacePrefixHash[]; protected String namespaceUri[]; protected void ensureNamespacesCapacity(int size) { final int namespaceSize = namespacePrefix != null ? namespacePrefix.length : 0; if(size >= namespaceSize) { final int newSize = size > 7 ? 2 * size : 8; // = lucky 7 + 1 //25 if(TRACE_SIZING) { System.err.println("TRACE_SIZING namespaceSize "+namespaceSize+" ==> "+newSize); } final String[] newNamespacePrefix = new String[newSize]; final String[] newNamespaceUri = new String[newSize]; if(namespacePrefix != null) { System.arraycopy( namespacePrefix, 0, newNamespacePrefix, 0, namespaceEnd); System.arraycopy( namespaceUri, 0, newNamespaceUri, 0, namespaceEnd); } namespacePrefix = newNamespacePrefix; namespaceUri = newNamespaceUri; if( ! allStringsInterned ) { final int[] newNamespacePrefixHash = new int[newSize]; if(namespacePrefixHash != null) { System.arraycopy( namespacePrefixHash, 0, newNamespacePrefixHash, 0, namespaceEnd); } namespacePrefixHash = newNamespacePrefixHash; } //prefixesSize = newSize; // //assert nsPrefixes.length > size && nsPrefixes.length == newSize } } /** * simplistic implementation of hash function that has constant * time to compute - so it also means diminishing hash quality for long strings * but for XML parsing it should be good enough ... */ protected static final int fastHash( char ch[], int off, int len ) { if(len == 0) return 0; //assert len >0 int hash = ch[off]; // hash at beginning //try { hash = (hash << 7) + ch[ off + len - 1 ]; // hash at the end //} catch(ArrayIndexOutOfBoundsException aie) { // aie.printStackTrace(); //should never happen ... // throw new RuntimeException("this is violation of pre-condition"); //} if(len > 16) hash = (hash << 7) + ch[ off + (len / 4)]; // 1/4 from beginning if(len > 8) hash = (hash << 7) + ch[ off + (len / 2)]; // 1/2 of string size ... // notice that hash is at most done 3 times <<7 so shifted by 21 bits 8 bit value // so max result == 29 bits so it is quite just below 31 bits for long (2^32) ... //assert hash >= 0; return hash; } // entity replacement stack protected int entityEnd; protected String entityName[]; protected char[] entityNameBuf[]; protected String entityReplacement[]; protected char[] entityReplacementBuf[]; protected int entityNameHash[]; private final EntityReplacementMap replacementMapTemplate; protected void ensureEntityCapacity() { final int entitySize = entityReplacementBuf != null ? entityReplacementBuf.length : 0; if(entityEnd >= entitySize) { final int newSize = entityEnd > 7 ? 2 * entityEnd : 8; // = lucky 7 + 1 //25 if(TRACE_SIZING) { System.err.println("TRACE_SIZING entitySize "+entitySize+" ==> "+newSize); } final String[] newEntityName = new String[newSize]; final char[] newEntityNameBuf[] = new char[newSize][]; final String[] newEntityReplacement = new String[newSize]; final char[] newEntityReplacementBuf[] = new char[newSize][]; if(entityName != null) { System.arraycopy(entityName, 0, newEntityName, 0, entityEnd); System.arraycopy(entityNameBuf, 0, newEntityNameBuf, 0, entityEnd); System.arraycopy(entityReplacement, 0, newEntityReplacement, 0, entityEnd); System.arraycopy(entityReplacementBuf, 0, newEntityReplacementBuf, 0, entityEnd); } entityName = newEntityName; entityNameBuf = newEntityNameBuf; entityReplacement = newEntityReplacement; entityReplacementBuf = newEntityReplacementBuf; if( ! allStringsInterned ) { final int[] newEntityNameHash = new int[newSize]; if(entityNameHash != null) { System.arraycopy(entityNameHash, 0, newEntityNameHash, 0, entityEnd); } entityNameHash = newEntityNameHash; } } } // input buffer management protected static final int READ_CHUNK_SIZE = 8*1024; //max data chars in one read() call protected Reader reader; protected String inputEncoding; protected int bufLoadFactor = 95; // 99% //protected int bufHardLimit; // only matters when expanding protected char buf[] = new char[ Runtime.getRuntime().freeMemory() > 1000000L ? READ_CHUNK_SIZE : 256 ]; protected int bufSoftLimit = ( bufLoadFactor * buf.length ) /100; // desirable size of buffer protected boolean preventBufferCompaction; protected int bufAbsoluteStart; // this is buf protected int bufStart; protected int bufEnd; protected int pos; protected int posStart; protected int posEnd; protected char pc[] = new char[ Runtime.getRuntime().freeMemory() > 1000000L ? READ_CHUNK_SIZE : 64 ]; protected int pcStart; protected int pcEnd; // parsing state //protected boolean needsMore; //protected boolean seenMarkup; protected boolean usePC; protected boolean seenStartTag; protected boolean seenEndTag; protected boolean pastEndTag; protected boolean seenAmpersand; protected boolean seenMarkup; protected boolean seenDocdecl; // transient variable set during each call to next/Token() protected boolean tokenize; protected String text; protected String entityRefName; protected String xmlDeclVersion; protected Boolean xmlDeclStandalone; protected String xmlDeclContent; protected void reset() { //System.out.println("reset() called"); location = null; lineNumber = 1; columnNumber = 0; seenRoot = false; reachedEnd = false; eventType = START_DOCUMENT; emptyElementTag = false; depth = 0; attributeCount = 0; namespaceEnd = 0; entityEnd = 0; setupFromTemplate(); reader = null; inputEncoding = null; preventBufferCompaction = false; bufAbsoluteStart = 0; bufEnd = bufStart = 0; pos = posStart = posEnd = 0; pcEnd = pcStart = 0; usePC = false; seenStartTag = false; seenEndTag = false; pastEndTag = false; seenAmpersand = false; seenMarkup = false; seenDocdecl = false; xmlDeclVersion = null; xmlDeclStandalone = null; xmlDeclContent = null; resetStringCache(); } public MXParser() { replacementMapTemplate = null; } public MXParser(EntityReplacementMap entityReplacementMap){ this.replacementMapTemplate = entityReplacementMap; } public void setupFromTemplate() { if ( replacementMapTemplate != null){ int length = replacementMapTemplate.entityEnd; // This is a bit cheeky, since the EntityReplacementMap contains exact-sized arrays, // and elements are always added to the array, we can use the array from the template. // Kids; dont do this at home. entityName = replacementMapTemplate.entityName; entityNameBuf = replacementMapTemplate.entityNameBuf; entityReplacement = replacementMapTemplate.entityReplacement; entityReplacementBuf = replacementMapTemplate.entityReplacementBuf; entityNameHash = replacementMapTemplate.entityNameHash; entityEnd = length; } } /** * Method setFeature * * @param name a String * @param state a boolean * * @throws XmlPullParserException * */ public void setFeature(String name, boolean state) throws XmlPullParserException { if(name == null) throw new IllegalArgumentException("feature name should not be null"); if(FEATURE_PROCESS_NAMESPACES.equals(name)) { if(eventType != START_DOCUMENT) throw new XmlPullParserException( "namespace processing feature can only be changed before parsing", this, null); processNamespaces = state; // } else if(FEATURE_REPORT_NAMESPACE_ATTRIBUTES.equals(name)) { // if(type != START_DOCUMENT) throw new XmlPullParserException( // "namespace reporting feature can only be changed before parsing", this, null); // reportNsAttribs = state; } else if(FEATURE_NAMES_INTERNED.equals(name)) { if(state != false) { throw new XmlPullParserException( "interning names in this implementation is not supported"); } } else if(FEATURE_PROCESS_DOCDECL.equals(name)) { if(state != false) { throw new XmlPullParserException( "processing DOCDECL is not supported"); } //} else if(REPORT_DOCDECL.equals(name)) { // paramNotifyDoctype = state; } else if(FEATURE_XML_ROUNDTRIP.equals(name)) { //if(state == false) { // throw new XmlPullParserException( // "roundtrip feature can not be switched off"); //} roundtripSupported = state; } else { throw new XmlPullParserException("unsupporte feature "+name); } } /** Unknown properties are always returned as false */ public boolean getFeature(String name) { if(name == null) throw new IllegalArgumentException("feature name should not be nulll"); if(FEATURE_PROCESS_NAMESPACES.equals(name)) { return processNamespaces; // } else if(FEATURE_REPORT_NAMESPACE_ATTRIBUTES.equals(name)) { // return reportNsAttribs; } else if(FEATURE_NAMES_INTERNED.equals(name)) { return false; } else if(FEATURE_PROCESS_DOCDECL.equals(name)) { return false; //} else if(REPORT_DOCDECL.equals(name)) { // return paramNotifyDoctype; } else if(FEATURE_XML_ROUNDTRIP.equals(name)) { //return true; return roundtripSupported; } return false; } public void setProperty(String name, Object value) throws XmlPullParserException { if(PROPERTY_LOCATION.equals(name)) { location = (String) value; } else { throw new XmlPullParserException("unsupported property: '"+name+"'"); } } public Object getProperty(String name) { if(name == null) throw new IllegalArgumentException("property name should not be nulll"); if(PROPERTY_XMLDECL_VERSION.equals(name)) { return xmlDeclVersion; } else if(PROPERTY_XMLDECL_STANDALONE.equals(name)) { return xmlDeclStandalone; } else if(PROPERTY_XMLDECL_CONTENT.equals(name)) { return xmlDeclContent; } else if(PROPERTY_LOCATION.equals(name)) { return location; } return null; } public void setInput(Reader in) throws XmlPullParserException { reset(); reader = in; } public void setInput(java.io.InputStream inputStream, String inputEncoding) throws XmlPullParserException { if(inputStream == null) { throw new IllegalArgumentException("input stream can not be null"); } Reader reader; try { if(inputEncoding != null) { reader = ReaderFactory.newReader(inputStream, inputEncoding); } else { reader = ReaderFactory.newXmlReader(inputStream); } } catch (UnsupportedEncodingException une) { throw new XmlPullParserException( "could not create reader for encoding "+inputEncoding+" : "+une, this, une); } catch ( IOException e ) { throw new XmlPullParserException( "could not create reader : "+e, this, e); } setInput(reader); //must be here as reset() was called in setInput() and has set this.inputEncoding to null ... this.inputEncoding = inputEncoding; } public String getInputEncoding() { return inputEncoding; } public void defineEntityReplacementText(String entityName, String replacementText) throws XmlPullParserException { // throw new XmlPullParserException("not allowed"); if ( !replacementText.startsWith( "&#" ) && this.entityName != null && replacementText.length() > 1 ) { String tmp = replacementText.substring( 1, replacementText.length() - 1 ); for ( int i = 0; i < this.entityName.length; i++ ) { if ( this.entityName[i] != null && this.entityName[i].equals( tmp ) ) { replacementText = this.entityReplacement[i]; } } } //protected char[] entityReplacement[]; ensureEntityCapacity(); // this is to make sure that if interning works we will take advantage of it ... char[] entityNameCharData = entityName.toCharArray(); this.entityName[entityEnd] = newString( entityNameCharData, 0, entityName.length()); entityNameBuf[entityEnd] = entityNameCharData; entityReplacement[entityEnd] = replacementText; entityReplacementBuf[entityEnd] = replacementText.toCharArray(); if(!allStringsInterned) { entityNameHash[ entityEnd ] = fastHash(entityNameBuf[entityEnd], 0, entityNameBuf[entityEnd].length); } ++entityEnd; //TODO disallow < or & in entity replacement text (or ]]>???) // TOOD keepEntityNormalizedForAttributeValue cached as well ... } public int getNamespaceCount(int depth) throws XmlPullParserException { if(processNamespaces == false || depth == 0) { return 0; } //int maxDepth = eventType == END_TAG ? this.depth + 1 : this.depth; //if(depth < 0 || depth > maxDepth) throw new IllegalArgumentException( if(depth < 0 || depth > this.depth) throw new IllegalArgumentException( "napespace count mayt be for depth 0.."+this.depth+" not "+depth); return elNamespaceCount[ depth ]; } public String getNamespacePrefix(int pos) throws XmlPullParserException { //int end = eventType == END_TAG ? elNamespaceCount[ depth + 1 ] : namespaceEnd; //if(pos < end) { if(pos < namespaceEnd) { return namespacePrefix[ pos ]; } else { throw new XmlPullParserException( "position "+pos+" exceeded number of available namespaces "+namespaceEnd); } } public String getNamespaceUri(int pos) throws XmlPullParserException { //int end = eventType == END_TAG ? elNamespaceCount[ depth + 1 ] : namespaceEnd; //if(pos < end) { if(pos < namespaceEnd) { return namespaceUri[ pos ]; } else { throw new XmlPullParserException( "position "+pos+" exceedded number of available namespaces "+namespaceEnd); } } public String getNamespace( String prefix ) //throws XmlPullParserException { //int count = namespaceCount[ depth ]; if(prefix != null) { for( int i = namespaceEnd -1; i >= 0; i--) { if( prefix.equals( namespacePrefix[ i ] ) ) { return namespaceUri[ i ]; } } if("xml".equals( prefix )) { return XML_URI; } else if("xmlns".equals( prefix )) { return XMLNS_URI; } } else { for( int i = namespaceEnd -1; i >= 0; i--) { if( namespacePrefix[ i ] == null) { //"") { //null ) { //TODO check FIXME Alek return namespaceUri[ i ]; } } } return null; } public int getDepth() { return depth; } private static int findFragment(int bufMinPos, char[] b, int start, int end) { //System.err.println("bufStart="+bufStart+" b="+printable(new String(b, start, end - start))+" start="+start+" end="+end); if(start < bufMinPos) { start = bufMinPos; if(start > end) start = end; return start; } if(end - start > 65) { start = end - 10; // try to find good location } int i = start + 1; while(--i > bufMinPos) { if((end - i) > 65) break; final char c = b[i]; if(c == '<' && (start - i) > 10) break; } return i; } /** * Return string describing current position of parsers as * text 'STATE [seen %s...] @line:column'. */ public String getPositionDescription () { String fragment = null; if(posStart <= pos) { final int start = findFragment(0, buf, posStart, pos); //System.err.println("start="+start); if(start < pos) { fragment = new String(buf, start, pos - start); } if(bufAbsoluteStart > 0 || start > 0) fragment = "..." + fragment; } // return " at line "+tokenizerPosRow // +" and column "+(tokenizerPosCol-1) // +(fragment != null ? " seen "+printable(fragment)+"..." : ""); return " "+TYPES[ eventType ] + (fragment != null ? " seen "+printable(fragment)+"..." : "") +" "+(location != null ? location : "") +"@"+getLineNumber()+":"+getColumnNumber(); } public int getLineNumber() { return lineNumber; } public int getColumnNumber() { return columnNumber; } public boolean isWhitespace() throws XmlPullParserException { if(eventType == TEXT || eventType == CDSECT) { if(usePC) { for (int i = pcStart; i = 0; i--) { // if( prefix.equals( namespacePrefix[ i ] ) ) { // return namespaceUri[ i ]; // } // } // } else { // for( int i = namespaceEnd -1; i >= 0; i--) { // if( namespacePrefix[ i ] == null ) { // return namespaceUri[ i ]; // } // } // // } // return ""; } public String getName() { if(eventType == START_TAG) { //return elName[ depth - 1 ] ; return elName[ depth ] ; } else if(eventType == END_TAG) { return elName[ depth ] ; } else if(eventType == ENTITY_REF) { if(entityRefName == null) { entityRefName = newString(buf, posStart, posEnd - posStart); } return entityRefName; } else { return null; } } public String getPrefix() { if(eventType == START_TAG) { //return elPrefix[ depth - 1 ] ; return elPrefix[ depth ] ; } else if(eventType == END_TAG) { return elPrefix[ depth ] ; } return null; // if(eventType != START_TAG && eventType != END_TAG) return null; // int maxDepth = eventType == END_TAG ? depth : depth - 1; // return elPrefix[ maxDepth ]; } public boolean isEmptyElementTag() throws XmlPullParserException { if(eventType != START_TAG) throw new XmlPullParserException( "parser must be on START_TAG to check for empty element", this, null); return emptyElementTag; } public int getAttributeCount() { if(eventType != START_TAG) return -1; return attributeCount; } public String getAttributeNamespace(int index) { if(eventType != START_TAG) throw new IndexOutOfBoundsException( "only START_TAG can have attributes"); if(processNamespaces == false) return NO_NAMESPACE; if(index < 0 || index >= attributeCount) throw new IndexOutOfBoundsException( "attribute position must be 0.."+(attributeCount-1)+" and not "+index); return attributeUri[ index ]; } public String getAttributeName(int index) { if(eventType != START_TAG) throw new IndexOutOfBoundsException( "only START_TAG can have attributes"); if(index < 0 || index >= attributeCount) throw new IndexOutOfBoundsException( "attribute position must be 0.."+(attributeCount-1)+" and not "+index); return attributeName[ index ]; } public String getAttributePrefix(int index) { if(eventType != START_TAG) throw new IndexOutOfBoundsException( "only START_TAG can have attributes"); if(processNamespaces == false) return null; if(index < 0 || index >= attributeCount) throw new IndexOutOfBoundsException( "attribute position must be 0.."+(attributeCount-1)+" and not "+index); return attributePrefix[ index ]; } public String getAttributeType(int index) { if(eventType != START_TAG) throw new IndexOutOfBoundsException( "only START_TAG can have attributes"); if(index < 0 || index >= attributeCount) throw new IndexOutOfBoundsException( "attribute position must be 0.."+(attributeCount-1)+" and not "+index); return "CDATA"; } public boolean isAttributeDefault(int index) { if(eventType != START_TAG) throw new IndexOutOfBoundsException( "only START_TAG can have attributes"); if(index < 0 || index >= attributeCount) throw new IndexOutOfBoundsException( "attribute position must be 0.."+(attributeCount-1)+" and not "+index); return false; } public String getAttributeValue(int index) { if(eventType != START_TAG) throw new IndexOutOfBoundsException( "only START_TAG can have attributes"); if(index < 0 || index >= attributeCount) throw new IndexOutOfBoundsException( "attribute position must be 0.."+(attributeCount-1)+" and not "+index); return attributeValue[ index ]; } public String getAttributeValue(String namespace, String name) { if(eventType != START_TAG) throw new IndexOutOfBoundsException( "only START_TAG can have attributes"+getPositionDescription()); if(name == null) { throw new IllegalArgumentException("attribute name can not be null"); } // TODO make check if namespace is interned!!! etc. for names!!! if(processNamespaces) { if(namespace == null) { namespace = ""; } for(int i = 0; i < attributeCount; ++i) { if((namespace == attributeUri[ i ] || namespace.equals(attributeUri[ i ]) ) //(namespace != null && namespace.equals(attributeUri[ i ])) // taking advantage of String.intern() && name.equals(attributeName[ i ]) ) { return attributeValue[i]; } } } else { if(namespace != null && namespace.length() == 0) { namespace = null; } if(namespace != null) throw new IllegalArgumentException( "when namespaces processing is disabled attribute namespace must be null"); for(int i = 0; i < attributeCount; ++i) { if(name.equals(attributeName[i])) { return attributeValue[i]; } } } return null; } public int getEventType() throws XmlPullParserException { return eventType; } public void require(int type, String namespace, String name) throws XmlPullParserException, IOException { if(processNamespaces == false && namespace != null) { throw new XmlPullParserException( "processing namespaces must be enabled on parser (or factory)"+ " to have possible namespaces delcared on elements" +(" (postion:"+ getPositionDescription())+")"); } if (type != getEventType() || (namespace != null && !namespace.equals (getNamespace())) || (name != null && !name.equals (getName ())) ) { throw new XmlPullParserException ( "expected event "+TYPES[ type ] +(name != null ? " with name '"+name+"'" : "") +(namespace != null && name != null ? " and" : "") +(namespace != null ? " with namespace '"+namespace+"'" : "") +" but got" +(type != getEventType() ? " "+TYPES[ getEventType() ] : "") +(name != null && getName() != null && !name.equals (getName ()) ? " name '"+getName()+"'" : "") +(namespace != null && name != null && getName() != null && !name.equals (getName ()) && getNamespace() != null && !namespace.equals (getNamespace()) ? " and" : "") +(namespace != null && getNamespace() != null && !namespace.equals (getNamespace()) ? " namespace '"+getNamespace()+"'" : "") +(" (postion:"+ getPositionDescription())+")"); } } /** * Skip sub tree that is currently porser positioned on. *
NOTE: parser must be on START_TAG and when funtion returns * parser will be positioned on corresponding END_TAG */ public void skipSubTree() throws XmlPullParserException, IOException { require(START_TAG, null, null); int level = 1; while(level > 0) { int eventType = next(); if(eventType == END_TAG) { --level; } else if(eventType == START_TAG) { ++level; } } } // public String readText() throws XmlPullParserException, IOException // { // if (getEventType() != TEXT) return ""; // String result = getText(); // next(); // return result; // } public String nextText() throws XmlPullParserException, IOException { // String result = null; // boolean onStartTag = false; // if(eventType == START_TAG) { // onStartTag = true; // next(); // } // if(eventType == TEXT) { // result = getText(); // next(); // } else if(onStartTag && eventType == END_TAG) { // result = ""; // } else { // throw new XmlPullParserException( // "parser must be on START_TAG or TEXT to read text", this, null); // } // if(eventType != END_TAG) { // throw new XmlPullParserException( // "event TEXT it must be immediately followed by END_TAG", this, null); // } // return result; if(getEventType() != START_TAG) { throw new XmlPullParserException( "parser must be on START_TAG to read next text", this, null); } int eventType = next(); if(eventType == TEXT) { final String result = getText(); eventType = next(); if(eventType != END_TAG) { throw new XmlPullParserException( "TEXT must be immediately followed by END_TAG and not " +TYPES[ getEventType() ], this, null); } return result; } else if(eventType == END_TAG) { return ""; } else { throw new XmlPullParserException( "parser must be on START_TAG or TEXT to read text", this, null); } } public int nextTag() throws XmlPullParserException, IOException { next(); if(eventType == TEXT && isWhitespace()) { // skip whitespace next(); } if (eventType != START_TAG && eventType != END_TAG) { throw new XmlPullParserException("expected START_TAG or END_TAG not " +TYPES[ getEventType() ], this, null); } return eventType; } public int next() throws XmlPullParserException, IOException { tokenize = false; return nextImpl(); } public int nextToken() throws XmlPullParserException, IOException { tokenize = true; return nextImpl(); } protected int nextImpl() throws XmlPullParserException, IOException { text = null; pcEnd = pcStart = 0; usePC = false; bufStart = posEnd; if(pastEndTag) { pastEndTag = false; --depth; namespaceEnd = elNamespaceCount[ depth ]; // less namespaces available } if(emptyElementTag) { emptyElementTag = false; pastEndTag = true; return eventType = END_TAG; } // [1] document ::= prolog element Misc* if(depth > 0) { if(seenStartTag) { seenStartTag = false; return eventType = parseStartTag(); } if(seenEndTag) { seenEndTag = false; return eventType = parseEndTag(); } // ASSUMPTION: we are _on_ first character of content or markup!!!! // [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* char ch; if(seenMarkup) { // we have read ahead ... seenMarkup = false; ch = '<'; } else if(seenAmpersand) { seenAmpersand = false; ch = '&'; } else { ch = more(); } posStart = pos - 1; // VERY IMPORTANT: this is correct start of event!!! // when true there is some potential event TEXT to return - keep gathering boolean hadCharData = false; // when true TEXT data is not continuous (like ) and requires PC merging boolean needsMerging = false; MAIN_LOOP: while(true) { // work on MARKUP if(ch == '<') { if(hadCharData) { //posEnd = pos - 1; if(tokenize) { seenMarkup = true; return eventType = TEXT; } } ch = more(); if(ch == '/') { if(!tokenize && hadCharData) { seenEndTag = true; //posEnd = pos - 2; return eventType = TEXT; } return eventType = parseEndTag(); } else if(ch == '!') { ch = more(); if(ch == '-') { // note: if(tokenize == false) posStart/End is NOT changed!!!! parseComment(); if(tokenize) return eventType = COMMENT; if( !usePC && hadCharData ) { needsMerging = true; } else { posStart = pos; //completely ignore comment } } else if(ch == '[') { //posEnd = pos - 3; // must remember previous posStart/End as it merges with content of CDATA //int oldStart = posStart + bufAbsoluteStart; //int oldEnd = posEnd + bufAbsoluteStart; parseCDSect(hadCharData); if(tokenize) return eventType = CDSECT; final int cdStart = posStart; final int cdEnd = posEnd; final int cdLen = cdEnd - cdStart; if(cdLen > 0) { // was there anything inside CDATA section? hadCharData = true; if(!usePC) { needsMerging = true; } } // posStart = oldStart; // posEnd = oldEnd; // if(cdLen > 0) { // was there anything inside CDATA section? // if(hadCharData) { // // do merging if there was anything in CDSect!!!! // // if(!usePC) { // // // posEnd is correct already!!! // // if(posEnd > posStart) { // // joinPC(); // // } else { // // usePC = true; // // pcStart = pcEnd = 0; // // } // // } // // if(pcEnd + cdLen >= pc.length) ensurePC(pcEnd + cdLen); // // // copy [cdStart..cdEnd) into PC // // System.arraycopy(buf, cdStart, pc, pcEnd, cdLen); // // pcEnd += cdLen; // if(!usePC) { // needsMerging = true; // posStart = cdStart; // posEnd = cdEnd; // } // } else { // if(!usePC) { // needsMerging = true; // posStart = cdStart; // posEnd = cdEnd; // hadCharData = true; // } // } // //hadCharData = true; // } else { // if( !usePC && hadCharData ) { // needsMerging = true; // } // } } else { throw new XmlPullParserException( "unexpected character in markup "+printable(ch), this, null); } } else if(ch == '?') { parsePI(); if(tokenize) return eventType = PROCESSING_INSTRUCTION; if( !usePC && hadCharData ) { needsMerging = true; } else { posStart = pos; //completely ignore PI } } else if( isNameStartChar(ch) ) { if(!tokenize && hadCharData) { seenStartTag = true; //posEnd = pos - 2; return eventType = TEXT; } return eventType = parseStartTag(); } else { throw new XmlPullParserException( "unexpected character in markup "+printable(ch), this, null); } // do content comapctation if it makes sense!!!! } else if(ch == '&') { // work on ENTITTY //posEnd = pos - 1; if(tokenize && hadCharData) { seenAmpersand = true; return eventType = TEXT; } final int oldStart = posStart + bufAbsoluteStart; final int oldEnd = posEnd + bufAbsoluteStart; final char[] resolvedEntity = parseEntityRef(); if(tokenize) return eventType = ENTITY_REF; // check if replacement text can be resolved !!! if(resolvedEntity == null) { if(entityRefName == null) { entityRefName = newString(buf, posStart, posEnd - posStart); } throw new XmlPullParserException( "could not resolve entity named '"+printable(entityRefName)+"'", this, null); } //int entStart = posStart; //int entEnd = posEnd; posStart = oldStart - bufAbsoluteStart; posEnd = oldEnd - bufAbsoluteStart; if(!usePC) { if(hadCharData) { joinPC(); // posEnd is already set correctly!!! needsMerging = false; } else { usePC = true; pcStart = pcEnd = 0; } } //assert usePC == true; // write into PC replacement text - do merge for replacement text!!!! for ( char aResolvedEntity : resolvedEntity ) { if ( pcEnd >= pc.length ) { ensurePC( pcEnd ); } pc[pcEnd++] = aResolvedEntity; } hadCharData = true; //assert needsMerging == false; } else { if(needsMerging) { //assert usePC == false; joinPC(); // posEnd is already set correctly!!! //posStart = pos - 1; needsMerging = false; } //no MARKUP not ENTITIES so work on character data ... // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) hadCharData = true; boolean normalizedCR = false; final boolean normalizeInput = tokenize == false || roundtripSupported == false; // use loop locality here!!!! boolean seenBracket = false; boolean seenBracketBracket = false; do { // check that ]]> does not show in if(ch == ']') { if(seenBracket) { seenBracketBracket = true; } else { seenBracket = true; } } else if(seenBracketBracket && ch == '>') { throw new XmlPullParserException( "characters ]]> are not allowed in content", this, null); } else { if(seenBracket) { seenBracketBracket = seenBracket = false; } // assert seenTwoBrackets == seenBracket == false; } if(normalizeInput) { // deal with normalization issues ... if(ch == '\r') { normalizedCR = true; posEnd = pos -1; // posEnd is already set if(!usePC) { if(posEnd > posStart) { joinPC(); } else { usePC = true; pcStart = pcEnd = 0; } } //assert usePC == true; if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } else if(ch == '\n') { // if(!usePC) { joinPC(); } else { if(pcEnd >= pc.length) ensurePC(); } if(!normalizedCR && usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } normalizedCR = false; } else { if(usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = ch; } normalizedCR = false; } } ch = more(); } while(ch != '<' && ch != '&'); posEnd = pos - 1; continue MAIN_LOOP; // skip ch = more() from below - we are already ahead ... } ch = more(); } // endless while(true) } else { if(seenRoot) { return parseEpilog(); } else { return parseProlog(); } } } protected int parseProlog() throws XmlPullParserException, IOException { // [2] prolog: ::= XMLDecl? Misc* (doctypedecl Misc*)? and look for [39] element char ch; if(seenMarkup) { ch = buf[ pos - 1 ]; } else { ch = more(); } if(eventType == START_DOCUMENT) { // bootstrap parsing with getting first character input! // deal with BOM // detect BOM and crop it (Unicode int Order Mark) if(ch == '\uFFFE') { throw new XmlPullParserException( "first character in input was UNICODE noncharacter (0xFFFE)"+ "- input requires int swapping", this, null); } if(ch == '\uFEFF') { // skipping UNICODE int Order Mark (so called BOM) ch = more(); } } seenMarkup = false; boolean gotS = false; posStart = pos - 1; final boolean normalizeIgnorableWS = tokenize == true && roundtripSupported == false; boolean normalizedCR = false; while(true) { // deal with Misc // [27] Misc ::= Comment | PI | S // deal with docdecl --> mark it! // else parseStartTag seen <[^/] if(ch == '<') { if(gotS && tokenize) { posEnd = pos - 1; seenMarkup = true; return eventType = IGNORABLE_WHITESPACE; } ch = more(); if(ch == '?') { // check if it is 'xml' // deal with XMLDecl boolean isXMLDecl = parsePI(); if(tokenize) { if (isXMLDecl) { return eventType = START_DOCUMENT; } return eventType = PROCESSING_INSTRUCTION; } } else if(ch == '!') { ch = more(); if(ch == 'D') { if(seenDocdecl) { throw new XmlPullParserException( "only one docdecl allowed in XML document", this, null); } seenDocdecl = true; parseDocdecl(); if(tokenize) return eventType = DOCDECL; } else if(ch == '-') { parseComment(); if(tokenize) return eventType = COMMENT; } else { throw new XmlPullParserException( "unexpected markup posStart) { joinPC(); } else { usePC = true; pcStart = pcEnd = 0; } } //assert usePC == true; if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } else if(ch == '\n') { if(!normalizedCR && usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } normalizedCR = false; } else { if(usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = ch; } normalizedCR = false; } } } else { throw new XmlPullParserException( "only whitespace content allowed before start tag and not "+printable(ch), this, null); } ch = more(); } } protected int parseEpilog() throws XmlPullParserException, IOException { if(eventType == END_DOCUMENT) { throw new XmlPullParserException("already reached end of XML input", this, null); } if(reachedEnd) { return eventType = END_DOCUMENT; } boolean gotS = false; final boolean normalizeIgnorableWS = tokenize == true && roundtripSupported == false; boolean normalizedCR = false; try { // epilog: Misc* char ch; if(seenMarkup) { ch = buf[ pos - 1 ]; } else { ch = more(); } seenMarkup = false; posStart = pos - 1; if(!reachedEnd) { while(true) { // deal with Misc // [27] Misc ::= Comment | PI | S if(ch == '<') { if(gotS && tokenize) { posEnd = pos - 1; seenMarkup = true; return eventType = IGNORABLE_WHITESPACE; } ch = more(); if(reachedEnd) { break; } if(ch == '?') { // check if it is 'xml' // deal with XMLDecl parsePI(); if(tokenize) return eventType = PROCESSING_INSTRUCTION; } else if(ch == '!') { ch = more(); if(reachedEnd) { break; } if(ch == 'D') { parseDocdecl(); //FIXME if(tokenize) return eventType = DOCDECL; } else if(ch == '-') { parseComment(); if(tokenize) return eventType = COMMENT; } else { throw new XmlPullParserException( "unexpected markup posStart) { joinPC(); } else { usePC = true; pcStart = pcEnd = 0; } } //assert usePC == true; if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } else if(ch == '\n') { if(!normalizedCR && usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } normalizedCR = false; } else { if(usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = ch; } normalizedCR = false; } } } else { throw new XmlPullParserException( "in epilog non whitespace content is not allowed but got "+printable(ch), this, null); } ch = more(); if(reachedEnd) { break; } } } // throw Exception("unexpected content in epilog // catch EOFException return END_DOCUMENT //try { } catch(EOFException ex) { reachedEnd = true; } if(reachedEnd) { if(tokenize && gotS) { posEnd = pos; // well - this is LAST available character pos return eventType = IGNORABLE_WHITESPACE; } return eventType = END_DOCUMENT; } else { throw new XmlPullParserException("internal error in parseEpilog"); } } public int parseEndTag() throws XmlPullParserException, IOException { //ASSUMPTION ch is past "' char ch = more(); if(!isNameStartChar(ch)) { throw new XmlPullParserException( "expected name start and not "+printable(ch), this, null); } posStart = pos - 3; final int nameStart = pos - 1 + bufAbsoluteStart; do { ch = more(); } while(isNameChar(ch)); // now we go one level down -- do checks //--depth; //FIXME // check that end tag name is the same as start tag //String name = new String(buf, nameStart - bufAbsoluteStart, // (pos - 1) - (nameStart - bufAbsoluteStart)); //int last = pos - 1; int off = nameStart - bufAbsoluteStart; //final int len = last - off; final int len = (pos - 1) - off; final char[] cbuf = elRawName[depth]; if(elRawNameEnd[depth] != len) { // construct strings for exception final String startname = new String(cbuf, 0, elRawNameEnd[depth]); final String endname = new String(buf, off, len); throw new XmlPullParserException( "end tag name must match start tag name <"+startname+">" +" from line "+elRawNameLine[depth], this, null); } for (int i = 0; i < len; i++) { if(buf[off++] != cbuf[i]) { // construct strings for exception final String startname = new String(cbuf, 0, len); final String endname = new String(buf, off - i - 1, len); throw new XmlPullParserException( "end tag name must be the same as start tag <"+startname+">" +" from line "+elRawNameLine[depth], this, null); } } while(isS(ch)) { ch = more(); } // skip additional white spaces if(ch != '>') { throw new XmlPullParserException( "expected > to finsh end tag not "+printable(ch) +" from line "+elRawNameLine[depth], this, null); } //namespaceEnd = elNamespaceCount[ depth ]; //FIXME posEnd = pos; pastEndTag = true; return eventType = END_TAG; } public int parseStartTag() throws XmlPullParserException, IOException { //ASSUMPTION ch is past ' // [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' ++depth; //FIXME posStart = pos - 2; emptyElementTag = false; attributeCount = 0; // retrieve name final int nameStart = pos - 1 + bufAbsoluteStart; int colonPos = -1; char ch = buf[ pos - 1]; if(ch == ':' && processNamespaces) throw new XmlPullParserException( "when namespaces processing enabled colon can not be at element name start", this, null); while(true) { ch = more(); if(!isNameChar(ch)) break; if(ch == ':' && processNamespaces) { if(colonPos != -1) throw new XmlPullParserException( "only one colon is allowed in name of element when namespaces are enabled", this, null); colonPos = pos - 1 + bufAbsoluteStart; } } // retrieve name ensureElementsCapacity(); //TODO check for efficient interning and then use elRawNameInterned!!!! int elLen = (pos - 1) - (nameStart - bufAbsoluteStart); if(elRawName[ depth ] == null || elRawName[ depth ].length < elLen) { elRawName[ depth ] = new char[ 2 * elLen ]; } System.arraycopy(buf, nameStart - bufAbsoluteStart, elRawName[ depth ], 0, elLen); elRawNameEnd[ depth ] = elLen; elRawNameLine[ depth ] = lineNumber; String name = null; // work on prefixes and namespace URI String prefix = null; if(processNamespaces) { if(colonPos != -1) { prefix = elPrefix[ depth ] = newString(buf, nameStart - bufAbsoluteStart, colonPos - nameStart); name = elName[ depth ] = newString(buf, colonPos + 1 - bufAbsoluteStart, //(pos -1) - (colonPos + 1)); pos - 2 - (colonPos - bufAbsoluteStart)); } else { prefix = elPrefix[ depth ] = null; name = elName[ depth ] = newString(buf, nameStart - bufAbsoluteStart, elLen); } } else { name = elName[ depth ] = newString(buf, nameStart - bufAbsoluteStart, elLen); } while(true) { while(isS(ch)) { ch = more(); } // skip additional white spaces if(ch == '>') { break; } else if(ch == '/') { if(emptyElementTag) throw new XmlPullParserException( "repeated / in tag declaration", this, null); emptyElementTag = true; ch = more(); if(ch != '>') throw new XmlPullParserException( "expected > to end empty tag not "+printable(ch), this, null); break; } else if(isNameStartChar(ch)) { ch = parseAttribute(); ch = more(); continue; } else { throw new XmlPullParserException( "start tag unexpected character "+printable(ch), this, null); } //ch = more(); // skip space } // now when namespaces were declared we can resolve them if(processNamespaces) { String uri = getNamespace(prefix); if(uri == null) { if(prefix == null) { // no prefix and no uri => use default namespace uri = NO_NAMESPACE; } else { throw new XmlPullParserException( "could not determine namespace bound to element prefix "+prefix, this, null); } } elUri[ depth ] = uri; //String uri = getNamespace(prefix); //if(uri == null && prefix == null) { // no prefix and no uri => use default namespace // uri = ""; //} // resolve attribute namespaces for (int i = 0; i < attributeCount; i++) { final String attrPrefix = attributePrefix[ i ]; if(attrPrefix != null) { final String attrUri = getNamespace(attrPrefix); if(attrUri == null) { throw new XmlPullParserException( "could not determine namespace bound to attribute prefix "+attrPrefix, this, null); } attributeUri[ i ] = attrUri; } else { attributeUri[ i ] = NO_NAMESPACE; } } //TODO //[ WFC: Unique Att Spec ] // check namespaced attribute uniqueness contraint!!! for (int i = 1; i < attributeCount; i++) { for (int j = 0; j < i; j++) { if( attributeUri[j] == attributeUri[i] && (allStringsInterned && attributeName[j].equals(attributeName[i]) || (!allStringsInterned && attributeNameHash[ j ] == attributeNameHash[ i ] && attributeName[j].equals(attributeName[i])) ) ) { // prepare data for nice error messgae? String attr1 = attributeName[j]; if(attributeUri[j] != null) attr1 = attributeUri[j]+":"+attr1; String attr2 = attributeName[i]; if(attributeUri[i] != null) attr2 = attributeUri[i]+":"+attr2; throw new XmlPullParserException( "duplicated attributes "+attr1+" and "+attr2, this, null); } } } } else { // ! processNamespaces //[ WFC: Unique Att Spec ] // check raw attribute uniqueness contraint!!! for (int i = 1; i < attributeCount; i++) { for (int j = 0; j < i; j++) { if((allStringsInterned && attributeName[j].equals(attributeName[i]) || (!allStringsInterned && attributeNameHash[ j ] == attributeNameHash[ i ] && attributeName[j].equals(attributeName[i])) ) ) { // prepare data for nice error messgae? final String attr1 = attributeName[j]; final String attr2 = attributeName[i]; throw new XmlPullParserException( "duplicated attributes "+attr1+" and "+attr2, this, null); } } } } elNamespaceCount[ depth ] = namespaceEnd; posEnd = pos; return eventType = START_TAG; } protected char parseAttribute() throws XmlPullParserException, IOException { // parse attribute // [41] Attribute ::= Name Eq AttValue // [WFC: No External Entity References] // [WFC: No < in Attribute Values] final int prevPosStart = posStart + bufAbsoluteStart; final int nameStart = pos - 1 + bufAbsoluteStart; int colonPos = -1; char ch = buf[ pos - 1 ]; if(ch == ':' && processNamespaces) throw new XmlPullParserException( "when namespaces processing enabled colon can not be at attribute name start", this, null); boolean startsWithXmlns = processNamespaces && ch == 'x'; int xmlnsPos = 0; ch = more(); while(isNameChar(ch)) { if(processNamespaces) { if(startsWithXmlns && xmlnsPos < 5) { ++xmlnsPos; if(xmlnsPos == 1) { if(ch != 'm') startsWithXmlns = false; } else if(xmlnsPos == 2) { if(ch != 'l') startsWithXmlns = false; } else if(xmlnsPos == 3) { if(ch != 'n') startsWithXmlns = false; } else if(xmlnsPos == 4) { if(ch != 's') startsWithXmlns = false; } else if(xmlnsPos == 5) { if(ch != ':') throw new XmlPullParserException( "after xmlns in attribute name must be colon" +"when namespaces are enabled", this, null); //colonPos = pos - 1 + bufAbsoluteStart; } } if(ch == ':') { if(colonPos != -1) throw new XmlPullParserException( "only one colon is allowed in attribute name" +" when namespaces are enabled", this, null); colonPos = pos - 1 + bufAbsoluteStart; } } ch = more(); } ensureAttributesCapacity(attributeCount); // --- start processing attributes String name = null; String prefix = null; // work on prefixes and namespace URI if(processNamespaces) { if(xmlnsPos < 4) startsWithXmlns = false; if(startsWithXmlns) { if(colonPos != -1) { //prefix = attributePrefix[ attributeCount ] = null; final int nameLen = pos - 2 - (colonPos - bufAbsoluteStart); if(nameLen == 0) { throw new XmlPullParserException( "namespace prefix is required after xmlns: " +" when namespaces are enabled", this, null); } name = //attributeName[ attributeCount ] = newString(buf, colonPos - bufAbsoluteStart + 1, nameLen); //pos - 1 - (colonPos + 1 - bufAbsoluteStart) } } else { if(colonPos != -1) { int prefixLen = colonPos - nameStart; prefix = attributePrefix[ attributeCount ] = newString(buf, nameStart - bufAbsoluteStart,prefixLen); //colonPos - (nameStart - bufAbsoluteStart)); int nameLen = pos - 2 - (colonPos - bufAbsoluteStart); name = attributeName[ attributeCount ] = newString(buf, colonPos - bufAbsoluteStart + 1, nameLen); //pos - 1 - (colonPos + 1 - bufAbsoluteStart)); //name.substring(0, colonPos-nameStart); } else { prefix = attributePrefix[ attributeCount ] = null; name = attributeName[ attributeCount ] = newString(buf, nameStart - bufAbsoluteStart, pos - 1 - (nameStart - bufAbsoluteStart)); } if(!allStringsInterned) { attributeNameHash[ attributeCount ] = name.hashCode(); } } } else { // retrieve name name = attributeName[ attributeCount ] = newString(buf, nameStart - bufAbsoluteStart, pos - 1 - (nameStart - bufAbsoluteStart)); ////assert name != null; if(!allStringsInterned) { attributeNameHash[ attributeCount ] = name.hashCode(); } } // [25] Eq ::= S? '=' S? while(isS(ch)) { ch = more(); } // skip additional spaces if(ch != '=') throw new XmlPullParserException( "expected = after attribute name", this, null); ch = more(); while(isS(ch)) { ch = more(); } // skip additional spaces // [10] AttValue ::= '"' ([^<&"] | Reference)* '"' // | "'" ([^<&'] | Reference)* "'" final char delimit = ch; if(delimit != '"' && delimit != '\'') throw new XmlPullParserException( "attribute value must start with quotation or apostrophe not " +printable(delimit), this, null); // parse until delimit or < and resolve Reference //[67] Reference ::= EntityRef | CharRef //int valueStart = pos + bufAbsoluteStart; boolean normalizedCR = false; usePC = false; pcStart = pcEnd; posStart = pos; while(true) { ch = more(); if(ch == delimit) { break; } if(ch == '<') { throw new XmlPullParserException( "markup not allowed inside attribute value - illegal < ", this, null); } if(ch == '&') { // extractEntityRef posEnd = pos - 1; if(!usePC) { final boolean hadCharData = posEnd > posStart; if(hadCharData) { // posEnd is already set correctly!!! joinPC(); } else { usePC = true; pcStart = pcEnd = 0; } } //assert usePC == true; final char[] resolvedEntity = parseEntityRef(); // check if replacement text can be resolved !!! if(resolvedEntity == null) { if(entityRefName == null) { entityRefName = newString(buf, posStart, posEnd - posStart); } throw new XmlPullParserException( "could not resolve entity named '"+printable(entityRefName)+"'", this, null); } // write into PC replacement text - do merge for replacement text!!!! for ( char aResolvedEntity : resolvedEntity ) { if ( pcEnd >= pc.length ) { ensurePC( pcEnd ); } pc[pcEnd++] = aResolvedEntity; } } else if(ch == '\t' || ch == '\n' || ch == '\r') { // do attribute value normalization // as described in http://www.w3.org/TR/REC-xml#AVNormalize // TODO add test for it form spec ... // handle EOL normalization ... if(!usePC) { posEnd = pos - 1; if(posEnd > posStart) { joinPC(); } else { usePC = true; pcEnd = pcStart = 0; } } //assert usePC == true; if(pcEnd >= pc.length) ensurePC(pcEnd); if(ch != '\n' || !normalizedCR) { pc[pcEnd++] = ' '; //'\n'; } } else { if(usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = ch; } } normalizedCR = ch == '\r'; } if(processNamespaces && startsWithXmlns) { String ns = null; if(!usePC) { ns = newStringIntern(buf, posStart, pos - 1 - posStart); } else { ns = newStringIntern(pc, pcStart, pcEnd - pcStart); } ensureNamespacesCapacity(namespaceEnd); int prefixHash = -1; if(colonPos != -1) { if(ns.length() == 0) { throw new XmlPullParserException( "non-default namespace can not be declared to be empty string", this, null); } // declare new namespace namespacePrefix[ namespaceEnd ] = name; if(!allStringsInterned) { prefixHash = namespacePrefixHash[ namespaceEnd ] = name.hashCode(); } } else { // declare new default namespace... namespacePrefix[ namespaceEnd ] = null; //""; //null; //TODO check FIXME Alek if(!allStringsInterned) { prefixHash = namespacePrefixHash[ namespaceEnd ] = -1; } } namespaceUri[ namespaceEnd ] = ns; // detect duplicate namespace declarations!!! final int startNs = elNamespaceCount[ depth - 1 ]; for (int i = namespaceEnd - 1; i >= startNs; --i) { if(((allStringsInterned || name == null) && namespacePrefix[ i ] == name) || (!allStringsInterned && name != null && namespacePrefixHash[ i ] == prefixHash && name.equals(namespacePrefix[ i ]) )) { final String s = name == null ? "default" : "'"+name+"'"; throw new XmlPullParserException( "duplicated namespace declaration for "+s+" prefix", this, null); } } ++namespaceEnd; } else { if(!usePC) { attributeValue[ attributeCount ] = new String(buf, posStart, pos - 1 - posStart); } else { attributeValue[ attributeCount ] = new String(pc, pcStart, pcEnd - pcStart); } ++attributeCount; } posStart = prevPosStart - bufAbsoluteStart; return ch; } protected char[] charRefOneCharBuf = new char[1]; protected char[] parseEntityRef() throws XmlPullParserException, IOException { // entity reference http://www.w3.org/TR/2000/REC-xml-20001006#NT-Reference // [67] Reference ::= EntityRef | CharRef // ASSUMPTION just after & entityRefName = null; posStart = pos; char ch = more(); StringBuilder sb = new StringBuilder(); if(ch == '#') { // parse character reference char charRef = 0; ch = more(); if(ch == 'x') { //encoded in hex while(true) { ch = more(); if(ch >= '0' && ch <= '9') { charRef = (char)(charRef * 16 + (ch - '0')); sb.append( ch ); } else if(ch >= 'a' && ch <= 'f') { charRef = (char)(charRef * 16 + (ch - ('a' - 10))); sb.append( ch ); } else if(ch >= 'A' && ch <= 'F') { charRef = (char)(charRef * 16 + (ch - ('A' - 10))); sb.append( ch ); } else if(ch == ';') { break; } else { throw new XmlPullParserException( "character reference (with hex value) may not contain " +printable(ch), this, null); } } } else { // encoded in decimal while(true) { if(ch >= '0' && ch <= '9') { charRef = (char)(charRef * 10 + (ch - '0')); } else if(ch == ';') { break; } else { throw new XmlPullParserException( "character reference (with decimal value) may not contain " +printable(ch), this, null); } ch = more(); } } posEnd = pos - 1; if ( sb.length() > 0 ) { char[] tmp = toChars( Integer.parseInt( sb.toString(), 16 ) ); charRefOneCharBuf = tmp; if ( tokenize ) { text = newString( charRefOneCharBuf, 0, charRefOneCharBuf.length ); } return charRefOneCharBuf; } charRefOneCharBuf[0] = charRef; if(tokenize) { text = newString(charRefOneCharBuf, 0, 1); } return charRefOneCharBuf; } else { // [68] EntityRef ::= '&' Name ';' // scan anem until ; if(!isNameStartChar(ch)) { throw new XmlPullParserException( "entity reference names can not start with character '" +printable(ch)+"'", this, null); } while(true) { ch = more(); if(ch == ';') { break; } if(!isNameChar(ch)) { throw new XmlPullParserException( "entity reference name can not contain character " +printable(ch)+"'", this, null); } } posEnd = pos - 1; // determine what name maps to final int len = posEnd - posStart; if(len == 2 && buf[posStart] == 'l' && buf[posStart+1] == 't') { if(tokenize) { text = "<"; } charRefOneCharBuf[0] = '<'; return charRefOneCharBuf; //if(paramPC || isParserTokenizing) { // if(pcEnd >= pc.length) ensurePC(); // pc[pcEnd++] = '<'; //} } else if(len == 3 && buf[posStart] == 'a' && buf[posStart+1] == 'm' && buf[posStart+2] == 'p') { if(tokenize) { text = "&"; } charRefOneCharBuf[0] = '&'; return charRefOneCharBuf; } else if(len == 2 && buf[posStart] == 'g' && buf[posStart+1] == 't') { if(tokenize) { text = ">"; } charRefOneCharBuf[0] = '>'; return charRefOneCharBuf; } else if(len == 4 && buf[posStart] == 'a' && buf[posStart+1] == 'p' && buf[posStart+2] == 'o' && buf[posStart+3] == 's') { if(tokenize) { text = "'"; } charRefOneCharBuf[0] = '\''; return charRefOneCharBuf; } else if(len == 4 && buf[posStart] == 'q' && buf[posStart+1] == 'u' && buf[posStart+2] == 'o' && buf[posStart+3] == 't') { if(tokenize) { text = "\""; } charRefOneCharBuf[0] = '"'; return charRefOneCharBuf; } else { final char[] result = lookuEntityReplacement(len); if(result != null) { return result; } } if(tokenize) text = null; return null; } } protected char[] lookuEntityReplacement(int entitNameLen) throws XmlPullParserException, IOException { if(!allStringsInterned) { final int hash = fastHash(buf, posStart, posEnd - posStart); LOOP: for (int i = entityEnd - 1; i >= 0; --i) { if(hash == entityNameHash[ i ] && entitNameLen == entityNameBuf[ i ].length) { final char[] entityBuf = entityNameBuf[ i ]; for (int j = 0; j < entitNameLen; j++) { if(buf[posStart + j] != entityBuf[j]) continue LOOP; } if(tokenize) text = entityReplacement[ i ]; return entityReplacementBuf[ i ]; } } } else { entityRefName = newString(buf, posStart, posEnd - posStart); for (int i = entityEnd - 1; i >= 0; --i) { // take advantage that interning for newStirng is enforced if(entityRefName == entityName[ i ]) { if(tokenize) text = entityReplacement[ i ]; return entityReplacementBuf[ i ]; } } } return null; } protected void parseComment() throws XmlPullParserException, IOException { // implements XML 1.0 Section 2.5 Comments //ASSUMPTION: seen ch = more(); if(seenDashDash && ch != '>') { throw new XmlPullParserException( "in comment after two dashes (--) next character must be >" +" not "+printable(ch), this, null); } if(ch == '-') { if(!seenDash) { seenDash = true; } else { seenDashDash = true; seenDash = false; } } else if(ch == '>') { if(seenDashDash) { break; // found end sequence!!!! } else { seenDashDash = false; } seenDash = false; } else { seenDash = false; } if(normalizeIgnorableWS) { if(ch == '\r') { normalizedCR = true; //posEnd = pos -1; //joinPC(); // posEnd is alreadys set if(!usePC) { posEnd = pos -1; if(posEnd > posStart) { joinPC(); } else { usePC = true; pcStart = pcEnd = 0; } } //assert usePC == true; if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } else if(ch == '\n') { if(!normalizedCR && usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } normalizedCR = false; } else { if(usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = ch; } normalizedCR = false; } } } } catch(EOFException ex) { // detect EOF and create meaningful error ... throw new XmlPullParserException( "comment started on line "+curLine+" and column "+curColumn+" was not closed", this, ex); } if(tokenize) { posEnd = pos - 3; if(usePC) { pcEnd -= 2; } } } protected boolean parsePI() throws XmlPullParserException, IOException { // implements XML 1.0 Section 2.6 Processing Instructions // [16] PI ::= '' Char*)))? '?>' // [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) //ASSUMPTION: seen //ch = more(); if(ch == '?') { seenQ = true; } else if(ch == '>') { if(seenQ) { break; // found end sequence!!!! } seenQ = false; } else { if(piTargetEnd == -1 && isS(ch)) { piTargetEnd = pos - 1 + bufAbsoluteStart; // [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) if((piTargetEnd - piTargetStart) == 3) { if((buf[piTargetStart] == 'x' || buf[piTargetStart] == 'X') && (buf[piTargetStart+1] == 'm' || buf[piTargetStart+1] == 'M') && (buf[piTargetStart+2] == 'l' || buf[piTargetStart+2] == 'L') ) { if(piTargetStart > 3) { // posStart) { joinPC(); } else { usePC = true; pcStart = pcEnd = 0; } } //assert usePC == true; if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } else if(ch == '\n') { if(!normalizedCR && usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } normalizedCR = false; } else { if(usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = ch; } normalizedCR = false; } } ch = more(); } } catch(EOFException ex) { // detect EOF and create meaningful error ... throw new XmlPullParserException( "processing instruction started on line "+curLine+" and column "+curColumn +" was not closed", this, ex); } if(piTargetEnd == -1) { piTargetEnd = pos - 2 + bufAbsoluteStart; //throw new XmlPullParserException( // "processing instruction must have PITarget name", this, null); } piTargetStart -= bufAbsoluteStart; piTargetEnd -= bufAbsoluteStart; if(tokenize) { posEnd = pos - 2; if(normalizeIgnorableWS) { --pcEnd; } } return true; } // protected final static char[] VERSION = {'v','e','r','s','i','o','n'}; // protected final static char[] NCODING = {'n','c','o','d','i','n','g'}; // protected final static char[] TANDALONE = {'t','a','n','d','a','l','o','n','e'}; // protected final static char[] YES = {'y','e','s'}; // protected final static char[] NO = {'n','o'}; protected final static char[] VERSION = "version".toCharArray(); protected final static char[] NCODING = "ncoding".toCharArray(); protected final static char[] TANDALONE = "tandalone".toCharArray(); protected final static char[] YES = "yes".toCharArray(); protected final static char[] NO = "no".toCharArray(); protected void parseXmlDecl(char ch) throws XmlPullParserException, IOException { // [23] XMLDecl ::= '' // first make sure that relative positions will stay OK preventBufferCompaction = true; bufStart = 0; // necessary to keep pos unchanged during expansion! // --- parse VersionInfo // [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') // parse is positioned just on first S past 'z') && (ch < 'A' || ch > 'Z') && (ch < '0' || ch > '9') && ch != '_' && ch != '.' && ch != ':' && ch != '-') { throw new XmlPullParserException( " 'z') && (ch < 'A' || ch > 'Z')) { throw new XmlPullParserException( " 'z') && (ch < 'A' || ch > 'Z') && (ch < '0' || ch > '9') && ch != '.' && ch != '_' && ch != '-') { throw new XmlPullParserException( " as last part of ') { throw new XmlPullParserException( "expected ?> as last part of ' int bracketLevel = 0; final boolean normalizeIgnorableWS = tokenize == true && roundtripSupported == false; boolean normalizedCR = false; while(true) { ch = more(); if(ch == '[') ++bracketLevel; if(ch == ']') --bracketLevel; if(ch == '>' && bracketLevel == 0) break; if(normalizeIgnorableWS) { if(ch == '\r') { normalizedCR = true; //posEnd = pos -1; //joinPC(); // posEnd is alreadys set if(!usePC) { posEnd = pos -1; if(posEnd > posStart) { joinPC(); } else { usePC = true; pcStart = pcEnd = 0; } } //assert usePC == true; if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } else if(ch == '\n') { if(!normalizedCR && usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } normalizedCR = false; } else { if(usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = ch; } normalizedCR = false; } } } posEnd = pos - 1; } protected void parseCDSect(boolean hadCharData) throws XmlPullParserException, IOException { // implements XML 1.0 Section 2.7 CDATA Sections // [18] CDSect ::= CDStart CData CDEnd // [19] CDStart ::= '' Char*)) // [21] CDEnd ::= ']]>' //ASSUMPTION: seen posStart) { joinPC(); } else { usePC = true; pcStart = pcEnd = 0; } } } } boolean seenBracket = false; boolean seenBracketBracket = false; boolean normalizedCR = false; while(true) { // scan until it hits "]]>" ch = more(); if(ch == ']') { if(!seenBracket) { seenBracket = true; } else { seenBracketBracket = true; //seenBracket = false; } } else if(ch == '>') { if(seenBracket && seenBracketBracket) { break; // found end sequence!!!! } else { seenBracketBracket = false; } seenBracket = false; } else { if(seenBracket) { seenBracket = false; } } if(normalizeInput) { // deal with normalization issues ... if(ch == '\r') { normalizedCR = true; posStart = cdStart - bufAbsoluteStart; posEnd = pos - 1; // posEnd is alreadys set if(!usePC) { if(posEnd > posStart) { joinPC(); } else { usePC = true; pcStart = pcEnd = 0; } } //assert usePC == true; if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } else if(ch == '\n') { if(!normalizedCR && usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = '\n'; } normalizedCR = false; } else { if(usePC) { if(pcEnd >= pc.length) ensurePC(pcEnd); pc[pcEnd++] = ch; } normalizedCR = false; } } } } catch(EOFException ex) { // detect EOF and create meaningful error ... throw new XmlPullParserException( "CDATA section started on line "+curLine+" and column "+curColumn+" was not closed", this, ex); } if(normalizeInput) { if(usePC) { pcEnd = pcEnd - 2; } } posStart = cdStart - bufAbsoluteStart; posEnd = pos - 3; } protected void fillBuf() throws IOException, XmlPullParserException { if(reader == null) throw new XmlPullParserException( "reader must be set before parsing is started"); // see if we are in compaction area if(bufEnd > bufSoftLimit) { // expand buffer it makes sense!!!! boolean compact = bufStart > bufSoftLimit; boolean expand = false; if(preventBufferCompaction) { compact = false; expand = true; } else if(!compact) { //freeSpace if(bufStart < buf.length / 2) { // less then half buffer available forcompactin --> expand instead!!! expand = true; } else { // at least half of buffer can be reclaimed --> worthwhile effort!!! compact = true; } } // if buffer almost full then compact it if(compact) { //TODO: look on trashing // //assert bufStart > 0 System.arraycopy(buf, bufStart, buf, 0, bufEnd - bufStart); if(TRACE_SIZING) System.out.println( "TRACE_SIZING fillBuf() compacting "+bufStart +" bufEnd="+bufEnd +" pos="+pos+" posStart="+posStart+" posEnd="+posEnd +" buf first 100 chars:"+new String(buf, bufStart, bufEnd - bufStart < 100 ? bufEnd - bufStart : 100 )); } else if(expand) { final int newSize = 2 * buf.length; final char newBuf[] = new char[ newSize ]; if(TRACE_SIZING) System.out.println("TRACE_SIZING fillBuf() "+buf.length+" => "+newSize); System.arraycopy(buf, bufStart, newBuf, 0, bufEnd - bufStart); buf = newBuf; if(bufLoadFactor > 0) { bufSoftLimit = ( bufLoadFactor * buf.length ) /100; } } else { throw new XmlPullParserException("internal error in fillBuffer()"); } bufEnd -= bufStart; pos -= bufStart; posStart -= bufStart; posEnd -= bufStart; bufAbsoluteStart += bufStart; bufStart = 0; if(TRACE_SIZING) System.out.println( "TRACE_SIZING fillBuf() after bufEnd="+bufEnd +" pos="+pos+" posStart="+posStart+" posEnd="+posEnd +" buf first 100 chars:"+new String(buf, 0, bufEnd < 100 ? bufEnd : 100)); } // at least one charcter must be read or error final int len = buf.length - bufEnd > READ_CHUNK_SIZE ? READ_CHUNK_SIZE : buf.length - bufEnd; final int ret = reader.read(buf, bufEnd, len); if(ret > 0) { bufEnd += ret; if(TRACE_SIZING) System.out.println( "TRACE_SIZING fillBuf() after filling in buffer" +" buf first 100 chars:"+new String(buf, 0, bufEnd < 100 ? bufEnd : 100)); return; } if(ret == -1) { if(bufAbsoluteStart == 0 && pos == 0) { throw new EOFException("input contained no data"); } else { if(seenRoot && depth == 0) { // inside parsing epilog!!! reachedEnd = true; return; } else { StringBuilder expectedTagStack = new StringBuilder(); if(depth > 0) { //final char[] cbuf = elRawName[depth]; //final String startname = new String(cbuf, 0, elRawNameEnd[depth]); expectedTagStack.append(" - expected end tag"); if(depth > 1) { expectedTagStack.append("s"); //more than one end tag } expectedTagStack.append(" "); for (int i = depth; i > 0; i--) { String tagName = new String(elRawName[i], 0, elRawNameEnd[i]); expectedTagStack.append("'); } expectedTagStack.append(" to close"); for (int i = depth; i > 0; i--) { if(i != depth) { expectedTagStack.append(" and"); //more than one end tag } String tagName = new String(elRawName[i], 0, elRawNameEnd[i]); expectedTagStack.append( " start tag <" ).append( tagName ).append( ">" ); expectedTagStack.append( " from line " ).append( elRawNameLine[i] ); } expectedTagStack.append(", parser stopped on"); } throw new EOFException("no more data available" +expectedTagStack.toString()+getPositionDescription()); } } } else { throw new IOException("error reading input, returned "+ret); } } protected char more() throws IOException, XmlPullParserException { if(pos >= bufEnd) { fillBuf(); // this return value should be ignonored as it is used in epilog parsing ... if(reachedEnd) return (char)-1; } final char ch = buf[pos++]; //line/columnNumber if(ch == '\n') { ++lineNumber; columnNumber = 1; } else { ++columnNumber; } //System.out.print(ch); return ch; } // /** // * This function returns position of parser in XML input stream // * (how many characters were processed. // *

NOTE: this logical position and not byte offset as encodings // * such as UTF8 may use more than one byte to encode one character. // */ // public int getCurrentInputPosition() { // return pos + bufAbsoluteStart; // } protected void ensurePC(int end) { //assert end >= pc.length; final int newSize = end > READ_CHUNK_SIZE ? 2 * end : 2 * READ_CHUNK_SIZE; final char[] newPC = new char[ newSize ]; if(TRACE_SIZING) System.out.println("TRACE_SIZING ensurePC() "+pc.length+" ==> "+newSize+" end="+end); System.arraycopy(pc, 0, newPC, 0, pcEnd); pc = newPC; //assert end < pc.length; } protected void joinPC() { //assert usePC == false; //assert posEnd > posStart; final int len = posEnd - posStart; final int newEnd = pcEnd + len + 1; if(newEnd >= pc.length) ensurePC(newEnd); // add 1 for extra space for one char //assert newEnd < pc.length; System.arraycopy(buf, posStart, pc, pcEnd, len); pcEnd += len; usePC = true; } protected char requireInput(char ch, char[] input) throws XmlPullParserException, IOException { for ( char anInput : input ) { if ( ch != anInput ) { throw new XmlPullParserException( "expected " + printable( anInput ) + " in " + new String( input ) + " and not " + printable( ch ), this, null ); } ch = more(); } return ch; } protected char requireNextS() throws XmlPullParserException, IOException { final char ch = more(); if(!isS(ch)) { throw new XmlPullParserException( "white space is required and not "+printable(ch), this, null); } return skipS(ch); } protected char skipS(char ch) throws XmlPullParserException, IOException { while(isS(ch)) { ch = more(); } // skip additional spaces return ch; } // nameStart / name lookup tables based on XML 1.1 http://www.w3.org/TR/2001/WD-xml11-20011213/ protected static final int LOOKUP_MAX = 0x400; protected static final char LOOKUP_MAX_CHAR = (char)LOOKUP_MAX; // protected static int lookupNameStartChar[] = new int[ LOOKUP_MAX_CHAR / 32 ]; // protected static int lookupNameChar[] = new int[ LOOKUP_MAX_CHAR / 32 ]; protected static boolean lookupNameStartChar[] = new boolean[ LOOKUP_MAX ]; protected static boolean lookupNameChar[] = new boolean[ LOOKUP_MAX ]; private static final void setName(char ch) //{ lookupNameChar[ (int)ch / 32 ] |= (1 << (ch % 32)); } { lookupNameChar[ ch ] = true; } private static final void setNameStart(char ch) //{ lookupNameStartChar[ (int)ch / 32 ] |= (1 << (ch % 32)); setName(ch); } { lookupNameStartChar[ ch ] = true; setName(ch); } static { setNameStart(':'); for (char ch = 'A'; ch <= 'Z'; ++ch) setNameStart(ch); setNameStart('_'); for (char ch = 'a'; ch <= 'z'; ++ch) setNameStart(ch); for (char ch = '\u00c0'; ch <= '\u02FF'; ++ch) setNameStart(ch); for (char ch = '\u0370'; ch <= '\u037d'; ++ch) setNameStart(ch); for (char ch = '\u037f'; ch < '\u0400'; ++ch) setNameStart(ch); setName('-'); setName('.'); for (char ch = '0'; ch <= '9'; ++ch) setName(ch); setName('\u00b7'); for (char ch = '\u0300'; ch <= '\u036f'; ++ch) setName(ch); } //private final static boolean isNameStartChar(char ch) { protected boolean isNameStartChar(char ch) { return (ch < LOOKUP_MAX_CHAR && lookupNameStartChar[ ch ]) || (ch >= LOOKUP_MAX_CHAR && ch <= '\u2027') || (ch >= '\u202A' && ch <= '\u218F') || (ch >= '\u2800' && ch <= '\uFFEF') ; // if(ch < LOOKUP_MAX_CHAR) return lookupNameStartChar[ ch ]; // else return ch <= '\u2027' // || (ch >= '\u202A' && ch <= '\u218F') // || (ch >= '\u2800' && ch <= '\uFFEF') // ; //return false; // return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == ':' // || (ch >= '0' && ch <= '9'); // if(ch < LOOKUP_MAX_CHAR) return (lookupNameStartChar[ (int)ch / 32 ] & (1 << (ch % 32))) != 0; // if(ch <= '\u2027') return true; // //[#x202A-#x218F] // if(ch < '\u202A') return false; // if(ch <= '\u218F') return true; // // added pairts [#x2800-#xD7FF] | [#xE000-#xFDCF] | [#xFDE0-#xFFEF] | [#x10000-#x10FFFF] // if(ch < '\u2800') return false; // if(ch <= '\uFFEF') return true; // return false; // else return (supportXml11 && ( (ch < '\u2027') || (ch > '\u2029' && ch < '\u2200') ... } //private final static boolean isNameChar(char ch) { protected boolean isNameChar(char ch) { //return isNameStartChar(ch); // if(ch < LOOKUP_MAX_CHAR) return (lookupNameChar[ (int)ch / 32 ] & (1 << (ch % 32))) != 0; return (ch < LOOKUP_MAX_CHAR && lookupNameChar[ ch ]) || (ch >= LOOKUP_MAX_CHAR && ch <= '\u2027') || (ch >= '\u202A' && ch <= '\u218F') || (ch >= '\u2800' && ch <= '\uFFEF') ; //return false; // return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == ':' // || (ch >= '0' && ch <= '9'); // if(ch < LOOKUP_MAX_CHAR) return (lookupNameStartChar[ (int)ch / 32 ] & (1 << (ch % 32))) != 0; //else return // else if(ch <= '\u2027') return true; // //[#x202A-#x218F] // else if(ch < '\u202A') return false; // else if(ch <= '\u218F') return true; // // added pairts [#x2800-#xD7FF] | [#xE000-#xFDCF] | [#xFDE0-#xFFEF] | [#x10000-#x10FFFF] // else if(ch < '\u2800') return false; // else if(ch <= '\uFFEF') return true; //else return false; } protected boolean isS(char ch) { return (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t'); // || (supportXml11 && (ch == '\u0085' || ch == '\u2028'); } //protected boolean isChar(char ch) { return (ch < '\uD800' || ch > '\uDFFF') // ch != '\u0000' ch < '\uFFFE' //protected char printable(char ch) { return ch; } protected String printable(char ch) { if(ch == '\n') { return "\\n"; } else if(ch == '\r') { return "\\r"; } else if(ch == '\t') { return "\\t"; } else if(ch == '\'') { return "\\'"; } if(ch > 127 || ch < 32) { return "\\u"+Integer.toHexString((int)ch); } return ""+ch; } protected String printable(String s) { if(s == null) return null; final int sLen = s.length(); StringBuilder buf = new StringBuilder(sLen + 10); for(int i = 0; i < sLen; ++i) { buf.append(printable(s.charAt(i))); } s = buf.toString(); return s; } // // Imported code from ASF Harmony project rev 770909 // http://svn.apache.org/repos/asf/harmony/enhanced/classlib/trunk/modules/luni/src/main/java/java/lang/Character.java // private static int toCodePoint( char high, char low ) { // See RFC 2781, Section 2.2 // http://www.faqs.org/rfcs/rfc2781.html int h = ( high & 0x3FF ) << 10; int l = low & 0x3FF; return ( h | l ) + 0x10000; } private static final char MIN_HIGH_SURROGATE = '\uD800'; private static final char MAX_HIGH_SURROGATE = '\uDBFF'; private static boolean isHighSurrogate( char ch ) { return ( MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch ); } private static final int MIN_CODE_POINT = 0x000000; private static final int MAX_CODE_POINT = 0x10FFFF; private static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; private static boolean isValidCodePoint( int codePoint ) { return ( MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint ); } private static boolean isSupplementaryCodePoint( int codePoint ) { return ( MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint ); } /** * TODO add javadoc * * @param codePoint * @return */ public static char[] toChars( int codePoint ) { if ( !isValidCodePoint( codePoint ) ) { throw new IllegalArgumentException(); } if ( isSupplementaryCodePoint( codePoint ) ) { int cpPrime = codePoint - 0x10000; int high = 0xD800 | ( ( cpPrime >> 10 ) & 0x3FF ); int low = 0xDC00 | ( cpPrime & 0x3FF ); return new char[] { (char) high, (char) low }; } return new char[] { (char) codePoint }; } } /* * Indiana University Extreme! Lab Software License, Version 1.2 * * Copyright (C) 2003 The Trustees of Indiana University. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * 1) All redistributions of source code must retain the above * copyright notice, the list of authors in the original source * code, this list of conditions and the disclaimer listed in this * license; * * 2) All redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the disclaimer * listed in this license in the documentation and/or other * materials provided with the distribution; * * 3) Any documentation included with all redistributions must include * the following acknowledgement: * * "This product includes software developed by the Indiana * University Extreme! Lab. For further information please visit * http://www.extreme.indiana.edu/" * * Alternatively, this acknowledgment may appear in the software * itself, and wherever such third-party acknowledgments normally * appear. * * 4) The name "Indiana University" or "Indiana University * Extreme! Lab" shall not be used to endorse or promote * products derived from this software without prior written * permission from Indiana University. For written permission, * please contact http://www.extreme.indiana.edu/. * * 5) Products derived from this software may not use "Indiana * University" name nor may "Indiana University" appear in their name, * without prior written permission of the Indiana University. * * Indiana University provides no reassurances that the source code * provided does not infringe the patent or any other intellectual * property rights of any other entity. Indiana University disclaims any * liability to any recipient for claims brought by any other entity * based on infringement of intellectual property rights or otherwise. * * LICENSEE UNDERSTANDS THAT SOFTWARE IS PROVIDED "AS IS" FOR WHICH * NO WARRANTIES AS TO CAPABILITIES OR ACCURACY ARE MADE. INDIANA * UNIVERSITY GIVES NO WARRANTIES AND MAKES NO REPRESENTATION THAT * SOFTWARE IS FREE OF INFRINGEMENT OF THIRD PARTY PATENT, COPYRIGHT, OR * OTHER PROPRIETARY RIGHTS. INDIANA UNIVERSITY MAKES NO WARRANTIES THAT * SOFTWARE IS FREE FROM "BUGS", "VIRUSES", "TROJAN HORSES", "TRAP * DOORS", "WORMS", OR OTHER HARMFUL CODE. LICENSEE ASSUMES THE ENTIRE * RISK AS TO THE PERFORMANCE OF SOFTWARE AND/OR ASSOCIATED MATERIALS, * AND TO THE PERFORMANCE AND VALIDITY OF INFORMATION GENERATED USING * SOFTWARE. */





© 2015 - 2025 Weber Informatics LLC | Privacy Policy