All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hfg.xml.parser.SaxyParser Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.xml.parser;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.util.*;
import javax.xml.parsers.SAXParser;

import org.xml.sax.*;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;

import com.hfg.util.BooleanUtil;
import com.hfg.util.StringBuilderPlus;
import com.hfg.xml.Doctype;
import com.hfg.xml.XMLNamespace;

//------------------------------------------------------------------------------
/**
  SaxyParser is a lightweight SAX parser.
  
Important (for me) differences between SaxyParser and Xerces:
  • SaxyParser does NOT close InputSources upon the completion of parsing.
  • SaxyParser can handle the interleaving of content and subtags.
@author J. Alex Taylor, hairyfatguy.com */ //------------------------------------------------------------------------------ // com.hfg XML/HTML Coding Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class SaxyParser extends SAXParser implements XMLReader { //########################################################################### // PUBLIC FIELDS //########################################################################### /** Parser property to enable entity expansion.
    http://hairyfatguy.com/sax/properties/entity-expansion
    
False by default. */ public static final String ENTITY_EXPANSION_PROPERTY = "http://hairyfatguy.com/sax/properties/entity-expansion"; /** Parser property to enable strict parsing mode.
    http://hairyfatguy.com/sax/properties/strict
    
False by default. */ public static final String STRICT_PROPERTY = "http://hairyfatguy.com/sax/properties/strict"; //########################################################################### // PRIVATE FIELDS //########################################################################### private Doctype mDoctype; private ContentHandler mContentHandler; private LexicalHandler mLexicalHandler; private DTDHandler mDTDHandler; private ErrorHandler mErrHandler; private EntityResolver mEntityResolver; private boolean debug = false; private boolean mExpandEntities = false; private boolean mStrict = false; private BufferedReader mBufferedReader; private int mLineCount; private int mColumn; private boolean mEOL; private char[] mTempBuffer = new char[100]; private int mState; private Stack mTagStack; private Stack mStateStack; private int mDepth; private Map mNamespaceMap; private Stack mDefaultNamespaceStack; private boolean mLenientHTMLParsing; private Collection mLenientHTMLEmptyTags = sLenientHTMLEmptyTags; // Enumerated parser states private static final int INITIAL = 0; private static final int IN_START_OF_TAG = 1; private static final int IN_XML_DECLARATION = 2; private static final int IN_DOCTYPE = 3; private static final int IN_START_TAG = 4; private static final int IN_END_TAG = 5; private static final int IN_CONTENT = 6; private static final int IN_ENTITY = 7; private static final int IN_CDATA = 8; private static final int IN_COMMENT = 9; private static final int IN_EMPTY_TAG = 10; private static final int IN_MIDDLE_OF_START_TAG = 11; private static final int IN_ATT_NAME = 12; private static final int IN_ATT_VALUE = 13; private static final int DONE = 14; private static final int TAG_START = '<'; private static final int TAG_END = '>'; private static final int ENTITY_START = '&'; private static final int MAX_ENTITY_LENGTH = 12; private static String XMLNS = "xmlns".intern(); private static final String LEXICAL_HANDLER_PROPERTY = 
"http://xml.org/sax/properties/lexical-handler"; private static Set sLenientHTMLEmptyTags; private static Set sLenientHTMLTags; static { sLenientHTMLEmptyTags = new HashSet<>(10); sLenientHTMLEmptyTags.add("br"); sLenientHTMLEmptyTags.add("hr"); sLenientHTMLEmptyTags.add("img"); sLenientHTMLEmptyTags.add("input"); sLenientHTMLEmptyTags.add("link"); sLenientHTMLEmptyTags.add("meta"); sLenientHTMLTags = new HashSet<>(10); sLenientHTMLTags.add("a"); sLenientHTMLTags.add("p"); } //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- public SaxyParser() { super(); } //########################################################################### // PUBLIC METHODS //########################################################################### //---------------------------------------------------------------------- public SaxyParser setLenientHTMLParsing(boolean inValue) { mLenientHTMLParsing = inValue; return this; } //---------------------------------------------------------------------- public SaxyParser setLenientHTMLEmptyTags(Collection inEmptyTags) { mLenientHTMLEmptyTags = inEmptyTags; return this; } //--------------------------------------------------------------------------- /** After parsing, the Doctype (if one was set) can be retrieved here. */ public Doctype getDoctype() { return mDoctype; } // METHODS REQUIRED TO EXTEND SaxParser //--------------------------------------------------------------------------- @Override @SuppressWarnings("deprecation") public Parser getParser() throws SAXException { throw new SAXException("getParser() not supported. 
User getXMLReader()."); } //--------------------------------------------------------------------------- @Override public XMLReader getXMLReader() { return this; } //--------------------------------------------------------------------------- @Override public boolean isNamespaceAware() { return true; } //--------------------------------------------------------------------------- @Override public boolean isValidating() { return false; } //--------------------------------------------------------------------------- @Override public void setProperty(String inName, Object inValue) throws SAXNotRecognizedException { if (inName.equals(LEXICAL_HANDLER_PROPERTY)) { setLexicalHandler((LexicalHandler)inValue); } else if (inName.equals(ENTITY_EXPANSION_PROPERTY)) { mExpandEntities = BooleanUtil.valueOf(inValue); } else if (inName.equals(STRICT_PROPERTY)) { mStrict = BooleanUtil.valueOf(inValue); } else { throw new SAXNotRecognizedException("Property '" + inName + "' not supported."); } } //--------------------------------------------------------------------------- @Override public Object getProperty(String inName) throws SAXNotRecognizedException { Object obj; if (inName.equals(LEXICAL_HANDLER_PROPERTY)) { obj = getLexicalHandler(); } else if (inName.equals(ENTITY_EXPANSION_PROPERTY)) { obj = Boolean.valueOf(mExpandEntities); } else if (inName.equals(STRICT_PROPERTY)) { obj = Boolean.valueOf(mStrict); } else { throw new SAXNotRecognizedException("Property '" + inName + "' not supported."); } return obj; } // METHODS REQUIRED BY THE XMLReader interface //--------------------------------------------------------------------------- @Override public ContentHandler getContentHandler() { return mContentHandler; } //--------------------------------------------------------------------------- @Override public void setContentHandler(ContentHandler handler) { mContentHandler = handler; if (handler instanceof LexicalHandler && null == mLexicalHandler) { mLexicalHandler = (LexicalHandler) 
handler; } if (handler instanceof EntityResolver && null == mEntityResolver) { mEntityResolver = (EntityResolver) handler; } } //--------------------------------------------------------------------------- @Override public DTDHandler getDTDHandler() { return mDTDHandler; } //--------------------------------------------------------------------------- @Override public void setDTDHandler(DTDHandler handler) { mDTDHandler = handler; } //--------------------------------------------------------------------------- @Override public ErrorHandler getErrorHandler() { return mErrHandler; } //--------------------------------------------------------------------------- @Override public void setErrorHandler(ErrorHandler handler) { mErrHandler = handler; } //--------------------------------------------------------------------------- @Override public EntityResolver getEntityResolver() { return mEntityResolver; } //--------------------------------------------------------------------------- @Override public void setEntityResolver(EntityResolver resolver) { mEntityResolver = resolver; } //--------------------------------------------------------------------------- @Override public void setFeature(String inName, boolean inValue) throws SAXNotRecognizedException { if (inName.equals("http://xml.org/sax/features/namespaces")) { // Namespaces is always on. } else if (inName.equals("http://xml.org/sax/features/namespace-prefixes")) { // Always set to false. 
} else { throw new SAXNotRecognizedException("Feature '" + inName + "' not supported."); } } //--------------------------------------------------------------------------- @Override public boolean getFeature(String inName) throws SAXNotRecognizedException { boolean outValue = false; if (inName.equals("http://xml.org/sax/features/namespaces")) { outValue = true; } else if (inName.equals("http://xml.org/sax/features/namespace-prefixes")) { outValue = false; } else { throw new SAXNotRecognizedException("Feature '" + inName + "' not supported."); } return outValue; } /* //--------------------------------------------------------------------------- public void setLocale(Locale locale) throws SAXException { throw new SAXException("setLocale() is not supported by this parser."); } */ //--------------------------------------------------------------------------- public LexicalHandler getLexicalHandler() { return mLexicalHandler; } //--------------------------------------------------------------------------- public void setLexicalHandler(LexicalHandler handler) { mLexicalHandler = handler; } //--------------------------------------------------------------------------- public void parse(String inSystemId) throws SAXException, IOException { parse(new InputSource(inSystemId)); } //--------------------------------------------------------------------------- public XMLNamespace getCurrentDefaultNamespace() { return (mDefaultNamespaceStack.size() > 0 ? 
XMLNamespace.getNamespace(mDefaultNamespaceStack.peek().getURI()) : null); } //--------------------------------------------------------------------------- private int nextChar() throws IOException { int c = mBufferedReader.read(); if (c == -1) throw new EOFException(); // Need to map \r, \r\n, and \n to \n // See XML spec section 2.11 if (c <= '\n') { if (c == '\n' && mEOL) { mEOL = false; c = nextChar(); } else if (mEOL) { mEOL = false; } else if (c == '\n') { mLineCount++; mColumn = 0; } else if (c == '\r') { mEOL = true; c = '\n'; mLineCount++; mColumn = 0; } else { mColumn++; } } else { mColumn++; } return c; } //--------------------------------------------------------------------------- private void skipEndTag(int c) throws SAXException, IOException { String openTag; try { openTag = (String) mTagStack.peek(); } catch (EmptyStackException e) { StringBuffer extraEndTag = new StringBuffer(c); while ((c = nextChar()) != TAG_END) { extraEndTag.append(c); } throw new SAXException("End tag '" + extraEndTag + "' didn't have a matching start tag!"); } mTempBuffer[0] = (char)c; int bytesToRead = openTag.length(); int bytesRead = mBufferedReader.read(mTempBuffer, 1, bytesToRead); // Calls to read() are not guaranteed to read the full requested length. // Allow for partial reads. while (bytesRead < bytesToRead) { if (bytesRead <= 0) { throw new SAXException("Expected end tag '" + openTag + "' but reached the end of the file."); } bytesRead += mBufferedReader.read(mTempBuffer, bytesRead + 1, bytesToRead - bytesRead); } mColumn += bytesToRead; if (! openTag.equals(new String(mTempBuffer, 0, openTag.length()))) { // The end tag isn't a match. For a clean error message, determine the end tag name. StringBuffer unexpectedEndTag = new StringBuffer(); for (int i = 0; i <= openTag.length(); i++) { c = mTempBuffer[i]; if (TAG_END == c) { break; } unexpectedEndTag.append((char) c); } // If we haven't yet reached the ending bracket, keep going. 
if (c != TAG_END) { while ((c = nextChar()) != TAG_END) { unexpectedEndTag.append((char) c); } } if (! mLenientHTMLParsing || ! sLenientHTMLTags.contains(openTag.toLowerCase())) { throw new SAXException("Expected '" + openTag + "' end tag but found '" + unexpectedEndTag + "' end tag instead!" + " Line: " + mLineCount + " Col: " + mColumn); } } if (mTempBuffer[bytesToRead] != TAG_END) { throw new SAXException("Problem with end tag '" + mTempBuffer[bytesToRead] + "'! Line: " + mLineCount + " Col: " + mColumn); } } //--------------------------------------------------------------------------- @Override public void parse(InputSource source) throws SAXException, IOException { initNamespaceStructures(); mStateStack = new Stack<>(); mTagStack = new Stack<>(); mDepth = 0; XMLStringBuffer buffer = new XMLStringBuffer(); XMLStringBuffer entity = new XMLStringBuffer(); String tagName = null; String tagURI = null; String tagLocalName = null; String attName = null; AttributesImpl attributes = new AttributesImpl(); mLineCount = 1; mColumn = 0; mEOL = false; int quoteChar = -1; // I'm not sure of any other way that will work to even out the differences between character and binary sources. 
try { mBufferedReader = new BufferedReader(source.getCharacterStream(), 8192); } catch (NullPointerException e) { mBufferedReader = new BufferedReader(new InputStreamReader(source.getByteStream()), 8192); } mContentHandler.startDocument(); mState = INITIAL; try { int c; while (mState != DONE) { if (mState == INITIAL) { pushState(); } c = nextChar(); switch (mState) { case IN_CONTENT: while (true) { // We are in tag content if (c == TAG_START) { pushState(); mState = IN_START_OF_TAG; if (buffer.length() > 0) { if (buffer.hasNonwhitespaceContent()) { mContentHandler.characters(buffer.getCharArray(), 0, buffer.length()); } buffer.clear(); } break; } else if (c == ENTITY_START) { pushState(); mState = IN_ENTITY; entity.clear(); break; } else { buffer.append((char) c); } c = nextChar(); } break; case INITIAL: while (true) { if (c == -1) { mState = DONE; break; } else if (c == TAG_START) { pushState(); mState = IN_START_OF_TAG; break; } else if (!Character.isWhitespace((char) c)) { throw new SAXException("Unexpected text outside of tags (" + (char) c + ")!" + " Line: " + mLineCount + " Col: " + mColumn); } c = nextChar(); } break; case IN_START_OF_TAG: // What type of tag does it look like we've wandered into? if (c == '/') { popState(); mState = IN_END_TAG; tagName = null; tagLocalName = null; } else if (c == '?') { mState = IN_XML_DECLARATION; } else if (tagLocalName != null && tagLocalName.equalsIgnoreCase("script") && mLenientHTMLParsing) { // The '<' was found within a script tag. Treat it as part of the content. 
popState(); mState = IN_CONTENT; } else { mState = IN_START_TAG; tagName = null; attributes.clear(); buffer.append((char) c); } break; case IN_START_TAG: while (true) { if (Character.isWhitespace((char) c)) { tagName = buffer.toString(); pushDepth(tagName); buffer.clear(); mState = IN_MIDDLE_OF_START_TAG; break; } else if (c == TAG_END) { if (null == tagName) tagName = buffer.toString(); pushDepth(tagName); tagURI = getNamespaceURI(getNamespacePrefix(tagName)); tagLocalName = getLocalName(tagName); mContentHandler.startElement(tagURI, tagLocalName, tagName, attributes); if (mLenientHTMLParsing && mLenientHTMLEmptyTags.contains(tagLocalName.toLowerCase())) { mContentHandler.endElement(tagURI, tagLocalName, tagName); mTagStack.pop(); buffer.clear(); popDepth(); popState(); } else { mState = IN_CONTENT; } buffer.clear(); break; } else if (c == '/') { if (null == tagName) tagName = buffer.toString(); pushDepth(tagName); mState = IN_EMPTY_TAG; break; } else if (c == '-' && buffer.toString().equals("!-")) { mState = IN_COMMENT; break; } else if (c == '[' && buffer.toString().equals("![CDATA")) { mState = IN_CDATA; if (mLexicalHandler != null) mLexicalHandler.startCDATA(); buffer.clear(); break; } else if ((c == 'E' || c == 'e') && buffer.toString().equalsIgnoreCase("!DOCTYP")) { mState = IN_DOCTYPE; // buffer.clear(); buffer.prepend("<"); buffer.append((char) c); break; } else { buffer.append((char) c); } c = nextChar(); } break; case IN_MIDDLE_OF_START_TAG: while (true) { if (c == TAG_END) { tagURI = getNamespaceURI(getNamespacePrefix(tagName)); tagLocalName = getLocalName(tagName); mContentHandler.startElement(tagURI, tagLocalName, tagName, attributes); if (mLenientHTMLParsing && mLenientHTMLEmptyTags.contains(tagLocalName.toLowerCase())) { mContentHandler.endElement(tagURI, tagLocalName, tagName); mTagStack.pop(); popDepth(); popState(); } else { mState = IN_CONTENT; } buffer.clear(); break; } else if (c == '/') { mState = IN_EMPTY_TAG; break; } else if 
(!Character.isWhitespace((char) c)) { mState = IN_ATT_NAME; attName = null; buffer.append((char) c); break; } c = nextChar(); } break; case IN_ATT_NAME: while (true) { if (c == '=') { attName = buffer.toString(); mState = IN_ATT_VALUE; buffer.clear(); break; } else { buffer.append((char) c); } c = nextChar(); } break; case IN_ATT_VALUE: // It should start with a quote char (single or double quote) if (quoteChar == -1) { if (c == '"' || c == '\'') { quoteChar = c; c = nextChar(); } else if (! mLenientHTMLParsing) { throw new SAXException("Improper attribute construction. Expected a quote character!" + mLineCount); } } while (true) { if (c == quoteChar || (quoteChar == -1 && "> \t\r\n".contains(((char)c) + ""))) { String attValue = buffer.toString(); //debugMsg("attName: '" + attName + "'"); if (attName.startsWith("xmlns:") || attName.equals(XMLNS)) { extractNamespaceDeclaration(attName, attValue); } attributes.addAttribute("", attName, attName, "CDATA", attValue); buffer.clear(); quoteChar = -1; if (c == '>') { tagURI = getNamespaceURI(getNamespacePrefix(tagName)); tagLocalName = getLocalName(tagName); mContentHandler.startElement(tagURI, tagLocalName, tagName, attributes); mState = IN_CONTENT; } else { mState = IN_MIDDLE_OF_START_TAG; } break; } else if (c == ENTITY_START) { pushState(); mState = IN_ENTITY; entity.clear(); break; } else if (" \r\n\u0009".indexOf(c) >= 0) { buffer.append(' '); } else { buffer.append((char) c); } c = nextChar(); } break; case IN_EMPTY_TAG: if (c != TAG_END) { throw new SAXException("Expected '>' for tag <" + tagName + "/>" + " Line: " + mLineCount + " Col: " + mColumn); } tagURI = getNamespaceURI(getNamespacePrefix(tagName)); tagLocalName = getLocalName(tagName); mContentHandler.startElement(tagURI, tagLocalName, tagName, attributes); mContentHandler.endElement(tagURI, tagLocalName, tagName); mTagStack.pop(); buffer.clear(); popDepth(); popState(); break; case IN_END_TAG: // The ending tag better be the one currently open. 
// (Otherwise it's an error.) Try to take advantage of this precognition. skipEndTag(c); tagName = (String) mTagStack.pop(); mContentHandler.endElement(tagURI, getLocalName(tagName), tagName); buffer.clear(); popDepth(); popState(); break; case IN_ENTITY: boolean recoverFromInvalidEntity = false; while (true) { if (c == ';') { String expandedEntity = expandEntity(entity.toString()); buffer.append((expandedEntity != null ? expandedEntity : "&" + entity.toString() + ";")); entity.clear(); popState(); break; } else if (Character.isWhitespace((char) c)) { if (mStrict) { throw new SAXException("Character entity contains whitespace !?" + " Line: " + mLineCount + " Col: " + mColumn); } else { recoverFromInvalidEntity = true; break; } } else if (c == '"' || c == '\'' || c == '>') { if (mStrict) { throw new SAXException("Character entity contains illegal character !?" + " Line: " + mLineCount + " Col: " + mColumn); } else { recoverFromInvalidEntity = true; break; } } else if (entity.length() >= MAX_ENTITY_LENGTH) { // Either the entity was malformed or the ampersand should have been escaped. if (mStrict) { throw new SAXException("Max entity length (" + MAX_ENTITY_LENGTH + ") exceeded!?" 
+ " Line: " + mLineCount + " Col: " + mColumn); } else { recoverFromInvalidEntity = true; break; } } else { entity.append((char) c); } c = nextChar(); } if (recoverFromInvalidEntity) { entity.append((char) c); buffer.append("&" + entity.toString()); entity.clear(); popState(); if (IN_ATT_VALUE == mState) { if (c == quoteChar) { String attValue = buffer.toString(); //debugMsg("attName: '" + attName + "'"); if (attName.startsWith("xmlns:") || attName.equals(XMLNS)) { extractNamespaceDeclaration(attName, attValue); } attributes.addAttribute("", attName, attName, "CDATA", attValue); buffer.clear(); quoteChar = -1; mState = IN_MIDDLE_OF_START_TAG; break; } } else if (IN_CONTENT == mState) { if (c == TAG_START) { pushState(); mState = IN_START_OF_TAG; if (buffer.length() > 0) { if (buffer.hasNonwhitespaceContent()) { mContentHandler.characters(buffer.getCharArray(), 0, buffer.length()); } buffer.clear(); } } } } break; case IN_CDATA: while (true) { if (c == TAG_END && buffer.toString().endsWith("]]")) { buffer.setLength(buffer.length() - 2); mContentHandler.characters(buffer.getCharArray(), 0, buffer.length()); if (mLexicalHandler != null) mLexicalHandler.endCDATA(); popState(); buffer.clear(); break; } else { buffer.append((char) c); } c = nextChar(); } break; case IN_COMMENT: while (true) { // Inside a comment: if (c == TAG_END && buffer.toString().endsWith("--")) { if (mLexicalHandler != null) mLexicalHandler.comment(buffer.getCharArray(), 2, buffer.length() - 4); popState(); buffer.clear(); break; } else { buffer.append((char) c); } c = nextChar(); } break; case IN_XML_DECLARATION: while (true) { buffer.append((char) c); if (c == TAG_END) { popState(); if (mState == DONE) mState = INITIAL; buffer.clear(); break; } c = nextChar(); } break; case IN_DOCTYPE: while (true) { buffer.append((char) c); if (c == TAG_END) { popState(); if (mState == DONE) mState = INITIAL; mDoctype = Doctype.valueOf(buffer.toString()); buffer.clear(); break; } c = nextChar(); } break; } } } 
catch (EOFException e) { // Ignore. } catch (SAXException e) { throw e; } catch (Exception e) { throw new SAXException("Problem parsing XML! Line: " + mLineCount + " Col: " + mColumn, e); } if (mDepth != 0) { StringBuilderPlus msg = new StringBuilderPlus("XML Document is not properly ended! Remaining unclosed tags: " + mTagStack.pop()).setDelimiter(", "); while (mTagStack.size() > 0) { msg.delimitedAppend(mTagStack.pop()); } throw new SAXException(msg.toString()); } else if (mState != INITIAL && mState != DONE) { throw new SAXException("Parser ended in bad state (" + mState + ")!"); } else { mContentHandler.endDocument(); } } //########################################################################### // PRIVATE METHODS //########################################################################### //--------------------------------------------------------------------------- private void debugMsg(String inMsg) { if (debug) { System.err.println(inMsg); } } //--------------------------------------------------------------------------- private String expandEntity(String inEntity) throws SAXException { String expandedEntity = null; if (mExpandEntities) { if (inEntity.startsWith("#")) { if (inEntity.charAt(1) == 'x') { // Hex expandedEntity = "" + (char) Integer.parseInt(inEntity.substring(2), 16); } else { // Decimal expandedEntity = "" + (char) Integer.parseInt(inEntity.substring(1)); } } else { // Try to resolve it with our battery of standard entity classes. expandedEntity = SpecialCharacterEntities.resolveEntity(inEntity); if (null == expandedEntity) { expandedEntity = Latin1Entities.getInstance().getNumericEntity(inEntity); } if (null == expandedEntity) { expandedEntity = SymbolEntities.getInstance().getNumericEntity(inEntity); } if (expandedEntity != null && expandedEntity.startsWith("#")) { expandedEntity = "" + (char) Integer.parseInt(expandedEntity.substring(1)); } } // Have we resolved it yet? If not, use the custom entity resolver if one // has been specified. 
if (null == expandedEntity) { if (mEntityResolver != null) { InputStream inStream = null; try { InputSource expandedEntitySource = mEntityResolver.resolveEntity(null, inEntity); if (expandedEntitySource != null) { inStream = expandedEntitySource.getByteStream(); StringBuffer entityBuffer = new StringBuffer(); int c = 0; while ((c = inStream.read()) != -1) { entityBuffer.append((char) c); } expandedEntity = entityBuffer.toString(); } else { throw new SAXException("'" + inEntity + "' is not a recognized entity."); } } catch (IOException e) { throw new SAXException(e); } finally { if (inStream != null) { try { inStream.close(); } catch (IOException e) { } } } } } } return expandedEntity; } //--------------------------------------------------------------------------- private Attributes generateAttributeList(HashMap inAttributes) { // NOTE: WE ARE NOT CURRENTLY SUPPORTING ATTRIBUTE TYPES XXXXXXXXXXXXXX // The attribute type is one of the strings "CDATA", "ID", "IDREF", "IDREFS", // "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES", or "NOTATION" (always in upper case). // // If the parser has not read a declaration for the attribute, or if the parser // does not report attribute types, then it must return the value "CDATA" as // stated in the XML 1.0 Recommentation (clause 3.3.3, "Attribute-Value Normalization"). // // For an enumerated attribute that is not a notation, the parser will report // the type as "NMTOKEN". 
String attType = "CDATA"; AttributesImpl attList = new AttributesImpl(); if (inAttributes != null) { Iterator iter = inAttributes.entrySet().iterator(); while (iter.hasNext()) { Map.Entry att = (Map.Entry) iter.next(); attList.addAttribute("", (String) att.getKey(), (String) att.getKey(), attType, (String) att.getValue()); } } return attList; } //--------------------------------------------------------------------------- private void initNamespaceStructures() { mNamespaceMap = new HashMap<>(); mDefaultNamespaceStack = null; } //--------------------------------------------------------------------------- private void pushState() { mStateStack.push(Integer.valueOf(mState)); //debugMsg(mState + " pushed onto the StateStack"); } //--------------------------------------------------------------------------- private void popState() { mState = ((Integer) mStateStack.pop()).intValue(); //debugMsg(mState + " popped off the StateStack"); } //--------------------------------------------------------------------------- private void pushDepth(String inTagName) { mDepth++; //debugMsg("Depth pushed to " + mDepth); mTagStack.push(inTagName); } //--------------------------------------------------------------------------- private void popDepth() throws SAXException { //debugMsg("Depth popped to " + (mDepth - 1)); if (mNamespaceMap.size() > 0) { // Can't use an iterator because we might delete the map entry and that would // cause a ConcurrentModificaitonException. Object[] prefixes = mNamespaceMap.keySet().toArray(); for (int i = 0; i < prefixes.length; i++) { String prefix = (String) prefixes[i]; Stack namespaceStack = (Stack) mNamespaceMap.get(prefix); // There must be at least one value on the stack or we should have deleted // the map entry for the prefix. 
Namespace namespace = (Namespace) namespaceStack.peek(); if (namespace.getTagLevel() >= mDepth) { namespaceStack.pop(); if (namespaceStack.size() == 0) { mNamespaceMap.remove(prefix); } } } } // Adjust the default namespace stack if necessary. if (mDefaultNamespaceStack != null && mDefaultNamespaceStack.size() > 0) { Namespace namespace = (Namespace) mDefaultNamespaceStack.peek(); if (namespace.getTagLevel() >= mDepth) { mDefaultNamespaceStack.pop(); } } mDepth--; /* String openTag = (String) mTagStack.pop(); if (!inTagName.equals(openTag)) { throw new SAXException("Tag mismatch! Tag '" + openTag + "' was open " + "but the closing tag was '" + inTagName + "'!"); } */ if (mDepth == 0) popState(); } //--------------------------------------------------------------------------- private void extractNamespaceDeclaration(String inName, String inURI) { int index = inName.indexOf(":"); if (index > 0) { String prefix = inName.substring(index + 1); //debugMsg("Adding namespace '" + prefix + "': " + inURI); Stack namespaceStack = (Stack) mNamespaceMap.get(prefix); if (null == namespaceStack) { namespaceStack = new Stack(); mNamespaceMap.put(prefix, namespaceStack); } namespaceStack.push(new Namespace(inURI, mDepth)); XMLNamespace.getNamespace(prefix, inURI); // Register the namespace } else { //debugMsg("Setting default namespace: " + inURI); if (null == mDefaultNamespaceStack) { mDefaultNamespaceStack = new Stack(); } mDefaultNamespaceStack.push(new Namespace(inURI, mDepth)); XMLNamespace.getNamespace(inURI); // Register the namespace } } //--------------------------------------------------------------------------- private String getNamespaceURI(String inPrefix) { String uri = null; if (inPrefix != null) { Stack namespaceStack = (Stack) mNamespaceMap.get(inPrefix); if (namespaceStack != null) { uri = ((Namespace) namespaceStack.peek()).getURI(); } } if (null == uri && mDefaultNamespaceStack != null && mDefaultNamespaceStack.size() > 0) { uri = 
((Namespace)mDefaultNamespaceStack.peek()).getURI(); } if (null == uri) uri = ""; return uri; } //--------------------------------------------------------------------------- private String getNamespacePrefix(String inValue) { String returnValue = null; int i = inValue.indexOf(":"); if (i > 0) { returnValue = inValue.substring(0, i); } return returnValue; } //--------------------------------------------------------------------------- private String getLocalName(String inValue) { String returnValue = inValue; int i = inValue.indexOf(":"); if (i > 0) { returnValue = inValue.substring(i + 1); } return returnValue; } //########################################################################### // INNER CLASSES //########################################################################### private class EOFException extends RuntimeException { } private class Namespace { private String mURI; private int mTagLevel; //------------------------------------------------------------------------ public Namespace(String inURI, int inTagLevel) { mURI = inURI; mTagLevel = inTagLevel; } //------------------------------------------------------------------------ public String getURI() { return mURI; } //------------------------------------------------------------------------ public int getTagLevel() { return mTagLevel; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy