com.adobe.xfa.SaxHandler Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of aem-sdk-api Show documentation
The Adobe Experience Manager SDK
There is a newer version: 2024.11.18751.20241128T090041Z-241100
/*
 * ADOBE CONFIDENTIAL
 *
 * Copyright 2005 Adobe Systems Incorporated All Rights Reserved.
 *
 * NOTICE: All information contained herein is, and remains the property of
 * Adobe Systems Incorporated and its suppliers, if any. The intellectual and
 * technical concepts contained herein are proprietary to Adobe Systems
 * Incorporated and its suppliers and may be covered by U.S. and Foreign
 * Patents, patents in process, and are protected by trade secret or copyright
 * law. Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained from
 * Adobe Systems Incorporated.
 */
package com.adobe.xfa;

import java.util.ArrayList;
import java.util.List;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXParseException;

import com.adobe.xfa.Model.DualDomModel;
import com.adobe.xfa.content.ExDataValue;
import com.adobe.xfa.ut.ExFull;
import com.adobe.xfa.ut.FindBugsSuppress;
import com.adobe.xfa.ut.ResId;


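/**
 * SAX event handler that builds the node tree of a {@link Document} from the
 * content, lexical and error events reported by the XML parser.
 *
 * @exclude from published api -- Mike Tardif, May 2006.
 */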
public final class SaxHandler implements ContentHandler, LexicalHandler, ErrorHandler {
	/** Document that receives the nodes created during the parse. */
	private final Document mDoc;
	/** Parse file name reported by the document; handed on when elements are created. */
	private final String mFileName;
	/** AppModel obtained from the document; used to create models and packets. */
	private final AppModel mAppModel;
	
	/** Model currently being parsed into, or null when outside any model. */
	private Model mCurrentModel;
	/** Stack of open parent elements; only the first mParentDepth slots are in use. */
	private Element[] mParentStack = new Element[16];
	private int mParentDepth;
	/** Most recently created node under the current parent, or null if there is none yet. */
	private Node mPreviousSibling;
	
	/** Parent supplied through setContext(); consumed and cleared by the first startElement(). */
	private Element mStartingParent;
	/** When true, the next element reported by the parser is not materialized as a node. */
	private boolean mbIgnoreNextElement;
	
	/** Locator supplied by the parser; used to obtain element line numbers. */
	private Locator mLocator;
	
	/** Generator built from the last processing instruction named XFA.XFA, if any. */
	private Generator mGenerator;
	
	/** Accumulates text reported through characters() until it is flushed into a node. */
	private char[] mCharBuffer = new char[128];
	/** Number of valid characters currently held in mCharBuffer. */
	private int mCharCount;
	
	/**
	 * Elements recorded during this parse that still need end-of-document
	 * processing. Parameterized so that get() returns an Element rather
	 * than Object.
	 */
	private final List<Element> mLoadedModelsAndPackets = new ArrayList<Element>();
	
	/**
	 * A cache of whitespace nodes that we expect to see frequently in formatted 
	 * XML (newline, spaces).
	 * Note that there is no attempt to synchronize this cache since multiple 
	 * initialization is not a problem. 
	 */
	private static String[] whitespaceCache = new String[32];
	
	
	/**
	 * Creates a handler that loads parsed content into the given document.
	 * The parse file name and the AppModel are taken from the document.
	 *
	 * @param d the document being parsed into.
	 */
	SaxHandler(Document d) {
		mDoc = d;
		mFileName = d.getParseFileName();		
		mAppModel = d.getAppModel();
	}
	

	/**
	 * Sets the context in an existing model where parsed content is to be loaded.
	 * The values stored here are picked up by the first startElement() call
	 * that follows, which then clears the starting parent.
	 * 
	 * @param startingModel the model used to create and validate nodes for
	 * 			the document being parsed. May be null when loading generic XML.
	 * @param startingParent the existing Element that content from the
	 * 			document being parsed will be appended to. Must not be null.
	 * @param bIgnoreAggregating if true, the root node of the document being
	 * 			parsed is ignored and its children are appended to startingParent.
	 */
	void setContext(Model startingModel, Element startingParent, boolean bIgnoreAggregating) {
		
		// mbIgnoreNextElement can be true when doing a Model.loadXMLImpl().
		
		assert startingParent != null;
		
		// We would normally expect startingModel to be non-null, but it can
		// be null in the case where we are using this class to load some
		// generic XML via Document.loadIntoDocument. For that reason the
		// following assertion is deliberately left disabled.
		//assert startingModel != null;
	
		mCurrentModel = startingModel;
		mStartingParent = startingParent;
		mbIgnoreNextElement = bIgnoreAggregating;
	}
	
	// ------------------------------------------------------------------------
	// interface org.xml.sax.ContentHandler

	/**
	 * Stores the parser's locator; startElement() reads the current line
	 * number from it.
	 *
	 * @param locator the locator supplied by the parser.
	 */
	public void setDocumentLocator(Locator locator) {
		mLocator = locator;
	}
	
	/**
	 * Begins a parse: pushes the document as the bottom entry of the parent
	 * stack and calls setWillDirty(false) on it. endDocument() sets the flag
	 * back to true once loading has finished.
	 */
	public void startDocument() {
		pushParent(mDoc);
		
		mDoc.setWillDirty(false);
	}

	/**
	 * Completes the load once the parser reports the end of the document.
	 * Runs postLoad() on the AppModel (if any), then finishes every element
	 * recorded during this parse: models get whitespace stripping, ID indexing
	 * and postLoad(); any other entry only gets ID indexing. Finally the
	 * document's will-dirty flag is set back to true.
	 */
	public void endDocument() {
		
		// AppModel.postLoad() only deals with packet filtering, so it should
		// be safe to call postLoad on it multiple times.
		if (mAppModel != null)
			mAppModel.postLoad();
		
		// Call postLoad() on each Model that was loaded during this parse.
		// We can't defer this to AppModel since it might call postLoad on
		// a model that was loaded previously.
		// The loop is index-based and re-reads size() on every pass, exactly
		// as before, so entries appended while it runs are visited too.
		for (int i = 0; i < mLoadedModelsAndPackets.size(); i++) {
			
			// The cast is required when the list is a raw List (get() then
			// returns Object) and is harmless when the list is parameterized.
			Element element = (Element)mLoadedModelsAndPackets.get(i);
			
			if (element instanceof Model) {
				finishModelLoad((Model)element);
			}
			else {
				
				// Packet - still need to do ID indexing
				mDoc.declareXFAId(element.getNSInternal(), XFA.ID);
				mDoc.indexSubtree(element, false);
			}
		}
		
		mDoc.setWillDirty(true);
	}
	
	/**
	 * Runs the end-of-parse processing for one model: disposes its symbol
	 * table, strips ignorable whitespace, indexes IDs and calls postLoad()
	 * with the document's will-dirty flag temporarily enabled.
	 *
	 * @param model a model recorded during this parse.
	 */
	private void finishModelLoad(Model model) {
		
		model.disposeSymbolTable();
		
		// JavaPort: The following logic is from XFAModelImpl::loadNode
		
		// Strip any unnecessary white space before processing the node
		// We do not strip white space if any children that are text nodes 
		// have non-whitespace values.
		Element node = model instanceof DualDomModel ? (Element)model.getXmlPeer() : model;
		
		if (!hasNonWhiteSpaceChars(model))
			node.removeWhiteSpace();
		
		mDoc.declareXFAId(model.getNSInternal(), XFA.ID);
		mDoc.indexSubtree(model, false);
		
		// postLoad() runs with will-dirty enabled; it is switched off again
		// afterwards until the whole document has been processed.
		mDoc.setWillDirty(true);
		model.postLoad();
		mDoc.setWillDirty(false);
	}
	
	/**
	 * Determines whether any direct XML child of the model is a Chars node
	 * that holds something other than XML whitespace.
	 *
	 * @param model the model whose XML children are examined.
	 * @return true if such a child exists.
	 */
	private static boolean hasNonWhiteSpaceChars(Model model) {
		for (Node child = model.getFirstXMLChild(); child != null; child = child.getNextXMLSibling()) {
			if (child instanceof Chars && !((Chars)child).isXMLSpace())
				return true;
		}
		
		return false;
	}
	
	/** Intentionally a no-op: this handler does not track prefix mappings. */
	public void startPrefixMapping(String prefix, String uri) {
	}

	/** Intentionally a no-op: this handler does not track prefix mappings. */
	public void endPrefixMapping(String prefix) {
	}
	
	/**
	 * Creates the node for an element start tag and makes it the current parent.
	 * <p>
	 * Pending character data is flushed first. On the first call after
	 * setContext(), the starting parent is pushed, the previous sibling becomes
	 * its last XML child, the current model (if any) is recorded for
	 * end-of-document processing and, when the aggregating element is to be
	 * ignored, the method returns without creating a node. The matching
	 * endElement() then pops the starting parent.
	 * <p>
	 * Otherwise the kind of node created depends on the state:
	 * with no current model, an element directly under the document becomes
	 * either a ModelPeer (when the AppModel recognises it as an XFA node) or a
	 * model/packet; an element directly under a ModelPeer becomes a
	 * model/packet; anything else becomes a plain Element. With a current
	 * model, children of an ExDataValue become plain Elements and everything
	 * else is created by the model.
	 *
	 * @param uri the namespace URI of the element.
	 * @param localName the local name of the element.
	 * @param qName the qualified name of the element.
	 * @param attributes the attributes attached to the element.
	 */
	@FindBugsSuppress(code="ES")
	public void startElement(
			String uri, 
			String localName, 
			String qName,
			Attributes attributes) {

		flushCharacters();

		Element parent;

		if (mStartingParent != null) {
			// First element after setContext(): switch over to the supplied parent.
			pushParent(mStartingParent);
			parent = mStartingParent;
			mPreviousSibling = mStartingParent.getLastXMLChild();
			mStartingParent = null;
			
			if (mCurrentModel != null)
				mLoadedModelsAndPackets.add(mCurrentModel);
			
			if (mbIgnoreNextElement) {
				// Skip this element; its children will attach to the starting parent.
				mbIgnoreNextElement = false;
				return;
			}
		}
		else {
			parent = peekParent();
		}


		// NOTE(review): mLocator is dereferenced without a null check, so this
		// relies on the parser having called setDocumentLocator() - confirm.
		int nLineNumber = mLocator.getLineNumber();
		Element e;
		
		validateIdUniqueness(uri, localName, attributes);

		if (mCurrentModel == null) {

			if (mParentDepth == 1) { // [ #document ]
				 
				if (mAppModel.isXFANode(uri, localName, qName)) {
				
					// Create an XML peer attached to the current Document
					e = new ModelPeer(
							mDoc, mPreviousSibling,
							uri, localName, qName,
							attributes,
							mAppModel);
					
					// In XFA4J a Document and xfa peer are created whenever an AppModel
					// is created, whereas in C++, they are only created as needed.
					// If the AppModel is owned by a default document then the AppModel's
					// peer should be replaced by the one just parsed.
					if (mAppModel.getOwnerDocument().isDefaultDocument()) {
						mAppModel.setXmlPeer(e);
						mAppModel.setDocument(mDoc);
					}
					else {
						// Copy any uuid and timestamp attributes to the existing peer of the AppModel.
						// This seems strange, but it reproduces the behaviour in XFAAppModelImpl::add.
						
						int index = e.findAttr("", XFA.UUID);
						if (index != -1)
							mAppModel.setAttribute(new StringAttr(XFA.UUID, e.getAttrVal(index)), XFA.UUIDTAG);
						
						index = e.findAttr("", XFA.TIMESTAMP);
						if (index != -1)
							mAppModel.setAttribute(new StringAttr(XFA.TIMESTAMP, e.getAttrVal(index)), XFA.TIMESTAMPTAG);
					}
				}
				else {
					
					try {
						// The one exceptional case is when we haven't
						// parsed an xfa/xdp node yet, we can accept a third-party XML
						// node. DataModelFactory needs to determine whether an xfa/xdp node
						// has been parsed in this document load yet to make the choice. The
						// AppModel.getAllowThirdPartyXml() property provides a back-channel 
						// to communicate this information.				
						mAppModel.setAllowThirdPartyXml(true);
						
						e = createModelOrPacket(parent, uri, localName, qName, attributes, nLineNumber);
					}
					finally {
						// Always reset the back-channel flag, even if creation fails.
						mAppModel.setAllowThirdPartyXml(false);
					}
				}
			}
			else if (mParentDepth == 2 && parent instanceof ModelPeer) { // [ #document, AppModel ]
				
				e = createModelOrPacket(parent, uri, localName, qName, attributes, nLineNumber);
			}			
			else {
				// Any element at this point must be plain XML. This element could be:
				//	- the contents of a Packet
				//	- top-level plain XML, or its children, or
				//	- the XML contents of a DataModel.
				
				assert parent.getClassTag() == XFA.INVALID_ELEMENT || parent.getClassTag() == XFA.PACKETTAG;
				
				e = new Element(parent, mPreviousSibling, uri, localName, qName, attributes, XFA.INVALID_ELEMENT, "");
			}
			
			e.setLineNumber(nLineNumber);
		} 
		else {

			if (parent instanceof ExDataValue) {
				// exData is a special case since we can't validate its children at parse time.
				// Any element children of exData (in a namespace other than the template) are created as generic nodes, and may
				// be promoted to #xHTML or #xml in TemplateModel.doLoadNode.
				e = new Element(parent, mPreviousSibling, uri, localName, qName, attributes, XFA.INVALID_ELEMENT, "");
				e.setLineNumber(nLineNumber);
			}
			else {			
				e = mCurrentModel.createElement(parent, mPreviousSibling, uri, localName,
						qName, attributes, nLineNumber, mFileName);
			}
		}

		e.setDocument(mDoc);
		
		// The new element is now the parent of whatever comes next, and it has
		// no children yet.
		pushParent(e);
		mPreviousSibling = null;
	}
	
	/**
	 * Rejects an element that carries an ID attribute whose value is already
	 * in use in the document. Nothing is checked when the document is
	 * configured to uniquify IDs on parse.
	 *
	 * @param aElementNameSpaceURI namespace URI of the element being started.
	 * @param aElementLocalName local name of the element being started.
	 * @param attributes the attributes of the element being started.
	 * @throws ExFull with ResId.DOM_DUPLICATE_ID_ERR if a duplicate ID value is found.
	 */
	private void validateIdUniqueness(String aElementNameSpaceURI, String aElementLocalName, Attributes attributes) {
		
		// If uniquifying IDs, just pass them through and the process of indexing
		// will uniquify any id attributes with duplicate values.
		if (mDoc.uniquifyIDsOnParse())
			return;
		
		int attrIndex = 0;
		while (attrIndex < attributes.getLength()) {
			
			boolean isIdAttribute = mDoc.isId(
					aElementNameSpaceURI, aElementLocalName,
					attributes.getURI(attrIndex), attributes.getLocalName(attrIndex));
			
			// An ID attribute must not share its value with any other ID.
			if (isIdAttribute && mDoc.idValueInUse(attributes.getValue(attrIndex)))
				throw new ExFull(ResId.DOM_DUPLICATE_ID_ERR);
			
			attrIndex++;
		}
	}
	
	/**
	 * Asks the AppModel to create the node for a top-level element and wires
	 * its XML peer into the XML tree under xmlParent.
	 * <p>
	 * When the created node is a Model it becomes the current model, is
	 * recorded for end-of-document processing, is marked as loading, gets its
	 * symbol table initialized and receives the current generator. Any other
	 * result is asserted to be a Packet.
	 * <p>
	 * NOTE(review): a Packet created here is not added to
	 * mLoadedModelsAndPackets, although endDocument() has a branch for
	 * non-Model entries - confirm whether that is intended.
	 *
	 * @param xmlParent the XML-side parent the new node's peer belongs under.
	 * @param uri the namespace URI of the element.
	 * @param localName the local name of the element.
	 * @param qName the qualified name of the element.
	 * @param attributes the attributes of the element.
	 * @param nLineNumber the line number of the element in the source.
	 * @return the element that subsequent content is parsed into: the XML peer
	 * 			for a DualDomModel, otherwise the created node itself.
	 */
	private Element createModelOrPacket(
			Element xmlParent, 
			String uri, String localName, String qName, 
			Attributes attributes, int nLineNumber) {
		
		Element e = mAppModel.createElement(mAppModel, null, 
				uri, localName, qName, 
				attributes, nLineNumber, mFileName);
		
		// The created node is cast to Element.DualDomNode to reach its XML peer.
		Element xmlPeer = (Element)((Element.DualDomNode)e).getXmlPeer();
		
		e.setDocument(mDoc);
		xmlPeer.setDocument(mDoc);
		
		if (e instanceof Model) {
			
			mCurrentModel = (Model)e;
			mLoadedModelsAndPackets.add(mCurrentModel);
			
			// In XFA4J, parsing the document and loading are mostly combined into
			// a single step, so mark the model as loading during parsing.
			mCurrentModel.isLoading(true);
			
			mCurrentModel.initializeSymbolTable();
			
			mCurrentModel.setGenerator(mGenerator);
		} 
		else {
			assert e instanceof Packet;
		}
		
		if (e instanceof DualDomModel) {
			// The ModelFactory will already have created an XML peer,
			// but we need to wire it into the Document.
			// The remainder of this model will be parsed into the XML side.			
			xmlParent.appendChild(xmlPeer);
			
			// Element.appendChild will have set the ModelPeer's model to the
			// parent's (AppModel) model. This is wrong, so fix it.
			xmlPeer.setModel((Model)e);
			
			e = xmlPeer;
		}
		else {
			// The methods for creating a Model will create the XFA side of the model
			// correctly, but they don't pass in the XML parent, so we may need to fix it.
			if (xmlPeer.getXMLParent() != null && xmlPeer.getXMLParent() != xmlParent) {				
				xmlParent.appendChild(xmlPeer);
			}
		}
				
		return e;
	}
	
	/**
	 * Closes the current element: flushes pending text, pops the element off
	 * the parent stack and makes it (or its XML peer) the previous sibling
	 * for whatever follows. Closing a Model or ModelPeer also ends the
	 * model's loading state and clears the current model.
	 *
	 * @param uri the namespace URI of the element (unused).
	 * @param localName the local name of the element (unused).
	 * @param qName the qualified name of the element (unused).
	 */
	public void endElement(String uri, String localName, String qName) {
		
		flushCharacters();
		
		final Element closed = popParent();
		
		if (closed instanceof Model || closed instanceof ModelPeer) {
			
			final Model finished = closed.getModel();
			
			// We aren't parsing this model anymore, so mark the model as not
			// loading. The model might be put in a loading state again during 
			// postLoad processing.
			finished.isLoading(false);
			
			// While parsing the Model, we might have been appending to the
			// XFA or XML DOM, but now that we are done, ensure that we are back
			// to appending to the XML DOM.
			mPreviousSibling = finished.getXmlPeer();
			
			mCurrentModel = null;
		}
		else if (closed instanceof Packet) {
			mPreviousSibling = ((Packet)closed).getXmlPeer();
		}
		else {
			mPreviousSibling = closed;
		}
	}

	/**
	 * Appends a chunk of character data to the internal buffer. The data is
	 * turned into a node later, when the buffer is flushed. Nothing is stored
	 * while the handler is still waiting to skip the next element.
	 *
	 * @param ch the array holding the characters.
	 * @param start the index of the first character to copy.
	 * @param length the number of characters to copy.
	 */
	public void characters(char[] ch, int start, int length) {
		
		// Don't load any content until we've skipped the next element,
		// and there is nothing to do for an empty chunk.
		if (mbIgnoreNextElement || length == 0)
			return;
		
		final int freeSlots = mCharBuffer.length - mCharCount;
		
		if (length > freeSlots) {
			final int needed = mCharCount + length;
			int capacity = mCharBuffer.length;
			
			// Keep doubling until the chunk fits; clamp if doubling overflows.
			do {
				capacity *= 2;
				if (capacity < 0)
					capacity = Integer.MAX_VALUE;
			}
			while (capacity < needed);
			
			char[] grown = new char[capacity];
			System.arraycopy(mCharBuffer, 0, grown, 0, mCharCount);
			mCharBuffer = grown;
		}
		
		System.arraycopy(ch, start, mCharBuffer, mCharCount, length);
		mCharCount += length;
	}
	
    /**
     * Turns the buffered character data, if any, into a node under the current
     * parent and empties the buffer.
     * <p>
     * Under an AppModel parent the text becomes a Chars node. When a model is
     * being loaded and the parent processes text children during the parse,
     * the text is validated first: whitespace is dropped where the parent has
     * no text child relationship, text is merged into an existing text node
     * where only one is allowed, and invalid text is discarded (with the
     * error logged on the model if validation throws). In every other case a
     * plain TextNode is created.
     */
    private void flushCharacters() {
    	
    	if (mCharCount == 0)
    		return;
    	
    	Element parent = peekParent();
    	
    	if (parent instanceof AppModel) {
    		mPreviousSibling = new Chars(parent, mPreviousSibling, mCharBuffer, 0, mCharCount);
    		mCharCount = 0;
    		return;
    	}
    	
    	TextNode t;
    	
    	if (mCurrentModel != null && parent != null && parent.processTextChildrenDuringParse()) {
			
			// Validate the #text node as a valid child.
			// This check is bypassed for those parts of the schema that cannot be resolved at parse time,
			// but must wait until the entire tree is loaded before any text node processing can be done.
			
			// Any whitespace that appears within a parent that doesn't allow
			// whitespace is considered to be ignorable whitespace.
			// In C++, this check is deferred to XFAModelImpl::preLoadNode,
			// but here we avoid creating the whitespace nodes early on.
			
			ChildReln childReln = parent.getChildReln(XFA.TEXTNODETAG);
			if (childReln == null && Chars.isXMLSpace(0, mCharCount, mCharBuffer)) {
				mCharCount = 0;
				return;
			}
			
			// For elements that allow a single text node, consolidate all text nodes
			// into the first text node. In C++, this is done in preLoadNode().
			
			if (childReln != null && childReln.getMax() == 1) {
				
				for (Node child = parent.getFirstXFAChild(); child != null; child = child.getNextXFASibling()) {
					if (child instanceof TextNode) {
						// Append to the existing text node; mPreviousSibling is left untouched.
						TextNode firstTextNode = (TextNode) child;
						StringBuilder sText = new StringBuilder(firstTextNode.getText());
						sText.append(mCharBuffer, 0, mCharCount);
						firstTextNode.setText(sText.toString());
						mCharCount = 0;
						return;
					}
				}				 
			}
		
			try {
				if (!parent.isValidChild(XFA.TEXTNODETAG, ResId.InvalidChildAppendException, true, false)) {
					mCharCount = 0;
					return;
				}
			}
			catch (ExFull ex) {
				// Record the validation failure on the model and drop the text.
				mCurrentModel.addErrorList(ex, LogMessage.MSG_VALIDATION_ERROR, parent);
				mCharCount = 0;
				return;
			}
			
			t = mCurrentModel.createTextNode(parent, mPreviousSibling, mCharBuffer, 0, mCharCount);
		} 
		else {
			
			t = new TextNode(parent, mPreviousSibling, getTextNodeStringFromCharBuffer());
		}
		
    	
		mCharCount = 0;
		mPreviousSibling = t;
    }
    
    /**
     * Longest whitespace run that is served from the cache. Longer runs get a
     * fresh String, which keeps the static cache from growing without bound
     * when a document contains an unusually long run of spaces.
     */
    private static final int MAX_CACHED_WHITESPACE_LENGTH = 1024;

    /**
     * Gets the String value from the characters currently in the character buffer.
     * If the character buffer contains one of the commonly-occurring whitespace
     * sequences, a cached String value is used instead of creating a new String.
     * 
     * This will only optimize heap space use for the case where the document
     * being loaded is formatted XML. The documents we will load from a PDF will
     * typically be formatted using simple format, so we won't see the whitespace
     * nodes at all, and this optimization won't apply.
     * 
     * The cache is indexed by the length of the whitespace run. It is grown
     * until the required index fits, so a run that is more than twice as long
     * as the current cache no longer overruns the array.
     * 
     * @return a String value from the characters currently in the character buffer.
     */
    private String getTextNodeStringFromCharBuffer() {
    	
    	assert mCharCount > 0;
    	
    	final int count = mCharCount;
    	
    	if (count <= MAX_CACHED_WHITESPACE_LENGTH && isCacheableWhitespace()) {
    		
    		// Work on a local snapshot of the unsynchronized static cache so that
    		// the length check and the indexing below always see the same array.
    		String[] cache = whitespaceCache;
    		
    		if (count >= cache.length) {
    			// Double until index 'count' is valid. The cap above bounds the
    			// result, so the doubling cannot overflow.
    			int newLength = cache.length;
    			while (newLength <= count)
    				newLength *= 2;
    			
    			String[] newCache = new String[newLength];
    			System.arraycopy(cache, 0, newCache, 0, cache.length);
    			cache = newCache;
    			whitespaceCache = newCache;
    		}
    		
    		String result = cache[count];
    		if (result == null) {
    			result = new String(mCharBuffer, 0, count);
    			cache[count] = result;
    		}
    		
    		return result;
    	}
    	
    	return new String(mCharBuffer, 0, count);
    }
    
    /**
     * Tests whether the buffered text is exactly one newline followed by
     * nothing but space characters (possibly none).
     * 
     * Pretty-printed XML produces a great many text nodes of this shape,
     * typically outnumbering non-whitespace nodes by 8:1, which is what makes
     * them worth caching.
     * @return true if the character buffer contains a cacheable
     * whitespace string.
     */
    private boolean isCacheableWhitespace() {
    	
    	// The run has to begin with a newline...
    	if (mCharBuffer[0] != '\n')
    		return false;
    	
    	// ...and everything after it has to be a blank.
    	int pos = 1;
    	while (pos < mCharCount) {
    		if (mCharBuffer[pos] != ' ')
    			return false;
    		pos++;
    	}
    	
    	return true;
    }
    
    /** Intentionally a no-op: whitespace reported as ignorable is not stored. */
    public void ignorableWhitespace(char[] ch, int start, int length) {
	}

	/**
	 * Creates a ProcessingInstruction node under the current parent. A
	 * processing instruction named XFA.XFA also replaces the current
	 * generator. When a model is being loaded, the new node is passed to the
	 * model's preLoadNode().
	 *
	 * @param target the processing instruction target.
	 * @param data the processing instruction data.
	 */
	public void processingInstruction(String target, String data) {
		
		flushCharacters();
		
		Element parent = peekParent();
		
		ProcessingInstruction pi = new ProcessingInstruction(parent, mPreviousSibling, target, data);
		
		// Compare by value rather than by reference, so the match does not
		// depend on the name being the very same String instance as the constant.
		if (XFA.XFA.equals(pi.getName())) {
			mGenerator = new Generator(pi);
		}
		
		if (mCurrentModel != null)
			mCurrentModel.preLoadNode(parent, pi, mGenerator);
		
		mPreviousSibling = pi;
	}

	/** Intentionally a no-op: skipped entities are not recorded. */
	public void skippedEntity(String name) {
	}			
	
	// ------------------------------------------------------------------------
	// interface org.xml.sax.ext.LexicalHandler
	
	/** Intentionally a no-op: the DTD declaration is not recorded. */
	public void startDTD(String name, String publicId, String systemId) {
	}

	/** Intentionally a no-op: the DTD declaration is not recorded. */
	public void endDTD() {
	}

	/** Intentionally a no-op: entity boundaries are not recorded. */
	public void startEntity(String name) {
	}

	/** Intentionally a no-op: entity boundaries are not recorded. */
	public void endEntity(String name) {
	}

	/** Intentionally a no-op: CDATA section boundaries are not recorded. */
	public void startCDATA() {
	}

	/** Intentionally a no-op: CDATA section boundaries are not recorded. */
	public void endCDATA() {
	}

	/**
	 * Creates a Comment node under the current parent from the reported
	 * characters. When a model is being loaded, the new node is passed to the
	 * model's preLoadNode().
	 *
	 * @param ch the array holding the comment characters.
	 * @param start the index of the first comment character.
	 * @param length the number of comment characters.
	 */
	public void comment(char[] ch, int start, int length) {
		
		// Text that precedes the comment has to become a node first.
		flushCharacters();
		
		final Element owner = peekParent();
		final Comment node = new Comment(owner, mPreviousSibling, new String(ch, start, length));
		
		if (mCurrentModel != null) {
			mCurrentModel.preLoadNode(owner, node, mGenerator);
		}
		
		mPreviousSibling = node;
	}
	
	// ------------------------------------------------------------------------
	// interface org.xml.sax.ErrorHandler
	
	/**
	 * Converts a parser warning into an ExFull and throws it.
	 * NOTE(review): this treats warnings exactly like errors, which
	 * presumably aborts the parse - confirm that this is intended.
	 *
	 * @param exception the warning reported by the parser.
	 */
	public void warning(SAXParseException exception) {
		throw createXFAException (exception);
    }

    /**
     * Converts a recoverable parser error into an ExFull and throws it.
     *
     * @param exception the error reported by the parser.
     */
    public void error(SAXParseException exception) {
		throw createXFAException (exception);
    }

    /**
     * Converts a fatal parser error into an ExFull and throws it.
     *
     * @param exception the fatal error reported by the parser.
     */
    public void fatalError(SAXParseException exception) {
		throw createXFAException (exception);
    }
    
    // ------------------------------------------------------------------------
    // Parent Element stack
    
    /**
     * Pushes an element onto the parent stack, doubling the backing array
     * first when it is full.
     *
     * @param e the element that becomes the current parent.
     */
    private void pushParent(Element e) {
    	final int depth = mParentDepth;
    	Element[] stack = mParentStack;
    	
    	if (depth == stack.length) {
    		Element[] grown = new Element[stack.length * 2];
    		System.arraycopy(stack, 0, grown, 0, depth);
    		mParentStack = grown;
    		stack = grown;
    	}
    	
    	mParentDepth = depth + 1;
    	stack[depth] = e;
    }
    
    /**
     * Returns the element on top of the parent stack without removing it.
     *
     * @return the current parent, or null if the stack is empty.
     */
    private Element peekParent() {
    	return mParentDepth == 0 ? null : mParentStack[mParentDepth - 1];
    }
    
    /**
     * Removes and returns the element on top of the parent stack. The vacated
     * slot is cleared so the stack does not keep the element reachable.
     *
     * @return the element that was the current parent.
     */
    private Element popParent() {
    	final int top = mParentDepth - 1;
    	final Element popped = mParentStack[top];
    	
    	mParentStack[top] = null;
    	mParentDepth = top;
    	
    	return popped;
    }

    /**
     * Wraps a SAX parse exception in an ExFull that carries the
     * ResId.EXPAT_ERROR resource id and the parser's message text.
     *
     * @param inException the exception reported by the parser.
     * @return the ExFull to throw in its place.
     */
    private static ExFull createXFAException (SAXParseException inException) {
    	final String parserMessage = inException.getMessage();
    	return new ExFull (ResId.EXPAT_ERROR, parserMessage);
    }
}