org.apache.xml.dtm.ref.sax2dtm.SAX2RTFDTM Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* $Id: SAX2RTFDTM.java 468653 2006-10-28 07:07:05Z minchau $
*/
package org.apache.xml.dtm.ref.sax2dtm;
import javax.xml.transform.Source;
import org.apache.xml.dtm.DTM;
import org.apache.xml.dtm.DTMManager;
import org.apache.xml.dtm.DTMWSFilter;
import org.apache.xml.utils.IntStack;
import org.apache.xml.utils.IntVector;
import org.apache.xml.utils.StringVector;
import org.apache.xml.utils.XMLStringFactory;
import org.xml.sax.SAXException;
/**
* This is a subclass of SAX2DTM which has been modified to meet the needs of
* Result Tree Fragments (RTFs). The differences are:
*
* 1) Multiple XML trees may be appended to the single DTM. This means
* that the root node of each document is _not_ node 0. Some code has
* had to be deoptimized to support this mode of operation, and an
* explicit mechanism for obtaining the Node Handle of the root node
* has been provided.
*
* 2) A stack of these documents is maintained, allowing us to "tail-prune" the
* most recently added trees off the end of the DTM as stylesheet elements
* (and thus variable contexts) are exited.
*
* PLEASE NOTE that this class may be _heavily_ dependent upon the
* internals of the SAX2DTM superclass, and must be maintained in
* parallel with that code. Arguably, they should be conditionals
* within a single class... but they have been separated for
* performance reasons. (In fact, one could even argue about which is
* the superclass and which is the subclass; the current arrangement
* is as much about preserving stability of existing code during
* development as anything else.)
*
* %REVIEW% In fact, since the differences are so minor, I think it
* may be possible/practical to fold them back into the base
* SAX2DTM. Consider that as a future code-size optimization.
* */
public class SAX2RTFDTM extends SAX2DTM
{
  /**
   * Node identity of the document most recently started via
   * {@link #startDocument()}, or NULL when no document is open (initially,
   * and again after {@link #endDocument()}).
   */
  private int m_currentDocumentNode = NULL;

  /** Tail-pruning mark: Number of nodes in use */
  IntStack mark_size = new IntStack();

  /** Tail-pruning mark: Number of data items in use */
  IntStack mark_data_size = new IntStack();

  /** Tail-pruning mark: Number of size-of-data fields in use */
  IntStack mark_char_size = new IntStack();

  /** Tail-pruning mark: Number of dataOrQName slots in use */
  IntStack mark_doq_size = new IntStack();

  /**
   * Tail-pruning mark: Number of namespace declaration sets in use
   *
   * %REVIEW% I don't think number of NS sets is ever different from number
   * of NS elements. We can probably reduce these to a single stack and save
   * some storage.
   */
  IntStack mark_nsdeclset_size = new IntStack();

  /**
   * Tail-pruning mark: Number of namespace declaration elements in use
   *
   * %REVIEW% I don't think number of NS sets is ever different from number
   * of NS elements. We can probably reduce these to a single stack and save
   * some storage.
   */
  IntStack mark_nsdeclelem_size = new IntStack();

  /** Tail-pruning mark: initial number of nodes in use */
  int m_emptyNodeCount;

  /** Tail-pruning mark: initial number of namespace declaration sets */
  int m_emptyNSDeclSetCount;

  /** Tail-pruning mark: initial number of namespace declaration elements */
  int m_emptyNSDeclSetElemsCount;

  /** Tail-pruning mark: initial number of data items in use */
  int m_emptyDataCount;

  /** Tail-pruning mark: initial number of characters in use */
  int m_emptyCharsCount;

  /** Tail-pruning mark: default initial number of dataOrQName slots in use */
  int m_emptyDataQNCount;

  /**
   * Construct an RTF DTM. All arguments are passed straight through to the
   * SAX2DTM constructor; source-location tracking is then switched off and
   * the freshly constructed ("primordial") sizes of the prunable tables are
   * recorded for later use by {@link #popRewindMark()}.
   *
   * @param mgr passed to the superclass constructor.
   * @param source passed to the superclass constructor.
   * @param dtmIdentity passed to the superclass constructor.
   * @param whiteSpaceFilter passed to the superclass constructor.
   * @param xstringfactory passed to the superclass constructor.
   * @param doIndexing passed to the superclass constructor. Note that
   *        {@link #pushRewindMark()} refuses to operate on an indexed DTM.
   */
  public SAX2RTFDTM(DTMManager mgr, Source source, int dtmIdentity,
                    DTMWSFilter whiteSpaceFilter,
                    XMLStringFactory xstringfactory,
                    boolean doIndexing)
  {
    super(mgr, source, dtmIdentity, whiteSpaceFilter,
          xstringfactory, doIndexing);

    // NEVER track source locators for RTFs; they aren't meaningful. I think.
    // (If we did track them, we'd need to tail-prune these too.)
    // The location tables are therefore always discarded here; the previous
    // conditional allocations could never be reached.
    m_useSourceLocationProperty = false;
    m_sourceSystemId = null;
    m_sourceLine = null;
    m_sourceColumn = null;

    // Record initial sizes of fields that are pushed and restored
    // for RTF tail-pruning. More entries can be popped than pushed, so
    // we need this to mark the primordial state of the DTM.
    m_emptyNodeCount = m_size;
    m_emptyNSDeclSetCount =
        (m_namespaceDeclSets == null) ? 0 : m_namespaceDeclSets.size();
    m_emptyNSDeclSetElemsCount =
        (m_namespaceDeclSetElements == null)
            ? 0 : m_namespaceDeclSetElements.size();
    m_emptyDataCount = m_data.size();
    m_emptyCharsCount = m_chars.size();
    m_emptyDataQNCount = m_dataOrQName.size();
  }

  /**
   * Find the document node of this DTM. Because a SAX2RTFDTM may contain
   * multiple documents, this reports the document most recently started
   * and not yet ended.
   *
   * %REVIEW% Should we continue to report the most recent after
   * construction has ended? I think not, given that it may have been
   * tail-pruned.
   *
   * @return int Node handle produced by makeNodeHandle() for the open
   * document's node identity. When no document is open the identity passed
   * is NULL; presumably makeNodeHandle() maps that to DTM.NULL -- verify
   * against the superclass.
   */
  public int getDocument()
  {
    return makeNodeHandle(m_currentDocumentNode);
  }

  /**
   * Given a node handle, find the owning document node, using DTM semantics
   * (Document owns itself) rather than DOM semantics (Document has no owner).
   *
   * (I'm counting on the fact that getOwnerDocument() is implemented on top
   * of this call, in the superclass, to avoid having to rewrite that one.
   * Be careful if that code changes!)
   *
   * @param nodeHandle the id of the node.
   * @return int Node handle of the nearest ancestor-or-self whose type is
   * DTM.DOCUMENT_NODE, or DTM.NULL if the parent chain ends without one.
   */
  public int getDocumentRoot(int nodeHandle)
  {
    // Walk up the parent chain, starting with the node itself, until a
    // Document node is reached.
    for (int id = makeNodeIdentity(nodeHandle); id != NULL; id = _parent(id))
    {
      if (_type(id) == DTM.DOCUMENT_NODE)
      {
        return makeNodeHandle(id);
      }
    }

    return DTM.NULL; // Safety net; should never happen
  }

  /**
   * Given a node identifier, find the owning document node. Unlike the DOM,
   * this considers the owningDocument of a Document to be itself. Note that
   * in shared DTMs this may not be zero.
   *
   * @param nodeIdentifier the id of the starting node.
   * @return int Node identifier of the topmost ancestor-or-self (the node
   * whose parent is NULL), or NULL if nodeIdentifier is NULL.
   */
  protected int _documentRoot(int nodeIdentifier)
  {
    if (nodeIdentifier == NULL)
    {
      return NULL;
    }

    // Climb until the current node has no parent; that node is the root.
    int current = nodeIdentifier;
    for (int parent = _parent(current); parent != NULL;
         parent = _parent(current))
    {
      current = parent;
    }

    return current;
  }

  /**
   * Receive notification of the beginning of a new RTF document.
   *
   * The append state (prefix mappings, context indexes, parent stack) is
   * re-created, the next free node slot is remembered as the new document's
   * node, and the superclass handler is then invoked.
   *
   * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
   * might want to consider folding the start/endDocument changes back
   * into the main SAX2DTM so we don't have to expose so many fields
   * (even as Protected) and carry the additional code.
   *
   * @throws SAXException Any SAX exception, possibly
   * wrapping another exception.
   * @see org.xml.sax.ContentHandler#startDocument
   */
  public void startDocument() throws SAXException
  {
    // Re-initialize the tree append process
    m_endDocumentOccured = false;
    m_prefixMappings = new java.util.Vector();
    m_contextIndexes = new IntStack();
    m_parents = new IntStack();

    // The document node will occupy the next node slot to be allocated.
    m_currentDocumentNode = m_size;

    super.startDocument();
  }

  /**
   * Receive notification of the end of the document.
   *
   * Pending character data is flushed, the open document's sibling and
   * first-child links are terminated, the append state is released, and
   * the document is marked as no longer open.
   *
   * %REVIEW% Y'know, this isn't all that much of a deoptimization. We
   * might want to consider folding the start/endDocument changes back
   * into the main SAX2DTM so we don't have to expose so many fields
   * (even as Protected).
   *
   * @throws SAXException Any SAX exception, possibly
   * wrapping another exception.
   * @see org.xml.sax.ContentHandler#endDocument
   */
  public void endDocument() throws SAXException
  {
    charactersFlush();

    // The document node never has a following sibling.
    m_nextsib.setElementAt(NULL, m_currentDocumentNode);

    // A document whose first child was never resolved has no children.
    if (m_firstch.elementAt(m_currentDocumentNode) == NOTPROCESSED)
    {
      m_firstch.setElementAt(NULL, m_currentDocumentNode);
    }

    // Terminate the sibling chain of the last node that was added.
    if (DTM.NULL != m_previous)
    {
      m_nextsib.setElementAt(DTM.NULL, m_previous);
    }

    m_parents = null;
    m_prefixMappings = null;
    m_contextIndexes = null;

    m_currentDocumentNode = NULL; // no longer open
    m_endDocumentOccured = true;
  }

  /**
   * "Tail-pruning" support for RTFs.
   *
   * This function pushes information about the current size of the
   * DTM's data structures onto a stack, for use by popRewindMark()
   * (which see).
   *
   * %REVIEW% I have no idea how to rewind m_elemIndexes. However,
   * RTFs will not be indexed, so I can simply panic if that case
   * arises. Hey, it works...
   *
   * @throws NullPointerException if this DTM is being indexed or already
   * has element indexes. (The exception type is historical; it signals a
   * coding error rather than a null dereference.)
   */
  public void pushRewindMark()
  {
    if (m_indexing || m_elemIndexes != null)
    {
      throw new java.lang.NullPointerException("Coding error; Don't try to mark/rewind an indexed DTM");
    }

    // Values from DTMDefaultBase
    // %REVIEW% Can the namespace stack sizes ever differ? If not, save space!
    mark_size.push(m_size);
    mark_nsdeclset_size.push(
        (m_namespaceDeclSets == null) ? 0 : m_namespaceDeclSets.size());
    mark_nsdeclelem_size.push(
        (m_namespaceDeclSetElements == null)
            ? 0 : m_namespaceDeclSetElements.size());

    // Values from SAX2DTM
    mark_data_size.push(m_data.size());
    mark_char_size.push(m_chars.size());
    mark_doq_size.push(m_dataOrQName.size());
  }

  /**
   * "Tail-pruning" support for RTFs.
   *
   * This function pops the information previously saved by
   * pushRewindMark (which see) and uses it to discard all nodes added
   * to the DTM after that time. We expect that this will allow us to
   * reuse storage more effectively.
   *
   * If no mark is available, the sizes recorded by the constructor are
   * used instead, returning the DTM to its primordial state.
   *
   * This is _not_ intended to be called while a document is still being
   * constructed -- only between endDocument and the next startDocument
   *
   * %REVIEW% WARNING: This is the first use of some of the truncation
   * methods. If Xalan blows up after this is called, that's a likely
   * place to check.
   *
   * %REVIEW% Our original design for DTMs permitted them to share
   * string pools. If there is any risk that this might be happening, we
   * can _not_ rewind and recover the string storage. One solution
   * might be to assert that DTMs used for RTFs Must Not take advantage
   * of that feature, but this seems excessively fragile. Another, much
   * less attractive, would be to just let them leak... Nah.
   *
   * @return true if and only if the pop completely emptied the
   * RTF (the node count is zero afterwards). That response is used when
   * determining how to unspool RTF-started-while-RTF-open situations.
   */
  public boolean popRewindMark()
  {
    // An empty mark stack means there is nothing to pop; fall back to the
    // sizes captured at construction time.
    boolean top = mark_size.empty();

    // Values from DTMDefaultBase: truncate every per-node table in step.
    m_size = top ? m_emptyNodeCount : mark_size.pop();
    m_exptype.setSize(m_size);
    m_firstch.setSize(m_size);
    m_nextsib.setSize(m_size);
    m_prevsib.setSize(m_size);
    m_parent.setSize(m_size);

    // pushRewindMark() refuses indexed DTMs, so there is no index to rewind.
    m_elemIndexes = null;

    // The mark is popped even when the table is absent, so that all mark
    // stacks stay the same depth.
    int nsSetCount = top ? m_emptyNSDeclSetCount : mark_nsdeclset_size.pop();
    if (m_namespaceDeclSets != null)
    {
      m_namespaceDeclSets.setSize(nsSetCount);
    }

    int nsElemCount =
        top ? m_emptyNSDeclSetElemsCount : mark_nsdeclelem_size.pop();
    if (m_namespaceDeclSetElements != null)
    {
      m_namespaceDeclSetElements.setSize(nsElemCount);
    }

    // Values from SAX2DTM - m_data always has a reserved entry
    m_data.setSize(top ? m_emptyDataCount : mark_data_size.pop());
    m_chars.setLength(top ? m_emptyCharsCount : mark_char_size.pop());
    m_dataOrQName.setSize(top ? m_emptyDataQNCount : mark_doq_size.pop());

    // Return true iff DTM now empty
    return m_size == 0;
  }

  /**
   * Report whether a document is still being built.
   *
   * @return true if a DTM tree is currently under construction, i.e.
   * endDocument() has not been seen since the flag was last cleared by
   * startDocument().
   */
  public boolean isTreeIncomplete()
  {
    return !m_endDocumentOccured;
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy