org.docx4j.model.datastorage.DomToXPathMap Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of docx4j-core Show documentation
Show all versions of docx4j-core Show documentation
docx4j is a library which helps you to work with the Office Open
XML file format as used in docx
documents, pptx presentations, and xlsx spreadsheets.
package org.docx4j.model.datastorage;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
import org.docx4j.model.datastorage.xpathtracker.Histgram;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class DomToXPathMap {
/** Logger shared by all instances of this class. */
private static final Logger log = LoggerFactory.getLogger(DomToXPathMap.class);
/**
 * Stack of Histgram objects maintained while the tree is walked.
 * getXPath() asks each entry, bottom first, to append to the path.
 */
private final Stack<Histgram> histgrams = new Stack<Histgram>();
/** The DOM document passed to the constructor; map() walks it. */
private Document document;
/**
 * XPath (as produced by getXPath()) mapped to the text found there.
 * Remains null until map() has been called.
 */
private Map<String, String> pathMap = null;
/**
 * Returns the XPath-to-text map built by the last call to map().
 *
 * @return the map, or null if map() has not been called yet
 */
public Map<String, String> getPathMap() {
    return pathMap;
}
/**
 * Counts of child nodes, keyed by XPath; used for pre-calculation
 * of (1) repeat xpaths, and (2) certain simple conditions.
 *
 * By default, an entry counts the number of children which
 * are the same element as the first element child, since this
 * is what we need for repeats.
 *
 * If there are elements with different names, the count
 * is put in the map with the PREFIX_ALL_NODES prefix.
 *
 * Remains null until map() has been called.
 *
 * @since 3.3.6
 */
private Map<String, Integer> countMap = null;
/** Key prefix for countMap entries that count all child nodes. */
public static final String PREFIX_ALL_NODES = "_all_";
/**
 * Returns the child-count map built by the last call to map().
 *
 * @return the map, or null if map() has not been called yet
 */
public Map<String, Integer> getCountMap() {
    return countMap;
}
/**
 * Creates a mapper for the given document. Nothing is computed here;
 * call map() to populate the path and count maps.
 *
 * @param document the DOM document that map() will walk
 */
public DomToXPathMap(Document document) {
    this.document = document;
}
/**
 * Walks the document and (re)builds the path map and the count map.
 *
 * All state from any previous run is discarded first: the histgram
 * stack is cleared and seeded with a single fresh Histgram, and both
 * maps are replaced with new, empty instances.
 */
public void map() {
    // Start from a clean stack holding one root-level Histgram.
    histgrams.clear();
    histgrams.push(new Histgram());

    // Fresh result maps, so earlier results handed out by the getters
    // are not modified by this run.
    pathMap = new HashMap<String, String>();
    countMap = new HashMap<String, Integer>();

    walkTree(document);
}
/**
 * Returns the node's local name, falling back to its node name when
 * the local name is null.
 *
 * @param sourceNode the node to name
 * @return sourceNode.getLocalName(), or sourceNode.getNodeName() if that is null
 */
private String getLocalName(Node sourceNode) {
    final String localName = sourceNode.getLocalName();
    // A null local name occurs eg when the element was created using createElement()
    return (localName != null) ? localName : sourceNode.getNodeName();
}
/**
 * Processes sourceNode according to sourceNode.getNodeType().
 *
 * What the visible code shows:
 * - for document and document-fragment nodes, the child node list is
 *   fetched (the accompanying comment says each child is recursed on);
 * - a count is stored in countMap under PREFIX_ALL_NODES + nxpath, and
 *   the top entry of histgrams is popped;
 * - for text nodes, the path from getXPath() is looked up in pathMap and
 *   the node value is stored under it, appended to the existing value
 *   when there is one.
 *
 * NOTE(review): the text of this method appears to be damaged. The loop
 * header "for (int i=0; i0) {" and the single very long line further
 * down are not valid Java; presumably everything between a less-than
 * sign and a later greater-than sign was dropped when the file was
 * extracted, and comment markers were lost on the collapsed line. The
 * declarations of nxpath, val, actualCount, countOtherElements and
 * countTextNodes are not visible, and the exact branch structure of the
 * text-node case cannot be confirmed from here. Restore this method
 * from the original source before relying on it - TODO confirm.
 *
 * @param sourceNode the node to process
 */
public void walkTree( Node sourceNode ) {
// log.debug("node type" + sourceNode.getNodeType());
switch (sourceNode.getNodeType() ) {
case Node.DOCUMENT_NODE: // type 9
case Node.DOCUMENT_FRAGMENT_NODE: // type 11
// log.debug("DOCUMENT:" + w3CDomNodeToString(sourceNode) );
// if (sourceNode.getChildNodes().getLength()==0) {
// log.debug("..no children!");
// }
// recurse on each child
NodeList nodes = sourceNode.getChildNodes();
if (nodes != null) {
for (int i=0; i0) {
val = actualCount + countOtherElements;
} else {
val = countTextNodes;
}
countMap.put(PREFIX_ALL_NODES + nxpath, val);
/* NB XPath spec says
*
* The count function returns the number of nodes in the argument node-set.
*
* which I suspect includes text nodes.
*/
if (log.isDebugEnabled()) {
log.debug(PREFIX_ALL_NODES + " {} : {}="{}" + {} + {} ", nxpath, val, actualCount, countOtherElements, countTextNodes ); } } } histgrams.pop(); break; case Node.TEXT_NODE: better than doing getTextContent() at the element level?? String xpath="getXPath();" String existing="pathMap.get(xpath);" if (existing="=null)" { if (sourceNode.getNodeValue().endsWith("\n") || sourceNode.getNodeValue().endsWith("\r")) { pathMap.put(xpath, sourceNode.getNodeValue()); some whitespace is significant } else { Happens a lot log.debug("concat.."); pathMap.put(xpath, existing + sourceNode.getNodeValue()); } log.debug("Put " + xpath + "=" + sourceNode.getNodeValue());
break;
// case Node.CDATA_SECTION_NODE:
// writer.write(" ");
// break;
//
// case Node.COMMENT_NODE:
// writer.write(indentLevel + "");
// writer.write(lineSeparator);
// break;
//
// case Node.PROCESSING_INSTRUCTION_NODE:
// writer.write("");
// writer.write(lineSeparator);
// break;
//
// case Node.ENTITY_REFERENCE_NODE:
// writer.write("&" + node.getNodeName() + ";");
// break;
//
// case Node.DOCUMENT_TYPE_NODE:
// DocumentType docType = (DocumentType)node;
// writer.write("");
// writer.write(lineSeparator);
// break;
}
}
/**
 * Builds the XPath to the current element.
 *
 * Each Histgram on the stack, from the bottom entry to the top one,
 * is asked in turn to append to a shared buffer via appendPath().
 *
 * @return the accumulated path; an empty string if the stack is empty
 */
public String getXPath() {
    final StringBuilder path = new StringBuilder();
    final int depth = histgrams.size();
    // Index 0 is the bottom of the stack, so this visits entries bottom-up.
    for (int level = 0; level < depth; level++) {
        histgrams.get(level).appendPath(path);
    }
    return path.toString();
}
}