
org.xerial.xml.dom.DOMUtil Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of xerial-xml Show documentation
Show all versions of xerial-xml Show documentation
XML Parser/Indexing library.
The newest version!
/*--------------------------------------------------------------------------
* Copyright 2004 Taro L. Saito
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*--------------------------------------------------------------------------*/
//--------------------------------------
// XerialJ
//
// DOMUtil.java
// Since: 2004/12/24
//
// $URL$
// $Author$
//--------------------------------------
package org.xerial.xml.dom;
import static org.w3c.dom.Node.CDATA_SECTION_NODE;
import static org.w3c.dom.Node.ELEMENT_NODE;
import static org.w3c.dom.Node.TEXT_NODE;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.LinkedList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Attr;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xerial.util.StringUtil;
import org.xerial.xml.XMLErrorCode;
import org.xerial.xml.XMLException;
import org.xml.sax.SAXException;
/**
 * Utilities for manipulating DOM.
 * <p>
 * This class only has static members and cannot be instantiated.
 *
 * @author leo
 *
 */
public class DOMUtil {
    private DOMUtil() {}

    /**
     * Retrieves text contents under the specified element.
     * <p>
     * NOTE: this method is not implemented yet. It currently ignores both
     * arguments and always returns null.
     *
     * @param parentElement
     *            the element to search under (currently unused)
     * @param tagName
     *            the tag name to look for (currently unused)
     * @return always null in the current implementation
     */
    static public String getTextContent(Element parentElement, String tagName) {
        // TODO impl (the original sketch started from
        // parentElement.getElementsByTagName(tagName))
        return null;
    }

    /**
     * Gets the text data directly enclosed by the specified element. If the
     * element has several separated text nodes, the returned text is their
     * concatenation in document order. For example, in the following XML
     * data, {@link #getText(Element)} for the wiki element gives the text
     * before the author element followed by the text after it
     * ("Hello World!" and "Nice to meet you." with their surrounding white
     * spaces); the text inside the author element is not included.
     *
     * <pre>
     * &lt;wiki&gt;
     *   Hello World!
     *   &lt;author&gt;leo&lt;/author&gt;
     *   Nice to meet you.
     * &lt;/wiki&gt;
     * </pre>
     *
     * Only direct children whose node type is {@link Node#TEXT_NODE} are
     * considered, and text nodes that report
     * {@link Text#isElementContentWhitespace()} (ignorable white spaces
     * between tags) are skipped.
     *
     * @param element
     *            the target element
     * @return the text content of the element, or null if no text content is
     *         found under the element
     */
    static public String getText(Element element) {
        NodeList nodeList = element.getChildNodes();
        StringBuilder buf = new StringBuilder();
        int numTextNodes = 0;
        for (int i = 0; i < nodeList.getLength(); i++) {
            Node node = nodeList.item(i);
            if (node.getNodeType() == Node.TEXT_NODE) {
                if (!((Text) node).isElementContentWhitespace()) {
                    numTextNodes++;
                    buf.append(node.getNodeValue());
                }
            }
        }
        // null (not "") signals that no usable text node was found
        return numTextNodes > 0 ? buf.toString() : null;
    }

    /**
     * Parses the given XML stream and collects the text contents and
     * attribute values of the whole document, starting from its root element.
     * <p>
     * The stream is not closed by this method.
     *
     * @param xmlStream
     *            the input stream of the XML data
     * @return Relative Path Expression -> Text Content HashMap (see
     *         {@link #getTextContentMap(Element)})
     * @throws XMLException
     *             when the XML parser cannot be configured
     *             (INVALID_PARSER_CONFIGURATION) or the XML data cannot be
     *             parsed (SAX_ERROR)
     * @throws IOException
     *             when reading from the stream fails
     */
    static public HashMap<String, String> getTextContentMap(InputStream xmlStream)
            throws XMLException, IOException {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            // The stream may come from an untrusted source: do not let the
            // parser fetch external entities or external DTDs (XXE).
            // A parser that does not know these features raises
            // ParserConfigurationException, which is reported below.
            factory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

            DocumentBuilder docBuilder = factory.newDocumentBuilder();
            Document doc = docBuilder.parse(xmlStream);
            Element rootElem = doc.getDocumentElement();
            return getTextContentMap(rootElem);
        }
        catch (ParserConfigurationException e) {
            throw new XMLException(XMLErrorCode.INVALID_PARSER_CONFIGURATION, e);
        }
        catch (SAXException e) {
            throw new XMLException(XMLErrorCode.SAX_ERROR, e);
        }
    }

    /**
     * Collects the text contents and attribute values found in the subtree
     * rooted at the given element.
     * <p>
     * Keys are tag names joined with "/" starting from the given element
     * (e.g. "book/title"); attribute values are stored under the path of
     * their element followed by "/@" and the attribute name (e.g.
     * "book/@id"). When several elements share the same path, the value
     * stored last replaces the earlier ones.
     *
     * @param element
     *            the element from which the search starts
     *
     * @return Relative Path Expression -> Text Content HashMap
     */
    static public HashMap<String, String> getTextContentMap(Element element) {
        DOMReadProcess readProcess = new DOMReadProcess();
        readProcess.traceSubTree(element);
        return readProcess.getContentMap();
    }

    /**
     * Depth-first traversal of a DOM subtree that records, for each visited
     * element, its attribute values and its text content keyed by the path of
     * tag names from the traversal root.
     */
    static class DOMReadProcess {
        /** tag names from the traversal root down to the element being visited */
        LinkedList<String> _relativePath = new LinkedList<String>();
        /** relative path expression -> text content (or attribute value) */
        HashMap<String, String> _path2contentMap = new HashMap<String, String>();

        public DOMReadProcess() {}

        /**
         * Visits the given element and, recursively, all of its descendant
         * elements, filling the content map.
         *
         * @param subtreeRoot
         *            the element to visit
         */
        public void traceSubTree(Element subtreeRoot) {
            String tagName = subtreeRoot.getTagName();
            _relativePath.add(tagName);

            // attributes are stored as <element path>/@<attribute name>
            if (subtreeRoot.hasAttributes()) {
                String currentPath = getCurrentPath();
                NamedNodeMap attribMap = subtreeRoot.getAttributes();
                for (int i = 0; i < attribMap.getLength(); i++) {
                    Attr attrib = (Attr) attribMap.item(i);
                    String attribPath = currentPath + "/@" + attrib.getName();
                    _path2contentMap.put(attribPath, attrib.getValue());
                }
            }

            // gather text and CDATA of the direct children; child elements
            // are traversed recursively and recorded under their own paths
            StringBuilder contentBuffer = new StringBuilder();
            IterableNodeList childNodes = new IterableNodeList(subtreeRoot.getChildNodes());
            for (Node node : childNodes) {
                switch (node.getNodeType()) {
                case ELEMENT_NODE:
                    traceSubTree((Element) node);
                    break;
                case TEXT_NODE:
                    Text text = (Text) node;
                    contentBuffer.append(text.getData());
                    break;
                case CDATA_SECTION_NODE:
                    CDATASection cdata = (CDATASection) node;
                    contentBuffer.append(cdata.getData());
                    break;
                default:
                    // other node types do not contribute to the content map
                    break;
                }
            }

            // content that StringUtil.isWhiteSpace accepts is not recorded
            String textContent = contentBuffer.toString();
            if (!StringUtil.isWhiteSpace(textContent))
                _path2contentMap.put(getCurrentPath(), textContent);

            // leave this element: restore the path of the parent
            _relativePath.removeLast();
        }

        /**
         * @return the map filled by {@link #traceSubTree(Element)}; this is
         *         the internal map itself, not a copy
         */
        public HashMap<String, String> getContentMap() {
            return _path2contentMap;
        }

        /**
         * Builds the path expression of the element currently being visited.
         *
         * @return the tag names of the current path joined with "/", without
         *         a leading "/" (relative path); an empty string when the
         *         path is empty
         */
        String getCurrentPath() {
            StringBuilder pathExprBuilder = new StringBuilder();
            boolean isFirst = true;
            for (String tag : _relativePath) {
                if (!isFirst)
                    pathExprBuilder.append("/");
                pathExprBuilder.append(tag);
                isFirst = false;
            }
            return pathExprBuilder.toString();
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy