
com.marklogic.developer.corb.FileUrisXMLLoader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of marklogic-corb Show documentation
Show all versions of marklogic-corb Show documentation
CoRB is a Java tool designed for bulk content-reprocessing of documents stored in MarkLogic.
/*
* Copyright (c) 2004-2016 MarkLogic Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* The use of the Apache License does not indicate that this project is
* affiliated with the Apache Software Foundation.
*/
package com.marklogic.developer.corb;
/**
*
* @author Praveen Venkata
*/
import static com.marklogic.developer.corb.Options.XML_FILE;
import static com.marklogic.developer.corb.Options.XML_NODE;
import static com.marklogic.developer.corb.util.StringUtils.isBlank;
import static com.marklogic.developer.corb.util.StringUtils.trim;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import java.io.File;
import java.io.IOException;
import java.io.StringWriter;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * URIs loader that parses the XML file named by the {@code XML_FILE} option and
 * hands out the selected nodes one at a time as strings.
 * <p>
 * When the {@code XML_NODE} option is not set, the element children of the
 * document (root) element are selected. When it is set, it is compiled as an
 * XPath expression and evaluated against the document to obtain the node set.
 * <p>
 * Element and document nodes are serialized to XML text (without an XML
 * declaration); any other node type contributes its node value. Nodes whose
 * string form is blank are skipped.
 */
public class FileUrisXMLLoader extends AbstractUrisLoader {

    protected static final Logger LOG = Logger.getLogger(FileUrisXMLLoader.class.getName());

    /** Value read ahead by {@link #hasNext()} and not yet returned by {@link #next()}. */
    String nextUri;
    /** Iterator over the values of {@link #nodeMap}; created by {@link #open()}. */
    Iterator<Node> nodeIterator;
    /** The parsed XML document; reset to {@code null} by {@link #close()}. */
    Document doc;
    /** Selected nodes keyed by their index in the selected node list. */
    Map<Integer, Node> nodeMap;
    /** Lazily created and reused, because creating a factory is expensive. */
    TransformerFactory transformerFactory;

    /**
     * Parses the XML file and collects the nodes to be handed out.
     *
     * @throws CorbException if the file cannot be read or parsed, or the XPath
     *         expression cannot be compiled or evaluated
     */
    @Override
    public void open() throws CorbException {
        try {
            String fileName = getProperty(XML_FILE);
            String xpathRootNode = getProperty(XML_NODE);

            File fXmlFile = new File(fileName);
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
            doc = dBuilder.parse(fXmlFile);

            NodeList nodeList;
            if (xpathRootNode == null) {
                // Default processing selects the children of the root element.
                // Use getDocumentElement() rather than the document's first child:
                // the first child may be a comment, processing instruction or
                // DOCTYPE, which has no children and would yield nothing.
                nodeList = doc.getDocumentElement().getChildNodes();
            } else {
                XPathFactory factory = XPathFactory.newInstance();
                XPath xpath = factory.newXPath();
                XPathExpression expr = xpath.compile(xpathRootNode);
                nodeList = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
            }

            // NOTE(review): ConcurrentHashMap does not document an iteration order,
            // so nodes are not guaranteed to be returned in document order.
            nodeMap = new ConcurrentHashMap<Integer, Node>(nodeList.getLength());
            for (int i = 0; i < nodeList.getLength(); i++) {
                Node node = nodeList.item(i);
                if (xpathRootNode == null && node.getNodeType() != Node.ELEMENT_NODE) {
                    continue; //default processing without an XPath selects only /*
                }
                nodeMap.put(i, node);
            }

            total = nodeMap.size();
            nodeIterator = nodeMap.values().iterator();
        } catch (Exception exc) {
            throw new CorbException("Problem loading data from xml file ", exc);
        }
    }

    /**
     * Serializes a node to indented XML text without an XML declaration.
     *
     * @param node the node to serialize
     * @return the serialized XML
     * @throws CorbException if the transformation fails
     */
    private String nodeToString(Node node) throws CorbException {
        StringWriter sw = new StringWriter();
        try {
            //Creating a transformerFactory is expensive, only do it once
            if (transformerFactory == null) {
                transformerFactory = TransformerFactory.newInstance();
            }
            Transformer t = transformerFactory.newTransformer();
            t.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
            t.setOutputProperty(OutputKeys.INDENT, "yes");
            t.transform(new DOMSource(node), new StreamResult(sw));
        } catch (TransformerException te) {
            throw new CorbException("nodeToString Transformer Exception", te);
        }
        return sw.toString();
    }

    /**
     * Advances the iterator to the next node whose string form is not blank.
     * <p>
     * Implemented as a loop rather than recursion so that a long run of blank
     * nodes cannot exhaust the call stack.
     *
     * @return the string form of the next non-blank node, or {@code null} when
     *         the iterator is exhausted
     * @throws IOException declared for callers; not thrown by the current implementation
     * @throws CorbException if a node cannot be serialized
     */
    private String readNextNode() throws IOException, CorbException {
        while (nodeIterator.hasNext()) {
            Node nextNode = nodeIterator.next();
            short nextNodeType = nextNode.getNodeType();
            String line;
            if (nextNodeType == Node.ELEMENT_NODE || nextNodeType == Node.DOCUMENT_NODE) {
                line = trim(nodeToString(nextNode));
            } else {
                line = nextNode.getNodeValue();
            }
            if (!isBlank(line)) {
                return line;
            }
        }
        return null;
    }

    /**
     * Reports whether another non-blank node is available, reading ahead and
     * buffering it in {@link #nextUri} if necessary.
     *
     * @return {@code true} if a subsequent call to {@link #next()} will return a value
     * @throws CorbException if reading the next node fails
     */
    @Override
    public boolean hasNext() throws CorbException {
        if (nextUri == null) {
            try {
                nextUri = readNextNode();
            } catch (Exception exc) {
                // Keep the original failure as the cause so it is not lost.
                throw new CorbException("Problem while reading the xml file", exc);
            }
        }
        return nextUri != null;
    }

    /**
     * Returns the value buffered by {@link #hasNext()}, or reads the next
     * non-blank node if nothing is buffered.
     *
     * @return the next value, or {@code null} when no nodes remain
     * @throws CorbException if reading the next node fails
     */
    @Override
    public String next() throws CorbException {
        String node;
        if (nextUri != null) {
            node = nextUri;
            nextUri = null;
        } else {
            try {
                node = readNextNode();
            } catch (Exception exc) {
                // Keep the original failure as the cause so it is not lost.
                throw new CorbException("Problem while reading the xml file", exc);
            }
        }
        return node;
    }

    /**
     * Releases the parsed document and clears the collected nodes. Does nothing
     * if no document is currently held.
     */
    @Override
    public void close() {
        if (doc != null) {
            LOG.info("closing xml file reader");
            try {
                doc = null;
                if (nodeMap != null) {
                    nodeMap.clear();
                }
            } catch (Exception exc) {
                // Best effort: a failure while clearing must not propagate from close().
                LOG.log(Level.SEVERE, "while closing xml file reader", exc);
            }
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy