// org.simple4j.wsclient.parser.impl.XMLParser Maven / Gradle / Ivy
//
// Go to download
package org.simple4j.wsclient.parser.impl;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.simple4j.wsclient.exception.SystemException;
import org.simple4j.wsclient.parser.IParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

/**
 * This implementation parses XML string to Java Collections object tree.
 * 
 * @author jsrinivas108
 */
public class XMLParser implements IParser
{

	/** Class-wide SLF4J logger; declared final because it is assigned once and never replaced. */
	private static final Logger logger = LoggerFactory.getLogger(XMLParser.class);

	private ThreadLocal>> xpathEvalCacheTL = new ThreadLocal>>();

	/**
	 * This flag controls if the xml namespace prefix is retained or removed from
	 * the Map keys in the output. Defaults to true.
	 */
	private boolean removePrefix = true;

	/**
	 * This java.util.List contains list of xpath expressions that can occur
	 * multiple times under the same parent node. If this is not specified and a
	 * node happens to occur more than once, the parser automatically adds as a
	 * List. If this is not specified and a node happens to occur only once, the
	 * parser will add it as a String.
	 * 
	 * For example (illustrative document):
	 * 
	 * <pre>{@code
	 * <books>
	 *   <book><author>A1</author></book>
	 *   <book><author>B1</author><author>B2</author></book>
	 * </books>
	 * }</pre>
	 * 
	 * If this list is empty, the first book will have non-List for author key and
	 * the second book will have List of authors.
	 * 
	 * If this list contains /books/book/author, the author key will always have
	 * List of authors with first book having List of size 1.
	 * 
	 * Defaults to an empty list.
	 */
	private List listElementXpaths = new ArrayList();

	/**
	 * This java.util.List contains list of xpath expressions that identifies if the
	 * node can have attributes. If this is not specified and a node happens to have
	 * attribute, the parser automatically adds as a Map. If this is not specified
	 * and a node happens to not have any attribute, the parser will add it as a
	 * String.
	 * 
	 * For example (illustrative document):
	 * 
	 * <pre>{@code
	 * <books>
	 *   <book><author>A1</author></book>
	 *   <book><author country="US">B1</author></book>
	 * </books>
	 * }</pre>
	 * 
	 * If this list is empty, the first book will have String for author key and the
	 * second book will have Map for author.
	 * 
	 * If this list contains /books/book/author, the author key will always have Map
	 * for author with key of the text node being configured value in textNodeKey
	 * property.
	 * 
	 * Defaults to an empty list.
	 */
	private List attributedElementXpaths = new ArrayList();

	/**
	 * This is the name of the key for text node when the containing node has
	 * attributes. Text and CDATA node values are put into the output Map under
	 * this key. Defaults to "TEXT".
	 */
	private String textNodeKey = "TEXT";

	/**
	 * Returns the flag that controls whether the xml namespace prefix is removed
	 * from the Map keys in the output.
	 * 
	 * @return the current removePrefix setting
	 */
	public boolean isRemovePrefix()
	{
		return this.removePrefix;
	}

	/**
	 * Sets the flag that controls whether the xml namespace prefix is removed from
	 * the Map keys in the output.
	 * 
	 * @param removePrefix the new removePrefix setting
	 */
	public void setRemovePrefix(boolean removePrefix)
	{
		this.removePrefix = removePrefix;
	}

	/**
	 * Returns the configured xpath expressions of elements that are always added
	 * as a List. The internal list itself is returned, not a copy.
	 * 
	 * @return the listElementXpaths list
	 */
	public List getListElementXpaths()
	{
		return this.listElementXpaths;
	}

	/**
	 * Replaces the xpath expressions of elements that are always added as a List.
	 * The given list is stored as-is, not copied.
	 * 
	 * @param listElementXpaths the new list of xpath expressions
	 */
	public void setListElementXpaths(List listElementXpaths)
	{
		this.listElementXpaths = listElementXpaths;
	}

	/**
	 * Returns the configured xpath expressions of elements that are always added
	 * as a Map. The internal list itself is returned, not a copy.
	 * 
	 * @return the attributedElementXpaths list
	 */
	public List getAttributedElementXpaths()
	{
		return this.attributedElementXpaths;
	}

	/**
	 * Replaces the xpath expressions of elements that are always added as a Map.
	 * The given list is stored as-is, not copied.
	 * 
	 * @param attributedElementXpaths the new list of xpath expressions
	 */
	public void setAttributedElementXpaths(List attributedElementXpaths)
	{
		this.attributedElementXpaths = attributedElementXpaths;
	}

	/**
	 * Returns the Map key under which text node values are stored.
	 * 
	 * @return the configured text node key
	 */
	public String getTextNodeKey()
	{
		return this.textNodeKey;
	}

	/**
	 * Sets the Map key under which text node values are stored.
	 * 
	 * @param textNodeKey the new text node key
	 */
	public void setTextNodeKey(String textNodeKey)
	{
		this.textNodeKey = textNodeKey;
	}

	public Map parseData(String inputXMLStr)
	{
		try
		{
			byte[] inputXMLBA = inputXMLStr.getBytes();
			DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
			DocumentBuilder builder = factory.newDocumentBuilder();
			ByteArrayInputStream bais = new ByteArrayInputStream(inputXMLBA);
			Document document = builder.parse(bais);

			Node node = document;

			this.xpathEvalCacheTL.set(new HashMap>());
			Map convertedMap = convert2Collections(node, null);
			return convertedMap;
		} catch (ParserConfigurationException | SAXException | IOException | XPathExpressionException e)
		{
			throw new SystemException("XML_PARSE_FAILED", e);
		}
	}

	private Map convert2Collections(Node node, Node parent)
			throws XPathExpressionException, ParserConfigurationException, SAXException, IOException
	{
		logger.trace("processing node:" + node.getNodeName());
		logger.trace("processing node no prefix:" + handlePrefix(node.getNodeName()));

		logger.trace("processing nodelocalname:" + node.getLocalName());
		logger.trace(node.getPrefix());
		logger.trace(node.getNamespaceURI());
		logger.trace(node.getBaseURI());

		Map ret = new HashMap();
		if (node == null)
			return ret;
		if (node.getNodeType() == Node.COMMENT_NODE)
			return ret;
		if (node.getNodeType() == Node.TEXT_NODE || node.getNodeType() == Node.CDATA_SECTION_NODE)
		{
			ret.put(getTextNodeKey(), (Object) node.getNodeValue());
			return ret;
		}

		NamedNodeMap attributes = node.getAttributes();
		if (attributes != null)
		{
			for (int i = 0; i < attributes.getLength(); i++)
			{
				Node attribute = attributes.item(i);
				String prefixHandledAttributeName = handlePrefix(attribute.getNodeName());
				if (prefixHandledAttributeName.equalsIgnoreCase("nill")
						&& "true".equalsIgnoreCase(attribute.getNodeValue()))
					return null;
				ret.put(prefixHandledAttributeName, attribute.getNodeValue());
			}
		}

		NodeList childNodes = node.getChildNodes();
		if (childNodes != null)
		{
			for (int i = 0; i < childNodes.getLength(); i++)
			{
				Node child = childNodes.item(i);
				if (child.getNodeType() == Node.ELEMENT_NODE || child.getNodeType() == Node.CDATA_SECTION_NODE
						|| child.getNodeType() == Node.TEXT_NODE)
				{
					Map tempMap = convert2Collections(child, node);
					String childNodeName = handlePrefix(child.getNodeName());
					logger.trace(childNodeName + " processed values:" + tempMap);
					if (child.getNodeType() == Node.CDATA_SECTION_NODE || child.getNodeType() == Node.TEXT_NODE)
					{
						// below if condition is to remove unwanted text nodes between real element
						// nodes
						if (childNodes.getLength() == 1)
							ret.putAll(tempMap);
					} else
					{
						// 1-N check
						boolean isListElement = doesNodeMatchAnyXpath(child, this.getListElementXpaths());

						if (isListElement)
						{
							Object value = processAttributedElements(child, tempMap);

							List