All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.ikasan.filetransfer.xml.parser.DefaultXMLParser Maven / Gradle / Ivy

There is a newer version: 4.0.4
Show newest version
/*
 * $Id$
 * $URL$
 * 
 * =============================================================================
 * Ikasan Enterprise Integration Platform
 * 
 * Distributed under the Modified BSD License.
 * Copyright notice: The copyright for this software and a full listing 
 * of individual contributors are as shown in the packaged copyright.txt 
 * file. 
 * 
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without 
 * modification, are permitted provided that the following conditions are met:
 *
 *  - Redistributions of source code must retain the above copyright notice, 
 *    this list of conditions and the following disclaimer.
 *
 *  - Redistributions in binary form must reproduce the above copyright notice, 
 *    this list of conditions and the following disclaimer in the documentation 
 *    and/or other materials provided with the distribution.
 *
 *  - Neither the name of the ORGANIZATION nor the names of its contributors may
 *    be used to endorse or promote products derived from this software without 
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * =============================================================================
 */
package org.ikasan.filetransfer.xml.parser;

// Imported JDK, JAXP, log4j, ikasan and W3C DOM / SAX classes
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.log4j.Logger;
import org.ikasan.filetransfer.CommonXMLParser;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.Attributes;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * This class wraps JAXP API for ease of XML well-formedness, validation and DOM document creation. Basically, it parses
 * (and validates if the flag is turned on) a specified DTD or XML-Schema based XML document via
 * javax.xml.parsers.DocumentBuilder and returns a DOM Document instance, org.w3c.dom.Document
 * object.
 * 

* *

 *    Usage Example:
 * 
 *    String xmlURI = "http://www.abc.com/data/foo.xml";
 * 
 *    Document doc = null;
 *    try
 *    {
 *        DefaultXMLParser parser = new DefaultXMLParser();
 *        parser.setValidation(true, XMLConstants.W3C_XML_SCHEMA_NS_URI);
 *        doc = parser.parse(xmlURI);
 * 
 *        // Get the root name
 *        String rootName = parser.getRootName(doc);
 *        System.out.println("Root name: [" + rootName + "].");
 * 
 *        // Do something else with this Document
 * 
 *        // Remove indentation
 *        // Then normalise the DOM tree to combine all adjacent text nodes
 *        parser.removeIndent(doc);
 *        doc.normalize();
 *    }
 *    catch (Exception e)
 *    {
 *        e.printStackTrace(System.err);
 *    }
 * 
* * @author Ikasan Development Team */ public class DefaultXMLParser implements CommonXMLParser { /** The logger instance. */ private static Logger logger = Logger.getLogger(DefaultXMLParser.class); /** The document builder factory */ private DocumentBuilderFactory factory; /** default entity resolver is null */ private EntityResolver entityResolver; /** The XML schema types are based on imported XMLConstants. */ private String schemaType; /** * Creates a new instance of DefaultXMLParser. This is created with the following defaults, validation * defaulted to false namespaceAware defaulted to true schemaType defaults to XML_DTD_NS_URI * */ public DefaultXMLParser() { this.factory = DocumentBuilderFactory.newInstance(); this.factory.setNamespaceAware(true); this.factory.setValidating(false); this.schemaType = XMLConstants.XML_DTD_NS_URI; this.entityResolver = null; } /** * Sets a flag indicating whether to validate an incoming XML document as it is parsed. By default the value of this * is set to false. * * @deprecated - use the two individual setter methods of setValidation(Boolean) and setSchemaType(String). * * @param validation - true if the parser produced will validate a document as it is parsed. * @param schemaType - XML document schema type. Currently, DTD and XSD are supported. */ @Deprecated public void setValidation(Boolean validation, String schemaType) { this.setValidation(validation); this.setSchemaType(schemaType); } /** * Sets a flag indicating whether to validate an incoming XML document as it is parsed. By default the value of this * is set to false. * * @param validation - true if the parser produced will validate a document as it is parsed. */ public void setValidation(final Boolean validation) { this.factory.setValidating(validation.booleanValue()); } /** * Sets the XML schema type for validation. * * @param schemaType - XML document schema type. Currently, DTD and XSD are supported. 
*/ public void setSchemaType(final String schemaType) { this.schemaType = schemaType; } /** * Set the flag for this document to be namespace aware. * * @param namespaceAware namespace aware flag to set */ public void setNamespaceAware(Boolean namespaceAware) { this.factory.setNamespaceAware(namespaceAware.booleanValue()); } /** * Set a specific entityResolver. * * @param entityResolver entity resolver to set */ public void setEntityResolver(EntityResolver entityResolver) { this.entityResolver = entityResolver; } /** * Set the default entity resolver. */ public void setEntityResolver() { this.entityResolver = new DefaultEntityResolver(); } /* * Returns a flag indicating whether to validate an incoming XML document. Currently, DTD and * XSD are supported. */ public Boolean isValidating() { return new Boolean(this.factory.isValidating()); } /* * Returns a flag indicating whether to validate an incoming XML document. Currently, DTD and * XSD are supported. */ public Boolean isNamspaceAware() { return new Boolean(this.factory.isNamespaceAware()); } /* * Returns the schema type of the incoming XML document. */ public String getXMLSchemaType() { return this.schemaType; } /** * Parses (and validates if the flag is turned on) the specified DTD or XML-Schema based XML document using JAXP API * and returns org.w3c.dom.Document object. * * @param xmlObject - XML document to be parsed. 
* * @return Parsed document * * @throws ParserConfigurationException Exception if we could not configure the parser * @throws SAXException Exception from a SAX related problem * @throws IOException Exception from a File i/O problem */ private Document doParse(Object xmlObject) throws ParserConfigurationException, SAXException, IOException { if (xmlObject == null) { throw new NullPointerException("Object that is holding XML document can't be null"); //$NON-NLS-1$ } if (this.schemaType == XMLConstants.W3C_XML_SCHEMA_NS_URI) { logger.debug("Setting attribute for XMLSchema validation..."); //$NON-NLS-1$ this.factory.setAttribute(org.apache.xerces.jaxp.JAXPConstants.JAXP_SCHEMA_LANGUAGE, org.apache.xerces.jaxp.JAXPConstants.W3C_XML_SCHEMA); } DocumentBuilder builder = this.factory.newDocumentBuilder(); builder.setErrorHandler(new DefaultErrorHandler()); // set entity resolver if defined if (this.entityResolver != null) builder.setEntityResolver(entityResolver); if (xmlObject instanceof String) { logger.debug("Parsing XML doc as URI [" + xmlObject + "]..."); //$NON-NLS-1$ //$NON-NLS-2$ return builder.parse((String) xmlObject); } else if (xmlObject instanceof byte[]) { if (logger.isDebugEnabled()) { logger.debug("Parsing XML doc as XML string..."); //$NON-NLS-1$ logger.debug("XML document content ="); //$NON-NLS-1$ logger.debug("[\n" + String.valueOf(xmlObject) + "\n]"); //$NON-NLS-1$ //$NON-NLS-2$ } InputStream is = new ByteArrayInputStream((byte[]) xmlObject); return builder.parse(is); } else if (xmlObject instanceof File) { if (logger.isDebugEnabled()) { logger.debug("Parsing XML doc as file..."); //$NON-NLS-1$ logger.debug("XML document URI =[" //$NON-NLS-1$ + ((File) xmlObject).toString() + "]."); //$NON-NLS-1$ } return builder.parse((File) xmlObject); } else if (xmlObject instanceof InputStream) { logger.debug("Parsing XML doc as input stream..."); //$NON-NLS-1$ return builder.parse((InputStream) xmlObject); } else if (xmlObject instanceof InputSource) { 
logger.debug("Parsing XML doc as input source..."); //$NON-NLS-1$ return builder.parse((InputSource) xmlObject); } else if (xmlObject instanceof Document) { logger.debug("Already Docuent object, returning..."); //$NON-NLS-1$ return (Document) xmlObject; } else { throw new IllegalArgumentException("Unsupported object '" + xmlObject.getClass().getName() + "'"); //$NON-NLS-1$ //$NON-NLS-2$ } } /* * Parses (and validates if the flag is turned on) the specified DTD or XML-Schema based XML document using JAXP API * and returns org.w3c.dom.Document object. * * @param uri - location of XML document to be parsed. */ public Document parse(String uri) throws ParserConfigurationException, SAXException, IOException { return this.doParse(uri); } /* * Parses (and validates if the flag is turned on) the specified DTD or XML-Schema based XML document using JAXP API * and returns org.w3c.dom.Document object. * * @param xmlDoc - array of byte containing XML document to be parsed. */ public Document parse(byte[] xmlDoc) throws ParserConfigurationException, SAXException, IOException { return this.doParse(xmlDoc); } /* * Parses (and validates if the flag is turned on) the specified DTD or XML-Schema based XML document using JAXP API * and returns org.w3c.dom.Document object. * * @param file - XML document file to be parsed. */ public Document parse(File file) throws ParserConfigurationException, SAXException, IOException { return this.doParse(file); } /* * Parses (and validates if the flag is turned on) the specified DTD or XML-Schema based XML document using JAXP API * and returns org.w3c.dom.Document object. * * @param is - InputSource containing XML document to be parsed. */ public Document parse(InputSource is) throws ParserConfigurationException, SAXException, IOException { return this.doParse(is); } /* * Parses (and validates if the flag is turned on) the specified DTD or XML-Schema based XML document using JAXP API * and returns org.w3c.dom.Document object. 
* * @param is - InputStream containing XML document to be parsed. */ public Document parse(InputStream is) throws ParserConfigurationException, SAXException, IOException { return this.doParse(is); } /** * Returns the root name of the specified XML document without validation. * * @param xmlObject is the XML document as String or InputSource or File or * InputStream or Document being parsed. * * @return the root name. * @throws ParserConfigurationException Exception if we could not configure the parser * @throws SAXException Exception from a SAX related problem * @throws IOException Exception from a File i/O problem */ private static String doGetRootName(Object xmlObject) throws ParserConfigurationException, IOException, SAXException { if (xmlObject == null) { throw new NullPointerException("Object that is holding XML document can't be null"); //$NON-NLS-1$ } else if (xmlObject instanceof Document) { Document doc = (Document) xmlObject; Node node = doc.getDocumentElement(); node.normalize(); return node.getNodeName(); } // Custom DefaultHandler // to obtain a root name class MyDefaultHandler extends DefaultHandler { boolean isFirstTime = false; String rootName = null; DefaultErrorHandler errHandler = new DefaultErrorHandler(); /** Constructor */ public MyDefaultHandler() { // Do Nothing } @Override public void startElement(String namespaceUri, String localName, String qname, Attributes attrs) { if (isFirstTime == false) { this.rootName = qname; this.isFirstTime = true; } } @Override public void warning(SAXParseException e) throws SAXException { this.errHandler.warning(e); } @Override public void error(SAXParseException e) throws SAXException { this.errHandler.error(e); } @Override public void fatalError(SAXParseException e) throws SAXException { this.errHandler.fatalError(e); } } MyDefaultHandler handler = new MyDefaultHandler(); SAXParserFactory factory = SAXParserFactory.newInstance(); // TODO - defaulting to true maybe an issue for certain XML documents. 
factory.setNamespaceAware(true); // jun No need to validate an XML document at the moment // factory.setValidating(true); SAXParser saxParser = factory.newSAXParser(); if (xmlObject instanceof String) { saxParser.parse((String) xmlObject, handler); } else if (xmlObject instanceof byte[]) { InputStream is = new ByteArrayInputStream((byte[]) xmlObject); saxParser.parse(is, handler); } else if (xmlObject instanceof File) { saxParser.parse((File) xmlObject, handler); } else if (xmlObject instanceof InputStream) { saxParser.parse((InputStream) xmlObject, handler); } else if (xmlObject instanceof InputSource) { saxParser.parse((InputSource) xmlObject, handler); } else { throw new IllegalArgumentException("Unsupported object [" + xmlObject.getClass().getName() + "]"); //$NON-NLS-1$ //$NON-NLS-2$ } return handler.rootName; } /** * Returns the root name of the specified XML document without validation. * * @param uri - location of XML document to be parsed. * * @return the root name. * @throws ParserConfigurationException Exception if we could not configure the parser * @throws SAXException Exception from a SAX related problem * @throws IOException Exception from a File i/O problem */ public String getRootName(String uri) throws ParserConfigurationException, IOException, SAXException { return doGetRootName(uri); } /** * Returns the root name of the specified XML document without validation. * * @param xmlDoc - array of byte containing XML document to be parsed. * * @return the root name. * @throws ParserConfigurationException Exception if we could not configure the parser * @throws SAXException Exception from a SAX related problem * @throws IOException Exception from a File i/O problem */ public String getRootName(byte xmlDoc[]) throws ParserConfigurationException, IOException, SAXException { return doGetRootName(xmlDoc); } /** * Returns the root name of the specified XML document without validation. * * @param file - XML document file to be parsed. 
* * @return the root name. * @throws ParserConfigurationException Exception if we could not configure the parser * @throws SAXException Exception from a SAX related problem * @throws IOException Exception from a File i/O problem */ public String getRootName(File file) throws ParserConfigurationException, IOException, SAXException { return doGetRootName(file); } /** * Returns the root name of the specified XML document without validation. * * @param is - InputSource containing XML document to be parsed. * * @return the root name. * @throws ParserConfigurationException Exception if we could not configure the parser * @throws SAXException Exception from a SAX related problem * @throws IOException Exception from a File i/O problem */ public String getRootName(InputSource is) throws ParserConfigurationException, IOException, SAXException { return doGetRootName(is); } /** * Returns the root name of the specified XML document without validation. * * @param is - InputStream containing XML document to be parsed. * * @return the root name. * @throws ParserConfigurationException Exception if we could not configure the parser * @throws SAXException Exception from a SAX related problem * @throws IOException Exception from a File i/O problem */ public String getRootName(InputStream is) throws ParserConfigurationException, IOException, SAXException { return doGetRootName(is); } /** * Returns the root name of the specified XML document without validation. * * @param doc - Document containing XML document to be parsed. * * @return the root name. * @throws ParserConfigurationException Exception if we could not configure the parser * @throws SAXException Exception from a SAX related problem * @throws IOException Exception from a File i/O problem */ public String getRootName(Document doc) throws ParserConfigurationException, IOException, SAXException { return doGetRootName(doc); } /** * Walks the document and removes all text nodes used for indentation. 
* * @param node - Node containing XML document to be examined. * */ public void removeIndent(Node node) { // Is there anything to do? if (node == null) return; short type = node.getNodeType(); switch (type) { // It's show time! case Node.DOCUMENT_NODE: { Document document = (Document) node; removeIndent(document.getDocumentElement()); break; } // Remove all text nodes, but the one that is the only one child case Node.ELEMENT_NODE: { int numOfChildren = (node.getChildNodes() != null) ? node.getChildNodes().getLength() : 0; Node child = node.getFirstChild(); while (child != null) { // We've got TEXT node if (child.getNodeType() == Node.TEXT_NODE) { // It's got only whitespaces and brothers & sisters // so it should be just indentation, remove this node if (child.getNodeValue() != null && child.getNodeValue().trim().length() == 0 && numOfChildren > 1) { Node nextSibling = child.getNextSibling(); child = node.removeChild(child); child = nextSibling; continue; } } removeIndent(child); child = child.getNextSibling(); } break; } case Node.ENTITY_REFERENCE_NODE: { Node child = node.getFirstChild(); while (child != null) { removeIndent(child); child = child.getNextSibling(); } break; } case Node.DOCUMENT_TYPE_NODE: break; case Node.CDATA_SECTION_NODE: break; case Node.TEXT_NODE: break; case Node.PROCESSING_INSTRUCTION_NODE: break; case Node.COMMENT_NODE: break; default: break; } } // main() method // ///////////////////////////////////////////////////////////////////////////// /** * Runs this class for testing. 
* * TODO Unit Test * * @param args arguments */ public static void main(String args[]) { String xmlURI = null; String xmlStr = defaultXmlStr; boolean validate = false; boolean xmlSchema = false; // Parse the command-line parameters for (int i = 0; i < args.length; i++) { // Display usage then get outta here if (args[i].equalsIgnoreCase("-help")) { usage(); System.exit(1); } // XML URI else if (args[i].equalsIgnoreCase("-xml")) { xmlURI = args[++i].trim(); } // Validation flag for parse() method else if (args[i].equalsIgnoreCase("-validate")) { String value = args[++i].trim(); validate = (value.equalsIgnoreCase("true")) ? true : false; } // XML Schema flag for parse() method else if (args[i].equalsIgnoreCase("-xmlschema")) { String value = args[++i].trim(); xmlSchema = (value.equalsIgnoreCase("true")) ? true : false; } else { System.err.println("Invalid option - [" + args[i] + "]."); usage(); System.exit(1); } } System.out.println(""); System.out.println("XML URI =[" + xmlURI + "]."); System.out.println("validation =[" + validate + "]."); System.out.println("XML schema =[" + xmlSchema + "]."); if (xmlURI == null || xmlURI.trim().length() == 0) { System.out.println("XML doc string (default) ="); System.out.println(xmlStr); } System.out.println(""); try { DefaultXMLParser parser = new DefaultXMLParser(); String schemaType = xmlSchema ? XMLConstants.W3C_XML_SCHEMA_NS_URI : XMLConstants.XML_DTD_NS_URI; parser.setValidation(validate, schemaType); if (xmlURI != null && xmlURI.length() > 0) { System.out.println("Parsing URI [" + xmlURI + "]..."); parser.parse(xmlURI); } else { System.out.println("Parsing the default XML string..."); parser.parse(xmlStr.getBytes()); } System.out.println("=> Successful."); } catch (Exception e) { e.printStackTrace(System.err); } System.exit(0); } /** * Displays the usage for main() method. 
* * TODO Unit Test */ private static void usage() { String fqClassName = DefaultXMLParser.class.getName(); System.err.println(""); System.err.println("Usage:"); System.err.println("java " + fqClassName + " [-options]"); System.err.println(""); System.err.println("where options include:"); System.err.println(" -xml to specify input XML document location (See below for default XML)"); System.err.println(" -validate "); System.err.println(" to validate the XML document (false)"); System.err.println(" -xmlschema "); System.err.println(" to indicate whether the XML document is XML Schema based (false)"); System.err.println(""); System.err.println("Note that the following default XML string will be used if XML URI is not specified:-"); System.err.println(defaultXmlStr); } /** * Default XML string used in main() method * * TODO Unit Test */ private static String defaultXmlStr = "\n" + "\n" + " data_a1\n" + " data_b1\n" + " data_c1\n" + " \n" + ""; }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy