
// org.ikasan.filetransfer.xml.parser.DefaultXMLParser Maven / Gradle / Ivy
/*
* $Id$
* $URL$
*
* =============================================================================
* Ikasan Enterprise Integration Platform
*
* Distributed under the Modified BSD License.
* Copyright notice: The copyright for this software and a full listing
* of individual contributors are as shown in the packaged copyright.txt
* file.
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* - Neither the name of the ORGANIZATION nor the names of its contributors may
* be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* =============================================================================
*/
package org.ikasan.filetransfer.xml.parser;
// Imported JDK, log4j and ikasan classes
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.log4j.Logger;
import org.ikasan.filetransfer.CommonXMLParser;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.Attributes;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Wraps the JAXP API for XML well-formedness checking, validation and DOM document creation. It parses (and validates
 * if the flag is turned on) a DTD or XML-Schema based XML document via {@code javax.xml.parsers.DocumentBuilder} and
 * returns a DOM Document instance, an {@code org.w3c.dom.Document} object.
 * <p>
 * NOTE(review): no parser features are configured in this class, so DTD and external-entity handling is presumably
 * left at the JAXP implementation's defaults. If documents can come from untrusted sources, confirm whether entity
 * processing needs to be restricted (for example through {@link #setEntityResolver(EntityResolver)}).
 * <p>
 * Usage Example:
 *
 * <pre>
 * String xmlURI = "http://www.abc.com/data/foo.xml";
 *
 * Document doc = null;
 * try
 * {
 *     DefaultXMLParser parser = new DefaultXMLParser();
 *     parser.setValidation(Boolean.TRUE);
 *     parser.setSchemaType(XMLConstants.W3C_XML_SCHEMA_NS_URI);
 *     doc = parser.parse(xmlURI);
 *
 *     // Get the root name
 *     String rootName = parser.getRootName(doc);
 *     System.out.println("Root name: [" + rootName + "].");
 *
 *     // Do something else with this Document
 *
 *     // Remove indentation
 *     // Then normalise the DOM tree to combine all adjacent text nodes
 *     parser.removeIndent(doc);
 *     doc.normalize();
 * }
 * catch (Exception e)
 * {
 *     e.printStackTrace(System.err);
 * }
 * </pre>
 *
 * @author Ikasan Development Team
 */
public class DefaultXMLParser implements CommonXMLParser
{
    /** The logger instance. */
    private static final Logger logger = Logger.getLogger(DefaultXMLParser.class);

    /** The document builder factory; holds the validation and namespace-awareness flags. */
    private DocumentBuilderFactory factory;

    /** Entity resolver handed to each document builder; <code>null</code> (the default) means none is set. */
    private EntityResolver entityResolver;

    /** The XML schema type, expressed as one of the namespace URIs from the imported XMLConstants. */
    private String schemaType;

    /**
     * Creates a new instance of <code>DefaultXMLParser</code>. This is created with the following defaults:
     * validation defaulted to false, namespaceAware defaulted to true, schemaType defaulted to XML_DTD_NS_URI and no
     * entity resolver.
     */
    public DefaultXMLParser()
    {
        this.factory = DocumentBuilderFactory.newInstance();
        this.factory.setNamespaceAware(true);
        this.factory.setValidating(false);
        this.schemaType = XMLConstants.XML_DTD_NS_URI;
        this.entityResolver = null;
    }

    /**
     * Sets a flag indicating whether to validate an incoming XML document as it is parsed, together with the schema
     * type. By default the validation flag is set to <code>false</code>.
     *
     * @deprecated - use the two individual setter methods of setValidation(Boolean) and setSchemaType(String).
     *
     * @param validation - true if the parser produced will validate a document as it is parsed.
     * @param schemaType - XML document schema type. Currently, <code>DTD</code> and <code>XSD</code> are supported.
     */
    @Deprecated
    public void setValidation(Boolean validation, String schemaType)
    {
        this.setValidation(validation);
        this.setSchemaType(schemaType);
    }

    /**
     * Sets a flag indicating whether to validate an incoming XML document as it is parsed. By default the value of
     * this is set to <code>false</code>.
     *
     * @param validation - true if the parser produced will validate a document as it is parsed; must not be null.
     */
    public void setValidation(final Boolean validation)
    {
        this.factory.setValidating(validation.booleanValue());
    }

    /**
     * Sets the XML schema type for validation.
     *
     * @param schemaType - XML document schema type. Currently, <code>DTD</code> and <code>XSD</code> are supported.
     */
    public void setSchemaType(final String schemaType)
    {
        this.schemaType = schemaType;
    }

    /**
     * Set the flag for this document to be namespace aware.
     *
     * @param namespaceAware namespace aware flag to set; must not be null.
     */
    public void setNamespaceAware(Boolean namespaceAware)
    {
        this.factory.setNamespaceAware(namespaceAware.booleanValue());
    }

    /**
     * Set a specific entityResolver.
     *
     * @param entityResolver entity resolver to set
     */
    public void setEntityResolver(EntityResolver entityResolver)
    {
        this.entityResolver = entityResolver;
    }

    /**
     * Set the default entity resolver, a new <code>DefaultEntityResolver</code>.
     */
    public void setEntityResolver()
    {
        this.entityResolver = new DefaultEntityResolver();
    }

    /**
     * Returns a flag indicating whether to validate an incoming XML document. Currently, <code>DTD</code> and
     * <code>XSD</code> are supported.
     *
     * @return the validating flag currently held by the document builder factory.
     */
    public Boolean isValidating()
    {
        // Boolean.valueOf reuses the cached TRUE/FALSE instances instead of allocating a new object
        return Boolean.valueOf(this.factory.isValidating());
    }

    /**
     * Returns a flag indicating whether the parser is namespace aware. (The method name keeps its historical
     * spelling so that existing callers continue to compile.)
     *
     * @return the namespace-aware flag currently held by the document builder factory.
     */
    public Boolean isNamspaceAware()
    {
        return Boolean.valueOf(this.factory.isNamespaceAware());
    }

    /**
     * Returns the schema type of the incoming XML document.
     *
     * @return the schema type as last set, or XML_DTD_NS_URI if it was never changed.
     */
    public String getXMLSchemaType()
    {
        return this.schemaType;
    }

    /**
     * Parses (and validates if the flag is turned on) the specified DTD or XML-Schema based XML document using JAXP
     * API and returns an <code>org.w3c.dom.Document</code> object.
     *
     * @param xmlObject - XML document to be parsed, held as a URI <code>String</code>, <code>byte[]</code>,
     *            <code>File</code>, <code>InputStream</code>, <code>InputSource</code> or an already built
     *            <code>Document</code> (which is returned unchanged).
     *
     * @return Parsed document
     *
     * @throws ParserConfigurationException Exception if we could not configure the parser
     * @throws SAXException Exception from a SAX related problem
     * @throws IOException Exception from a File i/O problem
     * @throws NullPointerException if <code>xmlObject</code> is null
     * @throws IllegalArgumentException if <code>xmlObject</code> is of an unsupported type
     */
    private Document doParse(Object xmlObject) throws ParserConfigurationException, SAXException, IOException
    {
        if (xmlObject == null)
        {
            throw new NullPointerException("Object that is holding XML document can't be null"); //$NON-NLS-1$
        }
        // Compare by value: a schema type string built at run time (e.g. read from configuration) is equal to the
        // constant but is not the same object, so a reference comparison would silently skip schema validation.
        if (XMLConstants.W3C_XML_SCHEMA_NS_URI.equals(this.schemaType))
        {
            logger.debug("Setting attribute for XMLSchema validation..."); //$NON-NLS-1$
            // NOTE(review): the attribute is set on the shared factory and never cleared in this class, so it
            // presumably stays in effect if the schema type is later switched back to DTD - confirm.
            this.factory.setAttribute(org.apache.xerces.jaxp.JAXPConstants.JAXP_SCHEMA_LANGUAGE,
                org.apache.xerces.jaxp.JAXPConstants.W3C_XML_SCHEMA);
        }
        DocumentBuilder builder = this.factory.newDocumentBuilder();
        builder.setErrorHandler(new DefaultErrorHandler());
        // set entity resolver if defined
        if (this.entityResolver != null)
        {
            builder.setEntityResolver(this.entityResolver);
        }
        if (xmlObject instanceof String)
        {
            logger.debug("Parsing XML doc as URI [" + xmlObject + "]..."); //$NON-NLS-1$ //$NON-NLS-2$
            return builder.parse((String) xmlObject);
        }
        else if (xmlObject instanceof byte[])
        {
            byte[] xmlBytes = (byte[]) xmlObject;
            if (logger.isDebugEnabled())
            {
                logger.debug("Parsing XML doc as XML string..."); //$NON-NLS-1$
                logger.debug("XML document content ="); //$NON-NLS-1$
                // Decode the bytes so the log shows the document itself rather than the array's identity string.
                // UTF-8 is used for this log output only; the parser below works on the raw bytes.
                logger.debug("[\n" + new String(xmlBytes, "UTF-8") + "\n]"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
            }
            InputStream is = new ByteArrayInputStream(xmlBytes);
            return builder.parse(is);
        }
        else if (xmlObject instanceof File)
        {
            if (logger.isDebugEnabled())
            {
                logger.debug("Parsing XML doc as file..."); //$NON-NLS-1$
                logger.debug("XML document URI =[" //$NON-NLS-1$
                        + ((File) xmlObject).toString() + "]."); //$NON-NLS-1$
            }
            return builder.parse((File) xmlObject);
        }
        else if (xmlObject instanceof InputStream)
        {
            logger.debug("Parsing XML doc as input stream..."); //$NON-NLS-1$
            return builder.parse((InputStream) xmlObject);
        }
        else if (xmlObject instanceof InputSource)
        {
            logger.debug("Parsing XML doc as input source..."); //$NON-NLS-1$
            return builder.parse((InputSource) xmlObject);
        }
        else if (xmlObject instanceof Document)
        {
            logger.debug("Already Docuent object, returning..."); //$NON-NLS-1$
            return (Document) xmlObject;
        }
        else
        {
            throw new IllegalArgumentException("Unsupported object '" + xmlObject.getClass().getName() + "'"); //$NON-NLS-1$ //$NON-NLS-2$
        }
    }

    /**
     * Parses (and validates if the flag is turned on) the specified DTD or XML-Schema based XML document using JAXP
     * API and returns an <code>org.w3c.dom.Document</code> object.
     *
     * @param uri - location of XML document to be parsed.
     *
     * @return Parsed document
     * @throws ParserConfigurationException Exception if we could not configure the parser
     * @throws SAXException Exception from a SAX related problem
     * @throws IOException Exception from a File i/O problem
     */
    public Document parse(String uri) throws ParserConfigurationException, SAXException, IOException
    {
        return this.doParse(uri);
    }

    /**
     * Parses (and validates if the flag is turned on) the specified DTD or XML-Schema based XML document using JAXP
     * API and returns an <code>org.w3c.dom.Document</code> object.
     *
     * @param xmlDoc - array of byte containing XML document to be parsed.
     *
     * @return Parsed document
     * @throws ParserConfigurationException Exception if we could not configure the parser
     * @throws SAXException Exception from a SAX related problem
     * @throws IOException Exception from a File i/O problem
     */
    public Document parse(byte[] xmlDoc) throws ParserConfigurationException, SAXException, IOException
    {
        return this.doParse(xmlDoc);
    }

    /**
     * Parses (and validates if the flag is turned on) the specified DTD or XML-Schema based XML document using JAXP
     * API and returns an <code>org.w3c.dom.Document</code> object.
     *
     * @param file - XML document file to be parsed.
     *
     * @return Parsed document
     * @throws ParserConfigurationException Exception if we could not configure the parser
     * @throws SAXException Exception from a SAX related problem
     * @throws IOException Exception from a File i/O problem
     */
    public Document parse(File file) throws ParserConfigurationException, SAXException, IOException
    {
        return this.doParse(file);
    }

    /**
     * Parses (and validates if the flag is turned on) the specified DTD or XML-Schema based XML document using JAXP
     * API and returns an <code>org.w3c.dom.Document</code> object.
     *
     * @param is - <code>InputSource</code> containing XML document to be parsed.
     *
     * @return Parsed document
     * @throws ParserConfigurationException Exception if we could not configure the parser
     * @throws SAXException Exception from a SAX related problem
     * @throws IOException Exception from a File i/O problem
     */
    public Document parse(InputSource is) throws ParserConfigurationException, SAXException, IOException
    {
        return this.doParse(is);
    }

    /**
     * Parses (and validates if the flag is turned on) the specified DTD or XML-Schema based XML document using JAXP
     * API and returns an <code>org.w3c.dom.Document</code> object.
     *
     * @param is - <code>InputStream</code> containing XML document to be parsed.
     *
     * @return Parsed document
     * @throws ParserConfigurationException Exception if we could not configure the parser
     * @throws SAXException Exception from a SAX related problem
     * @throws IOException Exception from a File i/O problem
     */
    public Document parse(InputStream is) throws ParserConfigurationException, SAXException, IOException
    {
        return this.doParse(is);
    }

    /**
     * Returns the root name of the specified XML document without validation.
     *
     * @param xmlObject is the XML document as <code>String</code> (URI) or <code>byte[]</code> or
     *            <code>InputSource</code> or <code>File</code> or <code>InputStream</code> or <code>Document</code>
     *            being parsed.
     *
     * @return the root name, or <code>null</code> when a <code>Document</code> without a document element is given.
     * @throws ParserConfigurationException Exception if we could not configure the parser
     * @throws SAXException Exception from a SAX related problem
     * @throws IOException Exception from a File i/O problem
     * @throws NullPointerException if <code>xmlObject</code> is null
     * @throws IllegalArgumentException if <code>xmlObject</code> is of an unsupported type
     */
    private static String doGetRootName(Object xmlObject) throws ParserConfigurationException, IOException,
            SAXException
    {
        if (xmlObject == null)
        {
            throw new NullPointerException("Object that is holding XML document can't be null"); //$NON-NLS-1$
        }
        if (xmlObject instanceof Document)
        {
            Node root = ((Document) xmlObject).getDocumentElement();
            // A Document that has no element yet has no root name; avoid dereferencing null
            if (root == null)
            {
                return null;
            }
            // Kept from the original implementation: the tree below the root is normalised as a side effect
            root.normalize();
            return root.getNodeName();
        }
        // Custom DefaultHandler that remembers the qualified name of the first element it sees (the root)
        // and forwards warnings and errors to a DefaultErrorHandler
        class RootNameHandler extends DefaultHandler
        {
            boolean rootSeen = false;
            String rootName = null;
            DefaultErrorHandler errHandler = new DefaultErrorHandler();

            /** Constructor */
            public RootNameHandler()
            {
                // Do Nothing
            }

            @Override
            public void startElement(String namespaceUri, String localName, String qname, Attributes attrs)
            {
                if (!this.rootSeen)
                {
                    this.rootName = qname;
                    this.rootSeen = true;
                }
            }

            @Override
            public void warning(SAXParseException e) throws SAXException
            {
                this.errHandler.warning(e);
            }

            @Override
            public void error(SAXParseException e) throws SAXException
            {
                this.errHandler.error(e);
            }

            @Override
            public void fatalError(SAXParseException e) throws SAXException
            {
                this.errHandler.fatalError(e);
            }
        }
        RootNameHandler handler = new RootNameHandler();
        SAXParserFactory saxFactory = SAXParserFactory.newInstance();
        // TODO - defaulting to true maybe an issue for certain XML documents.
        saxFactory.setNamespaceAware(true);
        // Validation is deliberately left off: only the root name is of interest here
        SAXParser saxParser = saxFactory.newSAXParser();
        if (xmlObject instanceof String)
        {
            saxParser.parse((String) xmlObject, handler);
        }
        else if (xmlObject instanceof byte[])
        {
            InputStream is = new ByteArrayInputStream((byte[]) xmlObject);
            saxParser.parse(is, handler);
        }
        else if (xmlObject instanceof File)
        {
            saxParser.parse((File) xmlObject, handler);
        }
        else if (xmlObject instanceof InputStream)
        {
            saxParser.parse((InputStream) xmlObject, handler);
        }
        else if (xmlObject instanceof InputSource)
        {
            saxParser.parse((InputSource) xmlObject, handler);
        }
        else
        {
            throw new IllegalArgumentException("Unsupported object [" + xmlObject.getClass().getName() + "]"); //$NON-NLS-1$ //$NON-NLS-2$
        }
        return handler.rootName;
    }

    /**
     * Returns the root name of the specified XML document without validation.
     *
     * @param uri - location of XML document to be parsed.
     *
     * @return the root name.
     * @throws ParserConfigurationException Exception if we could not configure the parser
     * @throws SAXException Exception from a SAX related problem
     * @throws IOException Exception from a File i/O problem
     */
    public String getRootName(String uri) throws ParserConfigurationException, IOException, SAXException
    {
        return doGetRootName(uri);
    }

    /**
     * Returns the root name of the specified XML document without validation.
     *
     * @param xmlDoc - array of byte containing XML document to be parsed.
     *
     * @return the root name.
     * @throws ParserConfigurationException Exception if we could not configure the parser
     * @throws SAXException Exception from a SAX related problem
     * @throws IOException Exception from a File i/O problem
     */
    public String getRootName(byte xmlDoc[]) throws ParserConfigurationException, IOException, SAXException
    {
        return doGetRootName(xmlDoc);
    }

    /**
     * Returns the root name of the specified XML document without validation.
     *
     * @param file - XML document file to be parsed.
     *
     * @return the root name.
     * @throws ParserConfigurationException Exception if we could not configure the parser
     * @throws SAXException Exception from a SAX related problem
     * @throws IOException Exception from a File i/O problem
     */
    public String getRootName(File file) throws ParserConfigurationException, IOException, SAXException
    {
        return doGetRootName(file);
    }

    /**
     * Returns the root name of the specified XML document without validation.
     *
     * @param is - <code>InputSource</code> containing XML document to be parsed.
     *
     * @return the root name.
     * @throws ParserConfigurationException Exception if we could not configure the parser
     * @throws SAXException Exception from a SAX related problem
     * @throws IOException Exception from a File i/O problem
     */
    public String getRootName(InputSource is) throws ParserConfigurationException, IOException, SAXException
    {
        return doGetRootName(is);
    }

    /**
     * Returns the root name of the specified XML document without validation.
     *
     * @param is - <code>InputStream</code> containing XML document to be parsed.
     *
     * @return the root name.
     * @throws ParserConfigurationException Exception if we could not configure the parser
     * @throws SAXException Exception from a SAX related problem
     * @throws IOException Exception from a File i/O problem
     */
    public String getRootName(InputStream is) throws ParserConfigurationException, IOException, SAXException
    {
        return doGetRootName(is);
    }

    /**
     * Returns the root name of the specified, already parsed XML document.
     *
     * @param doc - <code>Document</code> whose root name is wanted.
     *
     * @return the root name, or <code>null</code> if the document has no document element.
     * @throws ParserConfigurationException Exception if we could not configure the parser
     * @throws SAXException Exception from a SAX related problem
     * @throws IOException Exception from a File i/O problem
     */
    public String getRootName(Document doc) throws ParserConfigurationException, IOException, SAXException
    {
        return doGetRootName(doc);
    }

    /**
     * Walks the document and removes all text nodes used for indentation, i.e. whitespace-only text nodes that belong
     * to an element which had more than one child when the walk reached it.
     *
     * @param node - <code>Node</code> containing XML document to be examined; <code>null</code> is ignored.
     */
    public void removeIndent(Node node)
    {
        // Is there anything to do?
        if (node == null)
        {
            return;
        }
        switch (node.getNodeType())
        {
            // Start from the document element
            case Node.DOCUMENT_NODE:
            {
                removeIndent(((Document) node).getDocumentElement());
                break;
            }
            // Remove all whitespace-only text nodes, but the one that is the only child
            case Node.ELEMENT_NODE:
            {
                // Counted once, before anything is removed: the test below is "did this element have siblings
                // for the text node", not "does it still have them"
                int numOfChildren = (node.getChildNodes() != null) ? node.getChildNodes().getLength() : 0;
                Node child = node.getFirstChild();
                while (child != null)
                {
                    // Remember the successor first; a removed node no longer has siblings
                    Node nextSibling = child.getNextSibling();
                    if (numOfChildren > 1 && isWhitespaceOnlyText(child))
                    {
                        node.removeChild(child);
                    }
                    else
                    {
                        removeIndent(child);
                    }
                    child = nextSibling;
                }
                break;
            }
            case Node.ENTITY_REFERENCE_NODE:
            {
                for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling())
                {
                    removeIndent(child);
                }
                break;
            }
            // Document type, CDATA, text, processing instruction, comment and any other node: nothing to do
            default:
                break;
        }
    }

    /**
     * Tells whether the given node is a text node whose value consists of whitespace only.
     *
     * @param node - node to examine, must not be null.
     * @return true for a text node with a non-null value that is empty after trimming.
     */
    private static boolean isWhitespaceOnlyText(Node node)
    {
        if (node.getNodeType() != Node.TEXT_NODE)
        {
            return false;
        }
        String value = node.getNodeValue();
        return value != null && value.trim().length() == 0;
    }

    // main() method
    // /////////////////////////////////////////////////////////////////////////////
    /**
     * Runs this class for testing. Parses the document named by <code>-xml</code>, or the built-in default document
     * when no URI is given, and exits with status 0 on success and 1 on a usage error or parse failure.
     *
     * TODO Unit Test
     *
     * @param args arguments
     */
    public static void main(String args[])
    {
        String xmlURI = null;
        String xmlStr = defaultXmlStr;
        boolean validate = false;
        boolean xmlSchema = false;
        // Parse the command-line parameters
        for (int i = 0; i < args.length; i++)
        {
            String option = args[i];
            boolean isXml = option.equalsIgnoreCase("-xml");
            boolean isValidate = option.equalsIgnoreCase("-validate");
            boolean isXmlSchema = option.equalsIgnoreCase("-xmlschema");
            // Display usage then get outta here
            if (option.equalsIgnoreCase("-help"))
            {
                usage();
                System.exit(1);
            }
            else if (!(isXml || isValidate || isXmlSchema))
            {
                System.err.println("Invalid option - [" + option + "].");
                usage();
                System.exit(1);
            }
            // Every remaining option needs a value; report it instead of running off the end of the array
            else if (i + 1 >= args.length)
            {
                System.err.println("Missing value for option - [" + option + "].");
                usage();
                System.exit(1);
            }
            else
            {
                String value = args[++i].trim();
                if (isXml)
                {
                    // XML URI
                    xmlURI = value;
                }
                else if (isValidate)
                {
                    // Validation flag for parse() method
                    validate = value.equalsIgnoreCase("true");
                }
                else
                {
                    // XML Schema flag for parse() method
                    xmlSchema = value.equalsIgnoreCase("true");
                }
            }
        }
        System.out.println("");
        System.out.println("XML URI =[" + xmlURI + "].");
        System.out.println("validation =[" + validate + "].");
        System.out.println("XML schema =[" + xmlSchema + "].");
        if (xmlURI == null || xmlURI.trim().length() == 0)
        {
            System.out.println("XML doc string (default) =");
            System.out.println(xmlStr);
        }
        System.out.println("");
        int exitStatus = 0;
        try
        {
            DefaultXMLParser parser = new DefaultXMLParser();
            String schemaType = xmlSchema ? XMLConstants.W3C_XML_SCHEMA_NS_URI : XMLConstants.XML_DTD_NS_URI;
            parser.setValidation(Boolean.valueOf(validate));
            parser.setSchemaType(schemaType);
            if (xmlURI != null && xmlURI.length() > 0)
            {
                System.out.println("Parsing URI [" + xmlURI + "]...");
                parser.parse(xmlURI);
            }
            else
            {
                System.out.println("Parsing the default XML string...");
                // Encode explicitly so the bytes match the encoding declared in the default document
                parser.parse(xmlStr.getBytes("UTF-8"));
            }
            System.out.println("=> Successful.");
        }
        catch (Exception e)
        {
            e.printStackTrace(System.err);
            // Report the failure to the calling shell instead of exiting with success
            exitStatus = 1;
        }
        System.exit(exitStatus);
    }

    /**
     * Displays the usage for main() method.
     *
     * TODO Unit Test
     */
    private static void usage()
    {
        String fqClassName = DefaultXMLParser.class.getName();
        System.err.println("");
        System.err.println("Usage:");
        System.err.println("java " + fqClassName + " [-options]");
        System.err.println("");
        System.err.println("where options include:");
        System.err.println("    -xml <uri>  to specify input XML document location (See below for default XML)");
        System.err.println("    -validate <true|false>");
        System.err.println("                to validate the XML document (false)");
        System.err.println("    -xmlschema <true|false>");
        System.err.println("                to indicate whether the XML document is XML Schema based (false)");
        System.err.println("");
        System.err.println("Note that the following default XML string will be used if XML URI is not specified:-");
        System.err.println(defaultXmlStr);
    }

    /**
     * Default XML string used in main() method.
     * <p>
     * NOTE(review): the previous literal contained only the text content (data_a1, data_b1, data_c1) with no markup
     * and therefore could not be parsed. The prolog and element names below are a reconstruction that keeps that
     * text content - confirm against the upstream source.
     *
     * TODO Unit Test
     */
    private static final String defaultXmlStr = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<root>\n"
            + "  <a>data_a1</a>\n" + "  <b>data_b1</b>\n" + "  <c>data_c1</c>\n" + "</root>\n";
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy