com.tangosol.run.xml.SaxParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of coherence Show documentation
Show all versions of coherence Show documentation
Oracle Coherence Community Edition
/*
* Copyright (c) 2000, 2020, Oracle and/or its affiliates.
*
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
package com.tangosol.run.xml;
import com.oracle.coherence.common.base.Logger;
import com.tangosol.util.Base;
import com.tangosol.util.ClassHelper;
import com.tangosol.util.Resources;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import org.xml.sax.AttributeList;
import org.xml.sax.DocumentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.Parser;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.ParserFactory;
/**
* A simple XML parser. The public interface consists of several nearly
* identical parseXml(...) methods, each of which produces a tree of
* SimpleElement objects.
*
* @author gg 2000.10.23
*/
public class SaxParser
extends Base
{
// ----- constructors ---------------------------------------------------
/**
* Construct a SaxParser that does not allow XML comments.
* <p>
* Equivalent to calling {@link #SaxParser(boolean)} with {@code false}.
*/
public SaxParser()
{
this(false);
}
/**
* Construct a SaxParser.
*
* @param fAllowComments must be false; passing true is rejected because
* XML comments are not supported
*
* @throws UnsupportedOperationException if fAllowComments is true
*/
public SaxParser(boolean fAllowComments)
{
if (fAllowComments)
{
// comment support is not available, so reject the request up front
throw new UnsupportedOperationException("XML comments are not supported");
}
}
/**
 * Unit test: create a simple parser, parse the file named by the first
 * parameter and output the result.
 * <p>
 * If no parameters are supplied, nothing is done. Any exception raised
 * while opening, parsing or printing is reported via {@code out(e)}
 * rather than propagated.
 *
 * @param asParam  an array of parameters; the first element is the path
 *                 of the XML file to parse
 */
public static void main(String[] asParam)
{
    if (asParam.length > 0)
    {
        SaxParser parser = new SaxParser();

        // try-with-resources guarantees the file stream is closed even
        // when parsing or printing fails
        try (FileInputStream in = new FileInputStream(asParam[0]))
        {
            XmlElement root = parser.parseXml(in);
            root.writeXml(getOut(), true);
            out();
        }
        catch (Exception e)
        {
            out(e);
        }
    }
}
// ----- public API --------------------------------------------------
/**
 * Build a tree of XmlElement objects from XML text held in a String.
 * <p>
 * XML nodes other than elements, text or comments (the latter only for a
 * SaxParser that allows comments) are ignored, and the text value is
 * trimmed for all nodes except leafs.
 *
 * @param sXml  the XML as string
 *
 * @return the generated XmlElement
 *
 * @throws SAXException if SAX error occurs
 */
public XmlElement parseXml(String sXml)
        throws SAXException
{
    StringReader reader = new StringReader(sXml);
    InputSource  source = new InputSource(reader);

    // no pre-existing root element is supplied
    return parseXml(source, null);
}
/**
 * Build a tree of XmlElement objects from XML text held in a String,
 * using the specified [empty] XmlElement as the root of the result.
 * <p>
 * Note: this method is used by de-serialization
 * (see SimpleElement#readExternal)
 *
 * @param sXml    the XML as string
 * @param elRoot  the XML root
 *
 * @return the generated XmlElement
 *
 * @throws SAXException if SAX error occurs
 */
public XmlElement parseXml(String sXml, XmlElement elRoot)
        throws SAXException
{
    StringReader reader = new StringReader(sXml);
    InputSource  source = new InputSource(reader);

    return parseXml(source, elRoot);
}
/**
 * Build a tree of XmlElement objects from XML read from an InputStream.
 * <p>
 * XML nodes other than elements, text or comments (the latter only for a
 * SaxParser that allows comments) are ignored, and the text value is
 * trimmed for all nodes except leafs.
 *
 * @param input  the InputStream
 *
 * @return the generated XmlElement
 *
 * @throws SAXException if SAX error occurs
 */
public XmlElement parseXml(InputStream input)
        throws SAXException
{
    InputSource source = new InputSource(input);

    // no pre-existing root element is supplied
    return parseXml(source, null);
}
/**
 * Build a tree of XmlElement objects from XML read from a Reader.
 * <p>
 * XML nodes other than elements, text or comments (the latter only for a
 * SaxParser that allows comments) are ignored, and the text value is
 * trimmed for all nodes except leafs.
 *
 * @param input  the input Reader
 *
 * @return the generated XmlElement
 *
 * @throws SAXException if SAX error occurs
 */
public XmlElement parseXml(Reader input)
        throws SAXException
{
    InputSource source = new InputSource(input);

    // no pre-existing root element is supplied
    return parseXml(source, null);
}
/**
 * Build a tree of XmlElement objects from the specified InputSource.
 * <p>
 * XML nodes other than elements, text or comments (the latter only for a
 * SaxParser that allows comments) are ignored, and the text value is
 * trimmed for all nodes except leafs.
 *
 * @param input  the InputSource
 *
 * @return the generated XmlElement
 *
 * @throws SAXException if SAX error occurs
 */
public XmlElement parseXml(InputSource input)
        throws SAXException
{
    // no pre-existing root element is supplied
    final XmlElement xmlNoRoot = null;

    return parseXml(input, xmlNoRoot);
}
/**
 * Actual implementation that the public parseXml(...) methods delegate to.
 * <p>
 * A parser is obtained from getParser(), a single SimpleHandler is
 * registered as both its document handler and its error handler, the
 * input is parsed, and the root held by the handler is returned.
 *
 * @param input    the InputSource
 * @param xmlRoot  the XML root handed to the handler; the public methods
 *                 that take no root pass null
 *
 * @return the generated XmlElement
 *
 * @throws SAXException if SAX error occurs, if the handler holds no root
 *         after parsing ("Empty document"), or wrapping any other
 *         exception raised while parsing
 */
protected XmlElement parseXml(InputSource input, XmlElement xmlRoot)
        throws SAXException
{
    try
    {
        Parser        saxParser  = getParser();
        SimpleHandler saxHandler = new SimpleHandler(xmlRoot);

        saxParser.setDocumentHandler(saxHandler);
        saxParser.setErrorHandler(saxHandler);
        saxParser.parse(input);

        XmlElement xmlResult = saxHandler.m_root;
        if (xmlResult != null)
        {
            return xmlResult;
        }
        throw new SAXException("Empty document");
    }
    catch (SAXException e)
    {
        // already the declared exception type; propagate unchanged
        throw e;
    }
    catch (Exception e)
    {
        // anything else is wrapped so that callers only see SAXException
        throw new SAXException(e);
    }
}
/**
 * XSD aware validation routine; if the XML document references one or more
 * schemas via schemaLocation/noNamespaceSchemaLocation, validate the XML
 * text against those schemas.
 * <p>
 * Nothing is done when sXml is null or when no schema locations are found.
 * Validation errors are collected by a ValidationHandler and reported only
 * after the whole document has been validated.
 *
 * @param sXml  the XML to parse (as a string)
 * @param xml   the XML document object used to obtain schema locations
 *
 * @throws SAXException if XML contains an XSD reference and does not
 *         pass validation
 * @throws IOException if XML contains a schema that cannot be loaded
 * @throws ParserConfigurationException if a parser cannot be created
 */
public void validateXsd(String sXml, XmlDocument xml)
        throws SAXException, IOException, ParserConfigurationException
{
    if (sXml == null)
    {
        return;
    }

    List listLocations = XmlHelper.getSchemaLocations(
            xml, XmlHelper.getNamespacePrefix(xml,
                    XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI));

    // only validate if we have schemaLocations specified
    if (listLocations.isEmpty())
    {
        return;
    }

    SchemaFactory     factory        = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
    Source[]          aSchemaSource  = resolveSchemaSources(listLocations);
    Schema            schema         = factory.newSchema(aSchemaSource);
    Validator         validator      = schema.newValidator();
    ValidationHandler errorCollector = new ValidationHandler();

    if (ATTEMPT_RESTRICT_EXTERNAL.get())
    {
        try
        {
            // Disable access during parsing to external resolution to avoid XXE vulnerabilities
            validator.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
            validator.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
        }
        catch (Exception e)
        {
            // property not supported, warn once and don't attempt to set property again
            if (ATTEMPT_RESTRICT_EXTERNAL.compareAndSet(true, false))
            {
                String sEol     = System.lineSeparator();
                String sWarning = "Validator does not support JAXP 1.5 properties to restrict access to external XML DTDs and Schemas." + sEol
                        + "To guard against XXE vulnerabilities, ensure provided XML parser is secure." + sEol
                        + "Validator: " + validator.getClass().getCanonicalName() + sEol
                        + "Error: " + e.getLocalizedMessage();

                Logger.warn(sWarning);
            }
        }
    }

    validator.setErrorHandler(errorCollector);
    validator.validate(new StreamSource(new StringReader(sXml)));

    // optimize error handling to report all errors
    // prior to failing; this is easier for user that
    // has multiple problems to config files.
    if (errorCollector.isError())
    {
        throw errorCollector.getException();
    }
}
/**
* For a given set of XSD URIs, return the {@link Source}s to be
* used by the XML parser to validate an XML document.
*
* @param listUri list of XSD URIs to convert
*
* @return an array of {@link Source}s to be used by the XML parser
*
* @throws IOException if the resource cannot be located or loaded
*/
protected Source[] resolveSchemaSources(List listUri)
throws IOException
{
List