All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.inlong.sort.filesystem.shaded.com.amazonaws.util.XpathUtils Maven / Gradle / Ivy

There is a newer version: 1.13.0
Show newest version
/*
 * Copyright 2010-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */
package com.amazonaws.util;

import com.amazonaws.internal.SdkThreadLocalsRegistry;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Method;
import java.net.URL;
import java.nio.ByteBuffer;
import java.util.Date;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * Utility methods for extracting data from XML documents using Xpath
 * expressions.
 */
public class XpathUtils {

    /** The default property name to load the Xalan DTM manager. */
    private static final String DTM_MANAGER_DEFAULT_PROP_NAME = "com.sun.org.apache.xml.internal.dtm.DTMManager";

    /** The default property name to load the Xalan Document Builder Factory. */
    private static final String DOCUMENT_BUILDER_FACTORY_PROP_NAME = "javax.xml.parsers.DocumentBuilderFactory";

    /** The FQCN of the desired DocumentBuilderFactory implementation. */
    private static final String DOCUMENT_BUILDER_FACTORY_IMPL_CLASS_NAME = "com.sun.org.apache.xerces.internal.jaxp.DocumentBuilderFactoryImpl";

    /** The FQCN of the internal XPathContext class. */
    private static final String XPATH_CONTEXT_CLASS_NAME = "com.sun.org.apache.xpath.internal.XPathContext";

    /** The FQCN of the desired DTMManager implementation. */
    private static final String DTM_MANAGER_IMPL_CLASS_NAME = "com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault";

    private static final Log log = LogFactory.getLog(XpathUtils.class);

    private static final ErrorHandler ERROR_HANDLER = new ErrorHandler() {

        @Override
        public void warning(SAXParseException e) throws SAXException {
            if (log.isDebugEnabled()) {
                log.debug("xml parse warning: " + e.getMessage(), e);
            }
        }

        @Override
        public void fatalError(SAXParseException e) throws SAXException {
            throw e;
        }

        @Override
        public void error(SAXParseException e) throws SAXException {
            if (log.isDebugEnabled()) {
                log.debug("xml parse error: " + e.getMessage(), e);
            }
        }
    };

    private static volatile DocumentBuilderInfo cachedDocumentBuilderInfo = null;

    /**
     * Shared factory for creating XML Factory
     */
    private static final ThreadLocal X_PATH_FACTORY = SdkThreadLocalsRegistry.register(
            new ThreadLocal() {
                @Override
                protected XPathFactory initialValue() {
                    return XPathFactory.newInstance();
                }
            });

    /**
     * Used to optimize performance by avoiding expensive file access every time
     * a DTMManager is constructed as a result of constructing a Xalan xpath
     * context!
     */
    private static void speedUpDTMManager() throws Exception {
        // https://github.com/aws/aws-sdk-java/issues/238
        // http://stackoverflow.com/questions/6340802/java-xpath-apache-jaxp-implementation-performance
        if (System.getProperty(DTM_MANAGER_DEFAULT_PROP_NAME) == null) {
            Class XPathContextClass = Class.forName(XPATH_CONTEXT_CLASS_NAME);
            Method getDTMManager = XPathContextClass.getMethod("getDTMManager");
            Object XPathContext = XPathContextClass.newInstance();
            Object dtmManager = getDTMManager.invoke(XPathContext);

            if (DTM_MANAGER_IMPL_CLASS_NAME.equals(dtmManager.getClass().getName())) {
                // This would avoid the file system to be accessed every time
                // the internal XPathContext is instantiated.
                System.setProperty(DTM_MANAGER_DEFAULT_PROP_NAME,
                        DTM_MANAGER_IMPL_CLASS_NAME);
            }
        }
    }

    /**
     * Used to optimize performance by avoiding expensive file access every time
     * a DocumentBuilderFactory is constructed as a result of constructing a
     * Xalan document factory.
     */
    private static void speedUpDcoumentBuilderFactory() {
        if (System.getProperty(DOCUMENT_BUILDER_FACTORY_PROP_NAME) == null) {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            if (DOCUMENT_BUILDER_FACTORY_IMPL_CLASS_NAME.equals(factory.getClass().getName())) {
                // This would avoid the file system to be accessed every time
                // the internal DocumentBuilderFactory is instantiated.
                System.setProperty(DOCUMENT_BUILDER_FACTORY_PROP_NAME,
                        DOCUMENT_BUILDER_FACTORY_IMPL_CLASS_NAME);
            }
        }
    }

    // static initialization block to conservatively speed things up whenever we
    // can
    static {
        try {
            speedUpDcoumentBuilderFactory();
        } catch(Throwable t) {
            log.debug("Ingore failure in speeding up DocumentBuilderFactory", t);
        }
        try {
            speedUpDTMManager();
        } catch(Throwable t) {
            log.debug("Ingore failure in speeding up DTMManager", t);
        }
    }

    // XPath is not thread safe and not reentrant.
    /**
     * Returns a new instance of XPath, which is not thread safe and not
     * reentrant.
     */
    public static XPath xpath() {
        return X_PATH_FACTORY.get().newXPath();
    }

    /**
     * This method closes the given input stream upon completion.
     */
    public static Document documentFrom(InputStream is)
            throws SAXException, IOException, ParserConfigurationException {
        is = new NamespaceRemovingInputStream(is);
        // DocumentBuilderFactory is not thread safe
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

        configureDocumentBuilderFactory(factory);

        DocumentBuilder builder = factory.newDocumentBuilder();

        // ensure that parser writes error/warning messages to the logger
        // rather than stderr
        builder.setErrorHandler(ERROR_HANDLER);
        Document doc = builder.parse(is);
        is.close();
        return doc;
    }

    public static Document documentFrom(String xml) throws SAXException,
            IOException, ParserConfigurationException {
        return documentFrom(new ByteArrayInputStream(xml.getBytes(StringUtils.UTF8)));
    }

    public static Document documentFrom(URL url) throws SAXException,
            IOException, ParserConfigurationException {
        return documentFrom(url.openStream());
    }

    /**
     * Evaluates the specified XPath expression and returns the results as a
     * Double.
     * 

* This method can be expensive as a new xpath is instantiated per * invocation. Consider passing in the xpath explicitly via { * {@link #asDouble(String, Node, XPath)} instead. Note {@link XPath} is * not thread-safe and not reentrant. * * @param expression * The XPath expression to evaluate. * @param node * The node to run the expression on. * * @return The Double result. * * @throws XPathExpressionException * If there was a problem processing the specified XPath * expression. */ public static Double asDouble(String expression, Node node) throws XPathExpressionException { return asDouble(expression, node, xpath()); } /** * Same as {@link #asDouble(String, Node)} but allows an xpath to be passed * in explicitly for reuse. */ public static Double asDouble(String expression, Node node, XPath xpath) throws XPathExpressionException { String doubleString = evaluateAsString(expression, node, xpath); return (isEmptyString(doubleString)) ? null : Double.parseDouble(doubleString); } /** * Evaluates the specified XPath expression and returns the result as a * string. *

* This method can be expensive as a new xpath is instantiated per * invocation. Consider passing in the xpath explicitly via { * {@link #asDouble(String, Node, XPath)} instead. Note {@link XPath} is * not thread-safe and not reentrant. * * @param expression * The XPath expression to evaluate. * @param node * The node to run the expression on. * * @return The string result. * * @throws XPathExpressionException * If there was a problem processing the specified XPath * expression. */ public static String asString(String expression, Node node) throws XPathExpressionException { return evaluateAsString(expression, node, xpath()); } /** * Same as {@link #asString(String, Node)} but allows an xpath to be passed * in explicitly for reuse. */ public static String asString(String expression, Node node, XPath xpath) throws XPathExpressionException { return evaluateAsString(expression, node, xpath); } /** * Evaluates the specified XPath expression and returns the result as an * Integer. *

* This method can be expensive as a new xpath is instantiated per * invocation. Consider passing in the xpath explicitly via { * {@link #asDouble(String, Node, XPath)} instead. Note {@link XPath} is * not thread-safe and not reentrant. * * @param expression * The XPath expression to evaluate. * @param node * The node to run the expression on. * * @return The Integer result. * * @throws XPathExpressionException * If there was a problem processing the specified XPath * expression. */ public static Integer asInteger(String expression, Node node) throws XPathExpressionException { return asInteger(expression, node, xpath()); } /** * Same as {@link #asInteger(String, Node)} but allows an xpath to be passed * in explicitly for reuse. */ public static Integer asInteger(String expression, Node node, XPath xpath) throws XPathExpressionException { String intString = evaluateAsString(expression, node, xpath); return (isEmptyString(intString)) ? null : Integer.parseInt(intString); } /** * Evaluates the specified XPath expression and returns the result as a * Boolean. *

* This method can be expensive as a new xpath is instantiated per * invocation. Consider passing in the xpath explicitly via { * {@link #asDouble(String, Node, XPath)} instead. Note {@link XPath} is * not thread-safe and not reentrant. * * @param expression * The XPath expression to evaluate. * @param node * The node to run the expression on. * * @return The Boolean result. * * @throws XPathExpressionException * If there was a problem processing the specified XPath * expression. */ public static Boolean asBoolean(String expression, Node node) throws XPathExpressionException { return asBoolean(expression, node, xpath()); } /** * Same as {@link #asBoolean(String, Node)} but allows an xpath to be passed * in explicitly for reuse. */ public static Boolean asBoolean(String expression, Node node, XPath xpath) throws XPathExpressionException { String booleanString = evaluateAsString(expression, node, xpath); return (isEmptyString(booleanString)) ? null : Boolean.parseBoolean(booleanString); } /** * Evaluates the specified XPath expression and returns the result as a * Float. *

* This method can be expensive as a new xpath is instantiated per * invocation. Consider passing in the xpath explicitly via { * {@link #asDouble(String, Node, XPath)} instead. Note {@link XPath} is * not thread-safe and not reentrant. * * @param expression * The XPath expression to evaluate. * @param node * The node to run the expression on. * * @return The Float result. * * @throws XPathExpressionException * If there was a problem processing the specified XPath * expression. */ public static Float asFloat(String expression, Node node) throws XPathExpressionException { return asFloat(expression, node, xpath()); } /** * Same as {@link #asFloat(String, Node)} but allows an xpath to be passed * in explicitly for reuse. */ public static Float asFloat(String expression, Node node, XPath xpath) throws XPathExpressionException { String floatString = evaluateAsString(expression, node, xpath); return (isEmptyString(floatString)) ? null : Float.valueOf(floatString); } /** * Evaluates the specified XPath expression and returns the result as a * Long. *

* This method can be expensive as a new xpath is instantiated per * invocation. Consider passing in the xpath explicitly via { * {@link #asDouble(String, Node, XPath)} instead. Note {@link XPath} is * not thread-safe and not reentrant. * * @param expression * The XPath expression to evaluate. * @param node * The node to run the expression on. * * @return The Long result. * * @throws XPathExpressionException * If there was a problem processing the specified XPath * expression. */ public static Long asLong(String expression, Node node) throws XPathExpressionException { return asLong(expression, node, xpath()); } /** * Same as {@link #asLong(String, Node)} but allows an xpath to be passed * in explicitly for reuse. */ public static Long asLong(String expression, Node node, XPath xpath) throws XPathExpressionException { String longString = evaluateAsString(expression, node, xpath); return (isEmptyString(longString)) ? null : Long.parseLong(longString); } /** * Evaluates the specified XPath expression and returns the result as a * Byte. *

* This method can be expensive as a new xpath is instantiated per * invocation. Consider passing in the xpath explicitly via { * {@link #asDouble(String, Node, XPath)} instead. Note {@link XPath} is * not thread-safe and not reentrant. * * @param expression * The XPath expression to evaluate. * @param node * The node to run the expression on. * * @return The Byte result. * * @throws XPathExpressionException * If there was a problem processing the specified XPath * expression. */ public static Byte asByte(String expression, Node node) throws XPathExpressionException { return asByte(expression, node, xpath()); } /** * Same as {@link #asByte(String, Node)} but allows an xpath to be passed * in explicitly for reuse. */ public static Byte asByte(String expression, Node node, XPath xpath) throws XPathExpressionException { String byteString = evaluateAsString(expression, node, xpath); return (isEmptyString(byteString)) ? null : Byte.valueOf(byteString); } /** * Evaluates the specified XPath expression and returns the result as a * Date. Assumes that the node's text is formatted as an ISO 8601 date, as * specified by xs:dateTime. *

* This method can be expensive as a new xpath is instantiated per * invocation. Consider passing in the xpath explicitly via { * {@link #asDouble(String, Node, XPath)} instead. Note {@link XPath} is * not thread-safe and not reentrant. * * @param expression * The XPath expression to evaluate. * @param node * The node to run the expression on. * * @return The Date result. * * @throws XPathExpressionException * If there was a problem processing the specified XPath * expression. */ public static Date asDate(String expression, Node node) throws XPathExpressionException { return asDate(expression, node, xpath()); } /** * Same as {@link #asDate(String, Node)} but allows an xpath to be passed * in explicitly for reuse. */ public static Date asDate(String expression, Node node, XPath xpath) throws XPathExpressionException { String dateString = evaluateAsString(expression, node, xpath); if (isEmptyString(dateString)) return null; try { return DateUtils.parseISO8601Date(dateString); } catch (Exception e) { log.warn("Unable to parse date '" + dateString + "': " + e.getMessage(), e); return null; } } /** * Evaluates the specified xpath expression, base64 decodes the data and * returns the result as a ByteBuffer. *

* This method can be expensive as a new xpath is instantiated per * invocation. Consider passing in the xpath explicitly via { * {@link #asDouble(String, Node, XPath)} instead. Note {@link XPath} is * not thread-safe and not reentrant. * * @param expression * The Xpath expression to evaluate. * @param node * The node on which to evaluate the expression. * * @return A ByteBuffer of base64 decoded data from the result of evaluating * the specified Xpath expression. * * @throws XPathExpressionException * If there are any problems evaluating the Xpath expression. */ public static ByteBuffer asByteBuffer(String expression, Node node) throws XPathExpressionException { return asByteBuffer(expression, node, xpath()); } /** * Same as {@link #asByteBuffer(String, Node)} but allows an xpath to be * passed in explicitly for reuse. */ public static ByteBuffer asByteBuffer(String expression, Node node, XPath xpath) throws XPathExpressionException { String base64EncodedString = evaluateAsString(expression, node, xpath); if (isEmptyString(base64EncodedString)) return null; if (!isEmpty(node)) { byte[] decodedBytes = Base64.decode(base64EncodedString); return ByteBuffer.wrap(decodedBytes); } return null; } /** * Returns true if the specified node is null or has no children. * * @param node * The node to test. * * @return True if the specified node is null or has no children. */ public static boolean isEmpty(Node node) { return (node == null); } /** * Evaluates the specified XPath expression and returns the result as a * Node. * * @param nodeName * The XPath expression to evaluate. * @param node * The node to run the expression on. * * @return The Node result. * * @throws XPathExpressionException * If there was a problem processing the specified XPath * expression. */ public static Node asNode(String nodeName, Node node) throws XPathExpressionException { return asNode(nodeName, node, xpath()); } /** * Same as {@link #asNode(String, Node)} but allows an xpath to be * passed in explicitly for reuse. */ public static Node asNode(String nodeName, Node node, XPath xpath) throws XPathExpressionException { if (node == null) return null; return (Node) xpath.evaluate(nodeName, node, XPathConstants.NODE); } /** * Returns the length of the specified node list. * * @param list * The node list to measure. * * @return The length of the specified node list. */ public static int nodeLength(NodeList list) { return list == null ? 0 : list.getLength(); } /** * Evaluates the specified expression on the specified node and returns the * result as a String. * * @param expression * The Xpath expression to evaluate. * @param node * The node on which to evaluate the expression. * * @return The result of evaluating the specified expression, or null if the * evaluation didn't return any result. * * @throws XPathExpressionException * If there are any problems evaluating the Xpath expression. */ private static String evaluateAsString(String expression, Node node, XPath xpath) throws XPathExpressionException { if (isEmpty(node)) return null; if (!expression.equals(".")) { /* * If the expression being evaluated doesn't select a node, we want * to return null to distinguish between cases where a node isn't * present (which should be represented as null) and when a node is * present, but empty (which should be represented as the empty * string). * * We skip this test if the expression is "." since we've already * checked that the node exists. */ if (asNode(expression, node, xpath) == null) return null; } String s = xpath.evaluate(expression, node); return s.trim(); } /** * Returns true if the specified string is null or empty. * * @param s * The string to test. * @return True if the specified string is null or empty. */ private static boolean isEmptyString(String s) { return s == null || s.trim().length() == 0; } private static void configureDocumentBuilderFactory(DocumentBuilderFactory factory) { DocumentBuilderInfo cache = XpathUtils.cachedDocumentBuilderInfo; if (cache != null && cache.clzz.equals(factory.getClass())) { if (cache.xxeMitigationSuccessful) { try { if (isXerces(cache.canonicalName)) { configureXercesFactory(factory); } else { configureGenericFactory(factory); } } catch (Throwable t) { // Should not get to here since we check first if we could successfully configure previously log.warn("Unable to configure DocumentBuilderFactory to protect against XXE attacks", t); } } } else { initialConfigureDocumentBuilderFactory(factory); } } private static void initialConfigureDocumentBuilderFactory(DocumentBuilderFactory factory) { synchronized (XpathUtils.class) { Class clzz = factory.getClass(); String canonicalName = clzz.getCanonicalName(); boolean xxeMitigationSuccessful; try { if (isXerces(canonicalName)) { configureXercesFactory(factory); } else { configureGenericFactory(factory); } xxeMitigationSuccessful = true; } catch (Throwable t) { log.warn("Unable to configure DocumentBuilderFactory to protect against XXE attacks", t); xxeMitigationSuccessful = false; } cachedDocumentBuilderInfo = new DocumentBuilderInfo(clzz, canonicalName, xxeMitigationSuccessful); } } private static boolean isXerces(String canonicalName) { // The included implementation in the JDK is also Xerces, but a fork and under a different package: // https://github.com/openjdk/jdk/blob/3f77a6002ea7c150308409600abd4f1140bfb36a/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/jaxp/DocumentBuilderFactoryImpl.java#L21 return canonicalName.startsWith("org.apache.xerces.") || canonicalName.startsWith("com.sun.org.apache.xerces."); } private static void configureXercesFactory(DocumentBuilderFactory factory) throws ParserConfigurationException { commonConfigureFactory(factory); // https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); factory.setFeature("http://xml.org/sax/features/external-general-entities", false); factory.setFeature("http://xml.org/sax/features/external-parameter-entities", false); factory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); } private static void configureGenericFactory(DocumentBuilderFactory factory) throws ParserConfigurationException { commonConfigureFactory(factory); // https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html // https://rules.sonarsource.com/java/tag/owasp/RSPEC-2755 factory.setAttribute("http://javax.xml.XMLConstants/property/accessExternalDTD", ""); factory.setAttribute("http://javax.xml.XMLConstants/property/accessExternalSchema", ""); } private static void commonConfigureFactory(DocumentBuilderFactory factory) throws ParserConfigurationException { factory.setXIncludeAware(false); factory.setExpandEntityReferences(false); factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); } private static class DocumentBuilderInfo { private final Class clzz; private final String canonicalName; private final boolean xxeMitigationSuccessful; private DocumentBuilderInfo(Class clzz, String canonicalName, boolean xxeMitigationSuccessful) { this.clzz = clzz; this.canonicalName = canonicalName; this.xxeMitigationSuccessful = xxeMitigationSuccessful; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy