All downloads are free. Search and download functionality uses the official Maven repository.

nu.validator.xml.PrudentHttpEntityResolver Maven / Gradle / Ivy

Go to download

An HTML-checking library (used by https://html5.validator.nu and the HTML5 facet of the W3C Validator)

There is a newer version: 20.7.2
Show newest version
/*
 * Copyright (c) 2005 Henri Sivonen
 * Copyright (c) 2007-2018 Mozilla Foundation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

package nu.validator.xml;

import java.io.IOException;
import java.io.InputStream;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.util.zip.GZIPInputStream;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.SSLContext;
import javax.servlet.http.HttpServletRequest;

import org.relaxng.datatype.DatatypeException;

import nu.validator.datatype.ContentSecurityPolicy;
import nu.validator.datatype.Html5DatatypeException;
import nu.validator.io.BoundedInputStream;
import nu.validator.io.ObservableInputStream;
import nu.validator.io.StreamBoundException;
import nu.validator.io.StreamObserver;
import nu.validator.io.SystemIdIOException;

import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.CookieSpecs;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.SSLContextBuilder;
import org.apache.http.conn.ssl.TrustStrategy;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.client.LaxRedirectStrategy;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.log4j.Logger;

import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

import io.mola.galimatias.URL;
import io.mola.galimatias.GalimatiasParseException;

/**
 * @version $Id: PrudentHttpEntityResolver.java,v 1.1 2005/01/08 08:11:26
 *          hsivonen Exp $
 * @author hsivonen
 */
@SuppressWarnings("deprecation") public class PrudentHttpEntityResolver
        implements EntityResolver {

    /** Logger for this class. */
    private static final Logger log4j = Logger.getLogger(PrudentHttpEntityResolver.class);

    /**
     * HTTP client shared by all resolver instances. It is assigned by
     * {@link #setParams(int, int, int)} and used by
     * {@link #resolveEntity(String, String)}.
     */
    private static HttpClient client;

    /**
     * Value passed to {@link #setParams(int, int, int)}; every new resolver
     * copies it into {@link #requestsLeft}.
     */
    private static int maxRequests;

    /**
     * Maximum resource size accepted by {@link #resolveEntity(String, String)}.
     * The limit is only enforced when the value is greater than -1.
     */
    private final long sizeLimit;

    /** Receives errors and warnings; {@code resolveEntity} tolerates null. */
    private final ErrorHandler errorHandler;

    /**
     * Number of requests this resolver may still make. A value below zero is
     * never decremented and never triggers the "exceeded" error.
     */
    private int requestsLeft;

    // The allow* flags below are mirrored into contentTypeParser by the
    // corresponding setters.

    private boolean allowRnc = false;

    private boolean allowCss = false;

    private boolean allowHtml = false;

    private boolean allowXhtml = false;

    private boolean acceptAllKnownXmlTypes = false;

    private boolean allowGenericXml = true;

    /** Turns the response Content-Type into a typed input source. */
    private final ContentTypeParser contentTypeParser;

    /** Value sent in the {@code User-Agent} request header. */
    private String userAgent;

    /**
     * Optional servlet request whose attributes supply per-request settings
     * (accept-language, ignore-response-status); may be null.
     */
    private final HttpServletRequest request;

    /**
     * Builds the HTTP client shared by all resolver instances and records
     * the request limit handed to resolvers created afterwards.
     *
     * <p>The following system properties are consulted:
     * <ul>
     * <li>{@code nu.validator.servlet.max-total-connections} (default 200):
     * total size of the connection pool;</li>
     * <li>{@code nu.validator.servlet.max-redirects} (default 20);</li>
     * <li>{@code nu.validator.xml.promiscuous-ssl} (default {@code true}):
     * when {@code "true"}, every certificate and host name is accepted.</li>
     * </ul>
     *
     * @param connectionTimeout
     *            timeout until connection established in milliseconds. Zero
     *            means no timeout.
     * @param socketTimeout
     *            timeout for waiting for data in milliseconds. Zero means no
     *            timeout.
     * @param maxRequests
     *            maximum number of connections to a particular host; also
     *            the number of requests each resolver created afterwards is
     *            allowed to make
     * @throws NumberFormatException
     *             if one of the numeric system properties is not an integer
     */
    public static void setParams(int connectionTimeout, int socketTimeout,
            int maxRequests) {
        PrudentHttpEntityResolver.maxRequests = maxRequests;

        // Default socket factories; "https" is replaced below when
        // promiscuous SSL is enabled.
        Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory> create()
                .register("http", PlainConnectionSocketFactory.getSocketFactory())
                .register("https", SSLConnectionSocketFactory.getSocketFactory())
                .build();

        int maxTotalConnections = Integer.parseInt(System.getProperty(
                "nu.validator.servlet.max-total-connections", "200"));

        HttpClientBuilder builder = HttpClients.custom().useSystemProperties();
        builder.setRedirectStrategy(new LaxRedirectStrategy());
        builder.setMaxConnPerRoute(maxRequests);
        builder.setMaxConnTotal(maxTotalConnections);

        // SECURITY NOTE: promiscuous mode is on by default and disables
        // certificate and host name verification entirely.
        if ("true".equals(System.getProperty(
                "nu.validator.xml.promiscuous-ssl", "true"))) {
            try {
                SSLContext promiscuousSSLContext = new SSLContextBuilder()
                        .loadTrustMaterial(null, new TrustStrategy() {
                            @Override
                            public boolean isTrusted(X509Certificate[] chain,
                                    String authType)
                                    throws CertificateException {
                                return true;
                            }
                        }).build();
                builder.setSslcontext(promiscuousSSLContext);
                HostnameVerifier verifier = //
                SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER;
                SSLConnectionSocketFactory promiscuousSSLConnSocketFactory = //
                new SSLConnectionSocketFactory(promiscuousSSLContext, verifier);
                registry = RegistryBuilder.<ConnectionSocketFactory> create()
                        .register("https", promiscuousSSLConnSocketFactory)
                        .register("http",
                                PlainConnectionSocketFactory.getSocketFactory())
                        .build();
            } catch (KeyManagementException | KeyStoreException
                    | NoSuchAlgorithmException e) {
                // The default registry built above stays in effect.
                log4j.error("Could not set up promiscuous SSL;"
                        + " using default SSL socket factory.", e);
            }
        }

        // The explicit connection manager is what enforces the pool limits,
        // so it must receive the configured total rather than a constant.
        PoolingHttpClientConnectionManager phcConnMgr = new PoolingHttpClientConnectionManager(
                registry);
        phcConnMgr.setDefaultMaxPerRoute(maxRequests);
        phcConnMgr.setMaxTotal(maxTotalConnections);
        builder.setConnectionManager(phcConnMgr);

        RequestConfig.Builder config = RequestConfig.custom();
        config.setCircularRedirectsAllowed(true);
        config.setMaxRedirects(Integer.parseInt(System.getProperty(
                "nu.validator.servlet.max-redirects", "20")));
        config.setConnectTimeout(connectionTimeout);
        config.setSocketTimeout(socketTimeout);
        config.setCookieSpec(CookieSpecs.IGNORE_COOKIES);
        client = builder.setDefaultRequestConfig(config.build()).build();
    }

    /**
     * Sets the value sent in the <code>User-Agent</code> header of
     * subsequent requests.
     *
     * @param ua
     *            the user agent string
     */
    public void setUserAgent(String ua) {
        this.userAgent = ua;
    }

    /**
     * Creates a resolver bound to an optional servlet request.
     *
     * @param sizeLimit
     *            maximum resource size; only enforced when greater than -1
     * @param laxContentType
     *            passed through to the content type parser
     * @param errorHandler
     *            receiver of errors and warnings
     * @param request
     *            servlet request whose attributes are consulted while
     *            resolving; may be null
     */
    public PrudentHttpEntityResolver(long sizeLimit, boolean laxContentType,
            ErrorHandler errorHandler, HttpServletRequest request) {
        this.sizeLimit = sizeLimit;
        this.errorHandler = errorHandler;
        this.request = request;
        // Start with the request budget currently configured for the class.
        this.requestsLeft = PrudentHttpEntityResolver.maxRequests;
        // The parser is seeded with the flags' initial field values.
        this.contentTypeParser = new ContentTypeParser(errorHandler,
                laxContentType, allowRnc, allowHtml, allowXhtml,
                acceptAllKnownXmlTypes, allowGenericXml);
    }

    /**
     * Creates a resolver that is not bound to a servlet request; delegates
     * to the four-argument constructor with a null request.
     *
     * @param sizeLimit
     *            forwarded unchanged
     * @param laxContentType
     *            forwarded unchanged
     * @param errorHandler
     *            forwarded unchanged
     */
    public PrudentHttpEntityResolver(long sizeLimit, boolean laxContentType,
            ErrorHandler errorHandler) {
        this(sizeLimit, laxContentType, errorHandler, null);
    }

    /**
     * Fetches the entity identified by <code>systemId</code> with an HTTP
     * GET and returns it as an input source typed by the response's
     * Content-Type.
     *
     * <p>Only <code>http</code> and <code>https</code> URLs are accepted.
     * Unless the request attribute
     * <code>http://validator.nu/properties/ignore-response-status</code> is
     * set, any status other than 200 is an error. The returned byte stream
     * is size-bounded when a size limit is configured, transparently
     * gunzipped, and releases the HTTP connection when it is closed,
     * fails, or is finalized. A response without an entity is treated as
     * an empty body.
     *
     * @param publicId
     *            public identifier, used only for error reporting and the
     *            resulting input source
     * @param systemId
     *            URL of the resource to fetch
     * @return the input source for the fetched resource
     * @throws SAXException
     *             if the URL is unsupported, the resource is too large or
     *             not retrievable, or the error handler throws
     * @throws IOException
     *             if the request budget is exhausted, the URL cannot be
     *             parsed, or the transfer fails
     * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String,
     *      java.lang.String)
     */
    @Override
    public InputSource resolveEntity(String publicId, String systemId)
            throws SAXException, IOException {
        // A negative budget is never decremented, i.e. it is unlimited.
        if (requestsLeft > -1) {
            if (requestsLeft == 0) {
                throw new IOException(
                        "Number of permitted HTTP requests exceeded.");
            }
            requestsLeft--;
        }
        HttpGet m = null;
        try {
            URL url = parseHttpUrl(publicId, systemId);
            // Continue with the normalized form of the URL.
            systemId = url.toString();
            try {
                m = new HttpGet(systemId);
            } catch (IllegalArgumentException e) {
                SAXParseException spe = new SAXParseException(e.getMessage(),
                        publicId, systemId, -1, -1,
                        new IOException(e.getMessage(), e));
                reportFatal(spe);
                throw spe;
            }
            m.setHeader("User-Agent", userAgent);
            m.setHeader("Accept", buildAccept());
            m.setHeader("Accept-Encoding", "gzip");
            Object acceptLanguage = (request == null) ? null
                    : request.getAttribute(
                            "http://validator.nu/properties/accept-language");
            if (acceptLanguage != null) {
                m.setHeader("Accept-Language", (String) acceptLanguage);
            }
            log4j.info(systemId);
            try {
                if (url.port() > 65535) {
                    throw new IOException(
                            "Port number must be less than 65536.");
                }
            } catch (NumberFormatException e) {
                throw new IOException(
                        "Port number must be less than 65536.", e);
            }
            HttpResponse response = client.execute(m);
            final String baseUri = m.getURI().toString();

            boolean ignoreResponseStatus = false;
            Object ignoreStatus = (request == null) ? null
                    : request.getAttribute(
                            "http://validator.nu/properties/ignore-response-status");
            if (ignoreStatus != null) {
                ignoreResponseStatus = (Boolean) ignoreStatus;
            }
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode != 200 && !ignoreResponseStatus) {
                String msg = "HTTP resource not retrievable."
                        + " The HTTP status from the remote server was: "
                        + statusCode + ".";
                reportFatal(new SAXParseException(msg, publicId, baseUri, -1,
                        -1, new SystemIdIOException(baseUri, msg)));
                throw new ResourceNotRetrievableException(
                        String.format("%s: %s", baseUri, msg));
            }

            // getEntity() returns null for responses that carry no body
            // (possible when the status check is bypassed).
            HttpEntity entity = response.getEntity();
            long len = (entity == null) ? 0 : entity.getContentLength();
            if (sizeLimit > -1 && len > sizeLimit) {
                SAXParseException spe = new SAXParseException(
                        "Resource size exceeds limit.", publicId, baseUri,
                        -1, -1,
                        new StreamBoundException("Resource size exceeds limit."));
                reportFatal(spe);
                throw spe;
            }

            Header ct = response.getFirstHeader("Content-Type");
            String contentType = (ct == null) ? null : ct.getValue();
            TypedInputSource is = contentTypeParser.buildTypedInputSource(
                    baseUri, publicId, contentType);

            Header cl = response.getFirstHeader("Content-Language");
            if (cl != null) {
                is.setLanguage(cl.getValue().trim());
            }

            checkAdvisoryHeaders(response, publicId, systemId);

            final HttpGet meth = m;
            InputStream stream = (entity == null)
                    ? new java.io.ByteArrayInputStream(new byte[0])
                    : entity.getContent();
            if (sizeLimit > -1) {
                stream = new BoundedInputStream(stream, sizeLimit, baseUri);
            }
            Header ce = response.getFirstHeader("Content-Encoding");
            if (ce != null) {
                String val = ce.getValue().trim();
                if ("gzip".equalsIgnoreCase(val)
                        || "x-gzip".equalsIgnoreCase(val)) {
                    stream = new GZIPInputStream(stream);
                    // Bound the decompressed size as well.
                    if (sizeLimit > -1) {
                        stream = new BoundedInputStream(stream, sizeLimit,
                                baseUri);
                    }
                }
            }
            is.setByteStream(new ObservableInputStream(stream,
                    new StreamObserver() {
                        private final Logger observerLog = Logger.getLogger("nu.validator.xml.PrudentEntityResolver.StreamObserver");

                        // Ensures the connection is given back only once.
                        private boolean released = false;

                        @Override
                        public void closeCalled() {
                            observerLog.debug("closeCalled");
                            if (!released) {
                                observerLog.debug("closeCalled, not yet released");
                                released = true;
                                try {
                                    meth.releaseConnection();
                                } catch (Exception e) {
                                    observerLog.debug(
                                            "closeCalled, releaseConnection", e);
                                }
                            }
                        }

                        @Override
                        public void exceptionOccurred(Exception ex)
                                throws IOException {
                            if (!released) {
                                released = true;
                                abortAndRelease(meth, observerLog,
                                        "exceptionOccurred, ");
                            }
                            // Rethrow, tagging plain I/O errors with the URL.
                            if (ex instanceof SystemIdIOException) {
                                throw (SystemIdIOException) ex;
                            } else if (ex instanceof IOException) {
                                IOException ioe = (IOException) ex;
                                throw new SystemIdIOException(baseUri,
                                        ioe.getMessage(), ioe);
                            } else if (ex instanceof RuntimeException) {
                                throw (RuntimeException) ex;
                            } else {
                                throw new RuntimeException(
                                        "API contract violation. Wrong exception type.",
                                        ex);
                            }
                        }

                        @Override
                        public void finalizerCalled() {
                            if (!released) {
                                released = true;
                                abortAndRelease(meth, observerLog,
                                        "finalizerCalled, ");
                            }
                        }

                    }));
            return is;
        } catch (IOException | RuntimeException | SAXException e) {
            // Nothing was handed to the caller, so free the connection here.
            if (m != null) {
                abortAndRelease(m, log4j, "");
            }
            throw e;
        }
    }

    /**
     * Parses <code>systemId</code> and verifies that its scheme is
     * <code>http</code> or <code>https</code>, reporting a fatal error to
     * the error handler before throwing.
     */
    private URL parseHttpUrl(String publicId, String systemId)
            throws SAXException, IOException {
        URL url;
        try {
            url = URL.parse(systemId);
        } catch (GalimatiasParseException e) {
            IOException ioe = new IOException(e.getMessage(), e);
            reportFatal(new SAXParseException(e.getMessage(), publicId,
                    systemId, -1, -1, ioe));
            throw ioe;
        }
        String scheme = url.scheme();
        if (!("http".equals(scheme) || "https".equals(scheme))) {
            String msg = "Unsupported URI scheme: \u201C" + scheme
                    + "\u201D.";
            SAXParseException spe = new SAXParseException(msg, publicId,
                    systemId, -1, -1, new IOException(msg));
            reportFatal(spe);
            throw spe;
        }
        return url;
    }

    /**
     * Checks the X-UA-Compatible and Content-Security-Policy response
     * headers and reports problems to the error handler, if there is one.
     */
    private void checkAdvisoryHeaders(HttpResponse response, String publicId,
            String systemId) throws SAXException {
        Header xuac = response.getFirstHeader("X-UA-Compatible");
        if (xuac != null) {
            String val = xuac.getValue().trim();
            if (!"ie=edge".equalsIgnoreCase(val) && errorHandler != null) {
                errorHandler.error(new SAXParseException(
                        "X-UA-Compatible HTTP header must have the value \u201CIE=edge\u201D,"
                                + " was \u201C" + val + "\u201D.",
                        publicId, systemId, -1, -1));
            }
        }

        Header csp = response.getFirstHeader("Content-Security-Policy");
        if (csp != null) {
            try {
                ContentSecurityPolicy.THE_INSTANCE.checkValid(csp.getValue().trim());
            } catch (DatatypeException e) {
                if (errorHandler != null) {
                    SAXParseException spe = new SAXParseException(
                            "Content-Security-Policy HTTP header: "
                                    + e.getMessage(), publicId, systemId, -1,
                            -1);
                    // Anything that is not explicitly a warning is an error.
                    if (e instanceof Html5DatatypeException
                            && ((Html5DatatypeException) e).isWarning()) {
                        errorHandler.warning(spe);
                    } else {
                        errorHandler.error(spe);
                    }
                }
            }
        }
    }

    /** Passes a fatal error to the error handler, if there is one. */
    private void reportFatal(SAXParseException spe) throws SAXException {
        if (errorHandler != null) {
            errorHandler.fatalError(spe);
        }
    }

    /**
     * Aborts the request and releases its connection; failures of either
     * step are only logged at debug level, prefixed with
     * <code>context</code>.
     */
    private static void abortAndRelease(HttpGet method, Logger logger,
            String context) {
        try {
            method.abort();
        } catch (Exception e) {
            logger.debug(context + "abort", e);
        } finally {
            try {
                method.releaseConnection();
            } catch (Exception e) {
                logger.debug(context + "releaseConnection", e);
            }
        }
    }

    /**
     * Tells whether RNC is currently allowed.
     *
     * @return the allowRnc flag
     */
    public boolean isAllowRnc() {
        return this.allowRnc;
    }

    /**
     * Stores the allowRnc flag and forwards it to the content type parser.
     *
     * @param allowRnc
     *            the new flag value
     */
    public void setAllowRnc(boolean allowRnc) {
        this.allowRnc = allowRnc;
        contentTypeParser.setAllowRnc(allowRnc);
    }

    /**
     * Tells whether CSS is currently allowed.
     *
     * @return the allowCss flag
     */
    public boolean isAllowCss() {
        return this.allowCss;
    }

    /**
     * Stores the allowCss flag and forwards it to the content type parser.
     *
     * @param allowCss
     *            the new flag value
     */
    public void setAllowCss(boolean allowCss) {
        this.allowCss = allowCss;
        contentTypeParser.setAllowCss(allowCss);
    }

    /**
     * Stores the allowHtml flag and forwards it to the content type parser.
     *
     * @param allowHtml
     *            the new flag value
     */
    public void setAllowHtml(boolean allowHtml) {
        this.allowHtml = allowHtml;
        contentTypeParser.setAllowHtml(allowHtml);
    }

    /**
     * Tells whether all known XML types are currently accepted.
     *
     * @return the acceptAllKnownXmlTypes flag
     */
    public boolean isAcceptAllKnownXmlTypes() {
        return this.acceptAllKnownXmlTypes;
    }

    /**
     * Stores the acceptAllKnownXmlTypes flag and forwards it to the content
     * type parser.
     *
     * @param acceptAllKnownXmlTypes
     *            the new flag value
     */
    public void setAcceptAllKnownXmlTypes(boolean acceptAllKnownXmlTypes) {
        this.acceptAllKnownXmlTypes = acceptAllKnownXmlTypes;
        contentTypeParser.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes);
    }

    /**
     * Tells whether generic XML is currently allowed.
     *
     * @return the allowGenericXml flag
     */
    public boolean isAllowGenericXml() {
        return this.allowGenericXml;
    }

    /**
     * Stores the allowGenericXml flag and forwards it to the content type
     * parser.
     *
     * @param allowGenericXml
     *            the new flag value
     */
    public void setAllowGenericXml(boolean allowGenericXml) {
        this.allowGenericXml = allowGenericXml;
        contentTypeParser.setAllowGenericXml(allowGenericXml);
    }

    /**
     * Tells whether XHTML is currently allowed.
     *
     * @return the allowXhtml flag
     */
    public boolean isAllowXhtml() {
        return this.allowXhtml;
    }

    /**
     * Stores the allowXhtml flag and forwards it to the content type parser.
     *
     * @param allowXhtml
     *            the new flag value
     */
    public void setAllowXhtml(boolean allowXhtml) {
        this.allowXhtml = allowXhtml;
        contentTypeParser.setAllowXhtml(allowXhtml);
    }

    /**
     * Produces the value of the <code>Accept</code> request header. The
     * value is fixed and does not depend on the allow* flags.
     *
     * @return the Accept header value
     */
    private String buildAccept() {
        final String accept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
        return accept;
    }

    /**
     * Tells whether HTML is currently allowed.
     *
     * @return the allowHtml flag
     */
    public boolean isAllowHtml() {
        return this.allowHtml;
    }

    /**
     * Tells whether generic XML, RNC, CSS and XHTML are all disallowed.
     * The allowHtml flag itself is not consulted.
     *
     * @return <code>true</code> if none of the four non-HTML flags is set
     */
    public boolean isOnlyHtmlAllowed() {
        boolean anyNonHtmlAllowed = isAllowGenericXml() || isAllowRnc()
                || isAllowCss() || isAllowXhtml();
        return !anyNonHtmlAllowed;
    }

    /**
     * Thrown when the remote server answers with an HTTP status that makes
     * the resource unusable.
     *
     * NOTE(review): this is an inner (non-static) class, so every instance
     * keeps a reference to the enclosing resolver; consider making it a
     * static nested class if no caller relies on the current form.
     */
    public class ResourceNotRetrievableException extends SAXException {
        // SAXException is serializable; pin the stream version explicitly.
        private static final long serialVersionUID = 1L;

        /**
         * @param message
         *            human-readable description of the failure
         */
        public ResourceNotRetrievableException(String message) {
            super(message);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy