All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.xceptance.xlt.engine.CachingHttpWebConnection Maven / Gradle / Ivy

/*
 * Copyright (c) 2005-2024 Xceptance Software Technologies GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.xceptance.xlt.engine;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.htmlunit.HttpMethod;
import org.htmlunit.WebConnection;
import org.htmlunit.WebRequest;
import org.htmlunit.WebResponse;

import com.xceptance.common.collection.ConcurrentLRUCache;
import com.xceptance.common.net.HttpHeaderConstants;
import com.xceptance.xlt.api.util.XltLogger;
import com.xceptance.xlt.api.util.XltProperties;
import com.xceptance.xlt.common.XltConstants;

/**
 * The CachingHttpWebConnection class adds caching capabilities to the standard HttpWebConnection. Like a browser, it
 * tries to minimize the network load when querying resources. Resources loaded the first time are put into a cache.
 * Once a previously loaded resource is requested again, it is served from the cache. If the cache time of the content
 * has expired, the server is asked to revalidate the content using a conditional GET request with the
 * "If-Modified-Since" and/or "If-None-Match" headers set. The cache is updated only if there is new data.
 * 
 * @author Jörg Werner (Xceptance Software Technologies GmbH)
 */
public class CachingHttpWebConnection extends WebConnectionWrapper
{
    /**
     * The date format used in HTTP header values, for example: "Tue, 11 Sep 2007 08:01:38 GMT".
     */
    private static final String HEADER_DATE_FORMAT = "EEE, d MMM yyyy HH:mm:ss z";

    /**
     * Matches the max-age directive (the literal value of {@link HttpHeaderConstants#MAX_AGE} followed by "=" and one
     * or more digits). The digits are available as capturing group 1.
     */
    private static final Pattern MAX_AGE_PATTERN = Pattern.compile(Pattern.quote(HttpHeaderConstants.MAX_AGE) + "=(\\d+)");

    /**
     * An entry in the response cache. It bundles a cached web response with its expiration time and the validator
     * values that are sent along when the response needs to be revalidated.
     */
    private static class CacheEntry
    {
        /**
         * The time at which the cached response expires, as computed by
         * {@link CachingHttpWebConnection#determineExpirationTime(WebResponse)}. It is compared against
         * {@link System#currentTimeMillis()}.
         */
        public long expires;

        /**
         * The last modified date of the resource (as returned from the server).
         */
        public String lastModified;

        /**
         * The etag value of the resource (as returned from the server).
         */
        public String etag;

        /**
         * The web response.
         */
        public WebResponse webResponse;
    }

    /**
     * Tries to determine the time when the given web response expires. This is done by examining the response
     * headers in the following order; the first header that yields a result wins:
     * <ol>
     * <li>"Pragma": a "no-cache" value lets the response expire immediately.</li>
     * <li>"Cache-Control": a "no-cache" or "must-revalidate" value lets the response expire immediately; otherwise
     * a "max-age" value (in seconds) is added to the current time. "no-store" is deliberately not evaluated since
     * this cache is a volatile in-memory cache.</li>
     * <li>"Expires": either "0" (expires immediately) or a date in RFC 1123 format.</li>
     * <li>"Last-Modified": 10% of the current age of the resource is used as a heuristic life time.</li>
     * </ol>
     * A return value that is not greater than the current time denotes a web response that expired already. In
     * particular, 0 is returned if the response must not be cached, if a date header cannot be parsed, or if there
     * is no usable expiration information at all.
     * 
     * @param webResponse
     *            the web response to check
     * @return the expiration time in milliseconds since the epoch
     */
    public static long determineExpirationTime(final WebResponse webResponse)
    {
        // check the "Pragma" header
        final String pragma = webResponse.getResponseHeaderValue(HttpHeaderConstants.PRAGMA);
        if (pragma != null && pragma.length() > 0)
        {
            // is there a "no-cache" value?
            if (pragma.contains(HttpHeaderConstants.NO_CACHE))
            {
                return 0;
            }
        }

        // check the "Cache-Control" header
        final String cacheControl = webResponse.getResponseHeaderValue(HttpHeaderConstants.CACHE_CONTROL);
        if (cacheControl != null && cacheControl.length() > 0)
        {
            // is there a "no-cache" value?
            if (cacheControl.contains(HttpHeaderConstants.NO_CACHE))
            {
                return 0;
            }

            // we are a volatile cache in memory, so we do not need to obey the
            // no-store

            // is there a "must-revalidate" value?
            if (cacheControl.contains(HttpHeaderConstants.MUST_REVALIDATE))
            {
                return 0;
            }

            // is there a "max-age=" value? (Format: "max-age=65289")
            final Matcher m = MAX_AGE_PATTERN.matcher(cacheControl);
            if (m.find())
            {
                try
                {
                    final long maxAge = Long.parseLong(m.group(1));

                    try
                    {
                        return Math.addExact(System.currentTimeMillis(), Math.multiplyExact(maxAge, 1000L));
                    }
                    catch (final ArithmeticException ex)
                    {
                        // the life time is too large to be represented -> treat as "never expires" instead of
                        // silently wrapping around to a bogus (possibly negative) time stamp
                        return Long.MAX_VALUE;
                    }
                }
                catch (final NumberFormatException ex)
                {
                    // the digits do not fit into a long -> ignore, try next header
                }
            }
        }

        // check the "Expires" header
        final String expires = webResponse.getResponseHeaderValue(HttpHeaderConstants.EXPIRES);
        if (expires != null && expires.length() > 0)
        {
            if (expires.trim().equals("0"))
            {
                // '0' is commonly used to indicate that a response expires immediately, i.e. is not cacheable
                return 0;
            }
            else
            {
                // SimpleDateFormat is not thread-safe, so a fresh instance is created per call
                final SimpleDateFormat dateParser = new SimpleDateFormat(HEADER_DATE_FORMAT, Locale.ENGLISH);

                try
                {
                    return dateParser.parse(expires).getTime();
                }
                catch (final ParseException ex)
                {
                    if (XltLogger.runTimeLogger.isWarnEnabled())
                    {
                        XltLogger.runTimeLogger.warn("Header " + HttpHeaderConstants.EXPIRES +
                                                     " does not match a valid date format. Check RFC 2616. Should be " +
                                                     "a valid RFC 1123 format, such as 'Thu, 01 Dec 1994 16:00:00 GMT', but was '" +
                                                     expires + "'.");
                    }

                    // invalid date -> expires immediately
                    return 0;
                }
            }
        }

        // check the "Last-Modified" header
        final String lastModified = webResponse.getResponseHeaderValue(HttpHeaderConstants.LAST_MODIFIED);
        if (lastModified != null && lastModified.length() > 0)
        {
            // SimpleDateFormat is not thread-safe, so a fresh instance is created per call
            final SimpleDateFormat dateParser = new SimpleDateFormat(HEADER_DATE_FORMAT, Locale.ENGLISH);

            try
            {
                final long lastModifiedTime = dateParser.parse(lastModified).getTime();
                final long now = System.currentTimeMillis();
                final long age = Math.max(now - lastModifiedTime, 0);

                if (age == 0)
                {
                    // modified right now or in the future -> no basis for a heuristic, expires immediately
                    return 0;
                }
                else
                {
                    // use 10% of the current age as a heuristic expiration value
                    return now + age / 10;
                }
            }
            catch (final ParseException ex)
            {
                if (XltLogger.runTimeLogger.isWarnEnabled())
                {
                    // report the offending Last-Modified value (not the value of the Expires header)
                    XltLogger.runTimeLogger.warn("Header " + HttpHeaderConstants.LAST_MODIFIED +
                                                 " does not match a valid date format. Check RFC 2616. Should be " +
                                                 "a valid RFC 1123 format, such as 'Thu, 01 Dec 1994 16:00:00 GMT', but was '" +
                                                 lastModified + "'.");
                }

                // invalid date -> expires immediately
                return 0;
            }
        }

        // expires immediately
        return 0;
    }

    /**
     * A cache that maps URL strings to the cache entries holding the corresponding web responses. It is
     * <code>null</code> if cache usage is disabled.
     */
    private final ConcurrentLRUCache<String, CacheEntry> cache;

    /**
     * Is true when cache usage is enabled.
     */
    private final boolean useCache;

    /**
     * Creates a new CachingHttpWebConnection. Whether the cache is used at all is read from the boolean property
     * <code>XltConstants.XLT_PACKAGE_PATH + ".staticContentCache"</code> (default: false). If it is enabled, the
     * cache size is read from the property <code>XltConstants.XLT_PACKAGE_PATH + ".staticContentCache.size"</code>
     * (default: 100). A configured size below {@link ConcurrentLRUCache#MIN_SIZE} is raised to that minimum and a
     * warning is logged.
     * 
     * @param webConnection
     *            the underlying web connection to use
     */
    public CachingHttpWebConnection(final WebConnection webConnection)
    {
        super(webConnection);

        useCache = XltProperties.getInstance().getProperty(XltConstants.XLT_PACKAGE_PATH + ".staticContentCache", false);
        if (useCache)
        {
            final int cacheSize = XltProperties.getInstance().getProperty(XltConstants.XLT_PACKAGE_PATH + ".staticContentCache.size", 100);
            if (cacheSize < ConcurrentLRUCache.MIN_SIZE)
            {
                XltLogger.runTimeLogger.warn("Size of static content cache is lower than minimum size of " + ConcurrentLRUCache.MIN_SIZE +
                                             ". Will use the minimum size.");
            }

            // use the diamond instead of the raw type so that the cache is instantiated in a type-safe manner
            cache = new ConcurrentLRUCache<>(Math.max(cacheSize, ConcurrentLRUCache.MIN_SIZE));
        }
        else
        {
            // caching is disabled -> no cache needed
            cache = null;
        }
    }

    /**
     * Returns the web response for the given request, consulting the response cache where possible.
     * <p>
     * The cache is used only if caching is enabled and the request is a GET; any other request is passed on
     * unconditionally. A cached response that is not expired yet is returned right away. An expired cached response
     * is revalidated with a conditional GET that carries the stored last-modified and etag values.
     * 
     * @param webRequest
     *            the request parameters
     * @return the web response loaded, or the cached one
     * @throws IOException
     *             if something went wrong
     */
    @Override
    public WebResponse getResponse(final WebRequest webRequest) throws IOException
    {
        // the cache applies only if it is enabled and the request is a GET
        final boolean cacheApplies = useCache && webRequest.getHttpMethod().equals(HttpMethod.GET);
        if (!cacheApplies)
        {
            return getResponse(webRequest, null, null);
        }

        final String cacheKey = webRequest.getUrl().toExternalForm();
        final CacheEntry entry = cache.get(cacheKey);

        // case 1: nothing cached for this URL yet
        if (entry == null)
        {
            // load the response the regular way
            final WebResponse loaded = getResponse(webRequest, null, null);

            // remember it only if it was successful and does not expire right away
            final long loadedExpiry = determineExpirationTime(loaded);
            if (loaded.getStatusCode() == 200 && loadedExpiry > System.currentTimeMillis())
            {
                final CacheEntry created = new CacheEntry();

                created.webResponse = loaded;
                created.expires = loadedExpiry;
                created.lastModified = loaded.getResponseHeaderValue(HttpHeaderConstants.LAST_MODIFIED);
                created.etag = loaded.getResponseHeaderValue(HttpHeaderConstants.ETAG);

                cache.put(cacheKey, created);
            }

            return loaded;
        }

        // case 2: cached and still valid -> serve from the cache
        if (entry.expires >= System.currentTimeMillis())
        {
            if (XltLogger.runTimeLogger.isInfoEnabled())
            {
                XltLogger.runTimeLogger.info("Return cached response for " + webRequest.getUrl());
            }

            return entry.webResponse;
        }

        // case 3: cached, but expired -> revalidate the cached response (using a conditional GET)
        final WebResponse revalidated = getResponse(webRequest, entry.lastModified, entry.etag);

        final long revalidatedExpiry = determineExpirationTime(revalidated);
        final boolean cacheable = revalidatedExpiry > System.currentTimeMillis();
        final int statusCode = revalidated.getStatusCode();

        if (cacheable)
        {
            // the response carries usable expiration info -> extend the life time of the entry
            entry.expires = revalidatedExpiry;
        }

        if (cacheable && statusCode == 200)
        {
            // the server sent new content -> replace the cached data
            entry.webResponse = revalidated;
            entry.lastModified = revalidated.getResponseHeaderValue(HttpHeaderConstants.LAST_MODIFIED);
            entry.etag = revalidated.getResponseHeaderValue(HttpHeaderConstants.ETAG);

            return revalidated;
        }

        if (statusCode == 304)
        {
            // the content did not change, so the cached response is still good, no matter whether
            // the 304 response itself came with expiration info or not
            return entry.webResponse;
        }

        // neither cacheable new content nor "not modified" -> drop the entry and pass on what we got
        cache.remove(cacheKey);

        return revalidated;
    }

    /**
     * Loads the web response for the given request via the wrapped web connection. If a last-modified date is
     * given (neither <code>null</code> nor empty), it is sent as "If-Modified-Since" request header. If an etag
     * value is given (neither <code>null</code> nor blank), it is sent as "If-None-Match" request header. If none of
     * them is given, a "normal" request without additional headers is executed.
     * 
     * @param webRequest
     *            the request parameters
     * @param lastModifiedHeader
     *            the last-modified date (may be <code>null</code> or empty)
     * @param etag
     *            the etag value (may be <code>null</code> or blank)
     * @return the web response loaded
     * @throws IOException
     *             if something went wrong
     */
    protected WebResponse getResponse(final WebRequest webRequest, final String lastModifiedHeader, final String etag) throws IOException
    {
        // a last-modified date is usable unless it is missing or empty
        final boolean haveLastModified = !(lastModifiedHeader == null || lastModifiedHeader.isEmpty());
        if (haveLastModified)
        {
            webRequest.setAdditionalHeader(HttpHeaderConstants.IF_MODIFIED_SINCE, lastModifiedHeader);
        }

        // an etag is usable unless it is missing or consists of whitespace only
        final boolean haveEtag = !StringUtils.isBlank(etag);
        if (haveEtag)
        {
            webRequest.setAdditionalHeader(HttpHeaderConstants.IF_NONE_MATCH, etag);
        }

        // let the wrapped connection do the actual work
        return getWrappedWebConnection().getResponse(webRequest);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy