All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.htmlunit.Cache Maven / Gradle / Ivy

Go to download

XLT (Xceptance LoadTest) is an extensive load and performance test tool developed and maintained by Xceptance.

The newest version!
/*
 * Copyright (c) 2002-2024 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.htmlunit;

import java.io.Serializable;
import java.net.URL;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.htmlunit.cssparser.dom.CSSStyleSheetImpl;
import org.htmlunit.httpclient.HttpClientConverter;
import org.htmlunit.util.HeaderUtils;
import org.htmlunit.util.UrlUtils;

/**
 * 

Simple cache implementation which caches compiled JavaScript files and parsed CSS snippets. Caching * compiled JavaScript files avoids unnecessary web requests and additional compilation overhead, while * caching parsed CSS snippets avoids very expensive CSS parsing.

* * @author Marc Guillemot * @author Daniel Gredler * @author Ahmed Ashour * @author Anton Demydenko * @author Ronald Brill * @author Ashley Frieze */ public class Cache implements Serializable { /** The maximum size of the cache. */ private int maxSize_ = 40; private static final Pattern DATE_HEADER_PATTERN = Pattern.compile("-?\\d+"); static final long DELAY = 10 * org.apache.commons.lang3.time.DateUtils.MILLIS_PER_MINUTE; // for taking ten percent of a number in milliseconds and converting that to the amount in seconds private static final double TEN_PERCENT_OF_MILLISECONDS_IN_SECONDS = 0.0001; /** * The map which holds the cached responses. Note that when keying on URLs, we key on the string version * of the URLs, rather than on the URLs themselves. This is done for performance, because a) the * {@link java.net.URL#hashCode()} method is synchronized, and b) the {@link java.net.URL#hashCode()} * method triggers DNS lookups of the URL hostnames' IPs. As of this writing, the HtmlUnit unit tests * run ~20% faster whey keying on strings rather than on {@link java.net.URL} instances. */ private final Map entries_ = Collections.synchronizedMap(new HashMap<>(maxSize_)); /** * A cache entry. */ private static class Entry implements Comparable, Serializable { private final String key_; private final WebResponse response_; private final Object value_; private long lastAccess_; private final long createdAt_; Entry(final String key, final WebResponse response, final Object value) { key_ = key; response_ = response; value_ = value; createdAt_ = System.currentTimeMillis(); lastAccess_ = createdAt_; } /** * {@inheritDoc} */ @Override public int compareTo(final Entry other) { return Long.compare(lastAccess_, other.lastAccess_); } /** * {@inheritDoc} */ @Override public boolean equals(final Object obj) { return obj instanceof Entry && lastAccess_ == ((Entry) obj).lastAccess_; } /** * {@inheritDoc} */ @Override public int hashCode() { return ((Long) lastAccess_).hashCode(); } /** * Updates the last access date. */ public void touch() { lastAccess_ = System.currentTimeMillis(); } /** * Is this cached entry still fresh? * @param now the current time * @return true if can keep in the cache * @see #isWithinCacheWindow(WebResponse, long, long) */ boolean isStillFresh(final long now) { return Cache.isWithinCacheWindow(response_, now, createdAt_); } } /** *

Find expiry time using * a) s-maxage specified
* b) max-age specified
* c) expired specified
* d) A Last-Update is specified and the time is now within 10% of the difference between download time and update * time

* * @see RFC 7234 * * @param response {@link WebResponse} * @param now the current time * @param createdAt when the request was downloaded * @return true if still fresh */ static boolean isWithinCacheWindow(final WebResponse response, final long now, final long createdAt) { long freshnessLifetime = 0; if (!HeaderUtils.containsPrivate(response) && HeaderUtils.containsSMaxage(response)) { // check s-maxage freshnessLifetime = HeaderUtils.sMaxage(response); } else if (HeaderUtils.containsMaxAge(response)) { // check max-age freshnessLifetime = HeaderUtils.maxAge(response); } else if (response.getResponseHeaderValue(HttpHeader.EXPIRES) != null) { final Date expires = parseDateHeader(response, HttpHeader.EXPIRES); if (expires != null) { // use the same logic as in isCacheableContent() return expires.getTime() - now > DELAY; } } else if (response.getResponseHeaderValue(HttpHeader.LAST_MODIFIED) != null) { final Date lastModified = parseDateHeader(response, HttpHeader.LAST_MODIFIED); if (lastModified != null) { freshnessLifetime = (long) ((createdAt - lastModified.getTime()) * TEN_PERCENT_OF_MILLISECONDS_IN_SECONDS); } } return now - createdAt < freshnessLifetime * org.apache.commons.lang3.time.DateUtils.MILLIS_PER_SECOND; } /** * Caches the specified object, if the corresponding request and response objects indicate * that it is cacheable. * * @param request the request corresponding to the specified compiled script * @param response the response corresponding to the specified compiled script * @param toCache the object that is to be cached, if possible (may be for instance a compiled script or * simply a WebResponse) * @return whether the response was cached or not */ public boolean cacheIfPossible(final WebRequest request, final WebResponse response, final Object toCache) { if (isCacheable(request, response)) { final URL url = request.getUrl(); if (url == null) { return false; } final Entry entry = new Entry(UrlUtils.normalize(url), response, toCache); entries_.put(entry.key_, entry); deleteOverflow(); return true; } return false; } /** * Caches the parsed version of the specified CSS snippet. We key the cache based on CSS snippets (rather * than requests and responses as is done above) because a) this allows us to cache inline CSS, b) CSS is * extremely expensive to parse, so we want to avoid it as much as possible, c) CSS files aren't usually * nearly as large as JavaScript files, so memory bloat won't be too bad, and d) caching on requests and * responses requires checking dynamically (see {@link #isCacheableContent(WebResponse)}), and headers often * aren't set up correctly, disallowing caching when in fact it should be allowed. * * @param css the CSS snippet from which styleSheet is derived * @param styleSheet the parsed version of css */ public void cache(final String css, final CSSStyleSheetImpl styleSheet) { final Entry entry = new Entry(css, null, styleSheet); entries_.put(entry.key_, entry); deleteOverflow(); } /** * Truncates the cache to the maximal number of entries. */ protected void deleteOverflow() { synchronized (entries_) { while (entries_.size() > maxSize_) { final Entry oldestEntry = Collections.min(entries_.values()); entries_.remove(oldestEntry.key_); if (oldestEntry.response_ != null) { oldestEntry.response_.cleanUp(); } } } } /** * Determines if the specified response can be cached. * * @param request the performed request * @param response the received response * @return {@code true} if the response can be cached */ protected boolean isCacheable(final WebRequest request, final WebResponse response) { return HttpMethod.GET == response.getWebRequest().getHttpMethod() && UrlUtils.URL_ABOUT_BLANK != request.getUrl() && isCacheableContent(response); } /** *

Perform prior validation for 'no-store' directive in Cache-Control header.

* *

Tries to guess if the content is dynamic or not.

* *

"Since origin servers do not always provide explicit expiration times, HTTP caches typically * assign heuristic expiration times, employing algorithms that use other header values (such as the * Last-Modified time) to estimate a plausible expiration time".

* *

The current implementation considers as dynamic content everything except responses with a * Last-Modified header with a date older than 10 minutes or with an Expires header * specifying expiration in more than 10 minutes.

* * @see RFC 7234 * @see RFC 2616 * @param response the response to examine * @return {@code true} if the response should be considered as cacheable */ protected boolean isCacheableContent(final WebResponse response) { if (HeaderUtils.containsNoStore(response)) { return false; } final long now = getCurrentTimestamp(); return isWithinCacheWindow(response, now, now); } /** * Gets the current time stamp. As method to allow overriding it, when simulating another time. * @return the current time stamp */ protected long getCurrentTimestamp() { return System.currentTimeMillis(); } /** * Parses and returns the specified date header of the specified response. This method * returns {@code null} if the specified header cannot be found or cannot be parsed as a date. * * @param response the response * @param headerName the header name * @return the specified date header of the specified response */ protected static Date parseDateHeader(final WebResponse response, final String headerName) { final String value = response.getResponseHeaderValue(headerName); if (value == null) { return null; } final Matcher matcher = DATE_HEADER_PATTERN.matcher(value); if (matcher.matches()) { return new Date(); } return HttpClientConverter.parseHttpDate(value); } /** * Returns the cached response corresponding to the specified request. If there is * no corresponding cached object, this method returns {@code null}. * *

Calculates and check if object still fresh(RFC 7234) otherwise returns {@code null}.

* @see RFC 7234 * * @param request the request whose corresponding response is sought * @return the cached response corresponding to the specified request if any */ public WebResponse getCachedResponse(final WebRequest request) { final Entry cachedEntry = getCacheEntry(request); if (cachedEntry == null) { return null; } return cachedEntry.response_; } /** * Returns the cached object corresponding to the specified request. If there is * no corresponding cached object, this method returns {@code null}. * *

Calculates and check if object still fresh(RFC 7234) otherwise returns {@code null}.

* @see RFC 7234 * * @param request the request whose corresponding cached compiled script is sought * @return the cached object corresponding to the specified request if any */ public Object getCachedObject(final WebRequest request) { final Entry cachedEntry = getCacheEntry(request); if (cachedEntry == null) { return null; } return cachedEntry.value_; } private Entry getCacheEntry(final WebRequest request) { if (HttpMethod.GET != request.getHttpMethod()) { return null; } final URL url = request.getUrl(); if (url == null) { return null; } final String normalizedUrl = UrlUtils.normalize(url); final Entry cachedEntry = entries_.get(normalizedUrl); if (cachedEntry == null) { return null; } if (cachedEntry.isStillFresh(getCurrentTimestamp())) { synchronized (entries_) { cachedEntry.touch(); } return cachedEntry; } entries_.remove(UrlUtils.normalize(url)); return null; } /** * Returns the cached parsed version of the specified CSS snippet. If there is no * corresponding cached stylesheet, this method returns {@code null}. * * @param css the CSS snippet whose cached stylesheet is sought * @return the cached stylesheet corresponding to the specified CSS snippet */ public CSSStyleSheetImpl getCachedStyleSheet(final String css) { final Entry cachedEntry = entries_.get(css); if (cachedEntry == null) { return null; } synchronized (entries_) { cachedEntry.touch(); } return (CSSStyleSheetImpl) cachedEntry.value_; } /** * Returns the cache's maximum size. This is the maximum number of files that will * be cached. The default is 25. * * @return the cache's maximum size */ public int getMaxSize() { return maxSize_; } /** * Sets the cache's maximum size. This is the maximum number of files that will * be cached. The default is 25. * * @param maxSize the cache's maximum size (must be >= 0) */ public void setMaxSize(final int maxSize) { if (maxSize < 0) { throw new IllegalArgumentException("Illegal value for maxSize: " + maxSize); } maxSize_ = maxSize; deleteOverflow(); } /** * Returns the number of entries in the cache. * * @return the number of entries in the cache */ public int getSize() { return entries_.size(); } /** * Clears the cache. */ public void clear() { synchronized (entries_) { for (final Entry entry : entries_.values()) { if (entry.response_ != null) { entry.response_.cleanUp(); } } entries_.clear(); } } /** * Removes outdated entries from the cache. */ public void clearOutdated() { synchronized (entries_) { final long now = getCurrentTimestamp(); entries_.entrySet().removeIf(entry -> entry.getValue().response_ == null || !entry.getValue().isStillFresh(now)); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy